zstd (1.5.5)

Browse
Build Log
Usage
       1  /*
       2   * Copyright (c) Meta Platforms, Inc. and affiliates.
       3   * All rights reserved.
       4   *
       5   * This source code is licensed under both the BSD-style license (found in the
       6   * LICENSE file in the root directory of this source tree) and the GPLv2 (found
       7   * in the COPYING file in the root directory of this source tree).
       8   * You may select, at your option, one of the above-listed licenses.
       9   */
      10  
      11  #if defined (__cplusplus)
      12  extern "C" {
      13  #endif
      14  
      15  #ifndef ZSTD_ZDICT_H
      16  #define ZSTD_ZDICT_H
      17  
      18  /*======  Dependencies  ======*/
      19  #include <stddef.h>  /* size_t */
      20  
      21  
      22  /* =====   ZDICTLIB_API : control library symbols visibility   ===== */
      23  #ifndef ZDICTLIB_VISIBLE   /* honor a definition supplied before this header is included */
      24     /* Backwards compatibility with old macro name */
      25  #  ifdef ZDICTLIB_VISIBILITY
      26  #    define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
      27  #  elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
      28  #    define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
      29  #  else
      30  #    define ZDICTLIB_VISIBLE   /* other toolchains: expands to nothing */
      31  #  endif
      32  #endif   /* ZDICTLIB_VISIBLE */
      33  
      34  #ifndef ZDICTLIB_HIDDEN   /* honor a definition supplied before this header is included */
      35  #  if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
      36  #    define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
      37  #  else
      38  #    define ZDICTLIB_HIDDEN   /* other toolchains: expands to nothing */
      39  #  endif
      40  #endif   /* ZDICTLIB_HIDDEN */
      41  
      42  #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)   /* ZSTD_DLL_EXPORT set to 1: mark public symbols dllexport */
      43  #  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
      44  #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
      45  #  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* Not strictly required, but it lets the compiler generate better code by saving a function pointer load from the IAT and an indirect jump. */
      46  #else
      47  #  define ZDICTLIB_API ZDICTLIB_VISIBLE   /* neither DLL macro is set to 1: visibility attribute only */
      48  #endif   /* ZDICTLIB_API */
      49  
      50  /*******************************************************************************
      51   * Zstd dictionary builder
      52   *
      53   * FAQ
      54   * ===
      55   * Why should I use a dictionary?
      56   * ------------------------------
      57   *
      58   * Zstd can use dictionaries to improve compression ratio of small data.
      59   * Traditionally small files don't compress well because there is very little
      60   * repetition in a single sample, since it is small. But, if you are compressing
      61   * many similar files, like a bunch of JSON records that share the same
      62   * structure, you can train a dictionary ahead of time on some samples of
      63   * these files. Then, zstd can use the dictionary to find repetitions that are
      64   * present across samples. This can vastly improve compression ratio.
      65   *
      66   * When is a dictionary useful?
      67   * ----------------------------
      68   *
      69   * Dictionaries are useful when compressing many small files that are similar.
      70   * The larger a file is, the less benefit a dictionary will have. Generally,
      71   * we don't expect dictionary compression to be effective past 100KB. And the
      72   * smaller a file is, the more we would expect the dictionary to help.
      73   *
      74   * How do I use a dictionary?
      75   * --------------------------
      76   *
      77   * Simply pass the dictionary to the zstd compressor with
      78   * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
      79   * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
      80   * more advanced functions that allow selecting some options, see zstd.h for
      81   * complete documentation.
      82   *
      83   * What is a zstd dictionary?
      84   * --------------------------
      85   *
      86   * A zstd dictionary has two pieces: Its header, and its content. The header
      87   * contains a magic number, the dictionary ID, and entropy tables. These
      88   * entropy tables allow zstd to save on header costs in the compressed file,
      89   * which really matters for small data. The content is just bytes, which are
      90   * repeated content that is common across many samples.
      91   *
      92   * What is a raw content dictionary?
      93   * ---------------------------------
      94   *
      95   * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
      96   * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
      97   * content dictionary.
      98   *
      99   * How do I train a dictionary?
     100   * ----------------------------
     101   *
     102   * Gather samples from your use case. These samples should be similar to each
     103   * other. If you have several use cases, you could try to train one dictionary
     104   * per use case.
     105   *
     106   * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
     107   * dictionary. There are a few advanced versions of this function, but this
     108   * is a great starting point. If you want to further tune your dictionary
     109   * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
     110   * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
     111   *
     112   * If the dictionary training function fails, that is likely because you
     113   * either passed too few samples, or a dictionary would not be effective
     114   * for your data. Look at the messages that the dictionary trainer printed;
     115   * if it doesn't say too few samples, then a dictionary would not be effective.
     116   *
     117   * How large should my dictionary be?
     118   * ----------------------------------
     119   *
     120   * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
     121   * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
     122   * dictionary larger than that. But, most use cases can get away with a
     123   * smaller dictionary. The advanced dictionary builders can automatically
     124   * shrink the dictionary for you, and select the smallest size that doesn't
     125   * hurt compression ratio too much. See the `shrinkDict` parameter.
     126   * A smaller dictionary can save memory, and potentially speed up
     127   * compression.
     128   *
     129   * How many samples should I provide to the dictionary builder?
     130   * ------------------------------------------------------------
     131   *
     132   * We generally recommend passing ~100x the size of the dictionary
     133   * in samples. A few thousand should suffice. Having too few samples
     134   * can hurt the dictionary's effectiveness. Having more samples will
     135   * only improve the dictionary's effectiveness. But having too many
     136   * samples can slow down the dictionary builder.
     137   *
     138   * How do I determine if a dictionary will be effective?
     139   * -----------------------------------------------------
     140   *
     141   * Simply train a dictionary and try it out. You can use zstd's built-in
     142   * benchmarking tool to test the dictionary effectiveness.
     143   *
     144   *   # Benchmark levels 1-3 without a dictionary
     145   *   zstd -b1e3 -r /path/to/my/files
     146   *   # Benchmark levels 1-3 with a dictionary
     147   *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
     148   *
     149   * When should I retrain a dictionary?
     150   * -----------------------------------
     151   *
     152   * You should retrain a dictionary when its effectiveness drops. Dictionary
     153   * effectiveness drops as the data you are compressing changes. Generally, we do
     154   * expect dictionaries to "decay" over time, as your data changes, but the rate
     155   * at which they decay depends on your use case. Internally, we regularly
     156   * retrain dictionaries, and if the new dictionary performs significantly
     157   * better than the old dictionary, we will ship the new dictionary.
     158   *
     159   * I have a raw content dictionary, how do I turn it into a zstd dictionary?
     160   * -------------------------------------------------------------------------
     161   *
     162   * If you have a raw content dictionary, e.g. by manually constructing it, or
     163   * using a third-party dictionary builder, you can turn it into a zstd
     164   * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
     165   * provide some samples of the data. It will add the zstd header to the
     166   * raw content, which contains a dictionary ID and entropy tables, which
     167   * will improve compression ratio, and allow zstd to write the dictionary ID
     168   * into the frame, if you so choose.
     169   *
     170   * Do I have to use zstd's dictionary builder?
     171   * -------------------------------------------
     172   *
     173   * No! You can construct dictionary content however you please, it is just
     174   * bytes. It will always be valid as a raw content dictionary. If you want
     175   * a zstd dictionary, which can improve compression ratio, use
     176   * `ZDICT_finalizeDictionary()`.
     177   *
     178   * What is the attack surface of a zstd dictionary?
     179   * ------------------------------------------------
     180   *
     181   * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
     182   * zstd should never crash, or access out-of-bounds memory no matter what
     183   * the dictionary is. However, if an attacker can control the dictionary
     184   * during decompression, they can cause zstd to generate arbitrary bytes,
     185   * just like if they controlled the compressed data.
     186   *
     187   ******************************************************************************/
     188  
     189  
     190  /*! ZDICT_trainFromBuffer():
     191   *  Train a dictionary from an array of samples.
     192   *  Redirects to ZDICT_optimizeTrainFromBuffer_fastCover(), single-threaded, with d=8, steps=4,
     193   *  f=20, and accel=1.
     194   *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
     195   *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
     196   *  The resulting dictionary will be saved into `dictBuffer`.
     197   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     198   *          or an error code, which can be tested with ZDICT_isError().
     199   *  Note:  Dictionary training will fail if there are not enough samples to construct a
     200   *         dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
     201   *         If dictionary training fails, you should use zstd without a dictionary, as the dictionary
     202   *         would have been ineffective anyway. If you believe your samples would benefit from a dictionary,
     203   *         please open an issue with details, and we can look into it.
     204   *  Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
     205   *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
     206   *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
     207   *        In general, it's recommended to provide a few thousand samples, though this can vary a lot.
     208   *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
     209   */
     210  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
     211                                      const void* samplesBuffer,
     212                                      const size_t* samplesSizes, unsigned nbSamples);
     213  
     214  typedef struct {
     215      int      compressionLevel;   /**< optimize for a specific zstd compression level; 0 means default */
     216      unsigned notificationLevel;  /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug */
     217      unsigned dictID;             /**< force dictID value; 0 means auto mode (32-bit random value)
     218                                    *   NOTE: The zstd format reserves some dictionary IDs for future use.
     219                                    *         You may use them in private settings, but be warned that they
     220                                    *         may be used by zstd in a public dictionary registry in the future.
     221                                    *         These reserved dictionary IDs are:
     222                                    *           - low range  : <= 32767
     223                                    *           - high range : >= (2^31)
     224                                    */
     225  } ZDICT_params_t;
     226  
     227  /*! ZDICT_finalizeDictionary():
     228   * Given a custom content as a basis for dictionary, and a set of samples,
     229   * finalize dictionary by adding headers and statistics according to the zstd
     230   * dictionary format.
     231   *
     232   * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
     233   * supplied with an array of sizes `samplesSizes`, providing the size of each
     234   * sample in order. The samples are used to construct the statistics, so they
     235   * should be representative of what you will compress with this dictionary.
     236   *
     237   * The compression level can be set in `parameters`. You should pass the
     238   * compression level you expect to use in production. The statistics for each
     239   * compression level differ, so tuning the dictionary for the compression level
     240   * can help quite a bit.
     241   *
     242   * You can set an explicit dictionary ID in `parameters`, or allow us to pick
     243   * a random dictionary ID for you, but we can't guarantee no collisions.
     244   *
     245   * The dstDictBuffer and the dictContent may overlap, and the content will be
     246   * appended to the end of the header. If the header + the content doesn't fit in
     247   * maxDictSize, the beginning of the content is truncated to make room, since it
     248   * is presumed that the most profitable content is at the end of the dictionary,
     249   * since that is the cheapest to reference.
     250   *
     251   * `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
     252   *
     253   * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
     254   *          or an error code, which can be tested by ZDICT_isError().
     255   * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
     256   *       instructed to, using notificationLevel>0.
     257   * NOTE: This function currently may fail in several edge cases including:
     258   *         * Not enough samples
     259   *         * Samples are incompressible
     260   *         * Samples are all exactly the same
     261   */
     262  ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
     263                                  const void* dictContent, size_t dictContentSize,
     264                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
     265                                  ZDICT_params_t parameters);
     266  
     267  
     268  /*======   Helper functions   ======*/
     269  ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize);  /**< extracts dictID; @return zero if error (not a valid dictionary) */
     270  ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize);  /**< returns dict header size; returns a ZSTD error code on failure */
     271  ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);  /**< tests whether a size_t result returned by a ZDICT_* function is an error code */
     272  ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);  /**< NOTE(review): presumably a readable name for `errorCode` -- confirm against the implementation */
     273  
     274  #endif   /* ZSTD_ZDICT_H */
     275  
     276  #if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
     277  #define ZSTD_ZDICT_H_STATIC
     278  
     279  /* This can be overridden externally (by defining ZDICTLIB_STATIC_API before inclusion) to hide static symbols. */
     280  #ifndef ZDICTLIB_STATIC_API
     281  #  if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
     282  #    define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
     283  #  elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
     284  #    define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
     285  #  else
     286  #    define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE   /* neither DLL macro is set to 1: visibility attribute only */
     287  #  endif
     288  #endif   /* ZDICTLIB_STATIC_API */
     289  
     290  /* ====================================================================================
     291   * The definitions in this section are considered experimental.
     292   * They should never be used with a dynamic library, as they may change in the future.
     293   * They are provided for advanced usages.
     294   * Use them only in association with static linking.
     295   * ==================================================================================== */
     296  
     297  #define ZDICT_DICTSIZE_MIN    256   /* NOTE(review): presumably the smallest accepted dictionary size, in bytes -- confirm against the implementation */
     298  /* Deprecated: scheduled for removal in v1.6.0 */
     299  #define ZDICT_CONTENTSIZE_MIN 128
     300  
     301  /*! ZDICT_cover_params_t:
     302   *  k and d are the only required parameters.
     303   *  For the others, value 0 means default.
     304   */
     305  typedef struct {
     306      unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
     307      unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
     308      unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
     309      unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     310      double splitPoint;           /* Fraction of samples used for training : Only used for optimization : the first nbSamples * splitPoint samples are used for training, the last nbSamples * (1 - splitPoint) samples are used for testing : 0 means default (1.0) : 1.0 means all samples are used for both training and testing */
     311      unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size, then select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary : 0 means no shrinking, 1 means shrinking */
     312      unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking: a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
     313      ZDICT_params_t zParams;      /* Common parameters: compression level, notification level, dictID (see ZDICT_params_t) */
     314  } ZDICT_cover_params_t;
     315  
     316  typedef struct {
     317      unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
     318      unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
     319      unsigned f;                  /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
     320      unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
     321      unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     322      double splitPoint;           /* Fraction of samples used for training : Only used for optimization : the first nbSamples * splitPoint samples are used for training, the last nbSamples * (1 - splitPoint) samples are used for testing : 0 means default (0.75) : 1.0 means all samples are used for both training and testing */
     323      unsigned accel;              /* Acceleration level : constraint: 0 < accel <= 10 : higher means faster and less accurate : 0 means default (1) */
     324      unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size, then select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary : 0 means no shrinking, 1 means shrinking */
     325      unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking: a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
     326  
     327      ZDICT_params_t zParams;      /* Common parameters: compression level, notification level, dictID (see ZDICT_params_t) */
     328  } ZDICT_fastCover_params_t;
     329  
     330  /*! ZDICT_trainFromBuffer_cover():
     331   *  Train a dictionary from an array of samples using the COVER algorithm.
     332   *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
     333   *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
     334   *  The resulting dictionary will be saved into `dictBuffer`.
     335   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     336   *          or an error code, which can be tested with ZDICT_isError().
     337   *          See ZDICT_trainFromBuffer() for details on failure modes.
     338   *  Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
     339   *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
     340   *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
     341   *        In general, it's recommended to provide a few thousand samples, though this can vary a lot.
     342   *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
     343   */
     344  ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
     345            void *dictBuffer, size_t dictBufferCapacity,
     346      const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
     347            ZDICT_cover_params_t parameters);
     348  
     349  /*! ZDICT_optimizeTrainFromBuffer_cover():
     350   * The same requirements as for ZDICT_trainFromBuffer_cover() hold for all the parameters except `parameters`.
     351   * This function tries many parameter combinations and picks the best parameters.
     352   * `*parameters` is filled with the best parameters found,
     353   * and the dictionary constructed with those parameters is stored in `dictBuffer`.
     354   *
     355   * All of the parameters d, k, steps are optional.
     356   * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
     357   * If steps is zero, its default value (40) is used.
     358   * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
     359   *
     360   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     361   *          or an error code, which can be tested with ZDICT_isError().
     362   *          On success `*parameters` contains the parameters selected.
     363   *          See ZDICT_trainFromBuffer() for details on failure modes.
     364   * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
     365   */
     366  ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
     367            void* dictBuffer, size_t dictBufferCapacity,
     368      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
     369            ZDICT_cover_params_t* parameters);
     370  
     371  /*! ZDICT_trainFromBuffer_fastCover():
     372   *  Train a dictionary from an array of samples using a modified version of the COVER algorithm.
     373   *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
     374   *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
     375   *  d and k are required.
     376   *  All other parameters are optional; default values are used when they are not provided.
     377   *  The resulting dictionary will be saved into `dictBuffer`.
     378   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     379   *          or an error code, which can be tested with ZDICT_isError().
     380   *          See ZDICT_trainFromBuffer() for details on failure modes.
     381   *  Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
     382   *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
     383   *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
     384   *        In general, it's recommended to provide a few thousand samples, though this can vary a lot.
     385   *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
     386   */
     387  ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
     388                      size_t dictBufferCapacity, const void *samplesBuffer,
     389                      const size_t *samplesSizes, unsigned nbSamples,
     390                      ZDICT_fastCover_params_t parameters);
     391  
     392  /*! ZDICT_optimizeTrainFromBuffer_fastCover():
     393   * The same requirements as for ZDICT_trainFromBuffer_fastCover() hold for all the parameters except `parameters`.
     394   * This function tries many parameter combinations (specifically, k and d combinations)
     395   * and picks the best parameters. `*parameters` is filled with the best parameters found,
     396   * and the dictionary constructed with those parameters is stored in `dictBuffer`.
     397   * All of the parameters d, k, steps, f, and accel are optional.
     398   * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
     399   * If steps is zero, its default value (40) is used.
     400   * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
     401   * If f is zero, the default value of 20 is used.
     402   * If accel is zero, the default value of 1 is used.
     403   *
     404   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     405   *          or an error code, which can be tested with ZDICT_isError().
     406   *          On success `*parameters` contains the parameters selected.
     407   *          See ZDICT_trainFromBuffer() for details on failure modes.
     408   * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
     409   */
     410  ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
     411                      size_t dictBufferCapacity, const void* samplesBuffer,
     412                      const size_t* samplesSizes, unsigned nbSamples,
     413                      ZDICT_fastCover_params_t* parameters);
     414  
     415  typedef struct {
     416      unsigned selectivityLevel;   /* 0 means default; a larger value selects more content, producing a larger dictionary */
     417      ZDICT_params_t zParams;      /* Common parameters: compression level, notification level, dictID (see ZDICT_params_t) */
     418  } ZDICT_legacy_params_t;
     419  
     420  /*! ZDICT_trainFromBuffer_legacy():
     421   *  Train a dictionary from an array of samples.
     422   *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
     423   *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
     424   *  The resulting dictionary will be saved into `dictBuffer`.
     425   *  The fields of `parameters` may be set to 0 to mean "default".
     426   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
     427   *          or an error code, which can be tested with ZDICT_isError().
     428   *          See ZDICT_trainFromBuffer() for details on failure modes.
     429   *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
     430   *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
     431   *        In general, it's recommended to provide a few thousand samples, though this can vary a lot.
     432   *        It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
     433   *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
     434   */
     435  ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
     436      void* dictBuffer, size_t dictBufferCapacity,
     437      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
     438      ZDICT_legacy_params_t parameters);
     439  
     440  
     441  /* Deprecation warnings */
     442  /* It is generally possible to disable deprecation warnings from the compiler,
     443     for example with -Wno-deprecated-declarations for gcc
     444     or _CRT_SECURE_NO_WARNINGS in Visual.
     445     Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS. */
     446  #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
     447  #  define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
     448  #else
     449  #  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
     450  #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
     451  #    define ZDICT_DEPRECATED(message) [[deprecated(message)]]
     452  #  elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)   /* clang, or GCC version >= 405: attribute takes the message */
     453  #    define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
     454  #  elif (ZDICT_GCC_VERSION >= 301)   /* GCC version 301..404: attribute used without the message */
     455  #    define ZDICT_DEPRECATED(message) __attribute__((deprecated))
     456  #  elif defined(_MSC_VER)
     457  #    define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
     458  #  else   /* unrecognized compiler: emit a build-time notice and expand to nothing */
     459  #    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
     460  #    define ZDICT_DEPRECATED(message)
     461  #  endif
     462  #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
     463  
     464  ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")   /* deprecated entry point; the message names the replacement */
     465  ZDICTLIB_STATIC_API   /* declared only when ZDICT_STATIC_LINKING_ONLY is defined */
     466  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
     467                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
     468  
     469  
     470  #endif   /* ZSTD_ZDICT_H_STATIC */
     471  
     472  #if defined (__cplusplus)
     473  }
     474  #endif