(root)/
libxml2-2.12.3/
fuzz/
genSeed.c
       1  /*
 * genSeed.c: Generate the seed corpus for the fuzzers.
       3   *
       4   * See Copyright for the status of this software.
       5   */
       6  
       7  #include <stdio.h>
       8  #include <string.h>
       9  #include <glob.h>
      10  #include <libgen.h>
      11  #include <sys/stat.h>
      12  
      13  #ifdef _WIN32
      14  #include <direct.h>
      15  #else
      16  #include <unistd.h>
      17  #endif
      18  
      19  #include <libxml/parser.h>
      20  #include <libxml/parserInternals.h>
      21  #include <libxml/HTMLparser.h>
      22  #include <libxml/xinclude.h>
      23  #include <libxml/xmlschemas.h>
      24  #include "fuzz.h"
      25  
      26  #define PATH_SIZE 500
      27  #define SEED_BUF_SIZE 16384
      28  #define EXPR_SIZE 4500
      29  
      30  typedef int
      31  (*fileFunc)(const char *base, FILE *out);
      32  
      33  typedef int
      34  (*mainFunc)(const char *arg);
      35  
      36  static struct {
      37      FILE *out;
      38      xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
      39      xmlExternalEntityLoader oldLoader;
      40      fileFunc processFile;
      41      const char *fuzzer;
      42      int counter;
      43      char cwd[PATH_SIZE];
      44  } globalData;
      45  
      46  #if defined(HAVE_SCHEMA_FUZZER) || \
      47      defined(HAVE_XML_FUZZER)
      48  /*
      49   * A custom entity loader that writes all external DTDs or entities to a
      50   * single file in the format expected by xmlFuzzEntityLoader.
      51   */
      52  static xmlParserInputPtr
      53  fuzzEntityRecorder(const char *URL, const char *ID,
      54                        xmlParserCtxtPtr ctxt) {
      55      xmlParserInputPtr in;
      56      static const int chunkSize = 16384;
      57      int len;
      58  
      59      in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
      60      if (in == NULL)
      61          return(NULL);
      62  
      63      if (globalData.entities == NULL) {
      64          globalData.entities = xmlHashCreate(4);
      65      } else if (xmlHashLookup(globalData.entities,
      66                               (const xmlChar *) URL) != NULL) {
      67          return(in);
      68      }
      69  
      70      do {
      71          len = xmlParserInputBufferGrow(in->buf, chunkSize);
      72          if (len < 0) {
      73              fprintf(stderr, "Error reading %s\n", URL);
      74              xmlFreeInputStream(in);
      75              return(NULL);
      76          }
      77      } while (len > 0);
      78  
      79      xmlFuzzWriteString(globalData.out, URL);
      80      xmlFuzzWriteString(globalData.out,
      81                         (char *) xmlBufContent(in->buf->buffer));
      82  
      83      xmlFreeInputStream(in);
      84  
      85      xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
      86                      globalData.entities);
      87  
      88      return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
      89  }
      90  
      91  static void
      92  fuzzRecorderInit(FILE *out) {
      93      globalData.out = out;
      94      globalData.entities = xmlHashCreate(8);
      95      globalData.oldLoader = xmlGetExternalEntityLoader();
      96      xmlSetExternalEntityLoader(fuzzEntityRecorder);
      97  }
      98  
      99  static void
     100  fuzzRecorderCleanup(void) {
     101      xmlSetExternalEntityLoader(globalData.oldLoader);
     102      xmlHashFree(globalData.entities, NULL);
     103      globalData.out = NULL;
     104      globalData.entities = NULL;
     105      globalData.oldLoader = NULL;
     106  }
     107  #endif
     108  
     109  #ifdef HAVE_XML_FUZZER
     110  static int
     111  processXml(const char *docFile, FILE *out) {
     112      int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
     113      xmlDocPtr doc;
     114  
     115      /* Parser options. */
     116      xmlFuzzWriteInt(out, opts, 4);
     117      /* Max allocations. */
     118      xmlFuzzWriteInt(out, 0, 4);
     119  
     120      fuzzRecorderInit(out);
     121  
     122      doc = xmlReadFile(docFile, NULL, opts);
     123  #ifdef LIBXML_XINCLUDE_ENABLED
     124      xmlXIncludeProcessFlags(doc, opts);
     125  #endif
     126      xmlFreeDoc(doc);
     127  
     128      fuzzRecorderCleanup();
     129  
     130      return(0);
     131  }
     132  #endif
     133  
     134  #ifdef HAVE_HTML_FUZZER
     135  static int
     136  processHtml(const char *docFile, FILE *out) {
     137      char buf[SEED_BUF_SIZE];
     138      FILE *file;
     139      size_t size;
     140  
     141      /* Parser options. */
     142      xmlFuzzWriteInt(out, 0, 4);
     143      /* Max allocations. */
     144      xmlFuzzWriteInt(out, 0, 4);
     145  
     146      /* Copy file */
     147      file = fopen(docFile, "rb");
     148      if (file == NULL) {
     149          fprintf(stderr, "couldn't open %s\n", docFile);
     150          return(0);
     151      }
     152      do {
     153          size = fread(buf, 1, SEED_BUF_SIZE, file);
     154          if (size > 0)
     155              fwrite(buf, 1, size, out);
     156      } while (size == SEED_BUF_SIZE);
     157      fclose(file);
     158  
     159      return(0);
     160  }
     161  #endif
     162  
     163  #ifdef HAVE_SCHEMA_FUZZER
     164  static int
     165  processSchema(const char *docFile, FILE *out) {
     166      xmlSchemaPtr schema;
     167      xmlSchemaParserCtxtPtr pctxt;
     168  
     169      /* Max allocations. */
     170      xmlFuzzWriteInt(out, 0, 4);
     171  
     172      fuzzRecorderInit(out);
     173  
     174      pctxt = xmlSchemaNewParserCtxt(docFile);
     175      xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
     176      schema = xmlSchemaParse(pctxt);
     177      xmlSchemaFreeParserCtxt(pctxt);
     178      xmlSchemaFree(schema);
     179  
     180      fuzzRecorderCleanup();
     181  
     182      return(0);
     183  }
     184  #endif
     185  
     186  #if defined(HAVE_HTML_FUZZER) || \
     187      defined(HAVE_SCHEMA_FUZZER) || \
     188      defined(HAVE_XML_FUZZER)
     189  static int
     190  processPattern(const char *pattern) {
     191      glob_t globbuf;
     192      int ret = 0;
     193      int res;
     194      size_t i;
     195  
     196      res = glob(pattern, 0, NULL, &globbuf);
     197      if (res == GLOB_NOMATCH)
     198          return(0);
     199      if (res != 0) {
     200          fprintf(stderr, "couldn't match pattern %s\n", pattern);
     201          return(-1);
     202      }
     203  
     204      for (i = 0; i < globbuf.gl_pathc; i++) {
     205          struct stat statbuf;
     206          char outPath[PATH_SIZE];
     207          char *dirBuf = NULL;
     208          char *baseBuf = NULL;
     209          const char *path, *dir, *base;
     210          FILE *out = NULL;
     211          int dirChanged = 0;
     212          size_t size;
     213  
     214          path = globbuf.gl_pathv[i];
     215  
     216          if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
     217              continue;
     218  
     219          dirBuf = (char *) xmlCharStrdup(path);
     220          baseBuf = (char *) xmlCharStrdup(path);
     221          if ((dirBuf == NULL) || (baseBuf == NULL)) {
     222              fprintf(stderr, "memory allocation failed\n");
     223              ret = -1;
     224              goto error;
     225          }
     226          dir = dirname(dirBuf);
     227          base = basename(baseBuf);
     228  
     229          size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
     230                          globalData.fuzzer, base);
     231          if (size >= PATH_SIZE) {
     232              fprintf(stderr, "creating path failed\n");
     233              ret = -1;
     234              goto error;
     235          }
     236          out = fopen(outPath, "wb");
     237          if (out == NULL) {
     238              fprintf(stderr, "couldn't open %s for writing\n", outPath);
     239              ret = -1;
     240              goto error;
     241          }
     242          if (chdir(dir) != 0) {
     243              fprintf(stderr, "couldn't chdir to %s\n", dir);
     244              ret = -1;
     245              goto error;
     246          }
     247          dirChanged = 1;
     248          if (globalData.processFile(base, out) != 0)
     249              ret = -1;
     250  
     251  error:
     252          if (out != NULL)
     253              fclose(out);
     254          xmlFree(dirBuf);
     255          xmlFree(baseBuf);
     256          if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
     257              fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
     258              ret = -1;
     259              break;
     260          }
     261      }
     262  
     263      globfree(&globbuf);
     264      return(ret);
     265  }
     266  #endif
     267  
     268  #ifdef HAVE_XPATH_FUZZER
     269  static int
     270  processXPath(const char *testDir, const char *prefix, const char *name,
     271               const char *data, const char *subdir, int xptr) {
     272      char pattern[PATH_SIZE];
     273      glob_t globbuf;
     274      size_t i, size;
     275      int ret = 0, res;
     276  
     277      size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
     278                      testDir, subdir, prefix);
     279      if (size >= PATH_SIZE)
     280          return(-1);
     281      res = glob(pattern, 0, NULL, &globbuf);
     282      if (res == GLOB_NOMATCH)
     283          return(0);
     284      if (res != 0) {
     285          fprintf(stderr, "couldn't match pattern %s\n", pattern);
     286          return(-1);
     287      }
     288  
     289      for (i = 0; i < globbuf.gl_pathc; i++) {
     290          char *path = globbuf.gl_pathv[i];
     291          struct stat statbuf;
     292          FILE *in;
     293          char expr[EXPR_SIZE];
     294  
     295          if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
     296              continue;
     297  
     298          in = fopen(path, "rb");
     299          if (in == NULL) {
     300              ret = -1;
     301              continue;
     302          }
     303  
     304          while (fgets(expr, EXPR_SIZE, in) != NULL) {
     305              char outPath[PATH_SIZE];
     306              FILE *out;
     307              int j;
     308  
     309              for (j = 0; expr[j] != 0; j++)
     310                  if (expr[j] == '\r' || expr[j] == '\n')
     311                      break;
     312              expr[j] = 0;
     313  
     314              size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
     315                              name, globalData.counter);
     316              if (size >= PATH_SIZE) {
     317                  ret = -1;
     318                  continue;
     319              }
     320              out = fopen(outPath, "wb");
     321              if (out == NULL) {
     322                  ret = -1;
     323                  continue;
     324              }
     325  
     326              /* Max allocations. */
     327              xmlFuzzWriteInt(out, 0, 4);
     328  
     329              if (xptr) {
     330                  xmlFuzzWriteString(out, expr);
     331              } else {
     332                  char xptrExpr[EXPR_SIZE+100];
     333  
     334                  /* Wrap XPath expressions as XPointer */
     335                  snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
     336                  xmlFuzzWriteString(out, xptrExpr);
     337              }
     338  
     339              xmlFuzzWriteString(out, data);
     340  
     341              fclose(out);
     342              globalData.counter++;
     343          }
     344  
     345          fclose(in);
     346      }
     347  
     348      globfree(&globbuf);
     349  
     350      return(ret);
     351  }
     352  
     353  static int
     354  processXPathDir(const char *testDir) {
     355      char pattern[PATH_SIZE];
     356      glob_t globbuf;
     357      size_t i, size;
     358      int ret = 0;
     359  
     360      globalData.counter = 1;
     361      if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
     362          ret = -1;
     363  
     364      size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
     365      if (size >= PATH_SIZE)
     366          return(1);
     367      if (glob(pattern, 0, NULL, &globbuf) != 0)
     368          return(1);
     369  
     370      for (i = 0; i < globbuf.gl_pathc; i++) {
     371          char *path = globbuf.gl_pathv[i];
     372          char *data;
     373          const char *docFile;
     374  
     375          data = xmlSlurpFile(path, NULL);
     376          if (data == NULL) {
     377              ret = -1;
     378              continue;
     379          }
     380          docFile = basename(path);
     381  
     382          globalData.counter = 1;
     383          if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
     384              ret = -1;
     385          if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
     386              ret = -1;
     387          if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
     388              ret = -1;
     389  
     390          xmlFree(data);
     391      }
     392  
     393      globfree(&globbuf);
     394  
     395      return(ret);
     396  }
     397  #endif
     398  
     399  int
     400  main(int argc, const char **argv) {
     401      mainFunc processArg = NULL;
     402      const char *fuzzer;
     403      int ret = 0;
     404      int i;
     405  
     406      if (argc < 3) {
     407          fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
     408          return(1);
     409      }
     410  
     411      xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
     412  
     413      fuzzer = argv[1];
     414      if (strcmp(fuzzer, "html") == 0) {
     415  #ifdef HAVE_HTML_FUZZER
     416          processArg = processPattern;
     417          globalData.processFile = processHtml;
     418  #endif
     419      } else if (strcmp(fuzzer, "schema") == 0) {
     420  #ifdef HAVE_SCHEMA_FUZZER
     421          processArg = processPattern;
     422          globalData.processFile = processSchema;
     423  #endif
     424      } else if (strcmp(fuzzer, "valid") == 0) {
     425  #ifdef HAVE_VALID_FUZZER
     426          processArg = processPattern;
     427          globalData.processFile = processXml;
     428  #endif
     429      } else if (strcmp(fuzzer, "xinclude") == 0) {
     430  #ifdef HAVE_XINCLUDE_FUZZER
     431          processArg = processPattern;
     432          globalData.processFile = processXml;
     433  #endif
     434      } else if (strcmp(fuzzer, "xml") == 0) {
     435  #ifdef HAVE_XML_FUZZER
     436          processArg = processPattern;
     437          globalData.processFile = processXml;
     438  #endif
     439      } else if (strcmp(fuzzer, "xpath") == 0) {
     440  #ifdef HAVE_XPATH_FUZZER
     441          processArg = processXPathDir;
     442  #endif
     443      } else {
     444          fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
     445          return(1);
     446      }
     447      globalData.fuzzer = fuzzer;
     448  
     449      if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
     450          fprintf(stderr, "couldn't get current directory\n");
     451          return(1);
     452      }
     453  
     454      if (processArg != NULL)
     455          for (i = 2; i < argc; i++)
     456              processArg(argv[i]);
     457  
     458      return(ret);
     459  }
     460