(root)/
Python-3.12.0/
Modules/
_xxtestfuzz/
fuzzer.c
       1  /* A fuzz test for CPython.
       2  
       3    The only exposed function is LLVMFuzzerTestOneInput, which is called by
       4    fuzzers and by the _fuzz module for smoke tests.
       5  
       6    To build exactly one fuzz test, as when running in oss-fuzz etc.,
       7    build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
       8    LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
       9        -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
      10  
      11    See the source code for LLVMFuzzerTestOneInput for details. */
      12  
      13  #include <Python.h>
      14  #include <stdlib.h>
      15  #include <inttypes.h>
      16  
      17  /*  Fuzz PyFloat_FromString as a proxy for float(str). */
      18  static int fuzz_builtin_float(const char* data, size_t size) {
      19      PyObject* s = PyBytes_FromStringAndSize(data, size);
      20      if (s == NULL) return 0;
      21      PyObject* f = PyFloat_FromString(s);
      22      if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
      23          PyErr_Clear();
      24      }
      25  
      26      Py_XDECREF(f);
      27      Py_DECREF(s);
      28      return 0;
      29  }
      30  
      31  #define MAX_INT_TEST_SIZE 0x10000
      32  
      33  /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
      34  static int fuzz_builtin_int(const char* data, size_t size) {
      35      /* Ignore test cases with very long ints to avoid timeouts
      36         int("9" * 1000000) is not a very interesting test caase */
      37      if (size > MAX_INT_TEST_SIZE) {
      38          return 0;
      39      }
      40      /* Pick a random valid base. (When the fuzzed function takes extra
      41         parameters, it's somewhat normal to hash the input to generate those
      42         parameters. We want to exercise all code paths, so we do so here.) */
      43      int base = _Py_HashBytes(data, size) % 37;
      44      if (base == 1) {
      45          // 1 is the only number between 0 and 36 that is not a valid base.
      46          base = 0;
      47      }
      48      if (base == -1) {
      49          return 0;  // An error occurred, bail early.
      50      }
      51      if (base < 0) {
      52          base = -base;
      53      }
      54  
      55      PyObject* s = PyUnicode_FromStringAndSize(data, size);
      56      if (s == NULL) {
      57          if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
      58              PyErr_Clear();
      59          }
      60          return 0;
      61      }
      62      PyObject* l = PyLong_FromUnicodeObject(s, base);
      63      if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
      64          PyErr_Clear();
      65      }
      66      PyErr_Clear();
      67      Py_XDECREF(l);
      68      Py_DECREF(s);
      69      return 0;
      70  }
      71  
      72  /* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
      73  static int fuzz_builtin_unicode(const char* data, size_t size) {
      74      PyObject* s = PyUnicode_FromStringAndSize(data, size);
      75      if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
      76          PyErr_Clear();
      77      }
      78      Py_XDECREF(s);
      79      return 0;
      80  }
      81  
      82  
      83  PyObject* struct_unpack_method = NULL;
      84  PyObject* struct_error = NULL;
      85  /* Called by LLVMFuzzerTestOneInput for initialization */
      86  static int init_struct_unpack(void) {
      87      /* Import struct.unpack */
      88      PyObject* struct_module = PyImport_ImportModule("struct");
      89      if (struct_module == NULL) {
      90          return 0;
      91      }
      92      struct_error = PyObject_GetAttrString(struct_module, "error");
      93      if (struct_error == NULL) {
      94          return 0;
      95      }
      96      struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
      97      return struct_unpack_method != NULL;
      98  }
      99  /* Fuzz struct.unpack(x, y) */
     100  static int fuzz_struct_unpack(const char* data, size_t size) {
     101      /* Everything up to the first null byte is considered the
     102         format. Everything after is the buffer */
     103      const char* first_null = memchr(data, '\0', size);
     104      if (first_null == NULL) {
     105          return 0;
     106      }
     107  
     108      size_t format_length = first_null - data;
     109      size_t buffer_length = size - format_length - 1;
     110  
     111      PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
     112      if (pattern == NULL) {
     113          return 0;
     114      }
     115      PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
     116      if (buffer == NULL) {
     117          Py_DECREF(pattern);
     118          return 0;
     119      }
     120  
     121      PyObject* unpacked = PyObject_CallFunctionObjArgs(
     122          struct_unpack_method, pattern, buffer, NULL);
     123      /* Ignore any overflow errors, these are easily triggered accidentally */
     124      if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
     125          PyErr_Clear();
     126      }
     127      /* The pascal format string will throw a negative size when passing 0
     128         like: struct.unpack('0p', b'') */
     129      if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
     130          PyErr_Clear();
     131      }
     132      /* Ignore any struct.error exceptions, these can be caused by invalid
     133         formats or incomplete buffers both of which are common. */
     134      if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
     135          PyErr_Clear();
     136      }
     137  
     138      Py_XDECREF(unpacked);
     139      Py_DECREF(pattern);
     140      Py_DECREF(buffer);
     141      return 0;
     142  }
     143  
     144  
     145  #define MAX_JSON_TEST_SIZE 0x100000
     146  
     147  PyObject* json_loads_method = NULL;
     148  /* Called by LLVMFuzzerTestOneInput for initialization */
     149  static int init_json_loads(void) {
     150      /* Import json.loads */
     151      PyObject* json_module = PyImport_ImportModule("json");
     152      if (json_module == NULL) {
     153          return 0;
     154      }
     155      json_loads_method = PyObject_GetAttrString(json_module, "loads");
     156      return json_loads_method != NULL;
     157  }
     158  /* Fuzz json.loads(x) */
     159  static int fuzz_json_loads(const char* data, size_t size) {
     160      /* Since python supports arbitrarily large ints in JSON,
     161         long inputs can lead to timeouts on boring inputs like
     162         `json.loads("9" * 100000)` */
     163      if (size > MAX_JSON_TEST_SIZE) {
     164          return 0;
     165      }
     166      PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
     167      if (input_bytes == NULL) {
     168          return 0;
     169      }
     170      PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes);
     171      if (parsed == NULL) {
     172          /* Ignore ValueError as the fuzzer will more than likely
     173             generate some invalid json and values */
     174          if (PyErr_ExceptionMatches(PyExc_ValueError) ||
     175          /* Ignore RecursionError as the fuzzer generates long sequences of
     176             arrays such as `[[[...` */
     177              PyErr_ExceptionMatches(PyExc_RecursionError) ||
     178          /* Ignore unicode errors, invalid byte sequences are common */
     179              PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
     180          ) {
     181              PyErr_Clear();
     182          }
     183      }
     184      Py_DECREF(input_bytes);
     185      Py_XDECREF(parsed);
     186      return 0;
     187  }
     188  
     189  #define MAX_RE_TEST_SIZE 0x10000
     190  
     191  PyObject* sre_compile_method = NULL;
     192  PyObject* sre_error_exception = NULL;
     193  int SRE_FLAG_DEBUG = 0;
     194  /* Called by LLVMFuzzerTestOneInput for initialization */
     195  static int init_sre_compile(void) {
     196      /* Import sre_compile.compile and sre.error */
     197      PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
     198      if (sre_compile_module == NULL) {
     199          return 0;
     200      }
     201      sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
     202      if (sre_compile_method == NULL) {
     203          return 0;
     204      }
     205  
     206      PyObject* sre_constants = PyImport_ImportModule("sre_constants");
     207      if (sre_constants == NULL) {
     208          return 0;
     209      }
     210      sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
     211      if (sre_error_exception == NULL) {
     212          return 0;
     213      }
     214      PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
     215      if (debug_flag == NULL) {
     216          return 0;
     217      }
     218      SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
     219      return 1;
     220  }
     221  /* Fuzz _sre.compile(x) */
     222  static int fuzz_sre_compile(const char* data, size_t size) {
     223      /* Ignore really long regex patterns that will timeout the fuzzer */
     224      if (size > MAX_RE_TEST_SIZE) {
     225          return 0;
     226      }
     227      /* We treat the first 2 bytes of the input as a number for the flags */
     228      if (size < 2) {
     229          return 0;
     230      }
     231      uint16_t flags = ((uint16_t*) data)[0];
     232      /* We remove the SRE_FLAG_DEBUG if present. This is because it
     233         prints to stdout which greatly decreases fuzzing speed */
     234      flags &= ~SRE_FLAG_DEBUG;
     235  
     236      /* Pull the pattern from the remaining bytes */
     237      PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
     238      if (pattern_bytes == NULL) {
     239          return 0;
     240      }
     241      PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
     242      if (flags_obj == NULL) {
     243          Py_DECREF(pattern_bytes);
     244          return 0;
     245      }
     246  
     247      /* compiled = _sre.compile(data[2:], data[0:2] */
     248      PyObject* compiled = PyObject_CallFunctionObjArgs(
     249          sre_compile_method, pattern_bytes, flags_obj, NULL);
     250      /* Ignore ValueError as the fuzzer will more than likely
     251         generate some invalid combination of flags */
     252      if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
     253          PyErr_Clear();
     254      }
     255      /* Ignore some common errors thrown by sre_parse:
     256         Overflow, Assertion, Recursion and Index */
     257      if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
     258                               PyErr_ExceptionMatches(PyExc_AssertionError) ||
     259                               PyErr_ExceptionMatches(PyExc_RecursionError) ||
     260                               PyErr_ExceptionMatches(PyExc_IndexError))
     261      ) {
     262          PyErr_Clear();
     263      }
     264      /* Ignore re.error */
     265      if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
     266          PyErr_Clear();
     267      }
     268  
     269      Py_DECREF(pattern_bytes);
     270      Py_DECREF(flags_obj);
     271      Py_XDECREF(compiled);
     272      return 0;
     273  }
     274  
/* Some random patterns used to test re.match.
   Be careful not to add catastrophically slow regexes here; we want to
   exercise the matching code without causing timeouts. */
static const char* regex_patterns[] = {
    ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
    "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
    "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
    "(?:a*)*", "a{1,2}?"
};
/* Number of entries in regex_patterns. */
const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
/* Array of NUM_PATTERNS compiled pattern objects, one per entry of
   regex_patterns; allocated and filled by init_sre_match(). */
PyObject** compiled_patterns = NULL;
     286  /* Called by LLVMFuzzerTestOneInput for initialization */
     287  static int init_sre_match(void) {
     288      PyObject* re_module = PyImport_ImportModule("re");
     289      if (re_module == NULL) {
     290          return 0;
     291      }
     292      compiled_patterns = (PyObject**) PyMem_RawMalloc(
     293          sizeof(PyObject*) * NUM_PATTERNS);
     294      if (compiled_patterns == NULL) {
     295          PyErr_NoMemory();
     296          return 0;
     297      }
     298  
     299      /* Precompile all the regex patterns on the first run for faster fuzzing */
     300      for (size_t i = 0; i < NUM_PATTERNS; i++) {
     301          PyObject* compiled = PyObject_CallMethod(
     302              re_module, "compile", "y", regex_patterns[i]);
     303          /* Bail if any of the patterns fail to compile */
     304          if (compiled == NULL) {
     305              return 0;
     306          }
     307          compiled_patterns[i] = compiled;
     308      }
     309      return 1;
     310  }
     311  /* Fuzz re.match(x) */
     312  static int fuzz_sre_match(const char* data, size_t size) {
     313      if (size < 1 || size > MAX_RE_TEST_SIZE) {
     314          return 0;
     315      }
     316      /* Use the first byte as a uint8_t specifying the index of the
     317         regex to use */
     318      unsigned char idx = (unsigned char) data[0];
     319      idx = idx % NUM_PATTERNS;
     320  
     321      /* Pull the string to match from the remaining bytes */
     322      PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
     323      if (to_match == NULL) {
     324          return 0;
     325      }
     326  
     327      PyObject* pattern = compiled_patterns[idx];
     328      PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
     329  
     330      PyObject* matches = PyObject_CallOneArg(match_callable, to_match);
     331  
     332      Py_XDECREF(matches);
     333      Py_DECREF(match_callable);
     334      Py_DECREF(to_match);
     335      return 0;
     336  }
     337  
     338  #define MAX_CSV_TEST_SIZE 0x100000
     339  PyObject* csv_module = NULL;
     340  PyObject* csv_error = NULL;
     341  /* Called by LLVMFuzzerTestOneInput for initialization */
     342  static int init_csv_reader(void) {
     343      /* Import csv and csv.Error */
     344      csv_module = PyImport_ImportModule("csv");
     345      if (csv_module == NULL) {
     346          return 0;
     347      }
     348      csv_error = PyObject_GetAttrString(csv_module, "Error");
     349      return csv_error != NULL;
     350  }
     351  /* Fuzz csv.reader([x]) */
     352  static int fuzz_csv_reader(const char* data, size_t size) {
     353      if (size < 1 || size > MAX_CSV_TEST_SIZE) {
     354          return 0;
     355      }
     356      /* Ignore non null-terminated strings since _csv can't handle
     357         embedded nulls */
     358      if (memchr(data, '\0', size) == NULL) {
     359          return 0;
     360      }
     361  
     362      PyObject* s = PyUnicode_FromString(data);
     363      /* Ignore exceptions until we have a valid string */
     364      if (s == NULL) {
     365          PyErr_Clear();
     366          return 0;
     367      }
     368  
     369      /* Split on \n so we can test multiple lines */
     370      PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
     371      if (lines == NULL) {
     372          Py_DECREF(s);
     373          return 0;
     374      }
     375  
     376      PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
     377      if (reader) {
     378          /* Consume all of the reader as an iterator */
     379          PyObject* parsed_line;
     380          while ((parsed_line = PyIter_Next(reader))) {
     381              Py_DECREF(parsed_line);
     382          }
     383      }
     384  
     385      /* Ignore csv.Error because we're probably going to generate
     386         some bad files (embedded new-lines, unterminated quotes etc) */
     387      if (PyErr_ExceptionMatches(csv_error)) {
     388          PyErr_Clear();
     389      }
     390  
     391      Py_XDECREF(reader);
     392      Py_DECREF(s);
     393      return 0;
     394  }
     395  
     396  #define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x100000
     397  PyObject* ast_literal_eval_method = NULL;
     398  /* Called by LLVMFuzzerTestOneInput for initialization */
     399  static int init_ast_literal_eval(void) {
     400      PyObject* ast_module = PyImport_ImportModule("ast");
     401      if (ast_module == NULL) {
     402          return 0;
     403      }
     404      ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
     405      return ast_literal_eval_method != NULL;
     406  }
     407  /* Fuzz ast.literal_eval(x) */
     408  static int fuzz_ast_literal_eval(const char* data, size_t size) {
     409      if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
     410          return 0;
     411      }
     412      /* Ignore non null-terminated strings since ast can't handle
     413         embedded nulls */
     414      if (memchr(data, '\0', size) == NULL) {
     415          return 0;
     416      }
     417  
     418      PyObject* s = PyUnicode_FromString(data);
     419      /* Ignore exceptions until we have a valid string */
     420      if (s == NULL) {
     421          PyErr_Clear();
     422          return 0;
     423      }
     424  
     425      PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
     426      /* Ignore some common errors thrown by ast.literal_eval */
     427      if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
     428                              PyErr_ExceptionMatches(PyExc_TypeError) ||
     429                              PyErr_ExceptionMatches(PyExc_SyntaxError) ||
     430                              PyErr_ExceptionMatches(PyExc_MemoryError) ||
     431                              PyErr_ExceptionMatches(PyExc_RecursionError))
     432      ) {
     433          PyErr_Clear();
     434      }
     435  
     436      Py_XDECREF(literal);
     437      Py_DECREF(s);
     438      return 0;
     439  }
     440  
     441  /* Run fuzzer and abort on failure. */
     442  static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     443      int rv = fuzzer((const char*) data, size);
     444      if (PyErr_Occurred()) {
     445          /* Fuzz tests should handle expected errors for themselves.
     446             This is last-ditch check in case they didn't. */
     447          PyErr_Print();
     448          abort();
     449      }
     450      /* Someday the return value might mean something, propagate it. */
     451      return rv;
     452  }
     453  
     454  /* CPython generates a lot of leak warnings for whatever reason. */
     455  int __lsan_is_turned_off(void) { return 1; }
     456  
     457  
     458  int LLVMFuzzerInitialize(int *argc, char ***argv) {
     459      PyConfig config;
     460      PyConfig_InitPythonConfig(&config);
     461      config.install_signal_handlers = 0;
     462      /* Raise the limit above the default allows exercising larger things
     463       * now that we fall back to the _pylong module for large values. */
     464      config.int_max_str_digits = 8086;
     465      PyStatus status;
     466      status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]);
     467      if (PyStatus_Exception(status)) {
     468          goto fail;
     469      }
     470  
     471      status = Py_InitializeFromConfig(&config);
     472      if (PyStatus_Exception(status)) {
     473          goto fail;
     474      }
     475      PyConfig_Clear(&config);
     476  
     477      return 0;
     478  
     479  fail:
     480      PyConfig_Clear(&config);
     481      Py_ExitStatusException(status);
     482  }
     483  
     484  /* Fuzz test interface.
     485     This returns the bitwise or of all fuzz test's return values.
     486  
     487     All fuzz tests must return 0, as all nonzero return codes are reserved for
     488     future use -- we propagate the return values for that future case.
     489     (And we bitwise or when running multiple tests to verify that normally we
     490     only return 0.) */
     491  int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     492      assert(Py_IsInitialized());
     493  
     494      int rv = 0;
     495  
     496  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
     497      rv |= _run_fuzz(data, size, fuzz_builtin_float);
     498  #endif
     499  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
     500      rv |= _run_fuzz(data, size, fuzz_builtin_int);
     501  #endif
     502  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
     503      rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
     504  #endif
     505  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
     506      static int STRUCT_UNPACK_INITIALIZED = 0;
     507      if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
     508          PyErr_Print();
     509          abort();
     510      } else {
     511          STRUCT_UNPACK_INITIALIZED = 1;
     512      }
     513      rv |= _run_fuzz(data, size, fuzz_struct_unpack);
     514  #endif
     515  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
     516      static int JSON_LOADS_INITIALIZED = 0;
     517      if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
     518          PyErr_Print();
     519          abort();
     520      } else {
     521          JSON_LOADS_INITIALIZED = 1;
     522      }
     523  
     524      rv |= _run_fuzz(data, size, fuzz_json_loads);
     525  #endif
     526  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
     527      static int SRE_COMPILE_INITIALIZED = 0;
     528      if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
     529          if (!PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
     530              PyErr_Print();
     531              abort();
     532          }
     533          else {
     534              PyErr_Clear();
     535          }
     536      } else {
     537          SRE_COMPILE_INITIALIZED = 1;
     538      }
     539  
     540      if (SRE_COMPILE_INITIALIZED) {
     541          rv |= _run_fuzz(data, size, fuzz_sre_compile);
     542      }
     543  #endif
     544  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
     545      static int SRE_MATCH_INITIALIZED = 0;
     546      if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
     547          PyErr_Print();
     548          abort();
     549      } else {
     550          SRE_MATCH_INITIALIZED = 1;
     551      }
     552  
     553      rv |= _run_fuzz(data, size, fuzz_sre_match);
     554  #endif
     555  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
     556      static int CSV_READER_INITIALIZED = 0;
     557      if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
     558          PyErr_Print();
     559          abort();
     560      } else {
     561          CSV_READER_INITIALIZED = 1;
     562      }
     563  
     564      rv |= _run_fuzz(data, size, fuzz_csv_reader);
     565  #endif
     566  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
     567      static int AST_LITERAL_EVAL_INITIALIZED = 0;
     568      if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
     569          PyErr_Print();
     570          abort();
     571      } else {
     572          AST_LITERAL_EVAL_INITIALIZED = 1;
     573      }
     574  
     575      rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
     576  #endif
     577    return rv;
     578  }