(root)/
Python-3.11.7/
Modules/
_xxtestfuzz/
fuzzer.c
       1  /* A fuzz test for CPython.
       2  
       3    The only exposed function is LLVMFuzzerTestOneInput, which is called by
       4    fuzzers and by the _fuzz module for smoke tests.
       5  
       6    To build exactly one fuzz test, as when running in oss-fuzz etc.,
       7    build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
       8    LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
       9        -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
      10  
      11    See the source code for LLVMFuzzerTestOneInput for details. */
      12  
#include <Python.h>
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
      16  
      17  /*  Fuzz PyFloat_FromString as a proxy for float(str). */
      18  static int fuzz_builtin_float(const char* data, size_t size) {
      19      PyObject* s = PyBytes_FromStringAndSize(data, size);
      20      if (s == NULL) return 0;
      21      PyObject* f = PyFloat_FromString(s);
      22      if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
      23          PyErr_Clear();
      24      }
      25  
      26      Py_XDECREF(f);
      27      Py_DECREF(s);
      28      return 0;
      29  }
      30  
      31  #define MAX_INT_TEST_SIZE 0x10000
      32  
      33  /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
      34  static int fuzz_builtin_int(const char* data, size_t size) {
      35      /* Ignore test cases with very long ints to avoid timeouts
      36         int("9" * 1000000) is not a very interesting test caase */
      37      if (size > MAX_INT_TEST_SIZE) {
      38          return 0;
      39      }
      40      /* Pick a random valid base. (When the fuzzed function takes extra
      41         parameters, it's somewhat normal to hash the input to generate those
      42         parameters. We want to exercise all code paths, so we do so here.) */
      43      int base = _Py_HashBytes(data, size) % 37;
      44      if (base == 1) {
      45          // 1 is the only number between 0 and 36 that is not a valid base.
      46          base = 0;
      47      }
      48      if (base == -1) {
      49          return 0;  // An error occurred, bail early.
      50      }
      51      if (base < 0) {
      52          base = -base;
      53      }
      54  
      55      PyObject* s = PyUnicode_FromStringAndSize(data, size);
      56      if (s == NULL) {
      57          if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
      58              PyErr_Clear();
      59          }
      60          return 0;
      61      }
      62      PyObject* l = PyLong_FromUnicodeObject(s, base);
      63      if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
      64          PyErr_Clear();
      65      }
      66      PyErr_Clear();
      67      Py_XDECREF(l);
      68      Py_DECREF(s);
      69      return 0;
      70  }
      71  
      72  /* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
      73  static int fuzz_builtin_unicode(const char* data, size_t size) {
      74      PyObject* s = PyUnicode_FromStringAndSize(data, size);
      75      if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
      76          PyErr_Clear();
      77      }
      78      Py_XDECREF(s);
      79      return 0;
      80  }
      81  
      82  
      83  PyObject* struct_unpack_method = NULL;
      84  PyObject* struct_error = NULL;
      85  /* Called by LLVMFuzzerTestOneInput for initialization */
      86  static int init_struct_unpack(void) {
      87      /* Import struct.unpack */
      88      PyObject* struct_module = PyImport_ImportModule("struct");
      89      if (struct_module == NULL) {
      90          return 0;
      91      }
      92      struct_error = PyObject_GetAttrString(struct_module, "error");
      93      if (struct_error == NULL) {
      94          return 0;
      95      }
      96      struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
      97      return struct_unpack_method != NULL;
      98  }
      99  /* Fuzz struct.unpack(x, y) */
     100  static int fuzz_struct_unpack(const char* data, size_t size) {
     101      /* Everything up to the first null byte is considered the
     102         format. Everything after is the buffer */
     103      const char* first_null = memchr(data, '\0', size);
     104      if (first_null == NULL) {
     105          return 0;
     106      }
     107  
     108      size_t format_length = first_null - data;
     109      size_t buffer_length = size - format_length - 1;
     110  
     111      PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
     112      if (pattern == NULL) {
     113          return 0;
     114      }
     115      PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
     116      if (buffer == NULL) {
     117          Py_DECREF(pattern);
     118          return 0;
     119      }
     120  
     121      PyObject* unpacked = PyObject_CallFunctionObjArgs(
     122          struct_unpack_method, pattern, buffer, NULL);
     123      /* Ignore any overflow errors, these are easily triggered accidentally */
     124      if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
     125          PyErr_Clear();
     126      }
     127      /* The pascal format string will throw a negative size when passing 0
     128         like: struct.unpack('0p', b'') */
     129      if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
     130          PyErr_Clear();
     131      }
     132      /* Ignore any struct.error exceptions, these can be caused by invalid
     133         formats or incomplete buffers both of which are common. */
     134      if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
     135          PyErr_Clear();
     136      }
     137  
     138      Py_XDECREF(unpacked);
     139      Py_DECREF(pattern);
     140      Py_DECREF(buffer);
     141      return 0;
     142  }
     143  
     144  
     145  #define MAX_JSON_TEST_SIZE 0x10000
     146  
     147  PyObject* json_loads_method = NULL;
     148  /* Called by LLVMFuzzerTestOneInput for initialization */
     149  static int init_json_loads(void) {
     150      /* Import json.loads */
     151      PyObject* json_module = PyImport_ImportModule("json");
     152      if (json_module == NULL) {
     153          return 0;
     154      }
     155      json_loads_method = PyObject_GetAttrString(json_module, "loads");
     156      return json_loads_method != NULL;
     157  }
     158  /* Fuzz json.loads(x) */
     159  static int fuzz_json_loads(const char* data, size_t size) {
     160      /* Since python supports arbitrarily large ints in JSON,
     161         long inputs can lead to timeouts on boring inputs like
     162         `json.loads("9" * 100000)` */
     163      if (size > MAX_JSON_TEST_SIZE) {
     164          return 0;
     165      }
     166      PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
     167      if (input_bytes == NULL) {
     168          return 0;
     169      }
     170      PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes);
     171      if (parsed == NULL) {
     172          /* Ignore ValueError as the fuzzer will more than likely
     173             generate some invalid json and values */
     174          if (PyErr_ExceptionMatches(PyExc_ValueError) ||
     175          /* Ignore RecursionError as the fuzzer generates long sequences of
     176             arrays such as `[[[...` */
     177              PyErr_ExceptionMatches(PyExc_RecursionError) ||
     178          /* Ignore unicode errors, invalid byte sequences are common */
     179              PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
     180          ) {
     181              PyErr_Clear();
     182          }
     183      }
     184      Py_DECREF(input_bytes);
     185      Py_XDECREF(parsed);
     186      return 0;
     187  }
     188  
     189  #define MAX_RE_TEST_SIZE 0x10000
     190  
     191  PyObject* re_compile_method = NULL;
     192  PyObject* re_error_exception = NULL;
     193  int RE_FLAG_DEBUG = 0;
     194  /* Called by LLVMFuzzerTestOneInput for initialization */
     195  static int init_sre_compile(void) {
     196      /* Import sre_compile.compile and sre.error */
     197      PyObject* re_module = PyImport_ImportModule("re");
     198      if (re_module == NULL) {
     199          return 0;
     200      }
     201      re_compile_method = PyObject_GetAttrString(re_module, "compile");
     202      if (re_compile_method == NULL) {
     203          return 0;
     204      }
     205  
     206      re_error_exception = PyObject_GetAttrString(re_module, "error");
     207      if (re_error_exception == NULL) {
     208          return 0;
     209      }
     210      PyObject* debug_flag = PyObject_GetAttrString(re_module, "DEBUG");
     211      if (debug_flag == NULL) {
     212          return 0;
     213      }
     214      RE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
     215      return 1;
     216  }
     217  /* Fuzz re.compile(x) */
     218  static int fuzz_sre_compile(const char* data, size_t size) {
     219      /* Ignore really long regex patterns that will timeout the fuzzer */
     220      if (size > MAX_RE_TEST_SIZE) {
     221          return 0;
     222      }
     223      /* We treat the first 2 bytes of the input as a number for the flags */
     224      if (size < 2) {
     225          return 0;
     226      }
     227      uint16_t flags = ((uint16_t*) data)[0];
     228      /* We remove the SRE_FLAG_DEBUG if present. This is because it
     229         prints to stdout which greatly decreases fuzzing speed */
     230      flags &= ~RE_FLAG_DEBUG;
     231  
     232      /* Pull the pattern from the remaining bytes */
     233      PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
     234      if (pattern_bytes == NULL) {
     235          return 0;
     236      }
     237      PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
     238      if (flags_obj == NULL) {
     239          Py_DECREF(pattern_bytes);
     240          return 0;
     241      }
     242  
     243      /* compiled = re.compile(data[2:], data[0:2] */
     244      PyObject* compiled = PyObject_CallFunctionObjArgs(
     245          re_compile_method, pattern_bytes, flags_obj, NULL);
     246      /* Ignore ValueError as the fuzzer will more than likely
     247         generate some invalid combination of flags */
     248      if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
     249          PyErr_Clear();
     250      }
     251      /* Ignore some common errors thrown by sre_parse:
     252         Overflow, Assertion, Recursion and Index */
     253      if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
     254                               PyErr_ExceptionMatches(PyExc_AssertionError) ||
     255                               PyErr_ExceptionMatches(PyExc_RecursionError) ||
     256                               PyErr_ExceptionMatches(PyExc_IndexError))
     257      ) {
     258          PyErr_Clear();
     259      }
     260      /* Ignore re.error */
     261      if (compiled == NULL && PyErr_ExceptionMatches(re_error_exception)) {
     262          PyErr_Clear();
     263      }
     264  
     265      Py_DECREF(pattern_bytes);
     266      Py_DECREF(flags_obj);
     267      Py_XDECREF(compiled);
     268      return 0;
     269  }
     270  
/* Some random patterns used to test re.match.
   Be careful not to add catastrophically slow regexes here; we want to
   exercise the matching code without causing timeouts. */
     274  static const char* regex_patterns[] = {
     275      ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
     276      "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
     277      "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
     278      "(?:a*)*", "a{1,2}?"
     279  };
     280  const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
     281  PyObject** compiled_patterns = NULL;
     282  /* Called by LLVMFuzzerTestOneInput for initialization */
     283  static int init_sre_match(void) {
     284      PyObject* re_module = PyImport_ImportModule("re");
     285      if (re_module == NULL) {
     286          return 0;
     287      }
     288      compiled_patterns = (PyObject**) PyMem_RawMalloc(
     289          sizeof(PyObject*) * NUM_PATTERNS);
     290      if (compiled_patterns == NULL) {
     291          PyErr_NoMemory();
     292          return 0;
     293      }
     294  
     295      /* Precompile all the regex patterns on the first run for faster fuzzing */
     296      for (size_t i = 0; i < NUM_PATTERNS; i++) {
     297          PyObject* compiled = PyObject_CallMethod(
     298              re_module, "compile", "y", regex_patterns[i]);
     299          /* Bail if any of the patterns fail to compile */
     300          if (compiled == NULL) {
     301              return 0;
     302          }
     303          compiled_patterns[i] = compiled;
     304      }
     305      return 1;
     306  }
     307  /* Fuzz re.match(x) */
     308  static int fuzz_sre_match(const char* data, size_t size) {
     309      if (size < 1 || size > MAX_RE_TEST_SIZE) {
     310          return 0;
     311      }
     312      /* Use the first byte as a uint8_t specifying the index of the
     313         regex to use */
     314      unsigned char idx = (unsigned char) data[0];
     315      idx = idx % NUM_PATTERNS;
     316  
     317      /* Pull the string to match from the remaining bytes */
     318      PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
     319      if (to_match == NULL) {
     320          return 0;
     321      }
     322  
     323      PyObject* pattern = compiled_patterns[idx];
     324      PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
     325  
     326      PyObject* matches = PyObject_CallOneArg(match_callable, to_match);
     327  
     328      Py_XDECREF(matches);
     329      Py_DECREF(match_callable);
     330      Py_DECREF(to_match);
     331      return 0;
     332  }
     333  
     334  #define MAX_CSV_TEST_SIZE 0x10000
     335  PyObject* csv_module = NULL;
     336  PyObject* csv_error = NULL;
     337  /* Called by LLVMFuzzerTestOneInput for initialization */
     338  static int init_csv_reader(void) {
     339      /* Import csv and csv.Error */
     340      csv_module = PyImport_ImportModule("csv");
     341      if (csv_module == NULL) {
     342          return 0;
     343      }
     344      csv_error = PyObject_GetAttrString(csv_module, "Error");
     345      return csv_error != NULL;
     346  }
     347  /* Fuzz csv.reader([x]) */
     348  static int fuzz_csv_reader(const char* data, size_t size) {
     349      if (size < 1 || size > MAX_CSV_TEST_SIZE) {
     350          return 0;
     351      }
     352      /* Ignore non null-terminated strings since _csv can't handle
     353         embedded nulls */
     354      if (memchr(data, '\0', size) == NULL) {
     355          return 0;
     356      }
     357  
     358      PyObject* s = PyUnicode_FromString(data);
     359      /* Ignore exceptions until we have a valid string */
     360      if (s == NULL) {
     361          PyErr_Clear();
     362          return 0;
     363      }
     364  
     365      /* Split on \n so we can test multiple lines */
     366      PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
     367      if (lines == NULL) {
     368          Py_DECREF(s);
     369          return 0;
     370      }
     371  
     372      PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
     373      if (reader) {
     374          /* Consume all of the reader as an iterator */
     375          PyObject* parsed_line;
     376          while ((parsed_line = PyIter_Next(reader))) {
     377              Py_DECREF(parsed_line);
     378          }
     379      }
     380  
     381      /* Ignore csv.Error because we're probably going to generate
     382         some bad files (embedded new-lines, unterminated quotes etc) */
     383      if (PyErr_ExceptionMatches(csv_error)) {
     384          PyErr_Clear();
     385      }
     386  
     387      Py_XDECREF(reader);
     388      Py_DECREF(s);
     389      return 0;
     390  }
     391  
     392  #define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000
     393  PyObject* ast_literal_eval_method = NULL;
     394  /* Called by LLVMFuzzerTestOneInput for initialization */
     395  static int init_ast_literal_eval(void) {
     396      PyObject* ast_module = PyImport_ImportModule("ast");
     397      if (ast_module == NULL) {
     398          return 0;
     399      }
     400      ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
     401      return ast_literal_eval_method != NULL;
     402  }
     403  /* Fuzz ast.literal_eval(x) */
     404  static int fuzz_ast_literal_eval(const char* data, size_t size) {
     405      if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
     406          return 0;
     407      }
     408      /* Ignore non null-terminated strings since ast can't handle
     409         embedded nulls */
     410      if (memchr(data, '\0', size) == NULL) {
     411          return 0;
     412      }
     413  
     414      PyObject* s = PyUnicode_FromString(data);
     415      /* Ignore exceptions until we have a valid string */
     416      if (s == NULL) {
     417          PyErr_Clear();
     418          return 0;
     419      }
     420  
     421      PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
     422      /* Ignore some common errors thrown by ast.literal_eval */
     423      if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
     424                              PyErr_ExceptionMatches(PyExc_TypeError) ||
     425                              PyErr_ExceptionMatches(PyExc_SyntaxError) ||
     426                              PyErr_ExceptionMatches(PyExc_MemoryError) ||
     427                              PyErr_ExceptionMatches(PyExc_RecursionError))
     428      ) {
     429          PyErr_Clear();
     430      }
     431  
     432      Py_XDECREF(literal);
     433      Py_DECREF(s);
     434      return 0;
     435  }
     436  
     437  /* Run fuzzer and abort on failure. */
     438  static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     439      int rv = fuzzer((const char*) data, size);
     440      if (PyErr_Occurred()) {
     441          /* Fuzz tests should handle expected errors for themselves.
     442             This is last-ditch check in case they didn't. */
     443          PyErr_Print();
     444          abort();
     445      }
     446      /* Someday the return value might mean something, propagate it. */
     447      return rv;
     448  }
     449  
     450  /* CPython generates a lot of leak warnings for whatever reason. */
     451  int __lsan_is_turned_off(void) { return 1; }
     452  
     453  
     454  int LLVMFuzzerInitialize(int *argc, char ***argv) {
     455      PyConfig config;
     456      PyConfig_InitPythonConfig(&config);
     457      config.install_signal_handlers = 0;
     458      PyStatus status;
     459      status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]);
     460      if (PyStatus_Exception(status)) {
     461          goto fail;
     462      }
     463  
     464      status = Py_InitializeFromConfig(&config);
     465      if (PyStatus_Exception(status)) {
     466          goto fail;
     467      }
     468      PyConfig_Clear(&config);
     469  
     470      return 0;
     471  
     472  fail:
     473      PyConfig_Clear(&config);
     474      Py_ExitStatusException(status);
     475  }
     476  
     477  /* Fuzz test interface.
     478     This returns the bitwise or of all fuzz test's return values.
     479  
     480     All fuzz tests must return 0, as all nonzero return codes are reserved for
     481     future use -- we propagate the return values for that future case.
     482     (And we bitwise or when running multiple tests to verify that normally we
     483     only return 0.) */
     484  int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     485      assert(Py_IsInitialized());
     486  
     487      int rv = 0;
     488  
     489  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
     490      rv |= _run_fuzz(data, size, fuzz_builtin_float);
     491  #endif
     492  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
     493      rv |= _run_fuzz(data, size, fuzz_builtin_int);
     494  #endif
     495  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
     496      rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
     497  #endif
     498  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
     499      static int STRUCT_UNPACK_INITIALIZED = 0;
     500      if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
     501          PyErr_Print();
     502          abort();
     503      } else {
     504          STRUCT_UNPACK_INITIALIZED = 1;
     505      }
     506      rv |= _run_fuzz(data, size, fuzz_struct_unpack);
     507  #endif
     508  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
     509      static int JSON_LOADS_INITIALIZED = 0;
     510      if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
     511          PyErr_Print();
     512          abort();
     513      } else {
     514          JSON_LOADS_INITIALIZED = 1;
     515      }
     516  
     517      rv |= _run_fuzz(data, size, fuzz_json_loads);
     518  #endif
     519  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
     520      static int SRE_COMPILE_INITIALIZED = 0;
     521      if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
     522          PyErr_Print();
     523          abort();
     524      } else {
     525          SRE_COMPILE_INITIALIZED = 1;
     526      }
     527  
     528      rv |= _run_fuzz(data, size, fuzz_sre_compile);
     529  #endif
     530  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
     531      static int SRE_MATCH_INITIALIZED = 0;
     532      if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
     533          PyErr_Print();
     534          abort();
     535      } else {
     536          SRE_MATCH_INITIALIZED = 1;
     537      }
     538  
     539      rv |= _run_fuzz(data, size, fuzz_sre_match);
     540  #endif
     541  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
     542      static int CSV_READER_INITIALIZED = 0;
     543      if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
     544          PyErr_Print();
     545          abort();
     546      } else {
     547          CSV_READER_INITIALIZED = 1;
     548      }
     549  
     550      rv |= _run_fuzz(data, size, fuzz_csv_reader);
     551  #endif
     552  #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
     553      static int AST_LITERAL_EVAL_INITIALIZED = 0;
     554      if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
     555          PyErr_Print();
     556          abort();
     557      } else {
     558          AST_LITERAL_EVAL_INITIALIZED = 1;
     559      }
     560  
     561      rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
     562  #endif
     563    return rv;
     564  }