(root)/
Python-3.12.0/
Python/
suggestions.c
       1  #include "Python.h"
       2  #include "pycore_frame.h"
       3  #include "pycore_runtime.h"         // _PyRuntime
       4  #include "pycore_global_objects.h"  // _Py_ID()
       5  
       6  #include "pycore_pyerrors.h"
       7  #include "pycore_code.h"        // _PyCode_GetVarnames()
       8  #include "stdlib_module_names.h"  // _Py_stdlib_module_names
       9  
      10  #define MAX_CANDIDATE_ITEMS 750
      11  #define MAX_STRING_SIZE 40
      12  
      13  #define MOVE_COST 2
      14  #define CASE_COST 1
      15  
      16  #define LEAST_FIVE_BITS(n) ((n) & 31)
      17  
      18  static inline int
      19  substitution_cost(char a, char b)
      20  {
      21      if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
      22          // Not the same, not a case flip.
      23          return MOVE_COST;
      24      }
      25      if (a == b) {
      26          return 0;
      27      }
      28      if ('A' <= a && a <= 'Z') {
      29          a += ('a' - 'A');
      30      }
      31      if ('A' <= b && b <= 'Z') {
      32          b += ('a' - 'A');
      33      }
      34      if (a == b) {
      35          return CASE_COST;
      36      }
      37      return MOVE_COST;
      38  }
      39  
      40  /* Calculate the Levenshtein distance between string1 and string2 */
      41  static Py_ssize_t
      42  levenshtein_distance(const char *a, size_t a_size,
      43                       const char *b, size_t b_size,
      44                       size_t max_cost, size_t *buffer)
      45  {
      46      // Both strings are the same (by identity)
      47      if (a == b) {
      48          return 0;
      49      }
      50  
      51      // Trim away common affixes.
      52      while (a_size && b_size && a[0] == b[0]) {
      53          a++; a_size--;
      54          b++; b_size--;
      55      }
      56      while (a_size && b_size && a[a_size-1] == b[b_size-1]) {
      57          a_size--;
      58          b_size--;
      59      }
      60      if (a_size == 0 || b_size == 0) {
      61          return (a_size + b_size) * MOVE_COST;
      62      }
      63      if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
      64          return max_cost + 1;
      65      }
      66  
      67      // Prefer shorter buffer
      68      if (b_size < a_size) {
      69          const char *t = a; a = b; b = t;
      70          size_t t_size = a_size; a_size = b_size; b_size = t_size;
      71      }
      72  
      73      // quick fail when a match is impossible.
      74      if ((b_size - a_size) * MOVE_COST > max_cost) {
      75          return max_cost + 1;
      76      }
      77  
      78      // Instead of producing the whole traditional len(a)-by-len(b)
      79      // matrix, we can update just one row in place.
      80      // Initialize the buffer row
      81      size_t tmp = MOVE_COST;
      82      for (size_t i = 0; i < a_size; i++) {
      83          // cost from b[:0] to a[:i+1]
      84          buffer[i] = tmp;
      85          tmp += MOVE_COST;
      86      }
      87  
      88      size_t result = 0;
      89      for (size_t b_index = 0; b_index < b_size; b_index++) {
      90          char code = b[b_index];
      91          // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
      92          size_t distance = result = b_index * MOVE_COST;
      93          size_t minimum = SIZE_MAX;
      94          for (size_t index = 0; index < a_size; index++) {
      95  
      96              // cost(b[:b_index+1], a[:index+1]) = min(
      97              //     // 1) substitute
      98              //     cost(b[:b_index], a[:index])
      99              //         + substitution_cost(b[b_index], a[index]),
     100              //     // 2) delete from b
     101              //     cost(b[:b_index], a[:index+1]) + MOVE_COST,
     102              //     // 3) delete from a
     103              //     cost(b[:b_index+1], a[index]) + MOVE_COST
     104              // )
     105  
     106              // 1) Previous distance in this row is cost(b[:b_index], a[:index])
     107              size_t substitute = distance + substitution_cost(code, a[index]);
     108              // 2) cost(b[:b_index], a[:index+1]) from previous row
     109              distance = buffer[index];
     110              // 3) existing result is cost(b[:b_index+1], a[index])
     111  
     112              size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
     113              result = Py_MIN(insert_delete, substitute);
     114  
     115              // cost(b[:b_index+1], a[:index+1])
     116              buffer[index] = result;
     117              if (result < minimum) {
     118                  minimum = result;
     119              }
     120          }
     121          if (minimum > max_cost) {
     122              // Everything in this row is too big, so bail early.
     123              return max_cost + 1;
     124          }
     125      }
     126      return result;
     127  }
     128  
     129  static inline PyObject *
     130  calculate_suggestions(PyObject *dir,
     131                        PyObject *name)
     132  {
     133      assert(!PyErr_Occurred());
     134      assert(PyList_CheckExact(dir));
     135  
     136      Py_ssize_t dir_size = PyList_GET_SIZE(dir);
     137      if (dir_size >= MAX_CANDIDATE_ITEMS) {
     138          return NULL;
     139      }
     140  
     141      Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
     142      PyObject *suggestion = NULL;
     143      Py_ssize_t name_size;
     144      const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
     145      if (name_str == NULL) {
     146          return NULL;
     147      }
     148      size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
     149      if (buffer == NULL) {
     150          return PyErr_NoMemory();
     151      }
     152      for (int i = 0; i < dir_size; ++i) {
     153          PyObject *item = PyList_GET_ITEM(dir, i);
     154          if (_PyUnicode_Equal(name, item)) {
     155              continue;
     156          }
     157          Py_ssize_t item_size;
     158          const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
     159          if (item_str == NULL) {
     160              PyMem_Free(buffer);
     161              return NULL;
     162          }
     163          // No more than 1/3 of the involved characters should need changed.
     164          Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
     165          // Don't take matches we've already beaten.
     166          max_distance = Py_MIN(max_distance, suggestion_distance - 1);
     167          Py_ssize_t current_distance =
     168              levenshtein_distance(name_str, name_size, item_str,
     169                                   item_size, max_distance, buffer);
     170          if (current_distance > max_distance) {
     171              continue;
     172          }
     173          if (!suggestion || current_distance < suggestion_distance) {
     174              suggestion = item;
     175              suggestion_distance = current_distance;
     176          }
     177      }
     178      PyMem_Free(buffer);
     179      return Py_XNewRef(suggestion);
     180  }
     181  
     182  static PyObject *
     183  get_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
     184  {
     185      PyObject *name = exc->name; // borrowed reference
     186      PyObject *obj = exc->obj; // borrowed reference
     187  
     188      // Abort if we don't have an attribute name or we have an invalid one
     189      if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
     190          return NULL;
     191      }
     192  
     193      PyObject *dir = PyObject_Dir(obj);
     194      if (dir == NULL) {
     195          return NULL;
     196      }
     197  
     198      PyObject *suggestions = calculate_suggestions(dir, name);
     199      Py_DECREF(dir);
     200      return suggestions;
     201  }
     202  
     203  static PyObject *
     204  offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
     205  {
     206      PyObject* suggestion = get_suggestions_for_attribute_error(exc);
     207      if (suggestion == NULL) {
     208          return NULL;
     209      }
     210      // Add a trailer ". Did you mean: (...)?"
     211      PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
     212      Py_DECREF(suggestion);
     213      return result;
     214  }
     215  
     216  static PyObject *
     217  get_suggestions_for_name_error(PyObject* name, PyFrameObject* frame)
     218  {
     219      PyCodeObject *code = PyFrame_GetCode(frame);
     220      assert(code != NULL && code->co_localsplusnames != NULL);
     221  
     222      PyObject *varnames = _PyCode_GetVarnames(code);
     223      Py_DECREF(code);
     224      if (varnames == NULL) {
     225          return NULL;
     226      }
     227      PyObject *dir = PySequence_List(varnames);
     228      Py_DECREF(varnames);
     229      if (dir == NULL) {
     230          return NULL;
     231      }
     232  
     233      // Are we inside a method and the instance has an attribute called 'name'?
     234      int res = PySequence_Contains(dir, &_Py_ID(self));
     235      if (res < 0) {
     236          goto error;
     237      }
     238      if (res > 0) {
     239          PyObject* locals = PyFrame_GetLocals(frame);
     240          if (!locals) {
     241              goto error;
     242          }
     243          PyObject* self = PyDict_GetItemWithError(locals, &_Py_ID(self)); /* borrowed */
     244          if (!self) {
     245              Py_DECREF(locals);
     246              goto error;
     247          }
     248  
     249          PyObject *value;
     250          res = _PyObject_LookupAttr(self, name, &value);
     251          Py_DECREF(locals);
     252          if (res < 0) {
     253              goto error;
     254          }
     255          if (value) {
     256              Py_DECREF(value);
     257              Py_DECREF(dir);
     258              return PyUnicode_FromFormat("self.%U", name);
     259          }
     260      }
     261  
     262      PyObject *suggestions = calculate_suggestions(dir, name);
     263      Py_DECREF(dir);
     264      if (suggestions != NULL || PyErr_Occurred()) {
     265          return suggestions;
     266      }
     267  
     268      dir = PySequence_List(frame->f_frame->f_globals);
     269      if (dir == NULL) {
     270          return NULL;
     271      }
     272      suggestions = calculate_suggestions(dir, name);
     273      Py_DECREF(dir);
     274      if (suggestions != NULL || PyErr_Occurred()) {
     275          return suggestions;
     276      }
     277  
     278      dir = PySequence_List(frame->f_frame->f_builtins);
     279      if (dir == NULL) {
     280          return NULL;
     281      }
     282      suggestions = calculate_suggestions(dir, name);
     283      Py_DECREF(dir);
     284  
     285      return suggestions;
     286  
     287  error:
     288      Py_DECREF(dir);
     289      return NULL;
     290  }
     291  
     292  static bool
     293  is_name_stdlib_module(PyObject* name)
     294  {
     295      const char* the_name = PyUnicode_AsUTF8(name);
     296      Py_ssize_t len = Py_ARRAY_LENGTH(_Py_stdlib_module_names);
     297      for (Py_ssize_t i = 0; i < len; i++) {
     298          if (strcmp(the_name, _Py_stdlib_module_names[i]) == 0) {
     299              return 1;
     300          }
     301      }
     302      return 0;
     303  }
     304  
     305  static PyObject *
     306  offer_suggestions_for_name_error(PyNameErrorObject *exc)
     307  {
     308      PyObject *name = exc->name; // borrowed reference
     309      PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
     310      // Abort if we don't have a variable name or we have an invalid one
     311      // or if we don't have a traceback to work with
     312      if (name == NULL || !PyUnicode_CheckExact(name) ||
     313          traceback == NULL || !Py_IS_TYPE(traceback, &PyTraceBack_Type)
     314      ) {
     315          return NULL;
     316      }
     317  
     318      // Move to the traceback of the exception
     319      while (1) {
     320          PyTracebackObject *next = traceback->tb_next;
     321          if (next == NULL || !Py_IS_TYPE(next, &PyTraceBack_Type)) {
     322              break;
     323          }
     324          else {
     325              traceback = next;
     326          }
     327      }
     328  
     329      PyFrameObject *frame = traceback->tb_frame;
     330      assert(frame != NULL);
     331  
     332      PyObject* suggestion = get_suggestions_for_name_error(name, frame);
     333      if (suggestion == NULL && PyErr_Occurred()) {
     334          return NULL;
     335      }
     336  
     337      // Add a trailer ". Did you mean: (...)?"
     338      PyObject* result = NULL;
     339      if (!is_name_stdlib_module(name)) {
     340          if (suggestion == NULL) {
     341              return NULL;
     342          }
     343          result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
     344      } else if (suggestion == NULL) {
     345          result = PyUnicode_FromFormat(". Did you forget to import %R?", name);
     346      } else {
     347          result = PyUnicode_FromFormat(". Did you mean: %R? Or did you forget to import %R?", suggestion, name);
     348      }
     349      Py_XDECREF(suggestion);
     350      return result;
     351  }
     352  
     353  static PyObject *
     354  offer_suggestions_for_import_error(PyImportErrorObject *exc)
     355  {
     356      PyObject *mod_name = exc->name; // borrowed reference
     357      PyObject *name = exc->name_from; // borrowed reference
     358      if (name == NULL || mod_name == NULL || name == Py_None ||
     359          !PyUnicode_CheckExact(name) || !PyUnicode_CheckExact(mod_name)) {
     360          return NULL;
     361      }
     362  
     363      PyObject* mod = PyImport_GetModule(mod_name);
     364      if (mod == NULL) {
     365          return NULL;
     366      }
     367  
     368      PyObject *dir = PyObject_Dir(mod);
     369      Py_DECREF(mod);
     370      if (dir == NULL) {
     371          return NULL;
     372      }
     373  
     374      PyObject *suggestion = calculate_suggestions(dir, name);
     375      Py_DECREF(dir);
     376      if (!suggestion) {
     377          return NULL;
     378      }
     379  
     380      PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
     381      Py_DECREF(suggestion);
     382      return result;
     383  }
     384  
     385  // Offer suggestions for a given exception. Returns a python string object containing the
     386  // suggestions. This function returns NULL if no suggestion was found or if an exception happened,
     387  // users must call PyErr_Occurred() to disambiguate.
     388  PyObject *
     389  _Py_Offer_Suggestions(PyObject *exception)
     390  {
     391      PyObject *result = NULL;
     392      assert(!PyErr_Occurred());
     393      if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
     394          result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
     395      } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
     396          result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
     397      } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_ImportError)) {
     398          result = offer_suggestions_for_import_error((PyImportErrorObject *) exception);
     399      }
     400      return result;
     401  }
     402  
     403  Py_ssize_t
     404  _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
     405  {
     406      assert(PyUnicode_Check(a) && PyUnicode_Check(b));
     407      Py_ssize_t size_a, size_b;
     408      const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
     409      if (utf8_a == NULL) {
     410          return -1;
     411      }
     412      const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
     413      if (utf8_b == NULL) {
     414          return -1;
     415      }
     416      if (max_cost == -1) {
     417          max_cost = MOVE_COST * Py_MAX(size_a, size_b);
     418      }
     419      size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
     420      if (buffer == NULL) {
     421          PyErr_NoMemory();
     422          return -1;
     423      }
     424      Py_ssize_t res = levenshtein_distance(utf8_a, size_a,
     425                                      utf8_b, size_b, max_cost, buffer);
     426      PyMem_Free(buffer);
     427      return res;
     428  }
     429