1 #include "Python.h"
2 #include "pycore_frame.h"
3 #include "pycore_runtime.h" // _PyRuntime
4 #include "pycore_global_objects.h" // _Py_ID()
5
6 #include "pycore_pyerrors.h"
7 #include "pycore_code.h" // _PyCode_GetVarnames()
8 #include "stdlib_module_names.h" // _Py_stdlib_module_names
9
10 #define MAX_CANDIDATE_ITEMS 750
11 #define MAX_STRING_SIZE 40
12
13 #define MOVE_COST 2
14 #define CASE_COST 1
15
16 #define LEAST_FIVE_BITS(n) ((n) & 31)
17
18 static inline int
19 substitution_cost(char a, char b)
20 {
21 if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
22 // Not the same, not a case flip.
23 return MOVE_COST;
24 }
25 if (a == b) {
26 return 0;
27 }
28 if ('A' <= a && a <= 'Z') {
29 a += ('a' - 'A');
30 }
31 if ('A' <= b && b <= 'Z') {
32 b += ('a' - 'A');
33 }
34 if (a == b) {
35 return CASE_COST;
36 }
37 return MOVE_COST;
38 }
39
40 /* Calculate the Levenshtein distance between string1 and string2 */
41 static Py_ssize_t
42 levenshtein_distance(const char *a, size_t a_size,
43 const char *b, size_t b_size,
44 size_t max_cost, size_t *buffer)
45 {
46 // Both strings are the same (by identity)
47 if (a == b) {
48 return 0;
49 }
50
51 // Trim away common affixes.
52 while (a_size && b_size && a[0] == b[0]) {
53 a++; a_size--;
54 b++; b_size--;
55 }
56 while (a_size && b_size && a[a_size-1] == b[b_size-1]) {
57 a_size--;
58 b_size--;
59 }
60 if (a_size == 0 || b_size == 0) {
61 return (a_size + b_size) * MOVE_COST;
62 }
63 if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
64 return max_cost + 1;
65 }
66
67 // Prefer shorter buffer
68 if (b_size < a_size) {
69 const char *t = a; a = b; b = t;
70 size_t t_size = a_size; a_size = b_size; b_size = t_size;
71 }
72
73 // quick fail when a match is impossible.
74 if ((b_size - a_size) * MOVE_COST > max_cost) {
75 return max_cost + 1;
76 }
77
78 // Instead of producing the whole traditional len(a)-by-len(b)
79 // matrix, we can update just one row in place.
80 // Initialize the buffer row
81 size_t tmp = MOVE_COST;
82 for (size_t i = 0; i < a_size; i++) {
83 // cost from b[:0] to a[:i+1]
84 buffer[i] = tmp;
85 tmp += MOVE_COST;
86 }
87
88 size_t result = 0;
89 for (size_t b_index = 0; b_index < b_size; b_index++) {
90 char code = b[b_index];
91 // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
92 size_t distance = result = b_index * MOVE_COST;
93 size_t minimum = SIZE_MAX;
94 for (size_t index = 0; index < a_size; index++) {
95
96 // cost(b[:b_index+1], a[:index+1]) = min(
97 // // 1) substitute
98 // cost(b[:b_index], a[:index])
99 // + substitution_cost(b[b_index], a[index]),
100 // // 2) delete from b
101 // cost(b[:b_index], a[:index+1]) + MOVE_COST,
102 // // 3) delete from a
103 // cost(b[:b_index+1], a[index]) + MOVE_COST
104 // )
105
106 // 1) Previous distance in this row is cost(b[:b_index], a[:index])
107 size_t substitute = distance + substitution_cost(code, a[index]);
108 // 2) cost(b[:b_index], a[:index+1]) from previous row
109 distance = buffer[index];
110 // 3) existing result is cost(b[:b_index+1], a[index])
111
112 size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
113 result = Py_MIN(insert_delete, substitute);
114
115 // cost(b[:b_index+1], a[:index+1])
116 buffer[index] = result;
117 if (result < minimum) {
118 minimum = result;
119 }
120 }
121 if (minimum > max_cost) {
122 // Everything in this row is too big, so bail early.
123 return max_cost + 1;
124 }
125 }
126 return result;
127 }
128
129 static inline PyObject *
130 calculate_suggestions(PyObject *dir,
131 PyObject *name)
132 {
133 assert(!PyErr_Occurred());
134 assert(PyList_CheckExact(dir));
135
136 Py_ssize_t dir_size = PyList_GET_SIZE(dir);
137 if (dir_size >= MAX_CANDIDATE_ITEMS) {
138 return NULL;
139 }
140
141 Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
142 PyObject *suggestion = NULL;
143 Py_ssize_t name_size;
144 const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
145 if (name_str == NULL) {
146 return NULL;
147 }
148 size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
149 if (buffer == NULL) {
150 return PyErr_NoMemory();
151 }
152 for (int i = 0; i < dir_size; ++i) {
153 PyObject *item = PyList_GET_ITEM(dir, i);
154 if (_PyUnicode_Equal(name, item)) {
155 continue;
156 }
157 Py_ssize_t item_size;
158 const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
159 if (item_str == NULL) {
160 PyMem_Free(buffer);
161 return NULL;
162 }
163 // No more than 1/3 of the involved characters should need changed.
164 Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
165 // Don't take matches we've already beaten.
166 max_distance = Py_MIN(max_distance, suggestion_distance - 1);
167 Py_ssize_t current_distance =
168 levenshtein_distance(name_str, name_size, item_str,
169 item_size, max_distance, buffer);
170 if (current_distance > max_distance) {
171 continue;
172 }
173 if (!suggestion || current_distance < suggestion_distance) {
174 suggestion = item;
175 suggestion_distance = current_distance;
176 }
177 }
178 PyMem_Free(buffer);
179 return Py_XNewRef(suggestion);
180 }
181
182 static PyObject *
183 get_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
184 {
185 PyObject *name = exc->name; // borrowed reference
186 PyObject *obj = exc->obj; // borrowed reference
187
188 // Abort if we don't have an attribute name or we have an invalid one
189 if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
190 return NULL;
191 }
192
193 PyObject *dir = PyObject_Dir(obj);
194 if (dir == NULL) {
195 return NULL;
196 }
197
198 PyObject *suggestions = calculate_suggestions(dir, name);
199 Py_DECREF(dir);
200 return suggestions;
201 }
202
203 static PyObject *
204 offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
205 {
206 PyObject* suggestion = get_suggestions_for_attribute_error(exc);
207 if (suggestion == NULL) {
208 return NULL;
209 }
210 // Add a trailer ". Did you mean: (...)?"
211 PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
212 Py_DECREF(suggestion);
213 return result;
214 }
215
216 static PyObject *
217 get_suggestions_for_name_error(PyObject* name, PyFrameObject* frame)
218 {
219 PyCodeObject *code = PyFrame_GetCode(frame);
220 assert(code != NULL && code->co_localsplusnames != NULL);
221
222 PyObject *varnames = _PyCode_GetVarnames(code);
223 Py_DECREF(code);
224 if (varnames == NULL) {
225 return NULL;
226 }
227 PyObject *dir = PySequence_List(varnames);
228 Py_DECREF(varnames);
229 if (dir == NULL) {
230 return NULL;
231 }
232
233 // Are we inside a method and the instance has an attribute called 'name'?
234 int res = PySequence_Contains(dir, &_Py_ID(self));
235 if (res < 0) {
236 goto error;
237 }
238 if (res > 0) {
239 PyObject* locals = PyFrame_GetLocals(frame);
240 if (!locals) {
241 goto error;
242 }
243 PyObject* self = PyDict_GetItemWithError(locals, &_Py_ID(self)); /* borrowed */
244 if (!self) {
245 Py_DECREF(locals);
246 goto error;
247 }
248
249 PyObject *value;
250 res = _PyObject_LookupAttr(self, name, &value);
251 Py_DECREF(locals);
252 if (res < 0) {
253 goto error;
254 }
255 if (value) {
256 Py_DECREF(value);
257 Py_DECREF(dir);
258 return PyUnicode_FromFormat("self.%U", name);
259 }
260 }
261
262 PyObject *suggestions = calculate_suggestions(dir, name);
263 Py_DECREF(dir);
264 if (suggestions != NULL || PyErr_Occurred()) {
265 return suggestions;
266 }
267
268 dir = PySequence_List(frame->f_frame->f_globals);
269 if (dir == NULL) {
270 return NULL;
271 }
272 suggestions = calculate_suggestions(dir, name);
273 Py_DECREF(dir);
274 if (suggestions != NULL || PyErr_Occurred()) {
275 return suggestions;
276 }
277
278 dir = PySequence_List(frame->f_frame->f_builtins);
279 if (dir == NULL) {
280 return NULL;
281 }
282 suggestions = calculate_suggestions(dir, name);
283 Py_DECREF(dir);
284
285 return suggestions;
286
287 error:
288 Py_DECREF(dir);
289 return NULL;
290 }
291
292 static bool
293 is_name_stdlib_module(PyObject* name)
294 {
295 const char* the_name = PyUnicode_AsUTF8(name);
296 Py_ssize_t len = Py_ARRAY_LENGTH(_Py_stdlib_module_names);
297 for (Py_ssize_t i = 0; i < len; i++) {
298 if (strcmp(the_name, _Py_stdlib_module_names[i]) == 0) {
299 return 1;
300 }
301 }
302 return 0;
303 }
304
305 static PyObject *
306 offer_suggestions_for_name_error(PyNameErrorObject *exc)
307 {
308 PyObject *name = exc->name; // borrowed reference
309 PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
310 // Abort if we don't have a variable name or we have an invalid one
311 // or if we don't have a traceback to work with
312 if (name == NULL || !PyUnicode_CheckExact(name) ||
313 traceback == NULL || !Py_IS_TYPE(traceback, &PyTraceBack_Type)
314 ) {
315 return NULL;
316 }
317
318 // Move to the traceback of the exception
319 while (1) {
320 PyTracebackObject *next = traceback->tb_next;
321 if (next == NULL || !Py_IS_TYPE(next, &PyTraceBack_Type)) {
322 break;
323 }
324 else {
325 traceback = next;
326 }
327 }
328
329 PyFrameObject *frame = traceback->tb_frame;
330 assert(frame != NULL);
331
332 PyObject* suggestion = get_suggestions_for_name_error(name, frame);
333 if (suggestion == NULL && PyErr_Occurred()) {
334 return NULL;
335 }
336
337 // Add a trailer ". Did you mean: (...)?"
338 PyObject* result = NULL;
339 if (!is_name_stdlib_module(name)) {
340 if (suggestion == NULL) {
341 return NULL;
342 }
343 result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
344 } else if (suggestion == NULL) {
345 result = PyUnicode_FromFormat(". Did you forget to import %R?", name);
346 } else {
347 result = PyUnicode_FromFormat(". Did you mean: %R? Or did you forget to import %R?", suggestion, name);
348 }
349 Py_XDECREF(suggestion);
350 return result;
351 }
352
353 static PyObject *
354 offer_suggestions_for_import_error(PyImportErrorObject *exc)
355 {
356 PyObject *mod_name = exc->name; // borrowed reference
357 PyObject *name = exc->name_from; // borrowed reference
358 if (name == NULL || mod_name == NULL || name == Py_None ||
359 !PyUnicode_CheckExact(name) || !PyUnicode_CheckExact(mod_name)) {
360 return NULL;
361 }
362
363 PyObject* mod = PyImport_GetModule(mod_name);
364 if (mod == NULL) {
365 return NULL;
366 }
367
368 PyObject *dir = PyObject_Dir(mod);
369 Py_DECREF(mod);
370 if (dir == NULL) {
371 return NULL;
372 }
373
374 PyObject *suggestion = calculate_suggestions(dir, name);
375 Py_DECREF(dir);
376 if (!suggestion) {
377 return NULL;
378 }
379
380 PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
381 Py_DECREF(suggestion);
382 return result;
383 }
384
385 // Offer suggestions for a given exception. Returns a python string object containing the
386 // suggestions. This function returns NULL if no suggestion was found or if an exception happened,
387 // users must call PyErr_Occurred() to disambiguate.
388 PyObject *
389 _Py_Offer_Suggestions(PyObject *exception)
390 {
391 PyObject *result = NULL;
392 assert(!PyErr_Occurred());
393 if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
394 result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
395 } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
396 result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
397 } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_ImportError)) {
398 result = offer_suggestions_for_import_error((PyImportErrorObject *) exception);
399 }
400 return result;
401 }
402
403 Py_ssize_t
404 _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
405 {
406 assert(PyUnicode_Check(a) && PyUnicode_Check(b));
407 Py_ssize_t size_a, size_b;
408 const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
409 if (utf8_a == NULL) {
410 return -1;
411 }
412 const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
413 if (utf8_b == NULL) {
414 return -1;
415 }
416 if (max_cost == -1) {
417 max_cost = MOVE_COST * Py_MAX(size_a, size_b);
418 }
419 size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
420 if (buffer == NULL) {
421 PyErr_NoMemory();
422 return -1;
423 }
424 Py_ssize_t res = levenshtein_distance(utf8_a, size_a,
425 utf8_b, size_b, max_cost, buffer);
426 PyMem_Free(buffer);
427 return res;
428 }
429