1 #include <stdbool.h>
2
3 #include "Python.h"
4 #include "pycore_code.h" // write_location_entry_start()
5 #include "pycore_compile.h"
6 #include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
7 #include "pycore_pymem.h" // _PyMem_IsPtrFreed()
8
9
/* Initial capacities, in bytes, of the buffers allocated by assemble_init().
 * Every buffer is doubled on demand, so these are starting points only. */
#define DEFAULT_CODE_SIZE 128    /* bytecode buffer */
#define DEFAULT_LNOTAB_SIZE 16   /* exception table buffer */
#define DEFAULT_CNOTAB_SIZE 32   /* location table buffer */

/* Status codes returned by the int-returning helpers in this file. */
#undef SUCCESS
#undef ERROR
#define SUCCESS 0
#define ERROR (-1)

/* Return ERROR from the enclosing function when X evaluates to -1.
 * The do/while(0) wrapper makes the macro a single statement, so it can be
 * used safely as the body of an unbraced if/else. */
#define RETURN_IF_ERROR(X)  \
    do {                    \
        if ((X) == -1) {    \
            return ERROR;   \
        }                   \
    } while (0)
23
24 typedef _PyCompilerSrcLocation location;
25 typedef _PyCompile_Instruction instruction;
26 typedef _PyCompile_InstructionSequence instr_sequence;
27
28 static inline bool
29 same_location(location a, location b)
30 {
31 return a.lineno == b.lineno &&
32 a.end_lineno == b.end_lineno &&
33 a.col_offset == b.col_offset &&
34 a.end_col_offset == b.end_col_offset;
35 }
36
37 struct assembler {
38 PyObject *a_bytecode; /* bytes containing bytecode */
39 int a_offset; /* offset into bytecode */
40 PyObject *a_except_table; /* bytes containing exception table */
41 int a_except_table_off; /* offset into exception table */
42 /* Location Info */
43 int a_lineno; /* lineno of last emitted instruction */
44 PyObject* a_linetable; /* bytes containing location info */
45 int a_location_off; /* offset of last written location info frame */
46 };
47
48 static int
49 assemble_init(struct assembler *a, int firstlineno)
50 {
51 memset(a, 0, sizeof(struct assembler));
52 a->a_lineno = firstlineno;
53 a->a_linetable = NULL;
54 a->a_location_off = 0;
55 a->a_except_table = NULL;
56 a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE);
57 if (a->a_bytecode == NULL) {
58 goto error;
59 }
60 a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE);
61 if (a->a_linetable == NULL) {
62 goto error;
63 }
64 a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
65 if (a->a_except_table == NULL) {
66 goto error;
67 }
68 return SUCCESS;
69 error:
70 Py_XDECREF(a->a_bytecode);
71 Py_XDECREF(a->a_linetable);
72 Py_XDECREF(a->a_except_table);
73 return ERROR;
74 }
75
76 static void
77 assemble_free(struct assembler *a)
78 {
79 Py_XDECREF(a->a_bytecode);
80 Py_XDECREF(a->a_linetable);
81 Py_XDECREF(a->a_except_table);
82 }
83
84 static inline void
85 write_except_byte(struct assembler *a, int byte) {
86 unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table);
87 p[a->a_except_table_off++] = byte;
88 }
89
/* Set on every byte of an item except the last one. */
#define CONTINUATION_BIT 64

/* Write `value` to the exception table as a sequence of 6-bit groups, most
 * significant group first.  Leading groups are skipped while the value is
 * small enough; `msb` (either 0 or 128) is OR-ed into the first byte that is
 * actually written. */
static void
assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
{
    assert ((msb | 128) == 128);
    assert(value >= 0 && value < (1 << 30));
    int first_byte_flag = msb;

    /* Topmost group: value < 2**30, so no masking is applied here. */
    if (value >= 1 << 24) {
        write_except_byte(a, (value >> 24) | CONTINUATION_BIT | first_byte_flag);
        first_byte_flag = 0;
    }
    /* Middle groups: bits 18-23, 12-17 and 6-11. */
    for (int shift = 18; shift >= 6; shift -= 6) {
        if (value >= 1 << shift) {
            write_except_byte(
                a, ((value >> shift) & 0x3f) | CONTINUATION_BIT | first_byte_flag);
            first_byte_flag = 0;
        }
    }
    /* Lowest group is always written and carries no continuation bit. */
    write_except_byte(a, (value & 0x3f) | first_byte_flag);
}
115
116 /* See Objects/exception_handling_notes.txt for details of layout */
117 #define MAX_SIZE_OF_ENTRY 20
118
119 static int
120 assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
121 _PyCompile_ExceptHandlerInfo *handler)
122 {
123 Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
124 if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
125 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2));
126 }
127 int size = end-start;
128 assert(end > start);
129 int target = handler->h_offset;
130 int depth = handler->h_startdepth - 1;
131 if (handler->h_preserve_lasti > 0) {
132 depth -= 1;
133 }
134 assert(depth >= 0);
135 int depth_lasti = (depth<<1) | handler->h_preserve_lasti;
136 assemble_emit_exception_table_item(a, start, (1<<7));
137 assemble_emit_exception_table_item(a, size, 0);
138 assemble_emit_exception_table_item(a, target, 0);
139 assemble_emit_exception_table_item(a, depth_lasti, 0);
140 return SUCCESS;
141 }
142
143 static int
144 assemble_exception_table(struct assembler *a, instr_sequence *instrs)
145 {
146 int ioffset = 0;
147 _PyCompile_ExceptHandlerInfo handler;
148 handler.h_offset = -1;
149 handler.h_preserve_lasti = -1;
150 int start = -1;
151 for (int i = 0; i < instrs->s_used; i++) {
152 instruction *instr = &instrs->s_instrs[i];
153 if (instr->i_except_handler_info.h_offset != handler.h_offset) {
154 if (handler.h_offset >= 0) {
155 RETURN_IF_ERROR(
156 assemble_emit_exception_table_entry(a, start, ioffset, &handler));
157 }
158 start = ioffset;
159 handler = instr->i_except_handler_info;
160 }
161 ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
162 }
163 if (handler.h_offset >= 0) {
164 RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
165 }
166 return SUCCESS;
167 }
168
169
170 /* Code location emitting code. See locations.md for a description of the format. */
171
172 #define MSB 0x80
173
174 static void
175 write_location_byte(struct assembler* a, int val)
176 {
177 PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255;
178 a->a_location_off++;
179 }
180
181
182 static uint8_t *
183 location_pointer(struct assembler* a)
184 {
185 return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) +
186 a->a_location_off;
187 }
188
189 static void
190 write_location_first_byte(struct assembler* a, int code, int length)
191 {
192 a->a_location_off += write_location_entry_start(
193 location_pointer(a), code, length);
194 }
195
196 static void
197 write_location_varint(struct assembler* a, unsigned int val)
198 {
199 uint8_t *ptr = location_pointer(a);
200 a->a_location_off += write_varint(ptr, val);
201 }
202
203
204 static void
205 write_location_signed_varint(struct assembler* a, int val)
206 {
207 uint8_t *ptr = location_pointer(a);
208 a->a_location_off += write_signed_varint(ptr, val);
209 }
210
211 static void
212 write_location_info_short_form(struct assembler* a, int length, int column, int end_column)
213 {
214 assert(length > 0 && length <= 8);
215 int column_low_bits = column & 7;
216 int column_group = column >> 3;
217 assert(column < 80);
218 assert(end_column >= column);
219 assert(end_column - column < 16);
220 write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length);
221 write_location_byte(a, (column_low_bits << 4) | (end_column - column));
222 }
223
224 static void
225 write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column)
226 {
227 assert(length > 0 && length <= 8);
228 assert(line_delta >= 0 && line_delta < 3);
229 assert(column < 128);
230 assert(end_column < 128);
231 write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length);
232 write_location_byte(a, column);
233 write_location_byte(a, end_column);
234 }
235
236 static void
237 write_location_info_long_form(struct assembler* a, location loc, int length)
238 {
239 assert(length > 0 && length <= 8);
240 write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length);
241 write_location_signed_varint(a, loc.lineno - a->a_lineno);
242 assert(loc.end_lineno >= loc.lineno);
243 write_location_varint(a, loc.end_lineno - loc.lineno);
244 write_location_varint(a, loc.col_offset + 1);
245 write_location_varint(a, loc.end_col_offset + 1);
246 }
247
248 static void
249 write_location_info_none(struct assembler* a, int length)
250 {
251 write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length);
252 }
253
254 static void
255 write_location_info_no_column(struct assembler* a, int length, int line_delta)
256 {
257 write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length);
258 write_location_signed_varint(a, line_delta);
259 }
260
261 #define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */
262
263
264 static int
265 write_location_info_entry(struct assembler* a, location loc, int isize)
266 {
267 Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
268 if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
269 assert(len > THEORETICAL_MAX_ENTRY_SIZE);
270 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, len*2));
271 }
272 if (loc.lineno < 0) {
273 write_location_info_none(a, isize);
274 return SUCCESS;
275 }
276 int line_delta = loc.lineno - a->a_lineno;
277 int column = loc.col_offset;
278 int end_column = loc.end_col_offset;
279 assert(column >= -1);
280 assert(end_column >= -1);
281 if (column < 0 || end_column < 0) {
282 if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) {
283 write_location_info_no_column(a, isize, line_delta);
284 a->a_lineno = loc.lineno;
285 return SUCCESS;
286 }
287 }
288 else if (loc.end_lineno == loc.lineno) {
289 if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) {
290 write_location_info_short_form(a, isize, column, end_column);
291 return SUCCESS;
292 }
293 if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) {
294 write_location_info_oneline_form(a, isize, line_delta, column, end_column);
295 a->a_lineno = loc.lineno;
296 return SUCCESS;
297 }
298 }
299 write_location_info_long_form(a, loc, isize);
300 a->a_lineno = loc.lineno;
301 return SUCCESS;
302 }
303
304 static int
305 assemble_emit_location(struct assembler* a, location loc, int isize)
306 {
307 if (isize == 0) {
308 return SUCCESS;
309 }
310 while (isize > 8) {
311 RETURN_IF_ERROR(write_location_info_entry(a, loc, 8));
312 isize -= 8;
313 }
314 return write_location_info_entry(a, loc, isize);
315 }
316
317 static int
318 assemble_location_info(struct assembler *a, instr_sequence *instrs,
319 int firstlineno)
320 {
321 a->a_lineno = firstlineno;
322 location loc = NO_LOCATION;
323 int size = 0;
324 for (int i = 0; i < instrs->s_used; i++) {
325 instruction *instr = &instrs->s_instrs[i];
326 if (!same_location(loc, instr->i_loc)) {
327 RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
328 loc = instr->i_loc;
329 size = 0;
330 }
331 size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
332 }
333 RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
334 return SUCCESS;
335 }
336
337 static void
338 write_instr(_Py_CODEUNIT *codestr, instruction *instr, int ilen)
339 {
340 int opcode = instr->i_opcode;
341 assert(!IS_PSEUDO_OPCODE(opcode));
342 int oparg = instr->i_oparg;
343 assert(HAS_ARG(opcode) || oparg == 0);
344 int caches = _PyOpcode_Caches[opcode];
345 switch (ilen - caches) {
346 case 4:
347 codestr->op.code = EXTENDED_ARG;
348 codestr->op.arg = (oparg >> 24) & 0xFF;
349 codestr++;
350 /* fall through */
351 case 3:
352 codestr->op.code = EXTENDED_ARG;
353 codestr->op.arg = (oparg >> 16) & 0xFF;
354 codestr++;
355 /* fall through */
356 case 2:
357 codestr->op.code = EXTENDED_ARG;
358 codestr->op.arg = (oparg >> 8) & 0xFF;
359 codestr++;
360 /* fall through */
361 case 1:
362 codestr->op.code = opcode;
363 codestr->op.arg = oparg & 0xFF;
364 codestr++;
365 break;
366 default:
367 Py_UNREACHABLE();
368 }
369 while (caches--) {
370 codestr->op.code = CACHE;
371 codestr->op.arg = 0;
372 codestr++;
373 }
374 }
375
376 /* assemble_emit_instr()
377 Extend the bytecode with a new instruction.
378 Update lnotab if necessary.
379 */
380
381 static int
382 assemble_emit_instr(struct assembler *a, instruction *instr)
383 {
384 Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
385 _Py_CODEUNIT *code;
386
387 int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
388 if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
389 if (len > PY_SSIZE_T_MAX / 2) {
390 return ERROR;
391 }
392 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, len * 2));
393 }
394 code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
395 a->a_offset += size;
396 write_instr(code, instr, size);
397 return SUCCESS;
398 }
399
400 static int
401 assemble_emit(struct assembler *a, instr_sequence *instrs,
402 int first_lineno, PyObject *const_cache)
403 {
404 RETURN_IF_ERROR(assemble_init(a, first_lineno));
405
406 for (int i = 0; i < instrs->s_used; i++) {
407 instruction *instr = &instrs->s_instrs[i];
408 RETURN_IF_ERROR(assemble_emit_instr(a, instr));
409 }
410
411 RETURN_IF_ERROR(assemble_location_info(a, instrs, a->a_lineno));
412
413 RETURN_IF_ERROR(assemble_exception_table(a, instrs));
414
415 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off));
416 RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table));
417
418 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, a->a_location_off));
419 RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_linetable));
420
421 RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, a->a_offset * sizeof(_Py_CODEUNIT)));
422 RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_bytecode));
423 return SUCCESS;
424 }
425
426 static PyObject *
427 dict_keys_inorder(PyObject *dict, Py_ssize_t offset)
428 {
429 PyObject *tuple, *k, *v;
430 Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
431
432 tuple = PyTuple_New(size);
433 if (tuple == NULL)
434 return NULL;
435 while (PyDict_Next(dict, &pos, &k, &v)) {
436 i = PyLong_AS_LONG(v);
437 assert((i - offset) < size);
438 assert((i - offset) >= 0);
439 PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k));
440 }
441 return tuple;
442 }
443
444 // This is in codeobject.c.
445 extern void _Py_set_localsplus_info(int, PyObject *, unsigned char,
446 PyObject *, PyObject *);
447
448 static void
449 compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus,
450 PyObject *names, PyObject *kinds)
451 {
452 PyObject *k, *v;
453 Py_ssize_t pos = 0;
454 while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) {
455 int offset = (int)PyLong_AS_LONG(v);
456 assert(offset >= 0);
457 assert(offset < nlocalsplus);
458 // For now we do not distinguish arg kinds.
459 _PyLocals_Kind kind = CO_FAST_LOCAL;
460 if (PyDict_Contains(umd->u_fasthidden, k)) {
461 kind |= CO_FAST_HIDDEN;
462 }
463 if (PyDict_GetItem(umd->u_cellvars, k) != NULL) {
464 kind |= CO_FAST_CELL;
465 }
466 _Py_set_localsplus_info(offset, k, kind, names, kinds);
467 }
468 int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
469
470 // This counter mirrors the fix done in fix_cell_offsets().
471 int numdropped = 0;
472 pos = 0;
473 while (PyDict_Next(umd->u_cellvars, &pos, &k, &v)) {
474 if (PyDict_GetItem(umd->u_varnames, k) != NULL) {
475 // Skip cells that are already covered by locals.
476 numdropped += 1;
477 continue;
478 }
479 int offset = (int)PyLong_AS_LONG(v);
480 assert(offset >= 0);
481 offset += nlocals - numdropped;
482 assert(offset < nlocalsplus);
483 _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds);
484 }
485
486 pos = 0;
487 while (PyDict_Next(umd->u_freevars, &pos, &k, &v)) {
488 int offset = (int)PyLong_AS_LONG(v);
489 assert(offset >= 0);
490 offset += nlocals - numdropped;
491 assert(offset < nlocalsplus);
492 _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds);
493 }
494 }
495
496 static PyCodeObject *
497 makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_cache,
498 PyObject *constslist, int maxdepth, int nlocalsplus, int code_flags,
499 PyObject *filename)
500 {
501 PyCodeObject *co = NULL;
502 PyObject *names = NULL;
503 PyObject *consts = NULL;
504 PyObject *localsplusnames = NULL;
505 PyObject *localspluskinds = NULL;
506 names = dict_keys_inorder(umd->u_names, 0);
507 if (!names) {
508 goto error;
509 }
510 if (_PyCompile_ConstCacheMergeOne(const_cache, &names) < 0) {
511 goto error;
512 }
513
514 consts = PyList_AsTuple(constslist); /* PyCode_New requires a tuple */
515 if (consts == NULL) {
516 goto error;
517 }
518 if (_PyCompile_ConstCacheMergeOne(const_cache, &consts) < 0) {
519 goto error;
520 }
521
522 assert(umd->u_posonlyargcount < INT_MAX);
523 assert(umd->u_argcount < INT_MAX);
524 assert(umd->u_kwonlyargcount < INT_MAX);
525 int posonlyargcount = (int)umd->u_posonlyargcount;
526 int posorkwargcount = (int)umd->u_argcount;
527 assert(INT_MAX - posonlyargcount - posorkwargcount > 0);
528 int kwonlyargcount = (int)umd->u_kwonlyargcount;
529
530 localsplusnames = PyTuple_New(nlocalsplus);
531 if (localsplusnames == NULL) {
532 goto error;
533 }
534 localspluskinds = PyBytes_FromStringAndSize(NULL, nlocalsplus);
535 if (localspluskinds == NULL) {
536 goto error;
537 }
538 compute_localsplus_info(umd, nlocalsplus, localsplusnames, localspluskinds);
539
540 struct _PyCodeConstructor con = {
541 .filename = filename,
542 .name = umd->u_name,
543 .qualname = umd->u_qualname ? umd->u_qualname : umd->u_name,
544 .flags = code_flags,
545
546 .code = a->a_bytecode,
547 .firstlineno = umd->u_firstlineno,
548 .linetable = a->a_linetable,
549
550 .consts = consts,
551 .names = names,
552
553 .localsplusnames = localsplusnames,
554 .localspluskinds = localspluskinds,
555
556 .argcount = posonlyargcount + posorkwargcount,
557 .posonlyargcount = posonlyargcount,
558 .kwonlyargcount = kwonlyargcount,
559
560 .stacksize = maxdepth,
561
562 .exceptiontable = a->a_except_table,
563 };
564
565 if (_PyCode_Validate(&con) < 0) {
566 goto error;
567 }
568
569 if (_PyCompile_ConstCacheMergeOne(const_cache, &localsplusnames) < 0) {
570 goto error;
571 }
572 con.localsplusnames = localsplusnames;
573
574 co = _PyCode_New(&con);
575 if (co == NULL) {
576 goto error;
577 }
578
579 error:
580 Py_XDECREF(names);
581 Py_XDECREF(consts);
582 Py_XDECREF(localsplusnames);
583 Py_XDECREF(localspluskinds);
584 return co;
585 }
586
587
588 PyCodeObject *
589 _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
590 PyObject *consts, int maxdepth, instr_sequence *instrs,
591 int nlocalsplus, int code_flags, PyObject *filename)
592 {
593 PyCodeObject *co = NULL;
594
595 struct assembler a;
596 int res = assemble_emit(&a, instrs, umd->u_firstlineno, const_cache);
597 if (res == SUCCESS) {
598 co = makecode(umd, &a, const_cache, consts, maxdepth, nlocalsplus,
599 code_flags, filename);
600 }
601 assemble_free(&a);
602 return co;
603 }