(root)/
Python-3.12.0/
Python/
assemble.c
       1  #include <stdbool.h>
       2  
       3  #include "Python.h"
       4  #include "pycore_code.h"          // write_location_entry_start()
       5  #include "pycore_compile.h"
       6  #include "pycore_opcode.h"        // _PyOpcode_Caches[] and opcode category macros
       7  #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
       8  
       9  
      10  #define DEFAULT_CODE_SIZE 128
      11  #define DEFAULT_LNOTAB_SIZE 16
      12  #define DEFAULT_CNOTAB_SIZE 32
      13  
      14  #undef SUCCESS
      15  #undef ERROR
      16  #define SUCCESS 0
      17  #define ERROR -1
      18  
      19  #define RETURN_IF_ERROR(X)  \
      20      if ((X) == -1) {        \
      21          return ERROR;       \
      22      }
      23  
/* Short file-local aliases for the compiler's source-location, instruction
   and instruction-sequence types. */
typedef _PyCompilerSrcLocation location;
typedef _PyCompile_Instruction instruction;
typedef _PyCompile_InstructionSequence instr_sequence;
      27  
      28  static inline bool
      29  same_location(location a, location b)
      30  {
      31      return a.lineno == b.lineno &&
      32             a.end_lineno == b.end_lineno &&
      33             a.col_offset == b.col_offset &&
      34             a.end_col_offset == b.end_col_offset;
      35  }
      36  
/* State of one assembly run: the three output buffers being filled and the
   current write position inside each of them. */
struct assembler {
    PyObject *a_bytecode;  /* bytes containing bytecode */
    int a_offset;              /* offset into bytecode, counted in code units (not bytes) */
    PyObject *a_except_table;  /* bytes containing exception table */
    int a_except_table_off;    /* offset of the next byte to write in a_except_table */
    /* Location Info */
    int a_lineno;          /* line number that the next location entry's line delta is relative to */
    PyObject* a_linetable; /* bytes containing location info */
    int a_location_off;    /* offset of the next byte to write in a_linetable */
};
      47  
      48  static int
      49  assemble_init(struct assembler *a, int firstlineno)
      50  {
      51      memset(a, 0, sizeof(struct assembler));
      52      a->a_lineno = firstlineno;
      53      a->a_linetable = NULL;
      54      a->a_location_off = 0;
      55      a->a_except_table = NULL;
      56      a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE);
      57      if (a->a_bytecode == NULL) {
      58          goto error;
      59      }
      60      a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE);
      61      if (a->a_linetable == NULL) {
      62          goto error;
      63      }
      64      a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
      65      if (a->a_except_table == NULL) {
      66          goto error;
      67      }
      68      return SUCCESS;
      69  error:
      70      Py_XDECREF(a->a_bytecode);
      71      Py_XDECREF(a->a_linetable);
      72      Py_XDECREF(a->a_except_table);
      73      return ERROR;
      74  }
      75  
      76  static void
      77  assemble_free(struct assembler *a)
      78  {
      79      Py_XDECREF(a->a_bytecode);
      80      Py_XDECREF(a->a_linetable);
      81      Py_XDECREF(a->a_except_table);
      82  }
      83  
      84  static inline void
      85  write_except_byte(struct assembler *a, int byte) {
      86      unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table);
      87      p[a->a_except_table_off++] = byte;
      88  }
      89  
      90  #define CONTINUATION_BIT 64
      91  
      92  static void
      93  assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
      94  {
      95      assert ((msb | 128) == 128);
      96      assert(value >= 0 && value < (1 << 30));
      97      if (value >= 1 << 24) {
      98          write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb);
      99          msb = 0;
     100      }
     101      if (value >= 1 << 18) {
     102          write_except_byte(a, ((value >> 18)&0x3f) | CONTINUATION_BIT | msb);
     103          msb = 0;
     104      }
     105      if (value >= 1 << 12) {
     106          write_except_byte(a, ((value >> 12)&0x3f) | CONTINUATION_BIT | msb);
     107          msb = 0;
     108      }
     109      if (value >= 1 << 6) {
     110          write_except_byte(a, ((value >> 6)&0x3f) | CONTINUATION_BIT | msb);
     111          msb = 0;
     112      }
     113      write_except_byte(a, (value&0x3f) | msb);
     114  }
     115  
     116  /* See Objects/exception_handling_notes.txt for details of layout */
     117  #define MAX_SIZE_OF_ENTRY 20
     118  
     119  static int
     120  assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
     121                                      _PyCompile_ExceptHandlerInfo *handler)
     122  {
     123      Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
     124      if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
     125          RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2));
     126      }
     127      int size = end-start;
     128      assert(end > start);
     129      int target = handler->h_offset;
     130      int depth = handler->h_startdepth - 1;
     131      if (handler->h_preserve_lasti > 0) {
     132          depth -= 1;
     133      }
     134      assert(depth >= 0);
     135      int depth_lasti = (depth<<1) | handler->h_preserve_lasti;
     136      assemble_emit_exception_table_item(a, start, (1<<7));
     137      assemble_emit_exception_table_item(a, size, 0);
     138      assemble_emit_exception_table_item(a, target, 0);
     139      assemble_emit_exception_table_item(a, depth_lasti, 0);
     140      return SUCCESS;
     141  }
     142  
     143  static int
     144  assemble_exception_table(struct assembler *a, instr_sequence *instrs)
     145  {
     146      int ioffset = 0;
     147      _PyCompile_ExceptHandlerInfo handler;
     148      handler.h_offset = -1;
     149      handler.h_preserve_lasti = -1;
     150      int start = -1;
     151      for (int i = 0; i < instrs->s_used; i++) {
     152          instruction *instr = &instrs->s_instrs[i];
     153          if (instr->i_except_handler_info.h_offset != handler.h_offset) {
     154              if (handler.h_offset >= 0) {
     155                  RETURN_IF_ERROR(
     156                      assemble_emit_exception_table_entry(a, start, ioffset, &handler));
     157              }
     158              start = ioffset;
     159              handler = instr->i_except_handler_info;
     160          }
     161          ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
     162      }
     163      if (handler.h_offset >= 0) {
     164          RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
     165      }
     166      return SUCCESS;
     167  }
     168  
     169  
     170  /* Code location emitting code. See locations.md for a description of the format. */
     171  
     172  #define MSB 0x80
     173  
     174  static void
     175  write_location_byte(struct assembler* a, int val)
     176  {
     177      PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255;
     178      a->a_location_off++;
     179  }
     180  
     181  
     182  static uint8_t *
     183  location_pointer(struct assembler* a)
     184  {
     185      return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) +
     186          a->a_location_off;
     187  }
     188  
     189  static void
     190  write_location_first_byte(struct assembler* a, int code, int length)
     191  {
     192      a->a_location_off += write_location_entry_start(
     193          location_pointer(a), code, length);
     194  }
     195  
     196  static void
     197  write_location_varint(struct assembler* a, unsigned int val)
     198  {
     199      uint8_t *ptr = location_pointer(a);
     200      a->a_location_off += write_varint(ptr, val);
     201  }
     202  
     203  
     204  static void
     205  write_location_signed_varint(struct assembler* a, int val)
     206  {
     207      uint8_t *ptr = location_pointer(a);
     208      a->a_location_off += write_signed_varint(ptr, val);
     209  }
     210  
     211  static void
     212  write_location_info_short_form(struct assembler* a, int length, int column, int end_column)
     213  {
     214      assert(length > 0 &&  length <= 8);
     215      int column_low_bits = column & 7;
     216      int column_group = column >> 3;
     217      assert(column < 80);
     218      assert(end_column >= column);
     219      assert(end_column - column < 16);
     220      write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length);
     221      write_location_byte(a, (column_low_bits << 4) | (end_column - column));
     222  }
     223  
     224  static void
     225  write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column)
     226  {
     227      assert(length > 0 &&  length <= 8);
     228      assert(line_delta >= 0 && line_delta < 3);
     229      assert(column < 128);
     230      assert(end_column < 128);
     231      write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length);
     232      write_location_byte(a, column);
     233      write_location_byte(a, end_column);
     234  }
     235  
     236  static void
     237  write_location_info_long_form(struct assembler* a, location loc, int length)
     238  {
     239      assert(length > 0 &&  length <= 8);
     240      write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length);
     241      write_location_signed_varint(a, loc.lineno - a->a_lineno);
     242      assert(loc.end_lineno >= loc.lineno);
     243      write_location_varint(a, loc.end_lineno - loc.lineno);
     244      write_location_varint(a, loc.col_offset + 1);
     245      write_location_varint(a, loc.end_col_offset + 1);
     246  }
     247  
     248  static void
     249  write_location_info_none(struct assembler* a, int length)
     250  {
     251      write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length);
     252  }
     253  
     254  static void
     255  write_location_info_no_column(struct assembler* a, int length, int line_delta)
     256  {
     257      write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length);
     258      write_location_signed_varint(a, line_delta);
     259  }
     260  
     261  #define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */
     262  
     263  
     264  static int
     265  write_location_info_entry(struct assembler* a, location loc, int isize)
     266  {
     267      Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
     268      if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
     269          assert(len > THEORETICAL_MAX_ENTRY_SIZE);
     270          RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, len*2));
     271      }
     272      if (loc.lineno < 0) {
     273          write_location_info_none(a, isize);
     274          return SUCCESS;
     275      }
     276      int line_delta = loc.lineno - a->a_lineno;
     277      int column = loc.col_offset;
     278      int end_column = loc.end_col_offset;
     279      assert(column >= -1);
     280      assert(end_column >= -1);
     281      if (column < 0 || end_column < 0) {
     282          if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) {
     283              write_location_info_no_column(a, isize, line_delta);
     284              a->a_lineno = loc.lineno;
     285              return SUCCESS;
     286          }
     287      }
     288      else if (loc.end_lineno == loc.lineno) {
     289          if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) {
     290              write_location_info_short_form(a, isize, column, end_column);
     291              return SUCCESS;
     292          }
     293          if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) {
     294              write_location_info_oneline_form(a, isize, line_delta, column, end_column);
     295              a->a_lineno = loc.lineno;
     296              return SUCCESS;
     297          }
     298      }
     299      write_location_info_long_form(a, loc, isize);
     300      a->a_lineno = loc.lineno;
     301      return SUCCESS;
     302  }
     303  
     304  static int
     305  assemble_emit_location(struct assembler* a, location loc, int isize)
     306  {
     307      if (isize == 0) {
     308          return SUCCESS;
     309      }
     310      while (isize > 8) {
     311          RETURN_IF_ERROR(write_location_info_entry(a, loc, 8));
     312          isize -= 8;
     313      }
     314      return write_location_info_entry(a, loc, isize);
     315  }
     316  
     317  static int
     318  assemble_location_info(struct assembler *a, instr_sequence *instrs,
     319                         int firstlineno)
     320  {
     321      a->a_lineno = firstlineno;
     322      location loc = NO_LOCATION;
     323      int size = 0;
     324      for (int i = 0; i < instrs->s_used; i++) {
     325          instruction *instr = &instrs->s_instrs[i];
     326          if (!same_location(loc, instr->i_loc)) {
     327                  RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
     328                  loc = instr->i_loc;
     329                  size = 0;
     330          }
     331          size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
     332      }
     333      RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
     334      return SUCCESS;
     335  }
     336  
     337  static void
     338  write_instr(_Py_CODEUNIT *codestr, instruction *instr, int ilen)
     339  {
     340      int opcode = instr->i_opcode;
     341      assert(!IS_PSEUDO_OPCODE(opcode));
     342      int oparg = instr->i_oparg;
     343      assert(HAS_ARG(opcode) || oparg == 0);
     344      int caches = _PyOpcode_Caches[opcode];
     345      switch (ilen - caches) {
     346          case 4:
     347              codestr->op.code = EXTENDED_ARG;
     348              codestr->op.arg = (oparg >> 24) & 0xFF;
     349              codestr++;
     350              /* fall through */
     351          case 3:
     352              codestr->op.code = EXTENDED_ARG;
     353              codestr->op.arg = (oparg >> 16) & 0xFF;
     354              codestr++;
     355              /* fall through */
     356          case 2:
     357              codestr->op.code = EXTENDED_ARG;
     358              codestr->op.arg = (oparg >> 8) & 0xFF;
     359              codestr++;
     360              /* fall through */
     361          case 1:
     362              codestr->op.code = opcode;
     363              codestr->op.arg = oparg & 0xFF;
     364              codestr++;
     365              break;
     366          default:
     367              Py_UNREACHABLE();
     368      }
     369      while (caches--) {
     370          codestr->op.code = CACHE;
     371          codestr->op.arg = 0;
     372          codestr++;
     373      }
     374  }
     375  
     376  /* assemble_emit_instr()
     377     Extend the bytecode with a new instruction.
     378     Update lnotab if necessary.
     379  */
     380  
     381  static int
     382  assemble_emit_instr(struct assembler *a, instruction *instr)
     383  {
     384      Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
     385      _Py_CODEUNIT *code;
     386  
     387      int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
     388      if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
     389          if (len > PY_SSIZE_T_MAX / 2) {
     390              return ERROR;
     391          }
     392          RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, len * 2));
     393      }
     394      code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
     395      a->a_offset += size;
     396      write_instr(code, instr, size);
     397      return SUCCESS;
     398  }
     399  
     400  static int
     401  assemble_emit(struct assembler *a, instr_sequence *instrs,
     402                int first_lineno, PyObject *const_cache)
     403  {
     404      RETURN_IF_ERROR(assemble_init(a, first_lineno));
     405  
     406      for (int i = 0; i < instrs->s_used; i++) {
     407          instruction *instr = &instrs->s_instrs[i];
     408          RETURN_IF_ERROR(assemble_emit_instr(a, instr));
     409      }
     410  
     411      RETURN_IF_ERROR(assemble_location_info(a, instrs, a->a_lineno));
     412  
     413      RETURN_IF_ERROR(assemble_exception_table(a, instrs));
     414  
     415      RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off));
     416      RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table));
     417  
     418      RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, a->a_location_off));
     419      RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_linetable));
     420  
     421      RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, a->a_offset * sizeof(_Py_CODEUNIT)));
     422      RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_bytecode));
     423      return SUCCESS;
     424  }
     425  
     426  static PyObject *
     427  dict_keys_inorder(PyObject *dict, Py_ssize_t offset)
     428  {
     429      PyObject *tuple, *k, *v;
     430      Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
     431  
     432      tuple = PyTuple_New(size);
     433      if (tuple == NULL)
     434          return NULL;
     435      while (PyDict_Next(dict, &pos, &k, &v)) {
     436          i = PyLong_AS_LONG(v);
     437          assert((i - offset) < size);
     438          assert((i - offset) >= 0);
     439          PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k));
     440      }
     441      return tuple;
     442  }
     443  
     444  // This is in codeobject.c.
     445  extern void _Py_set_localsplus_info(int, PyObject *, unsigned char,
     446                                     PyObject *, PyObject *);
     447  
     448  static void
     449  compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus,
     450                          PyObject *names, PyObject *kinds)
     451  {
     452      PyObject *k, *v;
     453      Py_ssize_t pos = 0;
     454      while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) {
     455          int offset = (int)PyLong_AS_LONG(v);
     456          assert(offset >= 0);
     457          assert(offset < nlocalsplus);
     458          // For now we do not distinguish arg kinds.
     459          _PyLocals_Kind kind = CO_FAST_LOCAL;
     460          if (PyDict_Contains(umd->u_fasthidden, k)) {
     461              kind |= CO_FAST_HIDDEN;
     462          }
     463          if (PyDict_GetItem(umd->u_cellvars, k) != NULL) {
     464              kind |= CO_FAST_CELL;
     465          }
     466          _Py_set_localsplus_info(offset, k, kind, names, kinds);
     467      }
     468      int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
     469  
     470      // This counter mirrors the fix done in fix_cell_offsets().
     471      int numdropped = 0;
     472      pos = 0;
     473      while (PyDict_Next(umd->u_cellvars, &pos, &k, &v)) {
     474          if (PyDict_GetItem(umd->u_varnames, k) != NULL) {
     475              // Skip cells that are already covered by locals.
     476              numdropped += 1;
     477              continue;
     478          }
     479          int offset = (int)PyLong_AS_LONG(v);
     480          assert(offset >= 0);
     481          offset += nlocals - numdropped;
     482          assert(offset < nlocalsplus);
     483          _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds);
     484      }
     485  
     486      pos = 0;
     487      while (PyDict_Next(umd->u_freevars, &pos, &k, &v)) {
     488          int offset = (int)PyLong_AS_LONG(v);
     489          assert(offset >= 0);
     490          offset += nlocals - numdropped;
     491          assert(offset < nlocalsplus);
     492          _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds);
     493      }
     494  }
     495  
     496  static PyCodeObject *
     497  makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_cache,
     498           PyObject *constslist, int maxdepth, int nlocalsplus, int code_flags,
     499           PyObject *filename)
     500  {
     501      PyCodeObject *co = NULL;
     502      PyObject *names = NULL;
     503      PyObject *consts = NULL;
     504      PyObject *localsplusnames = NULL;
     505      PyObject *localspluskinds = NULL;
     506      names = dict_keys_inorder(umd->u_names, 0);
     507      if (!names) {
     508          goto error;
     509      }
     510      if (_PyCompile_ConstCacheMergeOne(const_cache, &names) < 0) {
     511          goto error;
     512      }
     513  
     514      consts = PyList_AsTuple(constslist); /* PyCode_New requires a tuple */
     515      if (consts == NULL) {
     516          goto error;
     517      }
     518      if (_PyCompile_ConstCacheMergeOne(const_cache, &consts) < 0) {
     519          goto error;
     520      }
     521  
     522      assert(umd->u_posonlyargcount < INT_MAX);
     523      assert(umd->u_argcount < INT_MAX);
     524      assert(umd->u_kwonlyargcount < INT_MAX);
     525      int posonlyargcount = (int)umd->u_posonlyargcount;
     526      int posorkwargcount = (int)umd->u_argcount;
     527      assert(INT_MAX - posonlyargcount - posorkwargcount > 0);
     528      int kwonlyargcount = (int)umd->u_kwonlyargcount;
     529  
     530      localsplusnames = PyTuple_New(nlocalsplus);
     531      if (localsplusnames == NULL) {
     532          goto error;
     533      }
     534      localspluskinds = PyBytes_FromStringAndSize(NULL, nlocalsplus);
     535      if (localspluskinds == NULL) {
     536          goto error;
     537      }
     538      compute_localsplus_info(umd, nlocalsplus, localsplusnames, localspluskinds);
     539  
     540      struct _PyCodeConstructor con = {
     541          .filename = filename,
     542          .name = umd->u_name,
     543          .qualname = umd->u_qualname ? umd->u_qualname : umd->u_name,
     544          .flags = code_flags,
     545  
     546          .code = a->a_bytecode,
     547          .firstlineno = umd->u_firstlineno,
     548          .linetable = a->a_linetable,
     549  
     550          .consts = consts,
     551          .names = names,
     552  
     553          .localsplusnames = localsplusnames,
     554          .localspluskinds = localspluskinds,
     555  
     556          .argcount = posonlyargcount + posorkwargcount,
     557          .posonlyargcount = posonlyargcount,
     558          .kwonlyargcount = kwonlyargcount,
     559  
     560          .stacksize = maxdepth,
     561  
     562          .exceptiontable = a->a_except_table,
     563      };
     564  
     565     if (_PyCode_Validate(&con) < 0) {
     566          goto error;
     567      }
     568  
     569      if (_PyCompile_ConstCacheMergeOne(const_cache, &localsplusnames) < 0) {
     570          goto error;
     571      }
     572      con.localsplusnames = localsplusnames;
     573  
     574      co = _PyCode_New(&con);
     575      if (co == NULL) {
     576          goto error;
     577      }
     578  
     579  error:
     580      Py_XDECREF(names);
     581      Py_XDECREF(consts);
     582      Py_XDECREF(localsplusnames);
     583      Py_XDECREF(localspluskinds);
     584      return co;
     585  }
     586  
     587  
     588  PyCodeObject *
     589  _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
     590                             PyObject *consts, int maxdepth, instr_sequence *instrs,
     591                             int nlocalsplus, int code_flags, PyObject *filename)
     592  {
     593      PyCodeObject *co = NULL;
     594  
     595      struct assembler a;
     596      int res = assemble_emit(&a, instrs, umd->u_firstlineno, const_cache);
     597      if (res == SUCCESS) {
     598          co = makecode(umd, &a, const_cache, consts, maxdepth, nlocalsplus,
     599                        code_flags, filename);
     600      }
     601      assemble_free(&a);
     602      return co;
     603  }