1  /* stringlib: bytes joining implementation */
       2  
       3  #if STRINGLIB_IS_UNICODE
       4  #error join.h only compatible with byte-wise strings
       5  #endif
       6  
       7  Py_LOCAL_INLINE(PyObject *)
       8  STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
       9  {
      10      const char *sepstr = STRINGLIB_STR(sep);
      11      Py_ssize_t seplen = STRINGLIB_LEN(sep);
      12      PyObject *res = NULL;
      13      char *p;
      14      Py_ssize_t seqlen = 0;
      15      Py_ssize_t sz = 0;
      16      Py_ssize_t i, nbufs;
      17      PyObject *seq, *item;
      18      Py_buffer *buffers = NULL;
      19  #define NB_STATIC_BUFFERS 10
      20      Py_buffer static_buffers[NB_STATIC_BUFFERS];
      21  #define GIL_THRESHOLD 1048576
      22      int drop_gil = 1;
      23      PyThreadState *save = NULL;
      24  
      25      seq = PySequence_Fast(iterable, "can only join an iterable");
      26      if (seq == NULL) {
      27          return NULL;
      28      }
      29  
      30      seqlen = PySequence_Fast_GET_SIZE(seq);
      31      if (seqlen == 0) {
      32          Py_DECREF(seq);
      33          return STRINGLIB_NEW(NULL, 0);
      34      }
      35  #if !STRINGLIB_MUTABLE
      36      if (seqlen == 1) {
      37          item = PySequence_Fast_GET_ITEM(seq, 0);
      38          if (STRINGLIB_CHECK_EXACT(item)) {
      39              Py_INCREF(item);
      40              Py_DECREF(seq);
      41              return item;
      42          }
      43      }
      44  #endif
      45      if (seqlen > NB_STATIC_BUFFERS) {
      46          buffers = PyMem_NEW(Py_buffer, seqlen);
      47          if (buffers == NULL) {
      48              Py_DECREF(seq);
      49              PyErr_NoMemory();
      50              return NULL;
      51          }
      52      }
      53      else {
      54          buffers = static_buffers;
      55      }
      56  
      57      /* Here is the general case.  Do a pre-pass to figure out the total
      58       * amount of space we'll need (sz), and see whether all arguments are
      59       * bytes-like.
      60       */
      61      for (i = 0, nbufs = 0; i < seqlen; i++) {
      62          Py_ssize_t itemlen;
      63          item = PySequence_Fast_GET_ITEM(seq, i);
      64          if (PyBytes_CheckExact(item)) {
      65              /* Fast path. */
      66              buffers[i].obj = Py_NewRef(item);
      67              buffers[i].buf = PyBytes_AS_STRING(item);
      68              buffers[i].len = PyBytes_GET_SIZE(item);
      69          }
      70          else {
      71              if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
      72                  PyErr_Format(PyExc_TypeError,
      73                               "sequence item %zd: expected a bytes-like object, "
      74                               "%.80s found",
      75                               i, Py_TYPE(item)->tp_name);
      76                  goto error;
      77              }
      78              /* If the backing objects are mutable, then dropping the GIL
      79               * opens up race conditions where another thread tries to modify
      80               * the object which we hold a buffer on it. Such code has data
      81               * races anyway, but this is a conservative approach that avoids
      82               * changing the behaviour of that data race.
      83               */
      84              drop_gil = 0;
      85          }
      86          nbufs = i + 1;  /* for error cleanup */
      87          itemlen = buffers[i].len;
      88          if (itemlen > PY_SSIZE_T_MAX - sz) {
      89              PyErr_SetString(PyExc_OverflowError,
      90                              "join() result is too long");
      91              goto error;
      92          }
      93          sz += itemlen;
      94          if (i != 0) {
      95              if (seplen > PY_SSIZE_T_MAX - sz) {
      96                  PyErr_SetString(PyExc_OverflowError,
      97                                  "join() result is too long");
      98                  goto error;
      99              }
     100              sz += seplen;
     101          }
     102          if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
     103              PyErr_SetString(PyExc_RuntimeError,
     104                              "sequence changed size during iteration");
     105              goto error;
     106          }
     107      }
     108  
     109      /* Allocate result space. */
     110      res = STRINGLIB_NEW(NULL, sz);
     111      if (res == NULL)
     112          goto error;
     113  
     114      /* Catenate everything. */
     115      p = STRINGLIB_STR(res);
     116      if (sz < GIL_THRESHOLD) {
     117          drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
     118      }
     119      if (drop_gil) {
     120          save = PyEval_SaveThread();
     121      }
     122      if (!seplen) {
     123          /* fast path */
     124          for (i = 0; i < nbufs; i++) {
     125              Py_ssize_t n = buffers[i].len;
     126              char *q = buffers[i].buf;
     127              memcpy(p, q, n);
     128              p += n;
     129          }
     130      }
     131      else {
     132          for (i = 0; i < nbufs; i++) {
     133              Py_ssize_t n;
     134              char *q;
     135              if (i) {
     136                  memcpy(p, sepstr, seplen);
     137                  p += seplen;
     138              }
     139              n = buffers[i].len;
     140              q = buffers[i].buf;
     141              memcpy(p, q, n);
     142              p += n;
     143          }
     144      }
     145      if (drop_gil) {
     146          PyEval_RestoreThread(save);
     147      }
     148      goto done;
     149  
     150  error:
     151      res = NULL;
     152  done:
     153      Py_DECREF(seq);
     154      for (i = 0; i < nbufs; i++)
     155          PyBuffer_Release(&buffers[i]);
     156      if (buffers != static_buffers)
     157          PyMem_Free(buffers);
     158      return res;
     159  }
     160  
     161  #undef NB_STATIC_BUFFERS
     162  #undef GIL_THRESHOLD