(root)/
Python-3.11.7/
Modules/
binascii.c
       1  /*
       2  ** Routines to represent binary data in ASCII and vice-versa
       3  **
       4  ** This module currently supports the following encodings:
       5  ** uuencode:
       6  **      each line encodes 45 bytes (except possibly the last)
       7  **      First char encodes (binary) length, rest data
       8  **      each char encodes 6 bits, as follows:
       9  **      binary: 01234567 abcdefgh ijklmnop
      10  **      ascii:  012345 67abcd efghij klmnop
      11  **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
      12  **      short binary data is zero-extended (so the bits are always in the
      13  **      right place), this does *not* reflect in the length.
      14  ** base64:
      15  **      Line breaks are insignificant, but lines are at most 76 chars
      16  **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
      17  **      is done via a table.
      18  **      Short binary data is filled (in ASCII) with '='.
      19  ** hqx:
      20  **      File starts with introductory text, real data starts and ends
      21  **      with colons.
      22  **      Data consists of three similar parts: info, datafork, resourcefork.
      23  **      Each part is protected (at the end) with a 16-bit crc
      24  **      The binary data is run-length encoded, and then ascii-fied:
      25  **      binary: 01234567 abcdefgh ijklmnop
      26  **      ascii:  012345 67abcd efghij klmnop
      27  **      ASCII encoding is table-driven, see the code.
      28  **      Short binary data results in the runt ascii-byte being output with
      29  **      the bits in the right place.
      30  **
      31  ** While I was reading dozens of programs that encode or decode the formats
      32  ** here (documentation? hihi:-) I have formulated Jansen's Observation:
      33  **
      34  **      Programs that encode binary data in ASCII are written in
      35  **      such a style that they are as unreadable as possible. Devices used
      36  **      include unnecessary global variables, burying important tables
      37  **      in unrelated sourcefiles, putting functions in include files,
      38  **      using seemingly-descriptive variable names for different purposes,
      39  **      calls to empty subroutines and a host of others.
      40  **
      41  ** I have attempted to break with this tradition, but I guess that that
      42  ** does make the performance sub-optimal. Oh well, too bad...
      43  **
      44  ** Jack Jansen, CWI, July 1995.
      45  **
      46  ** Added support for quoted-printable encoding, based on rfc 1521 et al
      47  ** quoted-printable encoding specifies that non printable characters (anything
      48  ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
      49  ** of the character.  It also specifies some other behavior to enable 8bit data
      50  ** in a mail message with little difficulty (maximum line sizes, protecting
      51  ** some cases of whitespace, etc).
      52  **
      53  ** Brandon Long, September 2001.
      54  */
      55  
      56  #ifndef Py_BUILD_CORE_BUILTIN
      57  #  define Py_BUILD_CORE_MODULE 1
      58  #endif
      59  
      60  #define PY_SSIZE_T_CLEAN
      61  
      62  #include "Python.h"
      63  #include "pycore_long.h"          // _PyLong_DigitValue
      64  #include "pycore_strhex.h"        // _Py_strhex_bytes_with_sep()
      65  #ifdef USE_ZLIB_CRC32
      66  #  include "zlib.h"
      67  #endif
      68  
/* Per-module state of the binascii module, retrieved through
   get_binascii_state(). */
typedef struct binascii_state {
    /* Exception object passed to PyErr_SetString()/PyErr_Format() by the
       encoders and decoders in this file when they reject their input. */
    PyObject *Error;
    /* NOTE(review): not referenced in this part of the file; presumably the
       exception object exposed as binascii.Incomplete -- confirm against the
       module initialisation code. */
    PyObject *Incomplete;
} binascii_state;
      73  
      74  static inline binascii_state *
      75  get_binascii_state(PyObject *module)
      76  {
      77      return (binascii_state *)PyModule_GetState(module);
      78  }
      79  
      80  
/* Base64 decoding table, indexed by the input byte value (256 entries).
   Characters of the base64 alphabet map to their 6-bit value (0..63).
   Every other byte is written as -1, which converts to the largest
   unsigned char value (255 with 8-bit chars) and is therefore caught by the
   decoder's "this_ch >= 64" test.  The pad character '=' has the entry 0,
   but the decoder handles '=' itself before it consults this table. */
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
     100  
/* Padding character: appended by the base64 encoder to fill the last quad
   and recognised by the base64 decoder. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   The exact bound keeps the encoder's output-size estimate
   (bin_len*2 + 2, plus 1 for the optional newline) within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
     105  
/* Base64 encoding alphabet: entry i is the ASCII character that represents
   the 6-bit value i (0..63).  Indexed by the base64 encoder. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     108  
     109  
/* Lookup table used by binascii_crc_hqx_impl() (documented there as
   CRC-CCITT).  It holds 256 16-bit entries and is indexed by the high byte
   of the running 16-bit CRC XORed with the next input byte. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
     144  
     145  /*[clinic input]
     146  module binascii
     147  [clinic start generated code]*/
     148  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
     149  
     150  /*[python input]
     151  
     152  class ascii_buffer_converter(CConverter):
     153      type = 'Py_buffer'
     154      converter = 'ascii_buffer_converter'
     155      impl_by_reference = True
     156      c_default = "{NULL, NULL}"
     157  
     158      def cleanup(self):
     159          name = self.name
     160          return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
     161  
     162  [python start generated code]*/
     163  /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
     164  
     165  static int
     166  ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
     167  {
     168      if (arg == NULL) {
     169          PyBuffer_Release(buf);
     170          return 1;
     171      }
     172      if (PyUnicode_Check(arg)) {
     173          if (PyUnicode_READY(arg) < 0)
     174              return 0;
     175          if (!PyUnicode_IS_ASCII(arg)) {
     176              PyErr_SetString(PyExc_ValueError,
     177                              "string argument should contain only ASCII characters");
     178              return 0;
     179          }
     180          assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
     181          buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
     182          buf->len = PyUnicode_GET_LENGTH(arg);
     183          buf->obj = NULL;
     184          return 1;
     185      }
     186      if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
     187          PyErr_Format(PyExc_TypeError,
     188                       "argument should be bytes, buffer or ASCII string, "
     189                       "not '%.100s'", Py_TYPE(arg)->tp_name);
     190          return 0;
     191      }
     192      if (!PyBuffer_IsContiguous(buf, 'C')) {
     193          PyErr_Format(PyExc_TypeError,
     194                       "argument should be a contiguous buffer, "
     195                       "not '%.100s'", Py_TYPE(arg)->tp_name);
     196          PyBuffer_Release(buf);
     197          return 0;
     198      }
     199      return Py_CLEANUP_SUPPORTED;
     200  }
     201  
     202  #include "clinic/binascii.c.h"
     203  
     204  /*[clinic input]
     205  binascii.a2b_uu
     206  
     207      data: ascii_buffer
     208      /
     209  
     210  Decode a line of uuencoded data.
     211  [clinic start generated code]*/
     212  
     213  static PyObject *
     214  binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
     215  /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
     216  {
     217      const unsigned char *ascii_data;
     218      unsigned char *bin_data;
     219      int leftbits = 0;
     220      unsigned char this_ch;
     221      unsigned int leftchar = 0;
     222      PyObject *rv;
     223      Py_ssize_t ascii_len, bin_len;
     224      binascii_state *state;
     225  
     226      ascii_data = data->buf;
     227      ascii_len = data->len;
     228  
     229      assert(ascii_len >= 0);
     230  
     231      /* First byte: binary data length (in bytes) */
     232      bin_len = (*ascii_data++ - ' ') & 077;
     233      ascii_len--;
     234  
     235      /* Allocate the buffer */
     236      if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
     237          return NULL;
     238      bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
     239  
     240      for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
     241          /* XXX is it really best to add NULs if there's no more data */
     242          this_ch = (ascii_len > 0) ? *ascii_data : 0;
     243          if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
     244              /*
     245              ** Whitespace. Assume some spaces got eaten at
     246              ** end-of-line. (We check this later)
     247              */
     248              this_ch = 0;
     249          } else {
     250              /* Check the character for legality
     251              ** The 64 in stead of the expected 63 is because
     252              ** there are a few uuencodes out there that use
     253              ** '`' as zero instead of space.
     254              */
     255              if ( this_ch < ' ' || this_ch > (' ' + 64)) {
     256                  state = get_binascii_state(module);
     257                  if (state == NULL) {
     258                      return NULL;
     259                  }
     260                  PyErr_SetString(state->Error, "Illegal char");
     261                  Py_DECREF(rv);
     262                  return NULL;
     263              }
     264              this_ch = (this_ch - ' ') & 077;
     265          }
     266          /*
     267          ** Shift it in on the low end, and see if there's
     268          ** a byte ready for output.
     269          */
     270          leftchar = (leftchar << 6) | (this_ch);
     271          leftbits += 6;
     272          if ( leftbits >= 8 ) {
     273              leftbits -= 8;
     274              *bin_data++ = (leftchar >> leftbits) & 0xff;
     275              leftchar &= ((1 << leftbits) - 1);
     276              bin_len--;
     277          }
     278      }
     279      /*
     280      ** Finally, check that if there's anything left on the line
     281      ** that it's whitespace only.
     282      */
     283      while( ascii_len-- > 0 ) {
     284          this_ch = *ascii_data++;
     285          /* Extra '`' may be written as padding in some cases */
     286          if ( this_ch != ' ' && this_ch != ' '+64 &&
     287               this_ch != '\n' && this_ch != '\r' ) {
     288              state = get_binascii_state(module);
     289              if (state == NULL) {
     290                  return NULL;
     291              }
     292              PyErr_SetString(state->Error, "Trailing garbage");
     293              Py_DECREF(rv);
     294              return NULL;
     295          }
     296      }
     297      return rv;
     298  }
     299  
     300  /*[clinic input]
     301  binascii.b2a_uu
     302  
     303      data: Py_buffer
     304      /
     305      *
     306      backtick: bool(accept={int}) = False
     307  
     308  Uuencode line of data.
     309  [clinic start generated code]*/
     310  
     311  static PyObject *
     312  binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
     313  /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
     314  {
     315      unsigned char *ascii_data;
     316      const unsigned char *bin_data;
     317      int leftbits = 0;
     318      unsigned char this_ch;
     319      unsigned int leftchar = 0;
     320      binascii_state *state;
     321      Py_ssize_t bin_len, out_len;
     322      _PyBytesWriter writer;
     323  
     324      _PyBytesWriter_Init(&writer);
     325      bin_data = data->buf;
     326      bin_len = data->len;
     327      if ( bin_len > 45 ) {
     328          /* The 45 is a limit that appears in all uuencode's */
     329          state = get_binascii_state(module);
     330          if (state == NULL) {
     331              return NULL;
     332          }
     333          PyErr_SetString(state->Error, "At most 45 bytes at once");
     334          return NULL;
     335      }
     336  
     337      /* We're lazy and allocate to much (fixed up later) */
     338      out_len = 2 + (bin_len + 2) / 3 * 4;
     339      ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
     340      if (ascii_data == NULL)
     341          return NULL;
     342  
     343      /* Store the length */
     344      if (backtick && !bin_len)
     345          *ascii_data++ = '`';
     346      else
     347          *ascii_data++ = ' ' + (unsigned char)bin_len;
     348  
     349      for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
     350          /* Shift the data (or padding) into our buffer */
     351          if ( bin_len > 0 )              /* Data */
     352              leftchar = (leftchar << 8) | *bin_data;
     353          else                            /* Padding */
     354              leftchar <<= 8;
     355          leftbits += 8;
     356  
     357          /* See if there are 6-bit groups ready */
     358          while ( leftbits >= 6 ) {
     359              this_ch = (leftchar >> (leftbits-6)) & 0x3f;
     360              leftbits -= 6;
     361              if (backtick && !this_ch)
     362                  *ascii_data++ = '`';
     363              else
     364                  *ascii_data++ = this_ch + ' ';
     365          }
     366      }
     367      *ascii_data++ = '\n';       /* Append a courtesy newline */
     368  
     369      return _PyBytesWriter_Finish(&writer, ascii_data);
     370  }
     371  
     372  /*[clinic input]
     373  binascii.a2b_base64
     374  
     375      data: ascii_buffer
     376      /
     377      *
     378      strict_mode: bool(accept={int}) = False
     379  
     380  Decode a line of base64 data.
     381  
     382    strict_mode
     383      When set to True, bytes that are not part of the base64 standard are not allowed.
     384      The same applies to excess data after padding (= / ==).
     385  [clinic start generated code]*/
     386  
     387  static PyObject *
     388  binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
     389  /*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
     390  {
     391      assert(data->len >= 0);
     392  
     393      const unsigned char *ascii_data = data->buf;
     394      size_t ascii_len = data->len;
     395      binascii_state *state = NULL;
     396      char padding_started = 0;
     397  
     398      /* Allocate the buffer */
     399      Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
     400      _PyBytesWriter writer;
     401      _PyBytesWriter_Init(&writer);
     402      unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
     403      if (bin_data == NULL)
     404          return NULL;
     405      unsigned char *bin_data_start = bin_data;
     406  
     407      if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
     408          state = get_binascii_state(module);
     409          if (state) {
     410              PyErr_SetString(state->Error, "Leading padding not allowed");
     411          }
     412          goto error_end;
     413      }
     414  
     415      int quad_pos = 0;
     416      unsigned char leftchar = 0;
     417      int pads = 0;
     418      for (size_t i = 0; i < ascii_len; i++) {
     419          unsigned char this_ch = ascii_data[i];
     420  
     421          /* Check for pad sequences and ignore
     422          ** the invalid ones.
     423          */
     424          if (this_ch == BASE64_PAD) {
     425              padding_started = 1;
     426  
     427              if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
     428                  /* A pad sequence means we should not parse more input.
     429                  ** We've already interpreted the data from the quad at this point.
     430                  ** in strict mode, an error should raise if there's excess data after the padding.
     431                  */
     432                  if (strict_mode && i + 1 < ascii_len) {
     433                      state = get_binascii_state(module);
     434                      if (state) {
     435                          PyErr_SetString(state->Error, "Excess data after padding");
     436                      }
     437                      goto error_end;
     438                  }
     439  
     440                  goto done;
     441              }
     442              continue;
     443          }
     444  
     445          this_ch = table_a2b_base64[this_ch];
     446          if (this_ch >= 64) {
     447              if (strict_mode) {
     448                  state = get_binascii_state(module);
     449                  if (state) {
     450                      PyErr_SetString(state->Error, "Only base64 data is allowed");
     451                  }
     452                  goto error_end;
     453              }
     454              continue;
     455          }
     456  
     457          // Characters that are not '=', in the middle of the padding, are not allowed
     458          if (strict_mode && padding_started) {
     459              state = get_binascii_state(module);
     460              if (state) {
     461                  PyErr_SetString(state->Error, "Discontinuous padding not allowed");
     462              }
     463              goto error_end;
     464          }
     465          pads = 0;
     466  
     467          switch (quad_pos) {
     468              case 0:
     469                  quad_pos = 1;
     470                  leftchar = this_ch;
     471                  break;
     472              case 1:
     473                  quad_pos = 2;
     474                  *bin_data++ = (leftchar << 2) | (this_ch >> 4);
     475                  leftchar = this_ch & 0x0f;
     476                  break;
     477              case 2:
     478                  quad_pos = 3;
     479                  *bin_data++ = (leftchar << 4) | (this_ch >> 2);
     480                  leftchar = this_ch & 0x03;
     481                  break;
     482              case 3:
     483                  quad_pos = 0;
     484                  *bin_data++ = (leftchar << 6) | (this_ch);
     485                  leftchar = 0;
     486                  break;
     487          }
     488      }
     489  
     490      if (quad_pos != 0) {
     491          state = get_binascii_state(module);
     492          if (state == NULL) {
     493              /* error already set, from get_binascii_state */
     494          } else if (quad_pos == 1) {
     495              /*
     496              ** There is exactly one extra valid, non-padding, base64 character.
     497              ** This is an invalid length, as there is no possible input that
     498              ** could encoded into such a base64 string.
     499              */
     500              PyErr_Format(state->Error,
     501                           "Invalid base64-encoded string: "
     502                           "number of data characters (%zd) cannot be 1 more "
     503                           "than a multiple of 4",
     504                           (bin_data - bin_data_start) / 3 * 4 + 1);
     505          } else {
     506              PyErr_SetString(state->Error, "Incorrect padding");
     507          }
     508          error_end:
     509          _PyBytesWriter_Dealloc(&writer);
     510          return NULL;
     511      }
     512  
     513  done:
     514      return _PyBytesWriter_Finish(&writer, bin_data);
     515  }
     516  
     517  
     518  /*[clinic input]
     519  binascii.b2a_base64
     520  
     521      data: Py_buffer
     522      /
     523      *
     524      newline: bool(accept={int}) = True
     525  
     526  Base64-code line of data.
     527  [clinic start generated code]*/
     528  
     529  static PyObject *
     530  binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
     531  /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
     532  {
     533      unsigned char *ascii_data;
     534      const unsigned char *bin_data;
     535      int leftbits = 0;
     536      unsigned char this_ch;
     537      unsigned int leftchar = 0;
     538      Py_ssize_t bin_len, out_len;
     539      _PyBytesWriter writer;
     540      binascii_state *state;
     541  
     542      bin_data = data->buf;
     543      bin_len = data->len;
     544      _PyBytesWriter_Init(&writer);
     545  
     546      assert(bin_len >= 0);
     547  
     548      if ( bin_len > BASE64_MAXBIN ) {
     549          state = get_binascii_state(module);
     550          if (state == NULL) {
     551              return NULL;
     552          }
     553          PyErr_SetString(state->Error, "Too much data for base64 line");
     554          return NULL;
     555      }
     556  
     557      /* We're lazy and allocate too much (fixed up later).
     558         "+2" leaves room for up to two pad characters.
     559         Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
     560      out_len = bin_len*2 + 2;
     561      if (newline)
     562          out_len++;
     563      ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
     564      if (ascii_data == NULL)
     565          return NULL;
     566  
     567      for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
     568          /* Shift the data into our buffer */
     569          leftchar = (leftchar << 8) | *bin_data;
     570          leftbits += 8;
     571  
     572          /* See if there are 6-bit groups ready */
     573          while ( leftbits >= 6 ) {
     574              this_ch = (leftchar >> (leftbits-6)) & 0x3f;
     575              leftbits -= 6;
     576              *ascii_data++ = table_b2a_base64[this_ch];
     577          }
     578      }
     579      if ( leftbits == 2 ) {
     580          *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
     581          *ascii_data++ = BASE64_PAD;
     582          *ascii_data++ = BASE64_PAD;
     583      } else if ( leftbits == 4 ) {
     584          *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
     585          *ascii_data++ = BASE64_PAD;
     586      }
     587      if (newline)
     588          *ascii_data++ = '\n';       /* Append a courtesy newline */
     589  
     590      return _PyBytesWriter_Finish(&writer, ascii_data);
     591  }
     592  
     593  
     594  /*[clinic input]
     595  binascii.crc_hqx
     596  
     597      data: Py_buffer
     598      crc: unsigned_int(bitwise=True)
     599      /
     600  
     601  Compute CRC-CCITT incrementally.
     602  [clinic start generated code]*/
     603  
     604  static PyObject *
     605  binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
     606  /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
     607  {
     608      const unsigned char *bin_data;
     609      Py_ssize_t len;
     610  
     611      crc &= 0xffff;
     612      bin_data = data->buf;
     613      len = data->len;
     614  
     615      while(len-- > 0) {
     616          crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
     617      }
     618  
     619      return PyLong_FromUnsignedLong(crc);
     620  }
     621  
     622  #ifndef USE_ZLIB_CRC32
     623  /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
     624      Also known as: ISO 3307
     625  **********************************************************************|
     626  *                                                                    *|
     627  * Demonstration program to compute the 32-bit CRC used as the frame  *|
     628  * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
     629  * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
     630  * protocol).  The 32-bit FCS was added via the Federal Register,     *|
     631  * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
     632  * this polynomial is or will be included in CCITT V.41, which        *|
     633  * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
     634  * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
     635  * errors by a factor of 10^-5 over 16-bit FCS.                       *|
     636  *                                                                    *|
     637  **********************************************************************|
     638  
     639   Copyright (C) 1986 Gary S. Brown.  You may use this program, or
     640   code or tables extracted from it, as desired without restriction.
     641  
     642   First, the polynomial itself and its table of feedback terms.  The
     643   polynomial is
     644   X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
     645   Note that we take it "backwards" and put the highest-order term in
     646   the lowest-order bit.  The X^32 term is "implied"; the LSB is the
     647   X^31 term, etc.  The X^0 term (usually shown as "+1") results in
     648   the MSB being 1.
     649  
     650   Note that the usual hardware shift register implementation, which
     651   is what we're using (we're merely optimizing it by doing eight-bit
     652   chunks at a time) shifts bits into the lowest-order term.  In our
     653   implementation, that means shifting towards the right.  Why do we
     654   do it this way?  Because the calculated CRC must be transmitted in
     655   order from highest-order term to lowest-order term.  UARTs transmit
     656   characters in order from LSB to MSB.  By storing the CRC this way,
     657   we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
     659   by bit from highest- to lowest-order term without requiring any bit
     660   shuffling on our part.  Reception works similarly.
     661  
     662   The feedback terms table consists of 256, 32-bit entries.  Notes:
     663  
     664    1. The table can be generated at runtime if desired; code to do so
     665       is shown later.  It might not be obvious, but the feedback
     666       terms simply represent the results of eight shift/xor opera-
     667       tions for all combinations of data and CRC register values.
     668  
     669    2. The CRC accumulation logic is the same for all CRC polynomials,
     670       be they sixteen or thirty-two bits wide.  You simply choose the
     671       appropriate table.  Alternatively, because the table can be
     672       generated at runtime, you can start by generating the table for
     673       the polynomial in question and use exactly the same "updcrc",
     674       if your application needn't simultaneously handle two CRC
     675       polynomials.  (Note, however, that XMODEM is strange.)
     676  
     677    3. For 16-bit CRCs, the table entries need be only 16 bits wide;
     678       of course, 32-bit entries work OK if the high 16 bits are zero.
     679  
     680    4. The values must be right-shifted by eight bits by the "updcrc"
     681       logic; the shift must be unsigned (bring in zeroes).  On some
     682       hardware you could probably optimize the shift in assembler by
     683       using byte-swap instructions.
     684  ********************************************************************/
     685  
/* Lookup table for the table-driven CRC-32 computed by internal_crc32().
 * internal_crc32() selects an entry with the low byte of
 * (crc ^ next input byte) and XORs it into the CRC register after the
 * register has been shifted right by eight bits.
 * NOTE(review): this table and internal_crc32() appear to belong to the
 * non-zlib build only (see the "#endif USE_ZLIB_CRC32" that follows
 * internal_crc32) -- confirm against the opening preprocessor guard. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
     740  
     741  static unsigned int
     742  internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
     743  { /* By Jim Ahlstrom; All rights transferred to CNRI */
     744      unsigned int result;
     745  
     746      crc = ~ crc;
     747      while (len-- > 0) {
     748          crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
     749          /* Note:  (crc >> 8) MUST zero fill on left */
     750      }
     751  
     752      result = (crc ^ 0xFFFFFFFF);
     753      return result & 0xffffffff;
     754  }
     755  #endif  /* USE_ZLIB_CRC32 */
     756  
     757  /*[clinic input]
     758  binascii.crc32 -> unsigned_int
     759  
     760      data: Py_buffer
     761      crc: unsigned_int(bitwise=True) = 0
     762      /
     763  
     764  Compute CRC-32 incrementally.
     765  [clinic start generated code]*/
     766  
     767  static unsigned int
     768  binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
     769  /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
     770  
     771  #ifdef USE_ZLIB_CRC32
     772  /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
     773   * modules for historical reasons. */
     774  {
     775      /* Releasing the GIL for very small buffers is inefficient
     776         and may lower performance */
     777      if (data->len > 1024*5) {
     778          unsigned char *buf = data->buf;
     779          Py_ssize_t len = data->len;
     780  
     781          Py_BEGIN_ALLOW_THREADS
     782          /* Avoid truncation of length for very large buffers. crc32() takes
     783             length as an unsigned int, which may be narrower than Py_ssize_t. */
     784          while ((size_t)len > UINT_MAX) {
     785              crc = crc32(crc, buf, UINT_MAX);
     786              buf += (size_t) UINT_MAX;
     787              len -= (size_t) UINT_MAX;
     788          }
     789          crc = crc32(crc, buf, (unsigned int)len);
     790          Py_END_ALLOW_THREADS
     791      } else {
     792          crc = crc32(crc, data->buf, (unsigned int)data->len);
     793      }
     794      return crc & 0xffffffff;
     795  }
     796  #else  /* USE_ZLIB_CRC32 */
     797  {
     798      const unsigned char *bin_data = data->buf;
     799      Py_ssize_t len = data->len;
     800  
     801      /* Releasing the GIL for very small buffers is inefficient
     802         and may lower performance */
     803      if (len > 1024*5) {
     804          unsigned int result;
     805          Py_BEGIN_ALLOW_THREADS
     806          result = internal_crc32(bin_data, len, crc);
     807          Py_END_ALLOW_THREADS
     808          return result;
     809      } else {
     810          return internal_crc32(bin_data, len, crc);
     811      }
     812  }
     813  #endif  /* USE_ZLIB_CRC32 */
     814  
     815  /*[clinic input]
     816  binascii.b2a_hex
     817  
     818      data: Py_buffer
     819      sep: object = NULL
     820          An optional single character or byte to separate hex bytes.
     821      bytes_per_sep: int = 1
     822          How many bytes between separators.  Positive values count from the
     823          right, negative values count from the left.
     824  
     825  Hexadecimal representation of binary data.
     826  
     827  The return value is a bytes object.  This function is also
     828  available as "hexlify()".
     829  
     830  Example:
     831  >>> binascii.b2a_hex(b'\xb9\x01\xef')
     832  b'b901ef'
     833  >>> binascii.hexlify(b'\xb9\x01\xef', ':')
     834  b'b9:01:ef'
     835  >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
     836  b'b9_01ef'
     837  [clinic start generated code]*/
     838  
     839  static PyObject *
     840  binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
     841                        int bytes_per_sep)
     842  /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
     843  {
     844      return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
     845                                       sep, bytes_per_sep);
     846  }
     847  
     848  /*[clinic input]
     849  binascii.hexlify = binascii.b2a_hex
     850  
     851  Hexadecimal representation of binary data.
     852  
     853  The return value is a bytes object.  This function is also
     854  available as "b2a_hex()".
     855  [clinic start generated code]*/
     856  
     857  static PyObject *
     858  binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
     859                        int bytes_per_sep)
     860  /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
     861  {
     862      return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
     863                                       sep, bytes_per_sep);
     864  }
     865  
     866  /*[clinic input]
     867  binascii.a2b_hex
     868  
     869      hexstr: ascii_buffer
     870      /
     871  
     872  Binary data of hexadecimal representation.
     873  
     874  hexstr must contain an even number of hex digits (upper or lower case).
     875  This function is also available as "unhexlify()".
     876  [clinic start generated code]*/
     877  
     878  static PyObject *
     879  binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
     880  /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
     881  {
     882      const char* argbuf;
     883      Py_ssize_t arglen;
     884      PyObject *retval;
     885      char* retbuf;
     886      Py_ssize_t i, j;
     887      binascii_state *state;
     888  
     889      argbuf = hexstr->buf;
     890      arglen = hexstr->len;
     891  
     892      assert(arglen >= 0);
     893  
     894      /* XXX What should we do about strings with an odd length?  Should
     895       * we add an implicit leading zero, or a trailing zero?  For now,
     896       * raise an exception.
     897       */
     898      if (arglen % 2) {
     899          state = get_binascii_state(module);
     900          if (state == NULL) {
     901              return NULL;
     902          }
     903          PyErr_SetString(state->Error, "Odd-length string");
     904          return NULL;
     905      }
     906  
     907      retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
     908      if (!retval)
     909          return NULL;
     910      retbuf = PyBytes_AS_STRING(retval);
     911  
     912      for (i=j=0; i < arglen; i += 2) {
     913          unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
     914          unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
     915          if (top >= 16 || bot >= 16) {
     916              state = get_binascii_state(module);
     917              if (state == NULL) {
     918                  return NULL;
     919              }
     920              PyErr_SetString(state->Error,
     921                              "Non-hexadecimal digit found");
     922              goto finally;
     923          }
     924          retbuf[j++] = (top << 4) + bot;
     925      }
     926      return retval;
     927  
     928    finally:
     929      Py_DECREF(retval);
     930      return NULL;
     931  }
     932  
     933  /*[clinic input]
     934  binascii.unhexlify = binascii.a2b_hex
     935  
     936  Binary data of hexadecimal representation.
     937  
     938  hexstr must contain an even number of hex digits (upper or lower case).
     939  [clinic start generated code]*/
     940  
     941  static PyObject *
     942  binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
     943  /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
     944  {
     945      return binascii_a2b_hex_impl(module, hexstr);
     946  }
     947  
/* Output line length limit used by b2a_qp() when deciding where to insert
 * a soft line break ("=" followed by the line ending). */
#define MAXLINESIZE 76
     949  
     950  
     951  /*[clinic input]
     952  binascii.a2b_qp
     953  
     954      data: ascii_buffer
     955      header: bool(accept={int}) = False
     956  
     957  Decode a string of qp-encoded data.
     958  [clinic start generated code]*/
     959  
     960  static PyObject *
     961  binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
     962  /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
     963  {
     964      Py_ssize_t in, out;
     965      char ch;
     966      const unsigned char *ascii_data;
     967      unsigned char *odata;
     968      Py_ssize_t datalen = 0;
     969      PyObject *rv;
     970  
     971      ascii_data = data->buf;
     972      datalen = data->len;
     973  
     974      /* We allocate the output same size as input, this is overkill.
     975       */
     976      odata = (unsigned char *) PyMem_Calloc(1, datalen);
     977      if (odata == NULL) {
     978          PyErr_NoMemory();
     979          return NULL;
     980      }
     981  
     982      in = out = 0;
     983      while (in < datalen) {
     984          if (ascii_data[in] == '=') {
     985              in++;
     986              if (in >= datalen) break;
     987              /* Soft line breaks */
     988              if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
     989                  if (ascii_data[in] != '\n') {
     990                      while (in < datalen && ascii_data[in] != '\n') in++;
     991                  }
     992                  if (in < datalen) in++;
     993              }
     994              else if (ascii_data[in] == '=') {
     995                  /* broken case from broken python qp */
     996                  odata[out++] = '=';
     997                  in++;
     998              }
     999              else if ((in + 1 < datalen) &&
    1000                       ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
    1001                        (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
    1002                        (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
    1003                       ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
    1004                        (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
    1005                        (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
    1006                  /* hexval */
    1007                  ch = _PyLong_DigitValue[ascii_data[in]] << 4;
    1008                  in++;
    1009                  ch |= _PyLong_DigitValue[ascii_data[in]];
    1010                  in++;
    1011                  odata[out++] = ch;
    1012              }
    1013              else {
    1014                odata[out++] = '=';
    1015              }
    1016          }
    1017          else if (header && ascii_data[in] == '_') {
    1018              odata[out++] = ' ';
    1019              in++;
    1020          }
    1021          else {
    1022              odata[out] = ascii_data[in];
    1023              in++;
    1024              out++;
    1025          }
    1026      }
    1027      if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
    1028          PyMem_Free(odata);
    1029          return NULL;
    1030      }
    1031      PyMem_Free(odata);
    1032      return rv;
    1033  }
    1034  
    1035  static int
    1036  to_hex (unsigned char ch, unsigned char *s)
    1037  {
    1038      unsigned int uvalue = ch;
    1039  
    1040      s[1] = "0123456789ABCDEF"[uvalue % 16];
    1041      uvalue = (uvalue / 16);
    1042      s[0] = "0123456789ABCDEF"[uvalue % 16];
    1043      return 0;
    1044  }
    1045  
    1046  /* XXX: This is ridiculously complicated to be backward compatible
    1047   * (mostly) with the quopri module.  It doesn't re-create the quopri
    1048   * module bug where text ending in CRLF has the CR encoded */
    1049  
    1050  /*[clinic input]
    1051  binascii.b2a_qp
    1052  
    1053      data: Py_buffer
    1054      quotetabs: bool(accept={int}) = False
    1055      istext: bool(accept={int}) = True
    1056      header: bool(accept={int}) = False
    1057  
    1058  Encode a string using quoted-printable encoding.
    1059  
    1060  On encoding, when istext is set, newlines are not encoded, and white
    1061  space at end of lines is.  When istext is not set, \r and \n (CR/LF)
    1062  are both encoded.  When quotetabs is set, space and tabs are encoded.
    1063  [clinic start generated code]*/
    1064  
    1065  static PyObject *
    1066  binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
    1067                       int istext, int header)
    1068  /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
    1069  {
    1070      Py_ssize_t in, out;
    1071      const unsigned char *databuf;
    1072      unsigned char *odata;
    1073      Py_ssize_t datalen = 0, odatalen = 0;
    1074      PyObject *rv;
    1075      unsigned int linelen = 0;
    1076      unsigned char ch;
    1077      int crlf = 0;
    1078      const unsigned char *p;
    1079  
    1080      databuf = data->buf;
    1081      datalen = data->len;
    1082  
    1083      /* See if this string is using CRLF line ends */
    1084      /* XXX: this function has the side effect of converting all of
    1085       * the end of lines to be the same depending on this detection
    1086       * here */
    1087      p = (const unsigned char *) memchr(databuf, '\n', datalen);
    1088      if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
    1089          crlf = 1;
    1090  
    1091      /* First, scan to see how many characters need to be encoded */
    1092      in = 0;
    1093      while (in < datalen) {
    1094          Py_ssize_t delta = 0;
    1095          if ((databuf[in] > 126) ||
    1096              (databuf[in] == '=') ||
    1097              (header && databuf[in] == '_') ||
    1098              ((databuf[in] == '.') && (linelen == 0) &&
    1099               (in + 1 == datalen || databuf[in+1] == '\n' ||
    1100                databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
    1101              (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
    1102              ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
    1103              ((databuf[in] < 33) &&
    1104               (databuf[in] != '\r') && (databuf[in] != '\n') &&
    1105               (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
    1106          {
    1107              if ((linelen + 3) >= MAXLINESIZE) {
    1108                  linelen = 0;
    1109                  if (crlf)
    1110                      delta += 3;
    1111                  else
    1112                      delta += 2;
    1113              }
    1114              linelen += 3;
    1115              delta += 3;
    1116              in++;
    1117          }
    1118          else {
    1119              if (istext &&
    1120                  ((databuf[in] == '\n') ||
    1121                   ((in+1 < datalen) && (databuf[in] == '\r') &&
    1122                   (databuf[in+1] == '\n'))))
    1123              {
    1124                  linelen = 0;
    1125                  /* Protect against whitespace on end of line */
    1126                  if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
    1127                      delta += 2;
    1128                  if (crlf)
    1129                      delta += 2;
    1130                  else
    1131                      delta += 1;
    1132                  if (databuf[in] == '\r')
    1133                      in += 2;
    1134                  else
    1135                      in++;
    1136              }
    1137              else {
    1138                  if ((in + 1 != datalen) &&
    1139                      (databuf[in+1] != '\n') &&
    1140                      (linelen + 1) >= MAXLINESIZE) {
    1141                      linelen = 0;
    1142                      if (crlf)
    1143                          delta += 3;
    1144                      else
    1145                          delta += 2;
    1146                  }
    1147                  linelen++;
    1148                  delta++;
    1149                  in++;
    1150              }
    1151          }
    1152          if (PY_SSIZE_T_MAX - delta < odatalen) {
    1153              PyErr_NoMemory();
    1154              return NULL;
    1155          }
    1156          odatalen += delta;
    1157      }
    1158  
    1159      /* We allocate the output same size as input, this is overkill.
    1160       */
    1161      odata = (unsigned char *) PyMem_Calloc(1, odatalen);
    1162      if (odata == NULL) {
    1163          PyErr_NoMemory();
    1164          return NULL;
    1165      }
    1166  
    1167      in = out = linelen = 0;
    1168      while (in < datalen) {
    1169          if ((databuf[in] > 126) ||
    1170              (databuf[in] == '=') ||
    1171              (header && databuf[in] == '_') ||
    1172              ((databuf[in] == '.') && (linelen == 0) &&
    1173               (in + 1 == datalen || databuf[in+1] == '\n' ||
    1174                databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
    1175              (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
    1176              ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
    1177              ((databuf[in] < 33) &&
    1178               (databuf[in] != '\r') && (databuf[in] != '\n') &&
    1179               (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
    1180          {
    1181              if ((linelen + 3 )>= MAXLINESIZE) {
    1182                  odata[out++] = '=';
    1183                  if (crlf) odata[out++] = '\r';
    1184                  odata[out++] = '\n';
    1185                  linelen = 0;
    1186              }
    1187              odata[out++] = '=';
    1188              to_hex(databuf[in], &odata[out]);
    1189              out += 2;
    1190              in++;
    1191              linelen += 3;
    1192          }
    1193          else {
    1194              if (istext &&
    1195                  ((databuf[in] == '\n') ||
    1196                   ((in+1 < datalen) && (databuf[in] == '\r') &&
    1197                   (databuf[in+1] == '\n'))))
    1198              {
    1199                  linelen = 0;
    1200                  /* Protect against whitespace on end of line */
    1201                  if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
    1202                      ch = odata[out-1];
    1203                      odata[out-1] = '=';
    1204                      to_hex(ch, &odata[out]);
    1205                      out += 2;
    1206                  }
    1207  
    1208                  if (crlf) odata[out++] = '\r';
    1209                  odata[out++] = '\n';
    1210                  if (databuf[in] == '\r')
    1211                      in += 2;
    1212                  else
    1213                      in++;
    1214              }
    1215              else {
    1216                  if ((in + 1 != datalen) &&
    1217                      (databuf[in+1] != '\n') &&
    1218                      (linelen + 1) >= MAXLINESIZE) {
    1219                      odata[out++] = '=';
    1220                      if (crlf) odata[out++] = '\r';
    1221                      odata[out++] = '\n';
    1222                      linelen = 0;
    1223                  }
    1224                  linelen++;
    1225                  if (header && databuf[in] == ' ') {
    1226                      odata[out++] = '_';
    1227                      in++;
    1228                  }
    1229                  else {
    1230                      odata[out++] = databuf[in++];
    1231                  }
    1232              }
    1233          }
    1234      }
    1235      if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
    1236          PyMem_Free(odata);
    1237          return NULL;
    1238      }
    1239      PyMem_Free(odata);
    1240      return rv;
    1241  }
    1242  
    1243  /* List of functions defined in the module */
    1244  
    1245  static struct PyMethodDef binascii_module_methods[] = {
    1246      BINASCII_A2B_UU_METHODDEF
    1247      BINASCII_B2A_UU_METHODDEF
    1248      BINASCII_A2B_BASE64_METHODDEF
    1249      BINASCII_B2A_BASE64_METHODDEF
    1250      BINASCII_A2B_HEX_METHODDEF
    1251      BINASCII_B2A_HEX_METHODDEF
    1252      BINASCII_HEXLIFY_METHODDEF
    1253      BINASCII_UNHEXLIFY_METHODDEF
    1254      BINASCII_CRC_HQX_METHODDEF
    1255      BINASCII_CRC32_METHODDEF
    1256      BINASCII_A2B_QP_METHODDEF
    1257      BINASCII_B2A_QP_METHODDEF
    1258      {NULL, NULL}                             /* sentinel */
    1259  };
    1260  
    1261  
/* Module docstring.  The initialization function for the module (which
 * *must* be called PyInit_binascii) is defined at the end of the file. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
    1264  
    1265  static int
    1266  binascii_exec(PyObject *module) {
    1267      int result;
    1268      binascii_state *state = PyModule_GetState(module);
    1269      if (state == NULL) {
    1270          return -1;
    1271      }
    1272  
    1273      state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
    1274      if (state->Error == NULL) {
    1275          return -1;
    1276      }
    1277      Py_INCREF(state->Error);
    1278      result = PyModule_AddObject(module, "Error", state->Error);
    1279      if (result == -1) {
    1280          Py_DECREF(state->Error);
    1281          return -1;
    1282      }
    1283  
    1284      state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
    1285      if (state->Incomplete == NULL) {
    1286          return -1;
    1287      }
    1288      Py_INCREF(state->Incomplete);
    1289      result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
    1290      if (result == -1) {
    1291          Py_DECREF(state->Incomplete);
    1292          return -1;
    1293      }
    1294  
    1295      return 0;
    1296  }
    1297  
/* Module slots: binascii_exec is registered under Py_mod_exec; the
 * {0, NULL} entry terminates the array. */
static PyModuleDef_Slot binascii_slots[] = {
    {Py_mod_exec, binascii_exec},
    {0, NULL}
};
    1302  
    1303  static int
    1304  binascii_traverse(PyObject *module, visitproc visit, void *arg)
    1305  {
    1306      binascii_state *state = get_binascii_state(module);
    1307      Py_VISIT(state->Error);
    1308      Py_VISIT(state->Incomplete);
    1309      return 0;
    1310  }
    1311  
    1312  static int
    1313  binascii_clear(PyObject *module)
    1314  {
    1315      binascii_state *state = get_binascii_state(module);
    1316      Py_CLEAR(state->Error);
    1317      Py_CLEAR(state->Incomplete);
    1318      return 0;
    1319  }
    1320  
    1321  static void
    1322  binascii_free(void *module)
    1323  {
    1324      binascii_clear((PyObject *)module);
    1325  }
    1326  
    1327  static struct PyModuleDef binasciimodule = {
    1328      PyModuleDef_HEAD_INIT,
    1329      "binascii",
    1330      doc_binascii,
    1331      sizeof(binascii_state),
    1332      binascii_module_methods,
    1333      binascii_slots,
    1334      binascii_traverse,
    1335      binascii_clear,
    1336      binascii_free
    1337  };
    1338  
    1339  PyMODINIT_FUNC
    1340  PyInit_binascii(void)
    1341  {
    1342      return PyModuleDef_Init(&binasciimodule);
    1343  }