(root)/
Python-3.11.7/
Python/
formatter_unicode.c
/* Implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  That is, the versions
   of int.__format__, etc., that take and return unicode objects. */
       4  
       5  #include "Python.h"
       6  #include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
       7  #include "pycore_long.h"          // _PyLong_FormatWriter()
       8  #include <locale.h>
       9  
      10  /* Raises an exception about an unknown presentation type for this
      11   * type. */
      12  
      13  static void
      14  unknown_presentation_type(Py_UCS4 presentation_type,
      15                            const char* type_name)
      16  {
      17      /* %c might be out-of-range, hence the two cases. */
      18      if (presentation_type > 32 && presentation_type < 128)
      19          PyErr_Format(PyExc_ValueError,
      20                       "Unknown format code '%c' "
      21                       "for object of type '%.200s'",
      22                       (char)presentation_type,
      23                       type_name);
      24      else
      25          PyErr_Format(PyExc_ValueError,
      26                       "Unknown format code '\\x%x' "
      27                       "for object of type '%.200s'",
      28                       (unsigned int)presentation_type,
      29                       type_name);
      30  }
      31  
      32  static void
      33  invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
      34  {
      35      assert(specifier == ',' || specifier == '_');
      36      if (presentation_type > 32 && presentation_type < 128)
      37          PyErr_Format(PyExc_ValueError,
      38                       "Cannot specify '%c' with '%c'.",
      39                       specifier, (char)presentation_type);
      40      else
      41          PyErr_Format(PyExc_ValueError,
      42                       "Cannot specify '%c' with '\\x%x'.",
      43                       specifier, (unsigned int)presentation_type);
      44  }
      45  
      46  static void
      47  invalid_comma_and_underscore(void)
      48  {
      49      PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
      50  }
      51  
      52  /*
      53      get_integer consumes 0 or more decimal digit characters from an
      54      input string, updates *result with the corresponding positive
      55      integer, and returns the number of digits consumed.
      56  
      57      returns -1 on error.
      58  */
      59  static int
      60  get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
      61                    Py_ssize_t *result)
      62  {
      63      Py_ssize_t accumulator, digitval, pos = *ppos;
      64      int numdigits;
      65      int kind = PyUnicode_KIND(str);
      66      const void *data = PyUnicode_DATA(str);
      67  
      68      accumulator = numdigits = 0;
      69      for (; pos < end; pos++, numdigits++) {
      70          digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
      71          if (digitval < 0)
      72              break;
      73          /*
      74             Detect possible overflow before it happens:
      75  
      76                accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
      77                accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
      78          */
      79          if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
      80              PyErr_Format(PyExc_ValueError,
      81                           "Too many decimal digits in format string");
      82              *ppos = pos;
      83              return -1;
      84          }
      85          accumulator = accumulator * 10 + digitval;
      86      }
      87      *ppos = pos;
      88      *result = accumulator;
      89      return numdigits;
      90  }
      91  
      92  /************************************************************************/
      93  /*********** standard format specifier parsing **************************/
      94  /************************************************************************/
      95  
      96  /* returns true if this character is a specifier alignment token */
      97  Py_LOCAL_INLINE(int)
      98  is_alignment_token(Py_UCS4 c)
      99  {
     100      switch (c) {
     101      case '<': case '>': case '=': case '^':
     102          return 1;
     103      default:
     104          return 0;
     105      }
     106  }
     107  
     108  /* returns true if this character is a sign element */
     109  Py_LOCAL_INLINE(int)
     110  is_sign_element(Py_UCS4 c)
     111  {
     112      switch (c) {
     113      case ' ': case '+': case '-':
     114          return 1;
     115      default:
     116          return 0;
     117      }
     118  }
     119  
/* Locale type codes: they select which separators and grouping
   get_locale_info() provides. LT_NO_LOCALE must be zero.
   LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE use the separator
   character itself as their value. */
enum LocaleType {
    LT_NO_LOCALE = 0,             /* no thousands separator */
    LT_DEFAULT_LOCALE = ',',      /* ',' separator, groups of three */
    LT_UNDERSCORE_LOCALE = '_',   /* '_' separator, groups of three */
    LT_UNDER_FOUR_LOCALE,         /* '_' separator, groups of four
                                     (bin/oct/hex) */
    LT_CURRENT_LOCALE             /* values taken from localeconv() */
};
     128  
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;      /* padding character; ' ' unless given */
    Py_UCS4 align;          /* '<', '>', '=' or '^' when given, else the
                               caller's default_align */
    int alternate;          /* 1 if '#' was given, else 0 */
    int no_neg_0;           /* 1 if 'z' was given, else 0 */
    Py_UCS4 sign;           /* ' ', '+' or '-'; '\0' if not given */
    Py_ssize_t width;       /* -1 if not given */
    enum LocaleType thousands_separators; /* LT_NO_LOCALE if not given */
    Py_ssize_t precision;   /* -1 if not given */
    Py_UCS4 type;           /* presentation type; the caller's
                               default_type if not given */
} InternalFormatSpec;
     140  
     141  
/*
  Parse the standard format specifier held in format_spec[start:end]
  (Python slice notation: start is the index of the first character,
  end is the index just past the last one) and fill in *format with
  the parsed information.

  The fields are recognized in this order, each of them optional:

      [[fill]align][sign][z][#][0][width][, or _][.precision][type]

  default_type and default_align are stored in format->type and
  format->align when the specifier does not provide them.  obj is only
  used to name its type in the "Invalid format specifier" message.

  returns 1 on success, 0 on failure.
  on failure an exception is set.
*/
static int
parse_internal_render_format_spec(PyObject *obj,
                                  PyObject *format_spec,
                                  Py_ssize_t start, Py_ssize_t end,
                                  InternalFormatSpec *format,
                                  char default_type,
                                  char default_align)
{
    Py_ssize_t pos = start;
    int kind = PyUnicode_KIND(format_spec);
    const void *data = PyUnicode_DATA(format_spec);
    /* end-pos is used throughout this code to specify the length of
       the input string that has not been consumed yet */
#define READ_spec(index) PyUnicode_READ(kind, data, index)

    Py_ssize_t consumed;
    int align_specified = 0;
    int fill_char_specified = 0;

    /* Start from the defaults; every field found below overrides one
       of these. */
    format->fill_char = ' ';
    format->align = default_align;
    format->alternate = 0;
    format->no_neg_0 = 0;
    format->sign = '\0';
    format->width = -1;
    format->thousands_separators = LT_NO_LOCALE;
    format->precision = -1;
    format->type = default_type;

    /* If the second char is an alignment token,
       then parse the fill char */
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
        format->align = READ_spec(pos+1);
        format->fill_char = READ_spec(pos);
        fill_char_specified = 1;
        align_specified = 1;
        pos += 2;
    }
    /* Otherwise a leading alignment token stands alone, with no fill
       char in front of it. */
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
        format->align = READ_spec(pos);
        align_specified = 1;
        ++pos;
    }

    /* Parse the various sign options */
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
        format->sign = READ_spec(pos);
        ++pos;
    }

    /* If the next character is z, request coercion of negative 0.
       Applies only to floats. */
    if (end-pos >= 1 && READ_spec(pos) == 'z') {
        format->no_neg_0 = 1;
        ++pos;
    }

    /* If the next character is #, we're in alternate mode.  This only
       applies to integers. */
    if (end-pos >= 1 && READ_spec(pos) == '#') {
        format->alternate = 1;
        ++pos;
    }

    /* The special case for 0-padding (backwards compat): a '0' here
       becomes the fill char unless an explicit fill char was given.
       It also switches the alignment to '=' when no alignment was
       given and the default alignment is '>'. */
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
        format->fill_char = '0';
        if (!align_specified && default_align == '>') {
            format->align = '=';
        }
        ++pos;
    }

    consumed = get_integer(format_spec, &pos, end, &format->width);
    if (consumed == -1)
        /* Overflow error. Exception already set. */
        return 0;

    /* If consumed is 0, we didn't consume any characters for the
       width. In that case, reset the width to -1, because
       get_integer() will have set it to zero. -1 is how we record
       that the width wasn't specified. */
    if (consumed == 0)
        format->width = -1;

    /* Comma signifies add thousands separators */
    if (end-pos && READ_spec(pos) == ',') {
        format->thousands_separators = LT_DEFAULT_LOCALE;
        ++pos;
    }
    /* Underscore signifies add thousands separators */
    if (end-pos && READ_spec(pos) == '_') {
        /* A comma was already consumed just above: ",_" is an error. */
        if (format->thousands_separators != LT_NO_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
        ++pos;
    }
    /* A comma right after a consumed underscore ("_,") is the same
       error, in the other order.  The comma is not consumed here. */
    if (end-pos && READ_spec(pos) == ',') {
        if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
    }

    /* Parse field precision */
    if (end-pos && READ_spec(pos) == '.') {
        ++pos;

        consumed = get_integer(format_spec, &pos, end, &format->precision);
        if (consumed == -1)
            /* Overflow error. Exception already set. */
            return 0;

        /* Not having a precision after a dot is an error. */
        if (consumed == 0) {
            PyErr_Format(PyExc_ValueError,
                         "Format specifier missing precision");
            return 0;
        }

    }

    /* Finally, parse the type field. */

    if (end-pos > 1) {
        /* More than one char remains, so this is an invalid format
           specifier. */
        /* Create a temporary object that contains the format spec we're
           operating on.  It's format_spec[start:end] (in Python syntax). */
        PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
                                         (char*)data + kind*start,
                                         end-start);
        /* If the temporary could not be created, no ValueError is
           raised here; 0 is returned all the same. */
        if (actual_format_spec != NULL) {
            PyErr_Format(PyExc_ValueError,
                "Invalid format specifier '%U' for object of type '%.200s'",
                actual_format_spec, Py_TYPE(obj)->tp_name);
            Py_DECREF(actual_format_spec);
        }
        return 0;
    }

    /* Exactly one char remains: it is the presentation type. */
    if (end-pos == 1) {
        format->type = READ_spec(pos);
        ++pos;
    }

    /* Do as much validating as we can, just by looking at the format
       specifier.  Do not take into account what type of formatting
       we're doing (int, float, string). */

    if (format->thousands_separators) {
        switch (format->type) {
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'E':
        case 'G':
        case '%':
        case 'F':
        case '\0':
            /* These are allowed. See PEP 378.*/
            break;
        case 'b':
        case 'o':
        case 'x':
        case 'X':
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
                /* Every four digits, not every three, in bin/oct/hex. */
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
                break;
            }
            /* fall through */
        default:
            /* At this point thousands_separators is ',' or '_', so it
               doubles as the separator character for the message. */
            invalid_thousands_separator_type(format->thousands_separators, format->type);
            return 0;
        }
    }

    assert (format->align <= 127);
    assert (format->sign <= 127);
    return 1;
}
     334  
     335  /* Calculate the padding needed. */
     336  static void
     337  calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
     338               Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
     339               Py_ssize_t *n_total)
     340  {
     341      if (width >= 0) {
     342          if (nchars > width)
     343              *n_total = nchars;
     344          else
     345              *n_total = width;
     346      }
     347      else {
     348          /* not specified, use all of the chars and no more */
     349          *n_total = nchars;
     350      }
     351  
     352      /* Figure out how much leading space we need, based on the
     353         aligning */
     354      if (align == '>')
     355          *n_lpadding = *n_total - nchars;
     356      else if (align == '^')
     357          *n_lpadding = (*n_total - nchars) / 2;
     358      else if (align == '<' || align == '=')
     359          *n_lpadding = 0;
     360      else {
     361          /* We should never have an unspecified alignment. */
     362          Py_UNREACHABLE();
     363      }
     364  
     365      *n_rpadding = *n_total - nchars - *n_lpadding;
     366  }
     367  
     368  /* Do the padding, and return a pointer to where the caller-supplied
     369     content goes. */
     370  static int
     371  fill_padding(_PyUnicodeWriter *writer,
     372               Py_ssize_t nchars,
     373               Py_UCS4 fill_char, Py_ssize_t n_lpadding,
     374               Py_ssize_t n_rpadding)
     375  {
     376      Py_ssize_t pos;
     377  
     378      /* Pad on left. */
     379      if (n_lpadding) {
     380          pos = writer->pos;
     381          _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
     382      }
     383  
     384      /* Pad on right. */
     385      if (n_rpadding) {
     386          pos = writer->pos + nchars + n_lpadding;
     387          _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
     388      }
     389  
     390      /* Pointer to the user content. */
     391      writer->pos += n_lpadding;
     392      return 0;
     393  }
     394  
     395  /************************************************************************/
     396  /*********** common routines for numeric formatting *********************/
     397  /************************************************************************/
     398  
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode.
   Filled in by get_locale_info() and released by free_locale_info(). */
typedef struct {
    PyObject *decimal_point;  /* released with Py_XDECREF() by
                                 free_locale_info() */
    PyObject *thousands_sep;  /* released with Py_XDECREF() by
                                 free_locale_info() */
    const char *grouping;     /* grouping description; points either at
                                 grouping_buffer or at a static string */
    char *grouping_buffer;    /* copy of the localeconv() grouping made
                                 for LT_CURRENT_LOCALE; released with
                                 PyMem_Free() by free_locale_info() */
} LocaleInfo;

/* All members zero/NULL. */
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
     410  
/* describes the layout for a number, see the comment in
   calc_number_widths() for details.  Filled in by
   calc_number_widths() and consumed by fill_number(). */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill chars before the sign */
    Py_ssize_t n_prefix;    /* length of the prefix */
    Py_ssize_t n_spadding;  /* fill chars between the prefix and the
                               digits (used for '=' alignment) */
    Py_ssize_t n_rpadding;  /* fill chars after the remainder */
    char sign;              /* sign char to write; '\0' if none */
    Py_ssize_t n_sign;      /* number of chars needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* Length of the decimal point string;
                               0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       _PyUnicode_InsertThousandsGrouping(). */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
     434  
     435  
     436  /* Given a number of the form:
     437     digits[remainder]
     438     where ptr points to the start and end points to the end, find where
     439      the integer part ends. This could be a decimal, an exponent, both,
     440      or neither.
     441     If a decimal point is present, set *has_decimal and increment
     442      remainder beyond it.
     443     Results are undefined (but shouldn't crash) for improperly
     444      formatted strings.
     445  */
     446  static void
     447  parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
     448               Py_ssize_t *n_remainder, int *has_decimal)
     449  {
     450      Py_ssize_t remainder;
     451      int kind = PyUnicode_KIND(s);
     452      const void *data = PyUnicode_DATA(s);
     453  
     454      while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
     455          ++pos;
     456      remainder = pos;
     457  
     458      /* Does remainder start with a decimal point? */
     459      *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
     460  
     461      /* Skip the decimal point. */
     462      if (*has_decimal)
     463          remainder++;
     464  
     465      *n_remainder = end - remainder;
     466  }
     467  
     468  /* not all fields of format are used.  for example, precision is
     469     unused.  should this take discrete params in order to be more clear
     470     about what it does?  or is passing a single format parameter easier
     471     and more efficient enough to justify a little obfuscation?
     472     Return -1 on error. */
     473  static Py_ssize_t
     474  calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
     475                     Py_UCS4 sign_char, Py_ssize_t n_start,
     476                     Py_ssize_t n_end, Py_ssize_t n_remainder,
     477                     int has_decimal, const LocaleInfo *locale,
     478                     const InternalFormatSpec *format, Py_UCS4 *maxchar)
     479  {
     480      Py_ssize_t n_non_digit_non_padding;
     481      Py_ssize_t n_padding;
     482  
     483      spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
     484      spec->n_lpadding = 0;
     485      spec->n_prefix = n_prefix;
     486      spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
     487      spec->n_remainder = n_remainder;
     488      spec->n_spadding = 0;
     489      spec->n_rpadding = 0;
     490      spec->sign = '\0';
     491      spec->n_sign = 0;
     492  
     493      /* the output will look like:
     494         |                                                                                         |
     495         | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
     496         |                                                                                         |
     497  
     498         sign is computed from format->sign and the actual
     499         sign of the number
     500  
     501         prefix is given (it's for the '0x' prefix)
     502  
     503         digits is already known
     504  
     505         the total width is either given, or computed from the
     506         actual digits
     507  
     508         only one of lpadding, spadding, and rpadding can be non-zero,
     509         and it's calculated from the width and other fields
     510      */
     511  
     512      /* compute the various parts we're going to write */
     513      switch (format->sign) {
     514      case '+':
     515          /* always put a + or - */
     516          spec->n_sign = 1;
     517          spec->sign = (sign_char == '-' ? '-' : '+');
     518          break;
     519      case ' ':
     520          spec->n_sign = 1;
     521          spec->sign = (sign_char == '-' ? '-' : ' ');
     522          break;
     523      default:
     524          /* Not specified, or the default (-) */
     525          if (sign_char == '-') {
     526              spec->n_sign = 1;
     527              spec->sign = '-';
     528          }
     529      }
     530  
     531      /* The number of chars used for non-digits and non-padding. */
     532      n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
     533          spec->n_remainder;
     534  
     535      /* min_width can go negative, that's okay. format->width == -1 means
     536         we don't care. */
     537      if (format->fill_char == '0' && format->align == '=')
     538          spec->n_min_width = format->width - n_non_digit_non_padding;
     539      else
     540          spec->n_min_width = 0;
     541  
     542      if (spec->n_digits == 0)
     543          /* This case only occurs when using 'c' formatting, we need
     544             to special case it because the grouping code always wants
     545             to have at least one character. */
     546          spec->n_grouped_digits = 0;
     547      else {
     548          Py_UCS4 grouping_maxchar;
     549          spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
     550              NULL, 0,
     551              NULL, 0, spec->n_digits,
     552              spec->n_min_width,
     553              locale->grouping, locale->thousands_sep, &grouping_maxchar);
     554          if (spec->n_grouped_digits == -1) {
     555              return -1;
     556          }
     557          *maxchar = Py_MAX(*maxchar, grouping_maxchar);
     558      }
     559  
     560      /* Given the desired width and the total of digit and non-digit
     561         space we consume, see if we need any padding. format->width can
     562         be negative (meaning no padding), but this code still works in
     563         that case. */
     564      n_padding = format->width -
     565                          (n_non_digit_non_padding + spec->n_grouped_digits);
     566      if (n_padding > 0) {
     567          /* Some padding is needed. Determine if it's left, space, or right. */
     568          switch (format->align) {
     569          case '<':
     570              spec->n_rpadding = n_padding;
     571              break;
     572          case '^':
     573              spec->n_lpadding = n_padding / 2;
     574              spec->n_rpadding = n_padding - spec->n_lpadding;
     575              break;
     576          case '=':
     577              spec->n_spadding = n_padding;
     578              break;
     579          case '>':
     580              spec->n_lpadding = n_padding;
     581              break;
     582          default:
     583              /* Shouldn't get here */
     584              Py_UNREACHABLE();
     585          }
     586      }
     587  
     588      if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
     589          *maxchar = Py_MAX(*maxchar, format->fill_char);
     590  
     591      if (spec->n_decimal)
     592          *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
     593  
     594      return spec->n_lpadding + spec->n_sign + spec->n_prefix +
     595          spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
     596          spec->n_remainder + spec->n_rpadding;
     597  }
     598  
     599  /* Fill in the digit parts of a number's string representation,
     600     as determined in calc_number_widths().
     601     Return -1 on error, or 0 on success. */
     602  static int
     603  fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
     604              PyObject *digits, Py_ssize_t d_start,
     605              PyObject *prefix, Py_ssize_t p_start,
     606              Py_UCS4 fill_char,
     607              LocaleInfo *locale, int toupper)
     608  {
     609      /* Used to keep track of digits, decimal, and remainder. */
     610      Py_ssize_t d_pos = d_start;
     611      const unsigned int kind = writer->kind;
     612      const void *data = writer->data;
     613      Py_ssize_t r;
     614  
     615      if (spec->n_lpadding) {
     616          _PyUnicode_FastFill(writer->buffer,
     617                              writer->pos, spec->n_lpadding, fill_char);
     618          writer->pos += spec->n_lpadding;
     619      }
     620      if (spec->n_sign == 1) {
     621          PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
     622          writer->pos++;
     623      }
     624      if (spec->n_prefix) {
     625          _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
     626                                        prefix, p_start,
     627                                        spec->n_prefix);
     628          if (toupper) {
     629              Py_ssize_t t;
     630              for (t = 0; t < spec->n_prefix; t++) {
     631                  Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
     632                  c = Py_TOUPPER(c);
     633                  assert (c <= 127);
     634                  PyUnicode_WRITE(kind, data, writer->pos + t, c);
     635              }
     636          }
     637          writer->pos += spec->n_prefix;
     638      }
     639      if (spec->n_spadding) {
     640          _PyUnicode_FastFill(writer->buffer,
     641                              writer->pos, spec->n_spadding, fill_char);
     642          writer->pos += spec->n_spadding;
     643      }
     644  
     645      /* Only for type 'c' special case, it has no digits. */
     646      if (spec->n_digits != 0) {
     647          /* Fill the digits with InsertThousandsGrouping. */
     648          r = _PyUnicode_InsertThousandsGrouping(
     649                  writer, spec->n_grouped_digits,
     650                  digits, d_pos, spec->n_digits,
     651                  spec->n_min_width,
     652                  locale->grouping, locale->thousands_sep, NULL);
     653          if (r == -1)
     654              return -1;
     655          assert(r == spec->n_grouped_digits);
     656          d_pos += spec->n_digits;
     657      }
     658      if (toupper) {
     659          Py_ssize_t t;
     660          for (t = 0; t < spec->n_grouped_digits; t++) {
     661              Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
     662              c = Py_TOUPPER(c);
     663              if (c > 127) {
     664                  PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
     665                  return -1;
     666              }
     667              PyUnicode_WRITE(kind, data, writer->pos + t, c);
     668          }
     669      }
     670      writer->pos += spec->n_grouped_digits;
     671  
     672      if (spec->n_decimal) {
     673          _PyUnicode_FastCopyCharacters(
     674              writer->buffer, writer->pos,
     675              locale->decimal_point, 0, spec->n_decimal);
     676          writer->pos += spec->n_decimal;
     677          d_pos += 1;
     678      }
     679  
     680      if (spec->n_remainder) {
     681          _PyUnicode_FastCopyCharacters(
     682              writer->buffer, writer->pos,
     683              digits, d_pos, spec->n_remainder);
     684          writer->pos += spec->n_remainder;
     685          /* d_pos += spec->n_remainder; */
     686      }
     687  
     688      if (spec->n_rpadding) {
     689          _PyUnicode_FastFill(writer->buffer,
     690                              writer->pos, spec->n_rpadding,
     691                              fill_char);
     692          writer->pos += spec->n_rpadding;
     693      }
     694      return 0;
     695  }
     696  
/* Grouping description used by get_locale_info() for LT_NO_LOCALE.
   In the localeconv() grouping convention a CHAR_MAX element means
   that no further grouping is performed. */
static const char no_grouping[1] = {CHAR_MAX};
     698  
     699  /* Find the decimal point character(s?), thousands_separator(s?), and
     700     grouping description, either for the current locale if type is
     701     LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
     702     LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
     703  static int
     704  get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
     705  {
     706      switch (type) {
     707      case LT_CURRENT_LOCALE: {
     708          struct lconv *lc = localeconv();
     709          if (_Py_GetLocaleconvNumeric(lc,
     710                                       &locale_info->decimal_point,
     711                                       &locale_info->thousands_sep) < 0) {
     712              return -1;
     713          }
     714  
     715          /* localeconv() grouping can become a dangling pointer or point
     716             to a different string if another thread calls localeconv() during
     717             the string formatting. Copy the string to avoid this risk. */
     718          locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
     719          if (locale_info->grouping_buffer == NULL) {
     720              PyErr_NoMemory();
     721              return -1;
     722          }
     723          locale_info->grouping = locale_info->grouping_buffer;
     724          break;
     725      }
     726      case LT_DEFAULT_LOCALE:
     727      case LT_UNDERSCORE_LOCALE:
     728      case LT_UNDER_FOUR_LOCALE:
     729          locale_info->decimal_point = PyUnicode_FromOrdinal('.');
     730          locale_info->thousands_sep = PyUnicode_FromOrdinal(
     731              type == LT_DEFAULT_LOCALE ? ',' : '_');
     732          if (!locale_info->decimal_point || !locale_info->thousands_sep)
     733              return -1;
     734          if (type != LT_UNDER_FOUR_LOCALE)
     735              locale_info->grouping = "\3"; /* Group every 3 characters.  The
     736                                           (implicit) trailing 0 means repeat
     737                                           infinitely. */
     738          else
     739              locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
     740          break;
     741      case LT_NO_LOCALE:
     742          locale_info->decimal_point = PyUnicode_FromOrdinal('.');
     743          locale_info->thousands_sep = PyUnicode_New(0, 0);
     744          if (!locale_info->decimal_point || !locale_info->thousands_sep)
     745              return -1;
     746          locale_info->grouping = no_grouping;
     747          break;
     748      }
     749      return 0;
     750  }
     751  
     752  static void
     753  free_locale_info(LocaleInfo *locale_info)
     754  {
     755      Py_XDECREF(locale_info->decimal_point);
     756      Py_XDECREF(locale_info->thousands_sep);
     757      PyMem_Free(locale_info->grouping_buffer);
     758  }
     759  
     760  /************************************************************************/
     761  /*********** string formatting ******************************************/
     762  /************************************************************************/
     763  
     764  static int
     765  format_string_internal(PyObject *value, const InternalFormatSpec *format,
     766                         _PyUnicodeWriter *writer)
     767  {
     768      Py_ssize_t lpad;
     769      Py_ssize_t rpad;
     770      Py_ssize_t total;
     771      Py_ssize_t len;
     772      int result = -1;
     773      Py_UCS4 maxchar;
     774  
     775      assert(PyUnicode_IS_READY(value));
     776      len = PyUnicode_GET_LENGTH(value);
     777  
     778      /* sign is not allowed on strings */
     779      if (format->sign != '\0') {
     780          if (format->sign == ' ') {
     781              PyErr_SetString(PyExc_ValueError,
     782                  "Space not allowed in string format specifier");
     783          }
     784          else {
     785              PyErr_SetString(PyExc_ValueError,
     786                  "Sign not allowed in string format specifier");
     787          }
     788          goto done;
     789      }
     790  
     791      /* negative 0 coercion is not allowed on strings */
     792      if (format->no_neg_0) {
     793          PyErr_SetString(PyExc_ValueError,
     794                          "Negative zero coercion (z) not allowed in string format "
     795                          "specifier");
     796          goto done;
     797      }
     798  
     799      /* alternate is not allowed on strings */
     800      if (format->alternate) {
     801          PyErr_SetString(PyExc_ValueError,
     802                          "Alternate form (#) not allowed in string format "
     803                          "specifier");
     804          goto done;
     805      }
     806  
     807      /* '=' alignment not allowed on strings */
     808      if (format->align == '=') {
     809          PyErr_SetString(PyExc_ValueError,
     810                          "'=' alignment not allowed "
     811                          "in string format specifier");
     812          goto done;
     813      }
     814  
     815      if ((format->width == -1 || format->width <= len)
     816          && (format->precision == -1 || format->precision >= len)) {
     817          /* Fast path */
     818          return _PyUnicodeWriter_WriteStr(writer, value);
     819      }
     820  
     821      /* if precision is specified, output no more that format.precision
     822         characters */
     823      if (format->precision >= 0 && len >= format->precision) {
     824          len = format->precision;
     825      }
     826  
     827      calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
     828  
     829      maxchar = writer->maxchar;
     830      if (lpad != 0 || rpad != 0)
     831          maxchar = Py_MAX(maxchar, format->fill_char);
     832      if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
     833          Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
     834          maxchar = Py_MAX(maxchar, valmaxchar);
     835      }
     836  
     837      /* allocate the resulting string */
     838      if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
     839          goto done;
     840  
     841      /* Write into that space. First the padding. */
     842      result = fill_padding(writer, len, format->fill_char, lpad, rpad);
     843      if (result == -1)
     844          goto done;
     845  
     846      /* Then the source string. */
     847      if (len) {
     848          _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
     849                                        value, 0, len);
     850      }
     851      writer->pos += (len + rpad);
     852      result = 0;
     853  
     854  done:
     855      return result;
     856  }
     857  
     858  
     859  /************************************************************************/
     860  /*********** long formatting ********************************************/
     861  /************************************************************************/
     862  
     863  static int
     864  format_long_internal(PyObject *value, const InternalFormatSpec *format,
     865                       _PyUnicodeWriter *writer)
     866  {
     867      int result = -1;
     868      Py_UCS4 maxchar = 127;
     869      PyObject *tmp = NULL;
     870      Py_ssize_t inumeric_chars;
     871      Py_UCS4 sign_char = '\0';
     872      Py_ssize_t n_digits;       /* count of digits need from the computed
     873                                    string */
     874      Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
     875                                     produces non-digits */
     876      Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
     877      Py_ssize_t n_total;
     878      Py_ssize_t prefix = 0;
     879      NumberFieldWidths spec;
     880      long x;
     881  
     882      /* Locale settings, either from the actual locale or
     883         from a hard-code pseudo-locale */
     884      LocaleInfo locale = LocaleInfo_STATIC_INIT;
     885  
     886      /* no precision allowed on integers */
     887      if (format->precision != -1) {
     888          PyErr_SetString(PyExc_ValueError,
     889                          "Precision not allowed in integer format specifier");
     890          goto done;
     891      }
     892      /* no negative zero coercion on integers */
     893      if (format->no_neg_0) {
     894          PyErr_SetString(PyExc_ValueError,
     895                          "Negative zero coercion (z) not allowed in integer"
     896                          " format specifier");
     897          goto done;
     898      }
     899  
     900      /* special case for character formatting */
     901      if (format->type == 'c') {
     902          /* error to specify a sign */
     903          if (format->sign != '\0') {
     904              PyErr_SetString(PyExc_ValueError,
     905                              "Sign not allowed with integer"
     906                              " format specifier 'c'");
     907              goto done;
     908          }
     909          /* error to request alternate format */
     910          if (format->alternate) {
     911              PyErr_SetString(PyExc_ValueError,
     912                              "Alternate form (#) not allowed with integer"
     913                              " format specifier 'c'");
     914              goto done;
     915          }
     916  
     917          /* taken from unicodeobject.c formatchar() */
     918          /* Integer input truncated to a character */
     919          x = PyLong_AsLong(value);
     920          if (x == -1 && PyErr_Occurred())
     921              goto done;
     922          if (x < 0 || x > 0x10ffff) {
     923              PyErr_SetString(PyExc_OverflowError,
     924                              "%c arg not in range(0x110000)");
     925              goto done;
     926          }
     927          tmp = PyUnicode_FromOrdinal(x);
     928          inumeric_chars = 0;
     929          n_digits = 1;
     930          maxchar = Py_MAX(maxchar, (Py_UCS4)x);
     931  
     932          /* As a sort-of hack, we tell calc_number_widths that we only
     933             have "remainder" characters. calc_number_widths thinks
     934             these are characters that don't get formatted, only copied
     935             into the output string. We do this for 'c' formatting,
     936             because the characters are likely to be non-digits. */
     937          n_remainder = 1;
     938      }
     939      else {
     940          int base;
     941          int leading_chars_to_skip = 0;  /* Number of characters added by
     942                                             PyNumber_ToBase that we want to
     943                                             skip over. */
     944  
     945          /* Compute the base and how many characters will be added by
     946             PyNumber_ToBase */
     947          switch (format->type) {
     948          case 'b':
     949              base = 2;
     950              leading_chars_to_skip = 2; /* 0b */
     951              break;
     952          case 'o':
     953              base = 8;
     954              leading_chars_to_skip = 2; /* 0o */
     955              break;
     956          case 'x':
     957          case 'X':
     958              base = 16;
     959              leading_chars_to_skip = 2; /* 0x */
     960              break;
     961          default:  /* shouldn't be needed, but stops a compiler warning */
     962          case 'd':
     963          case 'n':
     964              base = 10;
     965              break;
     966          }
     967  
     968          if (format->sign != '+' && format->sign != ' '
     969              && format->width == -1
     970              && format->type != 'X' && format->type != 'n'
     971              && !format->thousands_separators
     972              && PyLong_CheckExact(value))
     973          {
     974              /* Fast path */
     975              return _PyLong_FormatWriter(writer, value, base, format->alternate);
     976          }
     977  
     978          /* The number of prefix chars is the same as the leading
     979             chars to skip */
     980          if (format->alternate)
     981              n_prefix = leading_chars_to_skip;
     982  
     983          /* Do the hard part, converting to a string in a given base */
     984          tmp = _PyLong_Format(value, base);
     985          if (tmp == NULL || PyUnicode_READY(tmp) == -1)
     986              goto done;
     987  
     988          inumeric_chars = 0;
     989          n_digits = PyUnicode_GET_LENGTH(tmp);
     990  
     991          prefix = inumeric_chars;
     992  
     993          /* Is a sign character present in the output?  If so, remember it
     994             and skip it */
     995          if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
     996              sign_char = '-';
     997              ++prefix;
     998              ++leading_chars_to_skip;
     999          }
    1000  
    1001          /* Skip over the leading chars (0x, 0b, etc.) */
    1002          n_digits -= leading_chars_to_skip;
    1003          inumeric_chars += leading_chars_to_skip;
    1004      }
    1005  
    1006      /* Determine the grouping, separator, and decimal point, if any. */
    1007      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1008                          format->thousands_separators,
    1009                          &locale) == -1)
    1010          goto done;
    1011  
    1012      /* Calculate how much memory we'll need. */
    1013      n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
    1014                                   inumeric_chars + n_digits, n_remainder, 0,
    1015                                   &locale, format, &maxchar);
    1016      if (n_total == -1) {
    1017          goto done;
    1018      }
    1019  
    1020      /* Allocate the memory. */
    1021      if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
    1022          goto done;
    1023  
    1024      /* Populate the memory. */
    1025      result = fill_number(writer, &spec,
    1026                           tmp, inumeric_chars,
    1027                           tmp, prefix, format->fill_char,
    1028                           &locale, format->type == 'X');
    1029  
    1030  done:
    1031      Py_XDECREF(tmp);
    1032      free_locale_info(&locale);
    1033      return result;
    1034  }
    1035  
    1036  /************************************************************************/
    1037  /*********** float formatting *******************************************/
    1038  /************************************************************************/
    1039  
    1040  /* much of this is taken from unicodeobject.c */
    1041  static int
    1042  format_float_internal(PyObject *value,
    1043                        const InternalFormatSpec *format,
    1044                        _PyUnicodeWriter *writer)
    1045  {
    1046      char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1047      Py_ssize_t n_digits;
    1048      Py_ssize_t n_remainder;
    1049      Py_ssize_t n_total;
    1050      int has_decimal;
    1051      double val;
    1052      int precision, default_precision = 6;
    1053      Py_UCS4 type = format->type;
    1054      int add_pct = 0;
    1055      Py_ssize_t index;
    1056      NumberFieldWidths spec;
    1057      int flags = 0;
    1058      int result = -1;
    1059      Py_UCS4 maxchar = 127;
    1060      Py_UCS4 sign_char = '\0';
    1061      int float_type; /* Used to see if we have a nan, inf, or regular float. */
    1062      PyObject *unicode_tmp = NULL;
    1063  
    1064      /* Locale settings, either from the actual locale or
    1065         from a hard-code pseudo-locale */
    1066      LocaleInfo locale = LocaleInfo_STATIC_INIT;
    1067  
    1068      if (format->precision > INT_MAX) {
    1069          PyErr_SetString(PyExc_ValueError, "precision too big");
    1070          goto done;
    1071      }
    1072      precision = (int)format->precision;
    1073  
    1074      if (format->alternate)
    1075          flags |= Py_DTSF_ALT;
    1076      if (format->no_neg_0)
    1077          flags |= Py_DTSF_NO_NEG_0;
    1078  
    1079      if (type == '\0') {
    1080          /* Omitted type specifier.  Behaves in the same way as repr(x)
    1081             and str(x) if no precision is given, else like 'g', but with
    1082             at least one digit after the decimal point. */
    1083          flags |= Py_DTSF_ADD_DOT_0;
    1084          type = 'r';
    1085          default_precision = 0;
    1086      }
    1087  
    1088      if (type == 'n')
    1089          /* 'n' is the same as 'g', except for the locale used to
    1090             format the result. We take care of that later. */
    1091          type = 'g';
    1092  
    1093      val = PyFloat_AsDouble(value);
    1094      if (val == -1.0 && PyErr_Occurred())
    1095          goto done;
    1096  
    1097      if (type == '%') {
    1098          type = 'f';
    1099          val *= 100;
    1100          add_pct = 1;
    1101      }
    1102  
    1103      if (precision < 0)
    1104          precision = default_precision;
    1105      else if (type == 'r')
    1106          type = 'g';
    1107  
    1108      /* Cast "type", because if we're in unicode we need to pass an
    1109         8-bit char. This is safe, because we've restricted what "type"
    1110         can be. */
    1111      buf = PyOS_double_to_string(val, (char)type, precision, flags,
    1112                                  &float_type);
    1113      if (buf == NULL)
    1114          goto done;
    1115      n_digits = strlen(buf);
    1116  
    1117      if (add_pct) {
    1118          /* We know that buf has a trailing zero (since we just called
    1119             strlen() on it), and we don't use that fact any more. So we
    1120             can just write over the trailing zero. */
    1121          buf[n_digits] = '%';
    1122          n_digits += 1;
    1123      }
    1124  
    1125      if (format->sign != '+' && format->sign != ' '
    1126          && format->width == -1
    1127          && format->type != 'n'
    1128          && !format->thousands_separators)
    1129      {
    1130          /* Fast path */
    1131          result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
    1132          PyMem_Free(buf);
    1133          return result;
    1134      }
    1135  
    1136      /* Since there is no unicode version of PyOS_double_to_string,
    1137         just use the 8 bit version and then convert to unicode. */
    1138      unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
    1139      PyMem_Free(buf);
    1140      if (unicode_tmp == NULL)
    1141          goto done;
    1142  
    1143      /* Is a sign character present in the output?  If so, remember it
    1144         and skip it */
    1145      index = 0;
    1146      if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
    1147          sign_char = '-';
    1148          ++index;
    1149          --n_digits;
    1150      }
    1151  
    1152      /* Determine if we have any "remainder" (after the digits, might include
    1153         decimal or exponent or both (or neither)) */
    1154      parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
    1155  
    1156      /* Determine the grouping, separator, and decimal point, if any. */
    1157      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1158                          format->thousands_separators,
    1159                          &locale) == -1)
    1160          goto done;
    1161  
    1162      /* Calculate how much memory we'll need. */
    1163      n_total = calc_number_widths(&spec, 0, sign_char, index,
    1164                                   index + n_digits, n_remainder, has_decimal,
    1165                                   &locale, format, &maxchar);
    1166      if (n_total == -1) {
    1167          goto done;
    1168      }
    1169  
    1170      /* Allocate the memory. */
    1171      if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
    1172          goto done;
    1173  
    1174      /* Populate the memory. */
    1175      result = fill_number(writer, &spec,
    1176                           unicode_tmp, index,
    1177                           NULL, 0, format->fill_char,
    1178                           &locale, 0);
    1179  
    1180  done:
    1181      Py_XDECREF(unicode_tmp);
    1182      free_locale_info(&locale);
    1183      return result;
    1184  }
    1185  
    1186  /************************************************************************/
    1187  /*********** complex formatting *****************************************/
    1188  /************************************************************************/
    1189  
    1190  static int
    1191  format_complex_internal(PyObject *value,
    1192                          const InternalFormatSpec *format,
    1193                          _PyUnicodeWriter *writer)
    1194  {
    1195      double re;
    1196      double im;
    1197      char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1198      char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1199  
    1200      InternalFormatSpec tmp_format = *format;
    1201      Py_ssize_t n_re_digits;
    1202      Py_ssize_t n_im_digits;
    1203      Py_ssize_t n_re_remainder;
    1204      Py_ssize_t n_im_remainder;
    1205      Py_ssize_t n_re_total;
    1206      Py_ssize_t n_im_total;
    1207      int re_has_decimal;
    1208      int im_has_decimal;
    1209      int precision, default_precision = 6;
    1210      Py_UCS4 type = format->type;
    1211      Py_ssize_t i_re;
    1212      Py_ssize_t i_im;
    1213      NumberFieldWidths re_spec;
    1214      NumberFieldWidths im_spec;
    1215      int flags = 0;
    1216      int result = -1;
    1217      Py_UCS4 maxchar = 127;
    1218      enum PyUnicode_Kind rkind;
    1219      void *rdata;
    1220      Py_UCS4 re_sign_char = '\0';
    1221      Py_UCS4 im_sign_char = '\0';
    1222      int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    1223      int im_float_type;
    1224      int add_parens = 0;
    1225      int skip_re = 0;
    1226      Py_ssize_t lpad;
    1227      Py_ssize_t rpad;
    1228      Py_ssize_t total;
    1229      PyObject *re_unicode_tmp = NULL;
    1230      PyObject *im_unicode_tmp = NULL;
    1231  
    1232      /* Locale settings, either from the actual locale or
    1233         from a hard-code pseudo-locale */
    1234      LocaleInfo locale = LocaleInfo_STATIC_INIT;
    1235  
    1236      if (format->precision > INT_MAX) {
    1237          PyErr_SetString(PyExc_ValueError, "precision too big");
    1238          goto done;
    1239      }
    1240      precision = (int)format->precision;
    1241  
    1242      /* Zero padding is not allowed. */
    1243      if (format->fill_char == '0') {
    1244          PyErr_SetString(PyExc_ValueError,
    1245                          "Zero padding is not allowed in complex format "
    1246                          "specifier");
    1247          goto done;
    1248      }
    1249  
    1250      /* Neither is '=' alignment . */
    1251      if (format->align == '=') {
    1252          PyErr_SetString(PyExc_ValueError,
    1253                          "'=' alignment flag is not allowed in complex format "
    1254                          "specifier");
    1255          goto done;
    1256      }
    1257  
    1258      re = PyComplex_RealAsDouble(value);
    1259      if (re == -1.0 && PyErr_Occurred())
    1260          goto done;
    1261      im = PyComplex_ImagAsDouble(value);
    1262      if (im == -1.0 && PyErr_Occurred())
    1263          goto done;
    1264  
    1265      if (format->alternate)
    1266          flags |= Py_DTSF_ALT;
    1267      if (format->no_neg_0)
    1268          flags |= Py_DTSF_NO_NEG_0;
    1269  
    1270      if (type == '\0') {
    1271          /* Omitted type specifier. Should be like str(self). */
    1272          type = 'r';
    1273          default_precision = 0;
    1274          if (re == 0.0 && copysign(1.0, re) == 1.0)
    1275              skip_re = 1;
    1276          else
    1277              add_parens = 1;
    1278      }
    1279  
    1280      if (type == 'n')
    1281          /* 'n' is the same as 'g', except for the locale used to
    1282             format the result. We take care of that later. */
    1283          type = 'g';
    1284  
    1285      if (precision < 0)
    1286          precision = default_precision;
    1287      else if (type == 'r')
    1288          type = 'g';
    1289  
    1290      /* Cast "type", because if we're in unicode we need to pass an
    1291         8-bit char. This is safe, because we've restricted what "type"
    1292         can be. */
    1293      re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
    1294                                     &re_float_type);
    1295      if (re_buf == NULL)
    1296          goto done;
    1297      im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
    1298                                     &im_float_type);
    1299      if (im_buf == NULL)
    1300          goto done;
    1301  
    1302      n_re_digits = strlen(re_buf);
    1303      n_im_digits = strlen(im_buf);
    1304  
    1305      /* Since there is no unicode version of PyOS_double_to_string,
    1306         just use the 8 bit version and then convert to unicode. */
    1307      re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    1308      if (re_unicode_tmp == NULL)
    1309          goto done;
    1310      i_re = 0;
    1311  
    1312      im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    1313      if (im_unicode_tmp == NULL)
    1314          goto done;
    1315      i_im = 0;
    1316  
    1317      /* Is a sign character present in the output?  If so, remember it
    1318         and skip it */
    1319      if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
    1320          re_sign_char = '-';
    1321          ++i_re;
    1322          --n_re_digits;
    1323      }
    1324      if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
    1325          im_sign_char = '-';
    1326          ++i_im;
    1327          --n_im_digits;
    1328      }
    1329  
    1330      /* Determine if we have any "remainder" (after the digits, might include
    1331         decimal or exponent or both (or neither)) */
    1332      parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
    1333                   &n_re_remainder, &re_has_decimal);
    1334      parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
    1335                   &n_im_remainder, &im_has_decimal);
    1336  
    1337      /* Determine the grouping, separator, and decimal point, if any. */
    1338      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1339                          format->thousands_separators,
    1340                          &locale) == -1)
    1341          goto done;
    1342  
    1343      /* Turn off any padding. We'll do it later after we've composed
    1344         the numbers without padding. */
    1345      tmp_format.fill_char = '\0';
    1346      tmp_format.align = '<';
    1347      tmp_format.width = -1;
    1348  
    1349      /* Calculate how much memory we'll need. */
    1350      n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
    1351                                      i_re, i_re + n_re_digits, n_re_remainder,
    1352                                      re_has_decimal, &locale, &tmp_format,
    1353                                      &maxchar);
    1354      if (n_re_total == -1) {
    1355          goto done;
    1356      }
    1357  
    1358      /* Same formatting, but always include a sign, unless the real part is
    1359       * going to be omitted, in which case we use whatever sign convention was
    1360       * requested by the original format. */
    1361      if (!skip_re)
    1362          tmp_format.sign = '+';
    1363      n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
    1364                                      i_im, i_im + n_im_digits, n_im_remainder,
    1365                                      im_has_decimal, &locale, &tmp_format,
    1366                                      &maxchar);
    1367      if (n_im_total == -1) {
    1368          goto done;
    1369      }
    1370  
    1371      if (skip_re)
    1372          n_re_total = 0;
    1373  
    1374      /* Add 1 for the 'j', and optionally 2 for parens. */
    1375      calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
    1376                   format->width, format->align, &lpad, &rpad, &total);
    1377  
    1378      if (lpad || rpad)
    1379          maxchar = Py_MAX(maxchar, format->fill_char);
    1380  
    1381      if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
    1382          goto done;
    1383      rkind = writer->kind;
    1384      rdata = writer->data;
    1385  
    1386      /* Populate the memory. First, the padding. */
    1387      result = fill_padding(writer,
    1388                            n_re_total + n_im_total + 1 + add_parens * 2,
    1389                            format->fill_char, lpad, rpad);
    1390      if (result == -1)
    1391          goto done;
    1392  
    1393      if (add_parens) {
    1394          PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
    1395          writer->pos++;
    1396      }
    1397  
    1398      if (!skip_re) {
    1399          result = fill_number(writer, &re_spec,
    1400                               re_unicode_tmp, i_re,
    1401                               NULL, 0,
    1402                               0,
    1403                               &locale, 0);
    1404          if (result == -1)
    1405              goto done;
    1406      }
    1407      result = fill_number(writer, &im_spec,
    1408                           im_unicode_tmp, i_im,
    1409                           NULL, 0,
    1410                           0,
    1411                           &locale, 0);
    1412      if (result == -1)
    1413          goto done;
    1414      PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    1415      writer->pos++;
    1416  
    1417      if (add_parens) {
    1418          PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
    1419          writer->pos++;
    1420      }
    1421  
    1422      writer->pos += rpad;
    1423  
    1424  done:
    1425      PyMem_Free(re_buf);
    1426      PyMem_Free(im_buf);
    1427      Py_XDECREF(re_unicode_tmp);
    1428      Py_XDECREF(im_unicode_tmp);
    1429      free_locale_info(&locale);
    1430      return result;
    1431  }
    1432  
    1433  /************************************************************************/
    1434  /*********** built in formatters ****************************************/
    1435  /************************************************************************/
    1436  static int
    1437  format_obj(PyObject *obj, _PyUnicodeWriter *writer)
    1438  {
    1439      PyObject *str;
    1440      int err;
    1441  
    1442      str = PyObject_Str(obj);
    1443      if (str == NULL)
    1444          return -1;
    1445      err = _PyUnicodeWriter_WriteStr(writer, str);
    1446      Py_DECREF(str);
    1447      return err;
    1448  }
    1449  
    1450  int
    1451  _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1452                                  PyObject *obj,
    1453                                  PyObject *format_spec,
    1454                                  Py_ssize_t start, Py_ssize_t end)
    1455  {
    1456      InternalFormatSpec format;
    1457  
    1458      assert(PyUnicode_Check(obj));
    1459  
    1460      /* check for the special case of zero length format spec, make
    1461         it equivalent to str(obj) */
    1462      if (start == end) {
    1463          if (PyUnicode_CheckExact(obj))
    1464              return _PyUnicodeWriter_WriteStr(writer, obj);
    1465          else
    1466              return format_obj(obj, writer);
    1467      }
    1468  
    1469      /* parse the format_spec */
    1470      if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1471                                             &format, 's', '<'))
    1472          return -1;
    1473  
    1474      /* type conversion? */
    1475      switch (format.type) {
    1476      case 's':
    1477          /* no type conversion needed, already a string.  do the formatting */
    1478          return format_string_internal(obj, &format, writer);
    1479      default:
    1480          /* unknown */
    1481          unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1482          return -1;
    1483      }
    1484  }
    1485  
    1486  int
    1487  _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1488                               PyObject *obj,
    1489                               PyObject *format_spec,
    1490                               Py_ssize_t start, Py_ssize_t end)
    1491  {
    1492      PyObject *tmp = NULL;
    1493      InternalFormatSpec format;
    1494      int result = -1;
    1495  
    1496      /* check for the special case of zero length format spec, make
    1497         it equivalent to str(obj) */
    1498      if (start == end) {
    1499          if (PyLong_CheckExact(obj))
    1500              return _PyLong_FormatWriter(writer, obj, 10, 0);
    1501          else
    1502              return format_obj(obj, writer);
    1503      }
    1504  
    1505      /* parse the format_spec */
    1506      if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1507                                             &format, 'd', '>'))
    1508          goto done;
    1509  
    1510      /* type conversion? */
    1511      switch (format.type) {
    1512      case 'b':
    1513      case 'c':
    1514      case 'd':
    1515      case 'o':
    1516      case 'x':
    1517      case 'X':
    1518      case 'n':
    1519          /* no type conversion needed, already an int.  do the formatting */
    1520          result = format_long_internal(obj, &format, writer);
    1521          break;
    1522  
    1523      case 'e':
    1524      case 'E':
    1525      case 'f':
    1526      case 'F':
    1527      case 'g':
    1528      case 'G':
    1529      case '%':
    1530          /* convert to float */
    1531          tmp = PyNumber_Float(obj);
    1532          if (tmp == NULL)
    1533              goto done;
    1534          result = format_float_internal(tmp, &format, writer);
    1535          break;
    1536  
    1537      default:
    1538          /* unknown */
    1539          unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1540          goto done;
    1541      }
    1542  
    1543  done:
    1544      Py_XDECREF(tmp);
    1545      return result;
    1546  }
    1547  
    1548  int
    1549  _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1550                                PyObject *obj,
    1551                                PyObject *format_spec,
    1552                                Py_ssize_t start, Py_ssize_t end)
    1553  {
    1554      InternalFormatSpec format;
    1555  
    1556      /* check for the special case of zero length format spec, make
    1557         it equivalent to str(obj) */
    1558      if (start == end)
    1559          return format_obj(obj, writer);
    1560  
    1561      /* parse the format_spec */
    1562      if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1563                                             &format, '\0', '>'))
    1564          return -1;
    1565  
    1566      /* type conversion? */
    1567      switch (format.type) {
    1568      case '\0': /* No format code: like 'g', but with at least one decimal. */
    1569      case 'e':
    1570      case 'E':
    1571      case 'f':
    1572      case 'F':
    1573      case 'g':
    1574      case 'G':
    1575      case 'n':
    1576      case '%':
    1577          /* no conversion, already a float.  do the formatting */
    1578          return format_float_internal(obj, &format, writer);
    1579  
    1580      default:
    1581          /* unknown */
    1582          unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1583          return -1;
    1584      }
    1585  }
    1586  
    1587  int
    1588  _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1589                                  PyObject *obj,
    1590                                  PyObject *format_spec,
    1591                                  Py_ssize_t start, Py_ssize_t end)
    1592  {
    1593      InternalFormatSpec format;
    1594  
    1595      /* check for the special case of zero length format spec, make
    1596         it equivalent to str(obj) */
    1597      if (start == end)
    1598          return format_obj(obj, writer);
    1599  
    1600      /* parse the format_spec */
    1601      if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1602                                             &format, '\0', '>'))
    1603          return -1;
    1604  
    1605      /* type conversion? */
    1606      switch (format.type) {
    1607      case '\0': /* No format code: like 'g', but with at least one decimal. */
    1608      case 'e':
    1609      case 'E':
    1610      case 'f':
    1611      case 'F':
    1612      case 'g':
    1613      case 'G':
    1614      case 'n':
    1615          /* no conversion, already a complex.  do the formatting */
    1616          return format_complex_internal(obj, &format, writer);
    1617  
    1618      default:
    1619          /* unknown */
    1620          unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1621          return -1;
    1622      }
    1623  }