1  /* -*- Mode: C; c-file-style: "python" -*- */
       2  
       3  #include <Python.h>
       4  #include "pycore_dtoa.h"          // _Py_dg_strtod()
       5  #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
       6  #include <locale.h>
       7  
       8  /* Case-insensitive string match used for nan and inf detection; t should be
       9     lower-case.  Returns 1 for a successful match, 0 otherwise. */
      10  
      11  static int
      12  case_insensitive_match(const char *s, const char *t)
      13  {
      14      while(*t && Py_TOLOWER(*s) == *t) {
      15          s++;
      16          t++;
      17      }
      18      return *t ? 0 : 1;
      19  }
      20  
      21  /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
      22     "infinity", with an optional leading sign of "+" or "-".  On success,
      23     return the NaN or Infinity as a double and set *endptr to point just beyond
      24     the successfully parsed portion of the string.  On failure, return -1.0 and
      25     set *endptr to point to the start of the string. */
      26  
      27  #if _PY_SHORT_FLOAT_REPR == 1
      28  
      29  double
      30  _Py_parse_inf_or_nan(const char *p, char **endptr)
      31  {
      32      double retval;
      33      const char *s;
      34      int negate = 0;
      35  
      36      s = p;
      37      if (*s == '-') {
      38          negate = 1;
      39          s++;
      40      }
      41      else if (*s == '+') {
      42          s++;
      43      }
      44      if (case_insensitive_match(s, "inf")) {
      45          s += 3;
      46          if (case_insensitive_match(s, "inity"))
      47              s += 5;
      48          retval = _Py_dg_infinity(negate);
      49      }
      50      else if (case_insensitive_match(s, "nan")) {
      51          s += 3;
      52          retval = _Py_dg_stdnan(negate);
      53      }
      54      else {
      55          s = p;
      56          retval = -1.0;
      57      }
      58      *endptr = (char *)s;
      59      return retval;
      60  }
      61  
      62  #else
      63  
      64  double
      65  _Py_parse_inf_or_nan(const char *p, char **endptr)
      66  {
      67      double retval;
      68      const char *s;
      69      int negate = 0;
      70  
      71      s = p;
      72      if (*s == '-') {
      73          negate = 1;
      74          s++;
      75      }
      76      else if (*s == '+') {
      77          s++;
      78      }
      79      if (case_insensitive_match(s, "inf")) {
      80          s += 3;
      81          if (case_insensitive_match(s, "inity"))
      82              s += 5;
      83          retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
      84      }
      85      else if (case_insensitive_match(s, "nan")) {
      86          s += 3;
      87          retval = negate ? -Py_NAN : Py_NAN;
      88      }
      89      else {
      90          s = p;
      91          retval = -1.0;
      92      }
      93      *endptr = (char *)s;
      94      return retval;
      95  }
      96  
      97  #endif
      98  
      99  /**
     100   * _PyOS_ascii_strtod:
     101   * @nptr:    the string to convert to a numeric value.
     102   * @endptr:  if non-%NULL, it returns the character after
     103   *           the last character used in the conversion.
     104   *
 * Converts a string to a double value.
     106   * This function behaves like the standard strtod() function
     107   * does in the C locale. It does this without actually
     108   * changing the current locale, since that would not be
     109   * thread-safe.
     110   *
     111   * This function is typically used when reading configuration
     112   * files or other non-user input that should be locale independent.
     113   * To handle input from the user you should normally use the
     114   * locale-sensitive system strtod() function.
     115   *
     116   * If the correct value would cause overflow, plus or minus %HUGE_VAL
     117   * is returned (according to the sign of the value), and %ERANGE is
     118   * stored in %errno. If the correct value would cause underflow,
     119   * zero is returned and %ERANGE is stored in %errno.
     120   * If memory allocation fails, %ENOMEM is stored in %errno.
     121   *
     122   * This function resets %errno before calling strtod() so that
     123   * you can reliably detect overflow and underflow.
     124   *
 * Return value: the double value.
     126   **/
     127  
     128  #if _PY_SHORT_FLOAT_REPR == 1
     129  
     130  static double
     131  _PyOS_ascii_strtod(const char *nptr, char **endptr)
     132  {
     133      double result;
     134      _Py_SET_53BIT_PRECISION_HEADER;
     135  
     136      assert(nptr != NULL);
     137      /* Set errno to zero, so that we can distinguish zero results
     138         and underflows */
     139      errno = 0;
     140  
     141      _Py_SET_53BIT_PRECISION_START;
     142      result = _Py_dg_strtod(nptr, endptr);
     143      _Py_SET_53BIT_PRECISION_END;
     144  
     145      if (*endptr == nptr)
     146          /* string might represent an inf or nan */
     147          result = _Py_parse_inf_or_nan(nptr, endptr);
     148  
     149      return result;
     150  
     151  }
     152  
     153  #else
     154  
     155  /*
     156     Use system strtod;  since strtod is locale aware, we may
     157     have to first fix the decimal separator.
     158  
     159     Note that unlike _Py_dg_strtod, the system strtod may not always give
     160     correctly rounded results.
     161  */
     162  
     163  static double
     164  _PyOS_ascii_strtod(const char *nptr, char **endptr)
     165  {
     166      char *fail_pos;
     167      double val;
     168      struct lconv *locale_data;
     169      const char *decimal_point;
     170      size_t decimal_point_len;
     171      const char *p, *decimal_point_pos;
     172      const char *end = NULL; /* Silence gcc */
     173      const char *digits_pos = NULL;
     174      int negate = 0;
     175  
     176      assert(nptr != NULL);
     177  
     178      fail_pos = NULL;
     179  
     180      locale_data = localeconv();
     181      decimal_point = locale_data->decimal_point;
     182      decimal_point_len = strlen(decimal_point);
     183  
     184      assert(decimal_point_len != 0);
     185  
     186      decimal_point_pos = NULL;
     187  
     188      /* Parse infinities and nans */
     189      val = _Py_parse_inf_or_nan(nptr, endptr);
     190      if (*endptr != nptr)
     191          return val;
     192  
     193      /* Set errno to zero, so that we can distinguish zero results
     194         and underflows */
     195      errno = 0;
     196  
     197      /* We process the optional sign manually, then pass the remainder to
     198         the system strtod.  This ensures that the result of an underflow
     199         has the correct sign. (bug #1725)  */
     200      p = nptr;
     201      /* Process leading sign, if present */
     202      if (*p == '-') {
     203          negate = 1;
     204          p++;
     205      }
     206      else if (*p == '+') {
     207          p++;
     208      }
     209  
     210      /* Some platform strtods accept hex floats; Python shouldn't (at the
     211         moment), so we check explicitly for strings starting with '0x'. */
     212      if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
     213          goto invalid_string;
     214  
     215      /* Check that what's left begins with a digit or decimal point */
     216      if (!Py_ISDIGIT(*p) && *p != '.')
     217          goto invalid_string;
     218  
     219      digits_pos = p;
     220      if (decimal_point[0] != '.' ||
     221          decimal_point[1] != 0)
     222      {
     223          /* Look for a '.' in the input; if present, it'll need to be
     224             swapped for the current locale's decimal point before we
     225             call strtod.  On the other hand, if we find the current
     226             locale's decimal point then the input is invalid. */
     227          while (Py_ISDIGIT(*p))
     228              p++;
     229  
     230          if (*p == '.')
     231          {
     232              decimal_point_pos = p++;
     233  
     234              /* locate end of number */
     235              while (Py_ISDIGIT(*p))
     236                  p++;
     237  
     238              if (*p == 'e' || *p == 'E')
     239                  p++;
     240              if (*p == '+' || *p == '-')
     241                  p++;
     242              while (Py_ISDIGIT(*p))
     243                  p++;
     244              end = p;
     245          }
     246          else if (strncmp(p, decimal_point, decimal_point_len) == 0)
     247              /* Python bug #1417699 */
     248              goto invalid_string;
     249          /* For the other cases, we need not convert the decimal
     250             point */
     251      }
     252  
     253      if (decimal_point_pos) {
     254          char *copy, *c;
     255          /* Create a copy of the input, with the '.' converted to the
     256             locale-specific decimal point */
     257          copy = (char *)PyMem_Malloc(end - digits_pos +
     258                                      1 + decimal_point_len);
     259          if (copy == NULL) {
     260              *endptr = (char *)nptr;
     261              errno = ENOMEM;
     262              return val;
     263          }
     264  
     265          c = copy;
     266          memcpy(c, digits_pos, decimal_point_pos - digits_pos);
     267          c += decimal_point_pos - digits_pos;
     268          memcpy(c, decimal_point, decimal_point_len);
     269          c += decimal_point_len;
     270          memcpy(c, decimal_point_pos + 1,
     271                 end - (decimal_point_pos + 1));
     272          c += end - (decimal_point_pos + 1);
     273          *c = 0;
     274  
     275          val = strtod(copy, &fail_pos);
     276  
     277          if (fail_pos)
     278          {
     279              if (fail_pos > decimal_point_pos)
     280                  fail_pos = (char *)digits_pos +
     281                      (fail_pos - copy) -
     282                      (decimal_point_len - 1);
     283              else
     284                  fail_pos = (char *)digits_pos +
     285                      (fail_pos - copy);
     286          }
     287  
     288          PyMem_Free(copy);
     289  
     290      }
     291      else {
     292          val = strtod(digits_pos, &fail_pos);
     293      }
     294  
     295      if (fail_pos == digits_pos)
     296          goto invalid_string;
     297  
     298      if (negate && fail_pos != nptr)
     299          val = -val;
     300      *endptr = fail_pos;
     301  
     302      return val;
     303  
     304    invalid_string:
     305      *endptr = (char*)nptr;
     306      errno = EINVAL;
     307      return -1.0;
     308  }
     309  
     310  #endif
     311  
     312  /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
     313     as a string of ASCII characters) to a float.  The string should not have
     314     leading or trailing whitespace.  The conversion is independent of the
     315     current locale.
     316  
     317     If endptr is NULL, try to convert the whole string.  Raise ValueError and
     318     return -1.0 if the string is not a valid representation of a floating-point
     319     number.
     320  
     321     If endptr is non-NULL, try to convert as much of the string as possible.
     322     If no initial segment of the string is the valid representation of a
     323     floating-point number then *endptr is set to point to the beginning of the
     324     string, -1.0 is returned and again ValueError is raised.
     325  
     326     On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
     327     if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
     328     exception is raised.  Otherwise, overflow_exception should point to
     329     a Python exception, this exception will be raised, -1.0 will be returned,
     330     and *endptr will point just past the end of the converted value.
     331  
     332     If any other failure occurs (for example lack of memory), -1.0 is returned
     333     and the appropriate Python exception will have been set.
     334  */
     335  
     336  double
     337  PyOS_string_to_double(const char *s,
     338                        char **endptr,
     339                        PyObject *overflow_exception)
     340  {
     341      double x, result=-1.0;
     342      char *fail_pos;
     343  
     344      errno = 0;
     345      x = _PyOS_ascii_strtod(s, &fail_pos);
     346  
     347      if (errno == ENOMEM) {
     348          PyErr_NoMemory();
     349          fail_pos = (char *)s;
     350      }
     351      else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
     352          PyErr_Format(PyExc_ValueError,
     353                        "could not convert string to float: "
     354                        "'%.200s'", s);
     355      else if (fail_pos == s)
     356          PyErr_Format(PyExc_ValueError,
     357                        "could not convert string to float: "
     358                        "'%.200s'", s);
     359      else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
     360          PyErr_Format(overflow_exception,
     361                        "value too large to convert to float: "
     362                        "'%.200s'", s);
     363      else
     364          result = x;
     365  
     366      if (endptr != NULL)
     367          *endptr = fail_pos;
     368      return result;
     369  }
     370  
     371  /* Remove underscores that follow the underscore placement rule from
     372     the string and then call the `innerfunc` function on the result.
     373     It should return a new object or NULL on exception.
     374  
     375     `what` is used for the error message emitted when underscores are detected
     376     that don't follow the rule. `arg` is an opaque pointer passed to the inner
     377     function.
     378  
     379     This is used to implement underscore-agnostic conversion for floats
     380     and complex numbers.
     381  */
     382  PyObject *
     383  _Py_string_to_number_with_underscores(
     384      const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
     385      PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
     386  {
     387      char prev;
     388      const char *p, *last;
     389      char *dup, *end;
     390      PyObject *result;
     391  
     392      assert(s[orig_len] == '\0');
     393  
     394      if (strchr(s, '_') == NULL) {
     395          return innerfunc(s, orig_len, arg);
     396      }
     397  
     398      dup = PyMem_Malloc(orig_len + 1);
     399      if (dup == NULL) {
     400          return PyErr_NoMemory();
     401      }
     402      end = dup;
     403      prev = '\0';
     404      last = s + orig_len;
     405      for (p = s; *p; p++) {
     406          if (*p == '_') {
     407              /* Underscores are only allowed after digits. */
     408              if (!(prev >= '0' && prev <= '9')) {
     409                  goto error;
     410              }
     411          }
     412          else {
     413              *end++ = *p;
     414              /* Underscores are only allowed before digits. */
     415              if (prev == '_' && !(*p >= '0' && *p <= '9')) {
     416                  goto error;
     417              }
     418          }
     419          prev = *p;
     420      }
     421      /* Underscores are not allowed at the end. */
     422      if (prev == '_') {
     423          goto error;
     424      }
     425      /* No embedded NULs allowed. */
     426      if (p != last) {
     427          goto error;
     428      }
     429      *end = '\0';
     430      result = innerfunc(dup, end - dup, arg);
     431      PyMem_Free(dup);
     432      return result;
     433  
     434    error:
     435      PyMem_Free(dup);
     436      PyErr_Format(PyExc_ValueError,
     437                   "could not convert string to %s: "
     438                   "%R", what, obj);
     439      return NULL;
     440  }
     441  
     442  #if _PY_SHORT_FLOAT_REPR == 0
     443  
     444  /* Given a string that may have a decimal point in the current
     445     locale, change it back to a dot.  Since the string cannot get
     446     longer, no need for a maximum buffer size parameter. */
     447  Py_LOCAL_INLINE(void)
     448  change_decimal_from_locale_to_dot(char* buffer)
     449  {
     450      struct lconv *locale_data = localeconv();
     451      const char *decimal_point = locale_data->decimal_point;
     452  
     453      if (decimal_point[0] != '.' || decimal_point[1] != 0) {
     454          size_t decimal_point_len = strlen(decimal_point);
     455  
     456          if (*buffer == '+' || *buffer == '-')
     457              buffer++;
     458          while (Py_ISDIGIT(*buffer))
     459              buffer++;
     460          if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
     461              *buffer = '.';
     462              buffer++;
     463              if (decimal_point_len > 1) {
     464                  /* buffer needs to get smaller */
     465                  size_t rest_len = strlen(buffer +
     466                                       (decimal_point_len - 1));
     467                  memmove(buffer,
     468                      buffer + (decimal_point_len - 1),
     469                      rest_len);
     470                  buffer[rest_len] = 0;
     471              }
     472          }
     473      }
     474  }
     475  
     476  
     477  /* From the C99 standard, section 7.19.6:
     478  The exponent always contains at least two digits, and only as many more digits
     479  as necessary to represent the exponent.
     480  */
     481  #define MIN_EXPONENT_DIGITS 2
     482  
     483  /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
     484     in length. */
     485  Py_LOCAL_INLINE(void)
     486  ensure_minimum_exponent_length(char* buffer, size_t buf_size)
     487  {
     488      char *p = strpbrk(buffer, "eE");
     489      if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
     490          char *start = p + 2;
     491          int exponent_digit_cnt = 0;
     492          int leading_zero_cnt = 0;
     493          int in_leading_zeros = 1;
     494          int significant_digit_cnt;
     495  
     496          /* Skip over the exponent and the sign. */
     497          p += 2;
     498  
     499          /* Find the end of the exponent, keeping track of leading
     500             zeros. */
     501          while (*p && Py_ISDIGIT(*p)) {
     502              if (in_leading_zeros && *p == '0')
     503                  ++leading_zero_cnt;
     504              if (*p != '0')
     505                  in_leading_zeros = 0;
     506              ++p;
     507              ++exponent_digit_cnt;
     508          }
     509  
     510          significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
     511          if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
     512              /* If there are 2 exactly digits, we're done,
     513                 regardless of what they contain */
     514          }
     515          else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
     516              int extra_zeros_cnt;
     517  
     518              /* There are more than 2 digits in the exponent.  See
     519                 if we can delete some of the leading zeros */
     520              if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
     521                  significant_digit_cnt = MIN_EXPONENT_DIGITS;
     522              extra_zeros_cnt = exponent_digit_cnt -
     523                  significant_digit_cnt;
     524  
     525              /* Delete extra_zeros_cnt worth of characters from the
     526                 front of the exponent */
     527              assert(extra_zeros_cnt >= 0);
     528  
     529              /* Add one to significant_digit_cnt to copy the
     530                 trailing 0 byte, thus setting the length */
     531              memmove(start,
     532                  start + extra_zeros_cnt,
     533                  significant_digit_cnt + 1);
     534          }
     535          else {
     536              /* If there are fewer than 2 digits, add zeros
     537                 until there are 2, if there's enough room */
     538              int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
     539              if (start + zeros + exponent_digit_cnt + 1
     540                    < buffer + buf_size) {
     541                  memmove(start + zeros, start,
     542                      exponent_digit_cnt + 1);
     543                  memset(start, '0', zeros);
     544              }
     545          }
     546      }
     547  }
     548  
     549  /* Remove trailing zeros after the decimal point from a numeric string; also
     550     remove the decimal point if all digits following it are zero.  The numeric
     551     string must end in '\0', and should not have any leading or trailing
     552     whitespace.  Assumes that the decimal point is '.'. */
     553  Py_LOCAL_INLINE(void)
     554  remove_trailing_zeros(char *buffer)
     555  {
     556      char *old_fraction_end, *new_fraction_end, *end, *p;
     557  
     558      p = buffer;
     559      if (*p == '-' || *p == '+')
     560          /* Skip leading sign, if present */
     561          ++p;
     562      while (Py_ISDIGIT(*p))
     563          ++p;
     564  
     565      /* if there's no decimal point there's nothing to do */
     566      if (*p++ != '.')
     567          return;
     568  
     569      /* scan any digits after the point */
     570      while (Py_ISDIGIT(*p))
     571          ++p;
     572      old_fraction_end = p;
     573  
     574      /* scan up to ending '\0' */
     575      while (*p != '\0')
     576          p++;
     577      /* +1 to make sure that we move the null byte as well */
     578      end = p+1;
     579  
     580      /* scan back from fraction_end, looking for removable zeros */
     581      p = old_fraction_end;
     582      while (*(p-1) == '0')
     583          --p;
     584      /* and remove point if we've got that far */
     585      if (*(p-1) == '.')
     586          --p;
     587      new_fraction_end = p;
     588  
     589      memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
     590  }
     591  
     592  /* Ensure that buffer has a decimal point in it.  The decimal point will not
     593     be in the current locale, it will always be '.'. Don't add a decimal point
     594     if an exponent is present.  Also, convert to exponential notation where
     595     adding a '.0' would produce too many significant digits (see issue 5864).
     596  
     597     Returns a pointer to the fixed buffer, or NULL on failure.
     598  */
     599  Py_LOCAL_INLINE(char *)
     600  ensure_decimal_point(char* buffer, size_t buf_size, int precision)
     601  {
     602      int digit_count, insert_count = 0, convert_to_exp = 0;
     603      const char *chars_to_insert;
     604      char *digits_start;
     605  
     606      /* search for the first non-digit character */
     607      char *p = buffer;
     608      if (*p == '-' || *p == '+')
     609          /* Skip leading sign, if present.  I think this could only
     610             ever be '-', but it can't hurt to check for both. */
     611          ++p;
     612      digits_start = p;
     613      while (*p && Py_ISDIGIT(*p))
     614          ++p;
     615      digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
     616  
     617      if (*p == '.') {
     618          if (Py_ISDIGIT(*(p+1))) {
     619              /* Nothing to do, we already have a decimal
     620                 point and a digit after it */
     621          }
     622          else {
     623              /* We have a decimal point, but no following
     624                 digit.  Insert a zero after the decimal. */
     625              /* can't ever get here via PyOS_double_to_string */
     626              assert(precision == -1);
     627              ++p;
     628              chars_to_insert = "0";
     629              insert_count = 1;
     630          }
     631      }
     632      else if (!(*p == 'e' || *p == 'E')) {
     633          /* Don't add ".0" if we have an exponent. */
     634          if (digit_count == precision) {
     635              /* issue 5864: don't add a trailing .0 in the case
     636                 where the '%g'-formatted result already has as many
     637                 significant digits as were requested.  Switch to
     638                 exponential notation instead. */
     639              convert_to_exp = 1;
     640              /* no exponent, no point, and we shouldn't land here
     641                 for infs and nans, so we must be at the end of the
     642                 string. */
     643              assert(*p == '\0');
     644          }
     645          else {
     646              assert(precision == -1 || digit_count < precision);
     647              chars_to_insert = ".0";
     648              insert_count = 2;
     649          }
     650      }
     651      if (insert_count) {
     652          size_t buf_len = strlen(buffer);
     653          if (buf_len + insert_count + 1 >= buf_size) {
     654              /* If there is not enough room in the buffer
     655                 for the additional text, just skip it.  It's
     656                 not worth generating an error over. */
     657          }
     658          else {
     659              memmove(p + insert_count, p,
     660                  buffer + strlen(buffer) - p + 1);
     661              memcpy(p, chars_to_insert, insert_count);
     662          }
     663      }
     664      if (convert_to_exp) {
     665          int written;
     666          size_t buf_avail;
     667          p = digits_start;
     668          /* insert decimal point */
     669          assert(digit_count >= 1);
     670          memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
     671          p[1] = '.';
     672          p += digit_count+1;
     673          assert(p <= buf_size+buffer);
     674          buf_avail = buf_size+buffer-p;
     675          if (buf_avail == 0)
     676              return NULL;
     677          /* Add exponent.  It's okay to use lower case 'e': we only
     678             arrive here as a result of using the empty format code or
     679             repr/str builtins and those never want an upper case 'E' */
     680          written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
     681          if (!(0 <= written &&
     682                written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
     683              /* output truncated, or something else bad happened */
     684              return NULL;
     685          remove_trailing_zeros(buffer);
     686      }
     687      return buffer;
     688  }
     689  
     690  /* see FORMATBUFLEN in unicodeobject.c */
     691  #define FLOAT_FORMATBUFLEN 120
     692  
     693  /**
     694   * _PyOS_ascii_formatd:
     695   * @buffer: A buffer to place the resulting string in
     696   * @buf_size: The length of the buffer.
     697   * @format: The printf()-style format to use for the
     698   *          code to use for converting.
     699   * @d: The #gdouble to convert
     700   * @precision: The precision to use when formatting.
     701   *
     702   * Converts a #gdouble to a string, using the '.' as
     703   * decimal point. To format the number you pass in
     704   * a printf()-style format string. Allowed conversion
     705   * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
     706   *
     707   * 'Z' is the same as 'g', except it always has a decimal and
     708   *     at least one digit after the decimal.
     709   *
     710   * Return value: The pointer to the buffer with the converted string.
     711   * On failure returns NULL but does not set any Python exception.
     712   **/
     713  static char *
     714  _PyOS_ascii_formatd(char       *buffer,
     715                     size_t      buf_size,
     716                     const char *format,
     717                     double      d,
     718                     int         precision)
     719  {
     720      char format_char;
     721      size_t format_len = strlen(format);
     722  
     723      /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
     724         also with at least one character past the decimal. */
     725      char tmp_format[FLOAT_FORMATBUFLEN];
     726  
     727      /* The last character in the format string must be the format char */
     728      format_char = format[format_len - 1];
     729  
     730      if (format[0] != '%')
     731          return NULL;
     732  
     733      /* I'm not sure why this test is here.  It's ensuring that the format
     734         string after the first character doesn't have a single quote, a
     735         lowercase l, or a percent. This is the reverse of the commented-out
     736         test about 10 lines ago. */
     737      if (strpbrk(format + 1, "'l%"))
     738          return NULL;
     739  
     740      /* Also curious about this function is that it accepts format strings
     741         like "%xg", which are invalid for floats.  In general, the
     742         interface to this function is not very good, but changing it is
     743         difficult because it's a public API. */
     744  
     745      if (!(format_char == 'e' || format_char == 'E' ||
     746            format_char == 'f' || format_char == 'F' ||
     747            format_char == 'g' || format_char == 'G' ||
     748            format_char == 'Z'))
     749          return NULL;
     750  
     751      /* Map 'Z' format_char to 'g', by copying the format string and
     752         replacing the final char with a 'g' */
     753      if (format_char == 'Z') {
     754          if (format_len + 1 >= sizeof(tmp_format)) {
     755              /* The format won't fit in our copy.  Error out.  In
     756                 practice, this will never happen and will be
     757                 detected by returning NULL */
     758              return NULL;
     759          }
     760          strcpy(tmp_format, format);
     761          tmp_format[format_len - 1] = 'g';
     762          format = tmp_format;
     763      }
     764  
     765  
     766      /* Have PyOS_snprintf do the hard work */
     767      PyOS_snprintf(buffer, buf_size, format, d);
     768  
     769      /* Do various fixups on the return string */
     770  
     771      /* Get the current locale, and find the decimal point string.
     772         Convert that string back to a dot. */
     773      change_decimal_from_locale_to_dot(buffer);
     774  
     775      /* If an exponent exists, ensure that the exponent is at least
     776         MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
     777         for the extra zeros.  Also, if there are more than
     778         MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
     779         back to MIN_EXPONENT_DIGITS */
     780      ensure_minimum_exponent_length(buffer, buf_size);
     781  
    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
     787      if (format_char == 'Z')
     788          buffer = ensure_decimal_point(buffer, buf_size, precision);
     789  
     790      return buffer;
     791  }
     792  
     793  /* The fallback code to use if _Py_dg_dtoa is not available. */
     794  
     795  char * PyOS_double_to_string(double val,
     796                                           char format_code,
     797                                           int precision,
     798                                           int flags,
     799                                           int *type)
     800  {
     801      char format[32];
     802      Py_ssize_t bufsize;
     803      char *buf;
     804      int t, exp;
     805      int upper = 0;
     806  
     807      /* Validate format_code, and map upper and lower case */
     808      switch (format_code) {
     809      case 'e':          /* exponent */
     810      case 'f':          /* fixed */
     811      case 'g':          /* general */
     812          break;
     813      case 'E':
     814          upper = 1;
     815          format_code = 'e';
     816          break;
     817      case 'F':
     818          upper = 1;
     819          format_code = 'f';
     820          break;
     821      case 'G':
     822          upper = 1;
     823          format_code = 'g';
     824          break;
     825      case 'r':          /* repr format */
     826          /* Supplied precision is unused, must be 0. */
     827          if (precision != 0) {
     828              PyErr_BadInternalCall();
     829              return NULL;
     830          }
     831          /* The repr() precision (17 significant decimal digits) is the
     832             minimal number that is guaranteed to have enough precision
     833             so that if the number is read back in the exact same binary
     834             value is recreated.  This is true for IEEE floating point
     835             by design, and also happens to work for all other modern
     836             hardware. */
     837          precision = 17;
     838          format_code = 'g';
     839          break;
     840      default:
     841          PyErr_BadInternalCall();
     842          return NULL;
     843      }
     844  
     845      /* Here's a quick-and-dirty calculation to figure out how big a buffer
     846         we need.  In general, for a finite float we need:
     847  
     848           1 byte for each digit of the decimal significand, and
     849  
     850           1 for a possible sign
     851           1 for a possible decimal point
     852           2 for a possible [eE][+-]
     853           1 for each digit of the exponent;  if we allow 19 digits
     854             total then we're safe up to exponents of 2**63.
     855           1 for the trailing nul byte
     856  
     857         This gives a total of 24 + the number of digits in the significand,
     858         and the number of digits in the significand is:
     859  
     860           for 'g' format: at most precision, except possibly
     861             when precision == 0, when it's 1.
     862           for 'e' format: precision+1
     863           for 'f' format: precision digits after the point, at least 1
     864             before.  To figure out how many digits appear before the point
     865             we have to examine the size of the number.  If fabs(val) < 1.0
     866             then there will be only one digit before the point.  If
     867             fabs(val) >= 1.0, then there are at most
     868  
     869           1+floor(log10(ceiling(fabs(val))))
     870  
     871             digits before the point (where the 'ceiling' allows for the
     872             possibility that the rounding rounds the integer part of val
     873             up).  A safe upper bound for the above quantity is
     874             1+floor(exp/3), where exp is the unique integer such that 0.5
     875             <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
     876             frexp.
     877  
     878         So we allow room for precision+1 digits for all formats, plus an
     879         extra floor(exp/3) digits for 'f' format.
     880  
     881      */
     882  
     883      if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
     884          /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
     885          bufsize = 5;
     886      else {
     887          bufsize = 25 + precision;
     888          if (format_code == 'f' && fabs(val) >= 1.0) {
     889              frexp(val, &exp);
     890              bufsize += exp/3;
     891          }
     892      }
     893  
     894      buf = PyMem_Malloc(bufsize);
     895      if (buf == NULL) {
     896          PyErr_NoMemory();
     897          return NULL;
     898      }
     899  
     900      /* Handle nan and inf. */
     901      if (Py_IS_NAN(val)) {
     902          strcpy(buf, "nan");
     903          t = Py_DTST_NAN;
     904      } else if (Py_IS_INFINITY(val)) {
     905          if (copysign(1., val) == 1.)
     906              strcpy(buf, "inf");
     907          else
     908              strcpy(buf, "-inf");
     909          t = Py_DTST_INFINITE;
     910      } else {
     911          t = Py_DTST_FINITE;
     912          if (flags & Py_DTSF_ADD_DOT_0)
     913              format_code = 'Z';
     914  
     915          PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
     916                        (flags & Py_DTSF_ALT ? "#" : ""), precision,
     917                        format_code);
     918          _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
     919  
     920          if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
     921              char *buf2 = buf + 1;
     922              while (*buf2 == '0' || *buf2 == '.') {
     923                  ++buf2;
     924              }
     925              if (*buf2 == 0 || *buf2 == 'e') {
     926                  size_t len = buf2 - buf + strlen(buf2);
     927                  assert(buf[len] == 0);
     928                  memmove(buf, buf+1, len);
     929              }
     930          }
     931      }
     932  
     933      /* Add sign when requested.  It's convenient (esp. when formatting
     934       complex numbers) to include a sign even for inf and nan. */
     935      if (flags & Py_DTSF_SIGN && buf[0] != '-') {
     936          size_t len = strlen(buf);
     937          /* the bufsize calculations above should ensure that we've got
     938             space to add a sign */
     939          assert((size_t)bufsize >= len+2);
     940          memmove(buf+1, buf, len+1);
     941          buf[0] = '+';
     942      }
     943      if (upper) {
     944          /* Convert to upper case. */
     945          char *p1;
     946          for (p1 = buf; *p1; p1++)
     947              *p1 = Py_TOUPPER(*p1);
     948      }
     949  
     950      if (type)
     951          *type = t;
     952      return buf;
     953  }
     954  
     955  #else  // _PY_SHORT_FLOAT_REPR == 1
     956  
     957  /* _Py_dg_dtoa is available. */
     958  
     959  /* I'm using a lookup table here so that I don't have to invent a non-locale
     960     specific way to convert to uppercase */
     961  #define OFS_INF 0
     962  #define OFS_NAN 1
     963  #define OFS_E 2
     964  
     965  /* The lengths of these are known to the code below, so don't change them */
     966  static const char * const lc_float_strings[] = {
     967      "inf",
     968      "nan",
     969      "e",
     970  };
     971  static const char * const uc_float_strings[] = {
     972      "INF",
     973      "NAN",
     974      "E",
     975  };
     976  
     977  
     978  /* Convert a double d to a string, and return a PyMem_Malloc'd block of
     979     memory contain the resulting string.
     980  
     981     Arguments:
     982       d is the double to be converted
     983       format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
     984         correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
     985       mode is one of '0', '2' or '3', and is completely determined by
     986         format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
     987       precision is the desired precision
     988       always_add_sign is nonzero if a '+' sign should be included for positive
     989         numbers
     990       add_dot_0_if_integer is nonzero if integers in non-exponential form
     991         should have ".0" added.  Only applies to format codes 'r' and 'g'.
     992       use_alt_formatting is nonzero if alternative formatting should be
     993         used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
     994         at most one of use_alt_formatting and add_dot_0_if_integer should
     995         be nonzero.
     996       type, if non-NULL, will be set to one of these constants to identify
     997         the type of the 'd' argument:
     998       Py_DTST_FINITE
     999       Py_DTST_INFINITE
    1000       Py_DTST_NAN
    1001  
    1002     Returns a PyMem_Malloc'd block of memory containing the resulting string,
    1003      or NULL on error. If NULL is returned, the Python error has been set.
    1004   */
    1005  
    1006  static char *
    1007  format_float_short(double d, char format_code,
    1008                     int mode, int precision,
    1009                     int always_add_sign, int add_dot_0_if_integer,
    1010                     int use_alt_formatting, int no_negative_zero,
    1011                     const char * const *float_strings, int *type)
    1012  {
    1013      char *buf = NULL;
    1014      char *p = NULL;
    1015      Py_ssize_t bufsize = 0;
    1016      char *digits, *digits_end;
    1017      int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
    1018      Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
    1019      _Py_SET_53BIT_PRECISION_HEADER;
    1020  
    1021      /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
    1022         Must be matched by a call to _Py_dg_freedtoa. */
    1023      _Py_SET_53BIT_PRECISION_START;
    1024      digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
    1025                           &digits_end);
    1026      _Py_SET_53BIT_PRECISION_END;
    1027  
    1028      decpt = (Py_ssize_t)decpt_as_int;
    1029      if (digits == NULL) {
    1030          /* The only failure mode is no memory. */
    1031          PyErr_NoMemory();
    1032          goto exit;
    1033      }
    1034      assert(digits_end != NULL && digits_end >= digits);
    1035      digits_len = digits_end - digits;
    1036  
    1037      if (no_negative_zero && sign == 1 &&
    1038              (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
    1039          sign = 0;
    1040      }
    1041  
    1042      if (digits_len && !Py_ISDIGIT(digits[0])) {
    1043          /* Infinities and nans here; adapt Gay's output,
    1044             so convert Infinity to inf and NaN to nan, and
    1045             ignore sign of nan. Then return. */
    1046  
    1047          /* ignore the actual sign of a nan */
    1048          if (digits[0] == 'n' || digits[0] == 'N')
    1049              sign = 0;
    1050  
    1051          /* We only need 5 bytes to hold the result "+inf\0" . */
    1052          bufsize = 5; /* Used later in an assert. */
    1053          buf = (char *)PyMem_Malloc(bufsize);
    1054          if (buf == NULL) {
    1055              PyErr_NoMemory();
    1056              goto exit;
    1057          }
    1058          p = buf;
    1059  
    1060          if (sign == 1) {
    1061              *p++ = '-';
    1062          }
    1063          else if (always_add_sign) {
    1064              *p++ = '+';
    1065          }
    1066          if (digits[0] == 'i' || digits[0] == 'I') {
    1067              strncpy(p, float_strings[OFS_INF], 3);
    1068              p += 3;
    1069  
    1070              if (type)
    1071                  *type = Py_DTST_INFINITE;
    1072          }
    1073          else if (digits[0] == 'n' || digits[0] == 'N') {
    1074              strncpy(p, float_strings[OFS_NAN], 3);
    1075              p += 3;
    1076  
    1077              if (type)
    1078                  *type = Py_DTST_NAN;
    1079          }
    1080          else {
    1081              /* shouldn't get here: Gay's code should always return
    1082                 something starting with a digit, an 'I',  or 'N' */
    1083              Py_UNREACHABLE();
    1084          }
    1085          goto exit;
    1086      }
    1087  
    1088      /* The result must be finite (not inf or nan). */
    1089      if (type)
    1090          *type = Py_DTST_FINITE;
    1091  
    1092  
    1093      /* We got digits back, format them.  We may need to pad 'digits'
    1094         either on the left or right (or both) with extra zeros, so in
    1095         general the resulting string has the form
    1096  
    1097           [<sign>]<zeros><digits><zeros>[<exponent>]
    1098  
    1099         where either of the <zeros> pieces could be empty, and there's a
    1100         decimal point that could appear either in <digits> or in the
    1101         leading or trailing <zeros>.
    1102  
    1103         Imagine an infinite 'virtual' string vdigits, consisting of the
    1104         string 'digits' (starting at index 0) padded on both the left and
    1105         right with infinite strings of zeros.  We want to output a slice
    1106  
    1107           vdigits[vdigits_start : vdigits_end]
    1108  
    1109         of this virtual string.  Thus if vdigits_start < 0 then we'll end
    1110         up producing some leading zeros; if vdigits_end > digits_len there
    1111         will be trailing zeros in the output.  The next section of code
    1112         determines whether to use an exponent or not, figures out the
    1113         position 'decpt' of the decimal point, and computes 'vdigits_start'
    1114         and 'vdigits_end'. */
    1115      vdigits_end = digits_len;
    1116      switch (format_code) {
    1117      case 'e':
    1118          use_exp = 1;
    1119          vdigits_end = precision;
    1120          break;
    1121      case 'f':
    1122          vdigits_end = decpt + precision;
    1123          break;
    1124      case 'g':
    1125          if (decpt <= -4 || decpt >
    1126              (add_dot_0_if_integer ? precision-1 : precision))
    1127              use_exp = 1;
    1128          if (use_alt_formatting)
    1129              vdigits_end = precision;
    1130          break;
    1131      case 'r':
    1132          /* convert to exponential format at 1e16.  We used to convert
    1133             at 1e17, but that gives odd-looking results for some values
    1134             when a 16-digit 'shortest' repr is padded with bogus zeros.
    1135             For example, repr(2e16+8) would give 20000000000000010.0;
    1136             the true value is 20000000000000008.0. */
    1137          if (decpt <= -4 || decpt > 16)
    1138              use_exp = 1;
    1139          break;
    1140      default:
    1141          PyErr_BadInternalCall();
    1142          goto exit;
    1143      }
    1144  
    1145      /* if using an exponent, reset decimal point position to 1 and adjust
    1146         exponent accordingly.*/
    1147      if (use_exp) {
    1148          exp = (int)decpt - 1;
    1149          decpt = 1;
    1150      }
    1151      /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
    1152         decpt < vdigits_end if add_dot_0_if_integer and no exponent */
    1153      vdigits_start = decpt <= 0 ? decpt-1 : 0;
    1154      if (!use_exp && add_dot_0_if_integer)
    1155          vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
    1156      else
    1157          vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
    1158  
    1159      /* double check inequalities */
    1160      assert(vdigits_start <= 0 &&
    1161             0 <= digits_len &&
    1162             digits_len <= vdigits_end);
    1163      /* decimal point should be in (vdigits_start, vdigits_end] */
    1164      assert(vdigits_start < decpt && decpt <= vdigits_end);
    1165  
    1166      /* Compute an upper bound how much memory we need. This might be a few
    1167         chars too long, but no big deal. */
    1168      bufsize =
    1169          /* sign, decimal point and trailing 0 byte */
    1170          3 +
    1171  
    1172          /* total digit count (including zero padding on both sides) */
    1173          (vdigits_end - vdigits_start) +
    1174  
    1175          /* exponent "e+100", max 3 numerical digits */
    1176          (use_exp ? 5 : 0);
    1177  
    1178      /* Now allocate the memory and initialize p to point to the start of
    1179         it. */
    1180      buf = (char *)PyMem_Malloc(bufsize);
    1181      if (buf == NULL) {
    1182          PyErr_NoMemory();
    1183          goto exit;
    1184      }
    1185      p = buf;
    1186  
    1187      /* Add a negative sign if negative, and a plus sign if non-negative
    1188         and always_add_sign is true. */
    1189      if (sign == 1)
    1190          *p++ = '-';
    1191      else if (always_add_sign)
    1192          *p++ = '+';
    1193  
    1194      /* note that exactly one of the three 'if' conditions is true,
    1195         so we include exactly one decimal point */
    1196      /* Zero padding on left of digit string */
    1197      if (decpt <= 0) {
    1198          memset(p, '0', decpt-vdigits_start);
    1199          p += decpt - vdigits_start;
    1200          *p++ = '.';
    1201          memset(p, '0', 0-decpt);
    1202          p += 0-decpt;
    1203      }
    1204      else {
    1205          memset(p, '0', 0-vdigits_start);
    1206          p += 0 - vdigits_start;
    1207      }
    1208  
    1209      /* Digits, with included decimal point */
    1210      if (0 < decpt && decpt <= digits_len) {
    1211          strncpy(p, digits, decpt-0);
    1212          p += decpt-0;
    1213          *p++ = '.';
    1214          strncpy(p, digits+decpt, digits_len-decpt);
    1215          p += digits_len-decpt;
    1216      }
    1217      else {
    1218          strncpy(p, digits, digits_len);
    1219          p += digits_len;
    1220      }
    1221  
    1222      /* And zeros on the right */
    1223      if (digits_len < decpt) {
    1224          memset(p, '0', decpt-digits_len);
    1225          p += decpt-digits_len;
    1226          *p++ = '.';
    1227          memset(p, '0', vdigits_end-decpt);
    1228          p += vdigits_end-decpt;
    1229      }
    1230      else {
    1231          memset(p, '0', vdigits_end-digits_len);
    1232          p += vdigits_end-digits_len;
    1233      }
    1234  
    1235      /* Delete a trailing decimal pt unless using alternative formatting. */
    1236      if (p[-1] == '.' && !use_alt_formatting)
    1237          p--;
    1238  
    1239      /* Now that we've done zero padding, add an exponent if needed. */
    1240      if (use_exp) {
    1241          *p++ = float_strings[OFS_E][0];
    1242          exp_len = sprintf(p, "%+.02d", exp);
    1243          p += exp_len;
    1244      }
    1245    exit:
    1246      if (buf) {
    1247          *p = '\0';
    1248          /* It's too late if this fails, as we've already stepped on
    1249             memory that isn't ours. But it's an okay debugging test. */
    1250          assert(p-buf < bufsize);
    1251      }
    1252      if (digits)
    1253          _Py_dg_freedtoa(digits);
    1254  
    1255      return buf;
    1256  }
    1257  
    1258  
    1259  char * PyOS_double_to_string(double val,
    1260                                           char format_code,
    1261                                           int precision,
    1262                                           int flags,
    1263                                           int *type)
    1264  {
    1265      const char * const *float_strings = lc_float_strings;
    1266      int mode;
    1267  
    1268      /* Validate format_code, and map upper and lower case. Compute the
    1269         mode and make any adjustments as needed. */
    1270      switch (format_code) {
    1271      /* exponent */
    1272      case 'E':
    1273          float_strings = uc_float_strings;
    1274          format_code = 'e';
    1275          /* Fall through. */
    1276      case 'e':
    1277          mode = 2;
    1278          precision++;
    1279          break;
    1280  
    1281      /* fixed */
    1282      case 'F':
    1283          float_strings = uc_float_strings;
    1284          format_code = 'f';
    1285          /* Fall through. */
    1286      case 'f':
    1287          mode = 3;
    1288          break;
    1289  
    1290      /* general */
    1291      case 'G':
    1292          float_strings = uc_float_strings;
    1293          format_code = 'g';
    1294          /* Fall through. */
    1295      case 'g':
    1296          mode = 2;
    1297          /* precision 0 makes no sense for 'g' format; interpret as 1 */
    1298          if (precision == 0)
    1299              precision = 1;
    1300          break;
    1301  
    1302      /* repr format */
    1303      case 'r':
    1304          mode = 0;
    1305          /* Supplied precision is unused, must be 0. */
    1306          if (precision != 0) {
    1307              PyErr_BadInternalCall();
    1308              return NULL;
    1309          }
    1310          break;
    1311  
    1312      default:
    1313          PyErr_BadInternalCall();
    1314          return NULL;
    1315      }
    1316  
    1317      return format_float_short(val, format_code, mode, precision,
    1318                                flags & Py_DTSF_SIGN,
    1319                                flags & Py_DTSF_ADD_DOT_0,
    1320                                flags & Py_DTSF_ALT,
    1321                                flags & Py_DTSF_NO_NEG_0,
    1322                                float_strings, type);
    1323  }
    1324  #endif  // _PY_SHORT_FLOAT_REPR == 1