1  /* nl_langinfo() replacement: query locale dependent information.
       2  
       3     Copyright (C) 2007-2023 Free Software Foundation, Inc.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include <langinfo.h>
      22  
      23  #include <locale.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  #if defined _WIN32 && ! defined __CYGWIN__
      27  # define WIN32_LEAN_AND_MEAN  /* avoid including junk */
      28  # include <windows.h>
      29  # include <stdio.h>
      30  #endif
      31  
      32  #if REPLACE_NL_LANGINFO && !NL_LANGINFO_MTSAFE
      33  # if defined _WIN32 && !defined __CYGWIN__
      34  
      35  #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
      36  #  include <windows.h>
      37  
      38  # elif HAVE_PTHREAD_API
      39  
      40  #  include <pthread.h>
      41  #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
      42  #   include <threads.h>
      43  #   pragma weak thrd_exit
      44  #   define c11_threads_in_use() (thrd_exit != NULL)
      45  #  else
      46  #   define c11_threads_in_use() 0
      47  #  endif
      48  
      49  # elif HAVE_THREADS_H
      50  
      51  #  include <threads.h>
      52  
      53  # endif
      54  #endif
      55  
      56  /* nl_langinfo() must be multithread-safe.  To achieve this without using
      57     thread-local storage:
      58       1. We use a specific static buffer for each possible argument.
      59          So that different threads can call nl_langinfo with different arguments,
      60          without interfering.
      61       2. We use a simple strcpy or memcpy to fill this static buffer.  Filling it
      62          through, for example, strcpy + strcat would not be guaranteed to leave
      63          the buffer's contents intact if another thread is currently accessing
      64          it.  If necessary, the contents is first assembled in a stack-allocated
      65          buffer.  */
      66  
      67  #if !REPLACE_NL_LANGINFO || GNULIB_defined_CODESET
      68  /* Return the codeset of the current locale, if this is easily deducible.
      69     Otherwise, return "".  */
      70  static char *
      71  ctype_codeset (void)
      72  {
      73    /* This function is only used on platforms which don't have uselocale().
      74       Therefore we don't need to look at the per-thread locale first, here.  */
      75    static char result[2 + 10 + 1];
      76    char buf[2 + 10 + 1];
      77    char locale[SETLOCALE_NULL_MAX];
      78    char *codeset;
      79    size_t codesetlen;
      80  
      81    if (setlocale_null_r (LC_CTYPE, locale, sizeof (locale)))
      82      locale[0] = '\0';
      83  
      84    codeset = buf;
      85    codeset[0] = '\0';
      86  
      87    if (locale[0])
      88      {
      89        /* If the locale name contains an encoding after the dot, return it.  */
      90        char *dot = strchr (locale, '.');
      91  
      92        if (dot)
      93          {
      94            /* Look for the possible @... trailer and remove it, if any.  */
      95            char *codeset_start = dot + 1;
      96            char const *modifier = strchr (codeset_start, '@');
      97  
      98            if (! modifier)
      99              codeset = codeset_start;
     100            else
     101              {
     102                codesetlen = modifier - codeset_start;
     103                if (codesetlen < sizeof buf)
     104                  {
     105                    codeset = memcpy (buf, codeset_start, codesetlen);
     106                    codeset[codesetlen] = '\0';
     107                  }
     108              }
     109          }
     110      }
     111  
     112  # if defined _WIN32 && ! defined __CYGWIN__
     113    /* If setlocale is successful, it returns the number of the
     114       codepage, as a string.  Otherwise, fall back on Windows API
     115       GetACP, which returns the locale's codepage as a number (although
     116       this doesn't change according to what the 'setlocale' call specified).
     117       Either way, prepend "CP" to make it a valid codeset name.  */
     118    codesetlen = strlen (codeset);
     119    if (0 < codesetlen && codesetlen < sizeof buf - 2)
     120      memmove (buf + 2, codeset, codesetlen + 1);
     121    else
     122      sprintf (buf + 2, "%u", GetACP ());
     123    /* For a locale name such as "French_France.65001", in Windows 10,
     124       setlocale now returns "French_France.utf8" instead.  */
     125    if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
     126      return (char *) "UTF-8";
     127    else
     128      {
     129        memcpy (buf, "CP", 2);
     130        strcpy (result, buf);
     131        return result;
     132      }
     133  # else
     134    strcpy (result, codeset);
     135    return result;
     136  #endif
     137  }
     138  #endif
     139  
     140  
     141  #if REPLACE_NL_LANGINFO
     142  
     143  /* Override nl_langinfo with support for added nl_item values.  */
     144  
     145  # undef nl_langinfo
     146  
     147  /* Without locking, on Solaris 11.3, test-nl_langinfo-mt fails, with message
     148     "thread5 disturbed by threadN!", even when threadN invokes only
     149        nl_langinfo (CODESET);
     150        nl_langinfo (CRNCYSTR);
     151     Similarly on Solaris 10.  */
     152  
     153  # if !NL_LANGINFO_MTSAFE /* Solaris */
     154  
     155  #  define ITEMS (MAXSTRMSG + 1)
     156  #  define MAX_RESULT_LEN 80
     157  
     158  static char *
     159  nl_langinfo_unlocked (nl_item item)
     160  {
     161    static char result[ITEMS][MAX_RESULT_LEN];
     162  
     163    /* The result of nl_langinfo is in storage that can be overwritten by
     164       other calls to nl_langinfo.  */
     165    char *tmp = nl_langinfo (item);
     166    if (item >= 0 && item < ITEMS && tmp != NULL)
     167      {
     168        size_t tmp_len = strlen (tmp);
     169        if (tmp_len < MAX_RESULT_LEN)
     170          strcpy (result[item], tmp);
     171        else
     172          {
     173            /* Produce a truncated result.  Oh well...  */
     174            result[item][MAX_RESULT_LEN - 1] = '\0';
     175            memcpy (result[item], tmp, MAX_RESULT_LEN - 1);
     176          }
     177        return result[item];
     178      }
     179    else
     180      return tmp;
     181  }
     182  
     183  /* Use a lock, so that no two threads can invoke nl_langinfo_unlocked
     184     at the same time.  */
     185  
     186  /* Prohibit renaming this symbol.  */
     187  #  undef gl_get_nl_langinfo_lock
     188  
     189  #  if defined _WIN32 && !defined __CYGWIN__
     190  
     191  extern __declspec(dllimport) CRITICAL_SECTION *gl_get_nl_langinfo_lock (void);
     192  
     193  static char *
     194  nl_langinfo_with_lock (nl_item item)
     195  {
     196    CRITICAL_SECTION *lock = gl_get_nl_langinfo_lock ();
     197    char *ret;
     198  
     199    EnterCriticalSection (lock);
     200    ret = nl_langinfo_unlocked (item);
     201    LeaveCriticalSection (lock);
     202  
     203    return ret;
     204  }
     205  
     206  #  elif HAVE_PTHREAD_API
     207  
     208  extern
     209  #   if defined _WIN32 || defined __CYGWIN__
     210    __declspec(dllimport)
     211  #   endif
     212    pthread_mutex_t *gl_get_nl_langinfo_lock (void);
     213  
     214  #   if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */
     215  
     216       /* Avoid the need to link with '-lpthread'.  */
     217  #    pragma weak pthread_mutex_lock
     218  #    pragma weak pthread_mutex_unlock
     219  
     220       /* Determine whether libpthread is in use.  */
     221  #    pragma weak pthread_mutexattr_gettype
     222       /* See the comments in lock.h.  */
     223  #    define pthread_in_use() \
     224         (pthread_mutexattr_gettype != NULL || c11_threads_in_use ())
     225  
     226  #   else
     227  #    define pthread_in_use() 1
     228  #   endif
     229  
     230  static char *
     231  nl_langinfo_with_lock (nl_item item)
     232  {
     233    if (pthread_in_use())
     234      {
     235        pthread_mutex_t *lock = gl_get_nl_langinfo_lock ();
     236        char *ret;
     237  
     238        if (pthread_mutex_lock (lock))
     239          abort ();
     240        ret = nl_langinfo_unlocked (item);
     241        if (pthread_mutex_unlock (lock))
     242          abort ();
     243  
     244        return ret;
     245      }
     246    else
     247      return nl_langinfo_unlocked (item);
     248  }
     249  
     250  #  elif HAVE_THREADS_H
     251  
     252  extern mtx_t *gl_get_nl_langinfo_lock (void);
     253  
     254  static char *
     255  nl_langinfo_with_lock (nl_item item)
     256  {
     257    mtx_t *lock = gl_get_nl_langinfo_lock ();
     258    char *ret;
     259  
     260    if (mtx_lock (lock) != thrd_success)
     261      abort ();
     262    ret = nl_langinfo_unlocked (item);
     263    if (mtx_unlock (lock) != thrd_success)
     264      abort ();
     265  
     266    return ret;
     267  }
     268  
     269  #  endif
     270  
     271  # else
     272  
     273  /* On other platforms, no lock is needed.  */
     274  #  define nl_langinfo_with_lock nl_langinfo
     275  
     276  # endif
     277  
     278  char *
     279  rpl_nl_langinfo (nl_item item)
     280  {
     281    switch (item)
     282      {
     283  # if GNULIB_defined_CODESET
     284      case CODESET:
     285        return ctype_codeset ();
     286  # endif
     287  # if GNULIB_defined_T_FMT_AMPM
     288      case T_FMT_AMPM:
     289        return (char *) "%I:%M:%S %p";
     290  # endif
     291  # if GNULIB_defined_ALTMON
     292      case ALTMON_1:
     293      case ALTMON_2:
     294      case ALTMON_3:
     295      case ALTMON_4:
     296      case ALTMON_5:
     297      case ALTMON_6:
     298      case ALTMON_7:
     299      case ALTMON_8:
     300      case ALTMON_9:
     301      case ALTMON_10:
     302      case ALTMON_11:
     303      case ALTMON_12:
     304        /* We don't ship the appropriate localizations with gnulib.  Therefore,
     305           treat ALTMON_i like MON_i.  */
     306        item = item - ALTMON_1 + MON_1;
     307        break;
     308  # endif
     309  # if GNULIB_defined_ERA
     310      case ERA:
     311        /* The format is not standardized.  In glibc it is a sequence of strings
     312           of the form "direction:offset:start_date:end_date:era_name:era_format"
     313           with an empty string at the end.  */
     314        return (char *) "";
     315      case ERA_D_FMT:
     316        /* The %Ex conversion in strftime behaves like %x if the locale does not
     317           have an alternative time format.  */
     318        item = D_FMT;
     319        break;
     320      case ERA_D_T_FMT:
     321        /* The %Ec conversion in strftime behaves like %c if the locale does not
     322           have an alternative time format.  */
     323        item = D_T_FMT;
     324        break;
     325      case ERA_T_FMT:
     326        /* The %EX conversion in strftime behaves like %X if the locale does not
     327           have an alternative time format.  */
     328        item = T_FMT;
     329        break;
     330      case ALT_DIGITS:
     331        /* The format is not standardized.  In glibc it is a sequence of 10
     332           strings, appended in memory.  */
     333        return (char *) "\0\0\0\0\0\0\0\0\0\0";
     334  # endif
     335  # if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
     336      case YESEXPR:
     337        return (char *) "^[yY]";
     338      case NOEXPR:
     339        return (char *) "^[nN]";
     340  # endif
     341      default:
     342        break;
     343      }
     344    return nl_langinfo_with_lock (item);
     345  }
     346  
     347  #else
     348  
     349  /* Provide nl_langinfo from scratch, either for native MS-Windows, or
     350     for old Unix platforms without locales, such as Linux libc5 or
     351     BeOS.  */
     352  
     353  # include <time.h>
     354  
     355  char *
     356  nl_langinfo (nl_item item)
     357  {
     358    char buf[100];
     359    struct tm tmm = { 0 };
     360  
     361    switch (item)
     362      {
     363      /* nl_langinfo items of the LC_CTYPE category */
     364      case CODESET:
     365        {
     366          char *codeset = ctype_codeset ();
     367          if (*codeset)
     368            return codeset;
     369        }
     370  # ifdef __BEOS__
     371        return (char *) "UTF-8";
     372  # else
     373        return (char *) "ISO-8859-1";
     374  # endif
     375      /* nl_langinfo items of the LC_NUMERIC category */
     376      case RADIXCHAR:
     377        return localeconv () ->decimal_point;
     378      case THOUSEP:
     379        return localeconv () ->thousands_sep;
     380  # ifdef GROUPING
     381      case GROUPING:
     382        return localeconv () ->grouping;
     383  # endif
     384      /* nl_langinfo items of the LC_TIME category.
     385         TODO: Really use the locale.  */
     386      case D_T_FMT:
     387      case ERA_D_T_FMT:
     388        return (char *) "%a %b %e %H:%M:%S %Y";
     389      case D_FMT:
     390      case ERA_D_FMT:
     391        return (char *) "%m/%d/%y";
     392      case T_FMT:
     393      case ERA_T_FMT:
     394        return (char *) "%H:%M:%S";
     395      case T_FMT_AMPM:
     396        return (char *) "%I:%M:%S %p";
     397      case AM_STR:
     398        {
     399          static char result[80];
     400          if (!strftime (buf, sizeof result, "%p", &tmm))
     401            return (char *) "AM";
     402          strcpy (result, buf);
     403          return result;
     404        }
     405      case PM_STR:
     406        {
     407          static char result[80];
     408          tmm.tm_hour = 12;
     409          if (!strftime (buf, sizeof result, "%p", &tmm))
     410            return (char *) "PM";
     411          strcpy (result, buf);
     412          return result;
     413        }
     414      case DAY_1:
     415      case DAY_2:
     416      case DAY_3:
     417      case DAY_4:
     418      case DAY_5:
     419      case DAY_6:
     420      case DAY_7:
     421        {
     422          static char result[7][50];
     423          static char const days[][sizeof "Wednesday"] = {
     424            "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
     425            "Friday", "Saturday"
     426          };
     427          tmm.tm_wday = item - DAY_1;
     428          if (!strftime (buf, sizeof result[0], "%A", &tmm))
     429            return (char *) days[item - DAY_1];
     430          strcpy (result[item - DAY_1], buf);
     431          return result[item - DAY_1];
     432        }
     433      case ABDAY_1:
     434      case ABDAY_2:
     435      case ABDAY_3:
     436      case ABDAY_4:
     437      case ABDAY_5:
     438      case ABDAY_6:
     439      case ABDAY_7:
     440        {
     441          static char result[7][30];
     442          static char const abdays[][sizeof "Sun"] = {
     443            "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
     444          };
     445          tmm.tm_wday = item - ABDAY_1;
     446          if (!strftime (buf, sizeof result[0], "%a", &tmm))
     447            return (char *) abdays[item - ABDAY_1];
     448          strcpy (result[item - ABDAY_1], buf);
     449          return result[item - ABDAY_1];
     450        }
     451      {
     452        static char const months[][sizeof "September"] = {
     453          "January", "February", "March", "April", "May", "June", "July",
     454          "September", "October", "November", "December"
     455        };
     456        case MON_1:
     457        case MON_2:
     458        case MON_3:
     459        case MON_4:
     460        case MON_5:
     461        case MON_6:
     462        case MON_7:
     463        case MON_8:
     464        case MON_9:
     465        case MON_10:
     466        case MON_11:
     467        case MON_12:
     468          {
     469            static char result[12][50];
     470            tmm.tm_mon = item - MON_1;
     471            if (!strftime (buf, sizeof result[0], "%B", &tmm))
     472              return (char *) months[item - MON_1];
     473            strcpy (result[item - MON_1], buf);
     474            return result[item - MON_1];
     475          }
     476        case ALTMON_1:
     477        case ALTMON_2:
     478        case ALTMON_3:
     479        case ALTMON_4:
     480        case ALTMON_5:
     481        case ALTMON_6:
     482        case ALTMON_7:
     483        case ALTMON_8:
     484        case ALTMON_9:
     485        case ALTMON_10:
     486        case ALTMON_11:
     487        case ALTMON_12:
     488          {
     489            static char result[12][50];
     490            tmm.tm_mon = item - ALTMON_1;
     491            /* The platforms without nl_langinfo() don't support strftime with
     492               %OB.  We don't even need to try.  */
     493            #if 0
     494            if (!strftime (buf, sizeof result[0], "%OB", &tmm))
     495            #endif
     496              if (!strftime (buf, sizeof result[0], "%B", &tmm))
     497                return (char *) months[item - ALTMON_1];
     498            strcpy (result[item - ALTMON_1], buf);
     499            return result[item - ALTMON_1];
     500          }
     501      }
     502      case ABMON_1:
     503      case ABMON_2:
     504      case ABMON_3:
     505      case ABMON_4:
     506      case ABMON_5:
     507      case ABMON_6:
     508      case ABMON_7:
     509      case ABMON_8:
     510      case ABMON_9:
     511      case ABMON_10:
     512      case ABMON_11:
     513      case ABMON_12:
     514        {
     515          static char result[12][30];
     516          static char const abmonths[][sizeof "Jan"] = {
     517            "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
     518            "Sep", "Oct", "Nov", "Dec"
     519          };
     520          tmm.tm_mon = item - ABMON_1;
     521          if (!strftime (buf, sizeof result[0], "%b", &tmm))
     522            return (char *) abmonths[item - ABMON_1];
     523          strcpy (result[item - ABMON_1], buf);
     524          return result[item - ABMON_1];
     525        }
     526      case ERA:
     527        return (char *) "";
     528      case ALT_DIGITS:
     529        return (char *) "\0\0\0\0\0\0\0\0\0\0";
     530      /* nl_langinfo items of the LC_MONETARY category.  */
     531      case CRNCYSTR:
     532        return localeconv () ->currency_symbol;
     533  # ifdef INT_CURR_SYMBOL
     534      case INT_CURR_SYMBOL:
     535        return localeconv () ->int_curr_symbol;
     536      case MON_DECIMAL_POINT:
     537        return localeconv () ->mon_decimal_point;
     538      case MON_THOUSANDS_SEP:
     539        return localeconv () ->mon_thousands_sep;
     540      case MON_GROUPING:
     541        return localeconv () ->mon_grouping;
     542      case POSITIVE_SIGN:
     543        return localeconv () ->positive_sign;
     544      case NEGATIVE_SIGN:
     545        return localeconv () ->negative_sign;
     546      case FRAC_DIGITS:
     547        return & localeconv () ->frac_digits;
     548      case INT_FRAC_DIGITS:
     549        return & localeconv () ->int_frac_digits;
     550      case P_CS_PRECEDES:
     551        return & localeconv () ->p_cs_precedes;
     552      case N_CS_PRECEDES:
     553        return & localeconv () ->n_cs_precedes;
     554      case P_SEP_BY_SPACE:
     555        return & localeconv () ->p_sep_by_space;
     556      case N_SEP_BY_SPACE:
     557        return & localeconv () ->n_sep_by_space;
     558      case P_SIGN_POSN:
     559        return & localeconv () ->p_sign_posn;
     560      case N_SIGN_POSN:
     561        return & localeconv () ->n_sign_posn;
     562  # endif
     563      /* nl_langinfo items of the LC_MESSAGES category
     564         TODO: Really use the locale. */
     565      case YESEXPR:
     566        return (char *) "^[yY]";
     567      case NOEXPR:
     568        return (char *) "^[nN]";
     569      default:
     570        return (char *) "";
     571      }
     572  }
     573  
     574  #endif