1  /* nl_langinfo() replacement: query locale dependent information.
       2  
       3     Copyright (C) 2007-2023 Free Software Foundation, Inc.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include <langinfo.h>
      22  
      23  #include <locale.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  #if defined _WIN32 && ! defined __CYGWIN__
      27  # define WIN32_LEAN_AND_MEAN  /* avoid including junk */
      28  # include <windows.h>
      29  # include <stdio.h>
      30  #endif
      31  
      32  #if REPLACE_NL_LANGINFO && !NL_LANGINFO_MTSAFE
      33  
      34  # if AVOID_ANY_THREADS
      35  
      36  /* The option '--disable-threads' explicitly requests no locking.  */
      37  
      38  # elif defined _WIN32 && !defined __CYGWIN__
      39  
      40  #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
      41  #  include <windows.h>
      42  
      43  # elif HAVE_PTHREAD_API
      44  
      45  #  include <pthread.h>
      46  #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
      47  #   include <threads.h>
      48  #   pragma weak thrd_exit
      49  #   define c11_threads_in_use() (thrd_exit != NULL)
      50  #  else
      51  #   define c11_threads_in_use() 0
      52  #  endif
      53  
      54  # elif HAVE_THREADS_H
      55  
      56  #  include <threads.h>
      57  
      58  # endif
      59  
      60  #endif
      61  
      62  /* nl_langinfo() must be multithread-safe.  To achieve this without using
      63     thread-local storage:
      64       1. We use a specific static buffer for each possible argument.
      65          So that different threads can call nl_langinfo with different arguments,
      66          without interfering.
      67       2. We use a simple strcpy or memcpy to fill this static buffer.  Filling it
      68          through, for example, strcpy + strcat would not be guaranteed to leave
      69          the buffer's contents intact if another thread is currently accessing
        it.  If necessary, the contents are first assembled in a
        stack-allocated buffer.  */
      72  
      73  #if !REPLACE_NL_LANGINFO || GNULIB_defined_CODESET
      74  /* Return the codeset of the current locale, if this is easily deducible.
      75     Otherwise, return "".  */
      76  static char *
      77  ctype_codeset (void)
      78  {
      79    /* This function is only used on platforms which don't have uselocale().
      80       Therefore we don't need to look at the per-thread locale first, here.  */
      81    static char result[2 + 10 + 1];
      82    char buf[2 + 10 + 1];
      83    char locale[SETLOCALE_NULL_MAX];
      84    char *codeset;
      85    size_t codesetlen;
      86  
      87    if (setlocale_null_r (LC_CTYPE, locale, sizeof (locale)))
      88      locale[0] = '\0';
      89  
      90    codeset = buf;
      91    codeset[0] = '\0';
      92  
      93    if (locale[0])
      94      {
      95        /* If the locale name contains an encoding after the dot, return it.  */
      96        char *dot = strchr (locale, '.');
      97  
      98        if (dot)
      99          {
     100            /* Look for the possible @... trailer and remove it, if any.  */
     101            char *codeset_start = dot + 1;
     102            char const *modifier = strchr (codeset_start, '@');
     103  
     104            if (! modifier)
     105              codeset = codeset_start;
     106            else
     107              {
     108                codesetlen = modifier - codeset_start;
     109                if (codesetlen < sizeof buf)
     110                  {
     111                    codeset = memcpy (buf, codeset_start, codesetlen);
     112                    codeset[codesetlen] = '\0';
     113                  }
     114              }
     115          }
     116      }
     117  
     118  # if defined _WIN32 && ! defined __CYGWIN__
     119    /* If setlocale is successful, it returns the number of the
     120       codepage, as a string.  Otherwise, fall back on Windows API
     121       GetACP, which returns the locale's codepage as a number (although
     122       this doesn't change according to what the 'setlocale' call specified).
     123       Either way, prepend "CP" to make it a valid codeset name.  */
     124    codesetlen = strlen (codeset);
     125    if (0 < codesetlen && codesetlen < sizeof buf - 2)
     126      memmove (buf + 2, codeset, codesetlen + 1);
     127    else
     128      sprintf (buf + 2, "%u", GetACP ());
     129    /* For a locale name such as "French_France.65001", in Windows 10,
     130       setlocale now returns "French_France.utf8" instead.  */
     131    if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
     132      return (char *) "UTF-8";
     133    else
     134      {
     135        memcpy (buf, "CP", 2);
     136        strcpy (result, buf);
     137        return result;
     138      }
     139  # else
     140    strcpy (result, codeset);
     141    return result;
     142  #endif
     143  }
     144  #endif
     145  
     146  
     147  #if REPLACE_NL_LANGINFO
     148  
     149  /* Override nl_langinfo with support for added nl_item values.  */
     150  
     151  # undef nl_langinfo
     152  
     153  /* Without locking, on Solaris 11.3, test-nl_langinfo-mt fails, with message
     154     "thread5 disturbed by threadN!", even when threadN invokes only
     155        nl_langinfo (CODESET);
     156        nl_langinfo (CRNCYSTR);
     157     Similarly on Solaris 10.  */
     158  
     159  # if !NL_LANGINFO_MTSAFE /* Solaris */
     160  
     161  #  define ITEMS (MAXSTRMSG + 1)
     162  #  define MAX_RESULT_LEN 80
     163  
     164  static char *
     165  nl_langinfo_unlocked (nl_item item)
     166  {
     167    static char result[ITEMS][MAX_RESULT_LEN];
     168  
     169    /* The result of nl_langinfo is in storage that can be overwritten by
     170       other calls to nl_langinfo.  */
     171    char *tmp = nl_langinfo (item);
     172    if (item >= 0 && item < ITEMS && tmp != NULL)
     173      {
     174        size_t tmp_len = strlen (tmp);
     175        if (tmp_len < MAX_RESULT_LEN)
     176          strcpy (result[item], tmp);
     177        else
     178          {
     179            /* Produce a truncated result.  Oh well...  */
     180            result[item][MAX_RESULT_LEN - 1] = '\0';
     181            memcpy (result[item], tmp, MAX_RESULT_LEN - 1);
     182          }
     183        return result[item];
     184      }
     185    else
     186      return tmp;
     187  }
     188  
     189  /* Use a lock, so that no two threads can invoke nl_langinfo_unlocked
     190     at the same time.  */
     191  
     192  /* Prohibit renaming this symbol.  */
     193  #  undef gl_get_nl_langinfo_lock
     194  
     195  #  if AVOID_ANY_THREADS
     196  
     197  /* The option '--disable-threads' explicitly requests no locking.  */
     198  #   define nl_langinfo_with_lock nl_langinfo_unlocked
     199  
     200  #  elif defined _WIN32 && !defined __CYGWIN__
     201  
     202  extern __declspec(dllimport) CRITICAL_SECTION *gl_get_nl_langinfo_lock (void);
     203  
     204  static char *
     205  nl_langinfo_with_lock (nl_item item)
     206  {
     207    CRITICAL_SECTION *lock = gl_get_nl_langinfo_lock ();
     208    char *ret;
     209  
     210    EnterCriticalSection (lock);
     211    ret = nl_langinfo_unlocked (item);
     212    LeaveCriticalSection (lock);
     213  
     214    return ret;
     215  }
     216  
     217  #  elif HAVE_PTHREAD_API
     218  
     219  extern
     220  #   if defined _WIN32 || defined __CYGWIN__
     221    __declspec(dllimport)
     222  #   endif
     223    pthread_mutex_t *gl_get_nl_langinfo_lock (void);
     224  
     225  #   if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */
     226  
     227       /* Avoid the need to link with '-lpthread'.  */
     228  #    pragma weak pthread_mutex_lock
     229  #    pragma weak pthread_mutex_unlock
     230  
     231       /* Determine whether libpthread is in use.  */
     232  #    pragma weak pthread_mutexattr_gettype
     233       /* See the comments in lock.h.  */
     234  #    define pthread_in_use() \
     235         (pthread_mutexattr_gettype != NULL || c11_threads_in_use ())
     236  
     237  #   else
     238  #    define pthread_in_use() 1
     239  #   endif
     240  
     241  static char *
     242  nl_langinfo_with_lock (nl_item item)
     243  {
     244    if (pthread_in_use())
     245      {
     246        pthread_mutex_t *lock = gl_get_nl_langinfo_lock ();
     247        char *ret;
     248  
     249        if (pthread_mutex_lock (lock))
     250          abort ();
     251        ret = nl_langinfo_unlocked (item);
     252        if (pthread_mutex_unlock (lock))
     253          abort ();
     254  
     255        return ret;
     256      }
     257    else
     258      return nl_langinfo_unlocked (item);
     259  }
     260  
     261  #  elif HAVE_THREADS_H
     262  
     263  extern mtx_t *gl_get_nl_langinfo_lock (void);
     264  
     265  static char *
     266  nl_langinfo_with_lock (nl_item item)
     267  {
     268    mtx_t *lock = gl_get_nl_langinfo_lock ();
     269    char *ret;
     270  
     271    if (mtx_lock (lock) != thrd_success)
     272      abort ();
     273    ret = nl_langinfo_unlocked (item);
     274    if (mtx_unlock (lock) != thrd_success)
     275      abort ();
     276  
     277    return ret;
     278  }
     279  
     280  #  endif
     281  
     282  # else
     283  
     284  /* On other platforms, no lock is needed.  */
     285  #  define nl_langinfo_with_lock nl_langinfo
     286  
     287  # endif
     288  
     289  char *
     290  rpl_nl_langinfo (nl_item item)
     291  {
     292    switch (item)
     293      {
     294  # if GNULIB_defined_CODESET
     295      case CODESET:
     296        return ctype_codeset ();
     297  # endif
     298  # if GNULIB_defined_T_FMT_AMPM
     299      case T_FMT_AMPM:
     300        return (char *) "%I:%M:%S %p";
     301  # endif
     302  # if GNULIB_defined_ALTMON
     303      case ALTMON_1:
     304      case ALTMON_2:
     305      case ALTMON_3:
     306      case ALTMON_4:
     307      case ALTMON_5:
     308      case ALTMON_6:
     309      case ALTMON_7:
     310      case ALTMON_8:
     311      case ALTMON_9:
     312      case ALTMON_10:
     313      case ALTMON_11:
     314      case ALTMON_12:
     315        /* We don't ship the appropriate localizations with gnulib.  Therefore,
     316           treat ALTMON_i like MON_i.  */
     317        item = item - ALTMON_1 + MON_1;
     318        break;
     319  # endif
     320  # if GNULIB_defined_ERA
     321      case ERA:
     322        /* The format is not standardized.  In glibc it is a sequence of strings
     323           of the form "direction:offset:start_date:end_date:era_name:era_format"
     324           with an empty string at the end.  */
     325        return (char *) "";
     326      case ERA_D_FMT:
     327        /* The %Ex conversion in strftime behaves like %x if the locale does not
     328           have an alternative time format.  */
     329        item = D_FMT;
     330        break;
     331      case ERA_D_T_FMT:
     332        /* The %Ec conversion in strftime behaves like %c if the locale does not
     333           have an alternative time format.  */
     334        item = D_T_FMT;
     335        break;
     336      case ERA_T_FMT:
     337        /* The %EX conversion in strftime behaves like %X if the locale does not
     338           have an alternative time format.  */
     339        item = T_FMT;
     340        break;
     341      case ALT_DIGITS:
     342        /* The format is not standardized.  In glibc it is a sequence of 10
     343           strings, appended in memory.  */
     344        return (char *) "\0\0\0\0\0\0\0\0\0\0";
     345  # endif
     346  # if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
     347      case YESEXPR:
     348        return (char *) "^[yY]";
     349      case NOEXPR:
     350        return (char *) "^[nN]";
     351  # endif
     352      default:
     353        break;
     354      }
     355    return nl_langinfo_with_lock (item);
     356  }
     357  
     358  #else
     359  
     360  /* Provide nl_langinfo from scratch, either for native MS-Windows, or
     361     for old Unix platforms without locales, such as Linux libc5 or
     362     BeOS.  */
     363  
     364  # include <time.h>
     365  
     366  char *
     367  nl_langinfo (nl_item item)
     368  {
     369    char buf[100];
     370    struct tm tmm = { 0 };
     371  
     372    switch (item)
     373      {
     374      /* nl_langinfo items of the LC_CTYPE category */
     375      case CODESET:
     376        {
     377          char *codeset = ctype_codeset ();
     378          if (*codeset)
     379            return codeset;
     380        }
     381  # ifdef __BEOS__
     382        return (char *) "UTF-8";
     383  # else
     384        return (char *) "ISO-8859-1";
     385  # endif
     386      /* nl_langinfo items of the LC_NUMERIC category */
     387      case RADIXCHAR:
     388        return localeconv () ->decimal_point;
     389      case THOUSEP:
     390        return localeconv () ->thousands_sep;
     391  # ifdef GROUPING
     392      case GROUPING:
     393        return localeconv () ->grouping;
     394  # endif
     395      /* nl_langinfo items of the LC_TIME category.
     396         TODO: Really use the locale.  */
     397      case D_T_FMT:
     398      case ERA_D_T_FMT:
     399        return (char *) "%a %b %e %H:%M:%S %Y";
     400      case D_FMT:
     401      case ERA_D_FMT:
     402        return (char *) "%m/%d/%y";
     403      case T_FMT:
     404      case ERA_T_FMT:
     405        return (char *) "%H:%M:%S";
     406      case T_FMT_AMPM:
     407        return (char *) "%I:%M:%S %p";
     408      case AM_STR:
     409        {
     410          static char result[80];
     411          if (!strftime (buf, sizeof result, "%p", &tmm))
     412            return (char *) "AM";
     413          strcpy (result, buf);
     414          return result;
     415        }
     416      case PM_STR:
     417        {
     418          static char result[80];
     419          tmm.tm_hour = 12;
     420          if (!strftime (buf, sizeof result, "%p", &tmm))
     421            return (char *) "PM";
     422          strcpy (result, buf);
     423          return result;
     424        }
     425      case DAY_1:
     426      case DAY_2:
     427      case DAY_3:
     428      case DAY_4:
     429      case DAY_5:
     430      case DAY_6:
     431      case DAY_7:
     432        {
     433          static char result[7][50];
     434          static char const days[][sizeof "Wednesday"] = {
     435            "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
     436            "Friday", "Saturday"
     437          };
     438          tmm.tm_wday = item - DAY_1;
     439          if (!strftime (buf, sizeof result[0], "%A", &tmm))
     440            return (char *) days[item - DAY_1];
     441          strcpy (result[item - DAY_1], buf);
     442          return result[item - DAY_1];
     443        }
     444      case ABDAY_1:
     445      case ABDAY_2:
     446      case ABDAY_3:
     447      case ABDAY_4:
     448      case ABDAY_5:
     449      case ABDAY_6:
     450      case ABDAY_7:
     451        {
     452          static char result[7][30];
     453          static char const abdays[][sizeof "Sun"] = {
     454            "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
     455          };
     456          tmm.tm_wday = item - ABDAY_1;
     457          if (!strftime (buf, sizeof result[0], "%a", &tmm))
     458            return (char *) abdays[item - ABDAY_1];
     459          strcpy (result[item - ABDAY_1], buf);
     460          return result[item - ABDAY_1];
     461        }
     462      {
     463        static char const months[][sizeof "September"] = {
     464          "January", "February", "March", "April", "May", "June", "July",
     465          "September", "October", "November", "December"
     466        };
     467        case MON_1:
     468        case MON_2:
     469        case MON_3:
     470        case MON_4:
     471        case MON_5:
     472        case MON_6:
     473        case MON_7:
     474        case MON_8:
     475        case MON_9:
     476        case MON_10:
     477        case MON_11:
     478        case MON_12:
     479          {
     480            static char result[12][50];
     481            tmm.tm_mon = item - MON_1;
     482            if (!strftime (buf, sizeof result[0], "%B", &tmm))
     483              return (char *) months[item - MON_1];
     484            strcpy (result[item - MON_1], buf);
     485            return result[item - MON_1];
     486          }
     487        case ALTMON_1:
     488        case ALTMON_2:
     489        case ALTMON_3:
     490        case ALTMON_4:
     491        case ALTMON_5:
     492        case ALTMON_6:
     493        case ALTMON_7:
     494        case ALTMON_8:
     495        case ALTMON_9:
     496        case ALTMON_10:
     497        case ALTMON_11:
     498        case ALTMON_12:
     499          {
     500            static char result[12][50];
     501            tmm.tm_mon = item - ALTMON_1;
     502            /* The platforms without nl_langinfo() don't support strftime with
     503               %OB.  We don't even need to try.  */
     504            #if 0
     505            if (!strftime (buf, sizeof result[0], "%OB", &tmm))
     506            #endif
     507              if (!strftime (buf, sizeof result[0], "%B", &tmm))
     508                return (char *) months[item - ALTMON_1];
     509            strcpy (result[item - ALTMON_1], buf);
     510            return result[item - ALTMON_1];
     511          }
     512      }
     513      case ABMON_1:
     514      case ABMON_2:
     515      case ABMON_3:
     516      case ABMON_4:
     517      case ABMON_5:
     518      case ABMON_6:
     519      case ABMON_7:
     520      case ABMON_8:
     521      case ABMON_9:
     522      case ABMON_10:
     523      case ABMON_11:
     524      case ABMON_12:
     525        {
     526          static char result[12][30];
     527          static char const abmonths[][sizeof "Jan"] = {
     528            "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
     529            "Sep", "Oct", "Nov", "Dec"
     530          };
     531          tmm.tm_mon = item - ABMON_1;
     532          if (!strftime (buf, sizeof result[0], "%b", &tmm))
     533            return (char *) abmonths[item - ABMON_1];
     534          strcpy (result[item - ABMON_1], buf);
     535          return result[item - ABMON_1];
     536        }
     537      case ERA:
     538        return (char *) "";
     539      case ALT_DIGITS:
     540        return (char *) "\0\0\0\0\0\0\0\0\0\0";
     541      /* nl_langinfo items of the LC_MONETARY category.  */
     542      case CRNCYSTR:
     543        return localeconv () ->currency_symbol;
     544  # ifdef INT_CURR_SYMBOL
     545      case INT_CURR_SYMBOL:
     546        return localeconv () ->int_curr_symbol;
     547      case MON_DECIMAL_POINT:
     548        return localeconv () ->mon_decimal_point;
     549      case MON_THOUSANDS_SEP:
     550        return localeconv () ->mon_thousands_sep;
     551      case MON_GROUPING:
     552        return localeconv () ->mon_grouping;
     553      case POSITIVE_SIGN:
     554        return localeconv () ->positive_sign;
     555      case NEGATIVE_SIGN:
     556        return localeconv () ->negative_sign;
     557      case FRAC_DIGITS:
     558        return & localeconv () ->frac_digits;
     559      case INT_FRAC_DIGITS:
     560        return & localeconv () ->int_frac_digits;
     561      case P_CS_PRECEDES:
     562        return & localeconv () ->p_cs_precedes;
     563      case N_CS_PRECEDES:
     564        return & localeconv () ->n_cs_precedes;
     565      case P_SEP_BY_SPACE:
     566        return & localeconv () ->p_sep_by_space;
     567      case N_SEP_BY_SPACE:
     568        return & localeconv () ->n_sep_by_space;
     569      case P_SIGN_POSN:
     570        return & localeconv () ->p_sign_posn;
     571      case N_SIGN_POSN:
     572        return & localeconv () ->n_sign_posn;
     573  # endif
     574      /* nl_langinfo items of the LC_MESSAGES category
     575         TODO: Really use the locale. */
     576      case YESEXPR:
     577        return (char *) "^[yY]";
     578      case NOEXPR:
     579        return (char *) "^[nN]";
     580      default:
     581        return (char *) "";
     582      }
     583  }
     584  
     585  #endif