glibc-2.38/elf/dl-tls.c
/* Thread-local storage handling in the ELF dynamic linker.  Generic version.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <atomic.h>

#include <tls.h>
#include <dl-tls.h>
#include <ldsodefs.h>

#if PTHREAD_IN_LIBC
# include <list.h>
#endif

#define TUNABLE_NAMESPACE rtld
#include <dl-tunables.h>

/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for

   - IE TLS in libc.so for all dlmopen namespaces except in the initial
     one where libc.so is not loaded dynamically but at startup time,
   - IE TLS in other libraries which may be dynamically loaded even in the
     initial namespace,
   - and optionally for optimizing dynamic TLS access.

   The maximum number of namespaces is DL_NNS, but to support that many
   namespaces correctly the static TLS allocation should be significantly
   increased, which may cause problems with small thread stacks due to the
   way static TLS is accounted (bug 11787).

   So there is an rtld.nns tunable limit on the number of supported
   namespaces that affects the size of the static TLS; by default it is
   small enough not to cause problems with existing applications.  The
   limit is not enforced or checked: it is the user's responsibility to
   increase rtld.nns if more dlmopen namespaces are used.

   Audit modules use their own namespaces; they are not included in
   rtld.nns, but come on top when computing the number of namespaces.  */

/* Size of initial-exec TLS in libc.so.  This should be the maximum of
   observed PT_TLS sizes across all architectures.  Some
   architectures have lower values due to differences in type sizes
   and link editor capabilities.  */
#define LIBC_IE_TLS 144

/* Size of initial-exec TLS in libraries other than libc.so.
   This should be large enough to cover runtime libraries of the
   compiler such as libgomp and libraries in libc other than libc.so.  */
#define OTHER_IE_TLS 144

/* Default number of namespaces.  */
#define DEFAULT_NNS 4

/* Default for dl_tls_static_optional.  */
#define OPTIONAL_TLS 512

/* Compute the static TLS surplus based on the namespace count and the
   TLS space that can be used for optimizations.  */
static inline int
tls_static_surplus (int nns, int opt_tls)
{
  return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
}

/* This value is chosen so that with default values for the tunables,
   the computation of dl_tls_static_surplus in
   _dl_tls_static_surplus_init yields the historic value 1664, for
   backwards compatibility.  */
#define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
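
/* For reference: with the default tunables, tls_static_surplus
   (DEFAULT_NNS, OPTIONAL_TLS) is (4 - 1) * 144 + 4 * 144 + 512 == 1520
   bytes, so LEGACY_TLS evaluates to 1664 - 1520 == 144.  */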

/* Calculate the size of the static TLS surplus, when the given
   number of audit modules are loaded.  Must be called after the
   number of audit modules is known and before static TLS allocation.  */
void
_dl_tls_static_surplus_init (size_t naudit)
{
  size_t nns, opt_tls;

  nns = TUNABLE_GET (nns, size_t, NULL);
  opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
  if (nns > DL_NNS)
    nns = DL_NNS;
  if (DL_NNS - nns < naudit)
    _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
                      (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
  nns += naudit;

  GL(dl_tls_static_optional) = opt_tls;
  assert (LEGACY_TLS >= 0);
  GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
}
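
/* Illustrative arithmetic (not asserted by the code): with the default
   tunables and naudit == 2, nns becomes 6, so the surplus is
   (6 - 1) * 144 + 6 * 144 + 512 + 144 (LEGACY_TLS) == 2240 bytes.  */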

/* Out-of-memory handler.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}


void
_dl_assign_tls_modid (struct link_map *l)
{
  size_t result;

  if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
    {
      size_t disp = 0;
      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);

      /* Note that this branch will never be executed during program
         start since there are no gaps at that time.  Therefore it
         does not matter that the dl_tls_dtv_slotinfo is not allocated
         yet when the function is called for the first time.

         NB: the offset +1 is due to the fact that DTV[0] is used
         for something else.  */
      result = GL(dl_tls_static_nelem) + 1;
      if (result <= GL(dl_tls_max_dtv_idx))
        do
          {
            while (result - disp < runp->len)
              {
                if (runp->slotinfo[result - disp].map == NULL)
                  break;

                ++result;
                assert (result <= GL(dl_tls_max_dtv_idx) + 1);
              }

            if (result - disp < runp->len)
              {
                /* Mark the entry as used, so any dependency sees it.  */
                atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
                break;
              }

            disp += runp->len;
          }
        while ((runp = runp->next) != NULL);

      if (result > GL(dl_tls_max_dtv_idx))
        {
          /* The new index must indeed be exactly one higher than the
             previous high.  */
          assert (result == GL(dl_tls_max_dtv_idx) + 1);
          /* There is no gap anymore.  */
          GL(dl_tls_dtv_gaps) = false;

          goto nogaps;
        }
    }
  else
    {
      /* No gaps, allocate a new entry.  */
    nogaps:

      result = GL(dl_tls_max_dtv_idx) + 1;
      /* Can be read concurrently.  */
      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
    }

  l->l_tls_modid = result;
}
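
/* Example (illustrative, not from the sources): if modules currently
   occupy modids 1..5 and the slotinfo entry for modid 3 was cleared
   (e.g. by dlclose, leaving a gap), the scan above starts at
   GL(dl_tls_static_nelem) + 1, finds the NULL entry for 3 and reuses
   it.  Without gaps the module simply receives
   GL(dl_tls_max_dtv_idx) + 1.  */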


size_t
_dl_count_modids (void)
{
  /* The count is the max unless dlclose or failed dlopen created gaps.  */
  if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
    return GL(dl_tls_max_dtv_idx);

  /* We have gaps and are forced to count the non-NULL entries.  */
  size_t n = 0;
  struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
  while (runp != NULL)
    {
      for (size_t i = 0; i < runp->len; ++i)
        if (runp->slotinfo[i].map != NULL)
          ++n;

      runp = runp->next;
    }

  return n;
}


#ifdef SHARED
void
_dl_determine_tlsoffset (void)
{
  size_t max_align = TCB_ALIGNMENT;
  size_t freetop = 0;
  size_t freebottom = 0;

  /* The first element of the dtv slot info list is allocated.  */
  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
  /* There is at this point only one element in the
     dl_tls_dtv_slotinfo_list list.  */
  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);

  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;

  /* Determining the offset of the various parts of the static TLS
     block has several dependencies.  In addition we have to work
     around bugs in some toolchains.

     Each TLS block from the objects available at link time has a size
     and an alignment requirement.  The GNU ld computes the alignment
     requirements for the data at the positions *in the file*, though.
     I.e., it is not simply possible to allocate a block with the size
     of the TLS program header entry.  The data is laid out assuming
     that the first byte of the TLS block fulfills

       p_vaddr mod p_align == &TLS_BLOCK mod p_align

     This means we have to add artificial padding at the beginning of
     the TLS block.  These bytes are never used for the TLS data in
     this module but the first byte allocated must be aligned
     according to mod p_align == 0 so that the first byte of the TLS
     block is aligned according to p_vaddr mod p_align.  This is ugly
     and the linker can help by computing the offsets in the TLS block
     assuming the first byte of the TLS block is aligned according to
     p_align.

     The extra space which might be allocated before the first byte of
     the TLS block need not go unused.  The code below tries to use
     that memory for the next TLS block.  This can work if the total
     memory requirement for the next TLS block is smaller than the
     gap.  */
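
  /* Worked example (illustrative): with p_align == 16 and
     l_tls_firstbyte_offset == 12 (i.e. p_vaddr mod p_align == 12), the
     firstbyte value computed below is (-12) & 15 == 4.  The block is
     then placed so that its first byte lies 4 bytes before a 16-byte
     boundary, i.e. at an address congruent to 12 mod 16, as the layout
     produced by the link editor requires.  */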

#if TLS_TCB_AT_TP
  /* We simply start with zero.  */
  size_t offset = 0;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
        {
          off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
                         - firstbyte, slotinfo[cnt].map->l_tls_align)
                + firstbyte;
          if (off <= freebottom)
            {
              freetop = off;

              /* XXX For some architectures we perhaps should store the
                 negative offset.  */
              slotinfo[cnt].map->l_tls_offset = off;
              continue;
            }
        }

      off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
                     slotinfo[cnt].map->l_tls_align) + firstbyte;
      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
                + (freebottom - freetop))
        {
          freetop = offset;
          freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
        }
      offset = off;

      /* XXX For some architectures we perhaps should store the
         negative offset.  */
      slotinfo[cnt].map->l_tls_offset = off;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
                                        max_align)
                               + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* The TLS blocks start right after the TCB.  */
  size_t offset = TLS_TCB_SIZE;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
        {
          off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
          if (off - freebottom < firstbyte)
            off += slotinfo[cnt].map->l_tls_align;
          if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
            {
              slotinfo[cnt].map->l_tls_offset = off - firstbyte;
              freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
                            - firstbyte);
              continue;
            }
        }

      off = roundup (offset, slotinfo[cnt].map->l_tls_align);
      if (off - offset < firstbyte)
        off += slotinfo[cnt].map->l_tls_align;

      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
      if (off - firstbyte - offset > freetop - freebottom)
        {
          freebottom = offset;
          freetop = off - firstbyte;
        }

      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
                                       TCB_ALIGNMENT);
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

  /* The alignment requirement for the static TLS block.  */
  GLRO (dl_tls_static_align) = max_align;
}
#endif /* SHARED */

static void *
allocate_dtv (void *result)
{
  dtv_t *dtv;
  size_t dtv_length;

  /* Relaxed MO, because the dtv size is later rechecked, not relied on.  */
  size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
  /* We allocate a few more elements in the dtv than are needed for the
     initial set of modules.  This should, in most cases, avoid
     expansions of the dtv.  */
  dtv_length = max_modid + DTV_SURPLUS;
  dtv = calloc (dtv_length + 2, sizeof (dtv_t));
  if (dtv != NULL)
    {
      /* This is the initial length of the dtv.  */
      dtv[0].counter = dtv_length;

      /* The rest of the dtv (including the generation counter) is
         initialized with zero to indicate nothing there.  */

      /* Add the dtv to the thread data structures.  */
      INSTALL_DTV (result, dtv);
    }
  else
    result = NULL;

  return result;
}
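
/* Sketch of the resulting layout (the per-architecture INSTALL_DTV is
   expected to install dtv + 1, cf. INSTALL_DTV (result, &dtv[-1]) in
   _dl_allocate_tls_init, so that module IDs index the installed array
   directly):

     dtv[0]  length of the dtv (dtv_length)  -> dtv[-1] once installed
     dtv[1]  generation counter              -> dtv[0] once installed
     dtv[2]  entry for module ID 1           -> dtv[1] once installed
     ...  */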

/* Get size and alignment requirements of the static TLS block.  This
   function is no longer used by glibc itself, but the GCC sanitizers
   use it despite the GLIBC_PRIVATE status.  */
void
_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
{
  *sizep = GLRO (dl_tls_static_size);
  *alignp = GLRO (dl_tls_static_align);
}
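
/* Usage sketch (illustrative, not glibc code): an out-of-tree consumer
   has to bind to the GLIBC_PRIVATE version of the symbol explicitly,
   e.g. with something like

     void (*get_info) (size_t *, size_t *)
       = dlvsym (RTLD_DEFAULT, "_dl_get_tls_static_info", "GLIBC_PRIVATE");

   GLIBC_PRIVATE symbols carry no ABI stability guarantee.  */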

/* Derive the location of the pointer to the start of the original
   allocation (before alignment) from the pointer to the TCB.  */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, and the pointer to the front
     follows the TCB.  */
  void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
     before that.  */
  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
  return original_pointer_location;
}
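
/* Layout sketches (illustrative), with P denoting the saved pointer to
   the start of the malloc'd block:

     TLS_TCB_AT_TP:  [padding][TLS blocks][TCB][P]
     TLS_DTV_AT_TP:  [padding][P][pre-TCB][TCB][TLS blocks]

   The TCB argument above is the pointer _dl_allocate_tls_storage
   returns.  */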

void *
_dl_allocate_tls_storage (void)
{
  void *result;
  size_t size = GLRO (dl_tls_static_size);

#if TLS_DTV_AT_TP
  /* Memory layout is:
     [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
                          ^ This should be returned.  */
  size += TLS_PRE_TCB_SIZE;
#endif

  /* Perform the allocation.  Reserve space for the required alignment
     and the pointer to the original allocation.  */
  size_t alignment = GLRO (dl_tls_static_align);
  void *allocated = malloc (size + alignment + sizeof (void *));
  if (__glibc_unlikely (allocated == NULL))
    return NULL;

  /* Perform alignment and allocate the DTV.  */
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, which determine the alignment.
     (TCB alignment requirements have been taken into account when
     calculating GLRO (dl_tls_static_align).)  */
  void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
  result = aligned + size - TLS_TCB_SIZE;

  /* Clear the TCB data structure.  We can't ask the caller (i.e.
     libpthread) to do it, because we will initialize the DTV et al.  */
  memset (result, '\0', TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* Pre-TCB and TCB come before the TLS blocks.  The layout computed
     in _dl_determine_tlsoffset assumes that the TCB is aligned to the
     TLS block alignment, and not just the TLS blocks after it.  This
     can leave an unused alignment gap between the TCB and the TLS
     blocks.  */
  result = (void *) roundup
    (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
     alignment);

  /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
     it.  We can't ask the caller (i.e. libpthread) to do it, because
     we will initialize the DTV et al.  */
  memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
#endif

  /* Record the value of the original pointer for later
     deallocation.  */
  *tcb_to_pointer_to_free_location (result) = allocated;

  result = allocate_dtv (result);
  if (result == NULL)
    free (allocated);
  return result;
}


#ifndef SHARED
extern dtv_t _dl_static_dtv[];
# define _dl_initial_dtv (&_dl_static_dtv[1])
#endif

static dtv_t *
_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
{
  /* Resize the dtv.  */
  dtv_t *newp;
  size_t newsize = max_modid + DTV_SURPLUS;
  size_t oldsize = dtv[-1].counter;

  if (dtv == GL(dl_initial_dtv))
    {
      /* This is the initial dtv that was either statically allocated in
         __libc_setup_tls or allocated during rtld startup using the
         dl-minimal.c malloc instead of the real malloc.  We can't free
         it, we have to abandon the old storage.  */

      newp = malloc ((2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
      memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
    }
  else
    {
      newp = realloc (&dtv[-1],
                      (2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
    }

  newp[0].counter = newsize;

  /* Clear the newly allocated part.  */
  memset (newp + 2 + oldsize, '\0',
          (newsize - oldsize) * sizeof (dtv_t));

  /* Return a pointer to the generation counter, i.e. the new dtv.  */
  return &newp[1];
}
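
/* Illustrative example: growing a dtv whose dtv[-1].counter is 16 to
   cover max_modid 20 allocates 2 + 20 + DTV_SURPLUS dtv_t slots (the
   length and generation words plus the module entries), preserves the
   old contents, and zero-fills the tail so the new entries read as
   empty.  */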


/* Allocate initial TLS.  RESULT should be a non-NULL pointer to storage
   for the TLS space.  The DTV may be resized, and so this function may
   call malloc to allocate that space.  The loader's GL(dl_load_tls_lock)
   is taken when manipulating global TLS-related data in the loader.  */
void *
_dl_allocate_tls_init (void *result, bool init_tls)
{
  if (result == NULL)
    /* The memory allocation failed.  */
    return NULL;

  dtv_t *dtv = GET_DTV (result);
  struct dtv_slotinfo_list *listp;
  size_t total = 0;
  size_t maxgen = 0;

  /* Protects global dynamic TLS related state.  */
  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));

  /* Check if the current dtv is big enough.  */
  if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
    {
      /* Resize the dtv.  */
      dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));

      /* Install this new dtv in the thread data structures.  */
      INSTALL_DTV (result, &dtv[-1]);
    }

  /* We have to prepare the dtv for all currently loaded modules using
     TLS.  For those which are dynamically loaded we add the values
     indicating deferred allocation.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  while (1)
    {
      size_t cnt;

      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
        {
          struct link_map *map;
          void *dest;

          /* Check for the total number of used slots.  */
          if (total + cnt > GL(dl_tls_max_dtv_idx))
            break;

          map = listp->slotinfo[cnt].map;
          if (map == NULL)
            /* Unused entry.  */
            continue;

          /* Keep track of the maximum generation number.  This might
             not be the generation counter.  */
          assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
          maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);

          dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
          dtv[map->l_tls_modid].pointer.to_free = NULL;

          if (map->l_tls_offset == NO_TLS_OFFSET
              || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
            continue;

          assert (map->l_tls_modid == total + cnt);
          assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
#if TLS_TCB_AT_TP
          assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
          dest = (char *) result - map->l_tls_offset;
#elif TLS_DTV_AT_TP
          dest = (char *) result + map->l_tls_offset;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

          /* Set up the DTV entry.  The simplified __tls_get_addr that
             some platforms use in static programs requires it.  */
          dtv[map->l_tls_modid].pointer.val = dest;

          /* Copy the initialization image and clear the BSS part.  For
             audit modules or dependencies with initial-exec TLS, we cannot
             set the initial TLS image on default loader initialization
             because it would already be set by the audit setup.  However,
             subsequent thread creation would need to follow the default
             behaviour.  */
          if (map->l_ns != LM_ID_BASE && !init_tls)
            continue;
          memset (__mempcpy (dest, map->l_tls_initimage,
                             map->l_tls_initimage_size), '\0',
                  map->l_tls_blocksize - map->l_tls_initimage_size);
        }

      total += cnt;
      if (total > GL(dl_tls_max_dtv_idx))
        break;

      listp = listp->next;
      assert (listp != NULL);
    }
  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

  /* The DTV version is up-to-date now.  */
  dtv[0].counter = maxgen;

  return result;
}
rtld_hidden_def (_dl_allocate_tls_init)

void *
_dl_allocate_tls (void *mem)
{
  return _dl_allocate_tls_init (mem == NULL
                                ? _dl_allocate_tls_storage ()
                                : allocate_dtv (mem), true);
}
rtld_hidden_def (_dl_allocate_tls)


void
_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
{
  dtv_t *dtv = GET_DTV (tcb);

  /* We need to free the memory allocated for non-static TLS.  */
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    free (dtv[1 + cnt].pointer.to_free);

  /* The array starts with dtv[-1].  */
  if (dtv != GL(dl_initial_dtv))
    free (dtv - 1);

  if (dealloc_tcb)
    free (*tcb_to_pointer_to_free_location (tcb));
}
rtld_hidden_def (_dl_deallocate_tls)


#ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be more often used (at the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
# ifndef GET_ADDR_ARGS
#  define GET_ADDR_ARGS tls_index *ti
#  define GET_ADDR_PARAM ti
# endif
# ifndef GET_ADDR_MODULE
#  define GET_ADDR_MODULE ti->ti_module
# endif
# ifndef GET_ADDR_OFFSET
#  define GET_ADDR_OFFSET ti->ti_offset
# endif

/* Allocate one DTV entry.  */
static struct dtv_pointer
allocate_dtv_entry (size_t alignment, size_t size)
{
  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
    {
      /* The alignment is supported by malloc.  */
      void *ptr = malloc (size);
      return (struct dtv_pointer) { ptr, ptr };
    }

  /* Emulate memalign by manually aligning a pointer returned by
     malloc.  First compute the size with an overflow check.  */
  size_t alloc_size = size + alignment;
  if (alloc_size < size)
    return (struct dtv_pointer) {};

  /* Perform the allocation.  This is the pointer we need to free
     later.  */
  void *start = malloc (alloc_size);
  if (start == NULL)
    return (struct dtv_pointer) {};

  /* Find the aligned position within the larger allocation.  */
  void *aligned = (void *) roundup ((uintptr_t) start, alignment);

  return (struct dtv_pointer) { .val = aligned, .to_free = start };
}
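
/* Illustrative example: a request for size 100 with alignment 64 on a
   platform where _Alignof (max_align_t) is 16 takes the slow path:
   164 bytes are allocated, val is the first 64-byte boundary inside
   that buffer, and to_free keeps the pointer malloc returned so it
   can be handed to free later.  */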

static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
  struct dtv_pointer result = allocate_dtv_entry
    (map->l_tls_align, map->l_tls_blocksize);
  if (result.val == NULL)
    oom ();

  /* Initialize the memory.  */
  memset (__mempcpy (result.val, map->l_tls_initimage,
                     map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);

  return result;
}


struct link_map *
_dl_update_slotinfo (unsigned long int req_modid)
{
  struct link_map *the_map = NULL;
  dtv_t *dtv = THREAD_DTV ();

  /* The global dl_tls_dtv_slotinfo array contains for each module
     index the generation counter current when the entry was created.
     This array never shrinks so that all module indices which were
     valid at some time can be used to access it.  Before the first
     use of a new module index in this function the array was extended
     appropriately.  Access also does not have to be guarded against
     modifications of the array.  It is assumed that pointer-size
     values can be read atomically even in SMP environments.  It is
     possible that other threads at the same time dynamically load
     code and therefore add to the slotinfo list.  This is a problem
     since we must not pick up any information about incomplete work.
     The solution to this is to ignore all dtv slots which were
     created after the one we are currently interested in.  We know
     that dynamic loading for this module is completed and this is
     the last load operation we know finished.  */
  unsigned long int idx = req_modid;
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  while (idx >= listp->len)
    {
      idx -= listp->len;
      listp = listp->next;
    }

  if (dtv[0].counter < listp->slotinfo[idx].gen)
    {
      /* CONCURRENCY NOTES:

         Here the dtv needs to be updated to the new_gen generation count.

         This code may be called during TLS access when GL(dl_load_tls_lock)
         is not held.  In that case the user code has to synchronize with
         dlopen and dlclose calls of relevant modules.  A module m is
         relevant if the generation of m <= new_gen and dlclose of m is
         synchronized: a memory access here happens after the dlopen and
         before the dlclose of relevant modules.  The dtv entries for
         relevant modules need to be updated, other entries can be
         arbitrary.

         This means, for example, that the first part of the slotinfo
         list can be accessed race free, but the tail may be concurrently
         extended.  Similarly, relevant slotinfo entries can be read race
         free, but other entries are racy.  However, updating a
         non-relevant dtv entry does not affect correctness.  For a
         relevant module m, max_modid >= modid of m.  */
      size_t new_gen = listp->slotinfo[idx].gen;
      size_t total = 0;
      size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
      assert (max_modid >= req_modid);

      /* We have to look through the entire dtv slotinfo list.  */
      listp = GL(dl_tls_dtv_slotinfo_list);
      do
        {
          for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
            {
              size_t modid = total + cnt;

              /* Later entries are not relevant.  */
              if (modid > max_modid)
                break;

              size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);

              if (gen > new_gen)
                /* Not relevant.  */
                continue;

              /* If the entry is older than the current dtv layout we
                 know we don't have to handle it.  */
              if (gen <= dtv[0].counter)
                continue;

              /* If there is no map this means the entry is empty.  */
              struct link_map *map
                = atomic_load_relaxed (&listp->slotinfo[cnt].map);
              /* Check whether the current dtv array is large enough.  */
              if (dtv[-1].counter < modid)
                {
                  if (map == NULL)
                    continue;

                  /* Resize the dtv.  */
                  dtv = _dl_resize_dtv (dtv, max_modid);

                  assert (modid <= dtv[-1].counter);

                  /* Install this new dtv in the thread data
                     structures.  */
                  INSTALL_NEW_DTV (dtv);
                }

              /* If there is currently memory allocated for this
                 dtv entry, free it.  */
              /* XXX Ideally we will at some point create a memory
                 pool.  */
              free (dtv[modid].pointer.to_free);
              dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
              dtv[modid].pointer.to_free = NULL;

              if (modid == req_modid)
                the_map = map;
            }

          total += listp->len;
          if (total > max_modid)
            break;

          /* Synchronize with _dl_add_to_slotinfo.  Ideally this would
             be consume MO, since we only need to order the accesses to
             the next node after the read of the address; on most
             hardware (other than Alpha) a normal load would do that
             because of the address dependency.  */
          listp = atomic_load_acquire (&listp->next);
        }
      while (listp != NULL);

      /* This will be the new maximum generation counter.  */
      dtv[0].counter = new_gen;
    }

  return the_map;
}


static void *
__attribute_noinline__
tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
  /* The allocation was deferred.  Do it now.  */
  if (the_map == NULL)
    {
      /* Find the link map for this module.  */
      size_t idx = GET_ADDR_MODULE;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      the_map = listp->slotinfo[idx].map;
    }

  /* Make sure that, if a dlopen running in parallel forces the
     variable into static storage, we'll wait until the address in the
     static TLS block is set up, and use that.  If we're still
     undecided, make sure we make the decision while holding the lock
     as well.  */
  if (__glibc_unlikely (the_map->l_tls_offset
                        != FORCED_DYNAMIC_TLS_OFFSET))
    {
      __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
        {
          the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
        }
      else if (__glibc_likely (the_map->l_tls_offset
                               != FORCED_DYNAMIC_TLS_OFFSET))
        {
#if TLS_TCB_AT_TP
          void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
          void *p = (char *) THREAD_SELF + the_map->l_tls_offset
                    + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

          dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
          dtv[GET_ADDR_MODULE].pointer.val = p;

          return (char *) p + GET_ADDR_OFFSET;
        }
      else
        __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
    }
  struct dtv_pointer result = allocate_and_init (the_map);
  dtv[GET_ADDR_MODULE].pointer = result;
  assert (result.to_free != NULL);

  return (char *) result.val + GET_ADDR_OFFSET;
}


static struct link_map *
__attribute_noinline__
update_get_addr (GET_ADDR_ARGS)
{
  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
  dtv_t *dtv = THREAD_DTV ();

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);

  return (void *) p + GET_ADDR_OFFSET;
}

/* For all machines that have a non-macro version of __tls_get_addr, we
   want to use rtld_hidden_proto/rtld_hidden_def in order to call the
   internal alias for __tls_get_addr from ld.so.  This avoids a PLT entry
   in ld.so for __tls_get_addr.  */

#ifndef __tls_get_addr
extern void * __tls_get_addr (GET_ADDR_ARGS);
rtld_hidden_proto (__tls_get_addr)
rtld_hidden_def (__tls_get_addr)
#endif

/* The general dynamic and local dynamic models cannot be used in
   statically linked applications.  */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
  dtv_t *dtv = THREAD_DTV ();

  /* Update is needed if dtv[0].counter < the generation of the accessed
     module.  The global generation counter is used here as it is easier
     to check.  Synchronization for the relaxed MO access is guaranteed
     by user code, see CONCURRENCY NOTES in _dl_update_slotinfo.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    return update_get_addr (GET_ADDR_PARAM);

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

  return (char *) p + GET_ADDR_OFFSET;
}
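
/* Usage sketch (illustrative, not glibc code): under the general
   dynamic TLS model the compiler and link editor turn an access to a
   __thread variable in a dlopen'ed module into (roughly)

     extern void *__tls_get_addr (tls_index *);
     tls_index ti;   // GOT entry; ti_module and ti_offset are filled
                     // in by R_*_DTPMOD/R_*_DTPOFF relocations
     int *p = __tls_get_addr (&ti);

   so the fast path above is what every such access runs through.  */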
#endif


/* Look up the module's TLS block as for __tls_get_addr,
   but never touch anything.  Return null if it's not allocated yet.  */
void *
_dl_tls_get_addr_soft (struct link_map *l)
{
  if (__glibc_unlikely (l->l_tls_modid == 0))
    /* This module has no TLS segment.  */
    return NULL;

  dtv_t *dtv = THREAD_DTV ();
  /* This may be called without holding the GL(dl_load_tls_lock).  Reading
     an arbitrary gen value is fine since this is best-effort code.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      /* This thread's DTV is not completely current,
         but it might already cover this module.  */

      if (l->l_tls_modid >= dtv[-1].counter)
        /* Nope.  */
        return NULL;

      size_t idx = l->l_tls_modid;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      /* We've reached the slot for this module.
         If its generation counter is higher than the DTV's,
         this thread does not know about this module yet.  */
      if (dtv[0].counter < listp->slotinfo[idx].gen)
        return NULL;
    }

  void *data = dtv[l->l_tls_modid].pointer.val;
  if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
    /* The DTV is current, but this thread has not yet needed
       to allocate this module's segment.  */
    data = NULL;

  return data;
}


void
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
{
  /* Now that we know the object is loaded successfully, add modules
     containing TLS data to the dtv info table.  We might have to
     increase its size.  */
  struct dtv_slotinfo_list *listp;
  struct dtv_slotinfo_list *prevp;
  size_t idx = l->l_tls_modid;

  /* Find the place in the dtv slotinfo list.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  prevp = NULL;		/* Needed to shut up gcc.  */
  do
    {
      /* Does it fit in the array of this list element?  */
      if (idx < listp->len)
        break;
      idx -= listp->len;
      prevp = listp;
      listp = listp->next;
    }
  while (listp != NULL);

  if (listp == NULL)
    {
      /* When we come here, it means we have to add a new element to
         the slotinfo list, and the new module must be in the first
         slot.  */
      assert (idx == 0);

      listp = (struct dtv_slotinfo_list *)
        malloc (sizeof (struct dtv_slotinfo_list)
                + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      if (listp == NULL)
        {
          /* We ran out of memory while resizing the dtv slotinfo list.  */
          _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
cannot create TLS data structures"));
        }

      listp->len = TLS_SLOTINFO_SURPLUS;
      listp->next = NULL;
      memset (listp->slotinfo, '\0',
              TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      /* Synchronize with _dl_update_slotinfo.  */
      atomic_store_release (&prevp->next, listp);
    }

  /* Add the information into the slotinfo data structure.  */
  if (do_add)
    {
      /* Can be read concurrently.  See _dl_update_slotinfo.  */
      atomic_store_relaxed (&listp->slotinfo[idx].map, l);
      atomic_store_relaxed (&listp->slotinfo[idx].gen,
                            GL(dl_tls_generation) + 1);
    }
}
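
/* Index arithmetic example (illustrative): the slotinfo table is a
   chunked linked list, so if the first list element has len 64 and
   l_tls_modid is 70, the walk above subtracts 64 and ends up at
   index 6 of the second element.  */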

#if PTHREAD_IN_LIBC
static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Initialize the memory.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
_dl_init_static_tls (struct link_map *map)
{
  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &GL (dl_stack_used))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &GL (dl_stack_user))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
}
#endif /* PTHREAD_IN_LIBC */