(root)/
glibc-2.38/
stdlib/
cxa_thread_atexit_impl.c
       1  /* Register destructors for C++ TLS variables declared with thread_local.
       2     Copyright (C) 2013-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  /* CONCURRENCY NOTES:
      20  
      21     This documents concurrency for the non-POD TLS destructor registration,
      22     calling and destruction.  The functions __cxa_thread_atexit_impl,
      23     _dl_close_worker and __call_tls_dtors are the three main routines that may
      24     run concurrently and access shared data.  The shared data in all possible
      25     combinations of all three functions are the link map list, a link map for a
      26     DSO and the link map member l_tls_dtor_count.
      27  
      28     __cxa_thread_atexit_impl acquires the dl_load_lock before accessing any
      29     shared state and hence multiple of its instances can safely execute
      30     concurrently.
      31  
      32     _dl_close_worker acquires the dl_load_lock before accessing any shared state
      33     as well and hence can concurrently execute multiple of its own instances as
      34     well as those of __cxa_thread_atexit_impl safely.  Not all accesses to
      35     l_tls_dtor_count are protected by the dl_load_lock, so we need to
      36     synchronize using atomics.
      37  
      38     __call_tls_dtors accesses the l_tls_dtor_count without taking the lock; it
      39     decrements the value by one.  It does not need the big lock because it does
      40     not access any other shared state except for the current DSO link map and
      41     its member l_tls_dtor_count.
      42  
      43     Correspondingly, _dl_close_worker loads l_tls_dtor_count and if it is zero,
      44     unloads the DSO, thus deallocating the current link map.  This is the goal
      45     of maintaining l_tls_dtor_count - to unload the DSO and free resources if
      46     there are no pending destructors to be called.
      47  
      48     We want to eliminate the inconsistent state where the DSO is unloaded in
      49     _dl_close_worker before it is used in __call_tls_dtors.  This could happen
      50     if __call_tls_dtors uses the link map after it sets l_tls_dtor_count to 0,
      51     since _dl_close_worker will conclude from the 0 l_tls_dtor_count value that
      52     it is safe to unload the DSO.  Hence, to ensure that this does not happen,
      53     the following conditions must be met:
      54  
      55     1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO is
      56        unloaded and its link map is freed
   2. The link map dereference in __call_tls_dtors happens before the
      l_tls_dtor_count decrement.
      59  
      60     To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should
      61     have release semantics and the load in _dl_close_worker should have acquire
      62     semantics.
      63  
      64     Concurrent executions of __call_tls_dtors should only ensure that the value
      65     is accessed atomically; no reordering constraints need to be considered.
      66     Likewise for the increment of l_tls_dtor_count in __cxa_thread_atexit_impl.
      67  
      68     There is still a possibility on concurrent execution of _dl_close_worker and
      69     __call_tls_dtors where _dl_close_worker reads the value of l_tls_dtor_count
      70     as 1, __call_tls_dtors decrements the value of l_tls_dtor_count but
      71     _dl_close_worker does not unload the DSO, having read the old value.  This
      72     is not very different from a case where __call_tls_dtors is called after
      73     _dl_close_worker on the DSO and hence is an accepted execution.  */
      74  
      75  #include <stdio.h>
      76  #include <stdlib.h>
      77  #include <ldsodefs.h>
      78  #include <pointer_guard.h>
      79  
/* Type of a thread_local destructor: it is called with the object to be
   destroyed as its only argument.  */
typedef void (*dtor_func) (void *);
      81  
/* One pending destructor call.  Entries form a singly linked list that is
   private to the registering thread.  */
struct dtor_list
{
  /* Destructor to run.  Stored in pointer-mangled form by
     __cxa_thread_atexit_impl and demangled again right before the call.  */
  dtor_func func;
  /* Argument passed to FUNC.  */
  void *obj;
  /* Link map whose l_tls_dtor_count was incremented when this entry was
     registered; the count is decremented again once FUNC has run.  */
  struct link_map *map;
  /* Entry that was at the head of the list when this one was prepended.  */
  struct dtor_list *next;
};
      89  
/* Head of the calling thread's list of registered destructors.  New entries
   are prepended, so the most recently registered destructor runs first.  */
static __thread struct dtor_list *tls_dtor_list;
/* Per-thread DSO handle that __cxa_thread_atexit_impl compares with its
   DSO_SYMBOL argument to decide whether a link map lookup is needed.  */
static __thread void *dso_symbol_cache;
/* Per-thread link map remembered from a lookup in
   __cxa_thread_atexit_impl.  */
static __thread struct link_map *lm_cache;
      93  
      94  /* Register a destructor for TLS variables declared with the 'thread_local'
      95     keyword.  This function is only called from code generated by the C++
      96     compiler.  FUNC is the destructor function and OBJ is the object to be
      97     passed to the destructor.  DSO_SYMBOL is the __dso_handle symbol that each
      98     DSO has at a unique address in its map, added from crtbegin.o during the
      99     linking phase.  */
     100  int
     101  __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
     102  {
     103    PTR_MANGLE (func);
     104  
     105    /* Prepend.  */
     106    struct dtor_list *new = calloc (1, sizeof (struct dtor_list));
     107    if (__glibc_unlikely (new == NULL))
     108      __libc_fatal ("Fatal glibc error: failed to register TLS destructor: "
     109  		  "out of memory\n");
     110    new->func = func;
     111    new->obj = obj;
     112    new->next = tls_dtor_list;
     113    tls_dtor_list = new;
     114  
     115    /* We have to acquire the big lock to prevent a racing dlclose from pulling
     116       our DSO from underneath us while we're setting up our destructor.  */
     117    __rtld_lock_lock_recursive (GL(dl_load_lock));
     118  
     119    /* See if we already encountered the DSO.  */
     120    if (__glibc_unlikely (dso_symbol_cache != dso_symbol))
     121      {
     122        ElfW(Addr) caller = (ElfW(Addr)) dso_symbol;
     123  
     124        struct link_map *l = _dl_find_dso_for_object (caller);
     125  
     126        /* If the address is not recognized the call comes from the main
     127  	 program (we hope).  */
     128        lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded;
     129      }
     130  
     131    /* This increment may only be concurrently observed either by the decrement
     132       in __call_tls_dtors since the other l_tls_dtor_count access in
     133       _dl_close_worker is protected by the dl_load_lock.  The execution in
     134       __call_tls_dtors does not really depend on this value beyond the fact that
     135       it should be atomic, so Relaxed MO should be sufficient.  */
     136    atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1);
     137    __rtld_lock_unlock_recursive (GL(dl_load_lock));
     138  
     139    new->map = lm_cache;
     140  
     141    return 0;
     142  }
     143  
     144  /* Call the destructors.  This is called either when a thread returns from the
     145     initial function or when the process exits via the exit function.  */
     146  void
     147  __call_tls_dtors (void)
     148  {
     149    while (tls_dtor_list)
     150      {
     151        struct dtor_list *cur = tls_dtor_list;
     152        dtor_func func = cur->func;
     153        PTR_DEMANGLE (func);
     154  
     155        tls_dtor_list = tls_dtor_list->next;
     156        func (cur->obj);
     157  
     158        /* Ensure that the MAP dereference happens before
     159  	 l_tls_dtor_count decrement.  That way, we protect this access from a
     160  	 potential DSO unload in _dl_close_worker, which happens when
     161  	 l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */
     162        atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
     163        free (cur);
     164      }
     165  }
     166  libc_hidden_def (__call_tls_dtors)