(root)/
glibc-2.38/
nscd/
cache.c
       1  /* Copyright (c) 1998-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     This program is free software; you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published
       6     by the Free Software Foundation; version 2 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program; if not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include <assert.h>
      18  #include <atomic.h>
      19  #include <errno.h>
      20  #include <error.h>
      21  #include <inttypes.h>
      22  #include <limits.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  #include <libintl.h>
      26  #include <arpa/inet.h>
      27  #include <sys/mman.h>
      28  #include <sys/param.h>
      29  #include <sys/stat.h>
      30  #include <sys/uio.h>
      31  #include <nss.h>
      32  
      33  #include "nscd.h"
      34  #include "dbg_log.h"
      35  
      36  
/* Wrapper functions with error checking for standard functions.  */
/* NOTE(review): presumably terminates the process instead of returning
   NULL on allocation failure (callers below do not check the result) —
   confirm against the xmalloc implementation.  */
extern void *xcalloc (size_t n, size_t s);


/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  Entries reaching this count are dropped by
   prune_cache instead of being refreshed.  NOTE(review): presumably
   settable via nscd.conf ("reload-count") — confirm in nscd_conf.c.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;
      44  
      45  
/* Dispatch table, indexed by request type, of the functions which
   re-read ("reload") a timed-out cache entry from its NSS source.
   prune_cache asserts that the slot for an entry's type is non-NULL
   before calling through it; each function returns the entry's new
   timeout.  */
static time_t (*const readdfcts[LASTREQ]) (struct database_dyn *,
					   struct hashentry *,
					   struct datahead *) =
{
  [GETPWBYNAME] = readdpwbyname,
  [GETPWBYUID] = readdpwbyuid,
  [GETGRBYNAME] = readdgrbyname,
  [GETGRBYGID] = readdgrbygid,
  [GETHOSTBYNAME] = readdhstbyname,
  [GETHOSTBYNAMEv6] = readdhstbynamev6,
  [GETHOSTBYADDR] = readdhstbyaddr,
  [GETHOSTBYADDRv6] = readdhstbyaddrv6,
  [GETAI] = readdhstai,
  [INITGROUPS] = readdinitgroups,
  [GETSERVBYNAME] = readdservbyname,
  [GETSERVBYPORT] = readdservbyport,
  [GETNETGRENT] = readdgetnetgrent,
  [INNETGR] = readdinnetgr
};
      65  
      66  
      67  /* Search the cache for a matching entry and return it when found.  If
      68     this fails search the negative cache and return (void *) -1 if this
      69     search was successful.  Otherwise return NULL.
      70  
      71     This function must be called with the read-lock held.  */
      72  struct datahead *
      73  cache_search (request_type type, const void *key, size_t len,
      74  	      struct database_dyn *table, uid_t owner)
      75  {
      76    unsigned long int hash = __nss_hash (key, len) % table->head->module;
      77  
      78    unsigned long int nsearched = 0;
      79    struct datahead *result = NULL;
      80  
      81    ref_t work = table->head->array[hash];
      82    while (work != ENDREF)
      83      {
      84        ++nsearched;
      85  
      86        struct hashentry *here = (struct hashentry *) (table->data + work);
      87  
      88        if (type == here->type && len == here->len
      89  	  && memcmp (key, table->data + here->key, len) == 0
      90  	  && here->owner == owner)
      91  	{
      92  	  /* We found the entry.  Increment the appropriate counter.  */
      93  	  struct datahead *dh
      94  	    = (struct datahead *) (table->data + here->packet);
      95  
      96  	  /* See whether we must ignore the entry.  */
      97  	  if (dh->usable)
      98  	    {
      99  	      /* We do not synchronize the memory here.  The statistics
     100  		 data is not crucial, we synchronize only once in a while
     101  		 in the cleanup threads.  */
     102  	      if (dh->notfound)
     103  		++table->head->neghit;
     104  	      else
     105  		{
     106  		  ++table->head->poshit;
     107  
     108  		  if (dh->nreloads != 0)
     109  		    dh->nreloads = 0;
     110  		}
     111  
     112  	      result = dh;
     113  	      break;
     114  	    }
     115  	}
     116  
     117        work = here->next;
     118      }
     119  
     120    if (nsearched > table->head->maxnsearched)
     121      table->head->maxnsearched = nsearched;
     122  
     123    return result;
     124  }
     125  
/* Add a new entry to the cache.  The return value is zero if the function
   call was successful, -1 if no hash-table slot could be allocated from
   the table's memory pool.

   This function must be called with the read-lock held.

   We modify the table but we nevertheless only acquire a read-lock.
   This is ok since we use operations which would be safe even without
   locking, given that the `prune_cache' function never runs.  Using
   the readlock reduces the chance of conflicts.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
	   bool first, struct database_dyn *table,
	   uid_t owner, bool prune_wakeup)
{
  if (__glibc_unlikely (debug_level >= 2))
    {
      /* For address lookups the key is a binary address; render it as
	 text for the log message.  All other keys are strings.  */
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
	str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			 key, buf, sizeof (buf));
      else
	str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
	       str, serv2str[type], dbnames[table - dbs],
	       first ? _(" (first)") : "");
    }

  unsigned long int hash = __nss_hash (key, len) % table->head->module;
  struct hashentry *newp;

  newp = mempool_alloc (table, sizeof (struct hashentry), 0);
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    {
      /* If necessary mark the entry as unusable so that lookups will
	 not use it.  */
      if (first)
	packet->usable = false;

      return -1;
    }

  /* KEY and PACKET already live inside the mapped data area; store
     them as offsets relative to the start of the mapping so the data
     is position-independent across processes mapping the database.  */
  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;
  assert ((newp->packet & BLOCK_ALIGN_M1) == 0);

  /* Put the new entry in the first position.  The CAS loop publishes
     the entry at the bucket head without losing concurrent insertions
     into the same bucket.  */
  /* TODO Review concurrency.  Use atomic_exchange_release.  */
  newp->next = atomic_load_relaxed (&table->head->array[hash]);
  while (!atomic_compare_exchange_weak_release (&table->head->array[hash],
						(ref_t *) &newp->next,
						(ref_t) ((char *) newp
							 - table->data)));

  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_fetch_add_relaxed (&table->head->nentries, 1);

  /* It does not matter that we are not loading the just-incremented
     value, this is just for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  /* For a persistent database, push the header up to and including the
     changed bucket slot out to the backing file.  */
  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
	   (char *) &table->head->array[hash] - (char *) table->head
	   + sizeof (ref_t), MS_ASYNC);

  /* We do not have to worry about the pruning thread if we are
     re-adding the data since this is done by the pruning thread.  We
     also do not have to do anything in case this is not the first
     time the data is entered since different data heads all have the
     same timeout.  */
  if (first && prune_wakeup)
    {
      /* Perhaps the prune thread for the table is not running in a long
	 time.  Wake it if necessary.  */
      pthread_mutex_lock (&table->prune_lock);
      time_t next_wakeup = table->wakeup_time;
      bool do_wakeup = false;
      if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL)
	{
	  /* The new entry expires before the next scheduled prune run;
	     move the wakeup time forward.  */
	  table->wakeup_time = packet->timeout;
	  do_wakeup = true;
	}
      pthread_mutex_unlock (&table->prune_lock);
      if (do_wakeup)
	pthread_cond_signal (&table->prune_cond);
    }

  return 0;
}
     233  
/* Walk through the table and remove all entries which lifetime ended.

   We have a problem here.  To actually remove the entries we must get
   the write-lock.  But since we want to keep the time we have the
   lock as short as possible we cannot simply acquire the lock when we
   start looking for timedout entries.

   Therefore we do it in two stages: first we look for entries which
   must be invalidated and remember them.  Then we get the lock and
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.

   The return value is the number of seconds after which the caller
   should run the next prune pass (24 hours when no entry carries a
   timeout).  If FD is not -1 a 32-bit zero reply is written to it once
   the scan is complete; NOW == LONG_MAX forces invalidation of every
   entry.  */
time_t
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
	{
	  /* Reply to the INVALIDATE initiator.  */
	  int32_t resp = 0;
	  writeall (fd, &resp, sizeof (resp));
	}

      /* No need to do this again anytime soon.  */
      return 24 * 60 * 60;
    }

  /* If we check for the modification of the underlying file we invalidate
     the entries also in this case.  */
  if (table->check_file && now != LONG_MAX)
    {
      struct traced_file *runp = table->traced_files;

      while (runp != NULL)
	{
#ifdef HAVE_INOTIFY
	  /* Fall back to stat-based change detection only when no
	     inotify watch is installed for this file.  */
	  if (runp->inotify_descr[TRACED_FILE] == -1)
#endif
	    {
	      struct stat64 st;

	      if (stat64 (runp->fname, &st) < 0)
		{
		  /* Print a diagnostic that the traced file was missing.
		     We must not disable tracing since the file might return
		     shortly and we want to reload it at the next pruning.
		     Disabling tracing here would go against the configuration
		     as specified by the user via check-files.  */
		  char buf[128];
		  dbg_log (_("checking for monitored file `%s': %s"),
			   runp->fname, strerror_r (errno, buf, sizeof (buf)));
		}
	      else
		{
		  /* This must be `!=` to catch cases where users turn the
		     clocks back and we still want to detect any time difference
		     in mtime.  */
		  if (st.st_mtime != runp->mtime)
		    {
		      dbg_log (_("monitored file `%s` changed (mtime)"),
			       runp->fname);
		      /* The file changed. Invalidate all entries.  */
		      now = LONG_MAX;
		      runp->mtime = st.st_mtime;
#ifdef HAVE_INOTIFY
		      /* Attempt to install a watch on the file.  */
		      install_watches (runp);
#endif
		    }
		}
	    }

	  runp = runp->next;
	}
    }

  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
     we don't need to get any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  /* MARK holds one flag per hash bucket; a set flag means the bucket
     contains at least one timed-out entry and must be revisited under
     the write lock.  Small tables keep it on the stack, larger ones on
     the heap.  */
  bool *mark;
  size_t memory_needed = cnt * sizeof (bool);
  bool mark_use_alloca;
  if (__glibc_likely (memory_needed <= MAX_STACK_USE))
    {
      mark = alloca (cnt * sizeof (bool));
      memset (mark, '\0', memory_needed);
      mark_use_alloca = true;
    }
  else
    {
      mark = xcalloc (1, memory_needed);
      mark_use_alloca = false;
    }
  /* [FIRST, LAST] will bound the range of marked buckets; the initial
     values mean "nothing marked".  */
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__glibc_unlikely (debug_level > 2))
    dbg_log (_("pruning %s cache; time %ld"),
	     dbnames[table - dbs], (long int) now);

#define NO_TIMEOUT LONG_MAX
  time_t next_timeout = NO_TIMEOUT;
  /* Stage one: scan every bucket, lock-free, marking buckets with
     expired entries and reloading or invalidating the data heads.  */
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
	{
	  struct hashentry *runp = (struct hashentry *) (data + run);
	  struct datahead *dh = (struct datahead *) (data + runp->packet);

	  /* Some debug support.  */
	  if (__glibc_unlikely (debug_level > 2))
	    {
	      char buf[INET6_ADDRSTRLEN];
	      const char *str;

	      if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
		{
		  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			     data + runp->key, buf, sizeof (buf));
		  str = buf;
		}
	      else
		str = data + runp->key;

	      dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
		       serv2str[runp->type], str, dh->timeout);
	    }

	  /* Check whether the entry timed out.  */
	  if (dh->timeout < now)
	    {
	      /* This hash bucket could contain entries which need to
		 be looked at.  */
	      mark[cnt] = true;

	      first = MIN (first, cnt);
	      last = MAX (last, cnt);

	      /* We only have to look at the data of the first entries
		 since the count information is kept in the data part
		 which is shared.  */
	      if (runp->first)
		{

		  /* At this point there are two choices: we reload the
		     value or we discard it.  Do not change NRELOADS if
		     we do not reload the record.  */
		  if ((reload_count != UINT_MAX
		       && __builtin_expect (dh->nreloads >= reload_count, 0))
		      /* We always remove negative entries.  */
		      || dh->notfound
		      /* Discard everything if the user explicitly
			 requests it.  */
		      || now == LONG_MAX)
		    {
		      /* Remove the value.  */
		      dh->usable = false;

		      /* We definitely have some garbage entries now.  */
		      any = true;
		    }
		  else
		    {
		      /* Reload the value.  We do this only for the
			 initially used key, not the additionally
			 added derived value.  */
		      assert (runp->type < LASTREQ
			      && readdfcts[runp->type] != NULL);

		      time_t timeout = readdfcts[runp->type] (table, runp, dh);
		      next_timeout = MIN (next_timeout, timeout);

		      /* If the entry has been replaced, we might need
			 cleanup.  */
		      any |= !dh->usable;
		    }
		}
	    }
	  else
	    {
	      assert (dh->usable);
	      next_timeout = MIN (next_timeout, dh->timeout);
	    }

	  run = runp->next;
	}
    }
  while (cnt > 0);

  if (__glibc_unlikely (fd != -1))
    {
      /* Reply to the INVALIDATE initiator that the cache has been
	 invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }

  /* Stage two: if anything was marked, unlink the dead entries under
     the write lock.  */
  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write lock since we are about to modify
	 the table.  */
      if (__glibc_unlikely (pthread_rwlock_trywrlock (&table->lock) != 0))
	{
	  ++table->head->wrlockdelayed;
	  pthread_rwlock_wrlock (&table->lock);
	}

      /* Now we start modifying the data.  Make sure all readers of the
	 data are aware of this and temporarily don't use the data.  An
	 odd GC_CYCLE value signals modification in progress.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 1);

      while (first <= last)
	{
	  if (mark[first])
	    {
	      /* OLD always points at the link slot referencing RUN, so
		 dead entries can be spliced out in place.  */
	      ref_t *old = &table->head->array[first];
	      ref_t run = table->head->array[first];

	      assert (run != ENDREF);
	      do
		{
		  struct hashentry *runp = (struct hashentry *) (data + run);
		  struct datahead *dh
		    = (struct datahead *) (data + runp->packet);

		  if (! dh->usable)
		    {
		      /* We need the list only for debugging but it is
			 more costly to avoid creating the list than
			 doing it.  */
		      runp->dellist = head;
		      head = runp;

		      /* No need for an atomic operation, we have the
			 write lock.  */
		      --table->head->nentries;

		      run = *old = runp->next;
		    }
		  else
		    {
		      old = &runp->next;
		      run = runp->next;
		    }
		}
	      while (run != ENDREF);
	    }

	  ++first;
	}

      /* Now we are done modifying the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 0);

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

      /* Make sure the data is saved to disk.  */
      if (table->persistent)
	msync (table->head,
	       data + table->head->first_free - (char *) table->head,
	       MS_ASYNC);

      /* One extra pass if we do debugging.  */
      if (__glibc_unlikely (debug_level > 0))
	{
	  struct hashentry *runp = head;

	  while (runp != NULL)
	    {
	      char buf[INET6_ADDRSTRLEN];
	      const char *str;

	      if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
		{
		  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			     data + runp->key, buf, sizeof (buf));
		  str = buf;
		}
	      else
		str = data + runp->key;

	      dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

	      runp = runp->dellist;
	    }
	}
    }

  if (__glibc_unlikely (! mark_use_alloca))
    free (mark);

  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  /* If there is no entry in the database and we therefore have no new
     timeout value, tell the caller to wake up in 24 hours.  */
  return next_timeout == NO_TIMEOUT ? 24 * 60 * 60 : next_timeout - now;
}