glibc-2.38/nscd/mem.c
/* Cache memory handling.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"

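/* qsort helper: order hash entries by their address in the data
   area.  */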
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}

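/* qsort helper: order hash entries by the offset of the data record
   (packet) they reference.  */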
static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}

/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))

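/* With the default uint8_t word, BITS is 8, ALLBITS is 0xff, and
   HIGHBIT is 0x80.

   markrange sets the bits of MARK covering the BLOCK_ALIGN-sized
   blocks in [START, START + LEN).  For example, a range covering
   blocks 3 to 12 sets bits 3..7 of mark[0] and bits 0..4 of
   mark[1].  */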
static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
        {
          /* All fits in the partial byte.  */
          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
          return;
        }

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}

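/* Compact the data area of DB: mark every block reachable from the
   hash table, then slide the live blocks toward the start of the
   area and fix up all references.  Acquires the database write lock
   and the memory allocation lock.  */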
void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* Our caller prune_cache also uses a stack-allocated array.
     Account for it here; if that array was too large it was
     malloc'ed instead and uses no stack space.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

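  /* From here on STACK_USED tracks the combined alloca usage of this
     function and its caller so the total stays below MAX_STACK_USE;
     anything larger falls back to the heap.  */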
  /* Create an array which can hold pointers to all the hash table
     entries, twice over: HE for sorting by entry address and HE_DATA
     for sorting by data offset.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];

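  /* Walk every hash chain, collecting the entries and marking the
     blocks they occupy.  */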
  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
        {
          assert (cnt < db->head->nentries);
          he[cnt] = (struct hashentry *) (db->data + run);

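          /* Remember where the reference to this entry is stored so
             the chain can be relinked after blocks move.  */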
          he[cnt]->prevp = prevp;
          prevp = &he[cnt]->next;

          /* This is the hash entry itself.  */
          markrange (mark, run, sizeof (struct hashentry));

          /* Add the information for the data itself.  We do this
             only for the one special entry marked with FIRST.  */
          if (he[cnt]->first)
            {
              struct datahead *dh
                = (struct datahead *) (db->data + he[cnt]->packet);
              markrange (mark, he[cnt]->packet, dh->allocsize);
            }

          run = he[cnt]->next;

          ++cnt;
        }
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);

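  /* The obstack backs the MOVEINFO records that do not fit on the
     stack; it is released in one go at the end.  */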
  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  while ((mark[high - 1] & mask) == 0)
    mask >>= 1;

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);
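  /* OFF_FREE is now the offset of the first unmarked block; all
     memory below it is in use and stays in place.  */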
  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the initial area, which does not
     get moved.  */
  while (next_hash < &he[db->head->nentries]
         && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
         && (*next_data)->packet < off_free)
    ++next_data;

  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 1);
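  /* An odd GC_CYCLE value tells clients reading the shared mapping
     that a collection is in progress; they are expected to retry or
     fall back to asking the daemon until the value is even again.  */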
  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

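  /* Sweep: alternate between runs of clear bits (recoverable blocks)
     and runs of set bits (live blocks), queueing one move per live
     run.  */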
  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
         entry in MARK, MASK is the bit, and CNT is the bit number.
         OFF_ALLOC is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
        {
          /* No other bit set in the same element of MARK.  Search in the
             following memory.  */
          do
            ++byte;
          while (byte < high && mark[byte] == 0);

          if (byte == high)
            /* That was it.  */
            break;

          mask = 1;
          cnt = 0;
        }
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
        {
          ++cnt;
          mask <<= 1;
        }

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
        {
          /* All other bits set.  Search the next bytes in MARK.  */
          do
            ++byte;
          while (byte < high && mark[byte] == ALLBITS);

          mask = 1;
          cnt = 0;
        }
      if (byte < high)
        {
          /* Find the exact bit.  */
          while ((mark[byte] & mask) != 0)
            {
              ++cnt;
              mask <<= 1;
            }
        }

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
         OFF_ALLOCEND (not included) to the memory starting at
         OFF_FREE.  First fix up all the entries for the
         displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move;
      if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
                            1))
        new_move = alloca_account (sizeof (*new_move), stack_used);
      else
        new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list so that we can always append at the
         end.  */
      if (moves == NULL)
        moves = new_move->next = new_move;
      else
        {
          new_move->next = moves->next;
          moves = moves->next = new_move;
        }

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
        {
          /* Determine whether the next entry is for a hash entry or
             the data.  */
          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
            {
              /* Just correct the forward reference.  */
              *(*next_hash++)->prevp -= disp;

              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
                            & ~BLOCK_ALIGN_M1);
            }
          else
            {
              assert (next_data < &he_data[db->head->nentries]);
              assert ((*next_data)->packet == off_alloc);

              struct datahead *dh = (struct datahead *) (db->data + off_alloc);
              do
                {
                  assert ((*next_data)->key >= (*next_data)->packet);
                  assert ((*next_data)->key + (*next_data)->len
                          <= (*next_data)->packet + dh->allocsize);

                  (*next_data)->packet -= disp;
                  (*next_data)->key -= disp;
                  ++next_data;
                }
              while (next_data < &he_data[db->head->nentries]
                     && (*next_data)->packet == off_alloc);

              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
            }
        }
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
        /* We are done, that was the last block.  */
        break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
        {
          assert ((char *) runp->to >= db->data);
          assert ((char *) runp->to + runp->size
                  <= db->data + db->head->first_free);
          assert ((char *) runp->from >= db->data);
          assert ((char *) runp->from + runp->size
                  <= db->data + db->head->first_free);

          /* The regions may overlap.  */
          memmove (runp->to, runp->from, runp->size);
          runp = runp->next;
        }
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
        dbg_log (_("freed %zu bytes in %s cache"),
                 (size_t) (db->head->first_free
                           - ((char *) moves->to + moves->size - db->data)),
                 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
         available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
        {
          for (size_t idx = 0; idx < db->head->module; ++idx)
            {
              ref_t run = db->head->array[idx];
              size_t cnt = 0;

              while (run != ENDREF)
                {
                  if (run + sizeof (struct hashentry) > db->head->first_free)
                    {
                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
                               "%" PRIu32 "+%zu > %zu\n",
                               cnt, idx, run, sizeof (struct hashentry),
                               (size_t) db->head->first_free);
                      break;
                    }

                  struct hashentry *he = (struct hashentry *) (db->data + run);

                  if (he->key + he->len > db->head->first_free)
                    dbg_log ("key of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->key, (size_t) he->len,
                             (size_t) db->head->first_free);

                  if (he->packet + sizeof (struct datahead)
                      > db->head->first_free)
                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->packet, sizeof (struct datahead),
                             (size_t) db->head->first_free);
                  else
                    {
                      struct datahead *dh = (struct datahead *) (db->data
                                                                 + he->packet);
                      if (he->packet + dh->allocsize
                          > db->head->first_free)
                        dbg_log ("full data of entry %zu in hash bucket %zu "
                                 "out of bounds: %" PRIu32 "+%zu > %zu\n",
                                 cnt, idx, he->packet, (size_t) dh->allocsize,
                                 (size_t) db->head->first_free);
                    }

                  run = he->next;
                  ++cnt;
                }
            }
        }
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
           MS_ASYNC);

  /* Now we are done modifying the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}

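/* Allocate LEN bytes (rounded up to a multiple of BLOCK_ALIGN) from
   the data area of DB.  If DATA_ALLOC is nonzero the database read
   lock is acquired and, on success, left locked for the caller to
   release.  Returns a pointer into the data area, or NULL if the
   pool is exhausted and the backing file could not be grown.  */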
void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
        {
          /* Try to resize the database.  Grow by 2 * LEN or by 1/8th
             of the current size, whichever is larger.  */
          size_t oldtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + db->head->data_size);
          size_t new_data_size = (db->head->data_size
                                  + MAX (2 * len, db->head->data_size / 8));
          size_t newtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t), ALIGN)
                             + new_data_size);
          if (newtotal > db->max_db_size)
            {
              new_data_size -= newtotal - db->max_db_size;
              newtotal = db->max_db_size;
            }

          if (db->mmap_used && newtotal > oldtotal
              /* We only have to adjust the file size.  The new pages
                 become magically available.  */
              && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
                                                          newtotal
                                                          - oldtotal)) == 0)
            {
              db->head->data_size = new_data_size;
              tried_resize = true;
              goto retry;
            }
        }
      if (data_alloc)
        pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
        {
          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

          db->last_alloc_failed = true;
        }

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}