(root)/
glibc-2.38/
malloc/
memusage.c
       1  /* Profile heap and stack memory usage of running program.
       2     Copyright (C) 1998-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <assert.h>
      20  #include <dlfcn.h>
      21  #include <errno.h>
      22  #include <error.h>
      23  #include <fcntl.h>
      24  #include <libintl.h>
      25  #include <stdatomic.h>
      26  #include <stdbool.h>
      27  #include <stdio.h>
      28  #include <stdlib.h>
      29  #include <stdarg.h>
      30  #include <sys/mman.h>
      31  #include <sys/time.h>
      32  #include <unistd.h>
      33  #include <unistd_ext.h>
      34  
      35  #include <hp-timing.h>
      36  #include <machine-sp.h>
      37  #include <stackinfo.h>  /* For _STACK_GROWS_UP  */
      38  
/* Pointers to the real functions.  These are determined using `dlsym'
   when really needed; me () fills all of them in with RTLD_NEXT
   lookups.  */

/* Real allocator entry points.  */
static void *(*mallocp)(size_t);
static void *(*reallocp) (void *, size_t);
static void *(*callocp) (size_t, size_t);
static void (*freep) (void *);

/* Real memory-mapping entry points.  */
static void *(*mmapp) (void *, size_t, int, int, int, off_t);
static void *(*mmap64p) (void *, size_t, int, int, int, off64_t);
static int (*munmapp) (void *, size_t);
static void *(*mremapp) (void *, size_t, size_t, int, void *);
      50  
      51  enum
      52  {
      53    idx_malloc = 0,
      54    idx_realloc,
      55    idx_calloc,
      56    idx_free,
      57    idx_mmap_r,
      58    idx_mmap_w,
      59    idx_mmap_a,
      60    idx_mremap,
      61    idx_munmap,
      62    idx_last
      63  };
      64  
      65  
/* Bookkeeping record that the malloc/realloc/calloc wrappers place
   directly in front of the memory they hand to the caller.  */
struct header
{
  /* Size the caller asked for; the header itself is not included.  */
  size_t length;
  /* Set to MAGIC by update_data so that realloc and free can tell
     whether a block carries this header.  */
  size_t magic;
};

/* Marker stored in `magic' of every block allocated by the wrappers.  */
#define MAGIC 0xfeedbeaf
      73  
      74  
/* Per-function statistics, indexed by the idx_* constants: number of
   calls, number of failed calls, and number of bytes involved.  */
static _Atomic unsigned long int calls[idx_last];
static _Atomic unsigned long int failed[idx_last];
static _Atomic size_t total[idx_last];
/* Bytes requested through all allocating wrappers together; realloc
   and mremap contribute only the amount by which a block grows.  */
static _Atomic size_t grand_total;
/* Histogram of request sizes below 65536 bytes, one bucket per 16
   bytes.  */
static _Atomic unsigned long int histogram[65536 / 16];
/* Number of requests of 65536 bytes or more.  */
static _Atomic unsigned long int large;
/* Call counter incremented by the allocating wrappers (malloc, realloc,
   calloc, mmap, mmap64, mremap).  */
static _Atomic unsigned long int calls_total;
/* realloc statistics: result at the same address, new size smaller than
   the old one, and calls with size zero that acted as free.  */
static _Atomic unsigned long int inplace;
static _Atomic unsigned long int decreasing;
static _Atomic unsigned long int realloc_free;
/* The same in-place and shrinking statistics for mremap.  */
static _Atomic unsigned long int inplace_mremap;
static _Atomic unsigned long int decreasing_mremap;
/* Bytes currently accounted as heap, maintained by update_data.  */
static _Atomic size_t current_heap;
/* Peak values observed by update_data; see the peak_* macros below.  */
static _Atomic size_t peak_use[3];
/* Per-thread reference stack pointer against which the current stack
   depth is measured.  */
static __thread uintptr_t start_sp;

/* A few macros to make the source more readable.  */
#define peak_heap       peak_use[0]
#define peak_stack      peak_use[1]
#define peak_total      peak_use[2]

/* Largest accepted (and default) value for `buffer_size'.  */
#define DEFAULT_BUFFER_SIZE     32768
/* Number of entries collected before a write to the output file; set by
   me () once the output file has been opened.  */
static size_t buffer_size;

/* Descriptor of the output file, or -1 if no records are written.  */
static int fd = -1;

/* True if this process is not to be profiled (or cannot be).  */
static bool not_me;
/* 0 before me () has run, -1 while it is running, 1 afterwards.  */
static int initialized;
/* True if the mmap family is to be included in the statistics.  */
static bool trace_mmap;
extern const char *__progname;
     105  
/* One sample as written to the output file.  */
struct entry
{
  /* Heap usage in bytes at the time of the sample.  */
  uint64_t heap;
  /* Stack usage in bytes at the time of the sample.  */
  uint64_t stack;
  /* Low and high 32 bits of the time stamp filled in by gettime.  */
  uint32_t time_low;
  uint32_t time_high;
};

/* Sample buffer.  update_data writes it to the output file in runs of
   `buffer_size' entries whenever the lower or the upper run is full.  */
static struct entry buffer[2 * DEFAULT_BUFFER_SIZE];
/* Index of the next free slot in `buffer'.  */
static _Atomic uint32_t buffer_cnt;
/* Initial record; me () writes it twice at the start of the output
   file.  */
static struct entry first;
     117  
     118  static void
     119  gettime (struct entry *e)
     120  {
     121  #if HP_TIMING_INLINE
     122    hp_timing_t now;
     123    HP_TIMING_NOW (now);
     124    e->time_low = now & 0xffffffff;
     125    e->time_high = now >> 32;
     126  #else
     127    struct __timespec64 now;
     128    uint64_t usecs;
     129    __clock_gettime64 (CLOCK_REALTIME, &now);
     130    usecs = (uint64_t)now.tv_nsec / 1000 + (uint64_t)now.tv_sec * 1000000;
     131    e->time_low = usecs & 0xffffffff;
     132    e->time_high = usecs >> 32;
     133  #endif
     134  }
     135  
     136  static inline void
     137  peak_atomic_max (_Atomic size_t *peak, size_t val)
     138  {
     139    size_t v;
     140    do
     141      {
     142        v = atomic_load_explicit (peak, memory_order_relaxed);
     143        if (v >= val)
     144  	break;
     145      }
     146    while (! atomic_compare_exchange_weak (peak, &v, val));
     147  }
     148  
     149  /* Update the global data after a successful function call.  */
     150  static void
     151  update_data (struct header *result, size_t len, size_t old_len)
     152  {
     153    if (result != NULL)
     154      {
     155        /* Record the information we need and mark the block using a
     156           magic number.  */
     157        result->length = len;
     158        result->magic = MAGIC;
     159      }
     160  
     161    /* Compute current heap usage and compare it with the maximum value.  */
     162    size_t heap
     163      = atomic_fetch_add_explicit (&current_heap, len - old_len,
     164  				 memory_order_relaxed) + len - old_len;
     165    peak_atomic_max (&peak_heap, heap);
     166  
     167    /* Compute current stack usage and compare it with the maximum
     168       value.  The base stack pointer might not be set if this is not
     169       the main thread and it is the first call to any of these
     170       functions.  */
     171    if (__glibc_unlikely (!start_sp))
     172      start_sp = __thread_stack_pointer ();
     173  
     174    uintptr_t sp = __thread_stack_pointer ();
     175  #ifdef _STACK_GROWS_UP
     176    /* This can happen in threads where we didn't catch the thread's
     177       stack early enough.  */
     178    if (__glibc_unlikely (sp < start_sp))
     179      start_sp = sp;
     180    size_t current_stack = sp - start_sp;
     181  #else
     182    /* This can happen in threads where we didn't catch the thread's
     183       stack early enough.  */
     184    if (__glibc_unlikely (sp > start_sp))
     185      start_sp = sp;
     186    size_t current_stack = start_sp - sp;
     187  #endif
     188    peak_atomic_max (&peak_stack, current_stack);
     189  
     190    /* Add up heap and stack usage and compare it with the maximum value.  */
     191    peak_atomic_max (&peak_total, heap + current_stack);
     192  
     193    /* Store the value only if we are writing to a file.  */
     194    if (fd != -1)
     195      {
     196        uint32_t idx = atomic_fetch_add_explicit (&buffer_cnt, 1,
     197  						memory_order_relaxed);
     198        if (idx + 1 >= 2 * buffer_size)
     199          {
     200            /* We try to reset the counter to the correct range.  If
     201               this fails because of another thread increasing the
     202               counter it does not matter since that thread will take
     203               care of the correction.  */
     204            uint32_t reset = (idx + 1) % (2 * buffer_size);
     205  	  uint32_t expected = idx + 1;
     206  	  atomic_compare_exchange_weak (&buffer_cnt, &expected, reset);
     207            if (idx >= 2 * buffer_size)
     208              idx = reset - 1;
     209          }
     210        assert (idx < 2 * DEFAULT_BUFFER_SIZE);
     211  
     212        buffer[idx].heap = current_heap;
     213        buffer[idx].stack = current_stack;
     214        gettime (&buffer[idx]);
     215  
     216        /* Write out buffer if it is full.  */
     217        if (idx + 1 == buffer_size || idx + 1 == 2 * buffer_size)
     218          {
     219  	  uint32_t write_size = buffer_size * sizeof (buffer[0]);
     220  	  write_all (fd, &buffer[idx + 1 - buffer_size], write_size);
     221          }
     222      }
     223  }
     224  
     225  
     226  /* Interrupt handler.  */
     227  static void
     228  int_handler (int signo)
     229  {
     230    /* Nothing gets allocated.  Just record the stack pointer position.  */
     231    update_data (NULL, 0, 0);
     232  }
     233  
     234  
     235  /* Find out whether this is the program we are supposed to profile.
     236     For this the name in the variable `__progname' must match the one
     237     given in the environment variable MEMUSAGE_PROG_NAME.  If the variable
     238     is not present every program assumes it should be profiling.
     239  
     240     If this is the program open a file descriptor to the output file.
     241     We will write to it whenever the buffer overflows.  The name of the
     242     output file is determined by the environment variable MEMUSAGE_OUTPUT.
     243  
     244     If the environment variable MEMUSAGE_BUFFER_SIZE is set its numerical
     245     value determines the size of the internal buffer.  The number gives
     246     the number of elements in the buffer.  By setting the number to one
     247     one effectively selects unbuffered operation.
     248  
     249     If MEMUSAGE_NO_TIMER is not present an alarm handler is installed
     250     which at the highest possible frequency records the stack pointer.  */
     251  static void
     252  me (void)
     253  {
     254    const char *env = getenv ("MEMUSAGE_PROG_NAME");
     255    size_t prog_len = strlen (__progname);
     256  
     257    initialized = -1;
     258    mallocp = (void *(*)(size_t))dlsym (RTLD_NEXT, "malloc");
     259    reallocp = (void *(*)(void *, size_t))dlsym (RTLD_NEXT, "realloc");
     260    callocp = (void *(*)(size_t, size_t))dlsym (RTLD_NEXT, "calloc");
     261    freep = (void (*)(void *))dlsym (RTLD_NEXT, "free");
     262  
     263    mmapp = (void *(*)(void *, size_t, int, int, int, off_t))dlsym (RTLD_NEXT,
     264                                                                    "mmap");
     265    mmap64p =
     266      (void *(*)(void *, size_t, int, int, int, off64_t))dlsym (RTLD_NEXT,
     267                                                                "mmap64");
     268    mremapp = (void *(*)(void *, size_t, size_t, int, void *))dlsym (RTLD_NEXT,
     269                                                                     "mremap");
     270    munmapp = (int (*)(void *, size_t))dlsym (RTLD_NEXT, "munmap");
     271    initialized = 1;
     272  
     273    if (env != NULL)
     274      {
     275        /* Check for program name.  */
     276        size_t len = strlen (env);
     277        if (len > prog_len || strcmp (env, &__progname[prog_len - len]) != 0
     278            || (prog_len != len && __progname[prog_len - len - 1] != '/'))
     279          not_me = true;
     280      }
     281  
     282    /* Only open the file if it's really us.  */
     283    if (!not_me && fd == -1)
     284      {
     285        const char *outname;
     286  
     287        if (!start_sp)
     288          start_sp = __thread_stack_pointer ();
     289  
     290        outname = getenv ("MEMUSAGE_OUTPUT");
     291        if (outname != NULL && outname[0] != '\0'
     292            && (access (outname, R_OK | W_OK) == 0 || errno == ENOENT))
     293          {
     294            fd = creat64 (outname, 0666);
     295  
     296            if (fd == -1)
     297              /* Don't do anything in future calls if we cannot write to
     298                 the output file.  */
     299              not_me = true;
     300            else
     301              {
     302                /* Write the first entry.  */
     303                first.heap = 0;
     304                first.stack = 0;
     305                gettime (&first);
     306                /* Write it two times since we need the starting and end time. */
     307  	      write_all (fd, &first, sizeof (first));
     308  	      write_all (fd, &first, sizeof (first));
     309  
     310                /* Determine the buffer size.  We use the default if the
     311                   environment variable is not present.  */
     312                buffer_size = DEFAULT_BUFFER_SIZE;
     313                const char *str_buffer_size = getenv ("MEMUSAGE_BUFFER_SIZE");
     314                if (str_buffer_size != NULL)
     315                  {
     316                    buffer_size = atoi (str_buffer_size);
     317                    if (buffer_size == 0 || buffer_size > DEFAULT_BUFFER_SIZE)
     318                      buffer_size = DEFAULT_BUFFER_SIZE;
     319                  }
     320  
     321                /* Possibly enable timer-based stack pointer retrieval.  */
     322                if (getenv ("MEMUSAGE_NO_TIMER") == NULL)
     323                  {
     324                    struct sigaction act;
     325  
     326                    act.sa_handler = (sighandler_t) &int_handler;
     327                    act.sa_flags = SA_RESTART;
     328                    sigfillset (&act.sa_mask);
     329  
     330                    if (sigaction (SIGPROF, &act, NULL) >= 0)
     331                      {
     332                        struct itimerval timer;
     333  
     334                        timer.it_value.tv_sec = 0;
     335                        timer.it_value.tv_usec = 1;
     336                        timer.it_interval = timer.it_value;
     337                        setitimer (ITIMER_PROF, &timer, NULL);
     338                      }
     339                  }
     340              }
     341          }
     342  
     343        if (!not_me && getenv ("MEMUSAGE_TRACE_MMAP") != NULL)
     344          trace_mmap = true;
     345      }
     346  }
     347  
     348  
     349  /* Record the initial stack position.  */
     350  static void
     351  __attribute__ ((constructor))
     352  init (void)
     353  {
     354    start_sp = __thread_stack_pointer ();
     355    if (!initialized)
     356      me ();
     357  }
     358  
     359  
     360  /* `malloc' replacement.  We keep track of the memory usage if this is the
     361     correct program.  */
     362  void *
     363  malloc (size_t len)
     364  {
     365    struct header *result = NULL;
     366  
     367    /* Determine real implementation if not already happened.  */
     368    if (__glibc_unlikely (initialized <= 0))
     369      {
     370        if (initialized == -1)
     371          return NULL;
     372  
     373        me ();
     374      }
     375  
     376    /* If this is not the correct program just use the normal function.  */
     377    if (not_me)
     378      return (*mallocp)(len);
     379  
     380    /* Keep track of number of calls.  */
     381    atomic_fetch_add_explicit (&calls[idx_malloc], 1, memory_order_relaxed);
     382    /* Keep track of total memory consumption for `malloc'.  */
     383    atomic_fetch_add_explicit (&total[idx_malloc], len, memory_order_relaxed);
     384    /* Keep track of total memory requirement.  */
     385    atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
     386    /* Remember the size of the request.  */
     387    if (len < 65536)
     388      atomic_fetch_add_explicit (&histogram[len / 16], 1, memory_order_relaxed);
     389    else
     390      atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     391    /* Total number of calls of any of the functions.  */
     392    atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);
     393  
     394    /* Do the real work.  */
     395    result = (struct header *) (*mallocp)(len + sizeof (struct header));
     396    if (result == NULL)
     397      {
     398        atomic_fetch_add_explicit (&failed[idx_malloc], 1,
     399  				 memory_order_relaxed);
     400        return NULL;
     401      }
     402  
     403    /* Update the allocation data and write out the records if necessary.  */
     404    update_data (result, len, 0);
     405  
     406    /* Return the pointer to the user buffer.  */
     407    return (void *) (result + 1);
     408  }
     409  
     410  
     411  /* `realloc' replacement.  We keep track of the memory usage if this is the
     412     correct program.  */
     413  void *
     414  realloc (void *old, size_t len)
     415  {
     416    struct header *result = NULL;
     417    struct header *real;
     418    size_t old_len;
     419  
     420    /* Determine real implementation if not already happened.  */
     421    if (__glibc_unlikely (initialized <= 0))
     422      {
     423        if (initialized == -1)
     424          return NULL;
     425  
     426        me ();
     427      }
     428  
     429    /* If this is not the correct program just use the normal function.  */
     430    if (not_me)
     431      return (*reallocp)(old, len);
     432  
     433    if (old == NULL)
     434      {
     435        /* This is really a `malloc' call.  */
     436        real = NULL;
     437        old_len = 0;
     438      }
     439    else
     440      {
     441        real = ((struct header *) old) - 1;
     442        if (real->magic != MAGIC)
     443          /* This is no memory allocated here.  */
     444          return (*reallocp)(old, len);
     445  
     446        old_len = real->length;
     447      }
     448  
     449    /* Keep track of number of calls.  */
     450    atomic_fetch_add_explicit (&calls[idx_realloc], 1, memory_order_relaxed);
     451    if (len > old_len)
     452      {
     453        /* Keep track of total memory consumption for `realloc'.  */
     454        atomic_fetch_add_explicit (&total[idx_realloc], len - old_len,
     455  				 memory_order_relaxed);
     456        /* Keep track of total memory requirement.  */
     457        atomic_fetch_add_explicit (&grand_total, len - old_len,
     458  				 memory_order_relaxed);
     459      }
     460  
     461    if (len == 0 && old != NULL)
     462      {
     463        /* Special case.  */
     464        atomic_fetch_add_explicit (&realloc_free, 1, memory_order_relaxed);
     465        /* Keep track of total memory freed using `free'.  */
     466        atomic_fetch_add_explicit (&total[idx_free], real->length,
     467  				 memory_order_relaxed);
     468  
     469        /* Update the allocation data and write out the records if necessary.  */
     470        update_data (NULL, 0, old_len);
     471  
     472        /* Do the real work.  */
     473        (*freep) (real);
     474  
     475        return NULL;
     476      }
     477  
     478    /* Remember the size of the request.  */
     479    if (len < 65536)
     480      atomic_fetch_add_explicit (&histogram[len / 16], 1, memory_order_relaxed);
     481    else
     482      atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     483    /* Total number of calls of any of the functions.  */
     484    atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);
     485  
     486    /* Do the real work.  */
     487    result = (struct header *) (*reallocp)(real, len + sizeof (struct header));
     488    if (result == NULL)
     489      {
     490        atomic_fetch_add_explicit (&failed[idx_realloc], 1,
     491  				 memory_order_relaxed);
     492        return NULL;
     493      }
     494  
     495    /* Record whether the reduction/increase happened in place.  */
     496    if (real == result)
     497      atomic_fetch_add_explicit (&inplace, 1, memory_order_relaxed);
     498    /* Was the buffer increased?  */
     499    if (old_len > len)
     500      atomic_fetch_add_explicit (&decreasing, 1, memory_order_relaxed);
     501  
     502    /* Update the allocation data and write out the records if necessary.  */
     503    update_data (result, len, old_len);
     504  
     505    /* Return the pointer to the user buffer.  */
     506    return (void *) (result + 1);
     507  }
     508  
     509  
     510  /* `calloc' replacement.  We keep track of the memory usage if this is the
     511     correct program.  */
     512  void *
     513  calloc (size_t n, size_t len)
     514  {
     515    struct header *result;
     516    size_t size = n * len;
     517  
     518    /* Determine real implementation if not already happened.  */
     519    if (__glibc_unlikely (initialized <= 0))
     520      {
     521        if (initialized == -1)
     522          return NULL;
     523  
     524        me ();
     525      }
     526  
     527    /* If this is not the correct program just use the normal function.  */
     528    if (not_me)
     529      return (*callocp)(n, len);
     530  
     531    /* Keep track of number of calls.  */
     532    atomic_fetch_add_explicit (&calls[idx_calloc], 1, memory_order_relaxed);
     533    /* Keep track of total memory consumption for `calloc'.  */
     534    atomic_fetch_add_explicit (&total[idx_calloc], size, memory_order_relaxed);
     535    /* Keep track of total memory requirement.  */
     536    atomic_fetch_add_explicit (&grand_total, size, memory_order_relaxed);
     537    /* Remember the size of the request.  */
     538    if (size < 65536)
     539      atomic_fetch_add_explicit (&histogram[size / 16], 1,
     540  			       memory_order_relaxed);
     541    else
     542      atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     543    /* Total number of calls of any of the functions.  */
     544    ++calls_total;
     545  
     546    /* Do the real work.  */
     547    result = (struct header *) (*mallocp)(size + sizeof (struct header));
     548    if (result == NULL)
     549      {
     550        atomic_fetch_add_explicit (&failed[idx_calloc], 1,
     551  				 memory_order_relaxed);
     552        return NULL;
     553      }
     554  
     555    /* Update the allocation data and write out the records if necessary.  */
     556    update_data (result, size, 0);
     557  
     558    /* Do what `calloc' would have done and return the buffer to the caller.  */
     559    return memset (result + 1, '\0', size);
     560  }
     561  
     562  
     563  /* `free' replacement.  We keep track of the memory usage if this is the
     564     correct program.  */
     565  void
     566  free (void *ptr)
     567  {
     568    struct header *real;
     569  
     570    /* Determine real implementation if not already happened.  */
     571    if (__glibc_unlikely (initialized <= 0))
     572      {
     573        if (initialized == -1)
     574          return;
     575  
     576        me ();
     577      }
     578  
     579    /* If this is not the correct program just use the normal function.  */
     580    if (not_me)
     581      {
     582        (*freep) (ptr);
     583        return;
     584      }
     585  
     586    /* `free (NULL)' has no effect.  */
     587    if (ptr == NULL)
     588      {
     589        atomic_fetch_add_explicit (&calls[idx_free], 1, memory_order_relaxed);
     590        return;
     591      }
     592  
     593    /* Determine the pointer to the header.  */
     594    real = ((struct header *) ptr) - 1;
     595    if (real->magic != MAGIC)
     596      {
     597        /* This block wasn't allocated here.  */
     598        (*freep) (ptr);
     599        return;
     600      }
     601  
     602    /* Keep track of number of calls.  */
     603    atomic_fetch_add_explicit (&calls[idx_free], 1, memory_order_relaxed);
     604    /* Keep track of total memory freed using `free'.  */
     605    atomic_fetch_add_explicit (&total[idx_free], real->length,
     606  			     memory_order_relaxed);
     607  
     608    /* Update the allocation data and write out the records if necessary.  */
     609    update_data (NULL, 0, real->length);
     610  
     611    /* Do the real work.  */
     612    (*freep) (real);
     613  }
     614  
     615  
     616  /* `mmap' replacement.  We do not have to keep track of the size since
     617     `munmap' will get it as a parameter.  */
     618  void *
     619  mmap (void *start, size_t len, int prot, int flags, int fd, off_t offset)
     620  {
     621    void *result = NULL;
     622  
     623    /* Determine real implementation if not already happened.  */
     624    if (__glibc_unlikely (initialized <= 0))
     625      {
     626        if (initialized == -1)
     627          return NULL;
     628  
     629        me ();
     630      }
     631  
     632    /* Always get a block.  We don't need extra memory.  */
     633    result = (*mmapp)(start, len, prot, flags, fd, offset);
     634  
     635    if (!not_me && trace_mmap)
     636      {
     637        int idx = (flags & MAP_ANON
     638                   ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
     639  
     640        /* Keep track of number of calls.  */
     641        atomic_fetch_add_explicit (&calls[idx], 1, memory_order_relaxed);
     642        /* Keep track of total memory consumption for `malloc'.  */
     643        atomic_fetch_add_explicit (&total[idx], len, memory_order_relaxed);
     644        /* Keep track of total memory requirement.  */
     645        atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
     646        /* Remember the size of the request.  */
     647        if (len < 65536)
     648          atomic_fetch_add_explicit (&histogram[len / 16], 1,
     649  				   memory_order_relaxed);
     650        else
     651          atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     652        /* Total number of calls of any of the functions.  */
     653        atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);
     654  
     655        /* Check for failures.  */
     656        if (result == NULL)
     657          atomic_fetch_add_explicit (&failed[idx], 1, memory_order_relaxed);
     658        else if (idx == idx_mmap_w)
     659          /* Update the allocation data and write out the records if
     660             necessary.  Note the first parameter is NULL which means
     661             the size is not tracked.  */
     662          update_data (NULL, len, 0);
     663      }
     664  
     665    /* Return the pointer to the user buffer.  */
     666    return result;
     667  }
     668  
     669  
     670  /* `mmap64' replacement.  We do not have to keep track of the size since
     671     `munmap' will get it as a parameter.  */
     672  void *
     673  mmap64 (void *start, size_t len, int prot, int flags, int fd, off64_t offset)
     674  {
     675    void *result = NULL;
     676  
     677    /* Determine real implementation if not already happened.  */
     678    if (__glibc_unlikely (initialized <= 0))
     679      {
     680        if (initialized == -1)
     681          return NULL;
     682  
     683        me ();
     684      }
     685  
     686    /* Always get a block.  We don't need extra memory.  */
     687    result = (*mmap64p)(start, len, prot, flags, fd, offset);
     688  
     689    if (!not_me && trace_mmap)
     690      {
     691        int idx = (flags & MAP_ANON
     692                   ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
     693  
     694        /* Keep track of number of calls.  */
     695        atomic_fetch_add_explicit (&calls[idx], 1, memory_order_relaxed);
     696        /* Keep track of total memory consumption for `malloc'.  */
     697        atomic_fetch_add_explicit (&total[idx], len, memory_order_relaxed);
     698        /* Keep track of total memory requirement.  */
     699        atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
     700        /* Remember the size of the request.  */
     701        if (len < 65536)
     702          atomic_fetch_add_explicit (&histogram[len / 16], 1,
     703  				   memory_order_relaxed);
     704        else
     705          atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     706        /* Total number of calls of any of the functions.  */
     707        atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);
     708  
     709        /* Check for failures.  */
     710        if (result == NULL)
     711          atomic_fetch_add_explicit (&failed[idx], 1, memory_order_relaxed);
     712        else if (idx == idx_mmap_w)
     713          /* Update the allocation data and write out the records if
     714             necessary.  Note the first parameter is NULL which means
     715             the size is not tracked.  */
     716          update_data (NULL, len, 0);
     717      }
     718  
     719    /* Return the pointer to the user buffer.  */
     720    return result;
     721  }
     722  
     723  
     724  /* `mremap' replacement.  We do not have to keep track of the size since
     725     `munmap' will get it as a parameter.  */
     726  void *
     727  mremap (void *start, size_t old_len, size_t len, int flags, ...)
     728  {
     729    void *result = NULL;
     730    va_list ap;
     731  
     732    va_start (ap, flags);
     733    void *newaddr = (flags & MREMAP_FIXED) ? va_arg (ap, void *) : NULL;
     734    va_end (ap);
     735  
     736    /* Determine real implementation if not already happened.  */
     737    if (__glibc_unlikely (initialized <= 0))
     738      {
     739        if (initialized == -1)
     740          return NULL;
     741  
     742        me ();
     743      }
     744  
     745    /* Always get a block.  We don't need extra memory.  */
     746    result = (*mremapp)(start, old_len, len, flags, newaddr);
     747  
     748    if (!not_me && trace_mmap)
     749      {
     750        /* Keep track of number of calls.  */
     751        atomic_fetch_add_explicit (&calls[idx_mremap], 1, memory_order_relaxed);
     752        if (len > old_len)
     753          {
     754            /* Keep track of total memory consumption for `malloc'.  */
     755            atomic_fetch_add_explicit (&total[idx_mremap], len - old_len,
     756  				     memory_order_relaxed);
     757            /* Keep track of total memory requirement.  */
     758            atomic_fetch_add_explicit (&grand_total, len - old_len,
     759  				     memory_order_relaxed);
     760          }
     761        /* Remember the size of the request.  */
     762        if (len < 65536)
     763          atomic_fetch_add_explicit (&histogram[len / 16], 1,
     764  				   memory_order_relaxed);
     765        else
     766          atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
     767        /* Total number of calls of any of the functions.  */
     768        atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);
     769  
     770        /* Check for failures.  */
     771        if (result == NULL)
     772          atomic_fetch_add_explicit (&failed[idx_mremap], 1,
     773  				   memory_order_relaxed);
     774        else
     775          {
     776            /* Record whether the reduction/increase happened in place.  */
     777            if (start == result)
     778              atomic_fetch_add_explicit (&inplace_mremap, 1,
     779  				       memory_order_relaxed);
     780            /* Was the buffer increased?  */
     781            if (old_len > len)
     782              atomic_fetch_add_explicit (&decreasing_mremap, 1,
     783  				       memory_order_relaxed);
     784  
     785            /* Update the allocation data and write out the records if
     786               necessary.  Note the first parameter is NULL which means
     787               the size is not tracked.  */
     788            update_data (NULL, len, old_len);
     789          }
     790      }
     791  
     792    /* Return the pointer to the user buffer.  */
     793    return result;
     794  }
     795  
     796  
     797  /* `munmap' replacement.  */
     798  int
     799  munmap (void *start, size_t len)
     800  {
     801    int result;
     802  
     803    /* Determine real implementation if not already happened.  */
     804    if (__glibc_unlikely (initialized <= 0))
     805      {
     806        if (initialized == -1)
     807          return -1;
     808  
     809        me ();
     810      }
     811  
     812    /* Do the real work.  */
     813    result = (*munmapp)(start, len);
     814  
     815    if (!not_me && trace_mmap)
     816      {
     817        /* Keep track of number of calls.  */
     818        atomic_fetch_add_explicit (&calls[idx_munmap], 1, memory_order_relaxed);
     819  
     820        if (__glibc_likely (result == 0))
     821          {
     822            /* Keep track of total memory freed using `free'.  */
     823            atomic_fetch_add_explicit (&total[idx_munmap], len,
     824  				     memory_order_relaxed);
     825  
     826            /* Update the allocation data and write out the records if
     827               necessary.  */
     828            update_data (NULL, 0, len);
     829          }
     830        else
     831          atomic_fetch_add_explicit (&failed[idx_munmap], 1,
     832  				   memory_order_relaxed);
     833      }
     834  
     835    return result;
     836  }
     837  
     838  
     839  /* Write some statistics to standard error.  */
     840  static void
     841  __attribute__ ((destructor))
     842  dest (void)
     843  {
     844    int percent, cnt;
     845    unsigned long int maxcalls;
     846  
     847    /* If we haven't done anything here just return.  */
     848    if (not_me)
     849      return;
     850  
     851    /* If we should call any of the memory functions don't do any profiling.  */
     852    not_me = true;
     853  
     854    /* Finish the output file.  */
     855    if (fd != -1)
     856      {
     857        /* Write the partially filled buffer.  */
     858        struct entry *start = buffer;
     859        uint32_t write_cnt = buffer_cnt;
     860  
     861        if (buffer_cnt > buffer_size)
     862          {
     863            start = buffer + buffer_size;
     864            write_cnt = buffer_cnt - buffer_size;
     865          }
     866  
     867        write_all (fd, start, write_cnt * sizeof (buffer[0]));
     868  
     869        /* Go back to the beginning of the file.  We allocated two records
     870           here when we opened the file.  */
     871        lseek (fd, 0, SEEK_SET);
     872        /* Write out a record containing the total size.  */
     873        first.stack = peak_total;
     874        write_all (fd, &first, sizeof (first));
     875        /* Write out another record containing the maximum for heap and
     876           stack.  */
     877        first.heap = peak_heap;
     878        first.stack = peak_stack;
     879        gettime (&first);
     880        write_all (fd, &first, sizeof (first));
     881  
     882        /* Close the file.  */
     883        close (fd);
     884        fd = -1;
     885      }
     886  
     887    /* Write a colorful statistic.  */
     888    fprintf (stderr, "\n\
     889  \e[01;32mMemory usage summary:\e[0;0m heap total: %llu, heap peak: %lu, stack peak: %lu\n\
     890  \e[04;34m         total calls   total memory   failed calls\e[0m\n\
     891  \e[00;34m malloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
     892  \e[00;34mrealloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m  (nomove:%ld, dec:%ld, free:%ld)\n\
     893  \e[00;34m calloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
     894  \e[00;34m   free|\e[0m %10lu   %12llu\n",
     895             (unsigned long long int) grand_total, (unsigned long int) peak_heap,
     896             (unsigned long int) peak_stack,
     897             (unsigned long int) calls[idx_malloc],
     898             (unsigned long long int) total[idx_malloc],
     899             failed[idx_malloc] ? "\e[01;41m" : "",
     900             (unsigned long int) failed[idx_malloc],
     901             (unsigned long int) calls[idx_realloc],
     902             (unsigned long long int) total[idx_realloc],
     903             failed[idx_realloc] ? "\e[01;41m" : "",
     904             (unsigned long int) failed[idx_realloc],
     905             (unsigned long int) inplace,
     906             (unsigned long int) decreasing,
     907             (unsigned long int) realloc_free,
     908             (unsigned long int) calls[idx_calloc],
     909             (unsigned long long int) total[idx_calloc],
     910             failed[idx_calloc] ? "\e[01;41m" : "",
     911             (unsigned long int) failed[idx_calloc],
     912             (unsigned long int) calls[idx_free],
     913             (unsigned long long int) total[idx_free]);
     914  
     915    if (trace_mmap)
     916      fprintf (stderr, "\
     917  \e[00;34mmmap(r)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
     918  \e[00;34mmmap(w)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
     919  \e[00;34mmmap(a)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
     920  \e[00;34m mremap|\e[0m %10lu   %12llu   %s%12lu\e[00;00m  (nomove: %ld, dec:%ld)\n\
     921  \e[00;34m munmap|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n",
     922               (unsigned long int) calls[idx_mmap_r],
     923               (unsigned long long int) total[idx_mmap_r],
     924               failed[idx_mmap_r] ? "\e[01;41m" : "",
     925               (unsigned long int) failed[idx_mmap_r],
     926               (unsigned long int) calls[idx_mmap_w],
     927               (unsigned long long int) total[idx_mmap_w],
     928               failed[idx_mmap_w] ? "\e[01;41m" : "",
     929               (unsigned long int) failed[idx_mmap_w],
     930               (unsigned long int) calls[idx_mmap_a],
     931               (unsigned long long int) total[idx_mmap_a],
     932               failed[idx_mmap_a] ? "\e[01;41m" : "",
     933               (unsigned long int) failed[idx_mmap_a],
     934               (unsigned long int) calls[idx_mremap],
     935               (unsigned long long int) total[idx_mremap],
     936               failed[idx_mremap] ? "\e[01;41m" : "",
     937               (unsigned long int) failed[idx_mremap],
     938               (unsigned long int) inplace_mremap,
     939               (unsigned long int) decreasing_mremap,
     940               (unsigned long int) calls[idx_munmap],
     941               (unsigned long long int) total[idx_munmap],
     942               failed[idx_munmap] ? "\e[01;41m" : "",
     943               (unsigned long int) failed[idx_munmap]);
     944  
     945    /* Write out a histoogram of the sizes of the allocations.  */
     946    fprintf (stderr, "\e[01;32mHistogram for block sizes:\e[0;0m\n");
     947  
     948    /* Determine the maximum of all calls for each size range.  */
     949    maxcalls = large;
     950    for (cnt = 0; cnt < 65536; cnt += 16)
     951      if (histogram[cnt / 16] > maxcalls)
     952        maxcalls = histogram[cnt / 16];
     953  
     954    for (cnt = 0; cnt < 65536; cnt += 16)
     955      /* Only write out the nonzero entries.  */
     956      if (histogram[cnt / 16] != 0)
     957        {
     958          percent = (histogram[cnt / 16] * 100) / calls_total;
     959          fprintf (stderr, "%5d-%-5d%12lu ", cnt, cnt + 15,
     960                   (unsigned long int) histogram[cnt / 16]);
     961          if (percent == 0)
     962            fputs (" <1% \e[41;37m", stderr);
     963          else
     964            fprintf (stderr, "%3d%% \e[41;37m", percent);
     965  
     966          /* Draw a bar with a length corresponding to the current
     967             percentage.  */
     968          percent = (histogram[cnt / 16] * 50) / maxcalls;
     969          while (percent-- > 0)
     970            fputc ('=', stderr);
     971          fputs ("\e[0;0m\n", stderr);
     972        }
     973  
     974    if (large != 0)
     975      {
     976        percent = (large * 100) / calls_total;
     977        fprintf (stderr, "   large   %12lu ", (unsigned long int) large);
     978        if (percent == 0)
     979          fputs (" <1% \e[41;37m", stderr);
     980        else
     981          fprintf (stderr, "%3d%% \e[41;37m", percent);
     982        percent = (large * 50) / maxcalls;
     983        while (percent-- > 0)
     984          fputc ('=', stderr);
     985        fputs ("\e[0;0m\n", stderr);
     986      }
     987  
     988    /* Any following malloc/free etc. calls should generate statistics again,
     989       because otherwise freeing something that has been malloced before
     990       this destructor (including struct header in front of it) wouldn't
     991       be properly freed.  */
     992    not_me = false;
     993  }