(root)/
glibc-2.38/
benchtests/
bench-pthread-lock-base.c
       1  /* Measure lock functions for different threads and critical sections.
       2     Copyright (C) 2022-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #define TEST_MAIN
      20  #define TIMEOUT (20 * 60)
      21  
      22  #include <stdio.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  #include <unistd.h>
      26  #include <math.h>
      27  #include <pthread.h>
      28  #include <sys/time.h>
      29  #include <sys/sysinfo.h>
      30  #include "bench-timing.h"
      31  #include "json-lib.h"
      32  
      33  static bench_lock_t lock;
      34  static bench_lock_attr_t attr;
      35  static pthread_barrier_t barrier;
      36  
      37  #define START_ITERS 1000
      38  
      39  #pragma GCC push_options
      40  #pragma GCC optimize(1)
      41  
      42  static int __attribute__ ((noinline)) fibonacci (int i)
      43  {
      44    asm("");
      45    if (i > 2)
      46      return fibonacci (i - 1) + fibonacci (i - 2);
      47    return 10 + i;
      48  }
      49  
      50  static void
      51  do_filler (void)
      52  {
      53    char buf1[512], buf2[512];
      54    int f = fibonacci (4);
      55    memcpy (buf1, buf2, f);
      56  }
      57  
      58  static void
      59  do_filler_shared (void)
      60  {
      61    static char buf1[512], buf2[512];
      62    int f = fibonacci (4);
      63    memcpy (buf1, buf2, f);
      64  }
      65  
      66  #pragma GCC pop_options
      67  
      68  #define UNIT_WORK_CRT do_filler_shared ()
      69  #define UNIT_WORK_NON_CRT do_filler ()
      70  
      71  static inline void
      72  critical_section (int length)
      73  {
      74    for (int i = length; i >= 0; i--)
      75      UNIT_WORK_CRT;
      76  }
      77  
      78  static inline void
      79  non_critical_section (int length)
      80  {
      81    for (int i = length; i >= 0; i--)
      82      UNIT_WORK_NON_CRT;
      83  }
      84  
      85  typedef struct Worker_Params
      86  {
      87    long iters;
      88    int crt_len;
      89    int non_crt_len;
      90    timing_t duration;
      91  } Worker_Params;
      92  
      93  static void *
      94  worker (void *v)
      95  {
      96    timing_t start, stop;
      97    Worker_Params *p = (Worker_Params *) v;
      98    long iters = p->iters;
      99    int crt_len = p->crt_len;
     100    int non_crt_len = p->non_crt_len;
     101  
     102    pthread_barrier_wait (&barrier);
     103    TIMING_NOW (start);
     104    while (iters--)
     105      {
     106        LOCK (&lock);
     107        critical_section (crt_len);
     108        UNLOCK (&lock);
     109        non_critical_section (non_crt_len);
     110      }
     111    TIMING_NOW (stop);
     112  
     113    TIMING_DIFF (p->duration, start, stop);
     114    return NULL;
     115  }
     116  
     117  static double
     118  do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
     119  {
     120    int i;
     121    timing_t mean;
     122    Worker_Params *p, params[num_threads];
     123    pthread_t threads[num_threads];
     124  
     125    LOCK_INIT (&lock, &attr);
     126    pthread_barrier_init (&barrier, NULL, num_threads);
     127  
     128    for (i = 0; i < num_threads; i++)
     129      {
     130        p = &params[i];
     131        p->iters = iters;
     132        p->crt_len = crt_len;
     133        p->non_crt_len = non_crt_len;
     134        pthread_create (&threads[i], NULL, worker, (void *) p);
     135      }
     136    for (i = 0; i < num_threads; i++)
     137      pthread_join (threads[i], NULL);
     138  
     139    LOCK_DESTROY (&lock);
     140    pthread_barrier_destroy (&barrier);
     141  
     142    mean = 0;
     143    for (i = 0; i < num_threads; i++)
     144      mean += params[i].duration;
     145    mean /= num_threads;
     146    return mean;
     147  }
     148  
     149  #define RUN_COUNT 10
     150  #define MIN_TEST_SEC 0.01
     151  
     152  static void
     153  do_bench_one (const char *name, int num_threads, int crt_len, int non_crt_len,
     154  	      json_ctx_t *js)
     155  {
     156    timing_t cur;
     157    struct timeval ts, te;
     158    double tsd, ted, td;
     159    long iters, iters_limit, total_iters;
     160    timing_t curs[RUN_COUNT + 2];
     161    int i, j;
     162    double mean, stdev;
     163  
     164    iters = START_ITERS;
     165    iters_limit = LONG_MAX / 100;
     166  
     167    while (1)
     168      {
     169        gettimeofday (&ts, NULL);
     170        cur = do_one_test (num_threads, crt_len, non_crt_len, iters);
     171        gettimeofday (&te, NULL);
     172        /* Make sure the test to run at least MIN_TEST_SEC.  */
     173        tsd = ts.tv_sec + ts.tv_usec / 1000000.0;
     174        ted = te.tv_sec + te.tv_usec / 1000000.0;
     175        td = ted - tsd;
     176        if (td >= MIN_TEST_SEC || iters >= iters_limit)
     177  	break;
     178  
     179        iters *= 10;
     180      }
     181  
     182    curs[0] = cur;
     183    for (i = 1; i < RUN_COUNT + 2; i++)
     184      curs[i] = do_one_test (num_threads, crt_len, non_crt_len, iters);
     185  
     186    /* Sort the results so we can discard the fastest and slowest
     187       times as outliers.  */
     188    for (i = 0; i < RUN_COUNT + 1; i++)
     189      for (j = i + 1; j < RUN_COUNT + 2; j++)
     190        if (curs[i] > curs[j])
     191  	{
     192  	  timing_t temp = curs[i];
     193  	  curs[i] = curs[j];
     194  	  curs[j] = temp;
     195  	}
     196  
     197    /* Calculate mean and standard deviation.  */
     198    mean = 0.0;
     199    total_iters = iters * num_threads;
     200    for (i = 1; i < RUN_COUNT + 1; i++)
     201      mean += (double) curs[i] / (double) total_iters;
     202    mean /= RUN_COUNT;
     203  
     204    stdev = 0.0;
     205    for (i = 1; i < RUN_COUNT + 1; i++)
     206      {
     207        double s = (double) curs[i] / (double) total_iters - mean;
     208        stdev += s * s;
     209      }
     210    stdev = sqrt (stdev / (RUN_COUNT - 1));
     211  
     212    char buf[256];
     213    snprintf (buf, sizeof buf, "%s,non_crt_len=%d,crt_len=%d,threads=%d", name,
     214  	    non_crt_len, crt_len, num_threads);
     215  
     216    json_attr_object_begin (js, buf);
     217  
     218    json_attr_double (js, "duration", (double) cur);
     219    json_attr_double (js, "iterations", (double) total_iters);
     220    json_attr_double (js, "mean", mean);
     221    json_attr_double (js, "stdev", stdev);
     222    json_attr_double (js, "min-outlier",
     223  		    (double) curs[0] / (double) total_iters);
     224    json_attr_double (js, "min", (double) curs[1] / (double) total_iters);
     225    json_attr_double (js, "max",
     226  		    (double) curs[RUN_COUNT] / (double) total_iters);
     227    json_attr_double (js, "max-outlier",
     228  		    (double) curs[RUN_COUNT + 1] / (double) total_iters);
     229  
     230    json_attr_object_end (js);
     231  }
     232  
     233  #define TH_CONF_MAX 10
     234  
     235  int
     236  do_bench (void)
     237  {
     238    int rv = 0;
     239    json_ctx_t json_ctx;
     240    int i, j, k;
     241    int th_num, th_conf, nprocs;
     242    int threads[TH_CONF_MAX];
     243    int crt_lens[] = { 0, 1, 2, 4, 8, 16, 32, 64, 128 };
     244    int non_crt_lens[] = { 1, 32, 128 };
     245    char name[128];
     246  
     247    json_init (&json_ctx, 2, stdout);
     248    json_attr_object_begin (&json_ctx, TEST_NAME);
     249  
     250    /* The thread config begins from 1, and increases by 2x until nprocs.
     251       We also wants to test over-saturation case (1.25*nprocs).  */
     252    nprocs = get_nprocs ();
     253    th_num = 1;
     254    for (th_conf = 0; th_conf < (TH_CONF_MAX - 2) && th_num < nprocs; th_conf++)
     255      {
     256        threads[th_conf] = th_num;
     257        th_num <<= 1;
     258      }
     259    threads[th_conf++] = nprocs;
     260    threads[th_conf++] = nprocs + nprocs / 4;
     261  
     262    LOCK_ATTR_INIT (&attr);
     263    snprintf (name, sizeof name, "type=adaptive");
     264  
     265    for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
     266      {
     267        int non_crt_len = non_crt_lens[k];
     268        for (j = 0; j < (sizeof (crt_lens) / sizeof (int)); j++)
     269  	{
     270  	  int crt_len = crt_lens[j];
     271  	  for (i = 0; i < th_conf; i++)
     272  	    {
     273  	      th_num = threads[i];
     274  	      do_bench_one (name, th_num, crt_len, non_crt_len, &json_ctx);
     275  	    }
     276  	}
     277      }
     278  
     279    json_attr_object_end (&json_ctx);
     280  
     281    return rv;
     282  }
     283  
     284  #define TEST_FUNCTION do_bench ()
     285  
     286  #include "../test-skeleton.c"