(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.c/
sort-1.c
       1  /* Test and benchmark of a couple of parallel sorting algorithms.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     GCC is free software; you can redistribute it and/or modify it under
       5     the terms of the GNU General Public License as published by the Free
       6     Software Foundation; either version 3, or (at your option) any later
       7     version.
       8  
       9     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      10     WARRANTY; without even the implied warranty of MERCHANTABILITY or
      11     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      12     for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with GCC; see the file COPYING3.  If not see
      16     <http://www.gnu.org/licenses/>.  */
      17  
      18  /* { dg-additional-options "-Wno-deprecated-declarations" } */
      19  
      20  #include <limits.h>
      21  #include <omp.h>
      22  #include <stdbool.h>
      23  #include <stdio.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  int failures;
      28  
      29  #define THRESHOLD 100
      30  
      31  static void
      32  verify (const char *name, double stime, int *array, int count)
      33  {
      34    int i;
      35    double etime = omp_get_wtime ();
      36  
      37    printf ("%s: %g\n", name, etime - stime);
      38    for (i = 1; i < count; i++)
      39      if (array[i] < array[i - 1])
      40        {
      41  	printf ("%s: incorrectly sorted\n", name);
      42  	failures = 1;
      43        }
      44  }
      45  
      46  static void
      47  insertsort (int *array, int s, int e)
      48  {
      49    int i, j, val;
      50    for (i = s + 1; i <= e; i++)
      51      {
      52        val = array[i];
      53        j = i;
      54        while (j-- > s && val < array[j])
      55  	array[j + 1] = array[j];
      56        array[j + 1] = val;
      57      }
      58  }
      59  
      60  struct int_pair
      61  {
      62    int lo;
      63    int hi;
      64  };
      65  
      66  struct int_pair_stack
      67  {
      68    struct int_pair *top;
      69  #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
      70    struct int_pair arr[STACK_SIZE];
      71  };
      72  
      73  static inline void
      74  init_int_pair_stack (struct int_pair_stack *stack)
      75  {
      76    stack->top = &stack->arr[0];
      77  }
      78  
      79  static inline void
      80  push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
      81  {
      82    stack->top->lo = lo;
      83    stack->top->hi = hi;
      84    stack->top++;
      85  }
      86  
      87  static inline void
      88  pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
      89  {
      90    stack->top--;
      91    *lo = stack->top->lo;
      92    *hi = stack->top->hi;
      93  }
      94  
      95  static inline int
      96  size_int_pair_stack (struct int_pair_stack *stack)
      97  {
      98    return stack->top - &stack->arr[0];
      99  }
     100  
     101  static inline void
     102  busy_wait (void)
     103  {
     104  #if defined __i386__ || defined __x86_64__
     105    __builtin_ia32_pause ();
     106  #elif defined __ia64__
     107    __asm volatile ("hint @pause" : : : "memory");
     108  #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
     109    __asm volatile ("membar #LoadLoad" : : : "memory");
     110  #else
     111    __asm volatile ("" : : : "memory");
     112  #endif
     113  }
     114  
     115  static inline void
     116  swap (int *array, int a, int b)
     117  {
     118    int val = array[a];
     119    array[a] = array[b];
     120    array[b] = val;
     121  }
     122  
     123  static inline int
     124  choose_pivot (int *array, int lo, int hi)
     125  {
     126    int mid = (lo + hi) / 2;
     127  
     128    if (array[mid] < array[lo])
     129      swap (array, lo, mid);
     130    if (array[hi] < array[mid])
     131      {
     132        swap (array, mid, hi);
     133        if (array[mid] < array[lo])
     134  	swap (array, lo, mid);
     135      }
     136    return array[mid];
     137  }
     138  
     139  static inline int
     140  partition (int *array, int lo, int hi)
     141  {
     142    int pivot = choose_pivot (array, lo, hi);
     143    int left = lo;
     144    int right = hi;
     145  
     146    for (;;)
     147      {
     148        while (array[++left] < pivot);
     149        while (array[--right] > pivot);
     150        if (left >= right)
     151  	break;
     152        swap (array, left, right);
     153      }
     154    return left;
     155  }
     156  
     157  static void
     158  sort1 (int *array, int count)
     159  {
     160    omp_lock_t lock;
     161    struct int_pair_stack global_stack;
     162    int busy = 1;
     163    int num_threads;
     164  
     165    omp_init_lock (&lock);
     166    init_int_pair_stack (&global_stack);
     167    #pragma omp parallel firstprivate (array, count)
     168    {
     169      int lo = 0, hi = 0, mid, next_lo, next_hi;
     170      bool idle = true;
     171      struct int_pair_stack local_stack;
     172  
     173      init_int_pair_stack (&local_stack);
     174      if (omp_get_thread_num () == 0)
     175        {
     176  	num_threads = omp_get_num_threads ();
     177  	hi = count - 1;
     178  	idle = false;
     179        }
     180  
     181      for (;;)
     182        {
     183  	if (hi - lo < THRESHOLD)
     184  	  {
     185  	    insertsort (array, lo, hi);
     186  	    lo = hi;
     187  	  }
     188  	if (lo >= hi)
     189  	  {
     190  	    if (size_int_pair_stack (&local_stack) == 0)
     191  	      {
     192  	      again:
     193  		omp_set_lock (&lock);
     194  		if (size_int_pair_stack (&global_stack) == 0)
     195  		  {
     196  		    if (!idle)
     197  		      busy--;
     198  		    if (busy == 0)
     199  		      {
     200  			omp_unset_lock (&lock);
     201  			break;
     202  		      }
     203  		    omp_unset_lock (&lock);
     204  		    idle = true;
     205  		    while (size_int_pair_stack (&global_stack) == 0
     206  			   && busy)
     207  		      busy_wait ();
     208  		    goto again;
     209  		  }
     210  		if (idle)
     211  		  busy++;
     212  		pop_int_pair_stack (&global_stack, &lo, &hi);
     213  		omp_unset_lock (&lock);
     214  		idle = false;
     215  	      }
     216  	    else
     217  	      pop_int_pair_stack (&local_stack, &lo, &hi);
     218  	  }
     219  
     220  	mid = partition (array, lo, hi);
     221  	if (mid - lo < hi - mid)
     222  	  {
     223  	    next_lo = mid;
     224  	    next_hi = hi;
     225  	    hi = mid - 1;
     226  	  }
     227  	else
     228  	  {
     229  	    next_lo = lo;
     230  	    next_hi = mid - 1;
     231  	    lo = mid;
     232  	  }
     233  
     234  	if (next_hi - next_lo < THRESHOLD)
     235  	  insertsort (array, next_lo, next_hi);
     236  	else
     237  	  {
     238  	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
     239  	      {
     240  		int size;
     241  
     242  		omp_set_lock (&lock);
     243  		size = size_int_pair_stack (&global_stack);
     244  		if (size < num_threads - 1 && size < STACK_SIZE)
     245  		  push_int_pair_stack (&global_stack, next_lo, next_hi);
     246  		else
     247  		  push_int_pair_stack (&local_stack, next_lo, next_hi);
     248  		omp_unset_lock (&lock);
     249  	      }
     250  	    else
     251  	      push_int_pair_stack (&local_stack, next_lo, next_hi);
     252  	  }
     253        }
     254      }
     255    omp_destroy_lock (&lock);
     256  }
     257  
     258  static void
     259  sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
     260  {
     261    int mid;
     262  
     263    if (hi - lo < THRESHOLD)
     264      {
     265        insertsort (array, lo, hi);
     266        return;
     267      }
     268  
     269    mid = partition (array, lo, hi);
     270  
     271    if (*busy >= num_threads)
     272      {
     273        sort2_1 (array, lo, mid - 1, num_threads, busy);
     274        sort2_1 (array, mid, hi, num_threads, busy);
     275        return;
     276      }
     277  
     278    #pragma omp atomic
     279      *busy += 1;
     280  
     281    #pragma omp parallel num_threads (2) \
     282  		       firstprivate (array, lo, hi, mid, num_threads, busy)
     283    {
     284      if (omp_get_thread_num () == 0)
     285        sort2_1 (array, lo, mid - 1, num_threads, busy);
     286      else
     287        {
     288  	sort2_1 (array, mid, hi, num_threads, busy);
     289  	#pragma omp atomic
     290  	  *busy -= 1;
     291        }
     292    }
     293  }
     294  
     295  static void
     296  sort2 (int *array, int count)
     297  {
     298    int num_threads;
     299    int busy = 1;
     300  
     301    #pragma omp parallel
     302      #pragma omp single nowait
     303        num_threads = omp_get_num_threads ();
     304  
     305    sort2_1 (array, 0, count - 1, num_threads, &busy);
     306  }
     307  
     308  #if _OPENMP >= 200805
     309  static void
     310  sort3_1 (int *array, int lo, int hi)
     311  {
     312    int mid;
     313  
     314    if (hi - lo < THRESHOLD)
     315      {
     316        insertsort (array, lo, hi);
     317        return;
     318      }
     319  
     320    mid = partition (array, lo, hi);
     321    #pragma omp task
     322      sort3_1 (array, lo, mid - 1);
     323    sort3_1 (array, mid, hi);
     324  }
     325  
     326  static void
     327  sort3 (int *array, int count)
     328  {
     329    #pragma omp parallel
     330      #pragma omp single
     331        sort3_1 (array, 0, count - 1);
     332  }
     333  #endif
     334  
     335  int
     336  main (int argc, char **argv)
     337  {
     338    int i, count = 1000000;
     339    double stime;
     340    int *unsorted, *sorted, num_threads;
     341    if (argc >= 2)
     342      count = strtoul (argv[1], NULL, 0);
     343  
     344    unsorted = malloc (count * sizeof (int));
     345    sorted = malloc (count * sizeof (int));
     346    if (unsorted == NULL || sorted == NULL)
     347      {
     348        puts ("allocation failure");
     349        exit (1);
     350      }
     351  
     352    srand (0xdeadbeef);
     353    for (i = 0; i < count; i++)
     354      unsorted[i] = rand ();
     355  
     356    omp_set_nested (1);
     357    omp_set_dynamic (0);
     358    #pragma omp parallel
     359      #pragma omp single nowait
     360        num_threads = omp_get_num_threads ();
     361    printf ("Threads: %d\n", num_threads);
     362  
     363    memcpy (sorted, unsorted, count * sizeof (int));
     364    stime = omp_get_wtime ();
     365    sort1 (sorted, count);
     366    verify ("sort1", stime, sorted, count);
     367  
     368    memcpy (sorted, unsorted, count * sizeof (int));
     369    stime = omp_get_wtime ();
     370    sort2 (sorted, count);
     371    verify ("sort2", stime, sorted, count);
     372  
     373  #if _OPENMP >= 200805
     374    memcpy (sorted, unsorted, count * sizeof (int));
     375    stime = omp_get_wtime ();
     376    sort3 (sorted, count);
     377    verify ("sort3", stime, sorted, count);
     378  #endif
     379  
     380    return 0;
     381  }