(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.c/
affinity-1.c
       1  /* Affinity tests.
       2     Copyright (C) 2013-2023 Free Software Foundation, Inc.
       3  
       4     GCC is free software; you can redistribute it and/or modify it under
       5     the terms of the GNU General Public License as published by the Free
       6     Software Foundation; either version 3, or (at your option) any later
       7     version.
       8  
       9     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      10     WARRANTY; without even the implied warranty of MERCHANTABILITY or
      11     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      12     for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with GCC; see the file COPYING3.  If not see
      16     <http://www.gnu.org/licenses/>.  */
      17  
      18  /* { dg-do run } */
      19  /* { dg-set-target-env-var OMP_PROC_BIND "false" } */
      20  /* { dg-additional-options "-Wno-deprecated-declarations" } */
      21  /* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl -Wno-deprecated-declarations" { target *-*-linux* } } */
      22  
      23  #ifndef _GNU_SOURCE
      24  #define _GNU_SOURCE
      25  #endif
      26  #include "config.h"
      27  #include <omp.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  #include <unistd.h>
      32  
      33  #ifdef DO_FORK
      34  #include <signal.h>
      35  #include <sys/wait.h>
      36  #endif
      37  #ifdef HAVE_PTHREAD_AFFINITY_NP
      38  #include <sched.h>
      39  #include <pthread.h>
      40  #ifdef INTERPOSE_GETAFFINITY
      41  #include <dlfcn.h>
      42  #endif
      43  #endif
      44  
      45  struct place
      46  {
      47    int start, len;
      48  };
      49  struct places
      50  {
      51    const char *name;
      52    int count;
      53    struct place places[8];
      54  } places_array[] = {
      55    { "", 1, { { -1, -1 } } },
      56    { "{0}:8", 8,
      57      { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
      58        { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
      59    { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } },
      60    { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } },
      61    { "{1}:7:1", 7,
      62      { { 1, 1 }, { 2, 1 }, { 3, 1 },
      63        { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
      64    { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5,
      65      { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
      66    { "1,2,{2,3,!2},3,3,!3,!{5:3:-1,!4,!5},{4},5,!4,!5,"
      67      "1:2,!{1},!2,7:3:-2,!{5},!7,!3", 3,
      68      { { 1, 1 }, { 2, 1 }, { 3, 1 } } }
      69  };
      70  
/* Number of consecutive CPUs, counting up from CPU 0, that the
   pthread_getaffinity_np wrapper in this file found set in an affinity
   mask.  It is zero until that wrapper records a value; main requires
   it to be at least 8 before forking children or verifying places.  */
unsigned long contig_cpucount;
/* The cpusetsize argument of the wrapper call that recorded
   contig_cpucount.  When nonzero, print_affinity uses it as the byte
   size of the mask it queries.  */
unsigned long min_cpusetsize;
      73  
      74  #if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
      75      && defined (CPU_ALLOC_SIZE)
      76  
      77  #if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
      78  int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);
      79  
      80  int
      81  pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
      82  {
      83    int ret;
      84    unsigned long i, max;
      85    if (orig_getaffinity_np == NULL)
      86      {
      87        orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
      88  			    dlsym (RTLD_NEXT, "pthread_getaffinity_np");
      89        if (orig_getaffinity_np == NULL)
      90  	exit (0);
      91      }
      92    ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
      93    if (ret != 0)
      94      return ret;
      95    if (contig_cpucount == 0)
      96      {
      97        max = 8 * cpusetsize;
      98        for (i = 0; i < max; i++)
      99  	if (!CPU_ISSET_S (i, cpusetsize, cpuset))
     100  	  break;
     101        contig_cpucount = i;
     102        min_cpusetsize = cpusetsize;
     103      }
     104    return ret;
     105  }
     106  #endif
     107  
     108  void
     109  print_affinity (struct place p)
     110  {
     111    static unsigned long size;
     112    if (size == 0)
     113      {
     114        if (min_cpusetsize)
     115  	size = min_cpusetsize;
     116        else
     117  	{
     118  	  size = sysconf (_SC_NPROCESSORS_CONF);
     119  	  size = CPU_ALLOC_SIZE (size);
     120  	  if (size < sizeof (cpu_set_t))
     121  	    size = sizeof (cpu_set_t);
     122  	}
     123      }
     124    cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size);
     125    if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0)
     126      {
     127        unsigned long i, len, max = 8 * size;
     128        int notfirst = 0, unexpected = 1;
     129  
     130        printf (" bound to {");
     131        for (i = 0, len = 0; i < max; i++)
     132  	if (CPU_ISSET_S (i, size, cpusetp))
     133  	  {
     134  	    if (len == 0)
     135  	      {
     136  		if (notfirst)
     137  		  {
     138  		    unexpected = 1;
     139  		    printf (",");
     140  		  }
     141  		else if (i == (unsigned long) p.start)
     142  		  unexpected = 0;
     143  		notfirst = 1;
     144  		printf ("%lu", i);
     145  	      }
     146  	    ++len;
     147  	  }
     148  	else
     149  	  {
     150  	    if (len && len != (unsigned long) p.len)
     151  	      unexpected = 1;
     152  	    if (len > 1)
     153  	      printf (":%lu", len);
     154  	    len = 0;
     155  	  }
     156        if (len && len != (unsigned long) p.len)
     157  	unexpected = 1;
     158        if (len > 1)
     159  	printf (":%lu", len);
     160        printf ("}");
     161        if (p.start != -1 && unexpected)
     162  	{
     163  	  printf (", expected {%d", p.start);
     164  	  if (p.len != 1)
     165  	    printf (":%d", p.len);
     166  	  printf ("} instead");
     167  	}
     168        else if (p.start != -1)
     169  	printf (", verified");
     170      }
     171  }
     172  #else
     173  void
     174  print_affinity (struct place p)
     175  {
     176    (void) p.start;
     177    (void) p.len;
     178  }
     179  #endif
     180  
     181  
     182  int
     183  main ()
     184  {
     185    char *env_proc_bind = getenv ("OMP_PROC_BIND");
     186    int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
     187    int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
     188    int test_spread_master_close
     189      = (env_proc_bind
     190         && (strcmp (env_proc_bind, "spread,master,close") == 0
     191  	   || strcmp (env_proc_bind, "spread,primary,close") == 0));
     192    char *env_places = getenv ("OMP_PLACES");
     193    int test_places = 0;
     194  
     195    if (omp_proc_bind_master != omp_proc_bind_primary)
     196      abort ();
     197  
     198  #ifdef DO_FORK
     199    if (env_places == NULL && contig_cpucount >= 8 && test_false
     200        && getenv ("GOMP_AFFINITY") == NULL)
     201      {
     202        int i, j, status;
     203        pid_t pid;
     204        for (j = 0; j < 3; j++)
     205  	{
     206  	  if (setenv ("OMP_PROC_BIND",
     207  		      j > 1 ? "spread,primary,close"
     208  			    : (j ? "spread,master,close" : "true"), 1) < 0)
     209  	    break;
     210  	  for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
     211  	       i; --i)
     212  	    {
     213  	      if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
     214  		break;
     215  	      pid = fork ();
     216  	      if (pid == -1)
     217  		break;
     218  	      if (pid == 0)
     219  		{
     220  		  execl ("/proc/self/exe", "affinity-1.exe", NULL);
     221  		  _exit (1);
     222  		}
     223  	      if (waitpid (pid, &status, 0) < 0)
     224  		break;
     225  	      if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
     226  		abort ();
     227  	      else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
     228  		break;
     229  	    }
     230  	  if (i)
     231  	    break;
     232  	}
     233      }
     234  #endif
     235  
     236    int first = 1;
     237    if (env_proc_bind)
     238      {
     239        printf ("OMP_PROC_BIND='%s'", env_proc_bind);
     240        first = 0;
     241      }
     242    if (env_places)
     243      printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
     244    printf ("\n");
     245  
     246    if (env_places && contig_cpucount >= 8
     247        && (test_true || test_spread_master_close))
     248      {
     249        for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
     250  	   test_places; --test_places)
     251  	if (strcmp (env_places, places_array[test_places].name) == 0)
     252  	  break;
     253      }
     254  
     255  #define verify(if_true, if_s_m_c) \
     256    if (test_false && omp_get_proc_bind () != omp_proc_bind_false)	\
     257      abort ();								\
     258    if (test_true && omp_get_proc_bind () != if_true)			\
     259      abort ();								\
     260    if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c)	\
     261      abort ();
     262  
     263    verify (omp_proc_bind_true, omp_proc_bind_spread);
     264  
     265    printf ("Initial thread");
     266    print_affinity (places_array[test_places].places[0]);
     267    printf ("\n");
     268    omp_set_nested (1);
     269    omp_set_dynamic (0);
     270  
     271    #pragma omp parallel if (0)
     272    {
     273      verify (omp_proc_bind_true, omp_proc_bind_master);
     274      #pragma omp parallel if (0)
     275      {
     276        verify (omp_proc_bind_true, omp_proc_bind_close);
     277        #pragma omp parallel if (0)
     278        {
     279  	verify (omp_proc_bind_true, omp_proc_bind_close);
     280        }
     281        #pragma omp parallel if (0) proc_bind (spread)
     282        {
     283  	verify (omp_proc_bind_spread, omp_proc_bind_spread);
     284        }
     285      }
     286      #pragma omp parallel if (0) proc_bind (master)
     287      {
     288        verify (omp_proc_bind_master, omp_proc_bind_close);
     289        #pragma omp parallel if (0)
     290        {
     291  	verify (omp_proc_bind_master, omp_proc_bind_close);
     292        }
     293        #pragma omp parallel if (0) proc_bind (spread)
     294        {
     295  	verify (omp_proc_bind_spread, omp_proc_bind_spread);
     296        }
     297      }
     298    }
     299  
     300    /* True/spread */
     301    #pragma omp parallel num_threads (4)
     302    {
     303      verify (omp_proc_bind_true, omp_proc_bind_master);
     304      #pragma omp critical
     305      {
     306        struct place p = places_array[0].places[0];
     307        int thr = omp_get_thread_num ();
     308        printf ("#1 thread %d", thr);
     309        if (omp_get_num_threads () == 4 && test_spread_master_close)
     310  	switch (places_array[test_places].count)
     311  	  {
     312  	  case 8:
     313  	    /* T = 4, P = 8, each subpartition has 2 places.  */
     314  	  case 7:
     315  	    /* T = 4, P = 7, each subpartition has 2 places, but
     316  	       last partition, which has just one place.  */
     317  	    p = places_array[test_places].places[2 * thr];
     318  	    break;
     319  	  case 5:
     320  	    /* T = 4, P = 5, first subpartition has 2 places, the
     321  	       rest just one.  */
     322  	    p = places_array[test_places].places[thr ? 1 + thr : 0];
     323  	    break;
     324  	  case 3:
     325  	    /* T = 4, P = 3, unit sized subpartitions, first gets
     326  	       thr0 and thr3, second thr1, third thr2.  */
     327  	    p = places_array[test_places].places[thr == 3 ? 0 : thr];
     328  	    break;
     329  	  case 2:
     330  	    /* T = 4, P = 2, unit sized subpartitions, each with
     331  	       2 threads.  */
     332  	    p = places_array[test_places].places[thr / 2];
     333  	    break;
     334  	  }
     335        print_affinity (p);
     336        printf ("\n");
     337      }
     338      #pragma omp barrier
     339      if (omp_get_thread_num () == 3)
     340        {
     341  	/* True/spread, true/master.  */
     342  	#pragma omp parallel num_threads (3)
     343  	{
     344  	  verify (omp_proc_bind_true, omp_proc_bind_close);
     345  	  #pragma omp critical
     346  	  {
     347  	    struct place p = places_array[0].places[0];
     348  	    int thr = omp_get_thread_num ();
     349  	    printf ("#1,#1 thread 3,%d", thr);
     350  	    if (omp_get_num_threads () == 3 && test_spread_master_close)
	      /* Outer is spread, inner master, so just bind to the
		 place of the master thread, which is thr 3 above.  */
     353  	      switch (places_array[test_places].count)
     354  		{
     355  		case 8:
     356  		case 7:
     357  		  p = places_array[test_places].places[6];
     358  		  break;
     359  		case 5:
     360  		  p = places_array[test_places].places[4];
     361  		  break;
     362  		case 3:
     363  		  p = places_array[test_places].places[0];
     364  		  break;
     365  		case 2:
     366  		  p = places_array[test_places].places[1];
     367  		  break;
     368  		}
     369  	    print_affinity (p);
     370  	    printf ("\n");
     371  	  }
     372  	}
     373  	/* True/spread, spread.  */
     374  	#pragma omp parallel num_threads (5) proc_bind (spread)
     375  	{
     376  	  verify (omp_proc_bind_spread, omp_proc_bind_close);
     377  	  #pragma omp critical
     378  	  {
     379  	    struct place p = places_array[0].places[0];
     380  	    int thr = omp_get_thread_num ();
     381  	    printf ("#1,#2 thread 3,%d", thr);
     382  	    if (omp_get_num_threads () == 5 && test_spread_master_close)
     383  	      /* Outer is spread, inner spread.  */
     384  	      switch (places_array[test_places].count)
     385  		{
     386  		case 8:
     387  		  /* T = 5, P = 2, unit sized subpartitions.  */
     388  		  p = places_array[test_places].places[thr == 4 ? 6
     389  						       : 6 + thr / 2];
     390  		  break;
     391  		/* The rest are T = 5, P = 1.  */
     392  		case 7:
     393  		  p = places_array[test_places].places[6];
     394  		  break;
     395  		case 5:
     396  		  p = places_array[test_places].places[4];
     397  		  break;
     398  		case 3:
     399  		  p = places_array[test_places].places[0];
     400  		  break;
     401  		case 2:
     402  		  p = places_array[test_places].places[1];
     403  		  break;
     404  		}
     405  	    print_affinity (p);
     406  	    printf ("\n");
     407  	  }
     408  	  #pragma omp barrier
     409  	  if (omp_get_thread_num () == 3)
     410  	    {
     411  	      /* True/spread, spread, close.  */
     412  	      #pragma omp parallel num_threads (5) proc_bind (close)
     413  	      {
     414  		verify (omp_proc_bind_close, omp_proc_bind_close);
     415  		#pragma omp critical
     416  		{
     417  		  struct place p = places_array[0].places[0];
     418  		  int thr = omp_get_thread_num ();
     419  		  printf ("#1,#2,#1 thread 3,3,%d", thr);
     420  		  if (omp_get_num_threads () == 5 && test_spread_master_close)
     421  		    /* Outer is spread, inner spread, innermost close.  */
     422  		    switch (places_array[test_places].count)
     423  		      {
     424  		      /* All are T = 5, P = 1.  */
     425  		      case 8:
     426  			p = places_array[test_places].places[7];
     427  			break;
     428  		      case 7:
     429  			p = places_array[test_places].places[6];
     430  			break;
     431  		      case 5:
     432  			p = places_array[test_places].places[4];
     433  			break;
     434  		      case 3:
     435  			p = places_array[test_places].places[0];
     436  			break;
     437  		      case 2:
     438  			p = places_array[test_places].places[1];
     439  			break;
     440  		      }
     441  		  print_affinity (p);
     442  		  printf ("\n");
     443  		}
     444  	      }
     445  	    }
     446  	}
     447  	/* True/spread, master.  */
     448  	#pragma omp parallel num_threads (4) proc_bind(master)
     449  	{
     450  	  verify (omp_proc_bind_master, omp_proc_bind_close);
     451  	  #pragma omp critical
     452  	  {
     453  	    struct place p = places_array[0].places[0];
     454  	    int thr = omp_get_thread_num ();
     455  	    printf ("#1,#3 thread 3,%d", thr);
     456  	    if (omp_get_num_threads () == 4 && test_spread_master_close)
	      /* Outer is spread, inner master, so just bind to the
		 place of the master thread, which is thr 3 above.  */
     459  	      switch (places_array[test_places].count)
     460  		{
     461  		case 8:
     462  		case 7:
     463  		  p = places_array[test_places].places[6];
     464  		  break;
     465  		case 5:
     466  		  p = places_array[test_places].places[4];
     467  		  break;
     468  		case 3:
     469  		  p = places_array[test_places].places[0];
     470  		  break;
     471  		case 2:
     472  		  p = places_array[test_places].places[1];
     473  		  break;
     474  		}
     475  	    print_affinity (p);
     476  	    printf ("\n");
     477  	  }
     478  	}
     479  	/* True/spread, close.  */
     480  	#pragma omp parallel num_threads (6) proc_bind (close)
     481  	{
     482  	  verify (omp_proc_bind_close, omp_proc_bind_close);
     483  	  #pragma omp critical
     484  	  {
     485  	    struct place p = places_array[0].places[0];
     486  	    int thr = omp_get_thread_num ();
     487  	    printf ("#1,#4 thread 3,%d", thr);
     488  	    if (omp_get_num_threads () == 6 && test_spread_master_close)
     489  	      /* Outer is spread, inner close.  */
     490  	      switch (places_array[test_places].count)
     491  		{
     492  		case 8:
     493  		  /* T = 6, P = 2, unit sized subpartitions.  */
     494  		  p = places_array[test_places].places[6 + thr / 3];
     495  		  break;
     496  		/* The rest are T = 6, P = 1.  */
     497  		case 7:
     498  		  p = places_array[test_places].places[6];
     499  		  break;
     500  		case 5:
     501  		  p = places_array[test_places].places[4];
     502  		  break;
     503  		case 3:
     504  		  p = places_array[test_places].places[0];
     505  		  break;
     506  		case 2:
     507  		  p = places_array[test_places].places[1];
     508  		  break;
     509  		}
     510  	    print_affinity (p);
     511  	    printf ("\n");
     512  	  }
     513  	}
     514        }
     515    }
     516  
     517    /* Spread.  */
     518    #pragma omp parallel num_threads (5) proc_bind(spread)
     519    {
     520      verify (omp_proc_bind_spread, omp_proc_bind_master);
     521      #pragma omp critical
     522      {
     523        struct place p = places_array[0].places[0];
     524        int thr = omp_get_thread_num ();
     525        printf ("#2 thread %d", thr);
     526        if (omp_get_num_threads () == 5
     527  	  && (test_spread_master_close || test_true))
     528  	switch (places_array[test_places].count)
     529  	  {
     530  	  case 8:
     531  	    /* T = 5, P = 8, first 3 subpartitions have 2 places, last
     532  	       2 one place.  */
     533  	    p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
     534  	    break;
     535  	  case 7:
     536  	    /* T = 5, P = 7, first 2 subpartitions have 2 places, last
     537  	       3 one place.  */
     538  	    p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
     539  	    break;
     540  	  case 5:
     541  	    /* T = 5, P = 5, unit sized subpartitions, each one with one
     542  	       thread.  */
     543  	    p = places_array[test_places].places[thr];
     544  	    break;
     545  	  case 3:
     546  	    /* T = 5, P = 3, unit sized subpartitions, first gets
     547  	       thr0 and thr3, second thr1 and thr4, third thr2.  */
     548  	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
     549  	    break;
     550  	  case 2:
     551  	    /* T = 5, P = 2, unit sized subpartitions, first with
     552  	       thr{0,1,4} and second with thr{2,3}.  */
     553  	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
     554  	    break;
     555  	  }
     556        print_affinity (p);
     557        printf ("\n");
     558      }
     559      #pragma omp barrier
     560      if (omp_get_thread_num () == 3)
     561        {
     562  	int pp = 0;
     563  	switch (places_array[test_places].count)
     564  	  {
     565  	  case 8: pp = 6; break;
     566  	  case 7: pp = 5; break;
     567  	  case 5: pp = 3; break;
     568  	  case 2: pp = 1; break;
     569  	  }
     570  	/* Spread, spread/master.  */
     571  	#pragma omp parallel num_threads (3) firstprivate (pp)
     572  	{
     573  	  verify (omp_proc_bind_spread, omp_proc_bind_close);
     574  	  #pragma omp critical
     575  	  {
     576  	    struct place p = places_array[0].places[0];
     577  	    int thr = omp_get_thread_num ();
     578  	    printf ("#2,#1 thread 3,%d", thr);
     579  	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner spread resp. master, but we have
		 just unit sized partitions.  */
     582  	      p = places_array[test_places].places[pp];
     583  	    print_affinity (p);
     584  	    printf ("\n");
     585  	  }
     586  	}
     587  	/* Spread, spread.  */
     588  	#pragma omp parallel num_threads (5) proc_bind (spread) \
     589  			     firstprivate (pp)
     590  	{
     591  	  verify (omp_proc_bind_spread, omp_proc_bind_close);
     592  	  #pragma omp critical
     593  	  {
     594  	    struct place p = places_array[0].places[0];
     595  	    int thr = omp_get_thread_num ();
     596  	    printf ("#2,#2 thread 3,%d", thr);
     597  	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner spread, but we have
		 just unit sized partitions.  */
     600  	      p = places_array[test_places].places[pp];
     601  	    print_affinity (p);
     602  	    printf ("\n");
     603  	  }
     604  	}
     605  	/* Spread, master.  */
     606  	#pragma omp parallel num_threads (4) proc_bind(master) \
     607  			     firstprivate(pp)
     608  	{
     609  	  verify (omp_proc_bind_master, omp_proc_bind_close);
     610  	  #pragma omp critical
     611  	  {
     612  	    struct place p = places_array[0].places[0];
     613  	    int thr = omp_get_thread_num ();
     614  	    printf ("#2,#3 thread 3,%d", thr);
     615  	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner master, but we have
		 just unit sized partitions.  */
     618  	      p = places_array[test_places].places[pp];
     619  	    print_affinity (p);
     620  	    printf ("\n");
     621  	  }
     622  	}
     623  	/* Spread, close.  */
     624  	#pragma omp parallel num_threads (6) proc_bind (close) \
     625  			     firstprivate (pp)
     626  	{
     627  	  verify (omp_proc_bind_close, omp_proc_bind_close);
     628  	  #pragma omp critical
     629  	  {
     630  	    struct place p = places_array[0].places[0];
     631  	    int thr = omp_get_thread_num ();
     632  	    printf ("#2,#4 thread 3,%d", thr);
     633  	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner close, but we have
		 just unit sized partitions.  */
     636  	      p = places_array[test_places].places[pp];
     637  	    print_affinity (p);
     638  	    printf ("\n");
     639  	  }
     640  	}
     641        }
     642    }
     643  
     644    /* Master.  */
     645    #pragma omp parallel num_threads (3) proc_bind(master)
     646    {
     647      verify (omp_proc_bind_master, omp_proc_bind_master);
     648      #pragma omp critical
     649      {
     650        struct place p = places_array[0].places[0];
     651        int thr = omp_get_thread_num ();
     652        printf ("#3 thread %d", thr);
     653        if (test_spread_master_close || test_true)
     654  	p = places_array[test_places].places[0];
     655        print_affinity (p);
     656        printf ("\n");
     657      }
     658      #pragma omp barrier
     659      if (omp_get_thread_num () == 2)
     660        {
     661  	/* Master, master.  */
     662  	#pragma omp parallel num_threads (4)
     663  	{
     664  	  verify (omp_proc_bind_master, omp_proc_bind_close);
     665  	  #pragma omp critical
     666  	  {
     667  	    struct place p = places_array[0].places[0];
     668  	    int thr = omp_get_thread_num ();
     669  	    printf ("#3,#1 thread 2,%d", thr);
     670  	    if (test_spread_master_close || test_true)
     671  	      /* Outer is master, inner is master.  */
     672  	      p = places_array[test_places].places[0];
     673  	    print_affinity (p);
     674  	    printf ("\n");
     675  	  }
     676  	}
     677  	/* Master, spread.  */
     678  	#pragma omp parallel num_threads (4) proc_bind (spread)
     679  	{
     680  	  verify (omp_proc_bind_spread, omp_proc_bind_close);
     681  	  #pragma omp critical
     682  	  {
     683  	    struct place p = places_array[0].places[0];
     684  	    int thr = omp_get_thread_num ();
     685  	    printf ("#3,#2 thread 2,%d", thr);
     686  	    if (omp_get_num_threads () == 4
     687  		&& (test_spread_master_close || test_true))
     688  	      /* Outer is master, inner is spread.  */
     689  	      switch (places_array[test_places].count)
     690  		{
     691  		case 8:
     692  		  /* T = 4, P = 8, each subpartition has 2 places.  */
     693  		case 7:
     694  		  /* T = 4, P = 7, each subpartition has 2 places, but
     695  		     last partition, which has just one place.  */
     696  		  p = places_array[test_places].places[2 * thr];
     697  		  break;
     698  		case 5:
     699  		  /* T = 4, P = 5, first subpartition has 2 places, the
     700  		     rest just one.  */
     701  		  p = places_array[test_places].places[thr ? 1 + thr : 0];
     702  		  break;
     703  		case 3:
     704  		  /* T = 4, P = 3, unit sized subpartitions, first gets
     705  		     thr0 and thr3, second thr1, third thr2.  */
     706  		  p = places_array[test_places].places[thr == 3 ? 0 : thr];
     707  		  break;
     708  		case 2:
     709  		  /* T = 4, P = 2, unit sized subpartitions, each with
     710  		     2 threads.  */
     711  		  p = places_array[test_places].places[thr / 2];
     712  		  break;
     713  		}
     714  	    print_affinity (p);
     715  	    printf ("\n");
     716  	  }
     717  	  #pragma omp barrier
     718  	  if (omp_get_thread_num () == 0)
     719  	    {
     720  	      /* Master, spread, close.  */
     721  	      #pragma omp parallel num_threads (5) proc_bind (close)
     722  	      {
     723  		verify (omp_proc_bind_close, omp_proc_bind_close);
     724  		#pragma omp critical
     725  		{
     726  		  struct place p = places_array[0].places[0];
     727  		  int thr = omp_get_thread_num ();
     728  		  printf ("#3,#2,#1 thread 2,0,%d", thr);
     729  		  if (omp_get_num_threads () == 5
     730  		      && (test_spread_master_close || test_true))
     731  		    /* Outer is master, inner spread, innermost close.  */
     732  		    switch (places_array[test_places].count)
     733  		      {
     734  		      /* First 3 are T = 5, P = 2.  */
     735  		      case 8:
     736  		      case 7:
     737  		      case 5:
     738  			p = places_array[test_places].places[(thr & 2) / 2];
     739  			break;
     740  		      /* All the rest are T = 5, P = 1.  */
     741  		      case 3:
     742  		      case 2:
     743  			p = places_array[test_places].places[0];
     744  			break;
     745  		      }
     746  		  print_affinity (p);
     747  		  printf ("\n");
     748  		}
     749  	      }
     750  	    }
     751  	  #pragma omp barrier
     752  	  if (omp_get_thread_num () == 3)
     753  	    {
     754  	      /* Master, spread, close.  */
     755  	      #pragma omp parallel num_threads (5) proc_bind (close)
     756  	      {
     757  		verify (omp_proc_bind_close, omp_proc_bind_close);
     758  		#pragma omp critical
     759  		{
     760  		  struct place p = places_array[0].places[0];
     761  		  int thr = omp_get_thread_num ();
     762  		  printf ("#3,#2,#2 thread 2,3,%d", thr);
     763  		  if (omp_get_num_threads () == 5
     764  		      && (test_spread_master_close || test_true))
     765  		    /* Outer is master, inner spread, innermost close.  */
     766  		    switch (places_array[test_places].count)
     767  		      {
     768  		      case 8:
     769  			/* T = 5, P = 2.  */
     770  			p = places_array[test_places].places[6
     771  							     + (thr & 2) / 2];
     772  			break;
     773  		      /* All the rest are T = 5, P = 1.  */
     774  		      case 7:
     775  			p = places_array[test_places].places[6];
     776  			break;
     777  		      case 5:
     778  			p = places_array[test_places].places[4];
     779  			break;
     780  		      case 3:
     781  			p = places_array[test_places].places[0];
     782  			break;
     783  		      case 2:
     784  			p = places_array[test_places].places[1];
     785  			break;
     786  		      }
     787  		  print_affinity (p);
     788  		  printf ("\n");
     789  		}
     790  	      }
     791  	    }
     792  	}
     793  	/* Master, master.  */
     794  	#pragma omp parallel num_threads (4) proc_bind(master)
     795  	{
     796  	  verify (omp_proc_bind_master, omp_proc_bind_close);
     797  	  #pragma omp critical
     798  	  {
     799  	    struct place p = places_array[0].places[0];
     800  	    int thr = omp_get_thread_num ();
     801  	    printf ("#3,#3 thread 2,%d", thr);
     802  	    if (test_spread_master_close || test_true)
     803  	      /* Outer is master, inner master.  */
     804  	      p = places_array[test_places].places[0];
     805  	    print_affinity (p);
     806  	    printf ("\n");
     807  	  }
     808  	}
     809  	/* Master, close.  */
     810  	#pragma omp parallel num_threads (6) proc_bind (close)
     811  	{
     812  	  verify (omp_proc_bind_close, omp_proc_bind_close);
     813  	  #pragma omp critical
     814  	  {
     815  	    struct place p = places_array[0].places[0];
     816  	    int thr = omp_get_thread_num ();
     817  	    printf ("#3,#4 thread 2,%d", thr);
     818  	    if (omp_get_num_threads () == 6
     819  		&& (test_spread_master_close || test_true))
     820  	      switch (places_array[test_places].count)
     821  		{
     822  		case 8:
     823  		  /* T = 6, P = 8.  */
     824  		case 7:
     825  		  /* T = 6, P = 7.  */
     826  		  p = places_array[test_places].places[thr];
     827  		  break;
     828  		case 5:
     829  		  /* T = 6, P = 5.  thr{0,5} go into the first place.  */
     830  		  p = places_array[test_places].places[thr == 5 ? 0 : thr];
     831  		  break;
     832  		case 3:
     833  		  /* T = 6, P = 3, two threads into each place.  */
     834  		  p = places_array[test_places].places[thr / 2];
     835  		  break;
     836  		case 2:
     837  		  /* T = 6, P = 2, 3 threads into each place.  */
     838  		  p = places_array[test_places].places[thr / 3];
     839  		  break;
     840  		}
     841  	    print_affinity (p);
     842  	    printf ("\n");
     843  	  }
     844  	}
     845        }
     846    }
     847  
     848    #pragma omp parallel num_threads (5) proc_bind(close)
     849    {
     850      verify (omp_proc_bind_close, omp_proc_bind_master);
     851      #pragma omp critical
     852      {
     853        struct place p = places_array[0].places[0];
     854        int thr = omp_get_thread_num ();
     855        printf ("#4 thread %d", thr);
     856        if (omp_get_num_threads () == 5
     857  	  && (test_spread_master_close || test_true))
     858  	switch (places_array[test_places].count)
     859  	  {
     860  	  case 8:
     861  	    /* T = 5, P = 8.  */
     862  	  case 7:
     863  	    /* T = 5, P = 7.  */
     864  	  case 5:
     865  	    /* T = 5, P = 5.  */
     866  	    p = places_array[test_places].places[thr];
     867  	    break;
     868  	  case 3:
     869  	    /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
     870  	       thr2 in third.  */
     871  	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
     872  	    break;
     873  	  case 2:
     874  	    /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second.  */
     875  	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
     876  	    break;
     877  	  }
     878        print_affinity (p);
     879        printf ("\n");
     880      }
     881      #pragma omp barrier
     882      if (omp_get_thread_num () == 2)
     883        {
     884  	int pp = 0;
     885  	switch (places_array[test_places].count)
     886  	  {
     887  	  case 8:
     888  	  case 7:
     889  	  case 5:
     890  	  case 3:
     891  	    pp = 2;
     892  	    break;
     893  	  case 2:
     894  	    pp = 1;
     895  	    break;
     896  	  }
     897  	/* Close, close/master.  */
     898  	#pragma omp parallel num_threads (4) firstprivate (pp)
     899  	{
     900  	  verify (omp_proc_bind_close, omp_proc_bind_close);
     901  	  #pragma omp critical
     902  	  {
     903  	    struct place p = places_array[0].places[0];
     904  	    int thr = omp_get_thread_num ();
     905  	    printf ("#4,#1 thread 2,%d", thr);
     906  	    if (test_spread_master_close)
     907  	      /* Outer is close, inner is master.  */
     908  	      p = places_array[test_places].places[pp];
     909  	    else if (omp_get_num_threads () == 4 && test_true)
     910  	      /* Outer is close, inner is close.  */
     911  	      switch (places_array[test_places].count)
     912  		{
     913  		case 8:
     914  		  /* T = 4, P = 8.  */
     915  		case 7:
     916  		  /* T = 4, P = 7.  */
     917  		  p = places_array[test_places].places[2 + thr];
     918  		  break;
     919  		case 5:
     920  		  /* T = 4, P = 5.  There is wrap-around for thr3.  */
     921  		  p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
     922  		  break;
     923  		case 3:
     924  		  /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
     925  		     into p1.  */
     926  		  p = places_array[test_places].places[(2 + thr) % 3];
     927  		  break;
     928  		case 2:
     929  		  /* T = 4, P = 2, 2 threads into each place.  */
     930  		  p = places_array[test_places].places[1 - thr / 2];
     931  		  break;
     932  		}
     933  
     934  	    print_affinity (p);
     935  	    printf ("\n");
     936  	  }
     937  	}
     938  	/* Close, spread.  */
     939  	#pragma omp parallel num_threads (4) proc_bind (spread)
     940  	{
     941  	  verify (omp_proc_bind_spread, omp_proc_bind_close);
     942  	  #pragma omp critical
     943  	  {
     944  	    struct place p = places_array[0].places[0];
     945  	    int thr = omp_get_thread_num ();
     946  	    printf ("#4,#2 thread 2,%d", thr);
     947  	    if (omp_get_num_threads () == 4
     948  		&& (test_spread_master_close || test_true))
     949  	      /* Outer is close, inner is spread.  */
     950  	      switch (places_array[test_places].count)
     951  		{
     952  		case 8:
     953  		  /* T = 4, P = 8, each subpartition has 2 places.  */
     954  		case 7:
     955  		  /* T = 4, P = 7, each subpartition has 2 places, except
     956  		     the last partition, which has just one place.  */
     957  		  p = places_array[test_places].places[thr == 3 ? 0
     958  						       : 2 + 2 * thr];
     959  		  break;
     960  		case 5:
     961  		  /* T = 4, P = 5, first subpartition has 2 places, the
     962  		     rest just one.  */
     963  		  p = places_array[test_places].places[thr == 3 ? 0
     964  						       : 2 + thr];
     965  		  break;
     966  		case 3:
     967  		  /* T = 4, P = 3, unit sized subpartitions, third gets
     968  		     thr0 and thr3, first thr1, second thr2.  */
     969  		  p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
     970  		  break;
     971  		case 2:
     972  		  /* T = 4, P = 2, unit sized subpartitions, each with
     973  		     2 threads.  */
     974  		  p = places_array[test_places].places[1 - thr / 2];
     975  		  break;
     976  		}
     977  	    print_affinity (p);
     978  	    printf ("\n");
     979  	  }
     980  	  #pragma omp barrier
     981  	  if (omp_get_thread_num () == 0)
     982  	    {
     983  	      /* Close, spread, close.  */
     984  	      #pragma omp parallel num_threads (5) proc_bind (close)
     985  	      {
     986  		verify (omp_proc_bind_close, omp_proc_bind_close);
     987  		#pragma omp critical
     988  		{
     989  		  struct place p = places_array[0].places[0];
     990  		  int thr = omp_get_thread_num ();
     991  		  printf ("#4,#2,#1 thread 2,0,%d", thr);
     992  		  if (omp_get_num_threads () == 5
     993  		      && (test_spread_master_close || test_true))
     994  		    /* Outer is close, inner spread, innermost close.  */
     995  		    switch (places_array[test_places].count)
     996  		      {
     997  		      case 8:
     998  		      case 7:
     999  			/* T = 5, P = 2.  */
    1000  			p = places_array[test_places].places[2
    1001  							     + (thr & 2) / 2];
    1002  			break;
    1003  		      /* All the rest are T = 5, P = 1.  */
    1004  		      case 5:
    1005  		      case 3:
    1006  			p = places_array[test_places].places[2];
    1007  			break;
    1008  		      case 2:
    1009  			p = places_array[test_places].places[1];
    1010  			break;
    1011  		      }
    1012  		  print_affinity (p);
    1013  		  printf ("\n");
    1014  		}
    1015  	      }
    1016  	    }
    1017  	  #pragma omp barrier
    1018  	  if (omp_get_thread_num () == 2)
    1019  	    {
    1020  	      /* Close, spread, close.  */
    1021  	      #pragma omp parallel num_threads (5) proc_bind (close)
    1022  	      {
    1023  		verify (omp_proc_bind_close, omp_proc_bind_close);
    1024  		#pragma omp critical
    1025  		{
    1026  		  struct place p = places_array[0].places[0];
    1027  		  int thr = omp_get_thread_num ();
    1028  		  printf ("#4,#2,#2 thread 2,2,%d", thr);
    1029  		  if (omp_get_num_threads () == 5
    1030  		      && (test_spread_master_close || test_true))
    1031  		    /* Outer is close, inner spread, innermost close.  */
    1032  		    switch (places_array[test_places].count)
    1033  		      {
    1034  		      case 8:
    1035  			/* T = 5, P = 2.  */
    1036  			p = places_array[test_places].places[6
    1037  							     + (thr & 2) / 2];
    1038  			break;
    1039  		      /* All the rest are T = 5, P = 1.  */
    1040  		      case 7:
    1041  			p = places_array[test_places].places[6];
    1042  			break;
    1043  		      case 5:
    1044  			p = places_array[test_places].places[4];
    1045  			break;
    1046  		      case 3:
    1047  			p = places_array[test_places].places[1];
    1048  			break;
    1049  		      case 2:
    1050  			p = places_array[test_places].places[0];
    1051  			break;
    1052  		      }
    1053  		  print_affinity (p);
    1054  		  printf ("\n");
    1055  		}
    1056  	      }
    1057  	    }
    1058  	  #pragma omp barrier
    1059  	  if (omp_get_thread_num () == 3)
    1060  	    {
    1061  	      /* Close, spread, close.  */
    1062  	      #pragma omp parallel num_threads (5) proc_bind (close)
    1063  	      {
    1064  		verify (omp_proc_bind_close, omp_proc_bind_close);
    1065  		#pragma omp critical
    1066  		{
    1067  		  struct place p = places_array[0].places[0];
    1068  		  int thr = omp_get_thread_num ();
    1069  		  printf ("#4,#2,#3 thread 2,3,%d", thr);
    1070  		  if (omp_get_num_threads () == 5
    1071  		      && (test_spread_master_close || test_true))
    1072  		    /* Outer is close, inner spread, innermost close.  */
    1073  		    switch (places_array[test_places].count)
    1074  		      {
    1075  		      case 8:
    1076  		      case 7:
    1077  		      case 5:
    1078  			/* T = 5, P = 2.  */
    1079  			p = places_array[test_places].places[(thr & 2) / 2];
    1080  			break;
    1081  		      /* All the rest are T = 5, P = 1.  */
    1082  		      case 3:
    1083  			p = places_array[test_places].places[2];
    1084  			break;
    1085  		      case 2:
    1086  			p = places_array[test_places].places[0];
    1087  			break;
    1088  		      }
    1089  		  print_affinity (p);
    1090  		  printf ("\n");
    1091  		}
    1092  	      }
    1093  	    }
    1094  	}
    1095  	/* Close, master.  */
    1096  	#pragma omp parallel num_threads (4) proc_bind(master) \
    1097  			     firstprivate (pp)
    1098  	{
    1099  	  verify (omp_proc_bind_master, omp_proc_bind_close);
    1100  	  #pragma omp critical
    1101  	  {
    1102  	    struct place p = places_array[0].places[0];
    1103  	    int thr = omp_get_thread_num ();
    1104  	    printf ("#4,#3 thread 2,%d", thr);
    1105  	    if (test_spread_master_close || test_true)
    1106  	      /* Outer is close, inner master.  */
    1107  	      p = places_array[test_places].places[pp];
    1108  	    print_affinity (p);
    1109  	    printf ("\n");
    1110  	  }
    1111  	}
    1112  	/* Close, close.  */
    1113  	#pragma omp parallel num_threads (6) proc_bind (close)
    1114  	{
    1115  	  verify (omp_proc_bind_close, omp_proc_bind_close);
    1116  	  #pragma omp critical
    1117  	  {
    1118  	    struct place p = places_array[0].places[0];
    1119  	    int thr = omp_get_thread_num ();
    1120  	    printf ("#4,#4 thread 2,%d", thr);
    1121  	    if (omp_get_num_threads () == 6
    1122  		&& (test_spread_master_close || test_true))
    1123  	      switch (places_array[test_places].count)
    1124  		{
    1125  		case 8:
    1126  		  /* T = 6, P = 8.  */
    1127  		  p = places_array[test_places].places[2 + thr];
    1128  		  break;
    1129  		case 7:
    1130  		  /* T = 6, P = 7.  */
    1131  		  p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
    1132  		  break;
    1133  		case 5:
    1134  		  /* T = 6, P = 5.  thr{0,5} go into the third place.  */
    1135  		  p = places_array[test_places].places[thr >= 3 ? thr - 3
    1136  						       : 2 + thr];
    1137  		  break;
    1138  		case 3:
    1139  		  /* T = 6, P = 3, two threads into each place.  */
    1140  		  p = places_array[test_places].places[thr < 2 ? 2
    1141  						       : thr / 2 - 1];
    1142  		  break;
    1143  		case 2:
    1144  		  /* T = 6, P = 2, 3 threads into each place.  */
    1145  		  p = places_array[test_places].places[1 - thr / 3];
    1146  		  break;
    1147  		}
    1148  	    print_affinity (p);
    1149  	    printf ("\n");
    1150  	  }
    1151  	}
    1152        }
    1153    }
    1154  
    1155    return 0;
    1156  }