/* Atomic operations.  X86 version.
   Copyright (C) 2018-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _X86_ATOMIC_MACHINE_H
#define _X86_ATOMIC_MACHINE_H 1

#include <stdint.h>
#include <tls.h>			/* For tcbhead_t.  */
#include <libc-pointer-arith.h>		/* For cast_to_integer.  */

#define LOCK_PREFIX "lock;"

#define USE_ATOMIC_COMPILER_BUILTINS	1

#ifdef __x86_64__
# define __HAVE_64B_ATOMICS		1
# define SP_REG				"rsp"
# define SEG_REG			"fs"
# define BR_CONSTRAINT			"q"
# define IBR_CONSTRAINT			"iq"
#else
/* Since the Pentium, i386 CPUs have supported 64-bit atomics, but the
   i386 psABI supplement provides only 4-byte alignment for uint64_t
   inside structs, so it is currently not possible to use 64-bit
   atomics on this platform.  */
# define __HAVE_64B_ATOMICS		0
# define SP_REG				"esp"
# define SEG_REG			"gs"
# define BR_CONSTRAINT			"r"
# define IBR_CONSTRAINT			"ir"
#endif
#define ATOMIC_EXCHANGE_USES_CAS	0

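/* The plain compare-and-exchange operations below use the GCC __sync
   builtins, which the compiler expands to LOCK CMPXCHG instructions on
   x86.  */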
#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  __sync_val_compare_and_swap (mem, oldval, newval)
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
  (! __sync_bool_compare_and_swap (mem, oldval, newval))


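/* The __arch_c_* ("catomic") variants below test the multiple_threads
   field of the TCB (addressed through the %fs/%gs segment register) and
   jump over the LOCK prefix while the process is single-threaded, which
   avoids the cost of a locked instruction in that case.  */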
#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgb %b2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgw %w2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgl %2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#ifdef __x86_64__
# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t"				      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchgq %q2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : "q" ((int64_t) cast_to_integer (newval)),	      \
			 "m" (*mem),					      \
			 "0" ((int64_t) cast_to_integer (oldval)),	      \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })
# define do_exchange_and_add_val_64_acq(pfx, mem, value) 0
# define do_add_val_64_acq(pfx, mem, value) do { } while (0)
#else
/* XXX We do not really need 64-bit compare-and-exchange, at least not
   at the moment.  Using it would cause portability problems, since few
   other 32-bit architectures support such an operation.  So do not
   define any real code for now; the stubs below merely force a link
   error.  If it is ever needed, CMPXCHG8B could be used here; it is
   available on the Intel Pentium and later, but NOT on the i486.  */
# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret = *(mem);					      \
     __atomic_link_error ();						      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })

# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval)	      \
  ({ __typeof (*mem) ret = *(mem);					      \
     __atomic_link_error ();						      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })

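/* The 64-bit fallbacks below are compare-and-exchange loops built on
   the pfx##_compare_and_exchange_val_64_acq macro supplied by the
   caller.  On i386 that macro is one of the link-error stubs above, so
   any code path that actually reaches them is diagnosed at link
   time.  */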
# define do_exchange_and_add_val_64_acq(pfx, mem, value) \
  ({ __typeof (value) __addval = (value);				      \
     __typeof (*mem) __result;						      \
     __typeof (mem) __memp = (mem);					      \
     __typeof (*mem) __tmpval;						      \
     __result = *__memp;						      \
     do									      \
       __tmpval = __result;						      \
     while ((__result = pfx##_compare_and_exchange_val_64_acq		      \
	     (__memp, __result + __addval, __result)) != __tmpval);	      \
     __result; })

# define do_add_val_64_acq(pfx, mem, value) \
  {									      \
    __typeof (value) __addval = (value);				      \
    __typeof (mem) __memp = (mem);					      \
    __typeof (*mem) __oldval = *__memp;					      \
    __typeof (*mem) __tmpval;						      \
    do									      \
      __tmpval = __oldval;						      \
    while ((__oldval = pfx##_compare_and_exchange_val_64_acq		      \
	    (__memp, __oldval + __addval, __oldval)) != __tmpval);	      \
  }
#endif


/* Note that we need no lock prefix: XCHG with a memory operand is
   locked implicitly.  */
#define atomic_exchange_acq(mem, newvalue) \
  ({ __typeof (*mem) result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile ("xchgb %b0, %1"				      \
			 : "=q" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile ("xchgw %w0, %1"				      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile ("xchgl %0, %1"					      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile ("xchgq %q0, %1"				      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" ((int64_t) cast_to_integer (newvalue)),	      \
			   "m" (*mem));					      \
     else								      \
       {								      \
	 result = 0;							      \
	 __atomic_link_error ();					      \
       }								      \
     result; })


#define __arch_exchange_and_add_body(lock, pfx, mem, value) \
  ({ __typeof (*mem) __result;						      \
     __typeof (value) __addval = (value);				      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "xaddb %b0, %1"				      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "xaddw %w0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "xaddl %0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "xaddq %q0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" ((int64_t) cast_to_integer (__addval)),	      \
			   "m" (*mem),					      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else								      \
       __result = do_exchange_and_add_val_64_acq (pfx, (mem), __addval);     \
     __result; })

#define atomic_exchange_and_add(mem, value) \
  __sync_fetch_and_add (mem, value)

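/* In the "cprefix" strings used by the catomic operations, %P4 (and
   %P3, %P2 in the later macros) is the "i" input operand holding
   offsetof (tcbhead_t, multiple_threads); the CMPL therefore tests that
   TCB field and the JE skips the LOCK prefix in the single-threaded
   case.  */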
#define __arch_exchange_and_add_cprefix \
  "cmpl $0, %%" SEG_REG ":%P4\n\tje 0f\n\tlock\n0:\t"

#define catomic_exchange_and_add(mem, value) \
  __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c,    \
				mem, value)


#define __arch_add_body(lock, pfx, apfx, mem, value) \
  do {									      \
    if (__builtin_constant_p (value) && (value) == 1)			      \
      pfx##_increment (mem);						      \
    else if (__builtin_constant_p (value) && (value) == -1)		      \
      pfx##_decrement (mem);						      \
    else if (sizeof (*mem) == 1)					      \
      __asm __volatile (lock "addb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (value), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "addw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "addl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "addq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" ((int64_t) cast_to_integer (value)),	      \
			  "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (apfx, (mem), (value));				      \
  } while (0)

#define atomic_add(mem, value) \
  __arch_add_body (LOCK_PREFIX, atomic, __arch, mem, value)

#define __arch_add_cprefix \
  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"

#define catomic_add(mem, value) \
  __arch_add_body (__arch_add_cprefix, atomic, __arch_c, mem, value)


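/* atomic_add_negative and atomic_add_zero perform a locked add and use
   SETS/SETZ to capture the sign and zero flags, i.e. they report
   whether the result is negative or zero, respectively.  */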
#define atomic_add_negative(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : IBR_CONSTRAINT (value), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" ((int64_t) cast_to_integer (value)),	      \
			   "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define atomic_add_zero(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : IBR_CONSTRAINT (value), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" ((int64_t) cast_to_integer (value)),	      \
			   "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_increment_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "incb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "incw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "incl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "incq %q0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (pfx, mem, 1);					      \
  } while (0)

#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)

#define __arch_increment_cprefix \
  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_increment(mem) \
  __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)


#define atomic_increment_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "incb %b0; sete %b1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "incw %w0; sete %w1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "incl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "incq %q0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_decrement_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "decb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "decw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "decl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "decq %q0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      do_add_val_64_acq (pfx, mem, -1);					      \
  } while (0)

#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)

#define __arch_decrement_cprefix \
  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_decrement(mem) \
  __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)


#define atomic_decrement_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "decb %b0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "decw %w0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "decl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       __asm __volatile (LOCK_PREFIX "decq %q0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     __result; })


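/* ORQ cannot encode a 64-bit immediate, so for 8-byte operands only
   small constant bit numbers use the immediate form; otherwise the mask
   is built in a register first.  */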
#define atomic_bit_set(mem, bit) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (LOCK_PREFIX "orb %b2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), IBR_CONSTRAINT (1L << (bit)));	      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (LOCK_PREFIX "orw %w2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1L << (bit)));		      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (LOCK_PREFIX "orl %2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1L << (bit)));		      \
    else if (__builtin_constant_p (bit) && (bit) < 32)			      \
      __asm __volatile (LOCK_PREFIX "orq %2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "i" (1L << (bit)));		      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (LOCK_PREFIX "orq %q2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "r" (1UL << (bit)));		      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)


#define atomic_bit_test_set(mem, bit) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), IBR_CONSTRAINT (bit));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else								      \
       __atomic_link_error ();						      \
     __result; })


#define __arch_and_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "andb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "andw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "andl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "andq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)

#define __arch_cprefix \
  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"

#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)

#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)


#define __arch_or_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "orb %b1, %0"				      \
			: "=m" (*mem)					      \
			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "orw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "orl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (__HAVE_64B_ATOMICS)					      \
      __asm __volatile (lock "orq %q1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      __atomic_link_error ();						      \
  } while (0)

#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)

#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)

/* We don't use mfence because it is supposedly slower due to having to
   provide stronger guarantees (e.g., regarding self-modifying code).  */
#define atomic_full_barrier() \
    __asm __volatile (LOCK_PREFIX "orl $0, (%%" SP_REG ")" ::: "memory")
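/* Only compiler barriers are needed for the read and write barriers:
   the x86 memory model never reorders loads with other loads or stores
   with other stores.  */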
#define atomic_read_barrier() __asm ("" ::: "memory")
#define atomic_write_barrier() __asm ("" ::: "memory")

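/* PAUSE hints to the CPU that this is a spin-wait loop, which avoids
   the memory-order mis-speculation penalty when leaving the loop and
   reduces power consumption while spinning.  */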
#define atomic_spin_nop() __asm ("pause")

#endif /* atomic-machine.h */