1  /* Monotonically increasing wide counters (at least 62 bits).
       2     Copyright (C) 2016-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <atomic_wide_counter.h>
      20  
      21  #if !__HAVE_64B_ATOMICS
      22  
      23  /* Values we add or xor are less than or equal to 1<<31, so we only
   have to make overflow-and-addition atomic with respect to concurrent load
      25     operations and xor operations.  To do that, we split each counter
      26     into two 32b values of which we reserve the MSB of each to
      27     represent an overflow from the lower-order half to the higher-order
      28     half.
      29  
      30     In the common case, the state is (higher-order / lower-order half, and . is
      31     basically concatenation of the bits):
      32     0.h     / 0.l  = h.l
      33  
      34     When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
      35     following steps S1-S4 (the values these represent are on the right-hand
      36     side):
      37     S1:  0.h     / 1.L == (h+1).L
      38     S2:  1.(h+1) / 1.L == (h+1).L
      39     S3:  1.(h+1) / 0.L == (h+1).L
      40     S4:  0.(h+1) / 0.L == (h+1).L
   If the MSB of the higher-order half is set, readers will ignore the
   overflow bit in the lower-order half.
      43  
      44     To get an atomic snapshot in load operations, we exploit that the
      45     higher-order half is monotonically increasing; if we load a value V from
      46     it, then read the lower-order half, and then read the higher-order half
      47     again and see the same value V, we know that both halves have existed in
      48     the sequence of values the full counter had.  This is similar to the
      49     validated reads in the time-based STMs in GCC's libitm (e.g.,
      50     method_ml_wt).
      51  
      52     One benefit of this scheme is that this makes load operations
      53     obstruction-free because unlike if we would just lock the counter, readers
   can almost always interpret a snapshot of both halves.  Readers can be
      55     forced to read a new snapshot when the read is concurrent with an overflow.
      56     However, overflows will happen infrequently, so load operations are
      57     practically lock-free.  */
      58  
      59  uint64_t
      60  __atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
      61                                           unsigned int op)
      62  {
      63    /* S1. Note that this is an atomic read-modify-write so it extends the
      64       release sequence of release MO store at S3.  */
      65    unsigned int l = atomic_fetch_add_relaxed (&c->__value32.__low, op);
      66    unsigned int h = atomic_load_relaxed (&c->__value32.__high);
      67    uint64_t result = ((uint64_t) h << 31) | l;
      68    l += op;
      69    if ((l >> 31) > 0)
      70      {
      71        /* Overflow.  Need to increment higher-order half.  Note that all
      72           add operations are ordered in happens-before.  */
      73        h++;
      74        /* S2. Release MO to synchronize with the loads of the higher-order half
      75           in the load operation.  See __atomic_wide_counter_load_relaxed.  */
      76        atomic_store_release (&c->__value32.__high,
      77                              h | ((unsigned int) 1 << 31));
      78        l ^= (unsigned int) 1 << 31;
      79        /* S3.  See __atomic_wide_counter_load_relaxed.  */
      80        atomic_store_release (&c->__value32.__low, l);
      81        /* S4.  Likewise.  */
      82        atomic_store_release (&c->__value32.__high, h);
      83      }
      84    return result;
      85  }
      86  
      87  uint64_t
      88  __atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
      89  {
      90    unsigned int h, l, h2;
      91    do
      92      {
      93        /* This load and the second one below to the same location read from the
      94           stores in the overflow handling of the add operation or the
      95           initializing stores (which is a simple special case because
      96           initialization always completely happens before further use).
      97           Because no two stores to the higher-order half write the same value,
      98           the loop ensures that if we continue to use the snapshot, this load
      99           and the second one read from the same store operation.  All candidate
     100           store operations have release MO.
     101           If we read from S2 in the first load, then we will see the value of
     102           S1 on the next load (because we synchronize with S2), or a value
     103           later in modification order.  We correctly ignore the lower-half's
     104           overflow bit in this case.  If we read from S4, then we will see the
     105           value of S3 in the next load (or a later value), which does not have
     106           the overflow bit set anymore.
     107            */
     108        h = atomic_load_acquire (&c->__value32.__high);
     109        /* This will read from the release sequence of S3 (i.e, either the S3
     110           store or the read-modify-writes at S1 following S3 in modification
     111           order).  Thus, the read synchronizes with S3, and the following load
     112           of the higher-order half will read from the matching S2 (or a later
     113           value).
     114           Thus, if we read a lower-half value here that already overflowed and
     115           belongs to an increased higher-order half value, we will see the
     116           latter and h and h2 will not be equal.  */
     117        l = atomic_load_acquire (&c->__value32.__low);
     118        /* See above.  */
     119        h2 = atomic_load_relaxed (&c->__value32.__high);
     120      }
     121    while (h != h2);
     122    if (((l >> 31) > 0) && ((h >> 31) > 0))
     123      l ^= (unsigned int) 1 << 31;
     124    return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
     125  }
     126  
     127  #endif /* !__HAVE_64B_ATOMICS */