gcc-13.2.0/libsanitizer/tsan/tsan_dense_alloc.h
//===-- tsan_dense_alloc.h --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// A DenseSlabAlloc is a freelist-based allocator of fixed-size objects.
// DenseSlabAllocCache is a thread-local cache for DenseSlabAlloc.
// The only difference from traditional slab allocators is that DenseSlabAlloc
// allocates/frees indices of objects and provides functionality to map
// an index onto the real pointer. The index is u32, that is, 2 times smaller
// than uptr (hence the Dense prefix).
//===----------------------------------------------------------------------===//
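//
// Illustrative usage sketch (the element type and size parameters below are
// invented for the example, not taken from TSan itself):
//
//   struct Node { u64 payload[4]; };
//   typedef DenseSlabAlloc<Node, /*kL1Size=*/1 << 10, /*kL2Size=*/1 << 12>
//       NodeAlloc;
//   static NodeAlloc node_alloc(LINKER_INITIALIZED, "node allocator");
//
//   void Example() {
//     NodeAlloc::Cache cache;
//     node_alloc.InitCache(&cache);
//     NodeAlloc::IndexT idx = node_alloc.Alloc(&cache);  // dense u32 handle
//     Node *p = node_alloc.Map(idx);  // resolve the handle to a real pointer
//     p->payload[0] = 42;
//     node_alloc.Free(&cache, idx);   // return the handle via the cache
//     node_alloc.FlushCache(&cache);  // give cached indices back to the allocator
//   }
//===----------------------------------------------------------------------===//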
#ifndef TSAN_DENSE_ALLOC_H
#define TSAN_DENSE_ALLOC_H

#include "sanitizer_common/sanitizer_common.h"
#include "tsan_defs.h"

namespace __tsan {

class DenseSlabAllocCache {
  static const uptr kSize = 128;
  typedef u32 IndexT;
  uptr pos;
  IndexT cache[kSize];
  template <typename, uptr, uptr, u64>
  friend class DenseSlabAlloc;
};

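// The allocator keeps a two-level map: kL1Size pointers to lazily mapped
// superblocks, each holding kL2Size objects of type T. An object is addressed
// by a dense u32 index; index 0 is reserved as the invalid index.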
template <typename T, uptr kL1Size, uptr kL2Size, u64 kReserved = 0>
class DenseSlabAlloc {
 public:
  typedef DenseSlabAllocCache Cache;
  typedef typename Cache::IndexT IndexT;

  static_assert((kL1Size & (kL1Size - 1)) == 0,
                "kL1Size must be a power-of-two");
  static_assert((kL2Size & (kL2Size - 1)) == 0,
                "kL2Size must be a power-of-two");
  static_assert((kL1Size * kL2Size) <= (1ull << (sizeof(IndexT) * 8)),
                "kL1Size/kL2Size are too large");
  static_assert(((kL1Size * kL2Size - 1) & kReserved) == 0,
                "reserved bits don't fit");
  static_assert(sizeof(T) > sizeof(IndexT),
                "it doesn't make sense to use dense alloc");

  DenseSlabAlloc(LinkerInitialized, const char *name) : name_(name) {}

  explicit DenseSlabAlloc(const char *name)
      : DenseSlabAlloc(LINKER_INITIALIZED, name) {
    // It can be very large.
    // Don't page it in for linker initialized objects.
    internal_memset(map_, 0, sizeof(map_));
  }

  ~DenseSlabAlloc() {
    for (uptr i = 0; i < kL1Size; i++) {
      if (map_[i] != 0)
        UnmapOrDie(map_[i], kL2Size * sizeof(T));
    }
  }

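  // Returns a free index, taking it from the thread-local cache and refilling
  // the cache from the shared freelist (or a new superblock) when it is empty.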
  IndexT Alloc(Cache *c) {
    if (c->pos == 0)
      Refill(c);
    return c->cache[--c->pos];
  }

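  // Returns an index to the thread-local cache; when the cache is full, half
  // of it is drained back to the shared freelist first.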
  void Free(Cache *c, IndexT idx) {
    DCHECK_NE(idx, 0);
    if (c->pos == Cache::kSize)
      Drain(c);
    c->cache[c->pos++] = idx;
  }

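  // Translates a dense index into the object pointer: the high part of the
  // index (idx / kL2Size) selects the superblock, the low part (idx % kL2Size)
  // selects the slot within it.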
  T *Map(IndexT idx) {
    DCHECK_NE(idx, 0);
    DCHECK_LE(idx, kL1Size * kL2Size);
    return &map_[idx / kL2Size][idx % kL2Size];
  }

  void FlushCache(Cache *c) {
    while (c->pos) Drain(c);
  }

  void InitCache(Cache *c) {
    c->pos = 0;
    internal_memset(c->cache, 0, sizeof(c->cache));
  }

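  // Reports the total size of the mapped superblocks, whether or not the
  // individual objects in them are currently in use.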
  uptr AllocatedMemory() const {
    return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T);
  }

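  // Calls func on every object in every mapped superblock under mtx_,
  // skipping the reserved index 0.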
  template <typename Func>
  void ForEach(Func func) {
    Lock lock(&mtx_);
    uptr fillpos = atomic_load_relaxed(&fillpos_);
    for (uptr l1 = 0; l1 < fillpos; l1++) {
      for (IndexT l2 = l1 == 0 ? 1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]);
    }
  }

 private:
  T *map_[kL1Size];
  Mutex mtx_;
  // The freelist is organized as a lock-free stack of batches of nodes.
  // The stack itself uses Block::next links, while the batch within each
  // stack node uses Block::batch links.
  // The low 32 bits of freelist_ hold the head node index, the top 32 bits
  // hold the ABA counter.
  atomic_uint64_t freelist_ = {0};
  atomic_uintptr_t fillpos_ = {0};
  const char *const name_;

  struct Block {
    IndexT next;
    IndexT batch;
  };

  Block *MapBlock(IndexT idx) { return reinterpret_cast<Block *>(Map(idx)); }

  static constexpr u64 kCounterInc = 1ull << 32;
  static constexpr u64 kCounterMask = ~(kCounterInc - 1);

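  // Takes one batch from the lock-free freelist and unpacks it into the cache;
  // falls back to AllocSuperBlock() when the freelist is empty. The ABA
  // counter in the top bits of freelist_ (bumped on every push in Drain) lets
  // the compare-exchange detect a concurrently recycled head node.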
  NOINLINE void Refill(Cache *c) {
    // Pop 1 batch of nodes from the freelist.
    IndexT idx;
    u64 xchg;
    u64 cmp = atomic_load(&freelist_, memory_order_acquire);
    do {
      idx = static_cast<IndexT>(cmp);
      if (!idx)
        return AllocSuperBlock(c);
      Block *ptr = MapBlock(idx);
      xchg = ptr->next | (cmp & kCounterMask);
    } while (!atomic_compare_exchange_weak(&freelist_, &cmp, xchg,
                                           memory_order_acq_rel));
    // Unpack it into c->cache.
    while (idx) {
      c->cache[c->pos++] = idx;
      idx = MapBlock(idx)->batch;
    }
  }

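  // Moves up to half of the cached indices back to the shared freelist as a
  // single batch, bumping the ABA counter so concurrent Refill() calls notice
  // that the stack head has changed.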
  NOINLINE void Drain(Cache *c) {
    // Build a batch of at most Cache::kSize / 2 nodes linked by Block::batch.
    IndexT head_idx = 0;
    for (uptr i = 0; i < Cache::kSize / 2 && c->pos; i++) {
      IndexT idx = c->cache[--c->pos];
      Block *ptr = MapBlock(idx);
      ptr->batch = head_idx;
      head_idx = idx;
    }
    // Push it onto the freelist stack.
    Block *head = MapBlock(head_idx);
    u64 xchg;
    u64 cmp = atomic_load(&freelist_, memory_order_acquire);
    do {
      head->next = static_cast<IndexT>(cmp);
      xchg = head_idx | ((cmp & kCounterMask) + kCounterInc);
    } while (!atomic_compare_exchange_weak(&freelist_, &cmp, xchg,
                                           memory_order_acq_rel));
  }

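  // Maps a fresh superblock of kL2Size objects under mtx_, constructs them,
  // and feeds their indices into the cache (spilling to the freelist via
  // Drain() when the cache fills up). Index 0 is never handed out: it is
  // reserved as the invalid index.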
  NOINLINE void AllocSuperBlock(Cache *c) {
    Lock lock(&mtx_);
    uptr fillpos = atomic_load_relaxed(&fillpos_);
    if (fillpos == kL1Size) {
      Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n", name_, kL1Size,
             kL2Size);
      Die();
    }
    VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n", name_,
            fillpos, kL1Size, kL2Size);
    T *batch = (T *)MmapOrDie(kL2Size * sizeof(T), name_);
    map_[fillpos] = batch;
    // Reserve 0 as invalid index.
    for (IndexT i = fillpos ? 0 : 1; i < kL2Size; i++) {
      new (batch + i) T;
      c->cache[c->pos++] = i + fillpos * kL2Size;
      if (c->pos == Cache::kSize)
        Drain(c);
    }
    atomic_store_relaxed(&fillpos_, fillpos + 1);
    CHECK(c->pos);
  }
};

}  // namespace __tsan

#endif  // TSAN_DENSE_ALLOC_H