gcc-13.2.0/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h

//===-- sanitizer_deadlock_detector.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of Sanitizer runtime.
// The deadlock detector maintains a directed graph of lock acquisitions.
// When a lock event happens, the detector checks if the locks already held by
// the current thread are reachable from the newly acquired lock.
//
// The detector can handle only a fixed number of simultaneously live locks
// (a lock is alive if it has been locked at least once and has not been
// destroyed). When the maximal number of locks is reached, the entire graph
// is flushed and a new lock epoch is started. Node ids from old epochs
// cannot be used with any of the detector methods except for
// nodeBelongsToCurrentEpoch().
//
// FIXME: this is work in progress, nothing really works yet.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_DEADLOCK_DETECTOR_H
#define SANITIZER_DEADLOCK_DETECTOR_H

#include "sanitizer_bvgraph.h"
#include "sanitizer_common.h"

namespace __sanitizer {

// Thread-local state for DeadlockDetector.
// It contains the locks currently held by the owning thread.
template <class BV>
class DeadlockDetectorTLS {
 public:
  // No CTOR.
  void clear() {
    bv_.clear();
    epoch_ = 0;
    n_recursive_locks = 0;
    n_all_locks_ = 0;
  }

  bool empty() const { return bv_.empty(); }

  void ensureCurrentEpoch(uptr current_epoch) {
    if (epoch_ == current_epoch) return;
    bv_.clear();
    epoch_ = current_epoch;
    n_recursive_locks = 0;
    n_all_locks_ = 0;
  }

  uptr getEpoch() const { return epoch_; }

  // Returns true if this is the first (non-recursive) acquisition of this lock.
  bool addLock(uptr lock_id, uptr current_epoch, u32 stk) {
    CHECK_EQ(epoch_, current_epoch);
    if (!bv_.setBit(lock_id)) {
      // The lock is already held by this thread; it must be recursive.
      CHECK_LT(n_recursive_locks, ARRAY_SIZE(recursive_locks));
      recursive_locks[n_recursive_locks++] = lock_id;
      return false;
    }
    CHECK_LT(n_all_locks_, ARRAY_SIZE(all_locks_with_contexts_));
    // lock_id < BV::kSize, can cast to a smaller int.
    u32 lock_id_short = static_cast<u32>(lock_id);
    LockWithContext l = {lock_id_short, stk};
    all_locks_with_contexts_[n_all_locks_++] = l;
    return true;
  }

  void removeLock(uptr lock_id) {
    if (n_recursive_locks) {
      for (sptr i = n_recursive_locks - 1; i >= 0; i--) {
        if (recursive_locks[i] == lock_id) {
          n_recursive_locks--;
          Swap(recursive_locks[i], recursive_locks[n_recursive_locks]);
          return;
        }
      }
    }
    if (!bv_.clearBit(lock_id))
      return;  // the addLock probably happened before an epoch flush
    if (n_all_locks_) {
      for (sptr i = n_all_locks_ - 1; i >= 0; i--) {
        if (all_locks_with_contexts_[i].lock == static_cast<u32>(lock_id)) {
          Swap(all_locks_with_contexts_[i],
               all_locks_with_contexts_[n_all_locks_ - 1]);
          n_all_locks_--;
          break;
        }
      }
    }
  }

  u32 findLockContext(uptr lock_id) {
    for (uptr i = 0; i < n_all_locks_; i++)
      if (all_locks_with_contexts_[i].lock == static_cast<u32>(lock_id))
        return all_locks_with_contexts_[i].stk;
    return 0;
  }

  const BV &getLocks(uptr current_epoch) const {
    CHECK_EQ(epoch_, current_epoch);
    return bv_;
  }

  uptr getNumLocks() const { return n_all_locks_; }
  uptr getLock(uptr idx) const { return all_locks_with_contexts_[idx].lock; }

 private:
  BV bv_;
  uptr epoch_;
  uptr recursive_locks[64];
  uptr n_recursive_locks;
  struct LockWithContext {
    u32 lock;
    u32 stk;
  };
  LockWithContext all_locks_with_contexts_[64];
  uptr n_all_locks_;
};
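
// Illustrative sketch (not part of the interface): how addLock()/removeLock()
// treat recursive acquisitions. The concrete bit-vector type is an assumption
// here; any BV with the sanitizer bit-vector interface (e.g. BasicBitVector<>
// from sanitizer_bitvector.h) would do.
//
//   DeadlockDetectorTLS<BasicBitVector<> > dtls;
//   dtls.clear();
//   dtls.ensureCurrentEpoch(0);
//   dtls.addLock(5, 0, 0);    // returns true: first acquisition of lock 5
//   dtls.addLock(5, 0, 0);    // returns false: recursive acquisition
//   dtls.removeLock(5);       // drops the recursive acquisition first
//   dtls.removeLock(5);       // clears bit 5; dtls.empty() is true again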

// DeadlockDetector.
// For deadlock detection to work we need one global DeadlockDetector object
// and one DeadlockDetectorTLS object per thread.
// This class is not thread-safe: most of its methods must be guarded by an
// external lock unless explicitly stated otherwise.
template <class BV>
class DeadlockDetector {
 public:
  typedef BV BitVector;

  uptr size() const { return g_.size(); }

  // No CTOR.
  void clear() {
    current_epoch_ = 0;
    available_nodes_.clear();
    recycled_nodes_.clear();
    g_.clear();
    n_edges_ = 0;
  }

  // Allocate a new deadlock detector node.
  // If we are out of available nodes, first try to recycle some.
  // If there is nothing to recycle, flush the graph and increment the epoch.
  // Associate 'data' (an opaque user object) with the new node.
  uptr newNode(uptr data) {
    if (!available_nodes_.empty())
      return getAvailableNode(data);
    if (!recycled_nodes_.empty()) {
      for (sptr i = n_edges_ - 1; i >= 0; i--) {
        if (recycled_nodes_.getBit(edges_[i].from) ||
            recycled_nodes_.getBit(edges_[i].to)) {
          Swap(edges_[i], edges_[n_edges_ - 1]);
          n_edges_--;
        }
      }
      CHECK(available_nodes_.empty());
      // removeEdgesFrom was called in removeNode.
      g_.removeEdgesTo(recycled_nodes_);
      available_nodes_.setUnion(recycled_nodes_);
      recycled_nodes_.clear();
      return getAvailableNode(data);
    }
    // We are out of vacant nodes. Flush and increment the current_epoch_.
    current_epoch_ += size();
    recycled_nodes_.clear();
    available_nodes_.setAll();
    g_.clear();
    n_edges_ = 0;
    return getAvailableNode(data);
  }
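
  // Illustrative sketch (the bit-vector type is an assumption; BasicBitVector<u8>
  // gives BV::kSize == 8, i.e. 8 node slots per epoch): when every slot is
  // taken and nothing has been recycled via removeNode(), the next newNode()
  // flushes the graph and starts a new epoch, making the old node ids stale.
  //
  //   DeadlockDetector<BasicBitVector<u8> > d;
  //   d.clear();
  //   uptr first = d.newNode(0);       // allocates from the first epoch
  //   for (int i = 1; i < 8; i++)
  //     d.newNode(0);                  // use up the remaining 7 slots
  //   uptr extra = d.newNode(0);       // out of slots: graph is flushed,
  //                                    // a new epoch begins
  //   // Now d.nodeBelongsToCurrentEpoch(first) == false and
  //   // d.nodeBelongsToCurrentEpoch(extra) == true.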

  // Get data associated with the node created by newNode().
  uptr getData(uptr node) const { return data_[nodeToIndex(node)]; }

  bool nodeBelongsToCurrentEpoch(uptr node) {
    return node && (node / size() * size()) == current_epoch_;
  }

  void removeNode(uptr node) {
    uptr idx = nodeToIndex(node);
    CHECK(!available_nodes_.getBit(idx));
    CHECK(recycled_nodes_.setBit(idx));
    g_.removeEdgesFrom(idx);
  }

  void ensureCurrentEpoch(DeadlockDetectorTLS<BV> *dtls) {
    dtls->ensureCurrentEpoch(current_epoch_);
  }

  // Returns true if there is a cycle in the graph after this lock event.
  // Ideally should be called before the lock is acquired so that we can
  // report a deadlock before a real deadlock happens.
  bool onLockBefore(DeadlockDetectorTLS<BV> *dtls, uptr cur_node) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    return g_.isReachable(cur_idx, dtls->getLocks(current_epoch_));
  }
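
  // Illustrative sketch of cycle detection (test-style usage; the bit-vector
  // type is an assumption): taking two locks in opposite orders on the same
  // thread records the edge a -> b, so when b is later held and a is about to
  // be acquired, onLockBefore() reports the potential deadlock.
  //
  //   DeadlockDetector<BasicBitVector<> > d;
  //   DeadlockDetectorTLS<BasicBitVector<> > dtls;
  //   d.clear();
  //   dtls.clear();
  //   uptr a = d.newNode(0), b = d.newNode(0);
  //   d.onLock(&dtls, a);
  //   d.onLock(&dtls, b);                     // records edge a -> b
  //   d.onUnlock(&dtls, b);
  //   d.onUnlock(&dtls, a);
  //   d.onLock(&dtls, b);
  //   bool cycle = d.onLockBefore(&dtls, a);  // true: b is held and reachable
  //                                           // from a via the recorded edge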

  u32 findLockContext(DeadlockDetectorTLS<BV> *dtls, uptr node) {
    return dtls->findLockContext(nodeToIndex(node));
  }

  // Add cur_node to the set of locks currently held by dtls.
  void onLockAfter(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    dtls->addLock(cur_idx, current_epoch_, stk);
  }

  // Experimental *racy* fast path function.
  // Returns true if all edges from the currently held locks to cur_node exist.
  bool hasAllEdges(DeadlockDetectorTLS<BV> *dtls, uptr cur_node) {
    uptr local_epoch = dtls->getEpoch();
    // The read from current_epoch_ is racy.
    if (cur_node && local_epoch == current_epoch_ &&
        local_epoch == nodeToEpoch(cur_node)) {
      uptr cur_idx = nodeToIndexUnchecked(cur_node);
      for (uptr i = 0, n = dtls->getNumLocks(); i < n; i++) {
        if (!g_.hasEdge(dtls->getLock(i), cur_idx))
          return false;
      }
      return true;
    }
    return false;
  }

  // Adds edges from the currently held locks to cur_node,
  // returns the number of added edges, and puts the sources of added edges
  // into added_edges[].
  // Should be called before onLockAfter.
  uptr addEdges(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk,
                int unique_tid) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    uptr added_edges[40];
    uptr n_added_edges = g_.addEdges(dtls->getLocks(current_epoch_), cur_idx,
                                     added_edges, ARRAY_SIZE(added_edges));
    for (uptr i = 0; i < n_added_edges; i++) {
      if (n_edges_ < ARRAY_SIZE(edges_)) {
        Edge e = {(u16)added_edges[i], (u16)cur_idx,
                  dtls->findLockContext(added_edges[i]), stk,
                  unique_tid};
        edges_[n_edges_++] = e;
      }
    }
    return n_added_edges;
  }
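
  // Illustrative sketch of the "after lock" slow path (mirrors how a runtime
  // would typically drive this class; report_stack_id and tid are placeholder
  // names, not part of this interface): record the new edges first, then add
  // the lock to the per-thread set.
  //
  //   {
  //     // ... acquire the external lock guarding the detector ...
  //     d.addEdges(&dtls, cur_node, report_stack_id, tid);
  //     d.onLockAfter(&dtls, cur_node, report_stack_id);
  //   }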

  bool findEdge(uptr from_node, uptr to_node, u32 *stk_from, u32 *stk_to,
                int *unique_tid) {
    uptr from_idx = nodeToIndex(from_node);
    uptr to_idx = nodeToIndex(to_node);
    for (uptr i = 0; i < n_edges_; i++) {
      if (edges_[i].from == from_idx && edges_[i].to == to_idx) {
        *stk_from = edges_[i].stk_from;
        *stk_to = edges_[i].stk_to;
        *unique_tid = edges_[i].unique_tid;
        return true;
      }
    }
    return false;
  }

  // Test-only function. Handles the before/after lock events and
  // returns true if there is a cycle.
  bool onLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    bool is_reachable = !isHeld(dtls, cur_node) && onLockBefore(dtls, cur_node);
    addEdges(dtls, cur_node, stk, 0);
    onLockAfter(dtls, cur_node, stk);
    return is_reachable;
  }

  // Handles the try_lock event, returns false.
  // When a try_lock event happens (i.e. a try_lock call succeeds) we need
  // to add this lock to the currently held locks, but we should not try to
  // change the lock graph or to detect a cycle.  We may want to investigate
  // whether a more aggressive strategy is possible for try_lock.
  bool onTryLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    dtls->addLock(cur_idx, current_epoch_, stk);
    return false;
  }

  // Returns true iff dtls is empty (no locks are currently held) and we can
  // add the node to the currently held locks w/o changing the global state.
  // This operation is thread-safe as it only touches the dtls.
  bool onFirstLock(DeadlockDetectorTLS<BV> *dtls, uptr node, u32 stk = 0) {
    if (!dtls->empty()) return false;
    if (dtls->getEpoch() && dtls->getEpoch() == nodeToEpoch(node)) {
      dtls->addLock(nodeToIndexUnchecked(node), nodeToEpoch(node), stk);
      return true;
    }
    return false;
  }
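
  // Illustrative note for onFirstLock(): it succeeds only when this thread
  // holds no other locks and its epoch already matches the node's epoch, so
  // it can be called without the external lock (report_stack_id is a
  // placeholder name).
  //
  //   bool handled = d.onFirstLock(&dtls, cur_node, report_stack_id);
  //   // handled == false if dtls was non-empty or its epoch was stale;
  //   // the caller then falls through to the slower paths.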

  // Finds a path between the lock 'cur_node' (currently not held in dtls)
  // and some currently held lock; returns the length of the path
  // or 0 on failure.
  uptr findPathToLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, uptr *path,
                      uptr path_size) {
    tmp_bv_.copyFrom(dtls->getLocks(current_epoch_));
    uptr idx = nodeToIndex(cur_node);
    CHECK(!tmp_bv_.getBit(idx));
    uptr res = g_.findShortestPath(idx, tmp_bv_, path, path_size);
    for (uptr i = 0; i < res; i++)
      path[i] = indexToNode(path[i]);
    if (res)
      CHECK_EQ(path[0], cur_node);
    return res;
  }
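
  // Illustrative sketch: when onLockBefore() reports a cycle, the caller can
  // ask for the shortest path from the about-to-be-acquired lock back to one
  // of the held locks and turn it into a report (the path length 10 is an
  // arbitrary choice for the sketch).
  //
  //   uptr path[10];
  //   uptr len = d.findPathToLock(&dtls, cur_node, path, ARRAY_SIZE(path));
  //   for (uptr i = 0; i < len; i++)
  //     Printf("  lock %zx (data %zx)\n", path[i], d.getData(path[i]));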

  // Handle the unlock event.
  // This operation is thread-safe as it only touches the dtls.
  void onUnlock(DeadlockDetectorTLS<BV> *dtls, uptr node) {
    if (dtls->getEpoch() == nodeToEpoch(node))
      dtls->removeLock(nodeToIndexUnchecked(node));
  }

  // Tries to handle the lock event w/o writing to global state.
  // Returns true on success.
  // This operation is thread-safe as it only touches the dtls
  // (modulo the racy nature of hasAllEdges).
  bool onLockFast(DeadlockDetectorTLS<BV> *dtls, uptr node, u32 stk = 0) {
    if (hasAllEdges(dtls, node)) {
      dtls->addLock(nodeToIndexUnchecked(node), nodeToEpoch(node), stk);
      return true;
    }
    return false;
  }
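
  // Illustrative sketch: onLockFast() succeeds only when every edge from the
  // currently held locks to 'node' is already in the graph (the racy
  // hasAllEdges() check above), which is the common case for steady-state
  // lock orders. A typical caller chains the fast paths before falling back
  // to the slow path (report_stack_id is a placeholder name).
  //
  //   if (d.onFirstLock(&dtls, cur_node, report_stack_id)) return;
  //   if (d.onLockFast(&dtls, cur_node, report_stack_id)) return;
  //   // slow path: take the external lock, then addEdges() + onLockAfter()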

  bool isHeld(DeadlockDetectorTLS<BV> *dtls, uptr node) const {
    return dtls->getLocks(current_epoch_).getBit(nodeToIndex(node));
  }

  uptr testOnlyGetEpoch() const { return current_epoch_; }
  bool testOnlyHasEdge(uptr l1, uptr l2) {
    return g_.hasEdge(nodeToIndex(l1), nodeToIndex(l2));
  }
  // idx1 and idx2 are raw indices to g_, not lock IDs.
  bool testOnlyHasEdgeRaw(uptr idx1, uptr idx2) {
    return g_.hasEdge(idx1, idx2);
  }

  void Print() {
    for (uptr from = 0; from < size(); from++)
      for (uptr to = 0; to < size(); to++)
        if (g_.hasEdge(from, to))
          Printf("  %zx => %zx\n", from, to);
  }

 private:
  void check_idx(uptr idx) const { CHECK_LT(idx, size()); }

  void check_node(uptr node) const {
    CHECK_GE(node, size());
    CHECK_EQ(current_epoch_, nodeToEpoch(node));
  }

  uptr indexToNode(uptr idx) const {
    check_idx(idx);
    return idx + current_epoch_;
  }

  uptr nodeToIndexUnchecked(uptr node) const { return node % size(); }

  uptr nodeToIndex(uptr node) const {
    check_node(node);
    return nodeToIndexUnchecked(node);
  }

  uptr nodeToEpoch(uptr node) const { return node / size() * size(); }

  uptr getAvailableNode(uptr data) {
    uptr idx = available_nodes_.getAndClearFirstOne();
    data_[idx] = data;
    return indexToNode(idx);
  }

  struct Edge {
    u16 from;
    u16 to;
    u32 stk_from;
    u32 stk_to;
    int unique_tid;
  };

  uptr current_epoch_;
  BV available_nodes_;
  BV recycled_nodes_;
  BV tmp_bv_;
  BVGraph<BV> g_;
  uptr data_[BV::kSize];
  Edge edges_[BV::kSize * 32];
  uptr n_edges_;
};
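
// End-to-end usage sketch (illustrative only; modeled on how a sanitizer
// runtime would drive the detector, with 'mtx' standing in for the external
// lock required by the non-thread-safe methods and 'stk'/'tid' as placeholder
// stack-id and thread-id values; the bit-vector type is an assumption):
//
//   static DeadlockDetector<BasicBitVector<> > g_detector;  // one global
//   static SpinMutex mtx;                                   // external lock
//
//   void BeforeLock(DeadlockDetectorTLS<BasicBitVector<> > *dtls,
//                   uptr lock_node) {
//     SpinMutexLock l(&mtx);
//     g_detector.ensureCurrentEpoch(dtls);
//     // (assumes lock_node is from the current epoch; a real runtime
//     // re-creates stale node ids before this point)
//     if (!g_detector.isHeld(dtls, lock_node) &&
//         g_detector.onLockBefore(dtls, lock_node)) {
//       // Potential deadlock: report the cycle via findPathToLock()/findEdge().
//     }
//   }
//
//   void AfterLock(DeadlockDetectorTLS<BasicBitVector<> > *dtls,
//                  uptr lock_node, u32 stk, int tid) {
//     if (g_detector.onFirstLock(dtls, lock_node, stk)) return;
//     if (g_detector.onLockFast(dtls, lock_node, stk)) return;
//     SpinMutexLock l(&mtx);
//     g_detector.ensureCurrentEpoch(dtls);
//     g_detector.addEdges(dtls, lock_node, stk, tid);
//     g_detector.onLockAfter(dtls, lock_node, stk);
//   }
//
//   void AfterUnlock(DeadlockDetectorTLS<BasicBitVector<> > *dtls,
//                    uptr lock_node) {
//     g_detector.onUnlock(dtls, lock_node);  // thread-safe, no mtx needed
//   }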

} // namespace __sanitizer

#endif // SANITIZER_DEADLOCK_DETECTOR_H