1  //===-- dfsan_interface.h -------------------------------------------------===//
       2  //
       3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
       4  // See https://llvm.org/LICENSE.txt for license information.
       5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
       6  //
       7  //===----------------------------------------------------------------------===//
       8  //
       9  // This file is a part of DataFlowSanitizer.
      10  //
      11  // Public interface header.
      12  //===----------------------------------------------------------------------===//
      13  #ifndef DFSAN_INTERFACE_H
      14  #define DFSAN_INTERFACE_H
      15  
      16  #include <stddef.h>
      17  #include <stdint.h>
      18  #include <sanitizer/common_interface_defs.h>
      19  
      20  #ifdef __cplusplus
      21  extern "C" {
      22  #endif
      23  
/// Taint label attached to application data (an 8-bit value).
typedef uint8_t dfsan_label;
/// Identifier of an origin (a 32-bit value), as returned by
/// dfsan_get_origin().
typedef uint32_t dfsan_origin;
      26  
      27  /// Signature of the callback argument to dfsan_set_write_callback().
      28  typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
      29  
      30  /// Signature of the callback argument to dfsan_set_conditional_callback().
      31  typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
      32                                               dfsan_origin origin);
      33  
      34  /// Computes the union of \c l1 and \c l2, resulting in a union label.
      35  dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
      36  
      37  /// Sets the label for each address in [addr,addr+size) to \c label.
      38  void dfsan_set_label(dfsan_label label, void *addr, size_t size);
      39  
      40  /// Sets the label for each address in [addr,addr+size) to the union of the
      41  /// current label for that address and \c label.
      42  void dfsan_add_label(dfsan_label label, void *addr, size_t size);
      43  
      44  /// Retrieves the label associated with the given data.
      45  ///
      46  /// The type of 'data' is arbitrary.  The function accepts a value of any type,
      47  /// which can be truncated or extended (implicitly or explicitly) as necessary.
      48  /// The truncation/extension operations will preserve the label of the original
      49  /// value.
      50  dfsan_label dfsan_get_label(long data);
      51  
      52  /// Retrieves the immediate origin associated with the given data. The returned
      53  /// origin may point to another origin.
      54  ///
      55  /// The type of 'data' is arbitrary.
      56  dfsan_origin dfsan_get_origin(long data);
      57  
      58  /// Retrieves the label associated with the data at the given address.
      59  dfsan_label dfsan_read_label(const void *addr, size_t size);
      60  
/// Returns the origin associated with the first tainted byte among the
/// \c size bytes starting at address \c addr.
      63  dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);
      64  
/// Returns whether the label \c label contains the label \c elem.
      66  int dfsan_has_label(dfsan_label label, dfsan_label elem);
      67  
      68  /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
      69  /// with the application memory.  Use this call to start over the taint tracking
      70  /// within the same process.
      71  ///
      72  /// Note: If another thread is working with tainted data during the flush, that
      73  /// taint could still be written to shadow after the flush.
      74  void dfsan_flush(void);
      75  
      76  /// Sets a callback to be invoked on calls to write().  The callback is invoked
      77  /// before the write is done.  The write is not guaranteed to succeed when the
      78  /// callback executes.  Pass in NULL to remove any callback.
      79  void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
      80  
      81  /// Sets a callback to be invoked on any conditional expressions which have a
      82  /// taint label set. This can be used to find where tainted data influences
      83  /// the behavior of the program.
      84  /// These callbacks will only be added when -dfsan-conditional-callbacks=true.
      85  void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
      86  
      87  /// Conditional expressions occur during signal handlers.
      88  /// Making callbacks that handle signals well is tricky, so when
      89  /// -dfsan-conditional-callbacks=true, conditional expressions used in signal
      90  /// handlers will add the labels they see into a global (bitwise-or together).
      91  /// This function returns all label bits seen in signal handler conditions.
      92  dfsan_label dfsan_get_labels_in_signal_conditional();
      93  
/// Interceptor hooks.
/// Whenever one of DFSan's custom functions is called, the corresponding
/// hook is called if it is non-zero. The hooks should be defined by the user.
/// The primary use case is taint-guided fuzzing, where the fuzzer
/// needs to see the parameters of the function and the labels.
/// FIXME: implement more hooks.
     100  void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
     101                              size_t n, dfsan_label s1_label,
     102                              dfsan_label s2_label, dfsan_label n_label);
     103  void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
     104                               size_t n, dfsan_label s1_label,
     105                               dfsan_label s2_label, dfsan_label n_label);
     106  
     107  /// Prints the origin trace of the label at the address addr to stderr. It also
     108  /// prints description at the beginning of the trace. If origin tracking is not
     109  /// on, or the address is not labeled, it prints nothing.
     110  void dfsan_print_origin_trace(const void *addr, const char *description);
     111  /// As above, but use an origin id from dfsan_get_origin() instead of address.
     112  /// Does not include header line with taint label and address information.
     113  void dfsan_print_origin_id_trace(dfsan_origin origin);
     114  
     115  /// Prints the origin trace of the label at the address \p addr to a
     116  /// pre-allocated output buffer. If origin tracking is not on, or the address is
     117  /// not labeled, it prints nothing.
     118  ///
     119  /// Typical usage:
     120  /// \code
     121  ///   char kDescription[] = "...";
     122  ///   char buf[1024];
     123  ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
     124  /// \endcode
     125  ///
     126  /// Typical usage that handles truncation:
     127  /// \code
     128  ///   char buf[1024];
///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
     130  ///
     131  ///   if (len < sizeof(buf)) {
     132  ///     ProcessOriginTrace(buf);
     133  ///   } else {
     134  ///     char *tmpbuf = new char[len + 1];
     135  ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
     136  ///     ProcessOriginTrace(tmpbuf);
     137  ///     delete[] tmpbuf;
     138  ///   }
     139  /// \endcode
     140  ///
     141  /// \param addr The tainted memory address whose origin we are printing.
     142  /// \param description A description printed at the beginning of the trace.
     143  /// \param [out] out_buf The output buffer to write the results to.
     144  /// \param out_buf_size The size of \p out_buf.
     145  ///
     146  /// \returns The number of symbols that should have been written to \p out_buf
     147  /// (not including trailing null byte '\0'). Thus, the string is truncated iff
     148  /// return value is not less than \p out_buf_size.
     149  size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
     150                                   char *out_buf, size_t out_buf_size);
     151  /// As above, but use an origin id from dfsan_get_origin() instead of address.
     152  /// Does not include header line with taint label and address information.
     153  size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
     154                                      size_t out_buf_size);
     155  
     156  /// Prints the stack trace leading to this call to a pre-allocated output
     157  /// buffer.
     158  ///
     159  /// For usage examples, see dfsan_sprint_origin_trace.
     160  ///
     161  /// \param [out] out_buf The output buffer to write the results to.
     162  /// \param out_buf_size The size of \p out_buf.
     163  ///
     164  /// \returns The number of symbols that should have been written to \p out_buf
     165  /// (not including trailing null byte '\0'). Thus, the string is truncated iff
     166  /// return value is not less than \p out_buf_size.
     167  size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);
     168  
     169  /// Retrieves the very first origin associated with the data at the given
     170  /// address.
     171  dfsan_origin dfsan_get_init_origin(const void *addr);
     172  
     173  /// Returns the value of -dfsan-track-origins.
     174  /// * 0: do not track origins.
     175  /// * 1: track origins at memory store operations.
     176  /// * 2: track origins at memory load and store operations.
     177  int dfsan_get_track_origins(void);
     178  #ifdef __cplusplus
     179  }  // extern "C"
     180  
     181  template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
     182    dfsan_set_label(label, (void *)&data, sizeof(T));
     183  }
     184  
     185  #endif
     186  
     187  #endif  // DFSAN_INTERFACE_H