1  /* Proof-of-concept of a -fanalyzer plugin.
       2     Detect (some) uses of CPython API outside of the Global Interpreter Lock.
       3     https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
       4  */
       5  /* { dg-options "-g" } */
       6  
       7  #define INCLUDE_MEMORY
       8  #include "gcc-plugin.h"
       9  #include "config.h"
      10  #include "system.h"
      11  #include "coretypes.h"
      12  #include "make-unique.h"
      13  #include "diagnostic.h"
      14  #include "tree.h"
      15  #include "gimple.h"
      16  #include "gimple-iterator.h"
      17  #include "gimple-walk.h"
      18  #include "diagnostic-event-id.h"
      19  #include "analyzer/analyzer.h"
      20  #include "analyzer/analyzer-logging.h"
      21  #include "json.h"
      22  #include "analyzer/sm.h"
      23  #include "analyzer/pending-diagnostic.h"
      24  
/* Marker symbol declaring this plugin to be GPL-compatible.
   NOTE(review): presumably looked up by GCC's plugin loader, as described
   in the GCC plugin API documentation -- confirm.  */
int plugin_is_GPL_compatible;
      26  
      27  #if ENABLE_ANALYZER
      28  
      29  namespace ana {
      30  
      31  static bool
      32  type_based_on_pyobject_p (tree type)
      33  {
      34    /* Ideally we'd also check for "subclasses" here by iterating up the
      35       first field of each struct.  */
      36    if (TREE_CODE (type) != RECORD_TYPE)
      37      return false;
      38    tree name = TYPE_IDENTIFIER (type);
      39    if (!name)
      40      return false;
      41    return id_equal (name, "PyObject");
      42  }
      43  
      44  /* An experimental state machine, for tracking whether the GIL is held,
      45     as global state..  */
      46  
      47  class gil_state_machine : public state_machine
      48  {
      49  public:
      50    gil_state_machine (logger *logger);
      51  
      52    bool inherited_state_p () const final override { return false; }
      53  
      54    bool on_stmt (sm_context *sm_ctxt,
      55  		const supernode *node,
      56  		const gimple *stmt) const final override;
      57  
      58    bool can_purge_p (state_t s) const final override;
      59  
      60    void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
      61  					     const supernode *node,
      62  					     const gimple *stmt,
      63  					     tree op) const;
      64  
      65   private:
      66    void check_for_pyobject_in_call (sm_context *sm_ctxt,
      67  				   const supernode *node,
      68  				   const gcall *call,
      69  				   tree callee_fndecl) const;
      70  
      71   public:
      72    /* These states are "global", rather than per-expression.  */
      73  
      74    /* State for when we've released the GIL.  */
      75    state_t m_released_gil;
      76  
      77    /* Stop state.  */
      78    state_t m_stop;
      79  };
      80  
      81  /* Subclass for diagnostics involving the GIL.  */
      82  
      83  class gil_diagnostic : public pending_diagnostic
      84  {
      85  public:
      86    /* There isn't a warning ID for us to use.  */
      87    int get_controlling_option () const final override
      88    {
      89      return 0;
      90    }
      91  
      92    location_t fixup_location (location_t loc,
      93  			     bool) const final override
      94    {
      95      /* Ideally we'd check for specific macros here, and only
      96         resolve certain macros.  */
      97      if (linemap_location_from_macro_expansion_p (line_table, loc))
      98        loc = linemap_resolve_location (line_table, loc,
      99  				      LRK_MACRO_EXPANSION_POINT, NULL);
     100      return loc;
     101    }
     102  
     103    label_text describe_state_change (const evdesc::state_change &change)
     104      final override
     105    {
     106      if (change.is_global_p ()
     107  	&& change.m_new_state == m_sm.m_released_gil)
     108        return change.formatted_print ("releasing the GIL here");
     109      if (change.is_global_p ()
     110  	&& change.m_new_state == m_sm.get_start_state ())
     111        return change.formatted_print ("acquiring the GIL here");
     112      return label_text ();
     113    }
     114  
     115    diagnostic_event::meaning
     116    get_meaning_for_state_change (const evdesc::state_change &change)
     117      const final override
     118    {
     119      if (change.is_global_p ())
     120        {
     121  	if (change.m_new_state == m_sm.m_released_gil)
     122  	  return diagnostic_event::meaning (diagnostic_event::VERB_release,
     123  					    diagnostic_event::NOUN_lock);
     124  	else if (change.m_new_state == m_sm.get_start_state ())
     125  	  return diagnostic_event::meaning (diagnostic_event::VERB_acquire,
     126  					    diagnostic_event::NOUN_lock);
     127        }
     128      return diagnostic_event::meaning ();
     129    }
     130   protected:
     131    gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
     132    {
     133    }
     134  
     135   private:
     136    const gil_state_machine &m_sm;
     137  };
     138  
     139  class double_save_thread : public gil_diagnostic
     140  {
     141   public:
     142    double_save_thread (const gil_state_machine &sm, const gcall *call)
     143    : gil_diagnostic (sm), m_call (call)
     144    {}
     145  
     146    const char *get_kind () const final override
     147    {
     148      return "double_save_thread";
     149    }
     150  
     151    bool subclass_equal_p (const pending_diagnostic &base_other) const override
     152    {
     153      const double_save_thread &sub_other
     154        = (const double_save_thread &)base_other;
     155      return m_call == sub_other.m_call;
     156    }
     157  
     158    bool emit (rich_location *rich_loc) final override
     159    {
     160      return warning_at (rich_loc, get_controlling_option (),
     161  		       "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
     162    }
     163  
     164    label_text describe_final_event (const evdesc::final_event &ev) final override
     165    {
     166      return ev.formatted_print ("nested usage of %qs here",
     167  			       "Py_BEGIN_ALLOW_THREADS");
     168    }
     169  
     170   private:
     171    const gcall *m_call;
     172  };
     173  
     174  class fncall_without_gil : public gil_diagnostic
     175  {
     176   public:
     177    fncall_without_gil (const gil_state_machine &sm, const gcall *call,
     178  		      tree callee_fndecl, unsigned arg_idx)
     179    : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
     180      m_arg_idx (arg_idx)
     181    {}
     182  
     183    const char *get_kind () const final override
     184    {
     185      return "fncall_without_gil";
     186    }
     187  
     188    bool subclass_equal_p (const pending_diagnostic &base_other) const override
     189    {
     190      const fncall_without_gil &sub_other
     191        = (const fncall_without_gil &)base_other;
     192      return (m_call == sub_other.m_call
     193  	    && m_callee_fndecl == sub_other.m_callee_fndecl
     194  	    && m_arg_idx == sub_other.m_arg_idx);
     195    }
     196  
     197    bool emit (rich_location *rich_loc) final override
     198    {
     199      auto_diagnostic_group d;
     200      if (m_callee_fndecl)
     201        return warning_at (rich_loc, get_controlling_option (),
     202  			 "use of PyObject as argument %i of %qE"
     203  			 " without the GIL",
     204  			 m_arg_idx + 1, m_callee_fndecl);
     205      else
     206        return warning_at (rich_loc, get_controlling_option (),
     207  			 "use of PyObject as argument %i of call"
     208  			 " without the GIL",
     209  			 m_arg_idx + 1, m_callee_fndecl);
     210    }
     211  
     212    label_text describe_final_event (const evdesc::final_event &ev) final override
     213    {
     214      if (m_callee_fndecl)
     215        return ev.formatted_print ("use of PyObject as argument %i of %qE here"
     216  				 " without the GIL",
     217  				 m_arg_idx + 1, m_callee_fndecl);
     218      else
     219        return ev.formatted_print ("use of PyObject as argument %i of call here"
     220  				 " without the GIL",
     221  				 m_arg_idx + 1, m_callee_fndecl);
     222    }
     223  
     224   private:
     225    const gcall *m_call;
     226    tree m_callee_fndecl;
     227    unsigned m_arg_idx;
     228  };
     229  
     230  class pyobject_usage_without_gil : public gil_diagnostic
     231  {
     232   public:
     233    pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
     234    : gil_diagnostic (sm), m_expr (expr)
     235    {}
     236  
     237    const char *get_kind () const final override
     238    {
     239      return "pyobject_usage_without_gil";
     240    }
     241  
     242    bool subclass_equal_p (const pending_diagnostic &base_other) const override
     243    {
     244      return same_tree_p (m_expr,
     245  			((const pyobject_usage_without_gil&)base_other).m_expr);
     246    }
     247  
     248    bool emit (rich_location *rich_loc) final override
     249    {
     250      auto_diagnostic_group d;
     251      return warning_at (rich_loc, get_controlling_option (),
     252  		       "use of PyObject %qE without the GIL", m_expr);
     253    }
     254  
     255    label_text describe_final_event (const evdesc::final_event &ev) final override
     256    {
     257      return ev.formatted_print ("PyObject %qE used here without the GIL",
     258  			       m_expr);
     259    }
     260  
     261   private:
     262    tree m_expr;
     263  };
     264  
     265  /* gil_state_machine's ctor.  */
     266  
     267  gil_state_machine::gil_state_machine (logger *logger)
     268  : state_machine ("gil", logger)
     269  {
     270    m_released_gil = add_state ("released_gil");
     271    m_stop = add_state ("stop");
     272  }
     273  
     274  struct cb_data
     275  {
     276    cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
     277  	   const supernode *snode, const gimple *stmt)
     278    : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
     279    {
     280    }
     281  
     282    const gil_state_machine &m_sm;
     283    sm_context *m_sm_ctxt;
     284    const supernode *m_snode;
     285    const gimple *m_stmt;
     286  };
     287  
     288  static bool
     289  check_for_pyobject (gimple *, tree op, tree, void *data)
     290  {
     291    cb_data *d = (cb_data *)data;
     292    d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
     293  						d->m_stmt, op);
     294    return true;
     295  }
     296  
     297  /* Assuming that the GIL has been released, complain about any
     298     PyObject * arguments passed to CALL.  */
     299  
     300  void
     301  gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
     302  					       const supernode *node,
     303  					       const gcall *call,
     304  					       tree callee_fndecl) const
     305  {
     306    for (unsigned i = 0; i < gimple_call_num_args (call); i++)
     307      {
     308        tree arg = gimple_call_arg (call, i);
     309        if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
     310  	continue;
     311        tree type = TREE_TYPE (TREE_TYPE (arg));
     312        if (type_based_on_pyobject_p (type))
     313  	{
     314  	  sm_ctxt->warn (node, call, NULL_TREE,
     315  			 make_unique<fncall_without_gil> (*this, call,
     316  							  callee_fndecl,
     317  							  i));
     318  	  sm_ctxt->set_global_state (m_stop);
     319  	}
     320      }
     321  }
     322  
     323  /* Implementation of state_machine::on_stmt vfunc for gil_state_machine.  */
     324  
     325  bool
     326  gil_state_machine::on_stmt (sm_context *sm_ctxt,
     327  			    const supernode *node,
     328  			    const gimple *stmt) const
     329  {
     330    const state_t global_state = sm_ctxt->get_global_state ();
     331    if (const gcall *call = dyn_cast <const gcall *> (stmt))
     332      {
     333        if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
     334  	{
     335  	  if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
     336  	    {
     337  	      if (0)
     338  		inform (input_location, "found call to %qs",
     339  			"PyEval_SaveThread");
     340  	      if (global_state == m_released_gil)
     341  		{
     342  		  sm_ctxt->warn (node, stmt, NULL_TREE,
     343  				 make_unique<double_save_thread> (*this, call));
     344  		  sm_ctxt->set_global_state (m_stop);
     345  		}
     346  	      else
     347  		sm_ctxt->set_global_state (m_released_gil);
     348  	      return true;
     349  	    }
     350  	  else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
     351  				    call, 1))
     352  	    {
     353  	      if (0)
     354  		inform (input_location, "found call to %qs",
     355  			"PyEval_SaveThread");
     356  	      if (global_state == m_released_gil)
     357  		sm_ctxt->set_global_state (m_start);
     358  	      return true;
     359  	    }
     360  	  else if (global_state == m_released_gil)
     361  	    {
     362  	      /* Find PyObject * args of calls to fns with unknown bodies.  */
     363  	      if (!fndecl_has_gimple_body_p (callee_fndecl))
     364  		check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
     365  	    }
     366  	}
     367        else if (global_state == m_released_gil)
     368  	check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
     369      }
     370    else
     371      if (global_state == m_released_gil)
     372        {
     373  	/* Walk the stmt, finding uses of PyObject (or "subclasses").  */
     374  	cb_data d (*this, sm_ctxt, node, stmt);
     375  	walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
     376  				       check_for_pyobject,
     377  				       check_for_pyobject,
     378  				       check_for_pyobject);
     379      }
     380    return false;
     381  }
     382  
     383  bool
     384  gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
     385  {
     386    return true;
     387  }
     388  
     389  void
     390  gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
     391  							 const supernode *node,
     392  							 const gimple *stmt,
     393  							 tree op) const
     394  {
     395    tree type = TREE_TYPE (op);
     396    if (type_based_on_pyobject_p (type))
     397      {
     398        sm_ctxt->warn (node, stmt, NULL_TREE,
     399  		     make_unique<pyobject_usage_without_gil> (*this, op));
     400        sm_ctxt->set_global_state (m_stop);
     401      }
     402  }
     403  
     404  /* Callback handler for the PLUGIN_ANALYZER_INIT event.  */
     405  
     406  static void
     407  gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
     408  {
     409    ana::plugin_analyzer_init_iface *iface
     410      = (ana::plugin_analyzer_init_iface *)gcc_data;
     411    LOG_SCOPE (iface->get_logger ());
     412    if (0)
     413      inform (input_location, "got here: gil_analyzer_init_cb");
     414    iface->register_state_machine
     415      (make_unique<gil_state_machine> (iface->get_logger ()));
     416  }
     417  
     418  } // namespace ana
     419  
     420  #endif /* #if ENABLE_ANALYZER */
     421  
     422  int
     423  plugin_init (struct plugin_name_args *plugin_info,
     424  	     struct plugin_gcc_version *version)
     425  {
     426  #if ENABLE_ANALYZER
     427    const char *plugin_name = plugin_info->base_name;
     428    if (0)
     429      inform (input_location, "got here; %qs", plugin_name);
     430    register_callback (plugin_info->base_name,
     431  		     PLUGIN_ANALYZER_INIT,
     432  		     ana::gil_analyzer_init_cb,
     433  		     NULL); /* void *user_data */
     434  #else
     435    sorry_no_analyzer ();
     436  #endif
     437    return 0;
     438  }