(root)/
gcc-13.2.0/
gcc/
config/
riscv/
riscv-vsetvl.h
       1  /* VSETVL pass header for RISC-V 'V' Extension for GNU compiler.
       2     Copyright (C) 2022-2023 Free Software Foundation, Inc.
       3     Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
       4  
       5  This file is part of GCC.
       6  
       7  GCC is free software; you can redistribute it and/or modify
       8  it under the terms of the GNU General Public License as published by
       9  the Free Software Foundation; either version 3, or(at your option)
      10  any later version.
      11  
      12  GCC is distributed in the hope that it will be useful,
      13  but WITHOUT ANY WARRANTY; without even the implied warranty of
      14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15  GNU General Public License for more details.
      16  
      17  You should have received a copy of the GNU General Public License
      18  along with GCC; see the file COPYING3.  If not see
      19  <http://www.gnu.org/licenses/>.  */
      20  
      21  #ifndef GCC_RISCV_VSETVL_H
      22  #define GCC_RISCV_VSETVL_H
      23  
      24  #define IS_AGNOSTIC(VALUE) (bool) (VALUE & 0x1 || (VALUE >> 1 & 0x1))
      25  
      26  namespace riscv_vector {
      27  
      28  /* Classification of vsetvl instruction.  */
      29  enum vsetvl_type
      30  {
      31    VSETVL_NORMAL,
      32    VSETVL_VTYPE_CHANGE_ONLY,
      33    VSETVL_DISCARD_RESULT,
      34    NUM_VSETVL_TYPE
      35  };
      36  
      37  enum emit_type
      38  {
      39    /* emit_insn directly.  */
      40    EMIT_DIRECT,
      41    EMIT_BEFORE,
      42    EMIT_AFTER,
      43  };
      44  
      45  enum demand_type
      46  {
      47    DEMAND_AVL,
      48    DEMAND_SEW,
      49    DEMAND_LMUL,
      50    DEMAND_RATIO,
      51    DEMAND_NONZERO_AVL,
      52    DEMAND_GE_SEW,
      53    DEMAND_TAIL_POLICY,
      54    DEMAND_MASK_POLICY,
      55    NUM_DEMAND
      56  };
      57  
      58  enum demand_status
      59  {
      60    DEMAND_FALSE,
      61    DEMAND_TRUE,
      62    DEMAND_ANY,
      63  };
      64  
      65  enum fusion_type
      66  {
      67    INVALID_FUSION,
      68    VALID_AVL_FUSION,
      69    KILLED_AVL_FUSION
      70  };
      71  
      72  enum merge_type
      73  {
      74    LOCAL_MERGE,
      75    GLOBAL_MERGE
      76  };
      77  
      78  enum def_type
      79  {
      80    REAL_SET = 1 << 0,
      81    PHI_SET = 1 << 1,
      82    BB_HEAD_SET = 1 << 2,
      83    BB_END_SET = 1 << 3,
      84    /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
      85       PHI_SET, BB_HEAD_SET, BB_END_SET and
      86       CLOBBER_DEF def_info types. Currently,
      87       we conservatively do not optimize clobber
      88       def since we don't see the case that we
      89       need to optimize it.  */
      90    CLOBBER_DEF = 1 << 4
      91  };
      92  
/* AVL info for RVV instruction. Most RVV instructions have AVL operand in
   implicit dependency. The AVL comparison between 2 RVV instructions is
   very important since it affects our decision whether we should insert
   a vsetvl instruction in this situation. AVL operand of all RVV instructions
   can only be either a const_int value less than 32 or a reg value which can
   be defined by either a real RTL instruction or a PHI instruction. So we need
   a standalone method to define AVL comparison and we can not simply use
   operator "==" to compare 2 RTX values since it is too strict, which would
   make us miss a lot of optimization opportunities. This method handles the
   following cases:
     103  
     104       -  Background:
     105  	  Insert-vsetvl PASS is working after RA.
     106  
     107       -  Terminology:
     108  	  - pr: Pseudo-register.
     109  	  - hr: Hardware-register.
     110  
     111       -  Case 1:
     112  
     113  	Before RA:
     114  	  li pr138,13
     115  	  insn1 (implicit depend on pr138).
	  li pr139,14
     117  	  insn2 (implicit depend on pr139).
     118  
     119  	After RA:
     120  	  li hr5,13
     121  	  insn1 (implicit depend on hr5).
     122  	  li hr5,14
     123  	  insn2 (implicit depend on hr5).
     124  
     125  	Correct IR after vsetvl PASS:
     126  	  li hr5,13
     127  	  vsetvl1 zero,hr5....
     128  	  insn1 (implicit depend on hr5).
     129  	  li hr5,14
     130  	  vsetvl2 zero,hr5....
     131  	  insn2 (implicit depend on hr5).
     132  
     In this case, both insn1 and insn2 are using hr5 as the same AVL.
     If we use "rtx_equal_p" or "REGNO (AVL1) == REGNO (AVL2)", we will end
     up missing the vsetvl2 instruction, which creates a wrong result.

     Note: Using the "==" operator to compare 2 AVL RTX strictly can fix this
     issue. However, it is too strict a comparison method since not all member
     variables in the RTX data structure need to be the same. It will
     make us miss a lot of optimization opportunities.
     141  
     142       -  Case 2:
     143  
     144  	After RA:
     145  	bb 0:
     146  	  li hr5,13
     147  	bb 1:
     148  	  li hr5,14
     149  	bb2:
     150  	  insn1 (implicit depend on hr5).
     151  	  insn2 (implicit depend on hr5).
     152  
     153       In this case, we may end up with different AVL RTX and produce redundant
     154       vsetvl instruction.
     155  
     156       VALUE is the implicit dependency in each RVV instruction.
     157       SOURCE is the source definition information of AVL operand.  */
     158  class avl_info
     159  {
     160  private:
     161    rtx m_value;
     162    rtl_ssa::set_info *m_source;
     163  
     164  public:
     165    avl_info () : m_value (NULL_RTX), m_source (nullptr) {}
     166    avl_info (const avl_info &);
     167    avl_info (rtx, rtl_ssa::set_info *);
     168    rtx get_value () const { return m_value; }
     169    rtl_ssa::set_info *get_source () const { return m_source; }
     170    bool single_source_equal_p (const avl_info &) const;
     171    bool multiple_source_equal_p (const avl_info &) const;
     172    avl_info &operator= (const avl_info &);
     173    bool operator== (const avl_info &) const;
     174    bool operator!= (const avl_info &) const;
     175  
     176    bool has_avl_imm () const
     177    {
     178      return get_value () && CONST_INT_P (get_value ());
     179    }
     180    bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
     181    bool has_avl_no_reg () const { return !get_value (); }
     182    bool has_non_zero_avl () const;
     183  };
     184  
     185  /* Basic structure to save VL/VTYPE information.  */
     186  struct vl_vtype_info
     187  {
     188  protected:
     189    /* AVL can be either register or const_int.  */
     190    avl_info m_avl;
     191    /* Fields from VTYPE. The VTYPE checking depend on the flag
     192       dem_* before.  */
     193    uint8_t m_sew;
     194    riscv_vector::vlmul_type m_vlmul;
     195    uint8_t m_ratio;
     196    bool m_ta;
     197    bool m_ma;
     198  
     199  public:
     200    void set_sew (uint8_t sew) { m_sew = sew; }
     201    void set_vlmul (riscv_vector::vlmul_type vlmul) { m_vlmul = vlmul; }
     202    void set_ratio (uint8_t ratio) { m_ratio = ratio; }
     203    void set_ta (bool ta) { m_ta = ta; }
     204    void set_ma (bool ma) { m_ma = ma; }
     205  
     206    vl_vtype_info ()
     207      : m_avl (avl_info ()), m_sew (0), m_vlmul (riscv_vector::LMUL_RESERVED),
     208        m_ratio (0), m_ta (0), m_ma (0)
     209    {}
     210    vl_vtype_info (const vl_vtype_info &) = default;
     211    vl_vtype_info &operator= (const vl_vtype_info &) = default;
     212    vl_vtype_info (avl_info, uint8_t, riscv_vector::vlmul_type, uint8_t, bool,
     213  		 bool);
     214  
     215    bool operator== (const vl_vtype_info &) const;
     216    bool operator!= (const vl_vtype_info &) const;
     217  
     218    bool has_avl_imm () const { return m_avl.has_avl_imm (); }
     219    bool has_avl_reg () const { return m_avl.has_avl_reg (); }
     220    bool has_avl_no_reg () const { return m_avl.has_avl_no_reg (); }
     221    bool has_non_zero_avl () const { return m_avl.has_non_zero_avl (); };
     222  
     223    rtx get_avl () const { return m_avl.get_value (); }
     224    const avl_info &get_avl_info () const { return m_avl; }
     225    rtl_ssa::set_info *get_avl_source () const { return m_avl.get_source (); }
     226    void set_avl_info (const avl_info &avl) { m_avl = avl; }
     227    uint8_t get_sew () const { return m_sew; }
     228    riscv_vector::vlmul_type get_vlmul () const { return m_vlmul; }
     229    uint8_t get_ratio () const { return m_ratio; }
     230    bool get_ta () const { return m_ta; }
     231    bool get_ma () const { return m_ma; }
     232  
     233    bool same_avl_p (const vl_vtype_info &) const;
     234    bool same_vtype_p (const vl_vtype_info &) const;
     235    bool same_vlmax_p (const vl_vtype_info &) const;
     236  };
     237  
     238  class vector_insn_info : public vl_vtype_info
     239  {
     240  private:
     241    enum state_type
     242    {
     243      UNINITIALIZED,
     244      VALID,
     245      UNKNOWN,
     246      EMPTY,
     247      /* The empty block can not be polluted as dirty.  */
     248      HARD_EMPTY,
     249  
     250      /* The block is polluted as containing VSETVL instruction during dem
     251         backward propagation to gain better LCM optimization even though
     252         such VSETVL instruction is not really emit yet during this time.  */
     253      DIRTY,
     254      /* The block is polluted with killed AVL.
     255         We will backward propagate such case:
     256  	 bb 0: def a5, 55 (empty).
     257  	 ...
     258  	 bb 1: vsetvli zero, a5.
     259  	 ...
     260  	 bb 2: empty.
     261  	 ...
     262  	 bb 3: def a3, 55 (empty).
     263  	 ...
     264  	 bb 4: vsetvli zero, a3.
     265  
     266         To elide vsetvli in bb 4, we need to backward pollute bb 3 and bb 2
     267         as DIRTY block as long as there is a block def AVL which has the same
     268         source with AVL in bb 4. Such polluted block, we call it as
     269         DIRTY_WITH_KILLED_AVL
     270      */
     271      DIRTY_WITH_KILLED_AVL
     272    };
     273  
     274    enum state_type m_state;
     275  
     276    bool m_demands[NUM_DEMAND];
     277  
     278    /* TODO: Assume INSN1 = INSN holding of definition of AVL.
     279  		  INSN2 = INSN that is inserted a vsetvl insn before.
     280       We may need to add a new member to save INSN of holding AVL.
     281       m_insn is holding the INSN that is inserted a vsetvl insn before in
     282       Phase 2. Ideally, most of the time INSN1 == INSN2. However, considering
     283       such case:
     284  
     285  	vmv.x.s (INSN2)
     286  	vle8.v (INSN1)
     287  
     288       If these 2 instructions are compatible, we should only issue a vsetvl INSN
     289       (with AVL included) before vmv.x.s, but vmv.x.s is not the INSN holding the
     290       definition of AVL.  */
     291    rtl_ssa::insn_info *m_insn;
     292  
     293    /* Parse the instruction to get VL/VTYPE information and demanding
     294     * information.  */
     295    /* This is only called by simple_vsetvl subroutine when optimize == 0.
     296       Since RTL_SSA can not be enabled when optimize == 0, we don't initialize
     297       the m_insn.  */
     298    void parse_insn (rtx_insn *);
     299  
     300    friend class vector_infos_manager;
     301  
     302  public:
     303    vector_insn_info ()
     304      : vl_vtype_info (), m_state (UNINITIALIZED), m_demands{false},
     305        m_insn (nullptr)
     306    {}
     307  
     308    /* This is only called by lazy_vsetvl subroutine when optimize > 0.
     309       We use RTL_SSA framework to initialize the insn_info.  */
     310    void parse_insn (rtl_ssa::insn_info *);
     311  
     312    bool operator>= (const vector_insn_info &) const;
     313    bool operator== (const vector_insn_info &) const;
     314  
     315    bool uninit_p () const { return m_state == UNINITIALIZED; }
     316    bool valid_p () const { return m_state == VALID; }
     317    bool unknown_p () const { return m_state == UNKNOWN; }
     318    bool empty_p () const { return m_state == EMPTY || m_state == HARD_EMPTY; }
     319    bool hard_empty_p () const { return m_state == HARD_EMPTY; }
     320    bool dirty_p () const
     321    {
     322      return m_state == DIRTY || m_state == DIRTY_WITH_KILLED_AVL;
     323    }
     324    bool dirty_with_killed_avl_p () const
     325    {
     326      return m_state == DIRTY_WITH_KILLED_AVL;
     327    }
     328    bool real_dirty_p () const { return m_state == DIRTY; }
     329    bool valid_or_dirty_p () const
     330    {
     331      return m_state == VALID || m_state == DIRTY
     332  	   || m_state == DIRTY_WITH_KILLED_AVL;
     333    }
     334    bool available_p (const vector_insn_info &) const;
     335  
     336    static vector_insn_info get_unknown ()
     337    {
     338      vector_insn_info info;
     339      info.set_unknown ();
     340      return info;
     341    }
     342  
     343    static vector_insn_info get_hard_empty ()
     344    {
     345      vector_insn_info info;
     346      info.set_hard_empty ();
     347      return info;
     348    }
     349  
     350    void set_valid () { m_state = VALID; }
     351    void set_unknown () { m_state = UNKNOWN; }
     352    void set_empty () { m_state = EMPTY; }
     353    void set_hard_empty () { m_state = HARD_EMPTY; }
     354    void set_dirty (enum fusion_type type)
     355    {
     356      gcc_assert (type == VALID_AVL_FUSION || type == KILLED_AVL_FUSION);
     357      if (type == VALID_AVL_FUSION)
     358        m_state = DIRTY;
     359      else
     360        m_state = DIRTY_WITH_KILLED_AVL;
     361    }
     362    void set_dirty (bool dirty_with_killed_avl_p)
     363    {
     364      if (dirty_with_killed_avl_p)
     365        m_state = DIRTY_WITH_KILLED_AVL;
     366      else
     367        m_state = DIRTY;
     368    }
     369    void set_insn (rtl_ssa::insn_info *insn) { m_insn = insn; }
     370  
     371    bool demand_p (enum demand_type type) const { return m_demands[type]; }
     372    void demand (enum demand_type type) { m_demands[type] = true; }
     373    void set_demand (enum demand_type type, bool value)
     374    {
     375      m_demands[type] = value;
     376    }
     377    void fuse_avl (const vector_insn_info &, const vector_insn_info &);
     378    void fuse_sew_lmul (const vector_insn_info &, const vector_insn_info &);
     379    void fuse_tail_policy (const vector_insn_info &, const vector_insn_info &);
     380    void fuse_mask_policy (const vector_insn_info &, const vector_insn_info &);
     381  
     382    bool compatible_p (const vector_insn_info &) const;
     383    bool skip_avl_compatible_p (const vector_insn_info &) const;
     384    bool compatible_avl_p (const vl_vtype_info &) const;
     385    bool compatible_avl_p (const avl_info &) const;
     386    bool compatible_vtype_p (const vl_vtype_info &) const;
     387    bool compatible_p (const vl_vtype_info &) const;
     388    vector_insn_info merge (const vector_insn_info &, enum merge_type) const;
     389  
     390    rtl_ssa::insn_info *get_insn () const { return m_insn; }
     391    const bool *get_demands (void) const { return m_demands; }
     392    rtx get_avl_reg_rtx (void) const
     393    {
     394      return gen_rtx_REG (Pmode, get_avl_source ()->regno ());
     395    }
     396    bool update_fault_first_load_avl (rtl_ssa::insn_info *);
     397  
     398    void dump (FILE *) const;
     399  };
     400  
/* Per-basic-block record: the block's own demand, the info reaching its
   exit, and a probability.  All members are default-constructed.  */
struct vector_block_info
{
  /* The local_dem vector insn_info of the block, i.e. the demand local to
     the block itself.  */
  vector_insn_info local_dem;

  /* The reaching_out vector insn_info of the block.
     NOTE(review): presumably the info in effect at the end of the block;
     confirm in riscv-vsetvl.cc.  */
  vector_insn_info reaching_out;

  /* The static execution probability of the demand info.  */
  profile_probability probability;

  vector_block_info () = default;
};
     414  
     415  class vector_infos_manager
     416  {
     417  public:
     418    auto_vec<vector_insn_info> vector_insn_infos;
     419    auto_vec<vector_block_info> vector_block_infos;
     420    auto_vec<vector_insn_info *> vector_exprs;
     421    hash_set<rtx_insn *> to_refine_vsetvls;
     422    hash_set<rtx_insn *> to_delete_vsetvls;
     423  
     424    struct edge_list *vector_edge_list;
     425    sbitmap *vector_kill;
     426    sbitmap *vector_del;
     427    sbitmap *vector_insert;
     428    sbitmap *vector_antic;
     429    sbitmap *vector_transp;
     430    sbitmap *vector_comp;
     431    sbitmap *vector_avin;
     432    sbitmap *vector_avout;
     433  
     434    vector_infos_manager ();
     435  
     436    /* Create a new expr in expr list if it is not exist.  */
     437    void create_expr (vector_insn_info &);
     438  
     439    /* Get the expr id of the pair of expr.  */
     440    size_t get_expr_id (const vector_insn_info &) const;
     441  
     442    /* Return the number of expr that is set in the bitmap.  */
     443    size_t expr_set_num (sbitmap) const;
     444  
     445    /* Get all relaxer expression id for corresponding vector info.  */
     446    auto_vec<size_t> get_all_available_exprs (const vector_insn_info &) const;
     447  
     448    /* Return true if all expression set in bitmap are same AVL.  */
     449    bool all_same_avl_p (const basic_block, sbitmap) const;
     450  
     451    /* Return true if all expression set in bitmap are same ratio.  */
     452    bool all_same_ratio_p (sbitmap) const;
     453  
     454    void release (void);
     455    void create_bitmap_vectors (void);
     456    void free_bitmap_vectors (void);
     457  
     458    void dump (FILE *) const;
     459  };
     460  
     461  struct demands_pair
     462  {
     463    demand_status first[NUM_DEMAND];
     464    demand_status second[NUM_DEMAND];
     465    bool match_cond_p (const bool *dems1, const bool *dems2) const
     466    {
     467      for (unsigned i = 0; i < NUM_DEMAND; i++)
     468        {
     469  	if (first[i] != DEMAND_ANY && first[i] != dems1[i])
     470  	  return false;
     471  	if (second[i] != DEMAND_ANY && second[i] != dems2[i])
     472  	  return false;
     473        }
     474      return true;
     475    }
     476  };
     477  
     478  struct demands_cond
     479  {
     480    demands_pair pair;
     481    using CONDITION_TYPE
     482      = bool (*) (const vector_insn_info &, const vector_insn_info &);
     483    CONDITION_TYPE incompatible_p;
     484    bool dual_incompatible_p (const vector_insn_info &info1,
     485  			    const vector_insn_info &info2) const
     486    {
     487      return ((pair.match_cond_p (info1.get_demands (), info2.get_demands ())
     488  	     && incompatible_p (info1, info2))
     489  	    || (pair.match_cond_p (info2.get_demands (), info1.get_demands ())
     490  		&& incompatible_p (info2, info1)));
     491    }
     492  };
     493  
/* A fusion rule: a demand pattern pair, four demand flags, and callbacks
   computing new SEW, VLMUL and ratio values from two vector_insn_infos.
   NOTE(review): presumably the flags are the demands to set on the fused
   result and the callbacks supply its fields; confirm in
   riscv-vsetvl.cc.  */
struct demands_fuse_rule
{
  /* Pattern the two infos' demands are matched against.  */
  demands_pair pair;
  /* Flags for the SEW, LMUL, RATIO and GE_SEW demands.  */
  bool demand_sew_p;
  bool demand_lmul_p;
  bool demand_ratio_p;
  bool demand_ge_sew_p;

  /* Callback types; each takes the two infos involved.  */
  using NEW_SEW
    = unsigned (*) (const vector_insn_info &, const vector_insn_info &);
  using NEW_VLMUL
    = vlmul_type (*) (const vector_insn_info &, const vector_insn_info &);
  using NEW_RATIO
    = unsigned (*) (const vector_insn_info &, const vector_insn_info &);
  /* The callbacks themselves.  */
  NEW_SEW new_sew;
  NEW_VLMUL new_vlmul;
  NEW_RATIO new_ratio;
};
     512  
     513  } // namespace riscv_vector
     514  #endif