1  /* A class for building vector constant patterns.
       2     Copyright (C) 2017-2023 Free Software Foundation, Inc.
       3  
       4  This file is part of GCC.
       5  
       6  GCC is free software; you can redistribute it and/or modify it under
       7  the terms of the GNU General Public License as published by the Free
       8  Software Foundation; either version 3, or (at your option) any later
       9  version.
      10  
      11  GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12  WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14  for more details.
      15  
      16  You should have received a copy of the GNU General Public License
      17  along with GCC; see the file COPYING3.  If not see
      18  <http://www.gnu.org/licenses/>.  */
      19  
      20  #ifndef GCC_VECTOR_BUILDER_H
      21  #define GCC_VECTOR_BUILDER_H
      22  
      23  /* This class is a wrapper around auto_vec<T> for building vectors of T.
      24     It aims to encode each vector as npatterns interleaved patterns,
      25     where each pattern represents a sequence:
      26  
      27       { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
      28  
      29     The first three elements in each pattern provide enough information
      30     to derive the other elements.  If all patterns have a STEP of zero,
      31     we only need to encode the first two elements in each pattern.
      32     If BASE1 is also equal to BASE0 for all patterns, we only need to
      33     encode the first element in each pattern.  The number of encoded
      34     elements per pattern is given by nelts_per_pattern.
      35  
      36     The class can be used in two ways:
      37  
      38     1. It can be used to build a full image of the vector, which is then
      39        canonicalized by finalize ().  In this case npatterns is initially
      40        the number of elements in the vector and nelts_per_pattern is
      41        initially 1.
      42  
      43     2. It can be used to build a vector that already has a known encoding.
      44        This is preferred since it is more efficient and copes with
      45        variable-length vectors.  finalize () then canonicalizes the encoding
      46        to a simpler form if possible.
      47  
      48     Shape is the type that specifies the number of elements in the vector
      49     and (where relevant) the type of each element.
      50  
      51     The derived class Derived provides the functionality of this class
      52     for specific Ts.  Derived needs to provide the following interface:
      53  
      54        bool equal_p (T elt1, T elt2) const;
      55  
      56  	  Return true if elements ELT1 and ELT2 are equal.
      57  
      58        bool allow_steps_p () const;
      59  
      60  	  Return true if a stepped representation is OK.  We don't allow
      61  	  linear series for anything other than integers, to avoid problems
      62  	  with rounding.
      63  
      64        bool integral_p (T elt) const;
      65  
      66  	  Return true if element ELT can be interpreted as an integer.
      67  
      68        StepType step (T elt1, T elt2) const;
      69  
      70  	  Return the value of element ELT2 minus the value of element ELT1,
      71  	  given integral_p (ELT1) && integral_p (ELT2).  There is no fixed
      72  	  choice of StepType.
      73  
      74        T apply_step (T base, unsigned int factor, StepType step) const;
      75  
      76  	  Return a vector element with the value BASE + FACTOR * STEP.
      77  
      78        bool can_elide_p (T elt) const;
      79  
      80  	  Return true if we can drop element ELT, even if the retained
      81  	  elements are different.  This is provided for TREE_OVERFLOW
      82  	  handling.
      83  
      84        void note_representative (T *elt1_ptr, T elt2);
      85  
      86  	  Record that ELT2 is being elided, given that ELT1_PTR points to
      87  	  the last encoded element for the containing pattern.  This is
      88  	  again provided for TREE_OVERFLOW handling.
      89  
      90        static poly_uint64 shape_nelts (Shape shape);
      91  
      92  	  Return the number of elements in SHAPE.
      93  
      94      The class provides additional functionality for the case in which
      95      T can describe a vector constant as well as an individual element.
      96      This functionality requires:
      97  
      98        static poly_uint64 nelts_of (T x);
      99  
     100  	  Return the number of elements in vector constant X.
     101  
     102        static unsigned int npatterns_of (T x);
     103  
     104  	  Return the number of patterns used to encode vector constant X.
     105  
     106        static unsigned int nelts_per_pattern_of (T x);
     107  
     108  	  Return the number of elements used to encode each pattern
     109  	  in vector constant X.  */
     110  
     111  template<typename T, typename Shape, typename Derived>
     112  class vector_builder : public auto_vec<T, 32>
     113  {
     114  public:
     115    vector_builder ();
     116  
     117    poly_uint64 full_nelts () const { return m_full_nelts; }
     118    unsigned int npatterns () const { return m_npatterns; }
     119    unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
     120    unsigned int encoded_nelts () const;
     121    bool encoded_full_vector_p () const;
     122    T elt (unsigned int) const;
     123    unsigned int count_dups (int, int, int) const;
     124  
     125    bool operator == (const Derived &) const;
     126    bool operator != (const Derived &x) const { return !operator == (x); }
     127  
     128    bool new_unary_operation (Shape, T, bool);
     129    bool new_binary_operation (Shape, T, T, bool);
     130  
     131    void finalize ();
     132  
     133    static unsigned int binary_encoded_nelts (T, T);
     134  
     135  protected:
     136    void new_vector (poly_uint64, unsigned int, unsigned int);
     137    void reshape (unsigned int, unsigned int);
     138    bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
     139    bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
     140    bool try_npatterns (unsigned int);
     141  
     142  private:
     143    vector_builder (const vector_builder &);
     144    vector_builder &operator= (const vector_builder &);
     145    Derived *derived () { return static_cast<Derived *> (this); }
     146    const Derived *derived () const;
     147  
     148    poly_uint64 m_full_nelts;
     149    unsigned int m_npatterns;
     150    unsigned int m_nelts_per_pattern;
     151  };
     152  
     153  template<typename T, typename Shape, typename Derived>
     154  inline const Derived *
     155  vector_builder<T, Shape, Derived>::derived () const
     156  {
     157    return static_cast<const Derived *> (this);
     158  }
     159  
     160  template<typename T, typename Shape, typename Derived>
     161  inline
     162  vector_builder<T, Shape, Derived>::vector_builder ()
     163    : m_full_nelts (0),
     164      m_npatterns (0),
     165      m_nelts_per_pattern (0)
     166  {}
     167  
     168  /* Return the number of elements that are explicitly encoded.  The vec
     169     starts with these explicitly-encoded elements and may contain additional
     170     elided elements.  */
     171  
     172  template<typename T, typename Shape, typename Derived>
     173  inline unsigned int
     174  vector_builder<T, Shape, Derived>::encoded_nelts () const
     175  {
     176    return m_npatterns * m_nelts_per_pattern;
     177  }
     178  
     179  /* Return true if every element of the vector is explicitly encoded.  */
     180  
     181  template<typename T, typename Shape, typename Derived>
     182  inline bool
     183  vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
     184  {
     185    return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
     186  }
     187  
     188  /* Start building a vector that has FULL_NELTS elements.  Initially
     189     encode it using NPATTERNS patterns with NELTS_PER_PATTERN each.  */
     190  
     191  template<typename T, typename Shape, typename Derived>
     192  void
     193  vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
     194  					       unsigned int npatterns,
     195  					       unsigned int nelts_per_pattern)
     196  {
     197    m_full_nelts = full_nelts;
     198    m_npatterns = npatterns;
     199    m_nelts_per_pattern = nelts_per_pattern;
     200    this->reserve (encoded_nelts ());
     201    this->truncate (0);
     202  }
     203  
     204  /* Return true if this vector and OTHER have the same elements and
     205     are encoded in the same way.  */
     206  
     207  template<typename T, typename Shape, typename Derived>
     208  bool
     209  vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
     210  {
     211    if (maybe_ne (m_full_nelts, other.m_full_nelts)
     212        || m_npatterns != other.m_npatterns
     213        || m_nelts_per_pattern != other.m_nelts_per_pattern)
     214      return false;
     215  
     216    unsigned int nelts = encoded_nelts ();
     217    for (unsigned int i = 0; i < nelts; ++i)
     218      if (!derived ()->equal_p ((*this)[i], other[i]))
     219        return false;
     220  
     221    return true;
     222  }
     223  
     224  /* Return the value of vector element I, which might or might not be
     225     encoded explicitly.  */
     226  
     227  template<typename T, typename Shape, typename Derived>
     228  T
     229  vector_builder<T, Shape, Derived>::elt (unsigned int i) const
     230  {
     231    /* First handle elements that are already present in the underlying
     232       vector, regardless of whether they're part of the encoding or not.  */
     233    if (i < this->length ())
     234      return (*this)[i];
     235  
     236    /* Extrapolation is only possible if the encoding has been fully
     237       populated.  */
     238    gcc_checking_assert (encoded_nelts () <= this->length ());
     239  
     240    /* Identify the pattern that contains element I and work out the index of
     241       the last encoded element for that pattern.  */
     242    unsigned int pattern = i % m_npatterns;
     243    unsigned int count = i / m_npatterns;
     244    unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
     245    T final = (*this)[final_i];
     246  
     247    /* If there are no steps, the final encoded value is the right one.  */
     248    if (m_nelts_per_pattern <= 2)
     249      return final;
     250  
     251    /* Otherwise work out the value from the last two encoded elements.  */
     252    T prev = (*this)[final_i - m_npatterns];
     253    return derived ()->apply_step (final, count - 2,
     254  				 derived ()->step (prev, final));
     255  }
     256  
     257  /* Try to start building a new vector of shape SHAPE that holds the result of
     258     a unary operation on vector constant VEC.  ALLOW_STEPPED_P is true if the
     259     operation can handle stepped encodings directly, without having to expand
     260     the full sequence.
     261  
     262     Return true if the operation is possible, which it always is when
     263     ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
     264  
     265  template<typename T, typename Shape, typename Derived>
     266  bool
     267  vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
     268  							bool allow_stepped_p)
     269  {
     270    poly_uint64 full_nelts = Derived::shape_nelts (shape);
     271    gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
     272    unsigned int npatterns = Derived::npatterns_of (vec);
     273    unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
     274    if (!allow_stepped_p && nelts_per_pattern > 2)
     275      {
     276        if (!full_nelts.is_constant ())
     277  	return false;
     278        npatterns = full_nelts.to_constant ();
     279        nelts_per_pattern = 1;
     280      }
     281    derived ()->new_vector (shape, npatterns, nelts_per_pattern);
     282    return true;
     283  }
     284  
     285  /* Try to start building a new vector of shape SHAPE that holds the result of
     286     a binary operation on vector constants VEC1 and VEC2.  ALLOW_STEPPED_P is
     287     true if the operation can handle stepped encodings directly, without
     288     having to expand the full sequence.
     289  
     290     Return true if the operation is possible.  Leave the builder unchanged
     291     otherwise.  */
     292  
     293  template<typename T, typename Shape, typename Derived>
     294  bool
     295  vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
     296  							 T vec1, T vec2,
     297  							 bool allow_stepped_p)
     298  {
     299    poly_uint64 full_nelts = Derived::shape_nelts (shape);
     300    gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
     301  	      && known_eq (full_nelts, Derived::nelts_of (vec2)));
     302    /* Conceptually we split the patterns in VEC1 and VEC2 until we have
     303       an equal number for both.  Each split pattern requires the same
     304       number of elements per pattern as the original.  E.g. splitting:
     305  
     306         { 1, 2, 3, ... }
     307  
     308       into two gives:
     309  
     310         { 1, 3, 5, ... }
     311         { 2, 4, 6, ... }
     312  
     313       while splitting:
     314  
     315         { 1, 0, ... }
     316  
     317       into two gives:
     318  
     319         { 1, 0, ... }
     320         { 0, 0, ... }.  */
     321    unsigned int npatterns
     322      = least_common_multiple (Derived::npatterns_of (vec1),
     323  			     Derived::npatterns_of (vec2));
     324    unsigned int nelts_per_pattern
     325      = MAX (Derived::nelts_per_pattern_of (vec1),
     326  	   Derived::nelts_per_pattern_of (vec2));
     327    if (!allow_stepped_p && nelts_per_pattern > 2)
     328      {
     329        if (!full_nelts.is_constant ())
     330  	return false;
     331        npatterns = full_nelts.to_constant ();
     332        nelts_per_pattern = 1;
     333      }
     334    derived ()->new_vector (shape, npatterns, nelts_per_pattern);
     335    return true;
     336  }
     337  
     338  /* Return the number of elements that the caller needs to operate on in
     339     order to handle a binary operation on vector constants VEC1 and VEC2.
     340     This static function is used instead of new_binary_operation if the
     341     result of the operation is not a constant vector.  */
     342  
     343  template<typename T, typename Shape, typename Derived>
     344  unsigned int
     345  vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
     346  {
     347    poly_uint64 nelts = Derived::nelts_of (vec1);
     348    gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
     349    /* See new_binary_operation for details.  */
     350    unsigned int npatterns
     351      = least_common_multiple (Derived::npatterns_of (vec1),
     352  			     Derived::npatterns_of (vec2));
     353    unsigned int nelts_per_pattern
     354      = MAX (Derived::nelts_per_pattern_of (vec1),
     355  	   Derived::nelts_per_pattern_of (vec2));
     356    unsigned HOST_WIDE_INT const_nelts;
     357    if (nelts.is_constant (&const_nelts))
     358      return MIN (npatterns * nelts_per_pattern, const_nelts);
     359    return npatterns * nelts_per_pattern;
     360  }
     361  
     362  /* Return the number of leading duplicate elements in the range
     363     [START:END:STEP].  The value is always at least 1.  */
     364  
     365  template<typename T, typename Shape, typename Derived>
     366  unsigned int
     367  vector_builder<T, Shape, Derived>::count_dups (int start, int end,
     368  					       int step) const
     369  {
     370    gcc_assert ((end - start) % step == 0);
     371  
     372    unsigned int ndups = 1;
     373    for (int i = start + step;
     374         i != end && derived ()->equal_p (elt (i), elt (start));
     375         i += step)
     376      ndups++;
     377    return ndups;
     378  }
     379  
     380  /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
     381     but without changing the underlying vector.  */
     382  
     383  template<typename T, typename Shape, typename Derived>
     384  void
     385  vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
     386  					    unsigned int nelts_per_pattern)
     387  {
     388    unsigned int old_encoded_nelts = encoded_nelts ();
     389    unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
     390    gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
     391    unsigned int next = new_encoded_nelts - npatterns;
     392    for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
     393      {
     394        derived ()->note_representative (&(*this)[next], (*this)[i]);
     395        next += 1;
     396        if (next == new_encoded_nelts)
     397  	next -= npatterns;
     398      }
     399    m_npatterns = npatterns;
     400    m_nelts_per_pattern = nelts_per_pattern;
     401  }
     402  
     403  /* Return true if elements [START, END) contain a repeating sequence of
     404     STEP elements.  */
     405  
     406  template<typename T, typename Shape, typename Derived>
     407  bool
     408  vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
     409  							 unsigned int end,
     410  							 unsigned int step)
     411  {
     412    for (unsigned int i = start; i < end - step; ++i)
     413      if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
     414        return false;
     415    return true;
     416  }
     417  
     418  /* Return true if elements [START, END) contain STEP interleaved linear
     419     series.  */
     420  
     421  template<typename T, typename Shape, typename Derived>
     422  bool
     423  vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
     424  						       unsigned int end,
     425  						       unsigned int step)
     426  {
     427    if (!derived ()->allow_steps_p ())
     428      return false;
     429  
     430    for (unsigned int i = start + step * 2; i < end; ++i)
     431      {
     432        T elt1 = (*this)[i - step * 2];
     433        T elt2 = (*this)[i - step];
     434        T elt3 = (*this)[i];
     435  
     436        if (!derived ()->integral_p (elt1)
     437  	  || !derived ()->integral_p (elt2)
     438  	  || !derived ()->integral_p (elt3))
     439  	return false;
     440  
     441        if (maybe_ne (derived ()->step (elt1, elt2),
     442  		    derived ()->step (elt2, elt3)))
     443  	return false;
     444  
     445        if (!derived ()->can_elide_p (elt3))
     446  	return false;
     447      }
     448    return true;
     449  }
     450  
     451  /* Try to change the number of encoded patterns to NPATTERNS, returning
     452     true on success.  */
     453  
     454  template<typename T, typename Shape, typename Derived>
     455  bool
     456  vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
     457  {
     458    if (m_nelts_per_pattern == 1)
     459      {
     460        /* See whether NPATTERNS is valid with the current 1-element-per-pattern
     461  	 encoding.  */
     462        if (repeating_sequence_p (0, encoded_nelts (), npatterns))
     463  	{
     464  	  reshape (npatterns, 1);
     465  	  return true;
     466  	}
     467  
     468        /* We can only increase the number of elements per pattern if all
     469  	 elements are still encoded explicitly.  */
     470        if (!encoded_full_vector_p ())
     471  	return false;
     472      }
     473  
     474    if (m_nelts_per_pattern <= 2)
     475      {
     476        /* See whether NPATTERNS is valid with a 2-element-per-pattern
     477  	 encoding.  */
     478        if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
     479  	{
     480  	  reshape (npatterns, 2);
     481  	  return true;
     482  	}
     483  
     484        /* We can only increase the number of elements per pattern if all
     485  	 elements are still encoded explicitly.  */
     486        if (!encoded_full_vector_p ())
     487  	return false;
     488      }
     489  
     490    if (m_nelts_per_pattern <= 3)
     491      {
     492        /* See whether we have NPATTERNS interleaved linear series,
     493  	 giving a 3-element-per-pattern encoding.  */
     494        if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
     495  	{
     496  	  reshape (npatterns, 3);
     497  	  return true;
     498  	}
     499        return false;
     500      }
     501  
     502    gcc_unreachable ();
     503  }
     504  
     505  /* Replace the current encoding with the canonical form.  */
     506  
     507  template<typename T, typename Shape, typename Derived>
     508  void
     509  vector_builder<T, Shape, Derived>::finalize ()
     510  {
     511    /* The encoding requires the same number of elements to come from each
     512       pattern.  */
     513    gcc_assert (multiple_p (m_full_nelts, m_npatterns));
     514  
     515    /* Allow the caller to build more elements than necessary.  For example,
     516       it's often convenient to build a stepped vector from the natural
     517       encoding of three elements even if the vector itself only has two.  */
     518    unsigned HOST_WIDE_INT const_full_nelts;
     519    if (m_full_nelts.is_constant (&const_full_nelts)
     520        && const_full_nelts <= encoded_nelts ())
     521      {
     522        m_npatterns = const_full_nelts;
     523        m_nelts_per_pattern = 1;
     524      }
     525  
     526    /* Try to whittle down the number of elements per pattern.  That is:
     527  
     528       1. If we have stepped patterns whose steps are all 0, reduce the
     529          number of elements per pattern from 3 to 2.
     530  
     531       2. If we have background fill values that are the same as the
     532          foreground values, reduce the number of elements per pattern
     533          from 2 to 1.  */
     534    while (m_nelts_per_pattern > 1
     535  	 && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
     536  				  encoded_nelts (), m_npatterns))
     537      /* The last two sequences of M_NPATTERNS elements are equal,
     538         so remove the last one.  */
     539      reshape (m_npatterns, m_nelts_per_pattern - 1);
     540  
     541    if (pow2p_hwi (m_npatterns))
     542      {
     543        /* Try to halve the number of patterns while doing so gives a
     544  	 valid pattern.  This approach is linear in the number of
     545  	 elements, whereas searcing from 1 up would be O(n*log(n)).
     546  
     547  	 Each halving step tries to keep the number of elements per pattern
     548  	 the same.  If that isn't possible, and if all elements are still
     549  	 explicitly encoded, the halving step can instead increase the number
     550  	 of elements per pattern.
     551  
     552  	 E.g. for:
     553  
     554  	     { 0, 2, 3, 4, 5, 6, 7, 8 }  npatterns == 8  full_nelts == 8
     555  
     556  	 we first realize that the second half of the sequence is not
     557  	 equal to the first, so we cannot maintain 1 element per pattern
     558  	 for npatterns == 4.  Instead we halve the number of patterns
     559  	 and double the number of elements per pattern, treating this
     560  	 as a "foreground" { 0, 2, 3, 4 } against a "background" of
     561  	 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
     562  
     563  	     { 0, 2, 3, 4 | 5, 6, 7, 8 }  npatterns == 4
     564  
     565  	 Next we realize that this is *not* a foreround of { 0, 2 }
     566  	 against a background of { 3, 4 | 3, 4 ... }, so the only
     567  	 remaining option for reducing the number of patterns is
     568  	 to use a foreground of { 0, 2 } against a stepped background
     569  	 of { 1, 2 | 3, 4 | 5, 6 ... }.  This is valid because we still
     570  	 haven't elided any elements:
     571  
     572  	     { 0, 2 | 3, 4 | 5, 6 }  npatterns == 2
     573  
     574  	 This in turn can be reduced to a foreground of { 0 } against a
     575  	 stepped background of { 1 | 2 | 3 ... }:
     576  
     577  	     { 0 | 2 | 3 }  npatterns == 1
     578  
     579  	 This last step would not have been possible for:
     580  
     581  	     { 0, 0 | 3, 4 | 5, 6 }  npatterns == 2.  */
     582        while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
     583  	continue;
     584  
     585        /* Builders of arbitrary fixed-length vectors can use:
     586  
     587  	     new_vector (x, x, 1)
     588  
     589  	 so that every element is specified explicitly.  Handle cases
     590  	 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
     591  	 would be for 2-bit elements.  We'll have treated them as
     592  	 duplicates in the loop above.  */
     593        if (m_nelts_per_pattern == 1
     594  	  && m_full_nelts.is_constant (&const_full_nelts)
     595  	  && this->length () >= const_full_nelts
     596  	  && (m_npatterns & 3) == 0
     597  	  && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
     598  				 m_npatterns / 4))
     599  	{
     600  	  reshape (m_npatterns / 4, 3);
     601  	  while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
     602  	    continue;
     603  	}
     604      }
     605    else
     606      /* For the non-power-of-2 case, do a simple search up from 1.  */
     607      for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
     608        if (m_npatterns % i == 0 && try_npatterns (i))
     609  	break;
     610  }
     611  
     612  #endif