(root)/
gawk-5.2.2/
support/
dfa.h
       1  /* dfa.h - declarations for GNU deterministic regexp compiler
       2     Copyright (C) 1988, 1998, 2007, 2009-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software; you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3, or (at your option)
       7     any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program; if not, write to the Free Software
      16     Foundation, Inc.,
      17     51 Franklin Street - Fifth Floor, Boston, MA  02110-1301, USA */
      18  
      19  /* Written June, 1988 by Mike Haertel */
      20  
      21  #ifndef DFA_H_
      22  #define DFA_H_
      23  
      24  #ifndef GAWK
      25  /* This file uses _Noreturn, _GL_ATTRIBUTE_DEALLOC, _GL_ATTRIBUTE_MALLOC,
      26     _GL_ATTRIBUTE_PURE, _GL_ATTRIBUTE_RETURNS_NONNULL.  */
      27  #if !_GL_CONFIG_H_INCLUDED
      28   #error "Please include config.h first."
      29  #endif
      30  #endif /* GAWK */
      31  
      32  #include "idx.h"
      33  #include <regex.h>
      34  #include <stddef.h>
      35  #include <stdlib.h>
      36  
      37  #ifdef __cplusplus
      38  extern "C" {
      39  #endif
      40  
      41  struct localeinfo; /* See localeinfo.h.  */
      42  
/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA.

   Values of this type are obtained from dfamust () and released with
   dfamustfree (), both declared later in this header.  */
struct dfamust
{
  /* NOTE(review): the three flags are not described in this header.
     Presumably EXACT says that MUST is the entire match rather than
     just a required substring, and BEGLINE / ENDLINE say that MUST is
     anchored at the start / end of a line -- confirm against dfa.c.  */
  bool exact;
  bool begline;
  bool endline;

  /* The string itself, stored inline as a flexible array member.
     NOTE(review): presumably NUL-terminated -- confirm against dfa.c.  */
  char must[FLEXIBLE_ARRAY_MEMBER];
};
      52  
/* The dfa structure.  It is completely opaque: this header only
   declares the tag, so clients can hold and pass pointers to a
   struct dfa but cannot access its members.  */
struct dfa;
      55  
      56  /* Needed when Gnulib is not used.  */
      57  #ifndef _GL_ATTRIBUTE_MALLOC
      58  # define _GL_ATTRIBUTE_MALLOC
      59  # define _GL_ATTRIBUTE_DEALLOC(f, i)
      60  # define _GL_ATTRIBUTE_DEALLOC_FREE
      61  # define _GL_ATTRIBUTE_RETURNS_NONNULL
      62  #endif
      63  
      64  /* Entry points. */
      65  
      66  /* Allocate a struct dfa.  The struct dfa is completely opaque.
      67     It should be initialized via dfasyntax or dfacopysyntax before other use.
      68     The returned pointer should be passed directly to free() after
      69     calling dfafree() on it. */
      70  extern struct dfa *dfaalloc (void)
      71    _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE
      72    _GL_ATTRIBUTE_RETURNS_NONNULL;
      73  
      74  /* DFA options that can be ORed together, for dfasyntax's 4th arg.  */
      75  enum
      76    {
      77      /* ^ and $ match only the start and end of data, and do not match
      78         end-of-line within data.  This is always false for grep, but
      79         possibly true for other apps.  */
      80      DFA_ANCHOR = 1 << 0,
      81  
      82      /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
      83      DFA_EOL_NUL = 1 << 1,
      84  
      85      /* Treat [:alpha:] etc. as an error at the top level, instead of
      86         merely a warning.  */
      87      DFA_CONFUSING_BRACKETS_ERROR = 1 << 2,
      88  
      89      /* Warn about stray backslashes before ordinary characters other
      90         than ] and } which are special because even though POSIX
      91         says \] and \} have undefined interpretation, platforms
      92         reliably ignore those stray backlashes and warning about them
      93         would likely cause more trouble than it's worth.  */
      94      DFA_STRAY_BACKSLASH_WARN = 1 << 3,
      95  
      96      /* Warn about * appearing out of context at the start of an
      97         expression or subexpression.  */
      98      DFA_STAR_WARN = 1 << 4,
      99  
     100      /* Warn about +, ?, {...} appearing out of context at the start of
     101         an expression or subexpression.  */
     102      DFA_PLUS_WARN = 1 << 5,
     103    };
     104  
     105  /* Initialize or reinitialize a DFA.  The arguments are:
     106     1. The DFA to operate on.
     107     2. Information about the current locale.
     108     3. Syntax bits described in regex.h.
     109     4. Additional DFA options described above.  */
     110  extern void dfasyntax (struct dfa *, struct localeinfo const *,
     111                         reg_syntax_t, int);
     112  
     113  /* Initialize or reinitialize a DFA from an already-initialized DFA.  */
     114  extern void dfacopysyntax (struct dfa *, struct dfa const *);
     115  
     116  /* Parse the given string of given length into the given struct dfa.  */
     117  extern void dfaparse (char const *, idx_t, struct dfa *);
     118  
     119  struct dfamust;
     120  
     121  /* Free the storage held by the components of a struct dfamust. */
     122  extern void dfamustfree (struct dfamust *);
     123  
     124  /* Allocate and return a struct dfamust from a struct dfa that was
     125     initialized by dfaparse and not yet given to dfacomp.  */
     126  extern struct dfamust *dfamust (struct dfa const *)
     127    _GL_ATTRIBUTE_DEALLOC (dfamustfree, 1);
     128  
     129  /* Compile the given string of the given length into the given struct dfa.
     130     The last argument says whether to build a searching or an exact matcher.
     131     A null first argument means the struct dfa has already been
     132     initialized by dfaparse; the second argument is ignored.  */
     133  extern void dfacomp (char const *, idx_t, struct dfa *, bool);
     134  
     135  /* Search through a buffer looking for a match to the given struct dfa.
     136     Find the first occurrence of a string matching the regexp in the
     137     buffer, and the shortest possible version thereof.  Return a pointer to
     138     the first character after the match, or NULL if none is found.  BEGIN
     139     points to the beginning of the buffer, and END points to the first byte
     140     after its end.  Note however that we store a sentinel byte (usually
     141     newline) in *END, so the actual buffer must be one byte longer.
     142     When ALLOW_NL is true, newlines may appear in the matching string.
     143     If COUNT is non-NULL, increment *COUNT once for each newline processed.
     144     Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
     145     encountered a back-reference.  The caller can use this to decide
     146     whether to fall back on a backtracking matcher.  */
     147  extern char *dfaexec (struct dfa *d, char const *begin, char *end,
     148                        bool allow_nl, idx_t *count, bool *backref);
     149  
     150  /* Return a superset for D.  The superset matches everything that D
     151     matches, along with some other strings (though the latter should be
     152     rare, for efficiency reasons).  Return a null pointer if no useful
     153     superset is available.  */
     154  extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
     155  
     156  /* The DFA is likely to be fast.  */
     157  extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
     158  
     159  /* Return true if every construct in D is supported by this DFA matcher.  */
     160  extern bool dfasupported (struct dfa const *) _GL_ATTRIBUTE_PURE;
     161  
     162  /* Free the storage held by the components of a struct dfa. */
     163  extern void dfafree (struct dfa *);
     164  
     165  /* Error handling. */
     166  
     167  /* dfawarn() is called by the regexp routines whenever a regex is compiled
     168     that likely doesn't do what the user wanted.  It takes a single
     169     argument, a NUL-terminated string describing the situation.  The user
     170     must supply a dfawarn.  */
     171  extern void dfawarn (const char *);
     172  
     173  /* dfaerror() is called by the regexp routines whenever an error occurs.  It
     174     takes a single argument, a NUL-terminated string describing the error.
     175     The user must supply a dfaerror.  */
     176  extern _Noreturn void dfaerror (const char *);
     177  
     178  #ifdef __cplusplus
     179  }
     180  #endif
     181  
     182  #endif /* dfa.h */