glibc (2.38)

(root)/
include/
regex.h
       1  /* Definitions for data structures and routines for the regular
       2     expression library.
       3     Copyright (C) 1985, 1989-2023 Free Software Foundation, Inc.
       4     This file is part of the GNU C Library.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <https://www.gnu.org/licenses/>.  */
      19  
      20  #ifndef _REGEX_H
      21  #define _REGEX_H 1
      22  
      23  #include <sys/types.h>
      24  
      25  /* Allow the use in C++ code.  */
      26  #ifdef __cplusplus
      27  extern "C" {
      28  #endif
      29  
      30  /* Define __USE_GNU to declare GNU extensions that violate the
      31     POSIX name space rules.  */
      32  #ifdef _GNU_SOURCE
      33  # define __USE_GNU 1
      34  #endif
      35  
      36  #ifdef _REGEX_LARGE_OFFSETS
      37  
      38  /* Use types and values that are wide enough to represent signed and
      39     unsigned byte offsets in memory.  This currently works only when
      40     the regex code is used outside of the GNU C library; it is not yet
      41     supported within glibc itself, and glibc users should not define
      42     _REGEX_LARGE_OFFSETS.  */
      43  
      44  /* The type of object sizes.  */
      45  typedef size_t __re_size_t;
      46  
      47  /* The type of object sizes, in places where the traditional code
      48     uses unsigned long int.  */
      49  typedef size_t __re_long_size_t;
      50  
      51  #else
      52  
      53  /* The traditional GNU regex implementation mishandles strings longer
      54     than INT_MAX.  */
      55  typedef unsigned int __re_size_t;
      56  typedef unsigned long int __re_long_size_t;
      57  
      58  #endif
      59  
      60  /* The following two types have to be signed and unsigned integer types
      61     wide enough to hold the value of a pointer.  For most ANSI compilers
      62     ptrdiff_t and size_t should likely be OK.  Still, the size of these two
      63     types is 2 for Microsoft C.  Ugh... */
      64  typedef long int s_reg_t;
      65  typedef unsigned long int active_reg_t;
      66  
      67  /* The following bits are used to determine the regexp syntax we
      68     recognize.  The set/not-set meanings are chosen so that Emacs syntax
      69     remains the value 0.  The bits are given in alphabetical order, and
      70     the definitions shifted by one from the previous bit; thus, when we
      71     add or remove a bit, only one other definition need change.  */
      72  typedef unsigned long int reg_syntax_t;
      73  
      74  #ifdef __USE_GNU
      75  /* If this bit is not set, then \ inside a bracket expression is literal.
      76     If set, then such a \ quotes the following character.  */
      77  # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
      78  
      79  /* If this bit is not set, then + and ? are operators, and \+ and \? are
      80       literals.
      81     If set, then \+ and \? are operators and + and ? are literals.  */
      82  # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
      83  
      84  /* If this bit is set, then character classes are supported.  They are:
      85       [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
      86       [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
      87     If not set, then character classes are not supported.  */
      88  # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
      89  
      90  /* If this bit is set, then ^ and $ are always anchors (outside bracket
      91       expressions, of course).
      92     If this bit is not set, then it depends:
      93  	^  is an anchor if it is at the beginning of a regular
      94  	   expression or after an open-group or an alternation operator;
      95  	$  is an anchor if it is at the end of a regular expression, or
      96  	   before a close-group or an alternation operator.
      97  
      98     This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
      99     POSIX draft 11.2 says that * etc. in leading positions is undefined.
     100     We already implemented a previous draft which made those constructs
     101     invalid, though, so we haven't changed the code back.  */
     102  # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
     103  
     104  /* If this bit is set, then special characters are always special
     105       regardless of where they are in the pattern.
     106     If this bit is not set, then special characters are special only in
     107       some contexts; otherwise they are ordinary.  Specifically,
     108       * + ? and intervals are only special when not after the beginning,
     109       open-group, or alternation operator.  */
     110  # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
     111  
     112  /* If this bit is set, then *, +, ?, and { cannot be first in an re or
     113       immediately after an alternation or begin-group operator.  */
     114  # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
     115  
     116  /* If this bit is set, then . matches newline.
     117     If not set, then it doesn't.  */
     118  # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
     119  
     120  /* If this bit is set, then . doesn't match NUL.
     121     If not set, then it does.  */
     122  # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
     123  
     124  /* If this bit is set, nonmatching lists [^...] do not match newline.
     125     If not set, they do.  */
     126  # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
     127  
     128  /* If this bit is set, either \{...\} or {...} defines an
     129       interval, depending on RE_NO_BK_BRACES.
     130     If not set, \{, \}, {, and } are literals.  */
     131  # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
     132  
     133  /* If this bit is set, +, ? and | aren't recognized as operators.
     134     If not set, they are.  */
     135  # define RE_LIMITED_OPS (RE_INTERVALS << 1)
     136  
     137  /* If this bit is set, newline is an alternation operator.
     138     If not set, newline is literal.  */
     139  # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
     140  
     141  /* If this bit is set, then '{...}' defines an interval, and \{ and \}
     142       are literals.
     143    If not set, then '\{...\}' defines an interval.  */
     144  # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
     145  
     146  /* If this bit is set, (...) defines a group, and \( and \) are literals.
     147     If not set, \(...\) defines a group, and ( and ) are literals.  */
     148  # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
     149  
     150  /* If this bit is set, then \<digit> matches <digit>.
     151     If not set, then \<digit> is a back-reference.  */
     152  # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
     153  
     154  /* If this bit is set, then | is an alternation operator, and \| is literal.
     155     If not set, then \| is an alternation operator, and | is literal.  */
     156  # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
     157  
     158  /* If this bit is set, then an ending range point collating higher
     159       than the starting range point, as in [z-a], is invalid.
     160     If not set, then when ending range point collates higher than the
     161       starting range point, the range is ignored.  */
     162  # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
     163  
     164  /* If this bit is set, then an unmatched ) is ordinary.
     165     If not set, then an unmatched ) is invalid.  */
     166  # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
     167  
     168  /* If this bit is set, succeed as soon as we match the whole pattern,
     169     without further backtracking.  */
     170  # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
     171  
     172  /* If this bit is set, do not process the GNU regex operators.
     173     If not set, then the GNU regex operators are recognized. */
     174  # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
     175  
     176  /* If this bit is set, turn on internal regex debugging.
     177     If not set, and debugging was on, turn it off.
     178     This only works if regex.c is compiled -DDEBUG.
     179     We define this bit always, so that all that's needed to turn on
     180     debugging is to recompile regex.c; the calling code can always have
     181     this bit set, and it won't affect anything in the normal case. */
     182  # define RE_DEBUG (RE_NO_GNU_OPS << 1)
     183  
     184  /* If this bit is set, a syntactically invalid interval is treated as
     185     a string of ordinary characters.  For example, the ERE 'a{1' is
     186     treated as 'a\{1'.  */
     187  # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
     188  
     189  /* If this bit is set, then ignore case when matching.
     190     If not set, then case is significant.  */
     191  # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
     192  
     193  /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
     194     for ^, because it is difficult to scan the regex backwards to find
     195     whether ^ should be special.  */
     196  # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
     197  
     198  /* If this bit is set, then \{ cannot be first in a regex or
     199     immediately after an alternation, open-group or \} operator.  */
     200  # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
     201  
     202  /* If this bit is set, then no_sub will be set to 1 during
     203     re_compile_pattern.  */
     204  # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
     205  #endif
     206  
     207  /* This global variable defines the particular regexp syntax to use (for
     208     some interfaces).  When a regexp is compiled, the syntax used is
     209     stored in the pattern buffer, so changing this does not affect
     210     already-compiled regexps.  */
     211  extern reg_syntax_t re_syntax_options;
     212  
     213  #ifdef __USE_GNU
     214  /* Define combinations of the above bits for the standard possibilities.
     215     (The [[[ comments delimit what gets put into the Texinfo file, so
     216     don't delete them!)  */
     217  /* [[[begin syntaxes]]] */
     218  # define RE_SYNTAX_EMACS 0
     219  
     220  # define RE_SYNTAX_AWK							\
     221    (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
     222     | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
     223     | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
     224     | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
     225     | RE_CHAR_CLASSES							\
     226     | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
     227  
     228  # define RE_SYNTAX_GNU_AWK						\
     229    ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
     230      | RE_INVALID_INTERVAL_ORD)						\
     231     & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS				\
     232        | RE_CONTEXT_INVALID_OPS ))
     233  
     234  # define RE_SYNTAX_POSIX_AWK						\
     235    (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
     236     | RE_INTERVALS	    | RE_NO_GNU_OPS				\
     237     | RE_INVALID_INTERVAL_ORD)
     238  
     239  # define RE_SYNTAX_GREP							\
     240    ((RE_SYNTAX_POSIX_BASIC | RE_NEWLINE_ALT)				\
     241     & ~(RE_CONTEXT_INVALID_DUP | RE_DOT_NOT_NULL))
     242  
     243  # define RE_SYNTAX_EGREP						\
     244    ((RE_SYNTAX_POSIX_EXTENDED | RE_INVALID_INTERVAL_ORD | RE_NEWLINE_ALT) \
     245     & ~(RE_CONTEXT_INVALID_OPS | RE_DOT_NOT_NULL))
     246  
     247  /* POSIX grep -E behavior is no longer incompatible with GNU.  */
     248  # define RE_SYNTAX_POSIX_EGREP						\
     249    RE_SYNTAX_EGREP
     250  
     251  /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
     252  # define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
     253  
     254  # define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
     255  
     256  /* Syntax bits common to both basic and extended POSIX regex syntax.  */
     257  # define _RE_SYNTAX_POSIX_COMMON					\
     258    (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
     259     | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
     260  
     261  # define RE_SYNTAX_POSIX_BASIC						\
     262    (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
     263  
     264  /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
     265     RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
     266     isn't minimal, since other operators, such as \`, aren't disabled.  */
     267  # define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
     268    (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
     269  
     270  # define RE_SYNTAX_POSIX_EXTENDED					\
     271    (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
     272     | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
     273     | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
     274     | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
     275  
     276  /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
     277     removed and RE_NO_BK_REFS is added.  */
     278  # define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
     279    (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
     280     | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
     281     | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
     282     | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
     283  /* [[[end syntaxes]]] */
     284  
     285  /* Maximum number of duplicates an interval can allow.  POSIX-conforming
     286     systems might define this in <limits.h>, but we want our
     287     value, so remove any previous define.  */
     288  # ifdef _REGEX_INCLUDE_LIMITS_H
     289  #  include <limits.h>
     290  # endif
     291  # ifdef RE_DUP_MAX
     292  #  undef RE_DUP_MAX
     293  # endif
     294  
     295  /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
     296     the counter as a 2-byte signed integer.  This is no longer true, so
     297     RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
     298     ((SIZE_MAX - 9) / 10) if _REGEX_LARGE_OFFSETS is defined.
     299     However, there would be a huge performance problem if someone
     300     actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
     301     its historical value.  */
     302  # define RE_DUP_MAX (0x7fff)
     303  #endif
     304  
     305  
     306  /* POSIX 'cflags' bits (i.e., information for 'regcomp').  */
     307  
     308  /* If this bit is set, then use extended regular expression syntax.
     309     If not set, then use basic regular expression syntax.  */
     310  #define REG_EXTENDED 1
     311  
     312  /* If this bit is set, then ignore case when matching.
     313     If not set, then case is significant.  */
     314  #define REG_ICASE (1 << 1)
     315  
     316  /* If this bit is set, then anchors do not match at newline
     317       characters in the string.
     318     If not set, then anchors do match at newlines.  */
     319  #define REG_NEWLINE (1 << 2)
     320  
     321  /* If this bit is set, then report only success or fail in regexec.
     322     If not set, then returns differ between not matching and errors.  */
     323  #define REG_NOSUB (1 << 3)
     324  
     325  
     326  /* POSIX 'eflags' bits (i.e., information for regexec).  */
     327  
     328  /* If this bit is set, then the beginning-of-line operator doesn't match
     329       the beginning of the string (presumably because it's not the
     330       beginning of a line).
     331     If not set, then the beginning-of-line operator does match the
     332       beginning of the string.  */
     333  #define REG_NOTBOL 1
     334  
     335  /* Like REG_NOTBOL, except for the end-of-line.  */
     336  #define REG_NOTEOL (1 << 1)
     337  
     338  /* Use PMATCH[0] to delimit the start and end of the search in the
     339     buffer.  */
     340  #define REG_STARTEND (1 << 2)
     341  
     342  
     343  /* Regex error codes.  If any error codes are removed, changed, or added,
     344     update the '__re_error_msgid' table in regcomp.c.  */
     345  
     346  typedef enum
     347  {
     348    _REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
     349    _REG_NOERROR = 0,	/* Success.  */
     350    _REG_NOMATCH,		/* Didn't find a match (for regexec).  */
     351  
     352    /* POSIX regcomp return error codes.  (In the order listed in the
     353       standard.)  */
     354    _REG_BADPAT,		/* Invalid pattern.  */
     355    _REG_ECOLLATE,	/* Invalid collating element.  */
     356    _REG_ECTYPE,		/* Invalid character class name.  */
     357    _REG_EESCAPE,		/* Trailing backslash.  */
     358    _REG_ESUBREG,		/* Invalid back reference.  */
     359    _REG_EBRACK,		/* Unmatched left bracket.  */
     360    _REG_EPAREN,		/* Parenthesis imbalance.  */
     361    _REG_EBRACE,		/* Unmatched \{.  */
     362    _REG_BADBR,		/* Invalid contents of \{\}.  */
     363    _REG_ERANGE,		/* Invalid range end.  */
     364    _REG_ESPACE,		/* Ran out of memory.  */
     365    _REG_BADRPT,		/* No preceding re for repetition op.  */
     366  
     367    /* Error codes we've added.  */
     368    _REG_EEND,		/* Premature end.  */
     369    _REG_ESIZE,		/* Too large (e.g., repeat count too large).  */
     370    _REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
     371  } reg_errcode_t;
     372  
     373  #if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
     374  # define REG_ENOSYS	_REG_ENOSYS
     375  #endif
     376  #define REG_NOERROR	_REG_NOERROR
     377  #define REG_NOMATCH	_REG_NOMATCH
     378  #define REG_BADPAT	_REG_BADPAT
     379  #define REG_ECOLLATE	_REG_ECOLLATE
     380  #define REG_ECTYPE	_REG_ECTYPE
     381  #define REG_EESCAPE	_REG_EESCAPE
     382  #define REG_ESUBREG	_REG_ESUBREG
     383  #define REG_EBRACK	_REG_EBRACK
     384  #define REG_EPAREN	_REG_EPAREN
     385  #define REG_EBRACE	_REG_EBRACE
     386  #define REG_BADBR	_REG_BADBR
     387  #define REG_ERANGE	_REG_ERANGE
     388  #define REG_ESPACE	_REG_ESPACE
     389  #define REG_BADRPT	_REG_BADRPT
     390  #define REG_EEND	_REG_EEND
     391  #define REG_ESIZE	_REG_ESIZE
     392  #define REG_ERPAREN	_REG_ERPAREN
     393  
     394  /* This data structure represents a compiled pattern.  Before calling
     395     the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
     396     and 'translate' can be set.  After the pattern has been compiled,
     397     the fields 're_nsub', 'not_bol' and 'not_eol' are available.  All
     398     other fields are private to the regex routines.  */
     399  
     400  #ifndef RE_TRANSLATE_TYPE
     401  # define __RE_TRANSLATE_TYPE unsigned char *
     402  # ifdef __USE_GNU
     403  #  define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
     404  # endif
     405  #endif
     406  
     407  #ifdef __USE_GNU
     408  # define __REPB_PREFIX(name) name
     409  #else
     410  # define __REPB_PREFIX(name) __##name
     411  #endif
     412  
     413  struct re_pattern_buffer
     414  {
            /* Fields declared through __REPB_PREFIX keep the name as written when
               __USE_GNU is defined, and get a '__' prefix otherwise.  */

     415    /* Space that holds the compiled pattern.  The type
     416       'struct re_dfa_t' is private and is not declared here.  */
     417    struct re_dfa_t *__REPB_PREFIX(buffer);
     418  
     419    /* Number of bytes to which 'buffer' points.  */
     420    __re_long_size_t __REPB_PREFIX(allocated);
     421  
     422    /* Number of bytes actually used in 'buffer'.  */
     423    __re_long_size_t __REPB_PREFIX(used);
     424  
     425    /* Syntax setting with which the pattern was compiled.  */
     426    reg_syntax_t __REPB_PREFIX(syntax);
     427  
     428    /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
     429       fastmap, if there is one, to skip over impossible starting points
     430       for matches.  */
     431    char *__REPB_PREFIX(fastmap);
     432  
     433    /* Either a translate table to apply to all characters before
     434       comparing them, or zero for no translation.  The translation is
     435       applied to a pattern when it is compiled and to a string when it
     436       is matched.  */
     437    __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
     438  
     439    /* Number of subexpressions found by the compiler.  */
     440    size_t re_nsub;
     441  
     442    /* Zero if this pattern cannot match the empty string, one otherwise.
     443       Well, in truth it's used only in 're_search_2', to see whether or
     444       not we should use the fastmap, so we don't set this absolutely
     445       perfectly; see 're_compile_fastmap' (the "duplicate" case).  */
     446    unsigned __REPB_PREFIX(can_be_null) : 1;
     447  
     448    /* If REGS_UNALLOCATED, allocate space in the 'regs' structure
     449       for 'max (RE_NREGS, re_nsub + 1)' groups.
     450       If REGS_REALLOCATE, reallocate space if necessary.
     451       If REGS_FIXED, use what's there.  */
     452  #ifdef __USE_GNU
     453  # define REGS_UNALLOCATED 0
     454  # define REGS_REALLOCATE 1
     455  # define REGS_FIXED 2
     456  #endif
     457    unsigned __REPB_PREFIX(regs_allocated) : 2;
     458  
     459    /* Set to zero when 're_compile_pattern' compiles a pattern; set to
     460       one by 're_compile_fastmap' if it updates the fastmap.  */
     461    unsigned __REPB_PREFIX(fastmap_accurate) : 1;
     462  
     463    /* If set, 're_match_2' does not return information about
     464       subexpressions.  */
     465    unsigned __REPB_PREFIX(no_sub) : 1;
     466  
     467    /* If set, a beginning-of-line anchor doesn't match at the beginning
     468       of the string.  */
     469    unsigned __REPB_PREFIX(not_bol) : 1;
     470  
     471    /* Similarly for an end-of-line anchor.  */
     472    unsigned __REPB_PREFIX(not_eol) : 1;
     473  
     474    /* If true, an anchor at a newline matches.  */
     475    unsigned __REPB_PREFIX(newline_anchor) : 1;
     476  };
     477  
     478  typedef struct re_pattern_buffer regex_t;
     479  
     480  /* Type for byte offsets within the string.  POSIX mandates this.  */
     481  #ifdef _REGEX_LARGE_OFFSETS
     482  /* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
     483     ptrdiff_t and ssize_t.  We don't know of any hosts where ptrdiff_t
     484     is wider than ssize_t, so ssize_t is safe.  ptrdiff_t is not
     485     visible here, so use ssize_t.  */
     486  typedef ssize_t regoff_t;
     487  #else
     488  /* The traditional GNU regex implementation mishandles strings longer
     489     than INT_MAX.  */
     490  typedef int regoff_t;
     491  #endif
     492  
     493  
     494  #ifdef __USE_GNU
     495  /* Register match data, kept as a structure of arrays.  See
     496     regex.texinfo for a full description of what registers match.  */
     497  struct re_registers
     498  {
     499    __re_size_t num_regs;  /* Number of registers.  */
     500    regoff_t *start;  /* Presumably one start offset per register.  */
     501    regoff_t *end;  /* Presumably one end offset per register.  */
     502  };
     503  
     504  
     505  /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
     506     're_match_2' returns information about at least this many registers
     507     the first time a 'regs' structure is passed.  */
     508  # ifndef RE_NREGS
     509  #  define RE_NREGS 30
     510  # endif
     511  #endif
     512  
     513  
     514  /* POSIX specification for registers.  Aside from using different names
     515     than 're_registers', POSIX uses an array of structures, instead of a
     516     structure of arrays.  */
     517  typedef struct
     518  {
     519    regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
     520    regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
     521  } regmatch_t;
     522  
     523  /* Declarations for routines.  */
     524  
     525  #ifndef _REGEX_NELTS
     526  # if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \
     527  	&& !defined __STDC_NO_VLA__)
     528  #  define _REGEX_NELTS(n) n
     529  # else
     530  #  define _REGEX_NELTS(n)
     531  # endif
     532  #endif
     533  
     534  #if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
     535  # pragma GCC diagnostic push
     536  # pragma GCC diagnostic ignored "-Wvla"
     537  #endif
     538  
     539  #ifndef _Attr_access_
     540  # ifdef __attr_access
     541  #  define _Attr_access_(arg) __attr_access (arg)
     542  # elif defined __GNUC__ && 10 <= __GNUC__
     543  #  define _Attr_access_(x) __attribute__ ((__access__ x))
     544  # else
     545  #  define _Attr_access_(x)
     546  # endif
     547  #endif
     548  
     549  #ifdef __USE_GNU
     550  /* Set the current default syntax to SYNTAX, and return the old syntax.
     551     You can also simply assign to the 're_syntax_options' variable.  */
     552  extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
     553  
     554  /* Compile the regular expression PATTERN (LENGTH bytes, read-only),
     555     using the syntax given by the global 're_syntax_options', into the
     556     buffer BUFFER.  Return NULL if successful, and an error string if not.
     557  
     558     To free the allocated storage, you must call 'regfree' on BUFFER.
     559     Note that the translate table must either have been initialized by
     560     'regcomp', with a malloc'ed value, or set to NULL before calling
     561     'regfree'.  */
     562  extern const char *re_compile_pattern (const char *__pattern, size_t __length,
     563  				       struct re_pattern_buffer *__buffer)
     564      _Attr_access_ ((__read_only__, 1, 2));
     565  
     566  
     567  /* Compile a fastmap for the compiled pattern in BUFFER; used to
     568     accelerate searches.  Return 0 if successful and -2 if there was an
     569     internal error.  */
     570  extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
     571  
     572  
     573  /* Search in the string STRING (with length LENGTH) for the pattern
     574     compiled into BUFFER.  Start searching at position START, for RANGE
     575     characters.  Return the starting position of the match, -1 for no
     576     match, or -2 for an internal error.  Also return register
     577     information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
     578  extern regoff_t re_search (struct re_pattern_buffer *__buffer,
     579  			   const char *__String, regoff_t __length,
     580  			   regoff_t __start, regoff_t __range,
     581  			   struct re_registers *__regs)
     582      _Attr_access_ ((__read_only__, 2, 3));
     583  
     584  
     585  /* Like 're_search', but search the concatenation of STRING1 (LENGTH1 bytes)
     586     and STRING2 (LENGTH2 bytes).  Stop searching at index START + STOP.  */
     587  extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
     588  			     const char *__string1, regoff_t __length1,
     589  			     const char *__string2, regoff_t __length2,
     590  			     regoff_t __start, regoff_t __range,
     591  			     struct re_registers *__regs,
     592  			     regoff_t __stop)
     593      _Attr_access_ ((__read_only__, 2, 3))
     594      _Attr_access_ ((__read_only__, 4, 5));
     595  
     596  
     597  /* Like 're_search', but return how many characters in STRING (of length
     598     LENGTH) the regexp in BUFFER matched, starting at position START.  */
     599  extern regoff_t re_match (struct re_pattern_buffer *__buffer,
     600  			  const char *__String, regoff_t __length,
     601  			  regoff_t __start, struct re_registers *__regs)
     602      _Attr_access_ ((__read_only__, 2, 3));
     603  
     604  
     605  /* Relates to 're_match' as 're_search_2' relates to 're_search': the
             match is attempted in the concatenation of STRING1 and STRING2.  */
     606  extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
     607  			    const char *__string1, regoff_t __length1,
     608  			    const char *__string2, regoff_t __length2,
     609  			    regoff_t __start, struct re_registers *__regs,
     610  			    regoff_t __stop)
     611      _Attr_access_ ((__read_only__, 2, 3))
     612      _Attr_access_ ((__read_only__, 4, 5));
     613  
     614  
     615  /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
     616     ENDS.  Subsequent matches using BUFFER and REGS will use this memory
     617     for recording register information.  STARTS and ENDS must be
     618     allocated with malloc, and must each be at least 'NUM_REGS * sizeof
     619     (regoff_t)' bytes long.
     620  
     621     If NUM_REGS is zero, then subsequent matches should allocate their
     622     own register data.
     623  
     624     Unless this function is called, the first search or match using
     625     BUFFER will allocate its own register data, without freeing the old
     626     data.  */
     627  extern void re_set_registers (struct re_pattern_buffer *__buffer,
     628  			      struct re_registers *__regs,
     629  			      __re_size_t __num_regs,
     630  			      regoff_t *__starts, regoff_t *__ends);
     631  #endif	/* Use GNU */
     632  
     633  #if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_MISC)
     634  /* 4.2 BSD compatibility.  */
     635  extern char *re_comp (const char *);
     636  extern int re_exec (const char *);
     637  #endif
     638  
     639  /* For plain 'restrict', use glibc's __restrict if defined.
     640     Otherwise, GCC 2.95 and later have "__restrict"; C99 compilers have
     641     "restrict", and "configure" may have defined "restrict".
     642     Other compilers use __restrict, __restrict__, and _Restrict, and
     643     'configure' might #define 'restrict' to those words, so pick a
     644     different name.  */
     645  #ifndef _Restrict_
     646  # if defined __restrict \
     647       || 2 < __GNUC__ + (95 <= __GNUC_MINOR__) \
     648       || __clang_major__ >= 3
     649  #  define _Restrict_ __restrict
     650  # elif 199901L <= __STDC_VERSION__ || defined restrict
     651  #  define _Restrict_ restrict
     652  # else
     653  #  define _Restrict_
     654  # endif
     655  #endif
     656  /* For the ISO C99 syntax
     657       array_name[restrict]
     658     use glibc's __restrict_arr if available.
     659     Otherwise, GCC 3.1 and clang support this syntax (but not in C++ mode).
     660     Other ISO C99 compilers support it as well.  */
     661  #ifndef _Restrict_arr_
     662  # ifdef __restrict_arr
     663  #  define _Restrict_arr_ __restrict_arr
     664  # elif ((199901L <= __STDC_VERSION__ \
     665           || 3 < __GNUC__ + (1 <= __GNUC_MINOR__) \
     666           || __clang_major__ >= 3) \
     667          && !defined __cplusplus)
     668  #  define _Restrict_arr_ _Restrict_
     669  # else
     670  #  define _Restrict_arr_
     671  # endif
     672  #endif
     673  
     674  /* POSIX compatibility.  */
          /* Compile PATTERN into PREG.  CFLAGS is a bitwise OR of the POSIX
             'cflags' bits (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOSUB).
             Returns REG_NOERROR (zero) on success, otherwise one of the regcomp
             error codes of reg_errcode_t.  */
     675  extern int regcomp (regex_t *_Restrict_ __preg,
     676  		    const char *_Restrict_ __pattern,
     677  		    int __cflags);
     678  
          /* Match STRING against the compiled pattern PREG.  PMATCH is an array
             with room for NMATCH regmatch_t elements; EFLAGS is a bitwise OR of
             the POSIX 'eflags' bits (REG_NOTBOL, REG_NOTEOL, REG_STARTEND).
             REG_NOMATCH is the code for "no match found".  */
     679  extern int regexec (const regex_t *_Restrict_ __preg,
     680  		    const char *_Restrict_ __String, size_t __nmatch,
     681  		    regmatch_t __pmatch[_Restrict_arr_
     682  					_REGEX_NELTS (__nmatch)],
     683  		    int __eflags);
     684  
          /* Write at most ERRBUF_SIZE bytes into ERRBUF (see the write-only access
             attribute below); presumably the message describing ERRCODE.  Per
             POSIX the return value is the buffer size needed for the complete
             message -- confirm against the POSIX regerror specification.  */
     685  extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
     686  			char *_Restrict_ __errbuf, size_t __errbuf_size)
     687      _Attr_access_ ((__write_only__, 3, 4));
     688  
          /* Free the storage allocated for the compiled pattern PREG.  */
     689  extern void regfree (regex_t *__preg);
     690  
     691  #if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
     692  # pragma GCC diagnostic pop
     693  #endif
     694  
     695  #ifdef __cplusplus
     696  }
     697  #endif	/* C++ */
     698  
     699  #endif /* regex.h */