(root)/
sed-4.9/
sed/
sed.h
       1  /*  GNU SED, a batch stream editor.
       2      Copyright (C) 1989-2022 Free Software Foundation, Inc.
       3  
       4      This program is free software; you can redistribute it and/or modify
       5      it under the terms of the GNU General Public License as published by
       6      the Free Software Foundation; either version 3, or (at your option)
       7      any later version.
       8  
       9      This program is distributed in the hope that it will be useful,
      10      but WITHOUT ANY WARRANTY; without even the implied warranty of
      11      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12      GNU General Public License for more details.
      13  
      14      You should have received a copy of the GNU General Public License
      15      along with this program; If not, see <https://www.gnu.org/licenses/>. */
      16  
      17  #include <config.h>
      18  #include "basicdefs.h"
      19  #include "dfa.h"
      20  #include "localeinfo.h"
      21  #include "regex.h"
      22  #include <stdio.h>
      23  #include "unlocked-io.h"
      24  
      25  #include "utils.h"
      26  
      27  /* Struct vector is used to describe a compiled sed program. */
      28  struct vector {
      29    struct sed_cmd *v;	/* a dynamically allocated array */
      30    size_t v_allocated;	/* ... number of slots allocated */
      31    size_t v_length;	/* ... number of slots in use */
      32  };
      33  
      34  /* This structure tracks files used by sed so that they may all be
      35     closed cleanly at normal program termination.  A flag is kept that tells
      36     if a missing newline was encountered, so that it is added on the
      37     next line and the two lines are not concatenated.  */
      38  struct output {
      39    char *name;
      40    bool missing_newline;
      41    FILE *fp;
      42    struct output *link;
      43  };
      44  
      45  struct text_buf {
      46    char *text;
      47    size_t text_length;
      48  };
      49  
      50  struct regex {
      51    regex_t pattern;
      52    int flags;
      53    size_t sz;
      54    struct dfa *dfa;
      55    bool begline;
      56    bool endline;
      57    char re[1];
      58  };
      59  
      60  struct readcmd {
      61    char *fname;
      62    bool append; /* true: append (default); false: prepend (gnu extension) */
      63  };
      64  
      65  enum replacement_types {
      66    REPL_ASIS = 0,
      67    REPL_UPPERCASE = 1,
      68    REPL_LOWERCASE = 2,
      69    REPL_UPPERCASE_FIRST = 4,
      70    REPL_LOWERCASE_FIRST = 8,
      71    REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,
      72  
      73    /* These are given to aid in debugging */
      74    REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
      75    REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
      76    REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
      77    REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
      78  };
      79  
      80  enum text_types {
      81    TEXT_BUFFER,
      82    TEXT_REPLACEMENT,
      83    TEXT_REGEX
      84  };
      85  
      86  enum posixicity_types {
      87    POSIXLY_EXTENDED,	/* with GNU extensions */
      88    POSIXLY_CORRECT,	/* with POSIX-compatible GNU extensions */
      89    POSIXLY_BASIC		/* pedantically POSIX */
      90  };
      91  
      92  enum addr_state {
      93    RANGE_INACTIVE,	/* never been active */
      94    RANGE_ACTIVE,		/* between first and second address */
      95    RANGE_CLOSED		/* like RANGE_INACTIVE, but range has ended once */
      96  };
      97  
      98  enum addr_types {
      99    ADDR_IS_NULL,		/* null address */
     100    ADDR_IS_REGEX,	/* a.addr_regex is valid */
     101    ADDR_IS_NUM,		/* a.addr_number is valid */
     102    ADDR_IS_NUM_MOD,	/* a.addr_number is valid, addr_step is modulo */
     103    ADDR_IS_STEP,		/* address is +N (only valid for addr2) */
     104    ADDR_IS_STEP_MOD,	/* address is ~N (only valid for addr2) */
     105    ADDR_IS_LAST		/* address is $ */
     106  };
     107  
     108  struct addr {
     109    enum addr_types addr_type;
     110    countT addr_number;
     111    countT addr_step;
     112    struct regex *addr_regex;
     113  };
     114  
     115  
     116  struct replacement {
     117    char *prefix;
     118    size_t prefix_length;
     119    int subst_id;
     120    enum replacement_types repl_type;
     121    struct replacement *next;
     122  };
     123  
     124  struct subst {
     125    struct regex *regx;
     126    struct replacement *replacement;
     127    countT numb;		/* if >0, only substitute for match number "numb" */
     128    struct output *outf;	/* 'w' option given */
     129    unsigned global : 1;	/* 'g' option given */
     130    unsigned print : 2;	/* 'p' option given (before/after eval) */
     131    unsigned eval : 1;	/* 'e' option given */
     132    unsigned max_id : 4;  /* maximum backreference on the RHS */
     133  #ifdef lint
     134    char* replacement_buffer;
     135  #endif
     136  };
     137  
     138  
     139  
     140  
     141  struct sed_cmd {
     142    struct addr *a1;	/* save space: usually is NULL */
     143    struct addr *a2;
     144  
     145    /* See description the enum, above.  */
     146    enum addr_state range_state;
     147  
     148    /* Non-zero if command is to be applied to non-matches. */
     149    char addr_bang;
     150  
     151    /* The actual command character. */
     152    char cmd;
     153  
     154    /* auxiliary data for various commands */
     155    union {
     156      /* This structure is used for a, i, and c commands. */
     157      struct text_buf cmd_txt;
     158  
     159      /* This is used for the l, q and Q commands. */
     160      int int_arg;
     161  
     162      /* This is used for the {}, b, and t commands. */
     163      countT jump_index;
     164  
     165      /* This is used for the r command. */
     166      struct readcmd readcmd;
     167  
     168      /* This is used for the hairy s command. */
     169      struct subst *cmd_subst;
     170  
     171      /* This is used for the w command. */
     172      struct output *outf;
     173  
     174      /* This is used for the R command.
     175         (despite the struct name, it is used for both in and out files). */
     176      struct output *inf;
     177  
     178      /* This is used for the y command. */
     179      unsigned char *translate;
     180      char **translatemb;
     181  
     182      /* This is used for the ':' command (debug only).  */
     183      char* label_name;
     184    } x;
     185  };
     186  
     187  
     188  _Noreturn void bad_prog (const char *why);
     189  size_t normalize_text (char *text, size_t len, enum text_types buftype);
     190  struct vector *compile_string (struct vector *, char *str, size_t len);
     191  struct vector *compile_file (struct vector *, const char *cmdfile);
     192  void check_final_program (struct vector *);
     193  void rewind_read_files (void);
     194  void finish_program (struct vector *);
     195  
     196  struct regex *compile_regex (struct buffer *b, int flags, int needed_sub);
     197  int match_regex (struct regex *regex,
     198                   char *buf, size_t buflen, size_t buf_start_offset,
     199                   struct re_registers *regarray, int regsize);
     200  #ifdef lint
     201  void release_regex (struct regex *);
     202  #endif
     203  
     204  void
     205  debug_print_command (const struct vector *program, const struct sed_cmd *sc);
     206  void
     207  debug_print_program (const struct vector *program);
     208  void
     209  debug_print_char (char c);
     210  
     211  int process_files (struct vector *, char **argv);
     212  
     213  int main (int, char **);
     214  
     215  extern struct localeinfo localeinfo;
     216  
     217  extern int extended_regexp_flags;
     218  
     219  /* one-byte buffer delimiter */
     220  extern char buffer_delimiter;
     221  
     222  /* If set, fflush(stdout) on every line output,
     223     and turn off stream buffering on inputs.  */
     224  extern bool unbuffered;
     225  
     226  /* If set, don't write out the line unless explicitly told to. */
     227  extern bool no_default_output;
     228  
     229  /* If set, reset line counts on every new file. */
     230  extern bool separate_files;
     231  
     232  /* If set, follow symlinks when invoked with -i option */
     233  extern bool follow_symlinks;
     234  
     235  /* Do we need to be pedantically POSIX compliant? */
     236  extern enum posixicity_types posixicity;
     237  
     238  /* How long should the `l' command's output line be? */
     239  extern countT lcmd_out_line_len;
     240  
     241  /* How do we edit files in-place? (we don't if NULL) */
     242  extern char *in_place_extension;
     243  
     244  /* The mode to use to read and write files, either "rt"/"w" or "rb"/"wb".  */
     245  extern char const *read_mode;
     246  extern char const *write_mode;
     247  
     248  /* Should we use EREs? */
     249  extern bool use_extended_syntax_p;
     250  
     251  /* Declarations for multibyte character sets.  */
     252  extern int mb_cur_max;
     253  extern bool is_utf8;
     254  
     255  /* If set, operate in 'sandbox' mode - disable e/r/w commands */
     256  extern bool sandbox;
     257  
     258  /* If set, print debugging information.  */
     259  extern bool debug;
     260  
     261  #define MBRTOWC(pwc, s, n, ps) \
     262    (mb_cur_max == 1 ? \
     263     (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
     264     mbrtowc ((pwc), (s), (n), (ps)))
     265  
     266  #define WCRTOMB(s, wc, ps) \
     267    (mb_cur_max == 1 ? \
     268     (*(s) = wctob ((wint_t) (wc)), 1) : \
     269     wcrtomb ((s), (wc), (ps)))
     270  
     271  #define MBSINIT(s) \
     272    (mb_cur_max == 1 ? 1 : mbsinit ((s)))
     273  
     274  #define MBRLEN(s, n, ps) \
     275    (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps))
     276  
     277  #define IS_MB_CHAR(ch, ps)                \
     278    (mb_cur_max == 1 ? 0 : is_mb_char (ch, ps))
     279  
     280  extern int is_mb_char (int ch, mbstate_t *ps);
     281  extern void initialize_mbcs (void);
     282  
     283  /* Use this to suppress gcc's '...may be used before initialized' warnings. */
     284  #ifdef lint
     285  # define IF_LINT(Code) Code
     286  #else
     287  # define IF_LINT(Code) /* empty */
     288  #endif
     289  
     290  #ifndef FALLTHROUGH
     291  # if __GNUC__ < 7
     292  #  define FALLTHROUGH ((void) 0)
     293  # else
     294  #  define FALLTHROUGH __attribute__ ((__fallthrough__))
     295  # endif
     296  #endif