(root)/
tar-1.35/
lib/
wordsplit.h
       1  /* wordsplit - a word splitter
       2     Copyright (C) 2009-2018 Sergey Poznyakoff
       3  
       4     This program is free software; you can redistribute it and/or modify it
       5     under the terms of the GNU General Public License as published by the
       6     Free Software Foundation; either version 3 of the License, or (at your
       7     option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License along
      15     with this program. If not, see <http://www.gnu.org/licenses/>. */
      16  
      17  #ifndef __WORDSPLIT_H
      18  #define __WORDSPLIT_H
      19  
      20  #include <stddef.h>
      21  #include <attribute.h>
      22  
      23  typedef struct wordsplit wordsplit_t;
      24  
      25  /* Structure used to direct the splitting.  Members marked with [Input]
      26     can be defined before calling wordsplit(), those marked with [Output]
      27     provide return values when the function returns.  If neither mark is
      28     used, the member is internal and must not be used by the caller.
      29  
      30     In the comments below, the identifiers in parentheses indicate bits that
      31     must be set (or unset, if starting with !) in ws_flags (if starting with
      32     WRDSF_) or ws_options (if starting with WRDSO_) to initialize or use the
      33     given member.
      34  
      35     If not redefined explicitly, most of them are set to some reasonable
      36     default value upon entry to wordsplit(). */
      37  struct wordsplit
      38  {
      39    size_t ws_wordc;          /* [Output] Number of words in ws_wordv. */
      40    char **ws_wordv;          /* [Output] Array of parsed out words. */
      41    size_t ws_offs;           /* [Input] (WRDSF_DOOFFS) Number of initial
      42  			       elements in ws_wordv to fill with NULLs. */
      43    size_t ws_wordn;          /* Number of elements ws_wordv can accommodate. */
      44    unsigned ws_flags;        /* [Input] Flags (WRDSF_*) passed to wordsplit. */
      45    unsigned ws_options;      /* [Input] (WRDSF_OPTIONS)
      46  			       Additional options (WRDSO_* bits). */
      47    size_t ws_maxwords;       /* [Input] (WRDSO_MAXWORDS) Return at most that
      48  			       many words. */
      49    size_t ws_wordi;          /* [Output] (WRDSF_INCREMENTAL) Total number of
      50  			       words returned so far. */
      51  
      52    const char *ws_delim;     /* [Input] (WRDSF_DELIM) Word delimiters. */
      53    const char *ws_comment;   /* [Input] (WRDSF_COMMENT) Comment characters. */
      54    const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
      55  			       with backslash. */
      56    void (*ws_alloc_die) (wordsplit_t *wsp);
      57                              /* [Input] (WRDSF_ALLOC_DIE) Function called when
      58  			       out of memory.  Must not return. */
      59    void (*ws_error) (const char *, ...)
      60  		ATTRIBUTE_FORMAT ((printf, 1, 2));
      61                              /* [Input] (WRDSF_ERROR) Function used for error
      62  			       reporting. */
      63    void (*ws_debug) (const char *, ...)
      64  		ATTRIBUTE_FORMAT ((printf, 1, 2));
      65                              /* [Input] (WRDSF_DEBUG) Function used for debug
      66  			       output. */
      67    const char **ws_env;      /* [Input] (WRDSF_ENV, !WRDSF_NOVAR) Array of
      68  			       environment variables. */
      69  
      70    char **ws_envbuf;         /* Internal; see wordsplit_free_envbuf(). */
      71    size_t ws_envidx;         /* Internal; presumably an index into ws_envbuf. */
      72    size_t ws_envsiz;         /* Internal; presumably the size of ws_envbuf. */
      73  
      74    int (*ws_getvar) (char **ret, const char *var, size_t len, void *clos);
      75                              /* [Input] (WRDSF_GETVAR, !WRDSF_NOVAR) Looks up
      76  			       the name VAR (LEN bytes long) in the table of
      77  			       variables and if found returns in memory
      78  			       location pointed to by RET the value of that
      79  			       variable.  Returns WRDSE_OK (0) on success,
      80  			       and an error code (see WRDSE_* defines below)
      81  			       on error.  User-specific errors can be returned
      82  			       by storing the error diagnostic string in RET
      83  			       and returning WRDSE_USERERR.
      84                                 Whatever is stored in RET, it must be allocated
      85  			       using malloc(3). */
      86    void *ws_closure;         /* [Input] (WRDSF_CLOSURE) Passed as the CLOS
      87  			       argument to ws_getvar and ws_command. */
      88    int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv,
      89                       void *clos);
      90  	                    /* [Input] (!WRDSF_NOCMD) Returns in the memory
      91  			       location pointed to by RET the expansion of
      92  			       the command CMD (LEN bytes long).  If WRDSO_ARGV
      93  			       option is set, ARGV contains CMD split out to
      94  			       words.  Otherwise ARGV is NULL.
      95  
      96  			       See ws_getvar for a discussion of possible
      97  			       return values. */
      98  
      99    const char *ws_input;     /* Input string (the S argument to wordsplit). */
     100    size_t ws_len;            /* Length of ws_input. */
     101    size_t ws_endp;           /* Points past the last processed byte in
     102  			       ws_input. */
     103    int ws_errno;             /* [Output] Error code, if an error occurred. */
     104    char *ws_usererr;         /* Points to textual description of
     105  			       the error, if ws_errno is WRDSE_USERERR.  Must
     106  			       be allocated with malloc(3). */
     107    struct wordsplit_node *ws_head, *ws_tail;
     108                              /* Doubly-linked list of parsed out nodes. */
     109    int ws_lvl;               /* Invocation nesting level. */
     110  };
     111  
     112  /* Initial size for ws_env, if allocated automatically. */
     113  #define WORDSPLIT_ENV_INIT 16
     114  
     115  /* Wordsplit flags (bits for ws_flags and the FLAGS argument). */
     116  /* Append the words found to the array resulting from a previous
     117     call. */
     118  #define WRDSF_APPEND            0x00000001
     119  /* Insert ws_offs initial NULLs in the array ws_wordv.
     120     (These are not counted in the returned ws_wordc.) */
     121  #define WRDSF_DOOFFS            0x00000002
     122  /* Don't do command substitution. */
     123  #define WRDSF_NOCMD             0x00000004
     124  /* The WS argument resulted from a previous call to
     125     wordsplit(), and wordsplit_free() was not called.  Reuse the
     126     allocated storage. */
     127  #define WRDSF_REUSE             0x00000008
     128  /* Print errors. */
     129  #define WRDSF_SHOWERR           0x00000010
     130  /* Consider it an error if an undefined variable is expanded. */
     131  #define WRDSF_UNDEF             0x00000020
     132  /* Don't do variable expansion. */
     133  #define WRDSF_NOVAR             0x00000040
     134  /* Abort on ENOMEM error. */
     135  #define WRDSF_ENOMEMABRT        0x00000080
     136  /* Trim off any leading and trailing whitespace. */
     137  #define WRDSF_WS                0x00000100
     138  /* Handle single quotes. */
     139  #define WRDSF_SQUOTE            0x00000200
     140  /* Handle double quotes. */
     141  #define WRDSF_DQUOTE            0x00000400
     142  /* Handle single and double quotes. */
     143  #define WRDSF_QUOTE             (WRDSF_SQUOTE|WRDSF_DQUOTE)
     144  /* Replace each input sequence of repeated delimiters with a single
     145     delimiter. */
     146  #define WRDSF_SQUEEZE_DELIMS    0x00000800
     147  /* Return delimiters. */
     148  #define WRDSF_RETURN_DELIMS     0x00001000
     149  /* Treat sed expressions as words. */
     150  #define WRDSF_SED_EXPR          0x00002000
     151  /* ws_delim field is initialized. */
     152  #define WRDSF_DELIM             0x00004000
     153  /* ws_comment field is initialized. */
     154  #define WRDSF_COMMENT           0x00008000
     155  /* ws_alloc_die field is initialized. */
     156  #define WRDSF_ALLOC_DIE         0x00010000
     157  /* ws_error field is initialized. */
     158  #define WRDSF_ERROR             0x00020000
     159  /* ws_debug field is initialized. */
     160  #define WRDSF_DEBUG             0x00040000
     161  /* ws_env field is initialized. */
     162  #define WRDSF_ENV               0x00080000
     163  /* ws_getvar field is initialized. */
     164  #define WRDSF_GETVAR            0x00100000
     165  /* Enable debugging. */
     166  #define WRDSF_SHOWDBG           0x00200000
     167  /* Don't split input into words.  Useful for side effects. */
     168  #define WRDSF_NOSPLIT           0x00400000
     169  /* Keep undefined variables in place, instead of expanding them to
     170     empty strings. */
     171  #define WRDSF_KEEPUNDEF         0x00800000
     172  /* Warn about undefined variables. */
     173  #define WRDSF_WARNUNDEF         0x01000000
     174  /* Handle C escapes. */
     175  #define WRDSF_CESCAPES          0x02000000
     176  /* ws_closure is set. */
     177  #define WRDSF_CLOSURE           0x04000000
     178  /* ws_env is a Key/Value environment, i.e. the value of a variable is
     179     stored in the element that follows its name. */
     180  #define WRDSF_ENV_KV            0x08000000
     181  /* ws_escape is set. */
     182  #define WRDSF_ESCAPE            0x10000000
     183  /* Incremental mode. */
     184  #define WRDSF_INCREMENTAL       0x20000000
     185  /* Perform pathname and tilde expansion. */
     186  #define WRDSF_PATHEXPAND        0x40000000
     187  /* ws_options is initialized. */
     188  #define WRDSF_OPTIONS           0x80000000
     189  /* Flag set: no variable/command expansion; quotes, squeezed delimiters, C escapes. */
     190  #define WRDSF_DEFFLAGS	       \
     191    (WRDSF_NOVAR | WRDSF_NOCMD | \
     192     WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
     193  
     194  /* Remove the word that produces empty string after path expansion. */
     195  #define WRDSO_NULLGLOB        0x00000001
     196  /* Print error message if path expansion produces empty string. */
     197  #define WRDSO_FAILGLOB        0x00000002
     198  /* Allow a leading period to be matched by metacharacters. */
     199  #define WRDSO_DOTGLOB         0x00000004
     200  /* ws_command needs argv parameter. */
     201  #define WRDSO_ARGV            0x00000008
     202  /* Keep backslash in unrecognized escape sequences in words. */
     203  #define WRDSO_BSKEEP_WORD     0x00000010
     204  /* Handle octal escapes in words. */
     205  #define WRDSO_OESC_WORD       0x00000020
     206  /* Handle hex escapes in words. */
     207  #define WRDSO_XESC_WORD       0x00000040
     208  
     209  /* ws_maxwords field is initialized. */
     210  #define WRDSO_MAXWORDS        0x00000080
     211  
     212  /* Keep backslash in unrecognized escape sequences in quoted strings. */
     213  #define WRDSO_BSKEEP_QUOTE    0x00000100
     214  /* Handle octal escapes in quoted strings. */
     215  #define WRDSO_OESC_QUOTE      0x00000200
     216  /* Handle hex escapes in quoted strings. */
     217  #define WRDSO_XESC_QUOTE      0x00000400
     218  /* Aliases for the _WORD bits; shifted left by 4 they equal the _QUOTE bits. */
     219  #define WRDSO_BSKEEP          WRDSO_BSKEEP_WORD
     220  #define WRDSO_OESC            WRDSO_OESC_WORD
     221  #define WRDSO_XESC            WRDSO_XESC_WORD
     222  
     223  /* Indices into ws_escape; also the Q values (0 and 1) of the macros below. */
     224  #define WRDSX_WORD  0
     225  #define WRDSX_QUOTE 1
     226  /* Both macros below shift F left by 4*Q bits before applying it to ws_options. */
     227  /* Set escape option F in WS for words (Q==0) or quoted strings (Q==1). */
     228  #define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
     229  /* Test WS for escape option F for words (Q==0) or quoted strings (Q==1); nonzero if set. */
     230  #define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
     231  /* Error codes (stored in ws_errno; also returned by ws_getvar). */
     232  #define WRDSE_OK         0          /* Success. */
     233  #define WRDSE_EOF        WRDSE_OK   /* Same value as WRDSE_OK. */
     234  #define WRDSE_QUOTE      1
     235  #define WRDSE_NOSPACE    2
     236  #define WRDSE_USAGE      3
     237  #define WRDSE_CBRACE     4
     238  #define WRDSE_UNDEF      5          /* Presumably: undefined variable (cf. WRDSF_UNDEF). */
     239  #define WRDSE_NOINPUT    6
     240  #define WRDSE_PAREN      7
     241  #define WRDSE_GLOBERR    8
     242  #define WRDSE_USERERR    9          /* User-specific error; text is in ws_usererr. */
     243  /* Entry points.  S is the input string, FLAGS a mask of WRDSF_* bits. */
     244  int wordsplit (const char *s, wordsplit_t *ws, unsigned flags);
     245  int wordsplit_len (const char *s, size_t len, wordsplit_t *ws, unsigned flags);
     246  void wordsplit_free (wordsplit_t *ws);
     247  void wordsplit_free_words (wordsplit_t *ws);
     248  void wordsplit_free_envbuf (wordsplit_t *ws);
     249  int wordsplit_get_words (wordsplit_t *ws, size_t *wordc, char ***wordv);
     250  /* NOTE(review): presumably appends ARGC words from ARGV to WSP -- confirm. */
     251  int wordsplit_append (wordsplit_t *wsp, int argc, char **argv);
     252  /* NOTE(review): presumably helpers for C escapes (cf. WRDSF_CESCAPES) -- confirm. */
     253  int wordsplit_c_unquote_char (int c);
     254  int wordsplit_c_quote_char (int c);
     255  size_t wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote);
     256  void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
     257  /* NOTE(review): presumably report/describe the error held in ws_errno -- confirm. */
     258  void wordsplit_perror (wordsplit_t *ws);
     259  const char *wordsplit_strerror (wordsplit_t *ws);
     260  /* NOTE(review): presumably resets the error state of WS -- confirm. */
     261  void wordsplit_clearerr (wordsplit_t *ws);
     262  
     263  #endif