1  /* GNU m4 -- A simple macro processor
       2  
       3     Copyright (C) 1989-1994, 2006-2007, 2009-2014, 2016-2017, 2020-2021
       4     Free Software Foundation, Inc.
       5  
       6     This file is part of GNU M4.
       7  
       8     GNU M4 is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     GNU M4 is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.
      20  */
      21  
/* This file contains the functions that perform the basic argument
   parsing and macro expansion.  */
      24  
      25  #include "m4.h"
      26  
/* Forward declarations.  expand_token () is used by expand_input ()
   and expand_argument () before its definition, and expand_macro ()
   is called from expand_token ().  */
static void expand_macro (symbol *);
static void expand_token (struct obstack *, token_type, token_data *, int);

/* Current recursion level in expand_macro ().  It is incremented on
   entry to expand_macro () and decremented again before that function
   returns; expand_macro () compares it against nesting_limit.  */
int expansion_level = 0;

/* The number of the current call of expand_macro ().  It is bumped
   once per call; each call keeps its own copy of the value and hands
   it to the trace functions.  */
static int macro_call_id = 0;

/* The shared stack of collected arguments for macro calls; as each
   argument is collected, it is finished and its location stored in
   argv_stack.  Normally, this stack can be used simultaneously by
   multiple macro calls; the exception is when an outer macro has
   generated some text, then calls a nested macro, in which case the
   nested macro must use a local stack to leave the unfinished text
   alone.  Too bad obstack.h does not provide an easy way to reopen a
   finished object for further growth, but in practice this does not
   hurt us too much.  Initialized and freed by expand_input ().  */
static struct obstack argc_stack;

/* The shared stack of pointers to collected arguments for macro
   calls.  This object is never finished; we exploit the fact that
   obstack_blank_fast is documented to take a negative size to reduce
   the size again.  Initialized and freed by expand_input ().  */
static struct obstack argv_stack;
      52  
      53  /*----------------------------------------------------------------------.
      54  | This function read all input, and expands each token, one at a time.  |
      55  `----------------------------------------------------------------------*/
      56  
      57  void
      58  expand_input (void)
      59  {
      60    token_type t;
      61    token_data td;
      62    int line;
      63  
      64    obstack_init (&argc_stack);
      65    obstack_init (&argv_stack);
      66  
      67    while ((t = next_token (&td, &line)) != TOKEN_EOF)
      68      expand_token ((struct obstack *) NULL, t, &td, line);
      69  
      70    obstack_free (&argc_stack, NULL);
      71    obstack_free (&argv_stack, NULL);
      72  }
      73  
      74  
      75  /*----------------------------------------------------------------.
      76  | Expand one token, according to its type.  Potential macro names |
      77  | (TOKEN_WORD) are looked up in the symbol table, to see if they  |
      78  | have a macro definition.  If they have, they are expanded as    |
      79  | macros, otherwise the text is just copied to the output.        |
      80  `----------------------------------------------------------------*/
      81  
      82  static void
      83  expand_token (struct obstack *obs, token_type t, token_data *td, int line)
      84  {
      85    symbol *sym;
      86  
      87    switch (t)
      88      { /* TOKSW */
      89      case TOKEN_EOF:
      90      case TOKEN_MACDEF:
      91        break;
      92  
      93      case TOKEN_OPEN:
      94      case TOKEN_COMMA:
      95      case TOKEN_CLOSE:
      96      case TOKEN_SIMPLE:
      97      case TOKEN_STRING:
      98        shipout_text (obs, TOKEN_DATA_TEXT (td), strlen (TOKEN_DATA_TEXT (td)),
      99                      line);
     100        break;
     101  
     102      case TOKEN_WORD:
     103        sym = lookup_symbol (TOKEN_DATA_TEXT (td), SYMBOL_LOOKUP);
     104        if (sym == NULL || SYMBOL_TYPE (sym) == TOKEN_VOID
     105            || (SYMBOL_TYPE (sym) == TOKEN_FUNC
     106                && SYMBOL_BLIND_NO_ARGS (sym)
     107                && peek_token () != TOKEN_OPEN))
     108          {
     109  #ifdef ENABLE_CHANGEWORD
     110            shipout_text (obs, TOKEN_DATA_ORIG_TEXT (td),
     111                          strlen (TOKEN_DATA_ORIG_TEXT (td)), line);
     112  #else
     113            shipout_text (obs, TOKEN_DATA_TEXT (td),
     114                          strlen (TOKEN_DATA_TEXT (td)), line);
     115  #endif
     116          }
     117        else
     118          expand_macro (sym);
     119        break;
     120  
     121      default:
     122        M4ERROR ((warning_status, 0,
     123                  "INTERNAL ERROR: bad token type in expand_token ()"));
     124        abort ();
     125      }
     126  }
     127  
     128  
     129  /*-------------------------------------------------------------------.
     130  | This function parses one argument to a macro call.  It expects the |
     131  | first left parenthesis, or the separating comma, to have been read |
     132  | by the caller.  It skips leading whitespace, and reads and expands |
     133  | tokens, until it finds a comma or an right parenthesis at the same |
     134  | level of parentheses.  It returns a flag indicating whether the    |
     135  | argument read is the last for the active macro call.  The argument |
     136  | is built on the obstack OBS, indirectly through expand_token ().   |
     137  `-------------------------------------------------------------------*/
     138  
     139  static bool
     140  expand_argument (struct obstack *obs, token_data *argp)
     141  {
     142    token_type t;
     143    token_data td;
     144    char *text;
     145    int paren_level;
     146    const char *file = current_file;
     147    int line = current_line;
     148  
     149    TOKEN_DATA_TYPE (argp) = TOKEN_VOID;
     150  
     151    /* Skip leading white space.  */
     152    do
     153      {
     154        t = next_token (&td, NULL);
     155      }
     156    while (t == TOKEN_SIMPLE && c_isspace (*TOKEN_DATA_TEXT (&td)));
     157  
     158    paren_level = 0;
     159  
     160    while (1)
     161      {
     162  
     163        switch (t)
     164          { /* TOKSW */
     165          case TOKEN_COMMA:
     166          case TOKEN_CLOSE:
     167            if (paren_level == 0)
     168              {
     169                /* The argument MUST be finished, whether we want it or not.  */
     170                obstack_1grow (obs, '\0');
     171                text = (char *) obstack_finish (obs);
     172  
     173                if (TOKEN_DATA_TYPE (argp) == TOKEN_VOID)
     174                  {
     175                    TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
     176                    TOKEN_DATA_TEXT (argp) = text;
     177                  }
     178                return t == TOKEN_COMMA;
     179              }
     180            FALLTHROUGH;
     181          case TOKEN_OPEN:
     182          case TOKEN_SIMPLE:
     183            text = TOKEN_DATA_TEXT (&td);
     184  
     185            if (*text == '(')
     186              paren_level++;
     187            else if (*text == ')')
     188              paren_level--;
     189            expand_token (obs, t, &td, line);
     190            break;
     191  
     192          case TOKEN_EOF:
     193            /* current_file changed to "" if we see TOKEN_EOF, use the
     194               previous value we stored earlier.  */
     195            m4_failure_at_line (0, file, line,
     196                                _("ERROR: end of file in argument list"));
     197  
     198          case TOKEN_WORD:
     199          case TOKEN_STRING:
     200            expand_token (obs, t, &td, line);
     201            break;
     202  
     203          case TOKEN_MACDEF:
     204            if (obstack_object_size (obs) == 0)
     205              {
     206                TOKEN_DATA_TYPE (argp) = TOKEN_FUNC;
     207                TOKEN_DATA_FUNC (argp) = TOKEN_DATA_FUNC (&td);
     208              }
     209            break;
     210  
     211          default:
     212            M4ERROR ((warning_status, 0,
     213                      "INTERNAL ERROR: bad token type in expand_argument ()"));
     214            abort ();
     215          }
     216  
     217        t = next_token (&td, NULL);
     218      }
     219  }
     220  
     221  /*-------------------------------------------------------------.
     222  | Collect all the arguments to a call of the macro SYM.  The   |
     223  | arguments are stored on the obstack ARGUMENTS and a table of |
     224  | pointers to the arguments on the obstack ARGPTR.             |
     225  `-------------------------------------------------------------*/
     226  
     227  static void
     228  collect_arguments (symbol *sym, struct obstack *argptr,
     229                     struct obstack *arguments)
     230  {
     231    token_data td;
     232    token_data *tdp;
     233    bool more_args;
     234    bool groks_macro_args = SYMBOL_MACRO_ARGS (sym);
     235  
     236    TOKEN_DATA_TYPE (&td) = TOKEN_TEXT;
     237    TOKEN_DATA_TEXT (&td) = SYMBOL_NAME (sym);
     238    tdp = (token_data *) obstack_copy (arguments, &td, sizeof td);
     239    obstack_ptr_grow (argptr, tdp);
     240  
     241    if (peek_token () == TOKEN_OPEN)
     242      {
     243        next_token (&td, NULL); /* gobble parenthesis */
     244        do
     245          {
     246            more_args = expand_argument (arguments, &td);
     247  
     248            if (!groks_macro_args && TOKEN_DATA_TYPE (&td) == TOKEN_FUNC)
     249              {
     250                TOKEN_DATA_TYPE (&td) = TOKEN_TEXT;
     251                TOKEN_DATA_TEXT (&td) = (char *) "";
     252              }
     253            tdp = (token_data *) obstack_copy (arguments, &td, sizeof td);
     254            obstack_ptr_grow (argptr, tdp);
     255          }
     256        while (more_args);
     257      }
     258  }
     259  
     260  
     261  /*-------------------------------------------------------------------.
     262  | The actual call of a macro is handled by call_macro ().            |
     263  | call_macro () is passed a symbol SYM, whose type is used to call   |
     264  | either a builtin function, or the user macro expansion function    |
     265  | expand_user_macro () (lives in builtin.c).  There are ARGC         |
     266  | arguments to the call, stored in the ARGV table.  The expansion is |
     267  | left on the obstack EXPANSION.  Macro tracing is also handled      |
     268  | here.                                                              |
     269  `-------------------------------------------------------------------*/
     270  
     271  void
     272  call_macro (symbol *sym, int argc, token_data **argv,
     273                   struct obstack *expansion)
     274  {
     275    switch (SYMBOL_TYPE (sym))
     276      {
     277      case TOKEN_FUNC:
     278        (*SYMBOL_FUNC (sym)) (expansion, argc, argv);
     279        break;
     280  
     281      case TOKEN_TEXT:
     282        expand_user_macro (expansion, sym, argc, argv);
     283        break;
     284  
     285      case TOKEN_VOID:
     286      default:
     287        M4ERROR ((warning_status, 0,
     288                  "INTERNAL ERROR: bad symbol type in call_macro ()"));
     289        abort ();
     290      }
     291  }
     292  
     293  /*-------------------------------------------------------------------.
     294  | The macro expansion is handled by expand_macro ().  It parses the  |
     295  | arguments, using collect_arguments (), and builds a table of       |
     296  | pointers to the arguments.  The arguments themselves are stored on |
     297  | a local obstack.  Expand_macro () uses call_macro () to do the     |
     298  | call of the macro.                                                 |
     299  |                                                                    |
     300  | Expand_macro () is potentially recursive, since it calls           |
     301  | expand_argument (), which might call expand_token (), which might  |
     302  | call expand_macro ().                                              |
     303  `-------------------------------------------------------------------*/
     304  
     305  static void
     306  expand_macro (symbol *sym)
     307  {
     308    struct obstack arguments;     /* Alternate obstack if argc_stack is busy.  */
     309    unsigned argv_base;           /* Size of argv_stack on entry.  */
     310    bool use_argc_stack = true;   /* Whether argc_stack is safe.  */
     311    token_data **argv;
     312    int argc;
     313    struct obstack *expansion;
     314    const char *expanded;
     315    bool traced;
     316    int my_call_id;
     317  
     318    /* Report errors at the location where the open parenthesis (if any)
     319       was found, but after expansion, restore global state back to the
     320       location of the close parenthesis.  This is safe since we
     321       guarantee that macro expansion does not alter the state of
     322       current_file/current_line (dnl, include, and sinclude are special
     323       cased in the input engine to ensure this fact).  */
     324    const char *loc_open_file = current_file;
     325    int loc_open_line = current_line;
     326    const char *loc_close_file;
     327    int loc_close_line;
     328  
     329    SYMBOL_PENDING_EXPANSIONS (sym)++;
     330    expansion_level++;
     331    if (nesting_limit > 0 && expansion_level > nesting_limit)
     332      m4_failure (0, _("recursion limit of %d exceeded, use -L<N> to change it"),
     333                  nesting_limit);
     334  
     335    macro_call_id++;
     336    my_call_id = macro_call_id;
     337  
     338    traced = (debug_level & DEBUG_TRACE_ALL) || SYMBOL_TRACED (sym);
     339  
     340    argv_base = obstack_object_size (&argv_stack);
     341    if (obstack_object_size (&argc_stack) > 0)
     342      {
     343        /* We cannot use argc_stack if this is a nested invocation, and an
     344           outer invocation has an unfinished argument being
     345           collected.  */
     346        obstack_init (&arguments);
     347        use_argc_stack = false;
     348      }
     349  
     350    if (traced && (debug_level & DEBUG_TRACE_CALL))
     351      trace_prepre (SYMBOL_NAME (sym), my_call_id);
     352  
     353    collect_arguments (sym, &argv_stack,
     354                       use_argc_stack ? &argc_stack : &arguments);
     355  
     356    argc = ((obstack_object_size (&argv_stack) - argv_base)
     357            / sizeof (token_data *));
     358    argv = (token_data **) ((uintptr_t) obstack_base (&argv_stack) + argv_base);
     359  
     360    loc_close_file = current_file;
     361    loc_close_line = current_line;
     362    current_file = loc_open_file;
     363    current_line = loc_open_line;
     364  
     365    if (traced)
     366      trace_pre (SYMBOL_NAME (sym), my_call_id, argc, argv);
     367  
     368    expansion = push_string_init ();
     369    call_macro (sym, argc, argv, expansion);
     370    expanded = push_string_finish ();
     371  
     372    if (traced)
     373      trace_post (SYMBOL_NAME (sym), my_call_id, argc, expanded);
     374  
     375    current_file = loc_close_file;
     376    current_line = loc_close_line;
     377  
     378    --expansion_level;
     379    --SYMBOL_PENDING_EXPANSIONS (sym);
     380  
     381    if (SYMBOL_DELETED (sym))
     382      free_symbol (sym);
     383  
     384    if (use_argc_stack)
     385      obstack_free (&argc_stack, argv[0]);
     386    else
     387      obstack_free (&arguments, NULL);
     388    obstack_blank_fast (&argv_stack, -argc * sizeof (token_data *));
     389  }