1  /* Copyright 2010-2023 Free Software Foundation, Inc.
       2  
       3     This program is free software: you can redistribute it and/or modify
       4     it under the terms of the GNU General Public License as published by
       5     the Free Software Foundation, either version 3 of the License, or
       6     (at your option) any later version.
       7  
       8     This program is distributed in the hope that it will be useful,
       9     but WITHOUT ANY WARRANTY; without even the implied warranty of
      10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      11     GNU General Public License for more details.
      12  
      13     You should have received a copy of the GNU General Public License
      14     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      15  
      16  #include <config.h>
      17  #include <string.h>
      18  #include <stdbool.h>
      19  #include "uniconv.h"
      20  #include "unistr.h"
      21  
      22  #include "parser.h"
      23  #include "text.h"
      24  #include "source_marks.h"
      25  #include "debug.h"
      26  
      27  void
      28  gather_def_item (ELEMENT *current, enum command_id next_command)
      29  {
      30    enum element_type type;
      31    ELEMENT *def_item;
      32    int contents_count, i;
      33  
      34    if (next_command
      35        && next_command != CM_defline && next_command != CM_deftypeline)
      36      type = ET_inter_def_item; /* Between @def*x and @def*. */
      37    else
      38      type = ET_def_item;
      39  
      40    if (!current->cmd)
      41      return;
      42  
      43    /* Check this isn't an "x" type command.
      44       "This may happen for a construct like:
      45       @deffnx a b @section
      46       but otherwise the end of line will lead to the command closing." */
      47    if (command_data(current->cmd).flags & CF_line)
      48      return;
      49  
      50    contents_count = current->contents.number;
      51    if (contents_count == 0)
      52      return;
      53  
      54    /* Starting from the end, collect everything that is not a ET_def_line and
      55       put it into the ET_def_item. */
      56    def_item = new_element (type);
      57    for (i = 0; i < contents_count; i++)
      58      {
      59        ELEMENT *last_child, *item_content;
      60        last_child = last_contents_child (current);
      61        if (last_child->type == ET_def_line)
      62          break;
      63        item_content = pop_element_from_contents (current);
      64        insert_into_contents (def_item, item_content, 0);
      65      }
      66  
      67    if (def_item->contents.number > 0)
      68      add_to_element_contents (current, def_item);
      69    else
      70      destroy_element (def_item);
      71  }
      72  
      73  
      74  /* Starting at I in the contents, return the next non-whitespace element,
      75     incrementing I.  Return null if no more elements. */
      76  ELEMENT *
      77  next_bracketed_or_word_agg (ELEMENT *current, int *i)
      78  {
      79    int num = 0;
      80    ELEMENT *new;
      81    ELEMENT *e;
      82    int j;
      83    while (1)
      84      {
      85        if (*i == current->contents.number)
      86          break;
      87        e = current->contents.list[*i];
      88        if (e->type == ET_spaces
      89            || e->type == ET_spaces_inserted
      90            || e->type == ET_delimiter)
      91          {
      92            if (num > 0)
      93              break;
      94            else
      95              {
      96                (*i)++;
      97              }
      98          }
      99        else
     100          {
     101           /* e is a text or command element or bracketed argument */
     102            (*i)++;
     103            num++;
     104          }
     105      }
     106  
     107    if (num == 0)
     108      return 0;
     109  
     110    if (num == 1)
     111      return current->contents.list[*i - 1];
     112  
     113    new = new_element (ET_def_aggregate);
     114    for (j = 0; j < num; j++)
     115      {
     116        add_to_element_contents (new,
     117                                 remove_from_contents (current, *i - num));
     118        /* Note: if we did a lot of this could write a special
     119           "splicing" function. */
     120      }
     121    insert_into_contents (current, new, *i - num);
     122    *i = *i - num + 1;
     123    return new;
     124  }
     125  
     126  typedef struct {
     127      enum command_id alias;
     128      enum command_id command;
     129      char *category;
     130      char *translation_context;
     131  } DEF_ALIAS;
     132  
     133  DEF_ALIAS def_aliases[] = {
     134    CM_defun, CM_deffn, "Function", "category of functions for @defun",
     135    CM_defmac, CM_deffn, "Macro", 0,
     136    CM_defspec, CM_deffn, "Special Form", 0,
     137    CM_defvar, CM_defvr, "Variable", "category of variables for @defvar",
     138    CM_defopt, CM_defvr, "User Option", 0,
     139    CM_deftypefun, CM_deftypefn, "Function", "category of functions for @deftypefun",
     140    CM_deftypevar, CM_deftypevr, "Variable", "category of variables in typed languages for @deftypevar",
     141    CM_defivar, CM_defcv, "Instance Variable", "category of instance variables in object-oriented programming for @defivar",
     142    CM_deftypeivar, CM_deftypecv, "Instance Variable", "category of instance variables with data type in object-oriented programming for @deftypeivar",
     143    CM_defmethod, CM_defop, "Method", "category of methods in object-oriented programming for @defmethod",
     144    CM_deftypemethod, CM_deftypeop, "Method", "category of methods with data type in object-oriented programming for @deftypemethod",
     145    0, 0, 0, 0
     146  };
     147  
     148  typedef struct {
     149      enum command_id command;
     150      char **arguments;
     151  } DEF_MAP;
     152  
     153    /*
     154       Meaning of these:
     155       CATEGORY - type of entity, e.g. "Function"
     156       CLASS - class for object-oriented programming
     157       TYPE - data type of a variable or function return value
     158       NAME - name of entity being documented
     159       ARGUMENTS - arguments to a function or macro                  */
     160  
     161  char *defline_arguments[] = {"category", "name", "arg", 0};
     162  char *deftypeline_arguments[] = {"category", "type", "name", "argtype", 0};
     163  char *defvr_arguments[] = {"category", "name", 0};
     164  char *deftypefn_arguments[] = {"category", "type", "name", "argtype", 0};
     165  char *deftypeop_arguments[] = {"category", "class" , "type", "name", "argtype", 0};
     166  char *deftypevr_arguments[] = {"category", "type", "name", 0};
     167  char *defcv_arguments[] = {"category", "class" , "name", 0};
     168  char *deftypecv_arguments[] = {"category", "class" , "type", "name", 0};
     169  char *defop_arguments[] = {"category", "class" , "name", "arg", 0};
     170  char *deftp_arguments[] = {"category", "name", "argtype", 0};
     171  
     172  DEF_MAP def_maps[] = {
     173    CM_defline, defline_arguments,
     174    CM_deftypeline, deftypeline_arguments,
     175    CM_deffn, defline_arguments,
     176    CM_defvr, defvr_arguments,
     177    CM_deftypefn, deftypefn_arguments,
     178    CM_deftypeop, deftypeop_arguments,
     179    CM_deftypevr, deftypevr_arguments,
     180    CM_defcv, defcv_arguments,
     181    CM_deftypecv, deftypecv_arguments,
     182    CM_defop, defop_arguments,
     183    CM_deftp, deftp_arguments,
     184  };
     185  
     186  /* Split non-space text elements into strings without [ ] ( ) , and single
     187     character strings with one of them. */
     188  static void
     189  split_delimiters (ELEMENT *current, int starting_idx)
     190  {
     191    int i;
     192    static char *chars = "[](),";
     193    for (i = starting_idx; i < current->contents.number; i++)
     194      {
     195        ELEMENT *e = current->contents.list[i];
     196        char *p;
     197        ELEMENT *new;
     198        int len;
     199        /* count UTF-8 encoded Unicode characters for source marks locations */
     200        size_t current_position = 0;
     201        uint8_t *u8_text = 0;
     202        uint8_t *u8_p;
     203  
     204        if (e->type != ET_NONE
     205            || e->text.end == 0)
     206          continue;
     207        p = e->text.text;
     208  
     209        if (e->source_mark_list.number)
     210          u8_text = u8_strconv_from_encoding (p, "UTF-8",
     211                                              iconveh_question_mark);
     212        u8_p = u8_text;
     213  
     214        while (1)
     215          {
     216            size_t u8_len = 0;
     217            if (strchr (chars, *p))
     218              {
     219                new = new_element (ET_delimiter);
     220                text_append_n (&new->text, p, 1);
     221  
     222                if (u8_text)
     223                  {
     224                    u8_len = u8_mbsnlen (u8_p, 1);
     225                    u8_p += u8_len;
     226                  }
     227                current_position = relocate_source_marks (&(e->source_mark_list), new,
     228                                                   current_position, u8_len);
     229  
     230                insert_into_contents (current, new, i++);
     231                add_extra_string_dup (new, "def_role", "delimiter");
     232                if (!*++p)
     233                  break;
     234                continue;
     235              }
     236  
     237            len = strcspn (p, chars);
     238            new = new_element (ET_NONE);
     239            text_append_n (&new->text, p, len);
     240  
     241            if (u8_text)
     242              {
     243                u8_len = u8_mbsnlen (u8_p, len);
     244                u8_p += u8_len;
     245              }
     246            current_position = relocate_source_marks (&(e->source_mark_list), new,
     247                                            current_position, u8_len);
     248  
     249            insert_into_contents (current, new, i++);
     250            if (!*(p += len))
     251              break;
     252          }
     253        destroy_element (remove_from_contents (current, i--));
     254        free (u8_text);
     255      }
     256  }
     257  
     258  
     259  /* Divide any text elements into separate elements, separating whitespace
     260     and non-whitespace. */
     261  static void
     262  split_def_args (ELEMENT *current, int starting_idx)
     263  {
     264    int i;
     265    for (i = starting_idx; i < current->contents.number; i++)
     266      {
     267        ELEMENT *e = current->contents.list[i];
     268        char *p;
     269        ELEMENT *new;
     270        int len;
     271        /* count UTF-8 encoded Unicode characters for source marks locations */
     272        size_t current_position = 0;
     273        uint8_t *u8_text = 0;
     274        uint8_t *u8_p;
     275  
     276        if (e->type == ET_bracketed_arg)
     277          {
     278            isolate_last_space (e);
     279            continue;
     280          }
     281  
     282        if (e->text.end == 0)
     283          continue;
     284  
     285        p = e->text.text;
     286  
     287        if (e->source_mark_list.number)
     288          u8_text = u8_strconv_from_encoding (p, "UTF-8",
     289                                              iconveh_question_mark);
     290        u8_p = u8_text;
     291  
     292        while (1)
     293          {
     294            size_t u8_len = 0;
     295            len = strspn (p, whitespace_chars);
     296            if (len)
     297              {
     298                new = new_element (ET_spaces);
     299                add_extra_string_dup (new, "def_role", "spaces");
     300              }
     301            else
     302              {
     303                len = strcspn (p, whitespace_chars);
     304                new = new_element (ET_NONE);
     305              }
     306            if (u8_text)
     307              {
     308                u8_len = u8_mbsnlen (u8_p, len);
     309                u8_p += u8_len;
     310              }
     311  
     312            current_position = relocate_source_marks (&(e->source_mark_list), new,
     313                                  current_position, u8_len);
     314            text_append_n (&new->text, p, len);
     315            insert_into_contents (current, new, i++);
     316            if (!*(p += len))
     317              break;
     318          }
     319        destroy_element (remove_from_contents (current, i--));
     320        free (u8_text);
     321      }
     322  }
     323  
     324  DEF_ARG **
     325  parse_def (enum command_id command, ELEMENT *current)
     326  {
     327    int contents_idx = 0;
     328    int type, set_type_not_arg;
     329    int i, i_def;
     330    int arg_types_nr;
     331    ELEMENT *e, *e1;
     332    DEF_ARG **result;
     333    char **arguments_list;
     334  
     335    split_def_args (current, contents_idx);
     336  
     337    /* Check for "def alias" - for example @defun for @deffn. */
     338    if (command_data(command).flags & CF_def_alias)
     339      {
     340        char *category;
     341        int i;
     342        for (i = 0; i < sizeof (def_aliases) / sizeof (*def_aliases); i++)
     343          {
     344            if (def_aliases[i].alias == command)
     345              goto found;
     346          }
     347        fatal ("no alias for CF_def_alias command");
     348      found:
     349        /* Prepended content is inserted into contents, so
     350           @defun is converted into @deffn Function */
     351  
     352        category = def_aliases[i].category;
     353        command = def_aliases[i].command;
     354  
     355        /* Used when category text has a space in it. */
     356        e = new_element (ET_bracketed_inserted);
     357        insert_into_contents (current, e, contents_idx);
     358        e1 = new_element (ET_NONE);
     359        text_append_n (&e1->text, category, strlen (category));
     360        add_to_element_contents (e, e1);
     361        if (global_documentlanguage && *global_documentlanguage)
     362          {
     363            e1->type = ET_untranslated;
     364            add_extra_string_dup (e1, "documentlanguage",
     365                                  global_documentlanguage);
     366            if (def_aliases[i].translation_context)
     367              add_extra_string_dup (e1, "translation_context",
     368                                    def_aliases[i].translation_context);
     369          }
     370  
     371        e = new_element (ET_spaces_inserted);
     372        text_append_n (&e->text, " ", 1);
     373        add_extra_string_dup (e, "def_role", "spaces");
     374        insert_into_contents (current, e, contents_idx + 1);
     375      }
     376  
     377    /* prepare the arguments numbers and list */
     378    if (command_data(command).flags & CF_MACRO)
     379      {
     380        int args_number;
     381        MACRO *macro_record = lookup_macro (command);
     382        ELEMENT *macro;
     383        if (!macro_record)
     384          fatal ("no linemacro record for arguments parsing");
     385        macro = macro_record->element;
     386        args_number = macro->args.number - 1;
     387        arguments_list = malloc ((args_number + 1) * sizeof (char *));
     388        arguments_list[args_number] = 0;
     389        arg_types_nr = args_number;
     390        if (args_number > 0)
     391          {
     392            int arg_index;
     393            ELEMENT **args = macro->args.list;
     394            for (arg_index = 1; arg_index <= args_number; arg_index++)
     395              {
     396                if (args[arg_index]->type == ET_macro_arg)
     397                  arguments_list[arg_index -1] = args[arg_index]->text.text;
     398                else
     399                  arguments_list[arg_index -1] = 0;
     400              }
     401            /* remove one for the rest of the line argument */
     402            arg_types_nr--;
     403          }
     404        result = malloc ((args_number+1) * sizeof (DEF_ARG *));
     405      }
     406    else
     407      {
     408       /* Read arguments as CATEGORY [CLASS] [TYPE] NAME [ARGUMENTS]. */
     409  
     410        for (i_def = 0; i_def < sizeof (def_maps) / sizeof (*def_maps); i_def++)
     411          {
     412            if (def_maps[i_def].command == command)
     413              goto def_found;
     414          }
     415        fatal ("no arguments for def command");
     416       def_found:
     417  
     418        /* determine non arg/argtype number of arguments */
     419        arg_types_nr = 0;
     420        arguments_list = def_maps[i_def].arguments;
     421        while (arguments_list[arg_types_nr])
     422          {
     423            char *arg_type_name = arguments_list[arg_types_nr];
     424  
     425            /* FIXME keep information about arg/argtype? */
     426            if (!strcmp (arg_type_name, "arg")
     427                || !strcmp (arg_type_name, "argtype"))
     428              break;
     429            arg_types_nr++;
     430          }
     431        result = malloc ((arg_types_nr+1) * sizeof (DEF_ARG *));
     432      }
     433  
     434    for (i = 0; i < arg_types_nr; i++)
     435      {
     436        ELEMENT *e = next_bracketed_or_word_agg (current, &contents_idx);
     437  
     438        if (e)
     439          {
     440            char *arg_type_name = arguments_list[i];
     441            DEF_ARG *def_arg = malloc (sizeof (DEF_ARG));
     442  
     443            result[i] = def_arg;
     444            def_arg->arg_type = strdup(arg_type_name);
     445            def_arg->element = e;
     446          }
     447        else
     448          break;
     449      }
     450  
     451    result[i] = 0;
     452    if (command_data(command).flags & CF_MACRO)
     453      {
     454        while (contents_idx < current->contents.number
     455               && current->contents.list[contents_idx]->type == ET_spaces)
     456          contents_idx++;
     457        /* note that element at contents_idx is not collected at that point */
     458        /* arguments_list[i] NULL should only happen if there is no
     459           argument at all for the linemacro */
     460        if (contents_idx < current->contents.number && arguments_list[i])
     461          {
     462            DEF_ARG *def_arg = malloc (sizeof (DEF_ARG));
     463            int contents_nr = current->contents.number - contents_idx;
     464  
     465            result[i] = def_arg;
     466            result[i+1] = 0;
     467  
     468            def_arg->arg_type = strdup (arguments_list[i]);
     469            if (contents_nr == 1)
     470              def_arg->element = current->contents.list[contents_idx];
     471            else
     472              {
     473                ELEMENT *new = new_element (ET_def_aggregate);
     474                int j;
     475                for (j = 0; j < contents_nr; j++)
     476                  {
     477                    add_to_element_contents (new,
     478                                             remove_from_contents (current,
     479                                                                   contents_idx));
     480                  }
     481                add_to_element_contents (current, new);
     482                def_arg->element = new;
     483              }
     484          }
     485        return result;
     486      }
     487  
     488    for (i = 0; i < arg_types_nr; i++)
     489      {
     490        if (result[i])
     491          {
     492            DEF_ARG *def_arg = result[i];
     493            if (def_arg->element)
     494              add_extra_string_dup (def_arg->element, "def_role", def_arg->arg_type);
     495          }
     496        else
     497          break;
     498      }
     499  
     500    /* Process args */
     501    split_delimiters (current, contents_idx);
     502  
     503    /* For some commands, alternate between "arg" and "typearg". This matters for
     504       the DocBook output. */
     505    /* In that case set_type_not_arg is both used to set to argtype and
     506       to switch sign to switch between arg and argtype */
     507    if (command == CM_deftypeline || command == CM_deftypefn
     508        || command == CM_deftypeop || command == CM_deftp)
     509      set_type_not_arg = -1;
     510    else
     511      set_type_not_arg = 1;
     512  
     513    type = set_type_not_arg;
     514    for (i = contents_idx; i < current->contents.number; i++)
     515      {
     516        e = contents_child_by_index (current, i);
     517        if (e->type == ET_spaces
     518            || e->type == ET_spaces_inserted)
     519          {
     520            continue;
     521          }
     522        if (e->type == ET_delimiter)
     523          {
     524            type = set_type_not_arg;
     525            continue;
     526          }
     527        if (e->cmd && e->cmd != CM_code)
     528          {
     529            add_extra_string_dup (e, "def_role", "arg");
     530            type = set_type_not_arg;
     531            continue;
     532          }
     533        add_extra_string_dup (e, "def_role",
     534                              (type == 1 ? "arg" : "typearg"));
     535        type *= set_type_not_arg;
     536      }
     537    return result;
     538  }