1  /* Copyright 2010-2023 Free Software Foundation, Inc.
       2  
       3     This program is free software: you can redistribute it and/or modify
       4     it under the terms of the GNU General Public License as published by
       5     the Free Software Foundation, either version 3 of the License, or
       6     (at your option) any later version.
       7  
       8     This program is distributed in the hope that it will be useful,
       9     but WITHOUT ANY WARRANTY; without even the implied warranty of
      10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      11     GNU General Public License for more details.
      12  
      13     You should have received a copy of the GNU General Public License
      14     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      15  
      16  #include <config.h>
      17  
      18  /* Avoid namespace conflicts. */
      19  #define context perl_context
      20  
      21  #define PERL_NO_GET_CONTEXT
      22  #include "EXTERN.h"
      23  #include "perl.h"
      24  /* Avoid warnings about Perl headers redefining symbols that gnulib
      25     redefined already. */
      26  #if defined _WIN32 && !defined __CYGWIN__
      27    #undef free
      28  #endif
      29  #include "XSUB.h"
      30  
      31  #undef context
      32  
      33  #include <libintl.h>
      34  
      35  #include <stdlib.h>
      36  #include <stdio.h>
      37  #include <dirent.h>
      38  #include <string.h>
      39  
      40  #include "parser.h"
      41  #include "debug.h"
      42  #include "tree.h"
      43  #include "input.h"
      44  #include "source_marks.h"
      45  #include "labels.h"
      46  #include "indices.h"
      47  #include "errors.h"
      48  #include "api.h"
      49  
      50  #ifdef ENABLE_NLS
      51  
      52  #define LOCALEDIR DATADIR "/locale"
      53  
      54  /* Use the uninstalled locales dir.  Currently unused.
      55     The texinfo.mo files are not actually created here, only the
      56     texinfo_document.mo files, which aren't used by parsetexi. */
      57  static void
      58  find_locales_dir (char *builddir)
      59  {
      60    DIR *dir;
      61    char *s;
      62  
      63    dTHX;
      64  
      65    /* Can't use asprintf here, because it might come from Gnulib, and
      66       will then use malloc that is different from Perl's malloc, whereas
      67       free below is redirected to Perl's implementation.  This could
      68       cause crashes if the two malloc/free implementations were different.  */
      69    s = malloc (strlen (builddir) + strlen ("/LocaleData") + 1);
      70    sprintf (s, "%s/LocaleData", builddir);
      71    dir = opendir (s);
      72    if (!dir)
      73      {
      74        free (s);
      75        fprintf (stderr, "Locales dir for document strings not found: %s\n",
      76                 strerror (errno));
      77      }
      78    else
      79      {
      80        bindtextdomain (PACKAGE, s);
      81        free (s);
      82        closedir (dir);
      83      }
      84  }
      85  
      86  int
      87  init (int texinfo_uninstalled, char *builddir)
      88  {
      89    setlocale (LC_ALL, "");
      90  
      91    /* Note: this uses the installed translations even when running an
      92       uninstalled program. */
      93    bindtextdomain (PACKAGE, LOCALEDIR);
      94  
      95    textdomain (PACKAGE);
      96  
      97    return 1;
      98  }
      99  
     100  #else
     101  
     102  int
     103  init (int texinfo_uninstalled, char *builddir)
     104  {
     105    return 1;
     106  }
     107  
     108  #endif
     109  
     110  static void
     111  reset_floats ()
     112  {
     113    floats_number = 0;
     114  }
     115  
     116  static ELEMENT *Root;
     117  
     118  void
     119  reset_parser_except_conf (void)
     120  {
     121    /* do before destroying tree because index entries refer to in-tree
     122       elements. */
     123    wipe_indices ();
     124  
     125    if (Root)
     126      {
     127        destroy_element_and_children (Root);
     128        Root = 0;
     129      }
     130    wipe_user_commands ();
     131    wipe_macros ();
     132    init_index_commands ();
     133    wipe_errors ();
     134    reset_context_stack ();
     135    reset_command_stack (&nesting_context.basic_inline_stack);
     136    reset_command_stack (&nesting_context.basic_inline_stack_on_line);
     137    reset_command_stack (&nesting_context.basic_inline_stack_block);
     138    reset_command_stack (&nesting_context.regions_stack);
     139    memset (&nesting_context, 0, sizeof (nesting_context));
     140    reset_floats ();
     141    wipe_global_info ();
     142    /* it is not totally obvious that is it better to reset the
     143       list to avoid memory leaks rather than reuse the iconv
     144       opened handlers */
     145    reset_encoding_list ();
     146    set_input_encoding ("utf-8");
     147    reset_internal_xrefs ();
     148    reset_labels ();
     149    input_reset_input_stack ();
     150    source_marks_reset_counters ();
     151    free_small_strings ();
     152  
     153    reset_obstacks ();
     154  
     155    current_node = current_section = current_part = 0;
     156  }
     157  
     158  void
     159  reset_parser (int debug_output)
     160  {
     161    dTHX;
     162  
     163    /* NOTE: Do not call 'malloc' or 'free' in this function.
     164  
     165       Since this file (api.c) includes the Perl headers,
     166       we get the Perl redefinitions, which we do not want, as we don't use
     167       them throughout the rest of the program. */
     168  
     169    /* We cannot call debug() here, because the configuration of the previous
     170       parser invokation has not been reset already, and new configuration has
     171       not been read, so we need to pass the configuration information
     172       directly */
     173    /*
     174    debug ("!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!");
     175    */
     176    if (debug_output)
     177      fprintf (stderr,
     178            "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n");
     179  
     180    reset_parser_except_conf ();
     181    wipe_values ();
     182    clear_expanded_formats ();
     183    clear_include_directories ();
     184    reset_conf ();
     185  
     186    global_documentlanguage_fixed = 0;
     187    set_documentlanguage (0);
     188  
     189    doc_encoding_for_input_file_name = 1;
     190    set_input_file_name_encoding (0);
     191    set_locale_encoding (0);
     192  
     193    global_accept_internalvalue = 0;
     194  }
     195  
     196  /* Determine directory path based on file name.
     197     Set ROOT to root of tree obtained by parsing FILENAME.
     198     Used for parse_texi_file. */
     199  int
     200  parse_file (char *filename)
     201  {
     202    /*
     203    debug_output = 0;
     204    */
     205    char *p, *q;
     206  
     207    int status;
     208    
     209    status = input_push_file (filename);
     210    if (status)
     211      return status;
     212  
     213    /* Strip off a leading directory path, by looking for the last
     214       '/' in filename. */
     215    p = 0;
     216    q = strchr (filename, '/');
     217    while (q)
     218      {
     219        p = q;
     220        q = strchr (q + 1, '/');
     221      }
     222  
     223    if (p)
     224      {
     225        char saved = *p;
     226        *p = '\0';
     227        add_include_directory (filename);
     228        *p = saved;
     229      }
     230  
     231    Root = parse_texi_document ();
     232    if (Root)
     233      return 0;
     234    return 1;
     235  }
     236  
     237  /* Used for parse_texi_text.  STRING should be a UTF-8 buffer. */
     238  void
     239  parse_text (char *string, int line_nr)
     240  {
     241    reset_parser_except_conf ();
     242    input_push_text (strdup (string), line_nr, 0, 0);
     243    Root = parse_texi_document ();
     244  }
     245  
     246  /* Set ROOT to root of tree obtained by parsing the Texinfo code in STRING.
     247     STRING should be a UTF-8 buffer.  Used for parse_texi_line. */
     248  void
     249  parse_string (char *string, int line_nr)
     250  {
     251    ELEMENT *root_elt;
     252  
     253    reset_parser_except_conf ();
     254    root_elt = new_element (ET_root_line);
     255    input_push_text (strdup (string), line_nr, 0, 0);
     256    Root = parse_texi (root_elt, root_elt);
     257  }
     258  
     259  /* Used for parse_texi_piece.  STRING should be a UTF-8 buffer. */
     260  void
     261  parse_piece (char *string, int line_nr)
     262  {
     263    ELEMENT *before_node_section, *document_root;
     264  
     265    reset_parser_except_conf ();
     266    before_node_section = setup_document_root_and_before_node_section ();
     267    document_root = before_node_section->parent;
     268  
     269    input_push_text (strdup (string), line_nr, 0, 0);
     270    Root = parse_texi (document_root, before_node_section);
     271  }
     272  
     273  
     274  static void element_to_perl_hash (ELEMENT *e);
     275  
     276  /* Return reference to Perl array built from e.  If any of
     277     the elements in E don't have 'hv' set, set it to an empty
     278     hash table, or create it if there is no parent element, indicating the 
     279     element is not in the tree.
     280     Note that not having 'hv' set should be rare (actually never happen),
     281     as the contents and args children are processed before the extra
     282     information where build_perl_array is called.
     283   */
     284  static SV *
     285  build_perl_array (ELEMENT_LIST *e)
     286  {
     287    SV *sv;
     288    AV *av;
     289    int i;
     290  
     291    dTHX;
     292  
     293    av = newAV ();
     294    sv = newRV_inc ((SV *) av);
     295  
     296    for (i = 0; i < e->number; i++)
     297      {
     298        if (!e->list[i]->hv)
     299          {
     300            if (e->list[i]->parent)
     301              e->list[i]->hv = newHV ();
     302            else
     303              {
     304                /* Out-of-tree element */
     305                /* WARNING: This is possibly recursive. */
     306                element_to_perl_hash (e->list[i]);
     307              }
     308          }
     309        av_store (av, i, newRV_inc ((SV *) e->list[i]->hv));
     310      }
     311    return sv;
     312  }
     313  
     314  /* Used to create a "Perl-internal" string that represents a sequence
     315     of Unicode codepoints with no specific encoding. */
     316  static SV *
     317  newSVpv_utf8 (char *str, STRLEN len)
     318  {
     319    SV *sv;
     320    dTHX;
     321  
     322    sv = newSVpv (str, len);
     323    SvUTF8_on (sv);
     324    return sv;
     325  }
     326  
     327  static void
     328  store_additional_info (ELEMENT *e, ASSOCIATED_INFO* a, char *key)
     329  {
     330    dTHX;
     331  
     332    if (a->info_number > 0)
     333      {
     334        HV *extra;
     335        int i;
     336        int nr_info = 0; /* number of info type stored */
     337  
     338  
     339        /* Use sv_2mortal so that reference count is decremented if
     340           the hash is not saved. */
     341        extra = (HV *) sv_2mortal((SV *)newHV ());
     342  
     343        for (i = 0; i < a->info_number; i++)
     344          {
     345  #define STORE(sv) hv_store (extra, key, strlen (key), sv, 0)
     346            char *key = a->info[i].key;
     347            ELEMENT *f = (ELEMENT *) a->info[i].value;
     348  
     349            if (a->info[i].type == extra_deleted)
     350              continue;
     351            nr_info++;
     352  
     353            switch (a->info[i].type)
     354              {
     355              case extra_element:
     356                /* For references to other parts of the tree, create the hash so 
     357                   we can point to it. */
     358                /* Note that this does not happen much, as the contents
     359                   and args are processed before the extra information.  It only
     360                   happens for root commands (sections, nodes) and associated
     361                   commands, and could also happen for subentry as it is not
     362                   a children of the associated index command */
     363                if (!f->hv)
     364                  f->hv = newHV ();
     365                STORE(newRV_inc ((SV *)f->hv));
     366                break;
     367              case extra_element_oot:
     368                /* Note that this is only used for info hash, with simple
     369                   elements that are associated to one element only, should
     370                   not be referred to elsewhere (and should not contain other
     371                   commands or containers) */
     372                if (f->hv)
     373                  fatal ("element_to_perl_hash extra_element_oot twice\n");
     374                element_to_perl_hash (f);
     375                STORE(newRV_inc ((SV *)f->hv));
     376                break;
     377              case extra_contents:
     378                {
     379                if (f)
     380                  STORE(build_perl_array (&f->contents));
     381                break;
     382                }
     383              case extra_string:
     384                { /* A simple string. */
     385                char *value = (char *) f;
     386                STORE(newSVpv_utf8 (value, 0));
     387                break;
     388                }
     389              case extra_integer:
     390                { /* A simple integer.  The intptr_t cast here prevents
     391                     a warning on MinGW ("cast from pointer to integer of
     392                     different size"). */
     393                IV value = (IV) (intptr_t) f;
     394                STORE(newSViv (value));
     395                break;
     396                }
     397              case extra_misc_args:
     398                {
     399                int j;
     400                AV *av;
     401                av = newAV ();
     402                av_unshift (av, f->contents.number);
     403  
     404                STORE(newRV_inc ((SV *)av));
     405                /* An array of strings or integers. */
     406                for (j = 0; j < f->contents.number; j++)
     407                  {
     408                    KEY_PAIR *k;
     409                    k = lookup_extra (f->contents.list[j], "integer");
     410                    if (k)
     411                      {
     412                        IV value = (IV) (intptr_t) k->value;
     413                        av_store (av, j, newSViv (value));
     414                      }
     415                    else if (f->contents.list[j]->text.end > 0)
     416                      {
     417                        SV *sv = newSVpv_utf8 (f->contents.list[j]->text.text,
     418                                               f->contents.list[j]->text.end);
     419                        av_store (av, j, sv);
     420                      }
     421                    else
     422                      {
     423                        /* Empty strings permitted. */
     424                        av_store (av, j, newSVpv ("", 0));
     425                      }
     426                  }
     427                break;
     428                }
     429              default:
     430                fatal ("unknown extra type");
     431                break;
     432              }
     433          }
     434  #undef STORE
     435  
     436        if (nr_info > 0)
     437          hv_store (e->hv, key, strlen (key),
     438                    newRV_inc((SV *)extra), 0);
     439      }
     440  }
     441  
     442  static void
     443  store_source_mark_list (ELEMENT *e)
     444  {
     445    dTHX;
     446  
     447    if (e->source_mark_list.number > 0)
     448      {
     449        AV *av;
     450        SV *sv;
     451        int i;
     452        av = newAV ();
     453        sv = newRV_noinc ((SV *) av);
     454        hv_store (e->hv, "source_marks", strlen ("source_marks"), sv, 0);
     455  
     456        for (i = 0; i < e->source_mark_list.number; i++)
     457          {
     458            HV *source_mark;
     459            SV *sv;
     460            SOURCE_MARK *s_mark = e->source_mark_list.list[i];
     461            IV source_mark_position;
     462            IV source_mark_counter;
     463            source_mark = newHV ();
     464  #define STORE(key, value) hv_store (source_mark, key, strlen (key), value, 0)
     465             /* A simple integer.  The intptr_t cast here prevents
     466                a warning on MinGW ("cast from pointer to integer of
     467                different size"). */
     468            source_mark_counter = (IV) (intptr_t) s_mark->counter;
     469            STORE("counter", newSViv (source_mark_counter));
     470            if (s_mark->position > 0)
     471              {
     472                source_mark_position = (IV) (intptr_t) s_mark->position;
     473                STORE("position", newSViv (source_mark_position));
     474              }
     475            if (s_mark->element)
     476              {
     477                ELEMENT *e = s_mark->element;
     478                /* should only be referred to in one source mark */
     479                if (e->hv)
     480                  fatal ("element_to_perl_hash source mark elt twice");
     481                element_to_perl_hash (e);
     482                STORE("element", newRV_inc ((SV *)e->hv));
     483              }
     484            if (s_mark->line)
     485              {
     486                SV *sv = newSVpv_utf8 (s_mark->line, 0);
     487                STORE("line", sv);
     488              }
     489  
     490  #define SAVE_S_M_STATUS(X) \
     491             case SM_status_ ## X: \
     492             sv = newSVpv_utf8 (#X, 0);\
     493             STORE("status", sv); \
     494             break;
     495  
     496            switch (s_mark->status)
     497              {
     498                SAVE_S_M_STATUS (start)
     499                SAVE_S_M_STATUS (end)
     500  
     501                /* for SM_status_none */
     502                default:
     503                  break;
     504              }
     505  
     506  #define SAVE_S_M_TYPE(X) \
     507             case SM_type_ ## X: \
     508             sv = newSVpv_utf8 (#X, 0);\
     509             STORE("sourcemark_type", sv); \
     510             break;
     511  
     512            switch (s_mark->type)
     513              {
     514                SAVE_S_M_TYPE (include)
     515                SAVE_S_M_TYPE (setfilename)
     516                SAVE_S_M_TYPE (delcomment)
     517                SAVE_S_M_TYPE (defline_continuation)
     518                SAVE_S_M_TYPE (macro_expansion)
     519                SAVE_S_M_TYPE (linemacro_expansion)
     520                SAVE_S_M_TYPE (value_expansion)
     521                SAVE_S_M_TYPE (ignored_conditional_block)
     522                SAVE_S_M_TYPE (expanded_conditional_command)
     523  
     524                /* for SM_type_none */
     525                default:
     526                  break;
     527              }
     528  
     529            av_push (av, newRV_noinc ((SV *)source_mark));
     530  #undef STORE
     531          }
     532      }
     533  }
     534  
     535  static int hashes_ready = 0;
     536  static U32 HSH_parent = 0;
     537  static U32 HSH_type = 0;
     538  static U32 HSH_cmdname = 0;
     539  static U32 HSH_contents = 0;
     540  static U32 HSH_args = 0;
     541  static U32 HSH_text = 0;
     542  static U32 HSH_extra = 0;
     543  static U32 HSH_info = 0;
     544  static U32 HSH_source_info = 0;
     545  static U32 HSH_file_name = 0;
     546  static U32 HSH_line_nr = 0;
     547  static U32 HSH_macro = 0;
     548  
     549  /* Set E->hv and 'hv' on E's descendants.  e->parent->hv is assumed
     550     to already exist. */
     551  static void
     552  element_to_perl_hash (ELEMENT *e)
     553  {
     554    SV *sv;
     555  
     556    dTHX;
     557  
     558    /* e->hv may already exist if there was an extra value elsewhere
     559       referring to e. */
     560    if (!e->hv)
     561      {
     562        e->hv = newHV ();
     563      }
     564  
     565    if (!hashes_ready)
     566      {
     567        hashes_ready = 1;
     568        PERL_HASH(HSH_parent, "parent", strlen ("parent"));
     569        PERL_HASH(HSH_type, "type", strlen ("type"));
     570        PERL_HASH(HSH_cmdname, "cmdname", strlen ("cmdname"));
     571        PERL_HASH(HSH_contents, "contents", strlen ("contents"));
     572        PERL_HASH(HSH_args, "args", strlen ("args"));
     573        PERL_HASH(HSH_text, "text", strlen ("text"));
     574        PERL_HASH(HSH_extra, "extra", strlen ("extra"));
     575        PERL_HASH(HSH_info, "info", strlen ("info"));
     576        PERL_HASH(HSH_source_info, "source_info", strlen ("source_info"));
     577  
     578        PERL_HASH(HSH_file_name, "file_name", strlen ("file_name"));
     579        PERL_HASH(HSH_line_nr, "line_nr", strlen ("line_nr"));
     580        PERL_HASH(HSH_macro, "macro", strlen ("macro"));
     581      }
     582  
     583    if (e->parent)
     584      {
     585        if (!e->parent->hv)
     586          fatal ("parent hv not already set\n");
     587        sv = newRV_inc ((SV *) e->parent->hv);
     588        hv_store (e->hv, "parent", strlen ("parent"), sv, HSH_parent);
     589      }
     590  
     591    if (e->type)
     592      {
     593        sv = newSVpv (element_type_names[e->type], 0);
     594        hv_store (e->hv, "type", strlen ("type"), sv, HSH_type);
     595      }
     596  
     597    if (e->cmd)
     598      {
     599        sv = newSVpv (command_name(e->cmd), 0);
     600        hv_store (e->hv, "cmdname", strlen ("cmdname"), sv, HSH_cmdname);
     601  
     602        /* Note we could optimize the call to newSVpv here and
     603           elsewhere by passing an appropriate second argument. */
     604      }
     605  
     606    if (e->contents.number > 0)
     607      {
     608        AV *av;
     609        int i;
     610  
     611        av = newAV ();
     612        sv = newRV_noinc ((SV *) av);
     613        av_unshift (av, e->contents.number);
     614  
     615        hv_store (e->hv, "contents", strlen ("contents"), sv, HSH_contents);
     616        for (i = 0; i < e->contents.number; i++)
     617          {
     618            element_to_perl_hash (e->contents.list[i]);
     619            sv = newRV_noinc ((SV *) e->contents.list[i]->hv);
     620            av_store (av, i, sv);
     621          }
     622      }
     623  
     624    if (e->args.number > 0)
     625      {
     626        AV *av;
     627        int i;
     628  
     629        av = newAV ();
     630        sv = newRV_noinc ((SV *) av);
     631        av_unshift (av, e->args.number);
     632  
     633        hv_store (e->hv, "args", strlen ("args"), sv, HSH_args);
     634        for (i = 0; i < e->args.number; i++)
     635          {
     636            element_to_perl_hash (e->args.list[i]);
     637            sv = newRV_inc ((SV *) e->args.list[i]->hv);
     638            av_store (av, i, sv);
     639          }
     640      }
     641  
     642    if (e->text.space > 0)
     643      {
     644        sv = newSVpv_utf8 (e->text.text, e->text.end);
     645        hv_store (e->hv, "text", strlen ("text"), sv, HSH_text);
     646      }
     647  
     648    store_additional_info (e, &e->extra_info, "extra");
     649    store_additional_info (e, &e->info_info, "info");
     650  
     651    store_source_mark_list (e);
     652  
     653    if (e->source_info.line_nr)
     654      {
     655  #define STORE(key, sv, hsh) hv_store (hv, key, strlen (key), sv, hsh)
     656        SOURCE_INFO *source_info = &e->source_info;
     657        HV *hv = newHV ();
     658        hv_store (e->hv, "source_info", strlen ("source_info"),
     659                  newRV_noinc((SV *)hv), HSH_source_info);
     660  
     661        if (source_info->file_name)
     662          {
     663            STORE("file_name", newSVpv (source_info->file_name, 0),
     664                  HSH_file_name);
     665          }
     666        else
     667          STORE("file_name", newSVpv ("", 0), HSH_file_name);
     668  
     669        if (source_info->line_nr)
     670          {
     671            STORE("line_nr", newSViv (source_info->line_nr), HSH_line_nr);
     672          }
     673  
     674        if (source_info->macro)
     675          {
     676            STORE("macro", newSVpv_utf8 (source_info->macro, 0), HSH_macro);
     677          }
     678        else
     679          STORE("macro", newSVpv ("", 0), HSH_macro);
     680  #undef STORE
     681      }
     682  }
     683  
     684  HV *
     685  build_texinfo_tree (void)
     686  {
     687    if (! Root)
     688        /* use an empty element with contents if there is nothing.
     689           This should only happen if the input file was not opened
     690           or no parse_* function was called after initialization
     691           and should not happen with the current calling code.
     692        */
     693        Root = new_element (ET_NONE);
     694    element_to_perl_hash (Root);
     695    return Root->hv;
     696  }
     697  
     698  /* Return array of target elements.  build_texinfo_tree must
     699     be called first. */
     700  AV *
     701  build_target_elements_list (void)
     702  {
     703    AV *target_array;
     704    SV *sv;
     705    int i;
     706  
     707    dTHX;
     708  
     709    target_array = newAV ();
     710    av_unshift (target_array, labels_number);
     711  
     712    for (i = 0; i < labels_number; i++)
     713      {
     714        sv = newRV_inc (target_elements_list[i]->hv);
     715        av_store (target_array, i, sv);
     716      }
     717  
     718    return target_array;
     719  }
     720  
     721  AV *
     722  build_internal_xref_list (void)
     723  {
     724    AV *list_av;
     725    SV *sv;
     726    int i;
     727  
     728    dTHX;
     729  
     730    list_av = newAV ();
     731    av_unshift (list_av, internal_xref_number);
     732  
     733    for (i = 0; i < internal_xref_number; i++)
     734      {
     735        sv = newRV_inc (internal_xref_list[i]->hv);
     736        av_store (list_av, i, sv);
     737      }
     738  
     739    return list_av;
     740  }
     741  
     742  /* Return hash for list of @float's that appeared in the file. */
     743  /* not used for now, since the normalization of of float type is done
     744     outside of the barser. Could be done here again when possible */
     745  HV *
     746  build_float_list (void)
     747  {
     748    HV *float_hash;
     749    SV **type_array;
     750    SV *sv;
     751    AV *av;
     752    int i;
     753  
     754    dTHX;
     755  
     756    float_hash = newHV ();
     757  
     758    for (i = 0; i < floats_number; i++)
     759      {
     760        type_array = hv_fetch (float_hash,
     761                               floats_list[i].type,
     762                               strlen (floats_list[i].type),
     763                               0);
     764        if (!type_array)
     765          {
     766            av = newAV ();
     767            hv_store (float_hash,
     768                      floats_list[i].type,
     769                      strlen (floats_list[i].type),
     770                      newRV_noinc ((SV *)av),
     771                      0);
     772          }
     773        else
     774          {
     775            av = (AV *)SvRV (*type_array);
     776          }
     777        sv = newRV_inc ((SV *)floats_list[i].element->hv);
     778        av_push (av, sv);
     779      }
     780  
     781    return float_hash;
     782  }
     783  
     784  /* Ensure that I->hv is a hash value for a single entry in 
     785     $self->{'index_names'}, containing information about a single index. */
     786  static void
     787  build_single_index_data (INDEX *i)
     788  {
     789  #define STORE(key, value) hv_store (hv, key, strlen (key), value, 0)
     790  
     791    HV *hv;
     792    AV *entries;
     793    int j;
     794    int entry_number;
     795  
     796    dTHX;
     797  
     798    if (!i->hv)
     799      {
     800        hv = newHV ();
     801        i->hv = (void *) hv;
     802      }
     803    else
     804      {
     805        hv = (HV *) i->hv;
     806      }
     807  
     808    STORE("name", newSVpv_utf8 (i->name, 0));
     809    STORE("in_code", i->in_code ? newSViv(1) : newSViv(0));
     810  
     811    if (i->merged_in)
     812      {
     813        /* This index is merged in another one. */
     814        INDEX *ultimate = ultimate_index (i);
     815  
     816        if (!ultimate->hv)
     817          {
     818            ultimate->hv = (void *) newHV ();
     819            ultimate->contained_hv = (void *) newHV ();
     820            hv_store (ultimate->hv,
     821                      "contained_indices",
     822                      strlen ("contained_indices"),
     823                      newRV_inc ((SV *)(HV *) ultimate->contained_hv),
     824                      0);
     825          }
     826  
     827        hv_store (ultimate->contained_hv, i->name, strlen (i->name),
     828                  newSViv (1), 0);
     829  
     830        STORE("merged_in", newSVpv_utf8 (ultimate->name, 0));
     831  
     832        if (i->contained_hv)
     833          {
     834            /* This is unlikely to happen, as if this index is merged into
     835               another one, any indices merged into this index would have been
     836               recorded under that one, and not this one. */
     837            hv_delete (i->hv,
     838                       "contained_indices", strlen ("contained_indices"),
     839                       G_DISCARD);
     840            i->contained_hv = 0;
     841          }
     842      }
     843    else
     844      {
     845        if (!i->contained_hv)
     846          {
     847            i->contained_hv = newHV ();
     848            STORE("contained_indices", newRV_inc ((SV *)(HV *) i->contained_hv));
     849          }
     850        /* Record that this index "contains itself". */
     851        hv_store (i->contained_hv, i->name, strlen (i->name), newSViv(1), 0);
     852      }
     853  
     854    if (i->index_number > 0)
     855      {
     856        entries = newAV ();
     857        av_unshift (entries, i->index_number);
     858        STORE("index_entries", newRV_noinc ((SV *) entries));
     859  #undef STORE
     860  
     861        entry_number = 1;
     862        for (j = 0; j < i->index_number; j++)
     863          {
     864  #define STORE2(key, value) hv_store (entry, key, strlen (key), value, 0)
     865            HV *entry;
     866            INDEX_ENTRY *e;
     867  
     868            e = &i->index_entries[j];
     869            entry = newHV ();
     870  
     871            STORE2("index_name", newSVpv_utf8 (i->name, 0));
     872            STORE2("entry_element",
     873                   newRV_inc ((SV *)e->command->hv));
     874            STORE2("entry_number", newSViv (entry_number));
     875  
     876            av_store (entries, j, newRV_noinc ((SV *)entry));
     877  
     878            entry_number++;
     879  #undef STORE2
     880          }
     881      }
     882  }
     883  
     884  /* Return object to be used as $self->{'index_names'} in the perl code.
     885     build_texinfo_tree must be called before this so all the 'hv' fields
     886     are set on the elements in the tree. */
     887  HV *
     888  build_index_data (void)
     889  {
     890    HV *hv;
     891    INDEX **i, *idx;
     892  
     893    dTHX;
     894  
     895    hv = newHV ();
     896  
     897    for (i = index_names; (idx = *i); i++)
     898      {
     899        HV *hv2;
     900        build_single_index_data (idx);
     901        hv2 = idx->hv;
     902        hv_store (hv, idx->name, strlen (idx->name), newRV_noinc ((SV *)hv2), 0);
     903      }
     904  
     905    return hv;
     906  }
     907  
     908  
     909  /* Return object to be used as $self->{'info'} in the Perl code, retrievable
     910     with the 'global_information' function. */
     911  HV *
     912  build_global_info (void)
     913  {
     914    HV *hv;
     915    int i;
     916    ELEMENT *e;
     917  
     918    dTHX;
     919  
     920    hv = newHV ();
     921    if (global_input_encoding_name)
     922      hv_store (hv, "input_encoding_name", strlen ("input_encoding_name"),
     923                newSVpv (global_input_encoding_name, 0), 0);
     924  
     925    if (global_info.dircategory_direntry.contents.number > 0)
     926      {
     927        AV *av = newAV ();
     928        hv_store (hv, "dircategory_direntry", strlen ("dircategory_direntry"),
     929                  newRV_noinc ((SV *) av), 0);
     930        for (i = 0; i < global_info.dircategory_direntry.contents.number; i++)
     931          {
     932            e = contents_child_by_index (&global_info.dircategory_direntry, i);
     933            if (e->hv)
     934              av_push (av, newRV_inc ((SV *) e->hv));
     935          }
     936      }
     937  
     938    return hv;
     939  }
     940  
     941  /* Return object to be used as $self->{'extra'} in the Perl code, which
     942     are mostly references to tree elements. */
     943  HV *
     944  build_global_info2 (void)
     945  {
     946    HV *hv;
     947    AV *av;
     948    int i;
     949    ELEMENT *e;
     950  
     951    dTHX;
     952  
     953    hv = newHV ();
     954  
     955    /* These should be unique elements. */
     956  
     957  #define BUILD_GLOBAL_UNIQ(cmd) \
     958    if (global_info.cmd && global_info.cmd->hv) \
     959      { \
     960        hv_store (hv, #cmd, strlen (#cmd), \
     961                  newRV_inc ((SV *) global_info.cmd->hv), 0); \
     962      }
     963  
     964    BUILD_GLOBAL_UNIQ(setfilename);
     965    BUILD_GLOBAL_UNIQ(settitle);
     966    BUILD_GLOBAL_UNIQ(copying);
     967    BUILD_GLOBAL_UNIQ(titlepage);
     968    BUILD_GLOBAL_UNIQ(top);
     969    BUILD_GLOBAL_UNIQ(documentdescription);
     970    BUILD_GLOBAL_UNIQ(pagesizes);
     971    BUILD_GLOBAL_UNIQ(fonttextsize);
     972    BUILD_GLOBAL_UNIQ(footnotestyle);
     973    BUILD_GLOBAL_UNIQ(setchapternewpage);
     974    BUILD_GLOBAL_UNIQ(everyheading);
     975    BUILD_GLOBAL_UNIQ(everyfooting);
     976    BUILD_GLOBAL_UNIQ(evenheading);
     977    BUILD_GLOBAL_UNIQ(evenfooting);
     978    BUILD_GLOBAL_UNIQ(oddheading);
     979    BUILD_GLOBAL_UNIQ(oddfooting);
     980    BUILD_GLOBAL_UNIQ(everyheadingmarks);
     981    BUILD_GLOBAL_UNIQ(everyfootingmarks);
     982    BUILD_GLOBAL_UNIQ(evenheadingmarks);
     983    BUILD_GLOBAL_UNIQ(oddheadingmarks);
     984    BUILD_GLOBAL_UNIQ(evenfootingmarks);
     985    BUILD_GLOBAL_UNIQ(oddfootingmarks);
     986    BUILD_GLOBAL_UNIQ(shorttitlepage);
     987    BUILD_GLOBAL_UNIQ(title);
     988    BUILD_GLOBAL_UNIQ(novalidate);
     989  #undef BUILD_GLOBAL_UNIQ
     990  
     991    /* NOTE: Same list in handle_commands.c:register_global_command. */
     992  
     993    /* The following are arrays of elements. */
     994  
     995    
     996    if (global_info.footnotes.contents.number > 0)
     997      {
     998        av = newAV ();
     999        hv_store (hv, "footnote", strlen ("footnote"),
    1000                  newRV_noinc ((SV *) av), 0);
    1001        for (i = 0; i < global_info.footnotes.contents.number; i++)
    1002          {
    1003            e = contents_child_by_index (&global_info.footnotes, i);
    1004            if (e->hv)
    1005              av_push (av, newRV_inc ((SV *) e->hv));
    1006          }
    1007      }
    1008  
    1009    /* float is a type, it does not work there, use floats instead */
    1010    if (global_info.floats.contents.number > 0)
    1011      {
    1012        av = newAV ();
    1013        hv_store (hv, "float", strlen ("float"),
    1014                  newRV_noinc ((SV *) av), 0);
    1015        for (i = 0; i < global_info.floats.contents.number; i++)
    1016          {
    1017            e = contents_child_by_index (&global_info.floats, i);
    1018            if (e->hv)
    1019              av_push (av, newRV_inc ((SV *) e->hv));
    1020          }
    1021      }
    1022  
    1023  #define BUILD_GLOBAL_ARRAY(cmd) \
    1024    if (global_info.cmd.contents.number > 0)                              \
    1025      {                                                                   \
    1026        av = newAV ();                                                    \
    1027        hv_store (hv, #cmd, strlen (#cmd),                                \
    1028                  newRV_noinc ((SV *) av), 0);                              \
    1029        for (i = 0; i < global_info.cmd.contents.number; i++)             \
    1030          {                                                               \
    1031            e = contents_child_by_index (&global_info.cmd, i);            \
    1032            if (e->hv)                                                    \
    1033              av_push (av, newRV_inc ((SV *) e->hv));                     \
    1034          }                                                               \
    1035      }
    1036  
    1037    BUILD_GLOBAL_ARRAY(author);
    1038    BUILD_GLOBAL_ARRAY(detailmenu);
    1039    BUILD_GLOBAL_ARRAY(hyphenation);
    1040    BUILD_GLOBAL_ARRAY(insertcopying);
    1041    BUILD_GLOBAL_ARRAY(listoffloats);
    1042    BUILD_GLOBAL_ARRAY(part);
    1043    BUILD_GLOBAL_ARRAY(printindex);
    1044    BUILD_GLOBAL_ARRAY(subtitle);
    1045    BUILD_GLOBAL_ARRAY(titlefont);
    1046  
    1047    /* from Common.pm %document_settable_multiple_at_commands */
    1048    BUILD_GLOBAL_ARRAY(allowcodebreaks);
    1049    BUILD_GLOBAL_ARRAY(clickstyle);
    1050    BUILD_GLOBAL_ARRAY(codequotebacktick);
    1051    BUILD_GLOBAL_ARRAY(codequoteundirected);
    1052    BUILD_GLOBAL_ARRAY(contents);
    1053    BUILD_GLOBAL_ARRAY(deftypefnnewline);
    1054    BUILD_GLOBAL_ARRAY(documentencoding);
    1055    BUILD_GLOBAL_ARRAY(documentlanguage);
    1056    BUILD_GLOBAL_ARRAY(exampleindent);
    1057    BUILD_GLOBAL_ARRAY(firstparagraphindent);
    1058    BUILD_GLOBAL_ARRAY(frenchspacing);
    1059    BUILD_GLOBAL_ARRAY(headings);
    1060    BUILD_GLOBAL_ARRAY(kbdinputstyle);
    1061    BUILD_GLOBAL_ARRAY(paragraphindent);
    1062    BUILD_GLOBAL_ARRAY(shortcontents);
    1063    BUILD_GLOBAL_ARRAY(urefbreakstyle);
    1064    BUILD_GLOBAL_ARRAY(xrefautomaticsectiontitle);
    1065    return hv;
    1066  }
    1067  
    1068  /* for debugging */
    1069  void
    1070  set_debug (int value)
    1071  {
    1072    debug_output = value;
    1073  }
    1074  
    1075  void
    1076  conf_set_documentlanguage_override (char *value)
    1077  {
    1078    set_documentlanguage_override (value);
    1079  }
    1080  
    1081  
    1082  void
    1083  set_DOC_ENCODING_FOR_INPUT_FILE_NAME (int i)
    1084  {
    1085    doc_encoding_for_input_file_name = i;
    1086  }
    1087  
    1088  void
    1089  conf_set_input_file_name_encoding (char *value)
    1090  {
    1091    set_input_file_name_encoding (value);
    1092  }
    1093  
    1094  void
    1095  conf_set_locale_encoding (char *value)
    1096  {
    1097    set_locale_encoding (value);
    1098  }
    1099  
    1100  
    1101  static SV *
    1102  build_source_info_hash (SOURCE_INFO source_info)
    1103  {
    1104    HV *hv;
    1105  
    1106    dTHX;
    1107  
    1108    hv = newHV ();
    1109  
    1110    if (source_info.file_name)
    1111      {
    1112        hv_store (hv, "file_name", strlen ("file_name"),
    1113                  newSVpv (source_info.file_name, 0), 0);
    1114      }
    1115    else
    1116      {
    1117        hv_store (hv, "file_name", strlen ("file_name"),
    1118                  newSVpv ("", 0), 0);
    1119      }
    1120    if (source_info.line_nr)
    1121      {
    1122        hv_store (hv, "line_nr", strlen ("line_nr"),
    1123                  newSViv (source_info.line_nr), 0);
    1124      }
    1125    if (source_info.macro)
    1126      {
    1127        hv_store (hv, "macro", strlen ("macro"),
    1128                  newSVpv_utf8 (source_info.macro, 0), 0);
    1129      }
    1130    else
    1131      {
    1132        hv_store (hv, "macro", strlen ("macro"),
    1133                  newSVpv_utf8 ("", 0), 0);
    1134      }
    1135  
    1136    return newRV_noinc ((SV *) hv);
    1137  }
    1138  
    1139  static SV *
    1140  convert_error (int i)
    1141  {
    1142    ERROR_MESSAGE e;
    1143    HV *hv;
    1144    SV *msg;
    1145  
    1146    dTHX;
    1147  
    1148    e = error_list[i];
    1149    hv = newHV ();
    1150  
    1151    msg = newSVpv_utf8 (e.message, 0);
    1152  
    1153    hv_store (hv, "message", strlen ("message"), msg, 0);
    1154    hv_store (hv, "type", strlen ("type"),
    1155                e.type == error ? newSVpv("error", strlen("error"))
    1156                                : newSVpv("warning", strlen("warning")),
    1157              0);
    1158  
    1159    hv_store (hv, "source_info", strlen ("source_info"),
    1160              build_source_info_hash(e.source_info), 0);
    1161  
    1162    return newRV_noinc ((SV *) hv);
    1163  
    1164  }
    1165  
    1166  /* Errors */
    1167  AV *
    1168  get_errors (void)
    1169  {
    1170    AV *av;
    1171    int i;
    1172  
    1173    dTHX;
    1174  
    1175    av = newAV ();
    1176  
    1177    for (i = 0; i < error_number; i++)
    1178      {
    1179        SV *sv = convert_error (i);
    1180        av_push (av, sv);
    1181      }
    1182  
    1183    return av;
    1184  
    1185  }