(root)/
fribidi-1.0.13/
gen.tab/
gen-brackets-tab.c
       1  /* FriBidi
       2   * gen-brackets-tab.c - generate brackets.tab.i
       3   *
       4   * Author:
       5   *   Behdad Esfahbod, 2001, 2002, 2004
       6   *   Dov Grobgeld 2017
       7   *
       8   * Copyright (C) 2004 Sharif FarsiWeb, Inc
       9   * Copyright (C) 2001,2002,2004 Behdad Esfahbod
      10   * Copyright (C) 2017 Dov Grobgeld
      11   * 
      12   * This library is free software; you can redistribute it and/or
      13   * modify it under the terms of the GNU Lesser General Public
      14   * License as published by the Free Software Foundation; either
      15   * version 2.1 of the License, or (at your option) any later version.
      16   * 
      17   * This library is distributed in the hope that it will be useful,
      18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20   * Lesser General Public License for more details.
      21   * 
      22   * You should have received a copy of the GNU Lesser General Public License
      23   * along with this library, in a file named COPYING; if not, write to the
      24   * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
      25   * Boston, MA 02110-1301, USA
      26   * 
      27   * For licensing issues, contact <fribidi.license@gmail.com>.
      28   */
      29  
      30  #include <common.h>
      31  #include <ctype.h>
      32  #include <fribidi-unicode.h>
      33  
      34  #include <stdio.h>
      35  #ifdef HAVE_CONFIG_H
      36  # include <config.h>
      37  #endif
      38  
      39  #ifdef STDC_HEADERS
      40  # include <stdlib.h>
      41  # include <stddef.h>
      42  #else
      43  # if HAVE_STDLIB_H
      44  #  include <stdlib.h>
      45  # endif
      46  #endif
      47  #ifdef HAVE_STRING_H
      48  # if !STDC_HEADERS && HAVE_MEMORY_H
      49  #  include <memory.h>
      50  # endif
      51  # include <string.h>
      52  #endif
      53  #ifdef HAVE_STRINGS_H
      54  # include <strings.h>
      55  #endif
      56  
      57  #include "packtab.h"
      58  
/* appname prefixes every diagnostic printed by the die* helpers and is
   written into the banner of the generated file; outputname is the file
   name mentioned in the generated banner and trailer comments. */
#define appname "gen-brackets-tab"
#define outputname "brackets.tab.i"
      61  
      62  static void
      63  die (
      64    const char *msg
      65  )
      66  {
      67    fprintf (stderr, appname ": %s\n", msg);
      68    exit (1);
      69  }
      70  
      71  static void
      72  die2 (
      73    const char *fmt,
      74    const char *p
      75  )
      76  {
      77    fprintf (stderr, appname ": ");
      78    fprintf (stderr, fmt, p);
      79    fprintf (stderr, "\n");
      80    exit (1);
      81  }
      82  
      83  static void
      84  die3 (
      85    const char *fmt,
      86    const char *p,
      87    const char *q
      88  )
      89  {
      90    fprintf (stderr, appname ": ");
      91    fprintf (stderr, fmt, p, q);
      92    fprintf (stderr, "\n");
      93    exit (1);
      94  }
      95  
      96  static void
      97  die4 (
      98    const char *fmt,
      99    unsigned long l,
     100    unsigned long p,
     101    unsigned long q
     102  )
     103  {
     104    fprintf (stderr, appname ": ");
     105    fprintf (stderr, fmt, l, p, q);
     106    fprintf (stderr, "\n");
     107    exit (1);
     108  }
     109  
/* table_name and macro_name "_DELTA" are the names handed to pack_table()
   when the table is emitted; macro_name is also the accessor macro that
   gen_brackets_tab() prints into the output. */
#define table_name "Brk"
#define macro_name "FRIBIDI_GET_BRACKETS"

/* table[c]: signed offset stored for code point c by
   read_bidi_brackets_txt(); this is the array passed to pack_table(). */
static signed int table[FRIBIDI_UNICODE_CHARS];
/* equiv_table[c]: value parsed from field 5 of the UnicodeData-style input
   for code point c; tested for non-zero by read_bidi_brackets_txt(). */
static signed int equiv_table[FRIBIDI_UNICODE_CHARS];
/* Line buffer shared by both input readers. */
static char buf[4000];
/* Largest absolute offset written to table so far; gen_brackets_tab() uses
   it to choose the element width of the generated table. */
static signed long max_dist;
     117  
     118  static void
     119  init (
     120    void
     121  )
     122  {
     123    max_dist = 0;
     124  }
     125  
     126  static void
     127  clear_tabs (
     128    void
     129  )
     130  {
     131    register FriBidiChar c;
     132  
     133    for (c = 0; c < FRIBIDI_UNICODE_CHARS; c++)
     134      {
     135        table[c] = 0;
     136        equiv_table[c] = 0;
     137      }
     138  }
     139  
/* NOTE(review): these two lines repeat, with identical types, the
   file-scope declarations of table and buf that already appear earlier in
   this file.  They declare the same objects again and look like leftovers
   that could be removed. */
static signed int table[FRIBIDI_UNICODE_CHARS];
static char buf[4000];
     142  
     143  /* Read the canonical mapping of unicode characters and store them in the
     144     equiv_table array. */
     145  static void
     146  read_unicode_data_txt_equivalence (
     147    FILE *f
     148  )
     149  {
     150    unsigned long c, l;
     151  
     152    l = 0;
     153    while (fgets (buf, sizeof buf, f))
     154      {
     155        int i;
     156        const char *s = buf;
     157        char ce_string[256]; /* For parsing the equivalence */
     158        char *p = NULL;
     159        int ce, in_tag;
     160  
     161        l++;
     162  
     163        while (*s == ' ')
     164  	s++;
     165  
     166        if (s[0] == '#' || s[0] == '\0' || s[0] == '\n')
     167  	continue;
     168        /*  Field:       0 ; 1    ; 2    ; 3    ; 4    ; 5           */
     169        i = sscanf (s, "%lx;%*[^;];%*[^;];%*[^;];%*[^;];%[^;]", &c, ce_string);
     170        if (c >= FRIBIDI_UNICODE_CHARS)
     171          {
     172            fprintf (stderr, "invalid input at line %ld: %s", l, s);
     173            exit(1);
     174          }
     175        if (i==1)
     176          continue;
     177  
     178        /* split and parse ce */
     179        p = ce_string;
     180        ce = -1;
     181        in_tag = 0;
     182        while(*p)
     183          {
     184            if (*p==';')
     185              break;
     186            else if (*p=='<')
     187              in_tag = 1;
     188            else if (*p=='>')
     189              in_tag = 0;
     190            else if (!in_tag && isalnum(*p))
     191              {
     192                /* Assume we got a hexa decimal */
     193                ce = strtol(p,NULL,16);
     194                break;
     195              }
     196            p++;
     197          }
     198  
     199        /* FIXME: We don't handle First..Last parts of UnicodeData.txt,
     200         * but it works, since all those are LTR. */
     201        equiv_table[c] = ce;
     202      }
     203  }
     204  
     205  static void
     206  read_bidi_brackets_txt (
     207    FILE *f
     208  )
     209  {
     210    unsigned long l;
     211  
     212    l = 0;
     213    while (fgets (buf, sizeof buf, f))
     214      {
     215        unsigned long i, j;
     216        signed long dist;
     217        int k;
     218        const char *s = buf;
     219        char open_close;
     220  
     221        l++;
     222  
     223        while (*s == ' ')
     224  	s++;
     225  
     226        if (s[0] == '#' || s[0] == '\0' || s[0] == '\n')
     227  	continue;
     228  
     229        k = sscanf (s, "%lx; %lx; %c", &i, &j, &open_close);
     230        if (k != 3 || i >= FRIBIDI_UNICODE_CHARS || j >= FRIBIDI_UNICODE_CHARS)
     231  	die4 ("invalid pair in input at line %ld: %04lX, %04lX", l, i, j);
     232  
     233        /* Open braces map to themself */
     234        if (open_close=='o')
     235          j = i;
     236        
     237        /* Turn j into the unicode equivalence if it exists */
     238        if (equiv_table[j])
     239          {
     240            /* printf("Found match for %04x->%04x\n", j, equiv_table[j]); */
     241            j = equiv_table[j];
     242          }
     243  
     244        dist = ((signed long) j - (signed long) i);
     245        table[i] = dist;
     246        if (dist > max_dist)
     247  	max_dist = dist;
     248        else if (-dist > max_dist)
     249  	max_dist = -dist;
     250      }
     251  }
     252  
     253  static void
     254  read_data (
     255    const char *bracket_datafile_type,
     256    const char *bracket_datafile_name,
     257    const char *uni_datafile_type,
     258    const char *uni_datafile_name
     259  )
     260  {
     261    FILE *f;
     262  
     263    clear_tabs ();
     264  
     265    if (!(f = fopen (uni_datafile_name, "rt")))
     266      die2 ("error: cannot open `%s' for reading", bracket_datafile_name);
     267  
     268    if (!strcmp (uni_datafile_type, "UnicodeData.txt"))
     269      read_unicode_data_txt_equivalence (f);
     270    else
     271      die2 ("error: unknown data-file-type %s", uni_datafile_type);
     272  
     273    fprintf (stderr, "Reading `%s'\n", bracket_datafile_name);
     274    if (!(f = fopen (bracket_datafile_name, "rt")))
     275      die2 ("error: cannot open `%s' for reading", bracket_datafile_name);
     276  
     277    if (!strcmp (bracket_datafile_type, "BidiBrackets.txt"))
     278      read_bidi_brackets_txt (f);
     279    else
     280      die2 ("error: unknown data-file-type %s", bracket_datafile_type);
     281  
     282    fclose (f);
     283  }
     284  
     285  static void
     286  gen_brackets_tab (
     287    int max_depth,
     288    const char *data_file_type
     289  )
     290  {
     291    int key_bytes;
     292    const char *key_type;
     293  
     294    printf ("/* " outputname "\n * generated by " appname " (" FRIBIDI_NAME " "
     295  	  FRIBIDI_VERSION ")\n" " * from the file %s of Unicode version "
     296  	  FRIBIDI_UNICODE_VERSION ". */\n\n", data_file_type);
     297  
     298    printf ("#define PACKTAB_UINT8 uint8_t\n"
     299  	  "#define PACKTAB_UINT16 uint16_t\n"
     300  	  "#define PACKTAB_UINT32 uint32_t\n\n");
     301  
     302    key_bytes = max_dist <= 0x7f ? 1 : max_dist < 0x7fff ? 2 : 4;
     303    key_type = key_bytes == 1 ? "int8_t" : key_bytes == 2 ?
     304      "int16_t" : "int32_t";
     305  
     306    if (!pack_table
     307        (table, FRIBIDI_UNICODE_CHARS, key_bytes, 0, max_depth, 1, NULL,
     308         key_type, table_name, macro_name "_DELTA", stdout))
     309      die ("error: insufficient memory, decrease max_depth");
     310  
     311    printf ("#undef PACKTAB_UINT8\n"
     312  	  "#undef PACKTAB_UINT16\n" "#undef PACKTAB_UINT32\n\n");
     313  
     314    printf ("#define " macro_name "(x) ((x) + " macro_name "_DELTA(x))\n\n");
     315  
     316    printf ("/* End of generated " outputname " */\n");
     317  }
     318  
     319  int
     320  main (
     321    int argc,
     322    const char **argv
     323  )
     324  {
     325    const char *bracket_datafile_type = "BidiBrackets.txt";
     326    const char *uni_datafile_type = "UnicodeData.txt";
     327  
     328    if (argc < 4)
     329      die3 ("usage:\n  " appname " max-depth /path/to/%s /path/to/%s [junk...]",
     330  	  bracket_datafile_type,
     331            uni_datafile_type);
     332  
     333    {
     334      int max_depth = atoi (argv[1]);
     335      const char *bracket_datafile_name = argv[2];
     336      const char *uni_datafile_name = argv[3];
     337  
     338      if (max_depth < 2)
     339        die ("invalid depth");
     340  
     341      init ();
     342      read_data (bracket_datafile_type, bracket_datafile_name,
     343                 uni_datafile_type, uni_datafile_name);
     344      gen_brackets_tab (max_depth, bracket_datafile_type);
     345    }
     346  
     347    return 0;
     348  }