(root)/
fribidi-1.0.13/
lib/
fribidi-char-sets-cap-rtl.c
       1  /* FriBidi
       2   * fribidi-char-sets-cap-rtl.c - CapRTL character set conversion routines
       3   *
       4   * Authors:
       5   *   Behdad Esfahbod, 2001, 2002, 2004
       6   *   Dov Grobgeld, 1999, 2000
       7   *
       8   * Copyright (C) 2004 Sharif FarsiWeb, Inc
       9   * Copyright (C) 2001,2002 Behdad Esfahbod
      10   * Copyright (C) 1999,2000 Dov Grobgeld
      11   * 
      12   * This library is free software; you can redistribute it and/or
      13   * modify it under the terms of the GNU Lesser General Public
      14   * License as published by the Free Software Foundation; either
      15   * version 2.1 of the License, or (at your option) any later version.
      16   * 
      17   * This library is distributed in the hope that it will be useful,
      18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20   * Lesser General Public License for more details.
      21   * 
      22   * You should have received a copy of the GNU Lesser General Public License
      23   * along with this library, in a file named COPYING; if not, write to the
      24   * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
      25   * Boston, MA 02110-1301, USA
      26   * 
      27   * For licensing issues, contact <fribidi.license@gmail.com>.
      28   */
      29  
      30  #include <common.h>
      31  
      32  #include <fribidi-char-sets-cap-rtl.h>
      33  
      34  #include <fribidi-unicode.h>
      35  #include <fribidi-mirroring.h>
      36  #include <fribidi-bidi-types.h>
      37  
      38  #include <bidi-types.h>
      39  
      40  #include <stdio.h>
      41  
      42  enum
      43  {
      44  # define _FRIBIDI_ADD_TYPE(TYPE,SYMBOL) TYPE = FRIBIDI_TYPE_##TYPE,
      45  # include "fribidi-bidi-types-list.h"
      46  # undef _FRIBIDI_ADD_TYPE
      47    _FRIBIDI_MAX_TYPES_VALUE
      48  };
      49  
      50  enum
      51  {
      52  # define _FRIBIDI_ADD_TYPE(TYPE,SYMBOL) DUMMY_##TYPE,
      53  # include "fribidi-bidi-types-list.h"
      54  # undef _FRIBIDI_ADD_TYPE
      55    _FRIBIDI_NUM_TYPES
      56  };
      57  
      58  static FriBidiCharType CapRTLCharTypes[] = {
      59  /* *INDENT-OFF* */
      60    ON, ON, ON, ON, LTR,RTL,ON, ON, ON, ON, ON, ON, ON, BS, RLO,RLE, /* 00-0f */
      61    LRO,LRE,PDF,WS, LRI, RLI, FSI, PDI, ON, ON, ON, ON, ON, ON, ON, ON,  /* 10-1f */
      62    WS, ON, ON, ON, ET, ON, ON, ON, ON, ON, ON, ET, CS, ON, ES, ES,  /* 20-2f */
      63    EN, EN, EN, EN, EN, EN, AN, AN, AN, AN, CS, ON, ON, ON, ON, ON,  /* 30-3f */
      64    RTL,AL, AL, AL, AL, AL, AL, RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL, /* 40-4f */
      65    RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,ON, BS, ON, BN, ON,  /* 50-5f */
      66    NSM,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR, /* 60-6f */
      67    LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,ON, SS, ON, WS, ON,  /* 70-7f */
      68  /* *INDENT-ON* */
      69  };
      70  
      71  #define CAPRTL_CHARS (int)(sizeof CapRTLCharTypes / sizeof CapRTLCharTypes[0])
      72  
      73  static FriBidiChar *caprtl_to_unicode = NULL;
      74  
      75  static void
      76  init_cap_rtl (
      77    void
      78  )
      79  {
      80    int request[_FRIBIDI_NUM_TYPES];
      81    FriBidiCharType to_type[_FRIBIDI_NUM_TYPES];
      82    int num_types = 0, count = 0;
      83    FriBidiCharType i;
      84    char mark[CAPRTL_CHARS];
      85  
      86    caprtl_to_unicode =
      87      (FriBidiChar *) fribidi_malloc (CAPRTL_CHARS *
      88  				    sizeof caprtl_to_unicode[0]);
      89    for (i = 0; i < CAPRTL_CHARS; i++)
      90      if (CapRTLCharTypes[i] == fribidi_get_bidi_type (i))
      91        {
      92  	caprtl_to_unicode[i] = i;
      93  	mark[i] = 1;
      94        }
      95      else
      96        {
      97  	int j;
      98  
      99  	caprtl_to_unicode[i] = FRIBIDI_UNICODE_CHARS;
     100  	mark[i] = 0;
     101  	if (fribidi_get_mirror_char (i, NULL))
     102  	  {
     103  	    DBG ("warning: I could not map mirroring character map to itself in CapRTL");
     104  	  }
     105  
     106  	for (j = 0; j < num_types; j++)
     107  	  if (to_type[j] == CapRTLCharTypes[i])
     108  	    break;
     109  	if (j == num_types)
     110  	  {
     111  	    num_types++;
     112  	    to_type[j] = CapRTLCharTypes[i];
     113  	    request[j] = 0;
     114  	  }
     115  	request[j]++;
     116  	count++;
     117        }
     118    for (i = 0; i < 0x10000 && count; i++)	/* Assign BMP chars to CapRTL entries */
     119      if (!fribidi_get_mirror_char (i, NULL) && !(i < CAPRTL_CHARS && mark[i]))
     120        {
     121  	int j, k;
     122  	FriBidiCharType t = fribidi_get_bidi_type (i);
     123  	for (j = 0; j < num_types; j++)
     124  	  if (to_type[j] == t)
     125  	    break;
     126  	if (j >= num_types || !request[j])	/* Do not need this type */
     127  	  continue;
     128  	for (k = 0; k < CAPRTL_CHARS; k++)
     129  	  if (caprtl_to_unicode[k] == FRIBIDI_UNICODE_CHARS
     130  	      && to_type[j] == CapRTLCharTypes[k])
     131  	    {
     132  	      request[j]--;
     133  	      count--;
     134  	      caprtl_to_unicode[k] = i;
     135  	      break;
     136  	    }
     137        }
     138    if (count)
     139      {
     140        int j;
     141  
     142        DBG ("warning: could not find a mapping for CapRTL to Unicode:");
     143        for (j = 0; j < num_types; j++)
     144  	if (request[j])
     145  	  {
     146  	    DBG2 ("  need this type: %s", fribidi_get_bidi_type_name (to_type[j]));
     147  	  }
     148      }
     149  }
     150  
     151  static char
     152  fribidi_unicode_to_cap_rtl_c (
     153    /* input */
     154    FriBidiChar uch
     155  )
     156  {
     157    int i;
     158  
     159    if (!caprtl_to_unicode)
     160      init_cap_rtl ();
     161  
     162    for (i = 0; i < CAPRTL_CHARS; i++)
     163      if (uch == caprtl_to_unicode[i])
     164        return (unsigned char) i;
     165    return '?';
     166  }
     167  
     168  FriBidiStrIndex
     169  fribidi_cap_rtl_to_unicode (
     170    /* input */
     171    const char *s,
     172    FriBidiStrIndex len,
     173    /* output */
     174    FriBidiChar *us
     175  )
     176  {
     177    FriBidiStrIndex i, j;
     178  
     179    if (!caprtl_to_unicode)
     180      init_cap_rtl ();
     181  
     182    j = 0;
     183    for (i = 0; i < len; i++)
     184      {
     185        char ch;
     186  
     187        ch = s[i];
     188        if (ch == '_')
     189  	{
     190  	  switch (ch = s[++i])
     191  	    {
     192  	    case '>':
     193  	      us[j++] = FRIBIDI_CHAR_LRM;
     194  	      break;
     195  	    case '<':
     196  	      us[j++] = FRIBIDI_CHAR_RLM;
     197  	      break;
     198  	    case 'l':
     199  	      us[j++] = FRIBIDI_CHAR_LRE;
     200  	      break;
     201  	    case 'r':
     202  	      us[j++] = FRIBIDI_CHAR_RLE;
     203  	      break;
     204  	    case 'o':
     205  	      us[j++] = FRIBIDI_CHAR_PDF;
     206  	      break;
     207  	    case 'L':
     208  	      us[j++] = FRIBIDI_CHAR_LRO;
     209  	      break;
     210  	    case 'R':
     211  	      us[j++] = FRIBIDI_CHAR_RLO;
     212  	      break;
     213              case 'i':
     214                us[j++] = FRIBIDI_CHAR_LRI;
     215  	      break;
     216              case 'y':
     217                us[j++] = FRIBIDI_CHAR_RLI;
     218  	      break;
     219              case 'f':
     220                us[j++] = FRIBIDI_CHAR_FSI;
     221  	      break;
     222              case 'I':
     223                us[j++] = FRIBIDI_CHAR_PDI;
     224  	      break;
     225  	    case '_':
     226  	      us[j++] = '_';
     227  	      break;
     228  	    default:
     229  	      us[j++] = '_';
     230  	      i--;
     231  	      break;
     232  	    }
     233  	}
     234        else
     235        {
     236          if ((int)s[i] < 0)
     237            us[j++] = '?';
     238          else
     239            us[j++] = caprtl_to_unicode[(int) s[i]];
     240        }
     241      }
     242  
     243    return j;
     244  }
     245  
     246  FriBidiStrIndex
     247  fribidi_unicode_to_cap_rtl (
     248    /* input */
     249    const FriBidiChar *us,
     250    FriBidiStrIndex len,
     251    /* output */
     252    char *s
     253  )
     254  {
     255    FriBidiStrIndex i;
     256    int j;
     257  
     258    j = 0;
     259    for (i = 0; i < len; i++)
     260      {
     261        FriBidiChar ch = us[i];
     262        if (!FRIBIDI_IS_EXPLICIT (fribidi_get_bidi_type (ch))
     263            && !FRIBIDI_IS_ISOLATE (fribidi_get_bidi_type (ch))
     264            && ch != '_' && ch != FRIBIDI_CHAR_LRM && ch != FRIBIDI_CHAR_RLM)
     265  	s[j++] = fribidi_unicode_to_cap_rtl_c (ch);
     266        else
     267  	{
     268  	  s[j++] = '_';
     269  	  switch (ch)
     270  	    {
     271  	    case FRIBIDI_CHAR_LRM:
     272  	      s[j++] = '>';
     273  	      break;
     274  	    case FRIBIDI_CHAR_RLM:
     275  	      s[j++] = '<';
     276  	      break;
     277  	    case FRIBIDI_CHAR_LRE:
     278  	      s[j++] = 'l';
     279  	      break;
     280  	    case FRIBIDI_CHAR_RLE:
     281  	      s[j++] = 'r';
     282  	      break;
     283  	    case FRIBIDI_CHAR_PDF:
     284  	      s[j++] = 'o';
     285  	      break;
     286  	    case FRIBIDI_CHAR_LRO:
     287  	      s[j++] = 'L';
     288  	      break;
     289  	    case FRIBIDI_CHAR_RLO:
     290  	      s[j++] = 'R';
     291  	      break;
     292  	    case FRIBIDI_CHAR_LRI:
     293  	      s[j++] = 'i';
     294  	      break;
     295  	    case FRIBIDI_CHAR_RLI:
     296  	      s[j++] = 'y';
     297  	      break;
     298  	    case FRIBIDI_CHAR_FSI:
     299  	      s[j++] = 'f';
     300  	      break;
     301  	    case FRIBIDI_CHAR_PDI:
     302  	      s[j++] = 'I';
     303  	      break;
     304  	    case '_':
     305  	      s[j++] = '_';
     306  	      break;
     307  	    default:
     308  	      j--;
     309  	      if (ch < 256)
     310  		s[j++] = fribidi_unicode_to_cap_rtl_c (ch);
     311  	      else
     312  		s[j++] = '?';
     313  	      break;
     314  	    }
     315  	}
     316      }
     317    s[j] = 0;
     318  
     319    return j;
     320  }
     321  
     322  const char *
     323  fribidi_char_set_desc_cap_rtl (
     324    void
     325  )
     326  {
     327    static char *s = 0;
     328    int l, i, j;
     329  
     330    if (s)
     331      return s;
     332  
     333    l = 10000;
     334    s = (char *) fribidi_malloc (l);
     335    i = 0;
     336    i += sprintf (s + i,		/*l - i, */
     337  		"CapRTL is a character set for testing with the reference\n"
     338  		"implementation, with explicit marks escape strings, and\n"
     339  		"the property that contains all unicode character types in\n"
     340  		"ASCII range 1-127.\n"
     341  		"\n"
     342  		"Warning: CapRTL character types are subject to change.\n"
     343  		"\n" "CapRTL's character types:\n");
     344    for (j = 0; j < CAPRTL_CHARS; j++)
     345      {
     346        if (j % 4 == 0)
     347  	s[i++] = '\n';
     348        i += sprintf (s + i, /*l - i, */ "  * 0x%02x %c%c %-3s ", j,
     349  		    j < 0x20 ? '^' : ' ',
     350  		    j < 0x20 ? j + '@' : j < 0x7f ? j : ' ',
     351  		    fribidi_get_bidi_type_name (CapRTLCharTypes[j]));
     352      }
     353    i += sprintf (s + i,		/*l - i, */
     354  		"\n\n"
     355  		"Escape sequences:\n"
     356  		"  Character `_' is used to escape explicit marks. The list is:\n"
     357  		"    * _>  LRM\n" "    * _<  RLM\n"
     358  		"    * _l  LRE\n" "    * _r  RLE\n"
     359  		"    * _L  LRO\n" "    * _R  RLO\n"
     360  		"    * _o  PDF\n" "    * _i  LRI\n"
     361  		"    * _y  RLI\n" "    * _f  FSI\n"
     362  		"    * _I  PDI\n" "    * __  `_' itself\n"
     363                  "\n");
     364    return s;
     365  }
     366  
     367  /* Editor directions:
     368   * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent
     369   */