(root)/
fribidi-1.0.13/
lib/
fribidi-char-sets-utf8.c
       1  /* FriBidi
       2   * fribidi-char-sets-utf8.c - UTF-8 character set conversion routines
       3   *
       4   * Authors:
       5   *   Behdad Esfahbod, 2001, 2002, 2004
       6   *   Dov Grobgeld, 1999, 2000
       7   *
       8   * Copyright (C) 2004 Sharif FarsiWeb, Inc
       9   * Copyright (C) 2001,2002 Behdad Esfahbod
      10   * Copyright (C) 1999,2000 Dov Grobgeld
      11   * 
      12   * This library is free software; you can redistribute it and/or
      13   * modify it under the terms of the GNU Lesser General Public
      14   * License as published by the Free Software Foundation; either
      15   * version 2.1 of the License, or (at your option) any later version.
      16   * 
      17   * This library is distributed in the hope that it will be useful,
      18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20   * Lesser General Public License for more details.
      21   * 
      22   * You should have received a copy of the GNU Lesser General Public License
      23   * along with this library, in a file named COPYING; if not, write to the
      24   * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
      25   * Boston, MA 02110-1301, USA
      26   * 
      27   * For licensing issues, contact <fribidi.license@gmail.com>.
      28   */
      29  
      30  #include <common.h>
      31  
      32  #include <fribidi-char-sets-utf8.h>
      33  
      34  #include <fribidi-unicode.h>
      35  
      36  FriBidiStrIndex
      37  fribidi_utf8_to_unicode (
      38    /* input */
      39    const char *ss,
      40    FriBidiStrIndex len,
      41    /* output */
      42    FriBidiChar *us
      43  )
      44  {
      45    FriBidiStrIndex length;
      46    const unsigned char *s = (unsigned const char *) ss;
      47    const unsigned char *t = s;
      48  
      49    length = 0;
      50    while ((FriBidiStrIndex) (s - t) < len)
      51      {
      52        register unsigned char ch = *s;
      53        if (ch <= 0x7f)		/* one byte */
      54  	{
      55  	  *us++ = *s++;
      56  	}
      57        else if (ch <= 0xdf)	/* 2 byte */
      58  	{
      59            if (s+2-t>len)
      60                return (length);
      61  	  *us++ = ((*s & 0x1f) << 6) + (*(s + 1) & 0x3f);
      62  	  s += 2;
      63  	}
      64        else if (ch <= 0xef)	/* 3 byte */
      65  	{
      66            if (s+3-t>len)
      67                return (length);
      68  	  *us++ =
      69  	    ((int) (*s & 0x0f) << 12) +
      70  	    ((*(s + 1) & 0x3f) << 6) + (*(s + 2) & 0x3f);
      71  	  s += 3;
      72  	}
      73        else                     /* 4 byte */
      74          {
      75            if (s+4-t>len)
      76                return (length);
      77  	  *us++ =
      78  	    ((int) (*s & 0x07) << 18) +
      79  	    ((*(s + 1) & 0x3f) << 12) +
      80  	    ((*(s + 2) & 0x3f) << 6) +
      81  	    ((*(s + 3) & 0x3f) << 0);
      82  	  s += 4;
      83          }
      84        length++;
      85      }
      86    return (length);
      87  }
      88  
      89  FriBidiStrIndex
      90  fribidi_unicode_to_utf8 (
      91    /* input */
      92    const FriBidiChar *us,
      93    FriBidiStrIndex len,
      94    /* output */
      95    char *ss
      96  )
      97  {
      98    FriBidiStrIndex i;
      99    unsigned char *s = (unsigned char *) ss;
     100    unsigned char *t = s;
     101  
     102    for (i = 0; i < len; i++)
     103      {
     104        FriBidiChar mychar = us[i];
     105        if (mychar <= 0x7F)
     106  	{			/* 7 sig bits */
     107  	  *t++ = mychar;
     108  	}
     109        else if (mychar <= 0x7FF)
     110  	{			/* 11 sig bits */
     111  	  *t++ = 0xC0 | (unsigned char) (mychar >> 6);	/* upper 5 bits */
     112  	  *t++ = 0x80 | (unsigned char) (mychar & 0x3F);	/* lower 6 bits */
     113  	}
     114        else if (mychar <= 0xFFFF)
     115  	{			/* 16 sig bits */
     116  	  *t++ = 0xE0 | (unsigned char) (mychar >> 12);	/* upper 4 bits */
     117  	  *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F);	/* next 6 bits */
     118  	  *t++ = 0x80 | (unsigned char) (mychar & 0x3F);	/* lowest 6 bits */
     119  	}
     120        else if (mychar < FRIBIDI_UNICODE_CHARS)
     121  	{			/* 21 sig bits */
     122  	  *t++ = 0xF0 | (unsigned char) ((mychar >> 18) & 0x07);	/* upper 3 bits */
     123  	  *t++ = 0x80 | (unsigned char) ((mychar >> 12) & 0x3F);	/* next 6 bits */
     124  	  *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F);	/* next 6 bits */
     125  	  *t++ = 0x80 | (unsigned char) (mychar & 0x3F);	/* lowest 6 bits */
     126  	}
     127      }
     128    *t = 0;
     129  
     130    return (t - s);
     131  }
     132  
     133  /* Editor directions:
     134   * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent
     135   */