1 /* FriBidi
2 * fribidi-char-sets-utf8.c - UTF-8 character set conversion routines
3 *
4 * Authors:
5 * Behdad Esfahbod, 2001, 2002, 2004
6 * Dov Grobgeld, 1999, 2000
7 *
8 * Copyright (C) 2004 Sharif FarsiWeb, Inc
9 * Copyright (C) 2001,2002 Behdad Esfahbod
10 * Copyright (C) 1999,2000 Dov Grobgeld
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public License
23 * along with this library, in a file named COPYING; if not, write to the
24 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 * Boston, MA 02110-1301, USA
26 *
27 * For licensing issues, contact <fribidi.license@gmail.com>.
28 */
29
30 #include <common.h>
31
32 #include <fribidi-char-sets-utf8.h>
33
34 #include <fribidi-unicode.h>
35
36 FriBidiStrIndex
37 fribidi_utf8_to_unicode (
38 /* input */
39 const char *ss,
40 FriBidiStrIndex len,
41 /* output */
42 FriBidiChar *us
43 )
44 {
45 FriBidiStrIndex length;
46 const unsigned char *s = (unsigned const char *) ss;
47 const unsigned char *t = s;
48
49 length = 0;
50 while ((FriBidiStrIndex) (s - t) < len)
51 {
52 register unsigned char ch = *s;
53 if (ch <= 0x7f) /* one byte */
54 {
55 *us++ = *s++;
56 }
57 else if (ch <= 0xdf) /* 2 byte */
58 {
59 if (s+2-t>len)
60 return (length);
61 *us++ = ((*s & 0x1f) << 6) + (*(s + 1) & 0x3f);
62 s += 2;
63 }
64 else if (ch <= 0xef) /* 3 byte */
65 {
66 if (s+3-t>len)
67 return (length);
68 *us++ =
69 ((int) (*s & 0x0f) << 12) +
70 ((*(s + 1) & 0x3f) << 6) + (*(s + 2) & 0x3f);
71 s += 3;
72 }
73 else /* 4 byte */
74 {
75 if (s+4-t>len)
76 return (length);
77 *us++ =
78 ((int) (*s & 0x07) << 18) +
79 ((*(s + 1) & 0x3f) << 12) +
80 ((*(s + 2) & 0x3f) << 6) +
81 ((*(s + 3) & 0x3f) << 0);
82 s += 4;
83 }
84 length++;
85 }
86 return (length);
87 }
88
89 FriBidiStrIndex
90 fribidi_unicode_to_utf8 (
91 /* input */
92 const FriBidiChar *us,
93 FriBidiStrIndex len,
94 /* output */
95 char *ss
96 )
97 {
98 FriBidiStrIndex i;
99 unsigned char *s = (unsigned char *) ss;
100 unsigned char *t = s;
101
102 for (i = 0; i < len; i++)
103 {
104 FriBidiChar mychar = us[i];
105 if (mychar <= 0x7F)
106 { /* 7 sig bits */
107 *t++ = mychar;
108 }
109 else if (mychar <= 0x7FF)
110 { /* 11 sig bits */
111 *t++ = 0xC0 | (unsigned char) (mychar >> 6); /* upper 5 bits */
112 *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lower 6 bits */
113 }
114 else if (mychar <= 0xFFFF)
115 { /* 16 sig bits */
116 *t++ = 0xE0 | (unsigned char) (mychar >> 12); /* upper 4 bits */
117 *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F); /* next 6 bits */
118 *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lowest 6 bits */
119 }
120 else if (mychar < FRIBIDI_UNICODE_CHARS)
121 { /* 21 sig bits */
122 *t++ = 0xF0 | (unsigned char) ((mychar >> 18) & 0x07); /* upper 3 bits */
123 *t++ = 0x80 | (unsigned char) ((mychar >> 12) & 0x3F); /* next 6 bits */
124 *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F); /* next 6 bits */
125 *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lowest 6 bits */
126 }
127 }
128 *t = 0;
129
130 return (t - s);
131 }
132
133 /* Editor directions:
134 * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent
135 */