1 /*
2 * Copyright © 2010 Codethink Limited
3 *
4 * SPDX-License-Identifier: LGPL-2.1-or-later
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 *
19 * Author: Ryan Lortie <desrt@desrt.ca>
20 */
21
22 #include <string.h>
23 #include <glib.h>
24
25 /*
26 * The string info map is an efficient data structure designed to be
27 * used with a small set of items. It is used by GSettings schemas for
28 * three purposes:
29 *
30 * 1) Implement <choices> with a list of valid strings
31 *
32 * 2) Implement <alias> by mapping one string to another
33 *
34 * 3) Implement enumerated types by mapping strings to integer values
35 * (and back).
36 *
37 * The map is made out of an array of uint32s. Each entry in the array
38 * is an integer value, followed by a specially formatted string value:
39 *
40 * The string starts with the byte 0xff or 0xfe, followed by the
41 * content of the string, followed by a nul byte, followed by
42 * additional nul bytes for padding, followed by a 0xff byte.
43 *
44 * Padding is added so that the entire formatted string takes up a
45 * multiple of 4 bytes, and not less than 8 bytes. The requirement
46 * for a string to take up 8 bytes is so that the scanner doesn't lose
47 * synch and mistake a string for an integer value.
48 *
49 * The first byte of the formatted string depends on if the integer is
50 * an enum value (0xff) or an alias (0xfe). If it is an alias then the
51 * number refers to the word offset within the info map at which the
52 * integer corresponding to the "target" value is stored.
53 *
54 * For example, consider the case of the string info map representing an
55 * enumerated type of 'foo' (value 1) and 'bar' (value 2) and 'baz'
56 * (alias for 'bar'). Note that string info maps are always little
57 * endian.
58 *
59 * x01 x00 x00 x00 xff 'f' 'o' 'o' x00 x00 x00 xff x02 x00 x00 x00
60 * xff 'b' 'a' 'r' x00 x00 x00 xff x03 x00 x00 x00 xfe 'b' 'a' 'z'
61 * x00 x00 x00 xff
62 *
63 *
64 * The operations that someone may want to perform with the map:
65 *
66 * - look up if a string is valid (and not an alias)
67 * - look up the integer value for a enum 'nick'
68 * - look up the integer value for the target of an alias
69 * - look up an alias and convert it to its target string
70 * - look up the enum nick for a given value
71 *
72 * In order to look up if a string is valid, it is padded on either side
73 * (as described) and scanned for in the array. For example, you might
74 * look for "foo":
75 *
76 * xff 'f' 'o' 'o' x00 x00 x00 xff
77 *
78 * In order to look up the integer value for a nick, the string is padded
79 * on either side and scanned for in the array, as above. Instead of
80 * merely succeeding, we look at the integer value to the left of the
81 * match. This is the enum value.
82 *
83 * In order to look up an alias and convert it to its target enum value,
84 * the string is padded on either side (as described, with 0xfe) and
85 * scanned for. For example, you might look for "baz":
86 *
87 * xfe 'b' 'a' 'z' x00 x00 x00 xff
88 *
89 * The integer immediately preceding the match then contains the offset
90 * of the integer value of the target. In our example, that's '3'.
91 * This index is dereferenced to find the enum value of '2'.
92 *
93 * To convert the alias to its target string, 5 bytes just need to be
94 * added past the start of the integer value to find the start of the
95 * string.
96 *
97 * To look up the enum nick for a given value, the value is searched for
98 * in the array. To ensure that the value isn't matching the inside of a
99 * string, we must check that it is either the first item in the array or
100 * immediately preceded by the byte 0xff. It must also be immediately
101 * followed by the byte 0xff.
102 *
103 * Because strings always take up a minimum of 2 words, because 0xff or
104 * 0xfe never appear inside of a utf-8 string and because no two integer
105 * values ever appear in sequence, the only way we can have the
106 * sequence:
107 *
108 * xff __ __ __ __ xff (or 0xfe)
109 *
110 * is in the event of an integer nested between two strings.
111 *
112 * For implementation simplicity/efficiency, strings may not be more
113 * than 65 characters in length (ie: 17 32bit words after padding).
114 *
115 * In the event that we are doing <choices> (ie: not an enum type) then
116 * the value of each choice is set to zero and ignored.
117 */
118
119 #define STRINFO_MAX_WORDS 17
120 G_GNUC_UNUSED static guint
121 strinfo_string_to_words (const gchar *string,
122 guint32 *words,
123 gboolean alias)
124 {
125 guint n_words;
126 gsize size;
127
128 size = strlen (string);
129
130 n_words = MAX (2, (size + 6) >> 2);
131
132 if (n_words > STRINFO_MAX_WORDS)
133 return FALSE;
134
135 words[0] = GUINT32_TO_LE (alias ? 0xfe : 0xff);
136 words[n_words - 1] = GUINT32_TO_BE (0xff);
137 memcpy (((gchar *) words) + 1, string, size + 1);
138
139 return n_words;
140 }
141
142 G_GNUC_UNUSED static gint
143 strinfo_scan (const guint32 *strinfo,
144 guint length,
145 const guint32 *words,
146 guint n_words)
147 {
148 guint i = 0;
149
150 if (length < n_words)
151 return -1;
152
153 while (i <= length - n_words)
154 {
155 guint j = 0;
156
157 for (j = 0; j < n_words; j++)
158 if (strinfo[i + j] != words[j])
159 break;
160
161 if (j == n_words)
162 return i; /* match */
163
164 /* skip at least one word, continue */
165 i += j ? j : 1;
166 }
167
168 return -1;
169 }
170
171 G_GNUC_UNUSED static gint
172 strinfo_find_string (const guint32 *strinfo,
173 guint length,
174 const gchar *string,
175 gboolean alias)
176 {
177 guint32 words[STRINFO_MAX_WORDS];
178 guint n_words;
179
180 if (length == 0)
181 return -1;
182
183 n_words = strinfo_string_to_words (string, words, alias);
184
185 return strinfo_scan (strinfo + 1, length - 1, words, n_words);
186 }
187
188 G_GNUC_UNUSED static gint
189 strinfo_find_integer (const guint32 *strinfo,
190 guint length,
191 guint32 value)
192 {
193 guint i;
194
195 for (i = 0; i < length; i++)
196 if (strinfo[i] == GUINT32_TO_LE (value))
197 {
198 const guchar *charinfo = (const guchar *) &strinfo[i];
199
200 /* make sure it has 0xff on either side */
201 if ((i == 0 || charinfo[-1] == 0xff) && charinfo[4] == 0xff)
202 return i;
203 }
204
205 return -1;
206 }
207
208 G_GNUC_UNUSED static gboolean
209 strinfo_is_string_valid (const guint32 *strinfo,
210 guint length,
211 const gchar *string)
212 {
213 return strinfo_find_string (strinfo, length, string, FALSE) != -1;
214 }
215
216 G_GNUC_UNUSED static gboolean
217 strinfo_enum_from_string (const guint32 *strinfo,
218 guint length,
219 const gchar *string,
220 guint *result)
221 {
222 gint index;
223
224 index = strinfo_find_string (strinfo, length, string, FALSE);
225
226 if (index < 0)
227 return FALSE;
228
229 *result = GUINT32_FROM_LE (strinfo[index]);
230 return TRUE;
231 }
232
233 G_GNUC_UNUSED static const gchar *
234 strinfo_string_from_enum (const guint32 *strinfo,
235 guint length,
236 guint value)
237 {
238 gint index;
239
240 index = strinfo_find_integer (strinfo, length, value);
241
242 if (index < 0)
243 return NULL;
244
245 return 1 + (const gchar *) &strinfo[index + 1];
246 }
247
248 G_GNUC_UNUSED static const gchar *
249 strinfo_string_from_alias (const guint32 *strinfo,
250 guint length,
251 const gchar *alias)
252 {
253 gint index;
254
255 index = strinfo_find_string (strinfo, length, alias, TRUE);
256
257 if (index < 0)
258 return NULL;
259
260 return 1 + (const gchar *) &strinfo[GUINT32_TO_LE (strinfo[index]) + 1];
261 }
262
263 G_GNUC_UNUSED static GVariant *
264 strinfo_enumerate (const guint32 *strinfo,
265 guint length)
266 {
267 GVariantBuilder builder;
268 const gchar *ptr, *end;
269
270 ptr = (gpointer) strinfo;
271 end = ptr + 4 * length;
272
273 ptr += 4;
274
275 g_variant_builder_init (&builder, G_VARIANT_TYPE_STRING_ARRAY);
276
277 while (ptr < end)
278 {
279 /* don't include aliases */
280 if (*ptr == '\xff')
281 g_variant_builder_add (&builder, "s", ptr + 1);
282
283 /* find the end of this string */
284 ptr = memchr (ptr, '\xff', end - ptr);
285 g_assert (ptr != NULL);
286
287 /* skip over the int to the next string */
288 ptr += 5;
289 }
290
291 return g_variant_builder_end (&builder);
292 }
293
294 G_GNUC_UNUSED static void
295 strinfo_builder_append_item (GString *builder,
296 const gchar *string,
297 guint value)
298 {
299 guint32 words[STRINFO_MAX_WORDS];
300 guint n_words;
301
302 value = GUINT32_TO_LE (value);
303
304 n_words = strinfo_string_to_words (string, words, FALSE);
305 g_string_append_len (builder, (void *) &value, sizeof value);
306 g_string_append_len (builder, (void *) words, 4 * n_words);
307 }
308
309 G_GNUC_UNUSED static gboolean
310 strinfo_builder_append_alias (GString *builder,
311 const gchar *alias,
312 const gchar *target)
313 {
314 guint32 words[STRINFO_MAX_WORDS];
315 guint n_words;
316 guint value;
317 gint index;
318
319 index = strinfo_find_string ((const guint32 *) builder->str,
320 builder->len / 4, target, FALSE);
321
322 if (index == -1)
323 return FALSE;
324
325 value = GUINT32_TO_LE (index);
326
327 n_words = strinfo_string_to_words (alias, words, TRUE);
328 g_string_append_len (builder, (void *) &value, sizeof value);
329 g_string_append_len (builder, (void *) words, 4 * n_words);
330
331 return TRUE;
332 }
333
334 G_GNUC_UNUSED static gboolean
335 strinfo_builder_contains (GString *builder,
336 const gchar *string)
337 {
338 return strinfo_find_string ((const guint32 *) builder->str,
339 builder->len / 4, string, FALSE) != -1 ||
340 strinfo_find_string ((const guint32 *) builder->str,
341 builder->len / 4, string, TRUE) != -1;
342 }
343
344 G_GNUC_UNUSED static gboolean
345 strinfo_builder_contains_value (GString *builder,
346 guint value)
347 {
348 return strinfo_string_from_enum ((const guint32 *) builder->str,
349 builder->len / 4, value) != NULL;
350 }