1 /*
2 * gawkapi.h -- Definitions for use by extension functions calling into gawk.
3 */
4
5 /*
6 * Copyright (C) 2012-2019, 2021, 2022, 2023, the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
/*
 * The following types and/or macros and/or functions are referenced
 * in this file.  For correct use, you must therefore include the
 * corresponding standard header file BEFORE including this file.
 *
 * FILE			- <stdio.h>
 * NULL			- <stddef.h>
 * memset(), memcpy()	- <string.h>
 * size_t, ssize_t	- <sys/types.h>
 * struct stat		- <sys/stat.h>
 *
 * Due to portability concerns, especially to systems that are not
 * fully standards-compliant, it is your responsibility to include
 * the correct files in the correct way. This requirement is necessary
 * in order to keep this file clean, instead of becoming a portability
 * hodge-podge as can be seen in the gawk source code.
 *
 * To pass reasonable integer values for ERRNO, you will also need to
 * include <errno.h>.
 */
46
47 #ifndef _GAWK_API_H
48 #define _GAWK_API_H
49
/*
 * General introduction:
 *
 * This API purposely restricts itself to ISO C 90 features.  In particular, no
 * bool, no // comments, no use of the restrict keyword, or anything else,
 * in order to provide maximal portability.
 *
 * Exception: the "inline" keyword is used below in the "constructor"
 * functions. If your compiler doesn't support it, you should either
 * -Dinline='' on your command line, or use the autotools and include a
 * config.h in your extensions.
 *
 * Additional important information:
 *
 * 1. ALL string values in awk_value_t objects need to come from api_malloc().
 * Gawk will handle releasing the storage if necessary.  This is slightly
 * awkward, in that you can't take an awk_value_t that you got from gawk
 * and reuse it directly, even for something that is conceptually pass
 * by value.
 *
 * 2. Due to gawk internals, after using sym_update() to install an array
 * into gawk, you have to retrieve the array cookie from the value
 * passed in to sym_update().  Like so:
 *
 *	new_array = create_array();
 *	val.val_type = AWK_ARRAY;
 *	val.array_cookie = new_array;
 *	sym_update("array", & val);	// install array in the symbol table
 *
 *	new_array = val.array_cookie;	// MUST DO THIS
 *
 *	// fill in new array with lots of subscripts and values
 *
 * Similarly, if installing a new array as a subarray of an existing
 * array, you must add the new array to its parent before adding any
 * elements to it.
 *
 * You must also retrieve the value of the array_cookie after the call
 * to set_element().
 *
 * Thus, the correct way to build an array is to work "top down".
 * Create the array, and immediately install it in gawk's symbol table
 * using sym_update(), or install it as an element in a previously
 * existing array using set_element().
 *
 * Thus the new array must ultimately be rooted in a global symbol. This is
 * necessary before installing any subarrays in it, due to gawk's
 * internal implementation.  Strictly speaking, this is required only
 * for arrays that will have subarrays as elements; however it is
 * a good idea to always do this.  This restriction may be relaxed
 * in a subsequent revision of the API.
 *
 * 3. While each routine in the API has a few lines of summary for it
 * in this header, said summaries are not standalone, adequate documentation.
 * You should read the chapter in the gawk manual on writing extensions.
 * Find it online at
 * https://www.gnu.org/software/gawk/manual/html_node/Dynamic-Extensions.html,
 * or in the Info files distributed with gawk.
 */
108
109 /* Allow use in C++ code. */
110 #ifdef __cplusplus
111 extern "C" {
112 #endif
113
/*
 * This is used to keep extensions from modifying certain fields in some
 * structs.
 *
 * When GAWK is defined, awk_const expands to nothing.  Otherwise it
 * expands to `const', so that assigning to a member declared with it
 * is rejected by the compiler.
 */
#ifdef GAWK
#define awk_const
#else
#define awk_const const
#endif
120
/*
 * Boolean type used by the API.  awk_false is explicitly 0; awk_true
 * follows it and is therefore 1.
 */
typedef enum awk_bool {
	awk_false = 0,
	awk_true
} awk_bool_t;	/* we don't use <stdbool.h> on purpose */
125
126 /*
127 * If an input parser would like to specify the field positions in the input
128 * record, it may populate an awk_fieldwidth_info_t structure to indicate
129 * the location of each field. The use_chars boolean controls whether the
130 * field lengths are specified in terms of bytes or potentially multi-byte
131 * characters. Performance will be better if the values are supplied in
132 * terms of bytes. The fields[0].skip value indicates how many bytes (or
133 * characters) to skip before $1, and fields[0].len is the length of $1, etc.
134 */
135
136 typedef struct {
137 awk_bool_t use_chars; /* false ==> use bytes */
138 size_t nf;
139 struct awk_field_info {
140 size_t skip; /* amount to skip before field starts */
141 size_t len; /* length of field */
142 } fields[1]; /* actual dimension should be nf */
143 } awk_fieldwidth_info_t;
144
/*
 * This macro calculates the total struct size needed for an
 * awk_fieldwidth_info_t holding NF fields. This is useful when
 * calling malloc or realloc.
 *
 * The structure already contains one fields[] element, so NF-1 more
 * are added.  The element size is taken from the fields[] member itself
 * and not from the tag `struct awk_field_info': in C++ that tag is nested
 * inside awk_fieldwidth_info_t and is not visible at file scope.  The
 * operand of sizeof is not evaluated, so the null pointer is never
 * dereferenced.
 */
#define awk_fieldwidth_info_size(NF) (sizeof(awk_fieldwidth_info_t) + \
			(((NF)-1) * sizeof(((awk_fieldwidth_info_t *) 0)->fields[0])))
151
152 /* The information about input files that input parsers need to know: */
153 typedef struct awk_input {
154 const char *name; /* filename */
155 int fd; /* file descriptor */
156 #define INVALID_HANDLE (-1)
157 void *opaque; /* private data for input parsers */
158
159 /*
160 * The get_record function is called to read the next record of data.
161 *
162 * It should return the length of the input record or EOF, and it
163 * should set *out to point to the contents of $0. The rt_start
164 * and rt_len arguments should be used to return RT to gawk.
165 * If EOF is not returned, the parser must set *rt_len (and
166 * *rt_start if *rt_len is non-zero).
167 *
168 * Note that gawk will make a copy of the record in *out, so the
169 * parser is responsible for managing its own memory buffer.
170 * Similarly, gawk will make its own copy of RT, so the parser
171 * is also responsible for managing this memory.
172 *
173 * It is guaranteed that errcode is a valid pointer, so there is
174 * no need to test for a NULL value. Gawk sets *errcode to 0,
175 * so there is no need to set it unless an error occurs.
176 *
177 * If an error does occur, the function should return EOF and set
178 * *errcode to a positive value. In that case, if *errcode is greater
179 * than zero, gawk will automatically update the ERRNO variable based
180 * on the value of *errcode (e.g., setting *errcode = errno should do
181 * the right thing).
182 *
183 * If field_width is non-NULL, then *field_width will be initialized
184 * to NULL, and the function may set it to point to a structure
185 * supplying field width information to override the default
186 * gawk field parsing mechanism. Note that this structure will not
187 * be copied by gawk; it must persist at least until the next call
188 * to get_record or close_func. Note also that field_width will
189 * be NULL when getline is assigning the results to a variable, thus
190 * field parsing is not needed.
191 */
192 int (*get_record)(char **out, struct awk_input *iobuf, int *errcode,
193 char **rt_start, size_t *rt_len,
194 const awk_fieldwidth_info_t **field_width);
195
196 /*
197 * This replaces the POSIX read() system call. Use it if you want to
198 * manage reading raw bytes yourself, and let gawk parse the record.
199 */
200 ssize_t (*read_func)(int, void *, size_t);
201
202 /*
203 * The close_func is called to allow the parser to free private data.
204 * Gawk itself will close the fd unless close_func first sets it to
205 * INVALID_HANDLE.
206 */
207 void (*close_func)(struct awk_input *iobuf);
208
209 /* put last, for alignment. bleah */
210 struct stat sbuf; /* stat buf */
211
212 } awk_input_buf_t;
213
214 typedef struct awk_input_parser {
215 const char *name; /* name of parser */
216
217 /*
218 * The can_take_file function should return true if the parser
219 * would like to parse this file. It should not change any gawk
220 * state!
221 */
222 awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
223
224 /*
225 * If this parser is selected, then take_control_of will be called.
226 * It can assume that a previous call to can_take_file was successful,
227 * and no gawk state has changed since that call. It should populate
228 * the awk_input_buf_t's get_record, close_func, and opaque values as needed.
229 * It should return true if successful.
230 */
231 awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
232
233 awk_const struct awk_input_parser *awk_const next; /* for use by gawk */
234 } awk_input_parser_t;
235
236 /*
237 * Similar for output wrapper.
238 */
239
240 /* First the data structure */
241 typedef struct awk_output_buf {
242 const char *name; /* name of output file */
243 const char *mode; /* mode argument to fopen */
244 FILE *fp; /* stdio file pointer */
245 awk_bool_t redirected; /* true if a wrapper is active */
246 void *opaque; /* for use by output wrapper */
247
248 /*
249 * Replacement functions for I/O. Just like the regular
250 * versions but also take the opaque pointer argument.
251 */
252 size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
253 FILE *fp, void *opaque);
254 int (*gawk_fflush)(FILE *fp, void *opaque);
255 int (*gawk_ferror)(FILE *fp, void *opaque);
256 int (*gawk_fclose)(FILE *fp, void *opaque);
257 } awk_output_buf_t;
258
259 /* Next the output wrapper registered with gawk */
260 typedef struct awk_output_wrapper {
261 const char *name; /* name of the wrapper */
262
263 /*
264 * The can_take_file function should return true if the wrapper
265 * would like to process this file. It should not change any gawk
266 * state!
267 */
268 awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
269
270 /*
271 * If this wrapper is selected, then take_control_of will be called.
272 * It can assume that a previous call to can_take_file was successful,
273 * and no gawk state has changed since that call. It should populate
274 * the awk_output_buf_t function pointers and opaque pointer as needed.
275 * It should return true if successful.
276 */
277 awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
278
279 awk_const struct awk_output_wrapper *awk_const next; /* for use by gawk */
280 } awk_output_wrapper_t;
281
282 /* A two-way processor combines an input parser and an output wrapper. */
283 typedef struct awk_two_way_processor {
284 const char *name; /* name of the two-way processor */
285
286 /*
287 * The can_take_file function should return true if the two-way
288 * processor would like to parse this file. It should not change
289 * any gawk state!
290 */
291 awk_bool_t (*can_take_two_way)(const char *name);
292
293 /*
294 * If this processor is selected, then take_control_of will be called.
295 * It can assume that a previous call to can_take_file was successful,
296 * and no gawk state has changed since that call. It should populate
297 * the awk_input_buf_t and awk_otuput_buf_t structures as needed.
298 * It should return true if successful.
299 */
300 awk_bool_t (*take_control_of)(const char *name, awk_input_buf_t *inbuf,
301 awk_output_buf_t *outbuf);
302
303 awk_const struct awk_two_way_processor *awk_const next; /* for use by gawk */
304 } awk_two_way_processor_t;
305
/*
 * The API version, as plain integer macros.  The enumeration constants
 * that follow are defined from these, so the two always agree.
 */
#define gawk_api_major_version 3
#define gawk_api_minor_version 2

/* Current version of the API. */
enum {
	GAWK_API_MAJOR_VERSION = gawk_api_major_version,
	GAWK_API_MINOR_VERSION = gawk_api_minor_version
};
314
/* A number of typedefs related to different types of values. */

/*
 * A mutable string. Gawk owns the memory pointed to if it supplied
 * the value. Otherwise, it takes ownership of the memory pointed to.
 *
 * The API deals exclusively with regular chars; these strings may
 * be multibyte encoded in the current locale's encoding and character
 * set. Gawk will convert internally to wide characters if necessary.
 *
 * Note that a string provided by gawk will always be terminated
 * with a '\0' character.
 */
typedef struct awk_string {
	char *str;	/* data */
	size_t len;	/* length thereof, in chars */
} awk_string_t;
332
/* Tells which representation the `ptr' member of an awk_number_t holds. */
enum AWK_NUMBER_TYPE {
	AWK_NUMBER_TYPE_DOUBLE,
	AWK_NUMBER_TYPE_MPFR,
	AWK_NUMBER_TYPE_MPZ
};

/*
 * When type is AWK_NUMBER_TYPE_MPFR or AWK_NUMBER_TYPE_MPZ, the memory
 * pointed to by the ptr member belongs to gawk if it came from gawk.
 * Otherwise the memory belongs to the extension and gawk copies it when
 * it's received.  See the manual for further discussion.
 */

typedef struct awk_number {
	double d;	/* always populated in data received from gawk */
	enum AWK_NUMBER_TYPE type;
	void *ptr;	/* either NULL or mpfr_ptr or mpz_ptr */
} awk_number_t;
351
/* Arrays are represented as an opaque type. */
typedef void *awk_array_t;

/* Scalars can be represented as an opaque type. */
typedef void *awk_scalar_t;

/* Any value can be stored as a cookie. */
typedef void *awk_value_cookie_t;
360
/*
 * This tag defines the type of a value.
 *
 * Values are associated with regular variables and with array elements.
 * Since arrays can be multidimensional (as can regular variables)
 * it's valid to have a "value" that is actually an array.
 *
 * The enumerators take consecutive values starting at zero, in the
 * order listed.
 */
typedef enum {
	AWK_UNDEFINED,
	AWK_NUMBER,
	AWK_STRING,
	AWK_REGEX,
	AWK_STRNUM,
	AWK_ARRAY,
	AWK_SCALAR,		/* opaque access to a variable */
	AWK_VALUE_COOKIE,	/* for updating a previously created value */
	AWK_BOOL
} awk_valtype_t;
379
380 /*
381 * An awk value. The val_type tag indicates what
382 * is in the union.
383 */
384 typedef struct awk_value {
385 awk_valtype_t val_type;
386 union {
387 awk_string_t s;
388 awk_number_t n;
389 awk_array_t a;
390 awk_scalar_t scl;
391 awk_value_cookie_t vc;
392 awk_bool_t b;
393 } u;
394 #define str_value u.s
395 #define strnum_value str_value
396 #define regex_value str_value
397 #define num_value u.n.d
398 #define num_type u.n.type
399 #define num_ptr u.n.ptr
400 #define array_cookie u.a
401 #define scalar_cookie u.scl
402 #define value_cookie u.vc
403 #define bool_value u.b
404 } awk_value_t;
405
406 /*
407 * A "flattened" array element. Gawk produces an array of these
408 * inside the awk_flat_array_t.
409 * ALL memory pointed to belongs to gawk. Individual elements may
410 * be marked for deletion. New elements must be added individually,
411 * one at a time, using the separate API for that purpose.
412 */
413
414 typedef struct awk_element {
415 /* convenience linked list pointer, not used by gawk */
416 struct awk_element *next;
417 enum {
418 AWK_ELEMENT_DEFAULT = 0, /* set by gawk */
419 AWK_ELEMENT_DELETE = 1 /* set by extension if
420 should be deleted */
421 } flags;
422 awk_value_t index;
423 awk_value_t value;
424 } awk_element_t;
425
426 /*
427 * A "flattened" array. See the description above for how
428 * to use the elements contained herein.
429 */
430 typedef struct awk_flat_array {
431 awk_const void *awk_const opaque1; /* private data for use by gawk */
432 awk_const void *awk_const opaque2; /* private data for use by gawk */
433 awk_const size_t count; /* how many elements */
434 awk_element_t elements[1]; /* will be extended */
435 } awk_flat_array_t;
436
437 /*
438 * A record describing an extension function. Upon being
439 * loaded, the extension should pass in one of these to gawk for
440 * each C function.
441 *
442 * Each called function must fill in the result with either a scalar
443 * (number, string, or regex). Gawk takes ownership of any string memory.
444 *
445 * The called function must return the value of `result'.
446 * This is for the convenience of the calling code inside gawk.
447 *
448 * Each extension function may decide what to do if the number of
449 * arguments isn't what it expected. Following awk functions, it
450 * is likely OK to ignore extra arguments.
451 *
452 * 'min_required_args' indicates how many arguments MUST be passed.
453 * The API will throw a fatal error if not enough are passed.
454 *
455 * 'max_expected_args' is more benign; if more than that are passed,
456 * the API prints a lint message (IFF lint is enabled, of course).
457 *
458 * In any case, the extension function itself need not compare the
459 * actual number of arguments passed to those two values if it does
460 * not want to.
461 */
462 typedef struct awk_ext_func {
463 const char *name;
464 awk_value_t *(*const function)(int num_actual_args,
465 awk_value_t *result,
466 struct awk_ext_func *finfo);
467 const size_t max_expected_args;
468 const size_t min_required_args;
469 awk_bool_t suppress_lint;
470 void *data; /* opaque pointer to any extra state */
471 } awk_ext_func_t;
472
typedef void *awk_ext_id_t;	/* opaque type for extension id */
474
475 /*
476 * The API into gawk. Lots of functions here. We hope that they are
477 * logically organized.
478 *
479 * !!! If you make any changes to this structure, please remember to bump !!!
480 * !!! gawk_api_major_version and/or gawk_api_minor_version. !!!
481 */
482 typedef struct gawk_api {
483 /* First, data fields. */
484
485 /* These are what gawk thinks the API version is. */
486 awk_const int major_version;
487 awk_const int minor_version;
488
489 /* GMP/MPFR versions, if extended-precision is available */
490 awk_const int gmp_major_version;
491 awk_const int gmp_minor_version;
492 awk_const int mpfr_major_version;
493 awk_const int mpfr_minor_version;
494
495 /*
496 * These can change on the fly as things happen within gawk.
497 * Currently only do_lint is prone to change, but we reserve
498 * the right to allow the others to do so also.
499 */
500 #define DO_FLAGS_SIZE 6
501 awk_const int do_flags[DO_FLAGS_SIZE];
502 /* Use these as indices into do_flags[] array to check the values */
503 #define gawk_do_lint 0
504 #define gawk_do_traditional 1
505 #define gawk_do_profile 2
506 #define gawk_do_sandbox 3
507 #define gawk_do_debug 4
508 #define gawk_do_mpfr 5
509
510 /* Next, registration functions: */
511
512 /*
513 * Add a function to the interpreter, returns true upon success.
514 * Gawk does not modify what func points to, but the extension
515 * function itself receives this pointer and can modify what it
516 * points to, thus it's not const.
517 */
518 awk_bool_t (*api_add_ext_func)(awk_ext_id_t id, const char *name_space,
519 awk_ext_func_t *func);
520
521 /* Register an input parser; for opening files read-only */
522 void (*api_register_input_parser)(awk_ext_id_t id,
523 awk_input_parser_t *input_parser);
524
525 /* Register an output wrapper, for writing files */
526 void (*api_register_output_wrapper)(awk_ext_id_t id,
527 awk_output_wrapper_t *output_wrapper);
528
529 /* Register a processor for two way I/O */
530 void (*api_register_two_way_processor)(awk_ext_id_t id,
531 awk_two_way_processor_t *two_way_processor);
532
533 /*
534 * Add an exit call back.
535 *
536 * arg0 is a private data pointer for use by the extension;
537 * gawk saves it and passes it into the function pointed
538 * to by funcp at exit.
539 *
540 * Exit callback functions are called in LIFO order.
541 */
542 void (*api_awk_atexit)(awk_ext_id_t id,
543 void (*funcp)(void *data, int exit_status),
544 void *arg0);
545
546 /* Register a version string for this extension with gawk. */
547 void (*api_register_ext_version)(awk_ext_id_t id, const char *version);
548
549 /* Functions to print messages */
550 void (*api_fatal)(awk_ext_id_t id, const char *format, ...);
551 void (*api_warning)(awk_ext_id_t id, const char *format, ...);
552 void (*api_lintwarn)(awk_ext_id_t id, const char *format, ...);
553 void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...);
554
555 /* Functions to update ERRNO */
556 void (*api_update_ERRNO_int)(awk_ext_id_t id, int errno_val);
557 void (*api_update_ERRNO_string)(awk_ext_id_t id, const char *string);
558 void (*api_unset_ERRNO)(awk_ext_id_t id);
559
560 /*
561 * All of the functions that return a value from inside gawk
562 * (get a parameter, get a global variable, get an array element)
563 * behave in the same way.
564 *
565 * For a function parameter, the return is false if the argument
566 * count is out of range, or if the actual parameter does not match
567 * what is specified in wanted. In that case, result->val_type
568 * will hold the actual type of what was passed.
569 *
570 * Similarly for symbol table access to variables and array elements,
571 * the return is false if the actual variable or array element does
572 * not match what was requested, and result->val_type will hold
573 * the actual type.
574
575 Table entry is type returned:
576
577
578 +----------------------------------------------------------------+
579 | Type of Actual Value: |
580 +--------+--------+--------+--------+--------+-------+-----------+
581 | String | Strnum | Number | Regex | Bool | Array | Undefined |
582 +-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+
583 | | String | String | String | String | String | String | false | false |
584 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
585 | | Strnum | false | Strnum | Strnum | false | false | false | false |
586 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
587 | | Number | Number | Number | Number | false | Number | false | false |
588 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
589 | | Regex | false | false | false | Regex | false | false | false |
590 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
591 | Type | Bool | false | false | false | false | Bool | false | false |
592 | Requested +-----------+--------+--------+--------+--------+--------+-------+-----------+
593 | | Array | false | false | false | false | false | Array | false |
594 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
595 | | Scalar | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
596 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
597 | | Undefined | String | Strnum | Number | Regex | Bool | Array | Undefined |
598 | +-----------+--------+--------+--------+--------+--------+-------+-----------+
599 | | Value | false | false | false | false | false | false | false |
600 | | Cookie | | | | | | | |
601 +-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+
602 */
603
604 /* Functions to handle parameters passed to the extension. */
605
606 /*
607 * Get the count'th parameter, zero-based.
608 * Returns false if count is out of range, or if actual parameter
609 * does not match what is specified in wanted. In that case,
610 * result->val_type is as described above.
611 */
612 awk_bool_t (*api_get_argument)(awk_ext_id_t id, size_t count,
613 awk_valtype_t wanted,
614 awk_value_t *result);
615
616 /*
617 * Convert a parameter that was undefined into an array
618 * (provide call-by-reference for arrays). Returns false
619 * if count is too big, or if the argument's type is
620 * not undefined.
621 */
622 awk_bool_t (*api_set_argument)(awk_ext_id_t id,
623 size_t count,
624 awk_array_t array);
625
626 /*
627 * Symbol table access:
628 * - Read-only access to special variables (NF, etc.)
629 * - One special exception: PROCINFO.
630 * - Use sym_update() to change a value, including from UNDEFINED
631 * to scalar or array.
632 */
633 /*
634 * Lookup a variable, fill in value. No messing with the value
635 * returned.
636 * Returns false if the variable doesn't exist or if the wrong type
637 * was requested. In the latter case, vaule->val_type will have
638 * the real type, as described above.
639 *
640 * awk_value_t val;
641 * if (! api->sym_lookup(id, name, wanted, & val))
642 * error_code_here();
643 * else {
644 * // safe to use val
645 * }
646 */
647 awk_bool_t (*api_sym_lookup)(awk_ext_id_t id,
648 const char *name_space,
649 const char *name,
650 awk_valtype_t wanted,
651 awk_value_t *result);
652
653 /*
654 * Update a value. Adds it to the symbol table if not there.
655 * Changing types (scalar <--> array) is not allowed.
656 * In fact, using this to update an array is not allowed, either.
657 * Such an attempt returns false.
658 */
659 awk_bool_t (*api_sym_update)(awk_ext_id_t id,
660 const char *name_space,
661 const char *name,
662 awk_value_t *value);
663
664 /*
665 * A ``scalar cookie'' is an opaque handle that provide access
666 * to a global variable or array. It is an optimization that
667 * avoids looking up variables in gawk's symbol table every time
668 * access is needed.
669 *
670 * This function retrieves the current value of a scalar cookie.
671 * Once you have obtained a scalar_cookie using sym_lookup, you can
672 * use this function to get its value more efficiently.
673 *
674 * Return will be false if the value cannot be retrieved.
675 *
676 * Flow is thus
677 * awk_value_t val;
678 * awk_scalar_t cookie;
679 * api->sym_lookup(id, "variable", AWK_SCALAR, & val); // get the cookie
680 * cookie = val.scalar_cookie;
681 * ...
682 * api->sym_lookup_scalar(id, cookie, wanted, & val); // get the value
683 */
684 awk_bool_t (*api_sym_lookup_scalar)(awk_ext_id_t id,
685 awk_scalar_t cookie,
686 awk_valtype_t wanted,
687 awk_value_t *result);
688
689 /*
690 * Update the value associated with a scalar cookie.
691 * Flow is
692 * sym_lookup with wanted == AWK_SCALAR
693 * if returns false
694 * sym_update with real initial value to install it
695 * sym_lookup again with AWK_SCALAR
696 * else
697 * use the scalar cookie
698 *
699 * Return will be false if the new value is not one of
700 * AWK_STRING, AWK_NUMBER, AWK_REGEX.
701 *
702 * Here too, the built-in variables may not be updated.
703 */
704 awk_bool_t (*api_sym_update_scalar)(awk_ext_id_t id,
705 awk_scalar_t cookie, awk_value_t *value);
706
707 /* Cached values */
708
709 /*
710 * Create a cached string,regex, or numeric value for efficient later
711 * assignment. This improves performance when you want to assign
712 * the same value to one or more variables repeatedly. Only
713 * AWK_NUMBER, AWK_STRING, AWK_REGEX and AWK_STRNUM values are allowed.
714 * Any other type is rejected. We disallow AWK_UNDEFINED since that
715 * case would result in inferior performance.
716 */
717 awk_bool_t (*api_create_value)(awk_ext_id_t id, awk_value_t *value,
718 awk_value_cookie_t *result);
719
720 /*
721 * Release the memory associated with a cookie from api_create_value.
722 * Please call this to free memory when the value is no longer needed.
723 */
724 awk_bool_t (*api_release_value)(awk_ext_id_t id, awk_value_cookie_t vc);
725
726 /* Array management */
727
728 /*
729 * Retrieve total number of elements in array.
730 * Returns false if some kind of error.
731 */
732 awk_bool_t (*api_get_element_count)(awk_ext_id_t id,
733 awk_array_t a_cookie, size_t *count);
734
735 /*
736 * Return the value of an element - read only!
737 * Use set_array_element() to change it.
738 * Behavior for value and return is same as for api_get_argument
739 * and sym_lookup.
740 */
741 awk_bool_t (*api_get_array_element)(awk_ext_id_t id,
742 awk_array_t a_cookie,
743 const awk_value_t *const index,
744 awk_valtype_t wanted,
745 awk_value_t *result);
746
747 /*
748 * Change (or create) element in existing array with
749 * index and value.
750 *
751 * ARGV and ENVIRON may not be updated.
752 */
753 awk_bool_t (*api_set_array_element)(awk_ext_id_t id, awk_array_t a_cookie,
754 const awk_value_t *const index,
755 const awk_value_t *const value);
756
757 /*
758 * Remove the element with the given index.
759 * Returns true if removed or false if element did not exist.
760 */
761 awk_bool_t (*api_del_array_element)(awk_ext_id_t id,
762 awk_array_t a_cookie, const awk_value_t* const index);
763
764 /* Create a new array cookie to which elements may be added. */
765 awk_array_t (*api_create_array)(awk_ext_id_t id);
766
767 /* Clear out an array. */
768 awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie);
769
770 /*
771 * Flatten out an array with type conversions as requested.
772 * This supersedes the earlier api_flatten_array function that
773 * did not allow the caller to specify the requested types.
774 * (That API is still available as a macro, defined below.)
775 */
776 awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id,
777 awk_array_t a_cookie,
778 awk_flat_array_t **data,
779 awk_valtype_t index_type, awk_valtype_t value_type);
780
781 /* When done, delete any marked elements, release the memory. */
782 awk_bool_t (*api_release_flattened_array)(awk_ext_id_t id,
783 awk_array_t a_cookie,
784 awk_flat_array_t *data);
785
786 /*
787 * Hooks to provide access to gawk's memory allocation functions.
788 * This ensures that memory passed between gawk and the extension
789 * is allocated and released by the same library.
790 */
791 void *(*api_malloc)(size_t size);
792 void *(*api_calloc)(size_t nmemb, size_t size);
793 void *(*api_realloc)(void *ptr, size_t size);
794 void (*api_free)(void *ptr);
795
796 /*
797 * Obsolete function, should not be used. It remains only
798 * for binary compatibility. Any value it returns should be
799 * freed via api_free.
800 */
801 void *(*api_get_mpfr)(awk_ext_id_t id);
802
803 /*
804 * Obsolete function, should not be used. It remains only
805 * for binary compatibility. Any value it returns should be
806 * freed via api_free.
807 */
808 void *(*api_get_mpz)(awk_ext_id_t id);
809
810 /*
811 * Look up a file. If the name is NULL or name_len is 0, it returns
812 * data for the currently open input file corresponding to FILENAME
813 * (and it will not access the filetype argument, so that may be
814 * undefined).
815 *
816 * If the file is not already open, try to open it.
817 *
818 * The "filetype" argument should be one of:
819 *
820 * ">", ">>", "<", "|>", "|<", and "|&"
821 *
822 * If the file is not already open, and the fd argument is non-negative,
823 * gawk will use that file descriptor instead of opening the file
824 * in the usual way.
825 *
826 * If the fd is non-negative, but the file exists already, gawk
827 * ignores the fd and returns the existing file. It is the caller's
828 * responsibility to notice that the fd in the returned
829 * awk_input_buf_t does not match the requested value.
830 *
831 * Note that supplying a file descriptor is currently NOT supported
832 * for pipes. It should work for input, output, append, and two-way
833 * (coprocess) sockets. If the filetype is two-way, we assume that
834 * it is a socket!
835 *
836 * Note that in the two-way case, the input and output file descriptors
837 * may differ. To check for success, one must check that either of
838 * them matches.
839 *
840 * ibufp and obufp point at gawk's internal copies of the
841 * awk_input_buf_t and awk_output_t associated with the open
842 * file. Treat these data structures as read-only!
843 */
844 awk_bool_t (*api_get_file)(awk_ext_id_t id,
845 const char *name,
846 size_t name_len,
847 const char *filetype,
848 int fd,
849 /*
850 * Return values (on success, one or both should
851 * be non-NULL):
852 */
853 const awk_input_buf_t **ibufp,
854 const awk_output_buf_t **obufp);
855
856 /* Destroy an array. */
857 awk_bool_t (*api_destroy_array)(awk_ext_id_t id, awk_array_t a_cookie);
858 } gawk_api_t;
859
860 #ifndef GAWK /* these are not for the gawk code itself! */
/*
 * The macros from here on are shorthand for calls made through the
 * API structure. Use them if you want to define "global" variables
 * named api and ext_id to make the code a little easier to read.
 * See the sample boilerplate code, below.
 *
 * Each do_xxx macro yields the matching entry of api->do_flags.
 */
#define do_lint		(api->do_flags[gawk_do_lint])
#define do_traditional	(api->do_flags[gawk_do_traditional])
#define do_profile	(api->do_flags[gawk_do_profile])
#define do_sandbox	(api->do_flags[gawk_do_sandbox])
#define do_debug	(api->do_flags[gawk_do_debug])
#define do_mpfr		(api->do_flags[gawk_do_mpfr])
872
/* Wrappers for api_get_argument and api_set_argument; ext_id is supplied. */
#define get_argument(argnum, want_type, out) \
	(api->api_get_argument(ext_id, argnum, want_type, out))
#define set_argument(argnum, array) \
	(api->api_set_argument(ext_id, argnum, array))

/*
 * Message routines. These expand to the bare function pointer, so
 * the caller passes ext_id explicitly: fatal(ext_id, "format", ...).
 */
#define fatal		api->api_fatal
#define nonfatal	api->api_nonfatal
#define warning		api->api_warning
#define lintwarn	api->api_lintwarn

/* Registration of input parsers, output wrappers and two-way processors. */
#define register_input_parser(input_parser) \
	(api->api_register_input_parser(ext_id, input_parser))
#define register_output_wrapper(output_wrapper) \
	(api->api_register_output_wrapper(ext_id, output_wrapper))
#define register_two_way_processor(two_way_processor) \
	(api->api_register_two_way_processor(ext_id, two_way_processor))

/* ERRNO: set it from an integer, set it from a string, or unset it. */
#define update_ERRNO_int(errcode) \
	(api->api_update_ERRNO_int(ext_id, errcode))
#define update_ERRNO_string(msg) \
	(api->api_update_ERRNO_string(ext_id, msg))
#define unset_ERRNO() \
	(api->api_unset_ERRNO(ext_id))

/* Wrappers for api_add_ext_func and api_awk_atexit. */
#define add_ext_func(name_space, ext_func) \
	(api->api_add_ext_func(ext_id, name_space, ext_func))
#define awk_atexit(handler, handler_arg) \
	(api->api_awk_atexit(ext_id, handler, handler_arg))
895
/* Symbol lookup and update with an explicit namespace argument. */
#define sym_lookup_ns(ns, var_name, want_type, out) \
	(api->api_sym_lookup(ext_id, ns, var_name, want_type, out))
#define sym_update_ns(ns, var_name, new_value) \
	(api->api_sym_update(ext_id, ns, var_name, new_value))

/* The same operations, passing "" as the namespace. */
#define sym_lookup(var_name, want_type, out) \
	sym_lookup_ns("", var_name, want_type, out)
#define sym_update(var_name, new_value) \
	sym_update_ns("", var_name, new_value)
905
/*
 * Scalar-cookie variants: forward to api_sym_lookup_scalar and
 * api_sym_update_scalar, supplying ext_id as the first argument.
 * Like the other wrappers in this file, each expansion is a single
 * fully parenthesized call expression.
 */
#define sym_lookup_scalar(scalar_cookie, wanted, result) \
	(api->api_sym_lookup_scalar(ext_id, scalar_cookie, wanted, result))
#define sym_update_scalar(scalar_cookie, value) \
	(api->api_sym_update_scalar(ext_id, scalar_cookie, value))
910
/* Array wrappers; each supplies ext_id and forwards its arguments unchanged. */
#define get_array_element(arr, subscript, want_type, out) \
	(api->api_get_array_element(ext_id, arr, subscript, want_type, out))

#define set_array_element(arr, subscript, new_value) \
	(api->api_set_array_element(ext_id, arr, subscript, new_value))

/* As set_array_element, taking the index and value members of *element. */
#define set_array_element_by_elem(arr, element) \
	(api->api_set_array_element(ext_id, arr, & (element)->index, & (element)->value))

#define del_array_element(arr, subscript) \
	(api->api_del_array_element(ext_id, arr, subscript))

#define get_element_count(arr, count_ptr) \
	(api->api_get_element_count(ext_id, arr, count_ptr))

#define create_array()		(api->api_create_array(ext_id))

#define destroy_array(arr)	(api->api_destroy_array(ext_id, arr))

#define clear_array(arr)	(api->api_clear_array(ext_id, arr))

#define flatten_array_typed(arr, flat, index_type, value_type) \
	(api->api_flatten_array_typed(ext_id, arr, flat, index_type, value_type))

/* flatten_array_typed with index type AWK_STRING and value type AWK_UNDEFINED. */
#define flatten_array(arr, flat) \
	flatten_array_typed(arr, flat, AWK_STRING, AWK_UNDEFINED)

#define release_flattened_array(arr, flat) \
	(api->api_release_flattened_array(ext_id, arr, flat))
940
/* Memory management through the API's allocator entries (no ext_id needed). */
#define gawk_malloc(nbytes)		(api->api_malloc(nbytes))
#define gawk_calloc(count, nbytes)	(api->api_calloc(count, nbytes))
#define gawk_realloc(old_ptr, nbytes)	(api->api_realloc(old_ptr, nbytes))
#define gawk_free(old_ptr)		(api->api_free(old_ptr))

/* Wrappers for api_create_value and api_release_value. */
#define create_value(src, out) \
	(api->api_create_value(ext_id, src,out))

#define release_value(val) \
	(api->api_release_value(ext_id, val))

/* Wrapper for api_get_file; the arguments are passed through in order. */
#define get_file(fname, fname_len, ftype, fdesc, ibufp, obufp) \
	(api->api_get_file(ext_id, fname, fname_len, ftype, fdesc, ibufp, obufp))

/* These two are obsolete and should not be used. */
#define get_mpfr_ptr()	(api->api_get_mpfr(ext_id))
#define get_mpz_ptr()	(api->api_get_mpz(ext_id))

/* Wrapper for api_register_ext_version. */
#define register_ext_version(vers) \
	(api->api_register_ext_version(ext_id, vers))
961
/*
 * emalloc, ezalloc, erealloc --- allocate through gawk_malloc(),
 * gawk_calloc() or gawk_realloc() and call fatal() if the result
 * compares equal to 0.
 *
 *	pointer	lvalue that receives the result; for erealloc it is
 *		also the block to resize, and it has already been
 *		overwritten by the time fatal() is called
 *	type	pointer type the result is cast to, e.g. char *
 *	size	number of bytes; evaluated a second time when the
 *		failure is reported
 *	message	string placed at the front of the failure message
 *
 * Variables named api and ext_id must be in scope.
 *
 * The size is printed with %lu after a cast to unsigned long: callers
 * normally pass a size_t, which does not match %d, and this file
 * restricts itself to C 90, which has no %zu.
 */
#define emalloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_malloc(size)) == 0) \
			fatal(ext_id, "%s: malloc of %lu bytes failed", message, (unsigned long) (size)); \
	} while(0)

#define ezalloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_calloc(1, size)) == 0) \
			fatal(ext_id, "%s: calloc of %lu bytes failed", message, (unsigned long) (size)); \
	} while(0)

#define erealloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_realloc(pointer, size)) == 0) \
			fatal(ext_id, "%s: realloc of %lu bytes failed", message, (unsigned long) (size)); \
	} while(0)
979
980 /* Constructor functions */
981
982 /* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */
983
984 static inline awk_value_t *
985 r_make_string_type(const gawk_api_t *api, /* needed for emalloc */
986 awk_ext_id_t ext_id, /* ditto */
987 const char *string,
988 size_t length,
989 awk_bool_t duplicate,
990 awk_value_t *result,
991 awk_valtype_t val_type)
992 {
993 char *cp = NULL;
994
995 memset(result, 0, sizeof(*result));
996
997 result->val_type = val_type;
998 result->str_value.len = length;
999
1000 if (duplicate) {
1001 emalloc(cp, char *, length + 1, "r_make_string");
1002 memcpy(cp, string, length);
1003 cp[length] = '\0';
1004 result->str_value.str = cp;
1005 } else {
1006 result->str_value.str = (char *) string;
1007 }
1008
1009 return result;
1010 }
1011
1012 /* r_make_string --- make a string value in result from the passed-in string */
1013
1014 static inline awk_value_t *
1015 r_make_string(const gawk_api_t *api, /* needed for emalloc */
1016 awk_ext_id_t ext_id, /* ditto */
1017 const char *string,
1018 size_t length,
1019 awk_bool_t duplicate,
1020 awk_value_t *result)
1021 {
1022 return r_make_string_type(api, ext_id, string, length, duplicate, result, AWK_STRING);
1023 }
1024
/*
 * String constructors built on r_make_string and r_make_string_type.
 * They expect variables named api and ext_id to be in scope.
 *
 * The make_const_xxx forms pass awk_true for the duplicate argument,
 * so the characters are copied into newly allocated storage. The
 * make_malloced_xxx forms pass awk_false, so the str pointer itself
 * is stored in result.
 * NOTE(review): presumably str must then point at memory obtained
 * from gawk_malloc() and friends --- confirm against the API manual.
 */
#define make_const_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_true, result)
#define make_malloced_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_false, result)

#define make_const_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_REGEX)
#define make_malloced_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_REGEX)

/*
 * Note: The caller may not create a STRNUM, but it can create a string that is
 * flagged as user input that MAY be a STRNUM. Gawk will decide whether it's a
 * STRNUM or a string by checking whether the string is numeric.
 *
 * The duplicate argument is spelled awk_true / awk_false, matching the
 * awk_bool_t parameter type and the macros above, instead of 1 / 0.
 */
#define make_const_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_STRNUM)
#define make_malloced_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_STRNUM)
1038
1039 /* make_null_string --- make a null string value */
1040
1041 static inline awk_value_t *
1042 make_null_string(awk_value_t *result)
1043 {
1044 memset(result, 0, sizeof(*result));
1045 result->val_type = AWK_UNDEFINED;
1046
1047 return result;
1048 }
1049
1050 /* make_number --- make a number value in result */
1051
1052 static inline awk_value_t *
1053 make_number(double num, awk_value_t *result)
1054 {
1055 result->val_type = AWK_NUMBER;
1056 result->num_value = num;
1057 result->num_type = AWK_NUMBER_TYPE_DOUBLE;
1058 return result;
1059 }
1060
1061 /*
1062 * make_number_mpz --- make an mpz number value in result.
1063 * The mpz_ptr must be from a call to get_mpz_ptr.
1064 */
1065
1066 static inline awk_value_t *
1067 make_number_mpz(void *mpz_ptr, awk_value_t *result)
1068 {
1069 result->val_type = AWK_NUMBER;
1070 result->num_type = AWK_NUMBER_TYPE_MPZ;
1071 result->num_ptr = mpz_ptr;
1072 return result;
1073 }
1074
1075 /*
1076 * make_number_mpfr --- make an mpfr number value in result.
1077 * The mpfr_ptr must be from a call to get_mpfr_ptr.
1078 */
1079
1080 static inline awk_value_t *
1081 make_number_mpfr(void *mpfr_ptr, awk_value_t *result)
1082 {
1083 result->val_type = AWK_NUMBER;
1084 result->num_type = AWK_NUMBER_TYPE_MPFR;
1085 result->num_ptr = mpfr_ptr;
1086 return result;
1087 }
1088
1089 /* make_bool --- make a bool value in result */
1090
1091 static inline awk_value_t *
1092 make_bool(awk_bool_t boolval, awk_value_t *result)
1093 {
1094 result->val_type = AWK_BOOL;
1095 result->bool_value = boolval;
1096 return result;
1097 }
1098
1099
/*
 * Each extension must define a function with this prototype:
 *
 *	int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)
 *
 * The return value should be zero on failure and non-zero on success.
 *
 * For the macros to work, the function should save api_p in a global
 * variable named 'api' and save id in a global variable named 'ext_id'.
 * In addition, a global function pointer named 'init_func' should be
 * defined and set to either NULL or an initialization function that
 * returns non-zero on success and zero upon failure. The dl_load_func
 * macro, below, also expects a global string pointer named
 * 'ext_version', which may be NULL.
 */
1113
1114 extern int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id);
1115
1116 #if 0
1117 /* Boilerplate code: */
1118 int plugin_is_GPL_compatible;
1119
static const gawk_api_t *api;
1121 static awk_ext_id_t ext_id;
1122 static const char *ext_version = NULL; /* or ... = "some string" */
1123
1124 static awk_ext_func_t func_table[] = {
1125 { "name", do_name, 1 },
1126 /* ... */
1127 };
1128
1129 /* EITHER: */
1130
1131 static awk_bool_t (*init_func)(void) = NULL;
1132
1133 /* OR: */
1134
1135 static awk_bool_t
1136 init_my_extension(void)
1137 {
1138 ...
1139 }
1140
1141 static awk_bool_t (*init_func)(void) = init_my_extension;
1142
1143 dl_load_func(func_table, some_name, "name_space_in_quotes")
1144 #endif
1145
/*
 * dl_load_func --- expand to a complete definition of dl_load().
 *
 *	func_table	array of function descriptions; entries are
 *			registered in order, stopping at the end of the
 *			array or at the first entry whose name is NULL
 *	extension	bare token, stringized into the messages
 *	name_space	passed to add_ext_func for every entry
 *
 * The generated function stores its arguments in the variables api and
 * ext_id, and exits with status 1 if gawk's API major version differs
 * from GAWK_API_MAJOR_VERSION or its minor version is lower than
 * GAWK_API_MINOR_VERSION. It then runs check_mpfr_version, registers
 * the functions, calls init_func if that is not NULL, and registers
 * ext_version if that is not NULL. It returns non-zero only if no
 * registration and no initialization failed.
 *
 * The expansion uses fprintf, stderr and exit, so <stdio.h> and
 * <stdlib.h> must be included by the file that uses this macro.
 */
#define dl_load_func(func_table, extension, name_space) \
int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)  \
{ \
	int errors = 0; \
	size_t i, j; \
\
	api = api_p; \
	ext_id = (void **) id; \
\
	/* need the same major version and at least our minor version */ \
	if (! (api->major_version == GAWK_API_MAJOR_VERSION \
	       && api->minor_version >= GAWK_API_MINOR_VERSION)) { \
		fprintf(stderr, #extension ": version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (API %d.%d), gawk version (API %d.%d)\n", \
			GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION, \
			api->major_version, api->minor_version); \
		exit(1); \
	} \
\
	check_mpfr_version(extension); \
\
	/* load functions */ \
	j = sizeof(func_table) / sizeof(func_table[0]); \
	for (i = 0; i < j && func_table[i].name != NULL; i++) { \
		if (add_ext_func(name_space, & func_table[i])) \
			continue; \
		warning(ext_id, #extension ": could not add %s", \
				func_table[i].name); \
		errors++; \
	} \
\
	/* optional initialization hook */ \
	if (init_func != NULL && ! init_func()) { \
		warning(ext_id, #extension ": initialization function failed"); \
		errors++; \
	} \
\
	if (ext_version != NULL) \
		register_ext_version(ext_version); \
\
	return (errors == 0); \
}
1189
/*
 * check_mpfr_version --- compare the GMP and MPFR versions recorded
 * in the API structure with the ones this extension is compiled with.
 *
 * When both __GNU_MP_VERSION and MPFR_VERSION_MAJOR are defined, the
 * macro prints a message and exits with status 1 if a major version
 * differs or if gawk's minor version is lower than the extension's.
 * Otherwise the macro expands to nothing.
 *
 * The argument is a bare token that is stringized into the messages.
 */
#if defined(__GNU_MP_VERSION) && defined(MPFR_VERSION_MAJOR)
#define check_mpfr_version(extension) do { \
	if (! (api->gmp_major_version == __GNU_MP_VERSION \
	       && api->gmp_minor_version >= __GNU_MP_VERSION_MINOR)) { \
		fprintf(stderr, #extension ": GMP version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, \
			api->gmp_major_version, api->gmp_minor_version); \
		exit(1); \
	} \
	if (! (api->mpfr_major_version == MPFR_VERSION_MAJOR \
	       && api->mpfr_minor_version >= MPFR_VERSION_MINOR)) { \
		fprintf(stderr, #extension ": MPFR version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			MPFR_VERSION_MAJOR, MPFR_VERSION_MINOR, \
			api->mpfr_major_version, api->mpfr_minor_version); \
		exit(1); \
	} \
} while (0)
#else
#define check_mpfr_version(extension)	/* nothing */
#endif
1212
1213 #endif /* GAWK */
1214
1215 #ifdef __cplusplus
1216 }
1217 #endif /* C++ */
1218
1219 #endif /* _GAWK_API_H */