1 /*
2 * readfile.c - Read an entire file into a string.
3 *
4 * Arnold Robbins
5 * Tue Apr 23 17:43:30 IDT 2002
6 * Revised per Peter Tillier
7 * Mon Jun 9 17:05:11 IDT 2003
8 * Revised for new dynamic function facilities
9 * Mon Jun 14 14:53:07 IDT 2004
10 * Revised for formal API May 2012
11 * Added input parser March 2014
12 */
13
14 /*
15 * Copyright (C) 2002, 2003, 2004, 2011, 2012, 2013, 2014, 2018, 2022,
16 * the Free Software Foundation, Inc.
17 *
18 * This file is part of GAWK, the GNU implementation of the
19 * AWK Programming Language.
20 *
21 * GAWK is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 3 of the License, or
24 * (at your option) any later version.
25 *
26 * GAWK is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
34 */
35
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #define _BSD_SOURCE
41
42 #include <stdio.h>
43 #include <assert.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49
50 #include <sys/types.h>
51 #include <sys/stat.h>
52
53 #include "gawkapi.h"
54
55 #include "gettext.h"
/* _ --- shorthand that passes a message string through gettext() */
#define _(msgid) gettext(msgid)
/* N_ --- expands to its argument unchanged; no gettext() call is made */
#define N_(msgid) msgid

/*
 * If no header above supplied O_BINARY, define it as 0 so that
 * OR-ing it into the open() flags has no effect.
 */
#ifndef O_BINARY
#define O_BINARY 0
#endif
62
63 static const gawk_api_t *api; /* for convenience macros to work */
64 static awk_ext_id_t ext_id;
65 static const char *ext_version = "readfile extension: version 2.0";
66 static awk_bool_t init_readfile();
67 static awk_bool_t (*init_func)(void) = init_readfile;
68
69 int plugin_is_GPL_compatible;
70
71 /* read_file_to_buffer --- handle the mechanics of reading the file */
72
73 static char *
74 read_file_to_buffer(int fd, const struct stat *sbuf)
75 {
76 char *text;
77
78 if ((sbuf->st_mode & S_IFMT) != S_IFREG) {
79 errno = EINVAL;
80 update_ERRNO_int(errno);
81 return NULL;
82 }
83
84 emalloc(text, char *, sbuf->st_size + 1, "do_readfile");
85
86 if (read(fd, text, sbuf->st_size) != sbuf->st_size) {
87 update_ERRNO_int(errno);
88 gawk_free(text);
89 return NULL;
90 }
91 text[sbuf->st_size] = '\0';
92 return text;
93 }
94
95 /* do_readfile --- read a file into memory */
96
97 static awk_value_t *
98 do_readfile(int nargs, awk_value_t *result, struct awk_ext_func *unused)
99 {
100 awk_value_t filename;
101 int ret;
102 struct stat sbuf;
103 char *text;
104 int fd;
105
106 assert(result != NULL);
107 make_null_string(result); /* default return value */
108
109 unset_ERRNO();
110
111 if (get_argument(0, AWK_STRING, &filename)) {
112 ret = stat(filename.str_value.str, & sbuf);
113 if (ret < 0) {
114 update_ERRNO_int(errno);
115 goto done;
116 }
117
118 if ((fd = open(filename.str_value.str, O_RDONLY|O_BINARY)) < 0) {
119 update_ERRNO_int(errno);
120 goto done;
121 }
122
123 text = read_file_to_buffer(fd, & sbuf);
124 if (text == NULL) {
125 close(fd);
126 goto done; /* ERRNO already updated */
127 }
128
129 close(fd);
130 make_malloced_string(text, sbuf.st_size, result);
131 goto done;
132 } else if (do_lint)
133 lintwarn(ext_id, _("readfile: called with wrong kind of argument"));
134
135 done:
136 /* Set the return value */
137 return result;
138 }
139
140 /* readfile_get_record --- read the whole file as one record */
141
142 static int
143 readfile_get_record(char **out, awk_input_buf_t *iobuf, int *errcode,
144 char **rt_start, size_t *rt_len,
145 const awk_fieldwidth_info_t **unused)
146 {
147 char *text;
148
149 /*
150 * The caller sets *errcode to 0, so we should set it only if an
151 * error occurs.
152 */
153
154 if (out == NULL || iobuf == NULL)
155 return EOF;
156
157 if (iobuf->opaque != NULL) {
158 /*
159 * Already read the whole file,
160 * free up stuff and return EOF
161 */
162 gawk_free(iobuf->opaque);
163 iobuf->opaque = NULL;
164 return EOF;
165 }
166
167 /* read file */
168 text = read_file_to_buffer(iobuf->fd, & iobuf->sbuf);
169 if (text == NULL)
170 return EOF;
171
172 /* set up the iobuf for next time */
173 iobuf->opaque = text;
174
175 /* set return values */
176 *rt_start = NULL;
177 *rt_len = 0;
178 *out = text;
179
180 /* return count */
181 return iobuf->sbuf.st_size;
182 }
183
184 /* readfile_can_take_file --- return true if we want the file */
185
186 static awk_bool_t
187 readfile_can_take_file(const awk_input_buf_t *iobuf)
188 {
189 awk_value_t array, index, value;
190
191 if (iobuf == NULL)
192 return awk_false;
193
194 /*
195 * This could fail if PROCINFO isn't referenced from
196 * the awk program. It's not a "can't happen" error.
197 */
198 if (! sym_lookup("PROCINFO", AWK_ARRAY, & array)) {
199 return awk_false;
200 }
201
202 (void) make_const_string("readfile", 8, & index);
203
204 if (! get_array_element(array.array_cookie, & index, AWK_UNDEFINED, & value)) {
205 return awk_false;
206 }
207
208 return awk_true;
209 }
210
211 /* readfile_take_control_of --- take over the file */
212
213 static awk_bool_t
214 readfile_take_control_of(awk_input_buf_t *iobuf)
215 {
216 if (iobuf == NULL)
217 return awk_false;
218
219 iobuf->get_record = readfile_get_record;
220 return awk_true;
221 }
222
/*
 * readfile_parser --- input parser descriptor for this extension;
 * init_readfile() hands it to register_input_parser().
 */
static awk_input_parser_t readfile_parser = {
	"readfile",			/* parser name */
	readfile_can_take_file,		/* decides whether to claim a file */
	readfile_take_control_of,	/* installs readfile_get_record on a claimed file */
	NULL				/* NOTE(review): presumably a link member owned by gawk --- confirm in gawkapi.h */
};
229
230 /* init_readfile --- set things up */
231
232 static awk_bool_t
233 init_readfile()
234 {
235 register_input_parser(& readfile_parser);
236
237 return awk_true;
238 }
239
/*
 * func_table --- extension functions made available by this library.
 * The single entry binds the name "readfile" to do_readfile().
 *
 * NOTE(review): the two 1s are presumably the maximum and minimum
 * argument counts, and awk_false / NULL a lint flag and a private data
 * pointer --- confirm against awk_ext_func_t in gawkapi.h.
 */
static awk_ext_func_t func_table[] = {
	{ "readfile", do_readfile, 1, 1, awk_false, NULL },
};
243
/*
 * define the dl_load function using the boilerplate macro
 *
 * NOTE(review): the macro comes from gawkapi.h and its expansion is not
 * visible here; the third argument is passed as an empty string ---
 * confirm its meaning in gawkapi.h.
 */

dl_load_func(func_table, readfile, "")