1 /*
2 * readdir.c --- Provide an input parser to read directories
3 *
4 * Arnold Robbins
5 * arnold@skeeve.com
6 * Written 7/2012
7 *
8 * Andrew Schorr and Arnold Robbins: further fixes 8/2012.
9 * Simplified 11/2012.
10 * Improved 3/2019.
11 */
12
13 /*
14 * Copyright (C) 2012-2014, 2017, 2018, 2019, 2023,
15 * the Free Software Foundation, Inc.
16 *
17 * This file is part of GAWK, the GNU implementation of the
18 * AWK Programming Language.
19 *
20 * GAWK is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * GAWK is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
33 */
34
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38
39 #define _BSD_SOURCE
40 #include <stdio.h>
41 #include <errno.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45
46 #include <sys/types.h>
47 #include <sys/stat.h>
48
49 #ifdef HAVE_LIMITS_H
50 #include <limits.h>
51 #endif
52
53 #ifdef HAVE_DIRENT_H
54 #include <dirent.h>
55 #else
56 #error Cannot compile the readdir extension on this system!
57 #endif
58
59 #ifdef __MINGW32__
60 #define WIN32_LEAN_AND_MEAN
61 #include <windows.h>
62 #endif
63
64 #include "gawkapi.h"
65
66 #include "gawkdirfd.h"
67
68 #include "gettext.h"
69 #define _(msgid) gettext(msgid)
70 #define N_(msgid) msgid
71
72 #ifndef PATH_MAX
73 #define PATH_MAX 1024 /* a good guess */
74 #endif
75
/*
 * File-scope state shared by the functions in this extension.
 * NOTE(review): api, ext_id, ext_version and init_func are presumably
 * the names the dl_load_func() boilerplate at the end of the file
 * expects to find --- confirm against gawkapi.h.
 */
static const gawk_api_t *api; /* for convenience macros to work */
static awk_ext_id_t ext_id; /* passed to warning() to identify this extension */
static const char *ext_version = "readdir extension: version 3.0";

/* init_func points at init_readdir(), which registers the input parser(s) */
static awk_bool_t init_readdir(void);
static awk_bool_t (*init_func)(void) = init_readdir;

/*
 * NOTE(review): presumably the marker symbol that gawk looks for before
 * loading an extension --- confirm in the gawk manual.
 */
int plugin_is_GPL_compatible;
84
/*
 * data type for the opaque pointer:
 *
 * One of these is allocated per directory by dir_take_control_of(),
 * stored in iobuf->opaque, used by dir_get_record() and released by
 * dir_close().
 */

typedef struct open_directory {
	DIR *dp;	/* directory stream that readdir() is called on */
	char *buf;	/* buffer in which each output record is formatted */
	union {
		/* field widths reported back through dir_get_record() */
		awk_fieldwidth_info_t fw;
		/*
		 * Sizes the union for three fields (nf is set to 3).
		 * NOTE(review): presumably needed because the fields array
		 * in awk_fieldwidth_info_t is variable-length --- confirm
		 * in gawkapi.h.
		 */
		char buf[awk_fieldwidth_info_size(3)];
	} u;
} open_directory_t;
/* shorthand so that code can write the_dir->fw instead of the_dir->u.fw */
#define fw u.fw
96
/*
 * ftype --- return type of file as a single character string
 *
 * entry is the directory entry just read; dirname is the directory it was
 * read from.  The result points to a string literal:
 *
 *	"b" block device	"c" character device	"d" directory
 *	"p" FIFO		"l" symbolic link	"f" regular file
 *	"s" socket		"u" unknown
 *
 * If the platform provides d_type and it is conclusive, it is used as is.
 * Otherwise the type is obtained by calling stat() on "dirname/d_name".
 * "u" is returned when the path does not fit in the local buffer, when
 * stat() fails, or when the mode matches none of the known types.
 */

static const char *
ftype(struct dirent *entry, const char *dirname)
{
	char fname[PATH_MAX];
	struct stat sbuf;
	int n;

#ifdef DT_BLK
	switch (entry->d_type) {
	case DT_BLK:	return "b";
	case DT_CHR:	return "c";
	case DT_DIR:	return "d";
	case DT_FIFO:	return "p";
	case DT_LNK:	return "l";
	case DT_REG:	return "f";
	case DT_SOCK:	return "s";
	default:
	case DT_UNKNOWN: break;	// JFS returns 'u', so fall through to stat
	}
#endif

	/*
	 * Build the full path with a bounded write.  A path that does not
	 * fit cannot be examined, so report the type as unknown instead of
	 * writing past the end of fname.
	 */
	n = snprintf(fname, sizeof(fname), "%s/%s", dirname, entry->d_name);
	if (n < 0 || (size_t) n >= sizeof(fname))
		return "u";

	if (stat(fname, &sbuf) != 0)
		return "u";

	if (S_ISBLK(sbuf.st_mode))
		return "b";
	if (S_ISCHR(sbuf.st_mode))
		return "c";
	if (S_ISDIR(sbuf.st_mode))
		return "d";
	if (S_ISFIFO(sbuf.st_mode))
		return "p";
	if (S_ISREG(sbuf.st_mode))
		return "f";
#ifdef S_ISLNK
	/*
	 * NOTE(review): stat() follows symbolic links, so this test is not
	 * expected to match; lstat() would be needed to report links here.
	 */
	if (S_ISLNK(sbuf.st_mode))
		return "l";
#endif
#ifdef S_ISSOCK
	if (S_ISSOCK(sbuf.st_mode))
		return "s";
#endif

	return "u";
}
144
/*
 * get_inode --- get the inode of a file
 *
 * On MinGW the value is built from the file index reported by
 * GetFileInformationByHandle() for "dirname\d_name"; 0 is returned if the
 * path does not fit in the local buffer or the file cannot be opened or
 * queried.  Everywhere else the d_ino member of the entry is returned and
 * dirname is unused.
 */

static long long
get_inode(struct dirent *entry, const char *dirname)
{
#ifdef __MINGW32__
	char fname[PATH_MAX];
	HANDLE fh;
	BY_HANDLE_FILE_INFORMATION info;
	unsigned long long inode = 0;
	int n;

	/* bounded write: refuse paths that would overrun fname */
	n = snprintf(fname, sizeof(fname), "%s\\%s", dirname, entry->d_name);
	if (n < 0 || (size_t) n >= sizeof(fname))
		return 0;

	fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING,
			FILE_FLAG_BACKUP_SEMANTICS, NULL);
	if (fh == INVALID_HANDLE_VALUE)
		return 0;

	if (GetFileInformationByHandle(fh, &info)) {
		/* combine in an unsigned type so the shift is well defined */
		inode = info.nFileIndexHigh;
		inode <<= 32;
		inode += info.nFileIndexLow;
	}

	/* release the handle on every path; it is only needed for the query */
	CloseHandle(fh);

	return (long long) inode;
#else
	(void) dirname;	/* silence warnings */
	return entry->d_ino;
#endif
}
173
174 /* dir_get_record --- get one record at a time out of a directory */
175
176 static int
177 dir_get_record(char **out, awk_input_buf_t *iobuf, int *errcode,
178 char **rt_start, size_t *rt_len,
179 const awk_fieldwidth_info_t **field_width)
180 {
181 DIR *dp;
182 struct dirent *dirent;
183 int len, flen;
184 open_directory_t *the_dir;
185 const char *ftstr;
186 unsigned long long ino;
187
188 /*
189 * The caller sets *errcode to 0, so we should set it only if an
190 * error occurs.
191 */
192
193 if (out == NULL || iobuf == NULL || iobuf->opaque == NULL)
194 return EOF;
195
196 the_dir = (open_directory_t *) iobuf->opaque;
197 dp = the_dir->dp;
198
199 /*
200 * Initialize errno, since readdir does not set it to zero on EOF.
201 */
202 errno = 0;
203 dirent = readdir(dp);
204 if (dirent == NULL) {
205 *errcode = errno; /* in case there was an error */
206 return EOF;
207 }
208
209 ino = get_inode(dirent, iobuf->name);
210
211 #if __MINGW32__
212 len = sprintf(the_dir->buf, "%I64u", ino);
213 #else
214 len = sprintf(the_dir->buf, "%llu", ino);
215 #endif
216 the_dir->fw.fields[0].len = len;
217 len += (flen = sprintf(the_dir->buf + len, "/%s", dirent->d_name));
218 the_dir->fw.fields[1].len = flen-1;
219
220 ftstr = ftype(dirent, iobuf->name);
221 len += (flen = sprintf(the_dir->buf + len, "/%s", ftstr));
222 the_dir->fw.fields[2].len = flen-1;
223
224 *out = the_dir->buf;
225
226 *rt_start = NULL;
227 *rt_len = 0; /* set RT to "" */
228 if (field_width)
229 *field_width = & the_dir->fw;
230 return len;
231 }
232
233 /* dir_close --- close up when done */
234
235 static void
236 dir_close(awk_input_buf_t *iobuf)
237 {
238 open_directory_t *the_dir;
239
240 if (iobuf == NULL || iobuf->opaque == NULL)
241 return;
242
243 the_dir = (open_directory_t *) iobuf->opaque;
244
245 closedir(the_dir->dp);
246 gawk_free(the_dir->buf);
247 gawk_free(the_dir);
248
249 iobuf->fd = -1;
250 }
251
252 /* dir_can_take_file --- return true if we want the file */
253
254 static awk_bool_t
255 dir_can_take_file(const awk_input_buf_t *iobuf)
256 {
257 if (iobuf == NULL)
258 return awk_false;
259
260 return (iobuf->fd != INVALID_HANDLE || S_ISDIR(iobuf->sbuf.st_mode));
261 }
262
263 /*
264 * dir_take_control_of --- set up input parser.
265 * We can assume that dir_can_take_file just returned true,
266 * and no state has changed since then.
267 */
268
269 static awk_bool_t
270 dir_take_control_of(awk_input_buf_t *iobuf)
271 {
272 DIR *dp;
273 open_directory_t *the_dir;
274 size_t size;
275
276 errno = 0;
277 #ifdef HAVE_FDOPENDIR
278 dp = fdopendir(iobuf->fd);
279 #else
280 dp = opendir(iobuf->name);
281 if (dp != NULL)
282 iobuf->fd = dirfd(dp);
283 #endif
284 if (dp == NULL) {
285 warning(ext_id, _("dir_take_control_of: opendir/fdopendir failed: %s"),
286 strerror(errno));
287 update_ERRNO_int(errno);
288 return awk_false;
289 }
290
291 emalloc(the_dir, open_directory_t *, sizeof(open_directory_t), "dir_take_control_of");
292 the_dir->dp = dp;
293 /* pre-populate the field_width struct with constant values: */
294 the_dir->fw.use_chars = awk_false;
295 the_dir->fw.nf = 3;
296 the_dir->fw.fields[0].skip = 0; /* no leading space */
297 the_dir->fw.fields[1].skip = 1; /* single '/' separator */
298 the_dir->fw.fields[2].skip = 1; /* single '/' separator */
299 size = sizeof(struct dirent) + 21 /* max digits in inode */ + 2 /* slashes */;
300 emalloc(the_dir->buf, char *, size, "dir_take_control_of");
301
302 iobuf->opaque = the_dir;
303 iobuf->get_record = dir_get_record;
304 iobuf->close_func = dir_close;
305
306 return awk_true;
307 }
308
/*
 * Input parser descriptor registered by init_readdir(): the parser's
 * name followed by the two callbacks defined above.
 */
static awk_input_parser_t readdir_parser = {
	"readdir",
	dir_can_take_file,	/* decides whether the parser wants a file */
	dir_take_control_of,	/* installs dir_get_record() and dir_close() */
	NULL	/* NOTE(review): presumably a member reserved for gawk --- confirm in gawkapi.h */
};
315
#ifdef TEST_DUPLICATE
/*
 * A second descriptor using the same callbacks under a different name.
 * It is only compiled, and only registered by init_readdir(), when
 * TEST_DUPLICATE is defined.
 */
static awk_input_parser_t readdir_parser2 = {
	"readdir2",
	dir_can_take_file,
	dir_take_control_of,
	NULL
};
#endif
324
325 /* init_readdir --- set things ups */
326
327 static awk_bool_t
328 init_readdir()
329 {
330 register_input_parser(& readdir_parser);
331 #ifdef TEST_DUPLICATE
332 register_input_parser(& readdir_parser2);
333 #endif
334
335 return awk_true;
336 }
337
/*
 * This extension provides no awk-callable functions; the table holds a
 * single all-NULL/zero entry.
 * NOTE(review): presumably that entry acts as the terminating sentinel
 * expected by dl_load_func() --- confirm in gawkapi.h.
 */
static awk_ext_func_t func_table[] = {
	{ NULL, NULL, 0, 0, awk_false, NULL }
};

/* define the dl_load function using the boilerplate macro */

dl_load_func(func_table, readdir, "")