1  /*
       2   * readdir.c --- Provide an input parser to read directories
       3   *
       4   * Arnold Robbins
       5   * arnold@skeeve.com
       6   * Written 7/2012
       7   *
       8   * Andrew Schorr and Arnold Robbins: further fixes 8/2012.
       9   * Simplified 11/2012.
      10   * Improved 3/2019.
      11   */
      12  
      13  /*
      14   * Copyright (C) 2012-2014, 2018, 2019, 2021, 2023,
      15   * the Free Software Foundation, Inc.
      16   *
      17   * This file is part of GAWK, the GNU implementation of the
      18   * AWK Programming Language.
      19   *
      20   * GAWK is free software; you can redistribute it and/or modify
      21   * it under the terms of the GNU General Public License as published by
      22   * the Free Software Foundation; either version 3 of the License, or
      23   * (at your option) any later version.
      24   *
      25   * GAWK is distributed in the hope that it will be useful,
      26   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      27   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      28   * GNU General Public License for more details.
      29   *
      30   * You should have received a copy of the GNU General Public License
      31   * along with this program; if not, write to the Free Software
      32   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
      33   */
      34  
      35  #ifdef HAVE_CONFIG_H
      36  #include <config.h>
      37  #endif
      38  
      39  #define _BSD_SOURCE
      40  #include <stdio.h>
      41  #include <errno.h>
      42  #include <stdlib.h>
      43  #include <string.h>
      44  #include <unistd.h>
      45  
      46  #include <sys/types.h>
      47  #include <sys/stat.h>
      48  
      49  #ifdef HAVE_LIMITS_H
      50  #include <limits.h>
      51  #endif
      52  
      53  #ifdef HAVE_DIRENT_H
      54  #include <dirent.h>
      55  #else
      56  #error Cannot compile the readdir extension on this system!
      57  #endif
      58  
      59  #ifdef __MINGW32__
      60  #define WIN32_LEAN_AND_MEAN
      61  #include <windows.h>
      62  #endif
      63  
      64  #include "gawkapi.h"
      65  
      66  #include "gawkdirfd.h"
      67  
      68  #include "gettext.h"
      69  #define _(msgid)  gettext(msgid)
      70  #define N_(msgid) msgid
      71  
      72  #ifndef PATH_MAX
      73  #define PATH_MAX	1024	/* a good guess */
      74  #endif
      75  
      76  static const gawk_api_t *api;	/* for convenience macros to work */
      77  static awk_ext_id_t ext_id;
      78  static const char *ext_version = "readdir extension: version 2.0";
      79  
      80  static awk_bool_t init_readdir(void);
      81  static awk_bool_t (*init_func)(void) = init_readdir;
      82  
      83  int plugin_is_GPL_compatible;
      84  
      85  /* data type for the opaque pointer: */
      86  
      87  typedef struct open_directory {
      88  	DIR *dp;
      89  	char *buf;
      90  } open_directory_t;
      91  
      92  /* ftype --- return type of file as a single character string */
      93  
      94  static const char *
      95  ftype(struct dirent *entry, const char *dirname)
      96  {
      97  #ifdef DT_BLK
      98  	(void) dirname;		/* silence warnings */
      99  	switch (entry->d_type) {
     100  	case DT_BLK:	return "b";
     101  	case DT_CHR:	return "c";
     102  	case DT_DIR:	return "d";
     103  	case DT_FIFO:	return "p";
     104  	case DT_LNK:	return "l";
     105  	case DT_REG:	return "f";
     106  	case DT_SOCK:	return "s";
     107  	default:
     108  	case DT_UNKNOWN: break; // JFS returns 'u', so fall through and stat
     109  	}
     110  #endif
     111  	char fname[PATH_MAX];
     112  	struct stat sbuf;
     113  
     114  	strcpy(fname, dirname);
     115  	strcat(fname, "/");
     116  	strcat(fname, entry->d_name);
     117  	if (stat(fname, &sbuf) == 0) {
     118  		if (S_ISBLK(sbuf.st_mode))
     119  			return "b";
     120  		if (S_ISCHR(sbuf.st_mode))
     121  			return "c";
     122  		if (S_ISDIR(sbuf.st_mode))
     123  			return "d";
     124  		if (S_ISFIFO(sbuf.st_mode))
     125  			return "p";
     126  		if (S_ISREG(sbuf.st_mode))
     127  			return "f";
     128  #ifdef S_ISLNK
     129  		if (S_ISLNK(sbuf.st_mode))
     130  			return "l";
     131  #endif
     132  #ifdef S_ISSOCK
     133  		if (S_ISSOCK(sbuf.st_mode))
     134  			return "s";
     135  #endif
     136  	}
     137  	return "u";
     138  }
     139  
     140  /* get_inode --- get the inode of a file */
     141  
     142  static long long
     143  get_inode(struct dirent *entry, const char *dirname)
     144  {
     145  #ifdef __MINGW32__
     146  	char fname[PATH_MAX];
     147  	HANDLE fh;
     148  	BOOL ok;
     149  	BY_HANDLE_FILE_INFORMATION info;
     150  
     151  	sprintf(fname, "%s\\%s", dirname, entry->d_name);
     152  	fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING,
     153  			FILE_FLAG_BACKUP_SEMANTICS, NULL);
     154  	if (fh == INVALID_HANDLE_VALUE)
     155  		return 0;
     156  	ok = GetFileInformationByHandle(fh, &info);
     157  	CloseHandle(fh);
     158  	if (ok) {
     159  		long long inode = info.nFileIndexHigh;
     160  
     161  		inode <<= 32;
     162  		inode += info.nFileIndexLow;
     163  		return inode;
     164  	}
     165  	return 0;
     166  #else
     167  	(void) dirname;		/* silence warnings */
     168  	return entry->d_ino;
     169  #endif
     170  }
     171  
     172  /* dir_get_record --- get one record at a time out of a directory */
     173  
     174  static int
     175  dir_get_record(char **out, awk_input_buf_t *iobuf, int *errcode,
     176  		char **rt_start, size_t *rt_len,
     177  		const awk_fieldwidth_info_t **unused)
     178  {
     179  	DIR *dp;
     180  	struct dirent *dirent;
     181  	int len;
     182  	open_directory_t *the_dir;
     183  	const char *ftstr;
     184  	unsigned long long ino;
     185  
     186  	/*
     187  	 * The caller sets *errcode to 0, so we should set it only if an
     188  	 * error occurs.
     189  	 */
     190  
     191  	if (out == NULL || iobuf == NULL || iobuf->opaque == NULL)
     192  		return EOF;
     193  
     194  	the_dir = (open_directory_t *) iobuf->opaque;
     195  	dp = the_dir->dp;
     196  
     197  	/*
     198  	 * Initialize errno, since readdir does not set it to zero on EOF.
     199  	 */
     200  	errno = 0;
     201  	dirent = readdir(dp);
     202  	if (dirent == NULL) {
     203  		*errcode = errno;	/* in case there was an error */
     204  		return EOF;
     205  	}
     206  
     207  	ino = get_inode(dirent, iobuf->name);
     208  
     209  #if __MINGW32__
     210  	len = sprintf(the_dir->buf, "%I64u/%s", ino, dirent->d_name);
     211  #else
     212  	len = sprintf(the_dir->buf, "%llu/%s", ino, dirent->d_name);
     213  #endif
     214  
     215  	ftstr = ftype(dirent, iobuf->name);
     216  	len += sprintf(the_dir->buf + len, "/%s", ftstr);
     217  
     218  	*out = the_dir->buf;
     219  
     220  	*rt_start = NULL;
     221  	*rt_len = 0;	/* set RT to "" */
     222  	return len;
     223  }
     224  
     225  /* dir_close --- close up when done */
     226  
     227  static void
     228  dir_close(awk_input_buf_t *iobuf)
     229  {
     230  	open_directory_t *the_dir;
     231  
     232  	if (iobuf == NULL || iobuf->opaque == NULL)
     233  		return;
     234  
     235  	the_dir = (open_directory_t *) iobuf->opaque;
     236  
     237  	closedir(the_dir->dp);
     238  	gawk_free(the_dir->buf);
     239  	gawk_free(the_dir);
     240  
     241  	iobuf->fd = -1;
     242  }
     243  
     244  /* dir_can_take_file --- return true if we want the file */
     245  
     246  static awk_bool_t
     247  dir_can_take_file(const awk_input_buf_t *iobuf)
     248  {
     249  	if (iobuf == NULL)
     250  		return awk_false;
     251  
     252  	return (iobuf->fd != INVALID_HANDLE || S_ISDIR(iobuf->sbuf.st_mode));
     253  }
     254  
     255  /*
     256   * dir_take_control_of --- set up input parser.
     257   * We can assume that dir_can_take_file just returned true,
     258   * and no state has changed since then.
     259   */
     260  
     261  static awk_bool_t
     262  dir_take_control_of(awk_input_buf_t *iobuf)
     263  {
     264  	DIR *dp;
     265  	open_directory_t *the_dir;
     266  	size_t size;
     267  
     268  	errno = 0;
     269  #ifdef HAVE_FDOPENDIR
     270  	dp = fdopendir(iobuf->fd);
     271  #else
     272  	dp = opendir(iobuf->name);
     273  	if (dp != NULL) {
     274  		if (iobuf->fd != INVALID_HANDLE)
     275  			(void) close(iobuf->fd);
     276  		iobuf->fd = dirfd(dp);
     277  	}
     278  #endif
     279  	if (dp == NULL) {
     280  		warning(ext_id, _("dir_take_control_of: opendir/fdopendir failed: %s"),
     281  				strerror(errno));
     282  		update_ERRNO_int(errno);
     283  		return awk_false;
     284  	}
     285  
     286  	emalloc(the_dir, open_directory_t *, sizeof(open_directory_t), "dir_take_control_of");
     287  	the_dir->dp = dp;
     288  	size = sizeof(struct dirent) + 21 /* max digits in inode */ + 2 /* slashes */;
     289  	emalloc(the_dir->buf, char *, size, "dir_take_control_of");
     290  
     291  	iobuf->opaque = the_dir;
     292  	iobuf->get_record = dir_get_record;
     293  	iobuf->close_func = dir_close;
     294  
     295  	return awk_true;
     296  }
     297  
/*
 * Input parser descriptor registered by init_readdir: the parser's name
 * followed by its two callbacks.
 */
static awk_input_parser_t readdir_parser = {
	"readdir",
	dir_can_take_file,
	dir_take_control_of,
	NULL	/* NOTE(review): presumably a link field maintained by gawk -- confirm in gawkapi.h */
};
     304  
#ifdef TEST_DUPLICATE
/*
 * Second descriptor using the same callbacks under the name "readdir2".
 * It is only built when TEST_DUPLICATE is defined, and init_readdir
 * registers it right after readdir_parser.
 */
static awk_input_parser_t readdir_parser2 = {
	"readdir2",
	dir_can_take_file,
	dir_take_control_of,
	NULL
};
#endif
     313  
     314  /* init_readdir --- set things ups */
     315  
     316  static awk_bool_t
     317  init_readdir()
     318  {
     319  	register_input_parser(& readdir_parser);
     320  #ifdef TEST_DUPLICATE
     321  	register_input_parser(& readdir_parser2);
     322  #endif
     323  
     324  	return awk_true;
     325  }
     326  
/*
 * Function table handed to dl_load_func.  Its only entry is all
 * NULL/zero, i.e. no awk-callable functions are listed; this extension
 * works through its input parser.
 */
static awk_ext_func_t func_table[] = {
	{ NULL, NULL, 0, 0, awk_false, NULL }
};
     330  
/*
 * define the dl_load function using the boilerplate macro
 *
 * Arguments: the function table, the extension name, and an empty string
 * (NOTE(review): presumably the awk namespace for the table's functions
 * -- confirm against gawkapi.h).
 */

dl_load_func(func_table, readdir, "")