(root)/
coreutils-9.4/
src/
paste.c
       1  /* paste - merge lines of files
       2     Copyright (C) 1997-2023 Free Software Foundation, Inc.
       3     Copyright (C) 1984 David M. Ihnat
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation, either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Written by David Ihnat.  */
      19  
      20  /* The list of valid escape sequences has been expanded over the Unix
      21     version, to include \b, \f, \r, and \v.
      22  
      23     POSIX changes, bug fixes, long-named options, and cleanup
      24     by David MacKenzie <djm@gnu.ai.mit.edu>.
      25  
      26     Options:
      27     --serial
      28     -s				Paste one file at a time rather than
      29                                  one line from each file.
      30     --delimiters=delim-list
      31     -d delim-list		Consecutively use the characters in
      32                                  DELIM-LIST instead of tab to separate
      33                                  merged lines.  When DELIM-LIST is exhausted,
      34                                  start again at its beginning.
      35     A FILE of '-' means standard input.
      36     If no FILEs are given, standard input is used. */
      37  
      38  #include <config.h>
      39  
      40  #include <stdio.h>
      41  #include <getopt.h>
      42  #include <sys/types.h>
      43  #include "system.h"
      44  #include "fadvise.h"
      45  
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "paste"

/* Author list handed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie")

/* Indicates that no delimiter should be added in the current position.
   collapse_escapes stores this value for a "\0" escape in the
   delimiter list.  */
#define EMPTY_DELIM '\0'

/* True if we have read standard input at some point; main uses it to
   decide whether stdin must be closed before exiting.  */
static bool have_read_stdin;

/* True if we should merge subsequent lines of each file (-s) rather than
   corresponding lines from each file in parallel. */
static bool serial_merge;

/* The delimiters between lines of input files (used cyclically).
   Allocated and filled in by collapse_escapes; freed by main.  */
static char *delims;

/* A pointer to the character after the end of 'delims'. */
static char const *delim_end;

/* The byte that terminates an input line and that ends each output
   line: newline by default, NUL after -z (--zero-terminated).  */
static unsigned char line_delim = '\n';

/* Long options passed to getopt_long in main.  Each named entry maps
   to the short option character handled by main's switch.  */
static struct option const longopts[] =
{
  {"serial", no_argument, nullptr, 's'},
  {"delimiters", required_argument, nullptr, 'd'},
  {"zero-terminated", no_argument, nullptr, 'z'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
      80  
      81  /* Set globals delims and delim_end.  Copy STRPTR to DELIMS, converting
      82     backslash representations of special characters in STRPTR to their actual
      83     values. The set of possible backslash characters has been expanded beyond
      84     that recognized by the Unix version.
      85     Return 0 upon success.
      86     If the string ends in an odd number of backslashes, ignore the
      87     final backslash and return nonzero.  */
      88  
      89  static int
      90  collapse_escapes (char const *strptr)
      91  {
      92    char *strout = xstrdup (strptr);
      93    bool backslash_at_end = false;
      94  
      95    delims = strout;
      96  
      97    while (*strptr)
      98      {
      99        if (*strptr != '\\')	/* Is it an escape character? */
     100          *strout++ = *strptr++;	/* No, just transfer it. */
     101        else
     102          {
     103            switch (*++strptr)
     104              {
     105              case '0':
     106                *strout++ = EMPTY_DELIM;
     107                break;
     108  
     109              case 'b':
     110                *strout++ = '\b';
     111                break;
     112  
     113              case 'f':
     114                *strout++ = '\f';
     115                break;
     116  
     117              case 'n':
     118                *strout++ = '\n';
     119                break;
     120  
     121              case 'r':
     122                *strout++ = '\r';
     123                break;
     124  
     125              case 't':
     126                *strout++ = '\t';
     127                break;
     128  
     129              case 'v':
     130                *strout++ = '\v';
     131                break;
     132  
     133              case '\\':
     134                *strout++ = '\\';
     135                break;
     136  
     137              case '\0':
     138                backslash_at_end = true;
     139                goto done;
     140  
     141              default:
     142                *strout++ = *strptr;
     143                break;
     144              }
     145            strptr++;
     146          }
     147      }
     148  
     149   done:
     150  
     151    delim_end = strout;
     152    return backslash_at_end ? 1 : 0;
     153  }
     154  
     155  /* Output a single byte, reporting any write errors.  */
     156  
     157  static inline void
     158  xputchar (char c)
     159  {
     160    if (putchar (c) < 0)
     161      write_error ();
     162  }
     163  
/* Perform column paste on the NFILES files named in FNAMPTR.
   Each output line is assembled from one line of every file, in
   argument order, separated by the delimiters in 'delims' (used
   cyclically, restarting at the start of every output line) and
   terminated by 'line_delim'.  A name of "-" means standard input.
   Return true if successful, false if a read or close error was
   reported for one or more files.  A file that cannot be opened is
   reported through error (EXIT_FAILURE, ...) instead of through the
   return value.  */

static bool
paste_parallel (size_t nfiles, char **fnamptr)
{
  bool ok = true;
  /* If all files are just ready to be closed, or will be on this
     round, the string of delimiters must be preserved.
     delbuf[0] through delbuf[nfiles]
     store the delimiters for closed files. */
  char *delbuf = xmalloc (nfiles + 2);

  /* Streams open to the files to process; null if the corresponding
     stream is closed.  */
  FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);

  /* Number of files still open to process.  */
  size_t files_open;

  /* True if any fopen got fd == STDIN_FILENO.  */
  bool opened_stdin = false;

  /* Attempt to open all files.  This could be expanded to an infinite
     number of files, but at the (considerable) expense of remembering
     each file and its current offset, then opening/reading/closing.  */

  for (files_open = 0; files_open < nfiles; ++files_open)
    {
      if (STREQ (fnamptr[files_open], "-"))
        {
          have_read_stdin = true;
          fileptr[files_open] = stdin;
        }
      else
        {
          fileptr[files_open] = fopen (fnamptr[files_open], "r");
          if (fileptr[files_open] == nullptr)
            error (EXIT_FAILURE, errno, "%s", quotef (fnamptr[files_open]));
          else if (fileno (fileptr[files_open]) == STDIN_FILENO)
            opened_stdin = true;
          fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
        }
    }

  /* A named file was opened on descriptor STDIN_FILENO and "-" was
     requested as well: diagnose that standard input is closed.  */
  if (opened_stdin && have_read_stdin)
    error (EXIT_FAILURE, 0, _("standard input is closed"));

  /* Read a line from each file and output it to stdout separated by a
     delimiter, until we go through the loop without successfully
     reading from any of the files. */

  while (files_open)
    {
      /* Set up for the next line. */
      bool somedone = false;	/* Some file supplied data for this line.  */
      char const *delimptr = delims;
      size_t delims_saved = 0;	/* Number of delims saved in 'delbuf'. */

      /* One pass over the files produces (at most) one output line.  */
      for (size_t i = 0; i < nfiles && files_open; i++)
        {
          int chr;			/* Input character. */
          int err;			/* Input errno value.  */
          bool sometodo = false;	/* Input chars to process.  */

          if (fileptr[i])
            {
              chr = getc (fileptr[i]);
              /* Capture errno right after the read; it is only trusted
                 below if ferror reports an error on the stream.  */
              err = errno;
              /* This file has data, so the delimiters that were held
                 back for preceding files without data are now needed.  */
              if (chr != EOF && delims_saved)
                {
                  if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
                    write_error ();
                  delims_saved = 0;
                }

              /* Copy the line, stopping at (and not printing) the line
                 delimiter.  On exit CHR is either line_delim or EOF.  */
              while (chr != EOF)
                {
                  sometodo = true;
                  if (chr == line_delim)
                    break;
                  xputchar (chr);
                  chr = getc (fileptr[i]);
                  err = errno;
                }
            }

          if (! sometodo)
            {
              /* EOF, read error, or closed file.
                 If an EOF or error, close the file.  */
              if (fileptr[i])
                {
                  /* Plain EOF is not an error; discard the saved errno.  */
                  if (!ferror (fileptr[i]))
                    err = 0;
                  /* stdin is not closed here; main closes it once at
                     the end if have_read_stdin is set.  */
                  if (fileptr[i] == stdin)
                    clearerr (fileptr[i]); /* Also clear EOF. */
                  else if (fclose (fileptr[i]) == EOF && !err)
                    err = errno;
                  if (err)
                    {
                      error (0, err, "%s", quotef (fnamptr[i]));
                      ok = false;
                    }

                  fileptr[i] = nullptr;
                  files_open--;
                }

              if (i + 1 == nfiles)
                {
                  /* End of this output line.
                     Is this the end of the whole thing? */
                  if (somedone)
                    {
                      /* No.  Some files were not closed for this line. */
                      if (delims_saved)
                        {
                          if (fwrite (delbuf, 1, delims_saved, stdout)
                              != delims_saved)
                            write_error ();
                          delims_saved = 0;
                        }
                      xputchar (line_delim);
                    }
                  continue;	/* Next read of files, or exit. */
                }
              else
                {
                  /* Closed file; add delimiter to 'delbuf'. */
                  if (*delimptr != EMPTY_DELIM)
                    delbuf[delims_saved++] = *delimptr;
                  /* Advance cyclically through the delimiter list.  */
                  if (++delimptr == delim_end)
                    delimptr = delims;
                }
            }
          else
            {
              /* Some data read. */
              somedone = true;

              /* Except for last file, replace last newline with delim. */
              if (i + 1 != nfiles)
                {
                  /* NOTE(review): the copy loop above leaves CHR equal to
                     either line_delim or EOF, so this test looks like it
                     can never succeed -- confirm before relying on it.  */
                  if (chr != line_delim && chr != EOF)
                    xputchar (chr);
                  if (*delimptr != EMPTY_DELIM)
                    xputchar (*delimptr);
                  /* Advance cyclically through the delimiter list.  */
                  if (++delimptr == delim_end)
                    delimptr = delims;
                }
              else
                {
                  /* If the last line of the last file lacks a newline,
                     print one anyhow.  POSIX requires this.  */
                  char c = (chr == EOF ? line_delim : chr);
                  xputchar (c);
                }
            }
        }
    }
  free (fileptr);
  free (delbuf);
  return ok;
}
     330  
     331  /* Perform serial paste on the NFILES files named in FNAMPTR.
     332     Return true if no errors, false if one or more files could not be
     333     opened or read. */
     334  
     335  static bool
     336  paste_serial (size_t nfiles, char **fnamptr)
     337  {
     338    bool ok = true;	/* false if open or read errors occur. */
     339    int charnew, charold; /* Current and previous char read. */
     340    char const *delimptr;	/* Current delimiter char. */
     341    FILE *fileptr;	/* Open for reading current file. */
     342  
     343    for (; nfiles; nfiles--, fnamptr++)
     344      {
     345        int saved_errno;
     346        bool is_stdin = STREQ (*fnamptr, "-");
     347        if (is_stdin)
     348          {
     349            have_read_stdin = true;
     350            fileptr = stdin;
     351          }
     352        else
     353          {
     354            fileptr = fopen (*fnamptr, "r");
     355            if (fileptr == nullptr)
     356              {
     357                error (0, errno, "%s", quotef (*fnamptr));
     358                ok = false;
     359                continue;
     360              }
     361            fadvise (fileptr, FADVISE_SEQUENTIAL);
     362          }
     363  
     364        delimptr = delims;	/* Set up for delimiter string. */
     365  
     366        charold = getc (fileptr);
     367        saved_errno = errno;
     368        if (charold != EOF)
     369          {
     370            /* 'charold' is set up.  Hit it!
     371               Keep reading characters, stashing them in 'charnew';
     372               output 'charold', converting to the appropriate delimiter
     373               character if needed.  After the EOF, output 'charold'
     374               if it's a newline; otherwise, output it and then a newline. */
     375  
     376            while ((charnew = getc (fileptr)) != EOF)
     377              {
     378                /* Process the old character. */
     379                if (charold == line_delim)
     380                  {
     381                    if (*delimptr != EMPTY_DELIM)
     382                      xputchar (*delimptr);
     383  
     384                    if (++delimptr == delim_end)
     385                      delimptr = delims;
     386                  }
     387                else
     388                  xputchar (charold);
     389  
     390                charold = charnew;
     391              }
     392            saved_errno = errno;
     393  
     394            /* Hit EOF.  Process that last character. */
     395            xputchar (charold);
     396          }
     397  
     398        if (charold != line_delim)
     399          xputchar (line_delim);
     400  
     401        if (!ferror (fileptr))
     402          saved_errno = 0;
     403        if (is_stdin)
     404          clearerr (fileptr);	/* Also clear EOF. */
     405        else if (fclose (fileptr) != 0 && !saved_errno)
     406          saved_errno = errno;
     407        if (saved_errno)
     408          {
     409            error (0, saved_errno, "%s", quotef (*fnamptr));
     410            ok = false;
     411          }
     412      }
     413    return ok;
     414  }
     415  
     416  void
     417  usage (int status)
     418  {
     419    if (status != EXIT_SUCCESS)
     420      emit_try_help ();
     421    else
     422      {
     423        printf (_("\
     424  Usage: %s [OPTION]... [FILE]...\n\
     425  "),
     426                program_name);
     427        fputs (_("\
     428  Write lines consisting of the sequentially corresponding lines from\n\
     429  each FILE, separated by TABs, to standard output.\n\
     430  "), stdout);
     431  
     432        emit_stdin_note ();
     433        emit_mandatory_arg_note ();
     434  
     435        fputs (_("\
     436    -d, --delimiters=LIST   reuse characters from LIST instead of TABs\n\
     437    -s, --serial            paste one file at a time instead of in parallel\n\
     438  "), stdout);
     439        fputs (_("\
     440    -z, --zero-terminated    line delimiter is NUL, not newline\n\
     441  "), stdout);
     442        fputs (HELP_OPTION_DESCRIPTION, stdout);
     443        fputs (VERSION_OPTION_DESCRIPTION, stdout);
     444        /* FIXME: add a couple of examples.  */
     445        emit_ancillary_info (PROGRAM_NAME);
     446      }
     447    exit (status);
     448  }
     449  
     450  int
     451  main (int argc, char **argv)
     452  {
     453    int optc;
     454    char const *delim_arg = "\t";
     455  
     456    initialize_main (&argc, &argv);
     457    set_program_name (argv[0]);
     458    setlocale (LC_ALL, "");
     459    bindtextdomain (PACKAGE, LOCALEDIR);
     460    textdomain (PACKAGE);
     461  
     462    atexit (close_stdout);
     463  
     464    have_read_stdin = false;
     465    serial_merge = false;
     466  
     467    while ((optc = getopt_long (argc, argv, "d:sz", longopts, nullptr)) != -1)
     468      {
     469        switch (optc)
     470          {
     471          case 'd':
     472            /* Delimiter character(s). */
     473            delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
     474            break;
     475  
     476          case 's':
     477            serial_merge = true;
     478            break;
     479  
     480          case 'z':
     481            line_delim = '\0';
     482            break;
     483  
     484          case_GETOPT_HELP_CHAR;
     485  
     486          case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     487  
     488          default:
     489            usage (EXIT_FAILURE);
     490          }
     491      }
     492  
     493    int nfiles = argc - optind;
     494    if (nfiles == 0)
     495      {
     496        argv[optind] = bad_cast ("-");
     497        nfiles++;
     498      }
     499  
     500    if (collapse_escapes (delim_arg))
     501      {
     502        /* Don't use the quote() quoting style, because that would double the
     503           number of displayed backslashes, making the diagnostic look bogus.  */
     504        error (EXIT_FAILURE, 0,
     505               _("delimiter list ends with an unescaped backslash: %s"),
     506               quotearg_n_style_colon (0, c_maybe_quoting_style, delim_arg));
     507      }
     508  
     509    bool ok = ((serial_merge ? paste_serial : paste_parallel)
     510               (nfiles, &argv[optind]));
     511  
     512    free (delims);
     513  
     514    if (have_read_stdin && fclose (stdin) == EOF)
     515      error (EXIT_FAILURE, errno, "-");
     516    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
     517  }