(root)/
coreutils-9.4/
src/
wc.c
       1  /* wc - print the number of lines, words, and bytes in files
       2     Copyright (C) 1985-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Paul Rubin, phr@ocf.berkeley.edu
      18     and David MacKenzie, djm@gnu.ai.mit.edu. */
      19  
      20  #include <config.h>
      21  
      22  #include <stdckdint.h>
      23  #include <stdio.h>
      24  #include <getopt.h>
      25  #include <sys/types.h>
      26  #include <wchar.h>
      27  #include <wctype.h>
      28  
      29  #include "system.h"
      30  #include "assure.h"
      31  #include "argmatch.h"
      32  #include "argv-iter.h"
      33  #include "fadvise.h"
      34  #include "mbchar.h"
      35  #include "physmem.h"
      36  #include "readtokens0.h"
      37  #include "safe-read.h"
      38  #include "stat-size.h"
      39  #include "xbinary-io.h"
      40  
/* Fallback used only when iswspace is neither a macro nor reported as
   available by HAVE_ISWSPACE: WC counts as white space only if it is
   unchanged by to_uchar and isspace accepts that value.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif
      45  
      46  /* The official name of this program (e.g., no 'g' prefix).  */
      47  #define PROGRAM_NAME "wc"
      48  
      49  #define AUTHORS \
      50    proper_name ("Paul Rubin"), \
      51    proper_name ("David MacKenzie")
      52  
      53  /* Size of atomic reads. */
      54  #define BUFFER_SIZE (16 * 1024)
      55  
#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c.  Takes the same arguments as wc_lines; wc calls it in
   place of wc_lines when avx2_supported returns true.  */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif
      62  
      63  static bool debug;
      64  
      65  /* Cumulative number of lines, words, chars and bytes in all files so far.
      66     max_line_length is the maximum over all files processed so far.  */
      67  static uintmax_t total_lines;
      68  static uintmax_t total_words;
      69  static uintmax_t total_chars;
      70  static uintmax_t total_bytes;
      71  static uintmax_t total_lines_overflow;
      72  static uintmax_t total_words_overflow;
      73  static uintmax_t total_chars_overflow;
      74  static uintmax_t total_bytes_overflow;
      75  static uintmax_t max_line_length;
      76  
      77  /* Which counts to print. */
      78  static bool print_lines, print_words, print_chars, print_bytes;
      79  static bool print_linelength;
      80  
      81  /* The print width of each count.  */
      82  static int number_width;
      83  
      84  /* True if we have ever read the standard input. */
      85  static bool have_read_stdin;
      86  
      87  /* Used to determine if file size can be determined without reading.  */
      88  static size_t page_size;
      89  
      90  /* Enable to _not_ treat non breaking space as a word separator.  */
      91  static bool posixly_correct;
      92  
/* The result of calling fstat or stat on a file descriptor or file.
   get_input_fstatus creates these; wc's byte-count-only path calls
   fstat itself when FAILED is still positive.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
     103  
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};

/* Long option table passed to getopt_long in main.  The last field of
   each entry is the value the option-handling switch receives: either
   the matching short option letter or one of the pseudo options
   above.  */
static struct option const longopts[] =
{
  {"bytes", no_argument, nullptr, 'c'},
  {"chars", no_argument, nullptr, 'm'},
  {"lines", no_argument, nullptr, 'l'},
  {"words", no_argument, nullptr, 'w'},
  {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
  {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
  {"max-line-length", no_argument, nullptr, 'L'},
  {"total", required_argument, nullptr, TOTAL_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
     127  
/* The possible settings of the --total=WHEN option.  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };
/* Accepted WHEN spellings; entry I corresponds to total_types[I].  */
static char const *const total_args[] =
{
  "auto", "always", "only", "never", nullptr
};
/* Values selected by the spellings in total_args, in the same order.  */
static enum total_type const total_types[] =
{
  total_auto, total_always, total_only, total_never
};
/* NOTE(review): presumably a compile-time check that the two arrays
   above stay in step -- confirm against argmatch.h.  */
ARGMATCH_VERIFY (total_args, total_types);
/* The mode in effect; main overwrites it when --total is given.  */
static enum total_type total_mode = total_auto;
     145  
     146  #ifdef USE_AVX2_WC_LINECOUNT
     147  static bool
     148  avx2_supported (void)
     149  {
     150    bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
     151  
     152    if (debug)
     153      error (0, 0, (avx_enabled
     154                    ? _("using avx2 hardware support")
     155                    : _("avx2 support not detected")));
     156  
     157    return avx_enabled;
     158  }
     159  #endif
     160  
     161  void
     162  usage (int status)
     163  {
     164    if (status != EXIT_SUCCESS)
     165      emit_try_help ();
     166    else
     167      {
     168        printf (_("\
     169  Usage: %s [OPTION]... [FILE]...\n\
     170    or:  %s [OPTION]... --files0-from=F\n\
     171  "),
     172                program_name, program_name);
     173        fputs (_("\
     174  Print newline, word, and byte counts for each FILE, and a total line if\n\
     175  more than one FILE is specified.  A word is a non-zero-length sequence of\n\
     176  printable characters delimited by white space.\n\
     177  "), stdout);
     178  
     179        emit_stdin_note ();
     180  
     181        fputs (_("\
     182  \n\
     183  The options below may be used to select which counts are printed, always in\n\
     184  the following order: newline, word, character, byte, maximum line length.\n\
     185    -c, --bytes            print the byte counts\n\
     186    -m, --chars            print the character counts\n\
     187    -l, --lines            print the newline counts\n\
     188  "), stdout);
     189        fputs (_("\
     190        --files0-from=F    read input from the files specified by\n\
     191                             NUL-terminated names in file F;\n\
     192                             If F is - then read names from standard input\n\
     193    -L, --max-line-length  print the maximum display width\n\
     194    -w, --words            print the word counts\n\
     195  "), stdout);
     196        fputs (_("\
     197        --total=WHEN       when to print a line with total counts;\n\
     198                             WHEN can be: auto, always, only, never\n\
     199  "), stdout);
     200        fputs (HELP_OPTION_DESCRIPTION, stdout);
     201        fputs (VERSION_OPTION_DESCRIPTION, stdout);
     202        emit_ancillary_info (PROGRAM_NAME);
     203      }
     204    exit (status);
     205  }
     206  
     207  /* Return non zero if a non breaking space.  */
     208  ATTRIBUTE_PURE
     209  static int
     210  iswnbspace (wint_t wc)
     211  {
     212    return ! posixly_correct
     213           && (wc == 0x00A0 || wc == 0x2007
     214               || wc == 0x202F || wc == 0x2060);
     215  }
     216  
     217  static int
     218  isnbspace (int c)
     219  {
     220    return iswnbspace (btowc (c));
     221  }
     222  
     223  /* FILE is the name of the file (or null for standard input)
     224     associated with the specified counters.  */
     225  static void
     226  write_counts (uintmax_t lines,
     227                uintmax_t words,
     228                uintmax_t chars,
     229                uintmax_t bytes,
     230                uintmax_t linelength,
     231                char const *file)
     232  {
     233    static char const format_sp_int[] = " %*s";
     234    char const *format_int = format_sp_int + 1;
     235    char buf[INT_BUFSIZE_BOUND (uintmax_t)];
     236  
     237    if (print_lines)
     238      {
     239        printf (format_int, number_width, umaxtostr (lines, buf));
     240        format_int = format_sp_int;
     241      }
     242    if (print_words)
     243      {
     244        printf (format_int, number_width, umaxtostr (words, buf));
     245        format_int = format_sp_int;
     246      }
     247    if (print_chars)
     248      {
     249        printf (format_int, number_width, umaxtostr (chars, buf));
     250        format_int = format_sp_int;
     251      }
     252    if (print_bytes)
     253      {
     254        printf (format_int, number_width, umaxtostr (bytes, buf));
     255        format_int = format_sp_int;
     256      }
     257    if (print_linelength)
     258      {
     259        printf (format_int, number_width, umaxtostr (linelength, buf));
     260      }
     261    if (file)
     262      printf (" %s", strchr (file, '\n') ? quotef (file) : file);
     263    putchar ('\n');
     264  }
     265  
     266  static bool
     267  wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
     268  {
     269    size_t bytes_read;
     270    uintmax_t lines, bytes;
     271    char buf[BUFFER_SIZE + 1];
     272    bool long_lines = false;
     273  
     274    if (!lines_out || !bytes_out)
     275      {
     276        return false;
     277      }
     278  
     279    lines = bytes = 0;
     280  
     281    while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
     282      {
     283  
     284        if (bytes_read == SAFE_READ_ERROR)
     285          {
     286            error (0, errno, "%s", quotef (file));
     287            return false;
     288          }
     289  
     290        bytes += bytes_read;
     291  
     292        char *p = buf;
     293        char *end = buf + bytes_read;
     294        uintmax_t plines = lines;
     295  
     296        if (! long_lines)
     297          {
     298            /* Avoid function call overhead for shorter lines.  */
     299            while (p != end)
     300              lines += *p++ == '\n';
     301          }
     302        else
     303          {
     304            /* rawmemchr is more efficient with longer lines.  */
     305            *end = '\n';
     306            while ((p = rawmemchr (p, '\n')) < end)
     307              {
     308                ++p;
     309                ++lines;
     310              }
     311          }
     312  
     313        /* If the average line length in the block is >= 15, then use
     314            memchr for the next block, where system specific optimizations
     315            may outweigh function call overhead.
     316            FIXME: This line length was determined in 2015, on both
     317            x86_64 and ppc64, but it's worth re-evaluating in future with
     318            newer compilers, CPUs, or memchr() implementations etc.  */
     319        if (lines - plines <= bytes_read / 15)
     320          long_lines = true;
     321        else
     322          long_lines = false;
     323      }
     324  
     325    *bytes_out = bytes;
     326    *lines_out = lines;
     327  
     328    return true;
     329  }
     330  
     331  /* Count words.  FILE_X is the name of the file (or null for standard
     332     input) that is open on descriptor FD.  *FSTATUS is its status.
     333     CURRENT_POS is the current file offset if known, negative if unknown.
     334     Return true if successful.  */
     335  static bool
     336  wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
     337  {
     338    bool ok = true;
     339    char buf[BUFFER_SIZE + 1];
     340    size_t bytes_read;
     341    uintmax_t lines, words, chars, bytes, linelength;
     342    bool count_bytes, count_chars, count_complicated;
     343    char const *file = file_x ? file_x : _("standard input");
     344  
     345    lines = words = chars = bytes = linelength = 0;
     346  
     347    /* If in the current locale, chars are equivalent to bytes, we prefer
     348       counting bytes, because that's easier.  */
     349  #if MB_LEN_MAX > 1
     350    if (MB_CUR_MAX > 1)
     351      {
     352        count_bytes = print_bytes;
     353        count_chars = print_chars;
     354      }
     355    else
     356  #endif
     357      {
     358        count_bytes = print_bytes || print_chars;
     359        count_chars = false;
     360      }
     361    count_complicated = print_words || print_linelength;
     362  
     363    /* Advise the kernel of our access pattern only if we will read().  */
     364    if (!count_bytes || count_chars || print_lines || count_complicated)
     365      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
     366  
     367    /* When counting only bytes, save some line- and word-counting
     368       overhead.  If FD is a 'regular' Unix file, using lseek is enough
     369       to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
     370       bytes at a time until EOF.  Note that the 'size' (number of bytes)
     371       that wc reports is smaller than stats.st_size when the file is not
     372       positioned at its beginning.  That's why the lseek calls below are
     373       necessary.  For example the command
     374       '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
     375       should make wc report '0' bytes.  */
     376  
     377    if (count_bytes && !count_chars && !print_lines && !count_complicated)
     378      {
     379        bool skip_read = false;
     380  
     381        if (0 < fstatus->failed)
     382          fstatus->failed = fstat (fd, &fstatus->st);
     383  
     384        /* For sized files, seek to one st_blksize before EOF rather than to EOF.
     385           This works better for files in proc-like file systems where
     386           the size is only approximate.  */
     387        if (! fstatus->failed && usable_st_size (&fstatus->st)
     388            && 0 <= fstatus->st.st_size)
     389          {
     390            off_t end_pos = fstatus->st.st_size;
     391            if (current_pos < 0)
     392              current_pos = lseek (fd, 0, SEEK_CUR);
     393  
     394            if (end_pos % page_size)
     395              {
     396                /* We only need special handling of /proc and /sys files etc.
     397                   when they're a multiple of PAGE_SIZE.  In the common case
     398                   for files with st_size not a multiple of PAGE_SIZE,
     399                   it's more efficient and accurate to use st_size.
     400  
     401                   Be careful here.  The current position may actually be
     402                   beyond the end of the file.  As in the example above.  */
     403  
     404                bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
     405                if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
     406                  skip_read = true;
     407                else
     408                  bytes = 0;
     409              }
     410            else
     411              {
     412                off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
     413                if (0 <= current_pos && current_pos < hi_pos
     414                    && 0 <= lseek (fd, hi_pos, SEEK_CUR))
     415                  bytes = hi_pos - current_pos;
     416              }
     417          }
     418  
     419        if (! skip_read)
     420          {
     421            fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
     422            while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
     423              {
     424                if (bytes_read == SAFE_READ_ERROR)
     425                  {
     426                    error (0, errno, "%s", quotef (file));
     427                    ok = false;
     428                    break;
     429                  }
     430                bytes += bytes_read;
     431              }
     432          }
     433      }
     434    else if (!count_chars && !count_complicated)
     435      {
     436  #ifdef USE_AVX2_WC_LINECOUNT
     437        static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
     438        if (!wc_lines_p)
     439          wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
     440  #else
     441        bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
     442          = wc_lines;
     443  #endif
     444  
     445        /* Use a separate loop when counting only lines or lines and bytes --
     446           but not chars or words.  */
     447        ok = wc_lines_p (file, fd, &lines, &bytes);
     448      }
     449  #if MB_LEN_MAX > 1
     450  # define SUPPORT_OLD_MBRTOWC 1
     451    else if (MB_CUR_MAX > 1)
     452      {
     453        bool in_word = false;
     454        uintmax_t linepos = 0;
     455        mbstate_t state = { 0, };
     456        bool in_shift = false;
     457  # if SUPPORT_OLD_MBRTOWC
     458        /* Back-up the state before each multibyte character conversion and
     459           move the last incomplete character of the buffer to the front
     460           of the buffer.  This is needed because we don't know whether
     461           the 'mbrtowc' function updates the state when it returns -2, --
     462           this is the ISO C 99 and glibc-2.2 behavior - or not - amended
     463           ANSI C, glibc-2.1 and Solaris 5.7 behavior.  We don't have an
     464           autoconf test for this, yet.  */
     465        size_t prev = 0; /* number of bytes carried over from previous round */
     466  # else
     467        const size_t prev = 0;
     468  # endif
     469  
     470        while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
     471          {
     472            char const *p;
     473  # if SUPPORT_OLD_MBRTOWC
     474            mbstate_t backup_state;
     475  # endif
     476            if (bytes_read == SAFE_READ_ERROR)
     477              {
     478                error (0, errno, "%s", quotef (file));
     479                ok = false;
     480                break;
     481              }
     482  
     483            bytes += bytes_read;
     484            p = buf;
     485            bytes_read += prev;
     486            do
     487              {
     488                wchar_t wide_char;
     489                size_t n;
     490                bool wide = true;
     491  
     492                if (!in_shift && is_basic (*p))
     493                  {
     494                    /* Handle most ASCII characters quickly, without calling
     495                       mbrtowc().  */
     496                    n = 1;
     497                    wide_char = *p;
     498                    wide = false;
     499                  }
     500                else
     501                  {
     502                    in_shift = true;
     503  # if SUPPORT_OLD_MBRTOWC
     504                    backup_state = state;
     505  # endif
     506                    n = mbrtowc (&wide_char, p, bytes_read, &state);
     507                    if (n == (size_t) -2)
     508                      {
     509  # if SUPPORT_OLD_MBRTOWC
     510                        state = backup_state;
     511  # endif
     512                        break;
     513                      }
     514                    if (n == (size_t) -1)
     515                      {
     516                        /* Remember that we read a byte, but don't complain
     517                           about the error.  Because of the decoding error,
     518                           this is a considered to be byte but not a
     519                           character (that is, chars is not incremented).  */
     520                        p++;
     521                        bytes_read--;
     522                        continue;
     523                      }
     524                    if (mbsinit (&state))
     525                      in_shift = false;
     526                    if (n == 0)
     527                      {
     528                        wide_char = 0;
     529                        n = 1;
     530                      }
     531                  }
     532  
     533                switch (wide_char)
     534                  {
     535                  case '\n':
     536                    lines++;
     537                    FALLTHROUGH;
     538                  case '\r':
     539                  case '\f':
     540                    if (linepos > linelength)
     541                      linelength = linepos;
     542                    linepos = 0;
     543                    goto mb_word_separator;
     544                  case '\t':
     545                    linepos += 8 - (linepos % 8);
     546                    goto mb_word_separator;
     547                  case ' ':
     548                    linepos++;
     549                    FALLTHROUGH;
     550                  case '\v':
     551                  mb_word_separator:
     552                    words += in_word;
     553                    in_word = false;
     554                    break;
     555                  default:
     556                    if (wide && iswprint (wide_char))
     557                      {
     558                        /* wcwidth can be expensive on OSX for example,
     559                           so avoid if not needed.  */
     560                        if (print_linelength)
     561                          {
     562                            int width = wcwidth (wide_char);
     563                            if (width > 0)
     564                              linepos += width;
     565                          }
     566                        if (iswspace (wide_char) || iswnbspace (wide_char))
     567                          goto mb_word_separator;
     568                        in_word = true;
     569                      }
     570                    else if (!wide && isprint (to_uchar (*p)))
     571                      {
     572                        linepos++;
     573                        if (isspace (to_uchar (*p)))
     574                          goto mb_word_separator;
     575                        in_word = true;
     576                      }
     577                    break;
     578                  }
     579  
     580                p += n;
     581                bytes_read -= n;
     582                chars++;
     583              }
     584            while (bytes_read > 0);
     585  
     586  # if SUPPORT_OLD_MBRTOWC
     587            if (bytes_read > 0)
     588              {
     589                if (bytes_read == BUFFER_SIZE)
     590                  {
     591                    /* Encountered a very long redundant shift sequence.  */
     592                    p++;
     593                    bytes_read--;
     594                  }
     595                memmove (buf, p, bytes_read);
     596              }
     597            prev = bytes_read;
     598  # endif
     599          }
     600        if (linepos > linelength)
     601          linelength = linepos;
     602        words += in_word;
     603      }
     604  #endif
     605    else
     606      {
     607        bool in_word = false;
     608        uintmax_t linepos = 0;
     609  
     610        while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
     611          {
     612            char const *p = buf;
     613            if (bytes_read == SAFE_READ_ERROR)
     614              {
     615                error (0, errno, "%s", quotef (file));
     616                ok = false;
     617                break;
     618              }
     619  
     620            bytes += bytes_read;
     621            do
     622              {
     623                switch (*p++)
     624                  {
     625                  case '\n':
     626                    lines++;
     627                    FALLTHROUGH;
     628                  case '\r':
     629                  case '\f':
     630                    if (linepos > linelength)
     631                      linelength = linepos;
     632                    linepos = 0;
     633                    goto word_separator;
     634                  case '\t':
     635                    linepos += 8 - (linepos % 8);
     636                    goto word_separator;
     637                  case ' ':
     638                    linepos++;
     639                    FALLTHROUGH;
     640                  case '\v':
     641                  word_separator:
     642                    words += in_word;
     643                    in_word = false;
     644                    break;
     645                  default:
     646                    if (isprint (to_uchar (p[-1])))
     647                      {
     648                        linepos++;
     649                        if (isspace (to_uchar (p[-1]))
     650                            || isnbspace (to_uchar (p[-1])))
     651                          goto word_separator;
     652                        in_word = true;
     653                      }
     654                    break;
     655                  }
     656              }
     657            while (--bytes_read);
     658          }
     659        if (linepos > linelength)
     660          linelength = linepos;
     661        words += in_word;
     662      }
     663  
     664    if (count_chars < print_chars)
     665      chars = bytes;
     666  
     667    if (total_mode != total_only)
     668      write_counts (lines, words, chars, bytes, linelength, file_x);
     669  
     670    if (ckd_add (&total_lines, total_lines, lines))
     671      total_lines_overflow = true;
     672    if (ckd_add (&total_words, total_words, words))
     673      total_words_overflow = true;
     674    if (ckd_add (&total_chars, total_chars, chars))
     675      total_chars_overflow = true;
     676    if (ckd_add (&total_bytes, total_bytes, bytes))
     677      total_bytes_overflow = true;
     678  
     679    if (linelength > max_line_length)
     680      max_line_length = linelength;
     681  
     682    return ok;
     683  }
     684  
     685  static bool
     686  wc_file (char const *file, struct fstatus *fstatus)
     687  {
     688    if (! file || STREQ (file, "-"))
     689      {
     690        have_read_stdin = true;
     691        xset_binary_mode (STDIN_FILENO, O_BINARY);
     692        return wc (STDIN_FILENO, file, fstatus, -1);
     693      }
     694    else
     695      {
     696        int fd = open (file, O_RDONLY | O_BINARY);
     697        if (fd == -1)
     698          {
     699            error (0, errno, "%s", quotef (file));
     700            return false;
     701          }
     702        else
     703          {
     704            bool ok = wc (fd, file, fstatus, 0);
     705            if (close (fd) != 0)
     706              {
     707                error (0, errno, "%s", quotef (file));
     708                return false;
     709              }
     710            return ok;
     711          }
     712      }
     713  }
     714  
     715  /* Return the file status for the NFILES files addressed by FILE.
     716     Optimize the case where only one number is printed, for just one
     717     file; in that case we can use a print width of 1, so we don't need
     718     to stat the file.  Handle the case of (nfiles == 0) in the same way;
     719     that happens when we don't know how long the list of file names will be.  */
     720  
     721  static struct fstatus *
     722  get_input_fstatus (size_t nfiles, char *const *file)
     723  {
     724    struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
     725  
     726    if (nfiles == 0
     727        || (nfiles == 1
     728            && ((print_lines + print_words + print_chars
     729                 + print_bytes + print_linelength)
     730                == 1)))
     731      fstatus[0].failed = 1;
     732    else
     733      {
     734        for (size_t i = 0; i < nfiles; i++)
     735          fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
     736                               ? fstat (STDIN_FILENO, &fstatus[i].st)
     737                               : stat (file[i], &fstatus[i].st));
     738      }
     739  
     740    return fstatus;
     741  }
     742  
     743  /* Return a print width suitable for the NFILES files whose status is
     744     recorded in FSTATUS.  Optimize the same special case that
     745     get_input_fstatus optimizes.  */
     746  
     747  ATTRIBUTE_PURE
     748  static int
     749  compute_number_width (size_t nfiles, struct fstatus const *fstatus)
     750  {
     751    int width = 1;
     752  
     753    if (0 < nfiles && fstatus[0].failed <= 0)
     754      {
     755        int minimum_width = 1;
     756        uintmax_t regular_total = 0;
     757  
     758        for (size_t i = 0; i < nfiles; i++)
     759          if (! fstatus[i].failed)
     760            {
     761              if (S_ISREG (fstatus[i].st.st_mode))
     762                regular_total += fstatus[i].st.st_size;
     763              else
     764                minimum_width = 7;
     765            }
     766  
     767        for (; 10 <= regular_total; regular_total /= 10)
     768          width++;
     769        if (width < minimum_width)
     770          width = minimum_width;
     771      }
     772  
     773    return width;
     774  }
     775  
     776  
     777  int
     778  main (int argc, char **argv)
     779  {
     780    bool ok;
     781    int optc;
     782    size_t nfiles;
     783    char **files;
     784    char *files_from = nullptr;
     785    struct fstatus *fstatus;
     786    struct Tokens tok;
     787  
     788    initialize_main (&argc, &argv);
     789    set_program_name (argv[0]);
     790    setlocale (LC_ALL, "");
     791    bindtextdomain (PACKAGE, LOCALEDIR);
     792    textdomain (PACKAGE);
     793  
     794    atexit (close_stdout);
     795  
     796    page_size = getpagesize ();
     797    /* Line buffer stdout to ensure lines are written atomically and immediately
     798       so that processes running in parallel do not intersperse their output.  */
     799    setvbuf (stdout, nullptr, _IOLBF, 0);
     800  
     801    posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
     802  
     803    print_lines = print_words = print_chars = print_bytes = false;
     804    print_linelength = false;
     805    total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
     806  
     807    while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
     808      switch (optc)
     809        {
     810        case 'c':
     811          print_bytes = true;
     812          break;
     813  
     814        case 'm':
     815          print_chars = true;
     816          break;
     817  
     818        case 'l':
     819          print_lines = true;
     820          break;
     821  
     822        case 'w':
     823          print_words = true;
     824          break;
     825  
     826        case 'L':
     827          print_linelength = true;
     828          break;
     829  
     830        case DEBUG_PROGRAM_OPTION:
     831          debug = true;
     832          break;
     833  
     834        case FILES0_FROM_OPTION:
     835          files_from = optarg;
     836          break;
     837  
     838        case TOTAL_OPTION:
     839          total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
     840          break;
     841  
     842        case_GETOPT_HELP_CHAR;
     843  
     844        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     845  
     846        default:
     847          usage (EXIT_FAILURE);
     848        }
     849  
     850    if (! (print_lines || print_words || print_chars || print_bytes
     851           || print_linelength))
     852      print_lines = print_words = print_bytes = true;
     853  
     854    bool read_tokens = false;
     855    struct argv_iterator *ai;
     856    if (files_from)
     857      {
     858        FILE *stream;
     859  
     860        /* When using --files0-from=F, you may not specify any files
     861           on the command-line.  */
     862        if (optind < argc)
     863          {
     864            error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
     865            fprintf (stderr, "%s\n",
     866                     _("file operands cannot be combined with --files0-from"));
     867            usage (EXIT_FAILURE);
     868          }
     869  
     870        if (STREQ (files_from, "-"))
     871          stream = stdin;
     872        else
     873          {
     874            stream = fopen (files_from, "r");
     875            if (stream == nullptr)
     876              error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
     877                     quoteaf (files_from));
     878          }
     879  
     880        /* Read the file list into RAM if we can detect its size and that
     881           size is reasonable.  Otherwise, we'll read a name at a time.  */
     882        struct stat st;
     883        if (fstat (fileno (stream), &st) == 0
     884            && S_ISREG (st.st_mode)
     885            && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
     886          {
     887            read_tokens = true;
     888            readtokens0_init (&tok);
     889            if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
     890              error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
     891                     quoteaf (files_from));
     892            files = tok.tok;
     893            nfiles = tok.n_tok;
     894            ai = argv_iter_init_argv (files);
     895          }
     896        else
     897          {
     898            files = nullptr;
     899            nfiles = 0;
     900            ai = argv_iter_init_stream (stream);
     901          }
     902      }
     903    else
     904      {
     905        static char *stdin_only[] = { nullptr };
     906        files = (optind < argc ? argv + optind : stdin_only);
     907        nfiles = (optind < argc ? argc - optind : 1);
     908        ai = argv_iter_init_argv (files);
     909      }
     910  
     911    if (!ai)
     912      xalloc_die ();
     913  
     914    fstatus = get_input_fstatus (nfiles, files);
     915    if (total_mode == total_only)
     916      number_width = 1;  /* No extra padding, since no alignment requirement.  */
     917    else
     918      number_width = compute_number_width (nfiles, fstatus);
     919  
     920    ok = true;
     921    for (int i = 0; /* */; i++)
     922      {
     923        bool skip_file = false;
     924        enum argv_iter_err ai_err;
     925        char *file_name = argv_iter (ai, &ai_err);
     926        if (!file_name)
     927          {
     928            switch (ai_err)
     929              {
     930              case AI_ERR_EOF:
     931                goto argv_iter_done;
     932              case AI_ERR_READ:
     933                error (0, errno, _("%s: read error"),
     934                       quotef (files_from));
     935                ok = false;
     936                goto argv_iter_done;
     937              case AI_ERR_MEM:
     938                xalloc_die ();
     939              default:
     940                affirm (!"unexpected error code from argv_iter");
     941              }
     942          }
     943        if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
     944          {
     945            /* Give a better diagnostic in an unusual case:
     946               printf - | wc --files0-from=- */
     947            error (0, 0, _("when reading file names from stdin, "
     948                           "no file name of %s allowed"),
     949                   quoteaf (file_name));
     950            skip_file = true;
     951          }
     952  
     953        if (!file_name[0])
     954          {
     955            /* Diagnose a zero-length file name.  When it's one
     956               among many, knowing the record number may help.
     957               FIXME: currently print the record number only with
     958               --files0-from=FILE.  Maybe do it for argv, too?  */
     959            if (files_from == nullptr)
     960              error (0, 0, "%s", _("invalid zero-length file name"));
     961            else
     962              {
     963                /* Using the standard 'filename:line-number:' prefix here is
     964                   not totally appropriate, since NUL is the separator, not NL,
     965                   but it might be better than nothing.  */
     966                unsigned long int file_number = argv_iter_n_args (ai);
     967                error (0, 0, "%s:%lu: %s", quotef (files_from),
     968                       file_number, _("invalid zero-length file name"));
     969              }
     970            skip_file = true;
     971          }
     972  
     973        if (skip_file)
     974          ok = false;
     975        else
     976          ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
     977  
     978        if (! nfiles)
     979          fstatus[0].failed = 1;
     980      }
     981   argv_iter_done:
     982  
     983    /* No arguments on the command line is fine.  That means read from stdin.
     984       However, no arguments on the --files0-from input stream is an error
     985       means don't read anything.  */
     986    if (ok && !files_from && argv_iter_n_args (ai) == 0)
     987      ok &= wc_file (nullptr, &fstatus[0]);
     988  
     989    if (read_tokens)
     990      readtokens0_free (&tok);
     991  
     992    if (total_mode != total_never
     993        && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
     994      {
     995        if (total_lines_overflow)
     996          {
     997            total_lines = UINTMAX_MAX;
     998            error (0, EOVERFLOW, _("total lines"));
     999            ok = false;
    1000          }
    1001        if (total_words_overflow)
    1002          {
    1003            total_words = UINTMAX_MAX;
    1004            error (0, EOVERFLOW, _("total words"));
    1005            ok = false;
    1006          }
    1007        if (total_chars_overflow)
    1008          {
    1009            total_chars = UINTMAX_MAX;
    1010            error (0, EOVERFLOW, _("total characters"));
    1011            ok = false;
    1012          }
    1013        if (total_bytes_overflow)
    1014          {
    1015            total_bytes = UINTMAX_MAX;
    1016            error (0, EOVERFLOW, _("total bytes"));
    1017            ok = false;
    1018          }
    1019  
    1020        write_counts (total_lines, total_words, total_chars, total_bytes,
    1021                      max_line_length,
    1022                      total_mode != total_only ? _("total") : nullptr);
    1023      }
    1024  
    1025    argv_iter_free (ai);
    1026  
    1027    free (fstatus);
    1028  
    1029    if (have_read_stdin && close (STDIN_FILENO) != 0)
    1030      error (EXIT_FAILURE, errno, "-");
    1031  
    1032    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
    1033  }