(root)/
coreutils-9.4/
src/
set-fields.c
       1  /* set-fields.c -- common functions for parsing field list
       2     Copyright (C) 2015-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Extracted from cut.c by Assaf Gordon */
      18  
      19  #include <config.h>
      20  
      21  #include "system.h"
      22  #include "quote.h"
      23  #include "set-fields.h"
      24  
      25  /* Array of `struct field_range_pair' holding all the finite ranges. */
      26  struct field_range_pair *frp;
      27  
      28  /* Number of finite ranges specified by the user. */
      29  size_t n_frp;
      30  
      31  /* Number of `struct field_range_pair's allocated. */
      32  static size_t n_frp_allocated;
      33  
      34  #define FATAL_ERROR(Message)                                            \
      35    do                                                                    \
      36      {                                                                   \
      37        error (0, 0, (Message));                                          \
      38        usage (EXIT_FAILURE);                                             \
      39      }                                                                   \
      40    while (0)
      41  
      42  /* Append LOW, HIGH to the list RP of range pairs, allocating additional
      43     space if necessary.  Update global variable N_FRP.  When allocating,
      44     update global variable N_FRP_ALLOCATED.  */
      45  static void
      46  add_range_pair (uintmax_t lo, uintmax_t hi)
      47  {
      48    if (n_frp == n_frp_allocated)
      49      frp = X2NREALLOC (frp, &n_frp_allocated);
      50    frp[n_frp].lo = lo;
      51    frp[n_frp].hi = hi;
      52    ++n_frp;
      53  }
      54  
      55  
      56  /* Comparison function for qsort to order the list of
      57     struct field_range_pairs.  */
      58  static int
      59  compare_ranges (const void *a, const void *b)
      60  {
      61    struct field_range_pair const *ap = a, *bp = b;
      62    return (ap->lo > bp->lo) - (ap->lo < bp->lo);
      63  }
      64  
      65  /* Reallocate Range Pair entries, with corresponding
      66     entries outside the range of each specified entry.  */
      67  
      68  static void
      69  complement_rp (void)
      70  {
      71    struct field_range_pair *c = frp;
      72    size_t n = n_frp;
      73  
      74    frp = nullptr;
      75    n_frp = 0;
      76    n_frp_allocated = 0;
      77  
      78    if (c[0].lo > 1)
      79      add_range_pair (1, c[0].lo - 1);
      80  
      81    for (size_t i = 1; i < n; ++i)
      82      {
      83        if (c[i - 1].hi + 1 == c[i].lo)
      84          continue;
      85  
      86        add_range_pair (c[i - 1].hi + 1, c[i].lo - 1);
      87      }
      88  
      89    if (c[n - 1].hi < UINTMAX_MAX)
      90      add_range_pair (c[n - 1].hi + 1, UINTMAX_MAX);
      91  
      92    free (c);
      93  }
      94  
      95  /* Given the list of field or byte range specifications FIELDSTR,
      96     allocate and initialize the FRP array. FIELDSTR should
      97     be composed of one or more numbers or ranges of numbers, separated
      98     by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
      99     'n-' means 'n' through end of line.
     100     n=0 and n>=UINTMAX_MAX values will trigger an error.
     101  
     102     if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
     103     (otherwise a single dash triggers an error).
     104  
     105     if SETFLD_COMPLEMENT option is used, the specified field list
     106     is complemented (e.g. '1-3' will result in fields '4-').
     107  
     108     if SETFLD_ERRMSG_USE_POS option is used, error messages
     109     will say 'position' (or 'byte/character positions')
     110     instead of fields (used with cut -b/-c).
     111  
     112     The function terminates on failure.
     113  
     114     Upon return, the FRP array is initialized to contain
     115     a non-overlapping, increasing list of field ranges.
     116  
     117     N_FRP holds the number of field ranges in the FRP array.
     118  
     119     The first field is stored as 1 (zero is not used).
     120     An open-ended range (i.e., until the last field of the input line)
     121     is indicated with hi = UINTMAX_MAX.
     122  
     123     A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last
     124     field range pair.
     125  
     126     Examples:
     127     given '1-2,4', frp = [ { .lo = 1,           .hi = 2 },
     128                            { .lo = 4,           .hi = 4 },
     129                            { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
     130  
     131     given '3-',    frp = [ { .lo = 3,           .hi = UINTMAX_MAX },
     132                            { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
     133  */
     134  void
     135  set_fields (char const *fieldstr, unsigned int options)
     136  {
     137    uintmax_t initial = 1;	/* Value of first number in a range.  */
     138    uintmax_t value = 0;		/* If nonzero, a number being accumulated.  */
     139    bool lhs_specified = false;
     140    bool rhs_specified = false;
     141    bool dash_found = false;	/* True if a '-' is found in this field.  */
     142  
     143    bool in_digits = false;
     144  
     145    /* Collect and store in RP the range end points. */
     146  
     147    /* Special case: '--field=-' means all fields, emulate '--field=1-' . */
     148    if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-"))
     149      {
     150        value = 1;
     151        lhs_specified = true;
     152        dash_found = true;
     153        fieldstr++;
     154      }
     155  
     156    while (true)
     157      {
     158        if (*fieldstr == '-')
     159          {
     160            in_digits = false;
     161            /* Starting a range. */
     162            if (dash_found)
     163              FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
     164                           ? _("invalid byte or character range")
     165                           : _("invalid field range"));
     166  
     167            dash_found = true;
     168            fieldstr++;
     169  
     170            if (lhs_specified && !value)
     171              FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
     172                           ? _("byte/character positions are numbered from 1")
     173                           : _("fields are numbered from 1"));
     174  
     175            initial = (lhs_specified ? value : 1);
     176            value = 0;
     177          }
     178        else if (*fieldstr == ','
     179                 || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
     180          {
     181            in_digits = false;
     182            /* Ending the string, or this field/byte sublist. */
     183            if (dash_found)
     184              {
     185                dash_found = false;
     186  
     187                if (!lhs_specified && !rhs_specified)
     188                  {
     189                    /* if a lone dash is allowed, emulate '1-' for all fields */
     190                    if (options & SETFLD_ALLOW_DASH)
     191                      initial = 1;
     192                    else
     193                      FATAL_ERROR (_("invalid range with no endpoint: -"));
     194                  }
     195  
     196                /* A range.  Possibilities: -n, m-n, n-.
     197                   In any case, 'initial' contains the start of the range. */
     198                if (!rhs_specified)
     199                  {
     200                    /* 'n-'.  From 'initial' to end of line. */
     201                    add_range_pair (initial, UINTMAX_MAX);
     202                  }
     203                else
     204                  {
     205                    /* 'm-n' or '-n' (1-n). */
     206                    if (value < initial)
     207                      FATAL_ERROR (_("invalid decreasing range"));
     208  
     209                    add_range_pair (initial, value);
     210                  }
     211                value = 0;
     212              }
     213            else
     214              {
     215                /* A simple field number, not a range. */
     216                if (value == 0)
     217                  FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS)
     218                               ? _("byte/character positions are numbered from 1")
     219                               : _("fields are numbered from 1"));
     220  
     221                add_range_pair (value, value);
     222                value = 0;
     223              }
     224  
     225            if (*fieldstr == '\0')
     226              break;
     227  
     228            fieldstr++;
     229            lhs_specified = false;
     230            rhs_specified = false;
     231          }
     232        else if (ISDIGIT (*fieldstr))
     233          {
     234            /* Record beginning of digit string, in case we have to
     235               complain about it.  */
     236            static char const *num_start;
     237            if (!in_digits || !num_start)
     238              num_start = fieldstr;
     239            in_digits = true;
     240  
     241            if (dash_found)
     242              rhs_specified = 1;
     243            else
     244              lhs_specified = 1;
     245  
     246            /* Detect overflow.  */
     247            if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t)
     248                || value == UINTMAX_MAX)
     249              {
     250                /* In case the user specified -c$(echo 2^64|bc),22,
     251                   complain only about the first number.  */
     252                /* Determine the length of the offending number.  */
     253                size_t len = strspn (num_start, "0123456789");
     254                char *bad_num = ximemdup0 (num_start, len);
     255                error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
     256                             ?_("byte/character offset %s is too large")
     257                             :_("field number %s is too large"),
     258                             quote (bad_num));
     259                free (bad_num);
     260                usage (EXIT_FAILURE);
     261              }
     262  
     263            fieldstr++;
     264          }
     265        else
     266          {
     267            error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
     268                         ?_("invalid byte/character position %s")
     269                         :_("invalid field value %s"),
     270                         quote (fieldstr));
     271            usage (EXIT_FAILURE);
     272          }
     273      }
     274  
     275    if (!n_frp)
     276      FATAL_ERROR ((options&SETFLD_ERRMSG_USE_POS)
     277                   ? _("missing list of byte/character positions")
     278                   : _("missing list of fields"));
     279  
     280    qsort (frp, n_frp, sizeof (frp[0]), compare_ranges);
     281  
     282    /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
     283    for (size_t i = 0; i < n_frp; ++i)
     284      {
     285        for (size_t j = i + 1; j < n_frp; ++j)
     286          {
     287            if (frp[j].lo <= frp[i].hi)
     288              {
     289                frp[i].hi = MAX (frp[j].hi, frp[i].hi);
     290                memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp);
     291                n_frp--;
     292                j--;
     293              }
     294            else
     295              break;
     296          }
     297      }
     298  
     299    if (options & SETFLD_COMPLEMENT)
     300      complement_rp ();
     301  
     302    /* After merging, reallocate RP so we release memory to the system.
     303       Also add a sentinel at the end of RP, to avoid out of bounds access
     304       and for performance reasons.  */
     305    ++n_frp;
     306    frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
     307    frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX;
     308  }