(root)/
coreutils-9.4/
lib/
getndelim2.c
       1  /* getndelim2 - Read a line from a stream, stopping at one of 2 delimiters,
       2     with bounded memory allocation.
       3  
       4     Copyright (C) 1993, 1996-1998, 2000, 2003-2004, 2006, 2008-2023 Free
       5     Software Foundation, Inc.
       6  
       7     This file is free software: you can redistribute it and/or modify
       8     it under the terms of the GNU Lesser General Public License as
       9     published by the Free Software Foundation; either version 2.1 of the
      10     License, or (at your option) any later version.
      11  
      12     This file is distributed in the hope that it will be useful,
      13     but WITHOUT ANY WARRANTY; without even the implied warranty of
      14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15     GNU Lesser General Public License for more details.
      16  
      17     You should have received a copy of the GNU Lesser General Public License
      18     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      19  
      20  /* Originally written by Jan Brittenson, bson@gnu.ai.mit.edu.  */
      21  
      22  #include <config.h>
      23  
      24  #include "getndelim2.h"
      25  
      26  #include <stddef.h>
      27  #include <stdlib.h>
      28  #include <string.h>
      29  
      30  #if USE_UNLOCKED_IO
      31  # include "unlocked-io.h"
      32  #endif
      33  #if !HAVE_FLOCKFILE
      34  # undef flockfile
      35  # define flockfile(x) ((void) 0)
      36  #endif
      37  #if !HAVE_FUNLOCKFILE
      38  # undef funlockfile
      39  # define funlockfile(x) ((void) 0)
      40  #endif
      41  
      42  #include <limits.h>
      43  #include <stdint.h>
      44  
      45  #include "freadptr.h"
      46  #include "freadseek.h"
      47  #include "memchr2.h"
      48  
      49  /* Avoid false GCC warning "'c' may be used uninitialized".  */
      50  #if __GNUC__ + (__GNUC_MINOR__ >= 7) > 4
      51  # pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
      52  #endif
      53  
      54  /* The maximum value that getndelim2 can return without suffering from
      55     overflow problems, either internally (because of pointer
      56     subtraction overflow) or due to the API (because of ssize_t).  */
      57  #define GETNDELIM2_MAXIMUM (PTRDIFF_MAX < SSIZE_MAX ? PTRDIFF_MAX : SSIZE_MAX)
      58  
      59  /* Try to add at least this many bytes when extending the buffer.
      60     MIN_CHUNK must be no greater than GETNDELIM2_MAXIMUM.  */
      61  #define MIN_CHUNK 64
      62  
      63  ssize_t
      64  getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax,
      65              int delim1, int delim2, FILE *stream)
      66  {
      67    size_t nbytes_avail;          /* Allocated but unused bytes in *LINEPTR.  */
      68    char *read_pos;               /* Where we're reading into *LINEPTR. */
      69    ssize_t bytes_stored = -1;
      70    char *ptr = *lineptr;
      71    size_t size = *linesize;
      72    bool found_delimiter;
      73  
      74    if (!ptr)
      75      {
      76        size = nmax < MIN_CHUNK ? nmax : MIN_CHUNK;
      77        ptr = malloc (size);
      78        if (!ptr)
      79          return -1;
      80      }
      81  
      82    if (size < offset)
      83      goto done;
      84  
      85    nbytes_avail = size - offset;
      86    read_pos = ptr + offset;
      87  
      88    if (nbytes_avail == 0 && nmax <= size)
      89      goto done;
      90  
      91    /* Normalize delimiters, since memchr2 doesn't handle EOF.  */
      92    if (delim1 == EOF)
      93      delim1 = delim2;
      94    else if (delim2 == EOF)
      95      delim2 = delim1;
      96  
      97    flockfile (stream);
      98  
      99    found_delimiter = false;
     100    do
     101      {
     102        /* Here always ptr + size == read_pos + nbytes_avail.
     103           Also nbytes_avail > 0 || size < nmax.  */
     104  
     105        int c;
     106        const char *buffer;
     107        size_t buffer_len;
     108  
     109        buffer = freadptr (stream, &buffer_len);
     110        if (buffer)
     111          {
     112            if (delim1 != EOF)
     113              {
     114                const char *end = memchr2 (buffer, delim1, delim2, buffer_len);
     115                if (end)
     116                  {
     117                    buffer_len = end - buffer + 1;
     118                    found_delimiter = true;
     119                  }
     120              }
     121          }
     122        else
     123          {
     124            c = getc (stream);
     125            if (c == EOF)
     126              {
     127                /* Return partial line, if any.  */
     128                if (read_pos == ptr)
     129                  goto unlock_done;
     130                else
     131                  break;
     132              }
     133            if (c == delim1 || c == delim2)
     134              found_delimiter = true;
     135            buffer_len = 1;
     136          }
     137  
     138        /* We always want at least one byte left in the buffer, since we
     139           always (unless we get an error while reading the first byte)
     140           NUL-terminate the line buffer.  */
     141  
     142        if (nbytes_avail < buffer_len + 1 && size < nmax)
     143          {
     144            /* Grow size proportionally, not linearly, to avoid O(n^2)
     145               running time.  */
     146            size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size;
     147            char *newptr;
     148  
     149            /* Increase newsize so that it becomes
     150               >= (read_pos - ptr) + buffer_len.  */
     151            if (newsize - (read_pos - ptr) < buffer_len + 1)
     152              newsize = (read_pos - ptr) + buffer_len + 1;
     153            /* Respect nmax.  This handles possible integer overflow.  */
     154            if (! (size < newsize && newsize <= nmax))
     155              newsize = nmax;
     156  
     157            if (GETNDELIM2_MAXIMUM < newsize - offset)
     158              {
     159                size_t newsizemax = offset + GETNDELIM2_MAXIMUM + 1;
     160                if (size == newsizemax)
     161                  goto unlock_done;
     162                newsize = newsizemax;
     163              }
     164  
     165            nbytes_avail = newsize - (read_pos - ptr);
     166            newptr = realloc (ptr, newsize);
     167            if (!newptr)
     168              goto unlock_done;
     169            ptr = newptr;
     170            size = newsize;
     171            read_pos = size - nbytes_avail + ptr;
     172          }
     173  
     174        /* Here, if size < nmax, nbytes_avail >= buffer_len + 1.
     175           If size == nmax, nbytes_avail > 0.  */
     176  
     177        if (1 < nbytes_avail)
     178          {
     179            size_t copy_len = nbytes_avail - 1;
     180            if (buffer_len < copy_len)
     181              copy_len = buffer_len;
     182            if (buffer)
     183              memcpy (read_pos, buffer, copy_len);
     184            else
     185              *read_pos = c;
     186            read_pos += copy_len;
     187            nbytes_avail -= copy_len;
     188          }
     189  
     190        /* Here still nbytes_avail > 0.  */
     191  
     192        if (buffer && freadseek (stream, buffer_len))
     193          goto unlock_done;
     194      }
     195    while (!found_delimiter);
     196  
     197    /* Done - NUL terminate and return the number of bytes read.
     198       At this point we know that nbytes_avail >= 1.  */
     199    *read_pos = '\0';
     200  
     201    bytes_stored = read_pos - (ptr + offset);
     202  
     203   unlock_done:
     204    funlockfile (stream);
     205  
     206   done:
     207    *lineptr = ptr;
     208    *linesize = size;
     209    return bytes_stored ? bytes_stored : -1;
     210  }