(root)/
tar-1.35/
src/
transform.c
       1  /* This file is part of GNU tar.
       2     Copyright 2006-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software; you can redistribute it and/or modify it
       5     under the terms of the GNU General Public License as published by the
       6     Free Software Foundation; either version 3, or (at your option) any later
       7     version.
       8  
       9     This program is distributed in the hope that it will be useful, but
      10     WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
      12     Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License along
      15     with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16  
      17  #include <system.h>
      18  #include <regex.h>
      19  #include "common.h"
      20  
/* How many matches of its regular expression a transform replaces. */
enum transform_type
  {
    transform_first,    /* Replace a single match, then copy the rest of
                           the name unchanged (the default) */
    transform_global    /* Keep replacing until the name is exhausted
                           (selected by the 'g' flag) */
  };
      26  
/* Kind of a single element of a compiled replacement expression; it
   selects which member of the union in struct replace_segm is valid. */
enum replace_segm_type
  {
    segm_literal,   /* Literal segment */
    segm_backref,   /* Back-reference segment */
    segm_case_ctl   /* Case control segment (GNU extension) */
  };
      33  
/* Case conversion operations that can appear in a replacement.  The
   replacement parser produces them from the escapes \E, \u, \l, \U and \L
   respectively. */
enum case_ctl_type
  {
    ctl_stop,       /* Stop case conversion */
    ctl_upcase_next,/* Turn the next character to uppercase */
    ctl_locase_next,/* Turn the next character to lowercase */
    ctl_upcase,     /* Turn the replacement to uppercase until ctl_stop */
    ctl_locase      /* Turn the replacement to lowercase until ctl_stop */
  };
      42  
/* One element of a compiled replacement expression.  The segments of a
   transform form a singly linked list that is walked in order every time
   a match is replaced. */
struct replace_segm
{
  struct replace_segm *next;    /* Next segment, NULL in the last one */
  enum replace_segm_type type;  /* Tells which member of V is in use */
  union
  {
    struct
    {
      char *ptr;                /* Allocated, NUL-terminated copy of the text */
      size_t size;              /* Length of the text, not counting the NUL */
    } literal;                /* type == segm_literal */
    size_t ref;               /* type == segm_backref; 0 is the whole match */
    enum case_ctl_type ctl;   /* type == segm_case_ctl */
  } v;
};
      58  
/* A single compiled "s/REGEX/REPLACEMENT/FLAGS" expression.  Transforms are
   kept in a list and applied in the order in which they were parsed. */
struct transform
{
  struct transform *next;       /* Next transform in the list */
  enum transform_type transform_type;
  int flags;                    /* XFORM_ bits; the transform is applied only
                                   if they intersect the caller's flags */
  unsigned match_number;        /* If nonzero, matches preceding this ordinal
                                   are copied through without replacement */
  regex_t regex;                /* Compiled regular expression */
  /* Compiled replacement expression */
  struct replace_segm *repl_head, *repl_tail;
  size_t segm_count; /* Number of elements in the above list */
};
      70  
      71  
      72  
/* Default XFORM_ flags given to subsequently parsed transforms.  A
   "flags=" item in a transform expression replaces this value. */
static int transform_flags = XFORM_ALL;
/* First and last element of the list of parsed transforms; both are NULL
   while the list is empty. */
static struct transform *transform_head, *transform_tail;
      75  
      76  static struct transform *
      77  new_transform (void)
      78  {
      79    struct transform *p = xzalloc (sizeof *p);
      80    if (transform_tail)
      81      transform_tail->next = p;
      82    else
      83      transform_head = p;
      84    transform_tail = p;
      85    return p;
      86  }
      87  
      88  static struct replace_segm *
      89  add_segment (struct transform *tf)
      90  {
      91    struct replace_segm *segm = xmalloc (sizeof *segm);
      92    segm->next = NULL;
      93    if (tf->repl_tail)
      94      tf->repl_tail->next = segm;
      95    else
      96      tf->repl_head = segm;
      97    tf->repl_tail = segm;
      98    tf->segm_count++;
      99    return segm;
     100  }
     101  
     102  static void
     103  add_literal_segment (struct transform *tf, const char *str, const char *end)
     104  {
     105    size_t len = end - str;
     106    if (len)
     107      {
     108        struct replace_segm *segm = add_segment (tf);
     109        segm->type = segm_literal;
     110        segm->v.literal.ptr = xmalloc (len + 1);
     111        memcpy (segm->v.literal.ptr, str, len);
     112        segm->v.literal.ptr[len] = 0;
     113        segm->v.literal.size = len;
     114      }
     115  }
     116  
     117  static void
     118  add_char_segment (struct transform *tf, int chr)
     119  {
     120    struct replace_segm *segm = add_segment (tf);
     121    segm->type = segm_literal;
     122    segm->v.literal.ptr = xmalloc (2);
     123    segm->v.literal.ptr[0] = chr;
     124    segm->v.literal.ptr[1] = 0;
     125    segm->v.literal.size = 1;
     126  }
     127  
     128  static void
     129  add_backref_segment (struct transform *tf, size_t ref)
     130  {
     131    struct replace_segm *segm = add_segment (tf);
     132    segm->type = segm_backref;
     133    segm->v.ref = ref;
     134  }
     135  
     136  static int
     137  parse_xform_flags (int *pflags, int c)
     138  {
     139    switch (c)
     140      {
     141      case 'r':
     142        *pflags |= XFORM_REGFILE;
     143        break;
     144  
     145      case 'R':
     146        *pflags &= ~XFORM_REGFILE;
     147        break;
     148  
     149      case 'h':
     150        *pflags |= XFORM_LINK;
     151        break;
     152  
     153      case 'H':
     154        *pflags &= ~XFORM_LINK;
     155        break;
     156  
     157      case 's':
     158        *pflags |= XFORM_SYMLINK;
     159        break;
     160  
     161      case 'S':
     162        *pflags &= ~XFORM_SYMLINK;
     163        break;
     164  
     165      default:
     166        return 1;
     167      }
     168    return 0;
     169  }
     170  
     171  static void
     172  add_case_ctl_segment (struct transform *tf, enum case_ctl_type ctl)
     173  {
     174    struct replace_segm *segm = add_segment (tf);
     175    segm->type = segm_case_ctl;
     176    segm->v.ctl = ctl;
     177  }
     178  
     179  static const char *
     180  parse_transform_expr (const char *expr)
     181  {
     182    int delim;
     183    int i, j, rc;
     184    char *str, *beg, *cur;
     185    const char *p;
     186    int cflags = 0;
     187    struct transform *tf = new_transform ();
     188  
     189    if (expr[0] != 's')
     190      {
     191        if (strncmp (expr, "flags=", 6) == 0)
     192  	{
     193  	  transform_flags = 0;
     194  	  for (expr += 6; *expr; expr++)
     195  	    {
     196  	      if (*expr == ';')
     197  		{
     198  		  expr++;
     199  		  break;
     200  		}
     201  	      if (parse_xform_flags (&transform_flags, *expr))
     202  		USAGE_ERROR ((0, 0, _("Unknown transform flag: %c"),
     203  			      *expr));
     204  	    }
     205  	  return expr;
     206  	}
     207        USAGE_ERROR ((0, 0, _("Invalid transform expression")));
     208      }
     209  
     210    delim = expr[1];
     211    if (!delim)
     212      USAGE_ERROR ((0, 0, _("Invalid transform expression")));
     213  
     214    /* Scan regular expression */
     215    for (i = 2; expr[i] && expr[i] != delim; i++)
     216      if (expr[i] == '\\' && expr[i+1])
     217        i++;
     218  
     219    if (expr[i] != delim)
     220      USAGE_ERROR ((0, 0, _("Invalid transform expression")));
     221  
     222    /* Scan replacement expression */
     223    for (j = i + 1; expr[j] && expr[j] != delim; j++)
     224      if (expr[j] == '\\' && expr[j+1])
     225        j++;
     226  
     227    if (expr[j] != delim)
     228      USAGE_ERROR ((0, 0, _("Invalid transform expression")));
     229  
     230    /* Check flags */
     231    tf->transform_type = transform_first;
     232    tf->flags = transform_flags;
     233    for (p = expr + j + 1; *p && *p != ';'; p++)
     234      switch (*p)
     235        {
     236        case 'g':
     237  	tf->transform_type = transform_global;
     238  	break;
     239  
     240        case 'i':
     241  	cflags |= REG_ICASE;
     242  	break;
     243  
     244        case 'x':
     245  	cflags |= REG_EXTENDED;
     246  	break;
     247  
     248        case '0': case '1': case '2': case '3': case '4':
     249        case '5': case '6': case '7': case '8': case '9':
     250  	tf->match_number = strtoul (p, (char**) &p, 0);
     251  	p--;
     252  	break;
     253  
     254        default:
     255  	if (parse_xform_flags (&tf->flags, *p))
     256  	  USAGE_ERROR ((0, 0, _("Unknown flag in transform expression: %c"),
     257  			*p));
     258        }
     259  
     260    if (*p == ';')
     261      p++;
     262  
     263    /* Extract and compile regex */
     264    str = xmalloc (i - 1);
     265    memcpy (str, expr + 2, i - 2);
     266    str[i - 2] = 0;
     267  
     268    rc = regcomp (&tf->regex, str, cflags);
     269  
     270    if (rc)
     271      {
     272        char errbuf[512];
     273        regerror (rc, &tf->regex, errbuf, sizeof (errbuf));
     274        USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
     275      }
     276  
     277    if (str[0] == '^' || (i > 2 && str[i - 3] == '$'))
     278      tf->transform_type = transform_first;
     279  
     280    free (str);
     281  
     282    /* Extract and compile replacement expr */
     283    i++;
     284    str = xmalloc (j - i + 1);
     285    memcpy (str, expr + i, j - i);
     286    str[j - i] = 0;
     287  
     288    for (cur = beg = str; *cur;)
     289      {
     290        if (*cur == '\\')
     291  	{
     292  	  size_t n;
     293  
     294  	  add_literal_segment (tf, beg, cur);
     295  	  switch (*++cur)
     296  	    {
     297  	    case '0': case '1': case '2': case '3': case '4':
     298  	    case '5': case '6': case '7': case '8': case '9':
     299  	      n = strtoul (cur, &cur, 10);
     300  	      if (n > tf->regex.re_nsub)
     301  		USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
     302  	      add_backref_segment (tf, n);
     303  	      break;
     304  
     305  	    case '\\':
     306  	      add_char_segment (tf, '\\');
     307  	      cur++;
     308  	      break;
     309  
     310  	    case 'a':
     311  	      add_char_segment (tf, '\a');
     312  	      cur++;
     313  	      break;
     314  
     315  	    case 'b':
     316  	      add_char_segment (tf, '\b');
     317  	      cur++;
     318  	      break;
     319  
     320  	    case 'f':
     321  	      add_char_segment (tf, '\f');
     322  	      cur++;
     323  	      break;
     324  
     325  	    case 'n':
     326  	      add_char_segment (tf, '\n');
     327  	      cur++;
     328  	      break;
     329  
     330  	    case 'r':
     331  	      add_char_segment (tf, '\r');
     332  	      cur++;
     333  	      break;
     334  
     335  	    case 't':
     336  	      add_char_segment (tf, '\t');
     337  	      cur++;
     338  	      break;
     339  
     340  	    case 'v':
     341  	      add_char_segment (tf, '\v');
     342  	      cur++;
     343  	      break;
     344  
     345  	    case '&':
     346  	      add_char_segment (tf, '&');
     347  	      cur++;
     348  	      break;
     349  
     350  	    case 'L':
     351  	      /* Turn the replacement to lowercase until a '\U' or '\E'
     352  		 is found, */
     353  	      add_case_ctl_segment (tf, ctl_locase);
     354  	      cur++;
     355  	      break;
     356  
     357  	    case 'l':
     358  	      /* Turn the next character to lowercase, */
     359  	      add_case_ctl_segment (tf, ctl_locase_next);
     360  	      cur++;
     361  	      break;
     362  
     363  	    case 'U':
     364  	      /* Turn the replacement to uppercase until a '\L' or '\E'
     365  		 is found, */
     366  	      add_case_ctl_segment (tf, ctl_upcase);
     367  	      cur++;
     368  	      break;
     369  
     370  	    case 'u':
     371  	      /* Turn the next character to uppercase, */
     372  	      add_case_ctl_segment (tf, ctl_upcase_next);
     373  	      cur++;
     374  	      break;
     375  
     376  	    case 'E':
     377  	      /* Stop case conversion started by '\L' or '\U'. */
     378  	      add_case_ctl_segment (tf, ctl_stop);
     379  	      cur++;
     380  	      break;
     381  
     382  	    default:
     383  	      if (*cur == delim)
     384  		add_char_segment (tf, delim);
     385  	      else
     386  		{
     387  		  char buf[2];
     388  		  buf[0] = '\\';
     389  		  buf[1] = *cur;
     390  		  add_literal_segment (tf, buf, buf + 2);
     391  		}
     392  	      cur++;
     393  	      break;
     394  	    }
     395  	  beg = cur;
     396  	}
     397        else if (*cur == '&')
     398  	{
     399  	  add_literal_segment (tf, beg, cur);
     400  	  add_backref_segment (tf, 0);
     401  	  beg = ++cur;
     402  	}
     403        else
     404  	cur++;
     405      }
     406    add_literal_segment (tf, beg, cur);
     407    free(str);
     408  
     409    return p;
     410  }
     411  
     412  void
     413  set_transform_expr (const char *expr)
     414  {
     415    while (*expr)
     416      expr = parse_transform_expr (expr);
     417  }
     418  
     419  /* Run case conversion specified by CASE_CTL on array PTR of SIZE
     420     characters. Returns pointer to statically allocated storage. */
     421  static char *
     422  run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
     423  {
     424    static char *case_ctl_buffer;
     425    static size_t case_ctl_bufsize;
     426    char *p;
     427  
     428    if (case_ctl_bufsize < size)
     429      {
     430        case_ctl_bufsize = size;
     431        case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
     432      }
     433    memcpy (case_ctl_buffer, ptr, size);
     434    switch (case_ctl)
     435      {
     436      case ctl_upcase_next:
     437        case_ctl_buffer[0] = toupper ((unsigned char) case_ctl_buffer[0]);
     438        break;
     439  
     440      case ctl_locase_next:
     441        case_ctl_buffer[0] = tolower ((unsigned char) case_ctl_buffer[0]);
     442        break;
     443  
     444      case ctl_upcase:
     445        for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
     446  	*p = toupper ((unsigned char) *p);
     447        break;
     448  
     449      case ctl_locase:
     450        for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
     451  	*p = tolower ((unsigned char) *p);
     452        break;
     453  
     454      case ctl_stop:
     455        break;
     456      }
     457    return case_ctl_buffer;
     458  }
     459  
     460  
/* Obstack in which transformed names are built. */
static struct obstack stk;
/* True once STK has been initialized; this happens on first use. */
static bool stk_init;
     463  
     464  static void
     465  _single_transform_name_to_obstack (struct transform *tf, char *input)
     466  {
     467    regmatch_t *rmp;
     468    int rc;
     469    size_t nmatches = 0;
     470    enum case_ctl_type case_ctl = ctl_stop,  /* Current case conversion op */
     471                       save_ctl = ctl_stop;  /* Saved case_ctl for \u and \l */
     472  
     473    /* Reset case conversion after a single-char operation */
     474  #define CASE_CTL_RESET()  if (case_ctl == ctl_upcase_next     \
     475  			      || case_ctl == ctl_locase_next) \
     476                              {                                 \
     477                                case_ctl = save_ctl;            \
     478                                save_ctl = ctl_stop;            \
     479  			    }
     480  
     481    rmp = xmalloc ((tf->regex.re_nsub + 1) * sizeof (*rmp));
     482  
     483    while (*input)
     484      {
     485        size_t disp;
     486        char *ptr;
     487  
     488        rc = regexec (&tf->regex, input, tf->regex.re_nsub + 1, rmp, 0);
     489  
     490        if (rc == 0)
     491  	{
     492  	  struct replace_segm *segm;
     493  
     494  	  disp = rmp[0].rm_eo;
     495  
     496  	  nmatches++;
     497  	  if (tf->match_number && nmatches < tf->match_number)
     498  	    {
     499  	      obstack_grow (&stk, input, disp);
     500  	      input += disp;
     501  	      continue;
     502  	    }
     503  
     504  	  if (rmp[0].rm_so)
     505  	    obstack_grow (&stk, input, rmp[0].rm_so);
     506  
     507  	  for (segm = tf->repl_head; segm; segm = segm->next)
     508  	    {
     509  	      switch (segm->type)
     510  		{
     511  		case segm_literal:    /* Literal segment */
     512  		  if (case_ctl == ctl_stop)
     513  		    ptr = segm->v.literal.ptr;
     514  		  else
     515  		    {
     516  		      ptr = run_case_conv (case_ctl,
     517  					   segm->v.literal.ptr,
     518  					   segm->v.literal.size);
     519  		      CASE_CTL_RESET();
     520  		    }
     521  		  obstack_grow (&stk, ptr, segm->v.literal.size);
     522  		  break;
     523  
     524  		case segm_backref:    /* Back-reference segment */
     525  		  if (rmp[segm->v.ref].rm_so != -1
     526  		      && rmp[segm->v.ref].rm_eo != -1)
     527  		    {
     528  		      size_t size = rmp[segm->v.ref].rm_eo
     529  			              - rmp[segm->v.ref].rm_so;
     530  		      ptr = input + rmp[segm->v.ref].rm_so;
     531  		      if (case_ctl != ctl_stop)
     532  			{
     533  			  ptr = run_case_conv (case_ctl, ptr, size);
     534  			  CASE_CTL_RESET();
     535  			}
     536  
     537  		      obstack_grow (&stk, ptr, size);
     538  		    }
     539  		  break;
     540  
     541  		case segm_case_ctl:
     542  		  switch (segm->v.ctl)
     543  		    {
     544  		    case ctl_upcase_next:
     545  		    case ctl_locase_next:
     546  		      switch (save_ctl)
     547  			{
     548  			case ctl_stop:
     549  			case ctl_upcase:
     550  			case ctl_locase:
     551  			  save_ctl = case_ctl;
     552  			default:
     553  			  break;
     554  			}
     555  		      FALLTHROUGH;
     556  
     557  		    case ctl_upcase:
     558  		    case ctl_locase:
     559  		    case ctl_stop:
     560  		      case_ctl = segm->v.ctl;
     561  		    }
     562  		}
     563  	    }
     564  	}
     565        else
     566  	{
     567  	  disp = strlen (input);
     568  	  obstack_grow (&stk, input, disp);
     569  	}
     570  
     571        input += disp;
     572  
     573        if (tf->transform_type == transform_first)
     574  	{
     575  	  obstack_grow (&stk, input, strlen (input));
     576  	  break;
     577  	}
     578      }
     579  
     580    obstack_1grow (&stk, 0);
     581    free (rmp);
     582  }
     583  
     584  static bool
     585  _transform_name_to_obstack (int flags, char *input, char **output)
     586  {
     587    struct transform *tf;
     588    bool alloced = false;
     589  
     590    if (!stk_init)
     591      {
     592        obstack_init (&stk);
     593        stk_init = true;
     594      }
     595  
     596    for (tf = transform_head; tf; tf = tf->next)
     597      {
     598        if (tf->flags & flags)
     599  	{
     600  	  _single_transform_name_to_obstack (tf, input);
     601  	  input = obstack_finish (&stk);
     602  	  alloced = true;
     603  	}
     604      }
     605    *output = input;
     606    return alloced;
     607  }
     608  
     609  bool
     610  transform_name_fp (char **pinput, int flags,
     611  		   char *(*fun)(char *, void *), void *dat)
     612  {
     613      char *str;
     614      bool ret = _transform_name_to_obstack (flags, *pinput, &str);
     615      if (ret)
     616        {
     617  	assign_string (pinput, fun ? fun (str, dat) : str);
     618  	obstack_free (&stk, str);
     619        }
     620      else if (fun)
     621        {
     622  	*pinput = NULL;
     623  	assign_string (pinput, fun (str, dat));
     624  	free (str);
     625  	ret = true;
     626        }
     627      return ret;
     628  }
     629  
     630  bool
     631  transform_name (char **pinput, int type)
     632  {
     633    return transform_name_fp (pinput, type, NULL, NULL);
     634  }
     635  
     636  bool
     637  transform_program_p (void)
     638  {
     639    return transform_head != NULL;
     640  }