(root)/
glib-2.79.0/
glib/
gshell.c
       1  /* gshell.c - Shell-related utilities
       2   *
       3   *  Copyright 2000 Red Hat, Inc.
       4   *  g_execvpe implementation based on GNU libc execvp:
       5   *   Copyright 1991, 92, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
       6   *
       7   * SPDX-License-Identifier: LGPL-2.1-or-later
       8   *
       9   * This library is free software; you can redistribute it and/or
      10   * modify it under the terms of the GNU Lesser General Public
      11   * License as published by the Free Software Foundation; either
      12   * version 2.1 of the License, or (at your option) any later version.
      13   *
      14   * This library is distributed in the hope that it will be useful,
      15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      17   * Lesser General Public License for more details.
      18   *
      19   * You should have received a copy of the GNU Lesser General Public License
      20   * along with this library; if not, see <http://www.gnu.org/licenses/>.
      21   */
      22  
      23  #include "config.h"
      24  
      25  #include <string.h>
      26  
      27  #include "gshell.h"
      28  
      29  #include "gslist.h"
      30  #include "gstrfuncs.h"
      31  #include "gstring.h"
      32  #include "gtestutils.h"
      33  #include "glibintl.h"
      34  #include "gthread.h"
      35  
      36  /**
      37   * G_SHELL_ERROR:
      38   *
      39   * Error domain for shell functions.
      40   *
      41   * Errors in this domain will be from the #GShellError enumeration.
      42   *
      43   * See #GError for information on error domains.
      44   **/
      45  
      46  /**
      47   * GShellError:
      48   * @G_SHELL_ERROR_BAD_QUOTING: Mismatched or otherwise mangled quoting.
      49   * @G_SHELL_ERROR_EMPTY_STRING: String to be parsed was empty.
      50   * @G_SHELL_ERROR_FAILED: Some other error.
      51   *
      52   * Error codes returned by shell functions.
      53   **/
/* Defines the quark for the "g-shell-error-quark" error domain.
 * NOTE(review): presumably this expands to the g_shell_error_quark()
 * accessor behind G_SHELL_ERROR; confirm against the G_DEFINE_QUARK
 * definition and gshell.h.
 */
G_DEFINE_QUARK (g-shell-error-quark, g_shell_error)
      55  
      56  /* Single quotes preserve the literal string exactly. escape
      57   * sequences are not allowed; not even \' - if you want a '
      58   * in the quoted text, you have to do something like 'foo'\''bar'
      59   *
      60   * Double quotes allow $ ` " \ and newline to be escaped with backslash.
      61   * Otherwise double quotes preserve things literally.
      62   */
      63  
      64  static gboolean 
      65  unquote_string_inplace (gchar* str, gchar** end, GError** err)
      66  {
      67    gchar* dest;
      68    gchar* s;
      69    gchar quote_char;
      70    
      71    g_return_val_if_fail(end != NULL, FALSE);
      72    g_return_val_if_fail(err == NULL || *err == NULL, FALSE);
      73    g_return_val_if_fail(str != NULL, FALSE);
      74    
      75    dest = s = str;
      76  
      77    quote_char = *s;
      78    
      79    if (!(*s == '"' || *s == '\''))
      80      {
      81        g_set_error_literal (err,
      82                             G_SHELL_ERROR,
      83                             G_SHELL_ERROR_BAD_QUOTING,
      84                             _("Quoted text doesn’t begin with a quotation mark"));
      85        *end = str;
      86        return FALSE;
      87      }
      88  
      89    /* Skip the initial quote mark */
      90    ++s;
      91  
      92    if (quote_char == '"')
      93      {
      94        while (*s)
      95          {
      96            g_assert(s > dest); /* loop invariant */
      97        
      98            switch (*s)
      99              {
     100              case '"':
     101                /* End of the string, return now */
     102                *dest = '\0';
     103                ++s;
     104                *end = s;
     105                return TRUE;
     106                break;
     107  
     108              case '\\':
     109                /* Possible escaped quote or \ */
     110                ++s;
     111                switch (*s)
     112                  {
     113                  case '"':
     114                  case '\\':
     115                  case '`':
     116                  case '$':
     117                  case '\n':
     118                    *dest = *s;
     119                    ++s;
     120                    ++dest;
     121                    break;
     122  
     123                  default:
     124                    /* not an escaped char */
     125                    *dest = '\\';
     126                    ++dest;
     127                    /* ++s already done. */
     128                    break;
     129                  }
     130                break;
     131  
     132              default:
     133                *dest = *s;
     134                ++dest;
     135                ++s;
     136                break;
     137              }
     138  
     139            g_assert(s > dest); /* loop invariant */
     140          }
     141      }
     142    else
     143      {
     144        while (*s)
     145          {
     146            g_assert(s > dest); /* loop invariant */
     147            
     148            if (*s == '\'')
     149              {
     150                /* End of the string, return now */
     151                *dest = '\0';
     152                ++s;
     153                *end = s;
     154                return TRUE;
     155              }
     156            else
     157              {
     158                *dest = *s;
     159                ++dest;
     160                ++s;
     161              }
     162  
     163            g_assert(s > dest); /* loop invariant */
     164          }
     165      }
     166    
     167    /* If we reach here this means the close quote was never encountered */
     168  
     169    *dest = '\0';
     170    
     171    g_set_error_literal (err,
     172                         G_SHELL_ERROR,
     173                         G_SHELL_ERROR_BAD_QUOTING,
     174                         _("Unmatched quotation mark in command line or other shell-quoted text"));
     175    *end = s;
     176    return FALSE;
     177  }
     178  
     179  /**
     180   * g_shell_quote:
     181   * @unquoted_string: (type filename): a literal string
     182   * 
     183   * Quotes a string so that the shell (/bin/sh) will interpret the
     184   * quoted string to mean @unquoted_string.
     185   *
     186   * If you pass a filename to the shell, for example, you should first
     187   * quote it with this function.
     188   *
     189   * The return value must be freed with g_free().
     190   *
     191   * The quoting style used is undefined (single or double quotes may be
     192   * used).
     193   * 
     194   * Returns: (type filename) (transfer full): quoted string
     195   **/
     196  gchar*
     197  g_shell_quote (const gchar *unquoted_string)
     198  {
     199    /* We always use single quotes, because the algorithm is cheesier.
     200     * We could use double if we felt like it, that might be more
     201     * human-readable.
     202     */
     203  
     204    const gchar *p;
     205    GString *dest;
     206  
     207    g_return_val_if_fail (unquoted_string != NULL, NULL);
     208    
     209    dest = g_string_new ("'");
     210  
     211    p = unquoted_string;
     212  
     213    /* could speed this up a lot by appending chunks of text at a
     214     * time.
     215     */
     216    while (*p)
     217      {
     218        /* Replace literal ' with a close ', a \', and an open ' */
     219        if (*p == '\'')
     220          g_string_append (dest, "'\\''");
     221        else
     222          g_string_append_c (dest, *p);
     223  
     224        ++p;
     225      }
     226  
     227    /* close the quote */
     228    g_string_append_c (dest, '\'');
     229    
     230    return g_string_free (dest, FALSE);
     231  }
     232  
     233  /**
     234   * g_shell_unquote:
     235   * @quoted_string: (type filename): shell-quoted string
     236   * @error: error return location or NULL
     237   * 
     238   * Unquotes a string as the shell (/bin/sh) would.
     239   *
     240   * This function only handles quotes; if a string contains file globs,
     241   * arithmetic operators, variables, backticks, redirections, or other
     242   * special-to-the-shell features, the result will be different from the
     243   * result a real shell would produce (the variables, backticks, etc.
     244   * will be passed through literally instead of being expanded).
     245   *
     246   * This function is guaranteed to succeed if applied to the result of
     247   * g_shell_quote(). If it fails, it returns %NULL and sets the
     248   * error.
     249   *
     250   * The @quoted_string need not actually contain quoted or escaped text;
     251   * g_shell_unquote() simply goes through the string and unquotes/unescapes
     252   * anything that the shell would. Both single and double quotes are
     253   * handled, as are escapes including escaped newlines.
     254   *
     255   * The return value must be freed with g_free().
     256   *
     257   * Possible errors are in the %G_SHELL_ERROR domain.
     258   * 
     259   * Shell quoting rules are a bit strange. Single quotes preserve the
     260   * literal string exactly. escape sequences are not allowed; not even
     261   * `\'` - if you want a `'` in the quoted text, you have to do something
     262   * like `'foo'\''bar'`. Double quotes allow `$`, ```, `"`, `\`, and
     263   * newline to be escaped with backslash. Otherwise double quotes
     264   * preserve things literally.
     265   *
     266   * Returns: (type filename): an unquoted string
     267   **/
     268  gchar*
     269  g_shell_unquote (const gchar *quoted_string,
     270                   GError     **error)
     271  {
     272    gchar *unquoted;
     273    gchar *end;
     274    gchar *start;
     275    GString *retval;
     276    
     277    g_return_val_if_fail (quoted_string != NULL, NULL);
     278    
     279    unquoted = g_strdup (quoted_string);
     280  
     281    start = unquoted;
     282    end = unquoted;
     283    retval = g_string_new (NULL);
     284  
     285    /* The loop allows cases such as
     286     * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo'
     287     */
     288    while (*start)
     289      {
     290        /* Append all non-quoted chars, honoring backslash escape
     291         */
     292        
     293        while (*start && !(*start == '"' || *start == '\''))
     294          {
     295            if (*start == '\\')
     296              {
     297                /* all characters can get escaped by backslash,
     298                 * except newline, which is removed if it follows
     299                 * a backslash outside of quotes
     300                 */
     301                
     302                ++start;
     303                if (*start)
     304                  {
     305                    if (*start != '\n')
     306                      g_string_append_c (retval, *start);
     307                    ++start;
     308                  }
     309              }
     310            else
     311              {
     312                g_string_append_c (retval, *start);
     313                ++start;
     314              }
     315          }
     316  
     317        if (*start)
     318          {
     319            if (!unquote_string_inplace (start, &end, error))
     320              {
     321                goto error;
     322              }
     323            else
     324              {
     325                g_string_append (retval, start);
     326                start = end;
     327              }
     328          }
     329      }
     330  
     331    g_free (unquoted);
     332    return g_string_free (retval, FALSE);
     333    
     334   error:
     335    g_assert (error == NULL || *error != NULL);
     336    
     337    g_free (unquoted);
     338    g_string_free (retval, TRUE);
     339    return NULL;
     340  }
     341  
/* g_shell_parse_argv() does a semi-arbitrary weird subset of the way
     343   * the shell parses a command line. We don't do variable expansion,
     344   * don't understand that operators are tokens, don't do tilde expansion,
     345   * don't do command substitution, no arithmetic expansion, IFS gets ignored,
     346   * don't do filename globs, don't remove redirection stuff, etc.
     347   *
     348   * READ THE UNIX98 SPEC on "Shell Command Language" before changing
     349   * the behavior of this code.
     350   *
     351   * Steps to parsing the argv string:
     352   *
     353   *  - tokenize the string (but since we ignore operators,
     354   *    our tokenization may diverge from what the shell would do)
     355   *    note that tokenization ignores the internals of a quoted
     356   *    word and it always splits on spaces, not on IFS even
     357   *    if we used IFS. We also ignore "end of input indicator"
     358   *    (I guess this is control-D?)
     359   *
     360   *    Tokenization steps, from UNIX98 with operator stuff removed,
     361   *    are:
     362   * 
     363   *    1) "If the current character is backslash, single-quote or
     364   *        double-quote (\, ' or ") and it is not quoted, it will affect
     365   *        quoting for subsequent characters up to the end of the quoted
 *        text. The rules for quoting are as described in Quoting.
 *        During token recognition no substitutions will be actually
     368   *        performed, and the result token will contain exactly the
     369   *        characters that appear in the input (except for newline
     370   *        character joining), unmodified, including any embedded or
     371   *        enclosing quotes or substitution operators, between the quote
     372   *        mark and the end of the quoted text. The token will not be
     373   *        delimited by the end of the quoted field."
     374   *
     375   *    2) "If the current character is an unquoted newline character,
     376   *        the current token will be delimited."
     377   *
     378   *    3) "If the current character is an unquoted blank character, any
     379   *        token containing the previous character is delimited and the
     380   *        current character will be discarded."
     381   *
     382   *    4) "If the previous character was part of a word, the current
     383   *        character will be appended to that word."
     384   *
     385   *    5) "If the current character is a "#", it and all subsequent
     386   *        characters up to, but excluding, the next newline character
     387   *        will be discarded as a comment. The newline character that
     388   *        ends the line is not considered part of the comment. The
     389   *        "#" starts a comment only when it is at the beginning of a
     390   *        token. Since the search for the end-of-comment does not
     391   *        consider an escaped newline character specially, a comment
     392   *        cannot be continued to the next line."
     393   *
     394   *    6) "The current character will be used as the start of a new word."
     395   *
     396   *
     397   *  - for each token (word), perform portions of word expansion, namely
     398   *    field splitting (using default whitespace IFS) and quote
     399   *    removal.  Field splitting may increase the number of words.
     400   *    Quote removal does not increase the number of words.
     401   *
     402   *   "If the complete expansion appropriate for a word results in an
     403   *   empty field, that empty field will be deleted from the list of
     404   *   fields that form the completely expanded command, unless the
     405   *   original word contained single-quote or double-quote characters."
     406   *    - UNIX98 spec
     407   *
     408   *
     409   */
     410  
     411  static inline void
     412  ensure_token (GString **token)
     413  {
     414    if (*token == NULL)
     415      *token = g_string_new (NULL);
     416  }
     417  
     418  static void
     419  delimit_token (GString **token,
     420                 GSList **retval)
     421  {
     422    if (*token == NULL)
     423      return;
     424  
     425    *retval = g_slist_prepend (*retval, g_string_free (*token, FALSE));
     426  
     427    *token = NULL;
     428  }
     429  
     430  static GSList*
     431  tokenize_command_line (const gchar *command_line,
     432                         GError **error)
     433  {
     434    gchar current_quote;
     435    const gchar *p;
     436    GString *current_token = NULL;
     437    GSList *retval = NULL;
     438    gboolean quoted;
     439  
     440    current_quote = '\0';
     441    quoted = FALSE;
     442    p = command_line;
     443   
     444    while (*p)
     445      {
     446        if (current_quote == '\\')
     447          {
     448            if (*p == '\n')
     449              {
     450                /* we append nothing; backslash-newline become nothing */
     451              }
     452            else
     453              {
     454                /* we append the backslash and the current char,
     455                 * to be interpreted later after tokenization
     456                 */
     457                ensure_token (&current_token);
     458                g_string_append_c (current_token, '\\');
     459                g_string_append_c (current_token, *p);
     460              }
     461  
     462            current_quote = '\0';
     463          }
     464        else if (current_quote == '#')
     465          {
     466            /* Discard up to and including next newline */
     467            while (*p && *p != '\n')
     468              ++p;
     469  
     470            current_quote = '\0';
     471            
     472            if (*p == '\0')
     473              break;
     474          }
     475        else if (current_quote)
     476          {
     477            if (*p == current_quote &&
     478                /* check that it isn't an escaped double quote */
     479                !(current_quote == '"' && quoted))
     480              {
     481                /* close the quote */
     482                current_quote = '\0';
     483              }
     484  
     485            /* Everything inside quotes, and the close quote,
     486             * gets appended literally.
     487             */
     488  
     489            ensure_token (&current_token);
     490            g_string_append_c (current_token, *p);
     491          }
     492        else
     493          {
     494            switch (*p)
     495              {
     496              case '\n':
     497                delimit_token (&current_token, &retval);
     498                break;
     499  
     500              case ' ':
     501              case '\t':
     502                /* If the current token contains the previous char, delimit
     503                 * the current token. A nonzero length
     504                 * token should always contain the previous char.
     505                 */
     506                if (current_token &&
     507                    current_token->len > 0)
     508                  {
     509                    delimit_token (&current_token, &retval);
     510                  }
     511                
     512                /* discard all unquoted blanks (don't add them to a token) */
     513                break;
     514  
     515  
     516                /* single/double quotes are appended to the token,
     517                 * escapes are maybe appended next time through the loop,
     518                 * comment chars are never appended.
     519                 */
     520                
     521              case '\'':
     522              case '"':
     523                ensure_token (&current_token);
     524                g_string_append_c (current_token, *p);
     525  
     526                G_GNUC_FALLTHROUGH;
     527              case '\\':
     528                current_quote = *p;
     529                break;
     530  
     531              case '#':
     532                if (p == command_line)
     533  	        { /* '#' was the first char */
     534                    current_quote = *p;
     535                    break;
     536                  }
     537                switch(*(p-1))
     538                  {
     539                    case ' ':
     540                    case '\n':
     541                    case '\0':
     542                      current_quote = *p;
     543                      break;
     544                    default:
     545                      ensure_token (&current_token);
     546                      g_string_append_c (current_token, *p);
     547  		    break;
     548                  }
     549                break;
     550  
     551              default:
     552                /* Combines rules 4) and 6) - if we have a token, append to it,
     553                 * otherwise create a new token.
     554                 */
     555                ensure_token (&current_token);
     556                g_string_append_c (current_token, *p);
     557                break;
     558              }
     559          }
     560  
     561        /* We need to count consecutive backslashes mod 2, 
     562         * to detect escaped doublequotes.
     563         */
     564        if (*p != '\\')
     565  	quoted = FALSE;
     566        else
     567  	quoted = !quoted;
     568  
     569        ++p;
     570      }
     571  
     572    delimit_token (&current_token, &retval);
     573  
     574    if (current_quote)
     575      {
     576        if (current_quote == '\\')
     577          g_set_error (error,
     578                       G_SHELL_ERROR,
     579                       G_SHELL_ERROR_BAD_QUOTING,
     580                       _("Text ended just after a “\\” character."
     581                         " (The text was “%s”)"),
     582                       command_line);
     583        else
     584          g_set_error (error,
     585                       G_SHELL_ERROR,
     586                       G_SHELL_ERROR_BAD_QUOTING,
     587                       _("Text ended before matching quote was found for %c."
     588                         " (The text was “%s”)"),
     589                       current_quote, command_line);
     590        
     591        goto error;
     592      }
     593  
     594    if (retval == NULL)
     595      {
     596        g_set_error_literal (error,
     597                             G_SHELL_ERROR,
     598                             G_SHELL_ERROR_EMPTY_STRING,
     599                             _("Text was empty (or contained only whitespace)"));
     600  
     601        goto error;
     602      }
     603    
     604    /* we appended backward */
     605    retval = g_slist_reverse (retval);
     606  
     607    return retval;
     608  
     609   error:
     610    g_assert (error == NULL || *error != NULL);
     611  
     612    g_slist_free_full (retval, g_free);
     613  
     614    return NULL;
     615  }
     616  
     617  /**
     618   * g_shell_parse_argv:
     619   * @command_line: (type filename): command line to parse
     620   * @argcp: (out) (optional): return location for number of args
     621   * @argvp: (out) (optional) (array length=argcp zero-terminated=1) (element-type filename):
     622   *   return location for array of args
     623   * @error: (optional): return location for error
     624   * 
     625   * Parses a command line into an argument vector, in much the same way
     626   * the shell would, but without many of the expansions the shell would
     627   * perform (variable expansion, globs, operators, filename expansion,
     628   * etc. are not supported).
     629   *
     630   * The results are defined to be the same as those you would get from
     631   * a UNIX98 `/bin/sh`, as long as the input contains none of the
     632   * unsupported shell expansions. If the input does contain such expansions,
     633   * they are passed through literally.
     634   *
     635   * Possible errors are those from the %G_SHELL_ERROR domain.
     636   *
     637   * In particular, if @command_line is an empty string (or a string containing
     638   * only whitespace), %G_SHELL_ERROR_EMPTY_STRING will be returned. It’s
     639   * guaranteed that @argvp will be a non-empty array if this function returns
     640   * successfully.
     641   *
     642   * Free the returned vector with g_strfreev().
     643   * 
     644   * Returns: %TRUE on success, %FALSE if error set
     645   **/
     646  gboolean
     647  g_shell_parse_argv (const gchar *command_line,
     648                      gint        *argcp,
     649                      gchar     ***argvp,
     650                      GError     **error)
     651  {
     652    /* Code based on poptParseArgvString() from libpopt */
     653    gint argc = 0;
     654    gchar **argv = NULL;
     655    GSList *tokens = NULL;
     656    gint i;
     657    GSList *tmp_list;
     658    
     659    g_return_val_if_fail (command_line != NULL, FALSE);
     660  
     661    tokens = tokenize_command_line (command_line, error);
     662    if (tokens == NULL)
     663      return FALSE;
     664  
     665    /* Because we can't have introduced any new blank space into the
     666     * tokens (we didn't do any new expansions), we don't need to
     667     * perform field splitting. If we were going to honor IFS or do any
     668     * expansions, we would have to do field splitting on each word
     669     * here. Also, if we were going to do any expansion we would need to
     670     * remove any zero-length words that didn't contain quotes
     671     * originally; but since there's no expansion we know all words have
     672     * nonzero length, unless they contain quotes.
     673     * 
     674     * So, we simply remove quotes, and don't do any field splitting or
     675     * empty word removal, since we know there was no way to introduce
     676     * such things.
     677     */
     678  
     679    argc = g_slist_length (tokens);
     680    argv = g_new0 (gchar*, argc + 1);
     681    i = 0;
     682    tmp_list = tokens;
     683    while (tmp_list)
     684      {
     685        argv[i] = g_shell_unquote (tmp_list->data, error);
     686  
     687        /* Since we already checked that quotes matched up in the
     688         * tokenizer, this shouldn't be possible to reach I guess.
     689         */
     690        if (argv[i] == NULL)
     691          goto failed;
     692  
     693        tmp_list = g_slist_next (tmp_list);
     694        ++i;
     695      }
     696    
     697    g_slist_free_full (tokens, g_free);
     698  
     699    g_assert (argc > 0);
     700    g_assert (argv != NULL && argv[0] != NULL);
     701  
     702    if (argcp)
     703      *argcp = argc;
     704  
     705    if (argvp)
     706      *argvp = argv;
     707    else
     708      g_strfreev (argv);
     709  
     710    return TRUE;
     711  
     712   failed:
     713  
     714    g_assert (error == NULL || *error != NULL);
     715    g_strfreev (argv);
     716    g_slist_free_full (tokens, g_free);
     717    
     718    return FALSE;
     719  }