(root)/
Python-3.11.7/
Lib/
shlex.py
       1  """A lexical analyzer class for simple shell-like syntaxes."""
       2  
       3  # Module and documentation by Eric S. Raymond, 21 Dec 1998
       4  # Input stacking and error message cleanup added by ESR, March 2000
       5  # push_source() and pop_source() made explicit by ESR, January 2001.
       6  # Posix compliance, split(), string arguments, and
       7  # iterator interface by Gustavo Niemeyer, April 2003.
       8  # changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
       9  
      10  import os
      11  import re
      12  import sys
      13  from collections import deque
      14  
      15  from io import StringIO
      16  
      17  __all__ = ["shlex", "split", "quote", "join"]
      18  
      19  class ESC[4;38;5;81mshlex:
      20      "A lexical analyzer class for simple shell-like syntaxes."
      21      def __init__(self, instream=None, infile=None, posix=False,
      22                   punctuation_chars=False):
      23          if isinstance(instream, str):
      24              instream = StringIO(instream)
      25          if instream is not None:
      26              self.instream = instream
      27              self.infile = infile
      28          else:
      29              self.instream = sys.stdin
      30              self.infile = None
      31          self.posix = posix
      32          if posix:
      33              self.eof = None
      34          else:
      35              self.eof = ''
      36          self.commenters = '#'
      37          self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
      38                            'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
      39          if self.posix:
      40              self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
      41                                 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
      42          self.whitespace = ' \t\r\n'
      43          self.whitespace_split = False
      44          self.quotes = '\'"'
      45          self.escape = '\\'
      46          self.escapedquotes = '"'
      47          self.state = ' '
      48          self.pushback = deque()
      49          self.lineno = 1
      50          self.debug = 0
      51          self.token = ''
      52          self.filestack = deque()
      53          self.source = None
      54          if not punctuation_chars:
      55              punctuation_chars = ''
      56          elif punctuation_chars is True:
      57              punctuation_chars = '();<>|&'
      58          self._punctuation_chars = punctuation_chars
      59          if punctuation_chars:
      60              # _pushback_chars is a push back queue used by lookahead logic
      61              self._pushback_chars = deque()
      62              # these chars added because allowed in file names, args, wildcards
      63              self.wordchars += '~-./*?='
      64              #remove any punctuation chars from wordchars
      65              t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
      66              self.wordchars = self.wordchars.translate(t)
      67  
      68      @property
      69      def punctuation_chars(self):
      70          return self._punctuation_chars
      71  
      72      def push_token(self, tok):
      73          "Push a token onto the stack popped by the get_token method"
      74          if self.debug >= 1:
      75              print("shlex: pushing token " + repr(tok))
      76          self.pushback.appendleft(tok)
      77  
      78      def push_source(self, newstream, newfile=None):
      79          "Push an input source onto the lexer's input source stack."
      80          if isinstance(newstream, str):
      81              newstream = StringIO(newstream)
      82          self.filestack.appendleft((self.infile, self.instream, self.lineno))
      83          self.infile = newfile
      84          self.instream = newstream
      85          self.lineno = 1
      86          if self.debug:
      87              if newfile is not None:
      88                  print('shlex: pushing to file %s' % (self.infile,))
      89              else:
      90                  print('shlex: pushing to stream %s' % (self.instream,))
      91  
      92      def pop_source(self):
      93          "Pop the input source stack."
      94          self.instream.close()
      95          (self.infile, self.instream, self.lineno) = self.filestack.popleft()
      96          if self.debug:
      97              print('shlex: popping to %s, line %d' \
      98                    % (self.instream, self.lineno))
      99          self.state = ' '
     100  
     101      def get_token(self):
     102          "Get a token from the input stream (or from stack if it's nonempty)"
     103          if self.pushback:
     104              tok = self.pushback.popleft()
     105              if self.debug >= 1:
     106                  print("shlex: popping token " + repr(tok))
     107              return tok
     108          # No pushback.  Get a token.
     109          raw = self.read_token()
     110          # Handle inclusions
     111          if self.source is not None:
     112              while raw == self.source:
     113                  spec = self.sourcehook(self.read_token())
     114                  if spec:
     115                      (newfile, newstream) = spec
     116                      self.push_source(newstream, newfile)
     117                  raw = self.get_token()
     118          # Maybe we got EOF instead?
     119          while raw == self.eof:
     120              if not self.filestack:
     121                  return self.eof
     122              else:
     123                  self.pop_source()
     124                  raw = self.get_token()
     125          # Neither inclusion nor EOF
     126          if self.debug >= 1:
     127              if raw != self.eof:
     128                  print("shlex: token=" + repr(raw))
     129              else:
     130                  print("shlex: token=EOF")
     131          return raw
     132  
     133      def read_token(self):
     134          quoted = False
     135          escapedstate = ' '
     136          while True:
     137              if self.punctuation_chars and self._pushback_chars:
     138                  nextchar = self._pushback_chars.pop()
     139              else:
     140                  nextchar = self.instream.read(1)
     141              if nextchar == '\n':
     142                  self.lineno += 1
     143              if self.debug >= 3:
     144                  print("shlex: in state %r I see character: %r" % (self.state,
     145                                                                    nextchar))
     146              if self.state is None:
     147                  self.token = ''        # past end of file
     148                  break
     149              elif self.state == ' ':
     150                  if not nextchar:
     151                      self.state = None  # end of file
     152                      break
     153                  elif nextchar in self.whitespace:
     154                      if self.debug >= 2:
     155                          print("shlex: I see whitespace in whitespace state")
     156                      if self.token or (self.posix and quoted):
     157                          break   # emit current token
     158                      else:
     159                          continue
     160                  elif nextchar in self.commenters:
     161                      self.instream.readline()
     162                      self.lineno += 1
     163                  elif self.posix and nextchar in self.escape:
     164                      escapedstate = 'a'
     165                      self.state = nextchar
     166                  elif nextchar in self.wordchars:
     167                      self.token = nextchar
     168                      self.state = 'a'
     169                  elif nextchar in self.punctuation_chars:
     170                      self.token = nextchar
     171                      self.state = 'c'
     172                  elif nextchar in self.quotes:
     173                      if not self.posix:
     174                          self.token = nextchar
     175                      self.state = nextchar
     176                  elif self.whitespace_split:
     177                      self.token = nextchar
     178                      self.state = 'a'
     179                  else:
     180                      self.token = nextchar
     181                      if self.token or (self.posix and quoted):
     182                          break   # emit current token
     183                      else:
     184                          continue
     185              elif self.state in self.quotes:
     186                  quoted = True
     187                  if not nextchar:      # end of file
     188                      if self.debug >= 2:
     189                          print("shlex: I see EOF in quotes state")
     190                      # XXX what error should be raised here?
     191                      raise ValueError("No closing quotation")
     192                  if nextchar == self.state:
     193                      if not self.posix:
     194                          self.token += nextchar
     195                          self.state = ' '
     196                          break
     197                      else:
     198                          self.state = 'a'
     199                  elif (self.posix and nextchar in self.escape and self.state
     200                        in self.escapedquotes):
     201                      escapedstate = self.state
     202                      self.state = nextchar
     203                  else:
     204                      self.token += nextchar
     205              elif self.state in self.escape:
     206                  if not nextchar:      # end of file
     207                      if self.debug >= 2:
     208                          print("shlex: I see EOF in escape state")
     209                      # XXX what error should be raised here?
     210                      raise ValueError("No escaped character")
     211                  # In posix shells, only the quote itself or the escape
     212                  # character may be escaped within quotes.
     213                  if (escapedstate in self.quotes and
     214                          nextchar != self.state and nextchar != escapedstate):
     215                      self.token += self.state
     216                  self.token += nextchar
     217                  self.state = escapedstate
     218              elif self.state in ('a', 'c'):
     219                  if not nextchar:
     220                      self.state = None   # end of file
     221                      break
     222                  elif nextchar in self.whitespace:
     223                      if self.debug >= 2:
     224                          print("shlex: I see whitespace in word state")
     225                      self.state = ' '
     226                      if self.token or (self.posix and quoted):
     227                          break   # emit current token
     228                      else:
     229                          continue
     230                  elif nextchar in self.commenters:
     231                      self.instream.readline()
     232                      self.lineno += 1
     233                      if self.posix:
     234                          self.state = ' '
     235                          if self.token or (self.posix and quoted):
     236                              break   # emit current token
     237                          else:
     238                              continue
     239                  elif self.state == 'c':
     240                      if nextchar in self.punctuation_chars:
     241                          self.token += nextchar
     242                      else:
     243                          if nextchar not in self.whitespace:
     244                              self._pushback_chars.append(nextchar)
     245                          self.state = ' '
     246                          break
     247                  elif self.posix and nextchar in self.quotes:
     248                      self.state = nextchar
     249                  elif self.posix and nextchar in self.escape:
     250                      escapedstate = 'a'
     251                      self.state = nextchar
     252                  elif (nextchar in self.wordchars or nextchar in self.quotes
     253                        or (self.whitespace_split and
     254                            nextchar not in self.punctuation_chars)):
     255                      self.token += nextchar
     256                  else:
     257                      if self.punctuation_chars:
     258                          self._pushback_chars.append(nextchar)
     259                      else:
     260                          self.pushback.appendleft(nextchar)
     261                      if self.debug >= 2:
     262                          print("shlex: I see punctuation in word state")
     263                      self.state = ' '
     264                      if self.token or (self.posix and quoted):
     265                          break   # emit current token
     266                      else:
     267                          continue
     268          result = self.token
     269          self.token = ''
     270          if self.posix and not quoted and result == '':
     271              result = None
     272          if self.debug > 1:
     273              if result:
     274                  print("shlex: raw token=" + repr(result))
     275              else:
     276                  print("shlex: raw token=EOF")
     277          return result
     278  
     279      def sourcehook(self, newfile):
     280          "Hook called on a filename to be sourced."
     281          if newfile[0] == '"':
     282              newfile = newfile[1:-1]
     283          # This implements cpp-like semantics for relative-path inclusion.
     284          if isinstance(self.infile, str) and not os.path.isabs(newfile):
     285              newfile = os.path.join(os.path.dirname(self.infile), newfile)
     286          return (newfile, open(newfile, "r"))
     287  
     288      def error_leader(self, infile=None, lineno=None):
     289          "Emit a C-compiler-like, Emacs-friendly error-message leader."
     290          if infile is None:
     291              infile = self.infile
     292          if lineno is None:
     293              lineno = self.lineno
     294          return "\"%s\", line %d: " % (infile, lineno)
     295  
     296      def __iter__(self):
     297          return self
     298  
     299      def __next__(self):
     300          token = self.get_token()
     301          if token == self.eof:
     302              raise StopIteration
     303          return token
     304  
     305  def split(s, comments=False, posix=True):
     306      """Split the string *s* using shell-like syntax."""
     307      if s is None:
     308          import warnings
     309          warnings.warn("Passing None for 's' to shlex.split() is deprecated.",
     310                        DeprecationWarning, stacklevel=2)
     311      lex = shlex(s, posix=posix)
     312      lex.whitespace_split = True
     313      if not comments:
     314          lex.commenters = ''
     315      return list(lex)
     316  
     317  
     318  def join(split_command):
     319      """Return a shell-escaped string from *split_command*."""
     320      return ' '.join(quote(arg) for arg in split_command)
     321  
     322  
     323  _find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
     324  
     325  def quote(s):
     326      """Return a shell-escaped version of the string *s*."""
     327      if not s:
     328          return "''"
     329      if _find_unsafe(s) is None:
     330          return s
     331  
     332      # use single quotes, and put single quotes into double quotes
     333      # the string $'b is then quoted as '$'"'"'b'
     334      return "'" + s.replace("'", "'\"'\"'") + "'"
     335  
     336  
     337  def _print_tokens(lexer):
     338      while 1:
     339          tt = lexer.get_token()
     340          if not tt:
     341              break
     342          print("Token: " + repr(tt))
     343  
     344  if __name__ == '__main__':
     345      if len(sys.argv) == 1:
     346          _print_tokens(shlex())
     347      else:
     348          fn = sys.argv[1]
     349          with open(fn) as f:
     350              _print_tokens(shlex(f, fn))