(root)/
Python-3.11.7/
Lib/
test/
test_shlex.py
       1  import io
       2  import itertools
       3  import shlex
       4  import string
       5  import unittest
       6  from unittest import mock
       7  
       8  
       9  # The original test data set was from shellwords, by Hartmut Goebel.
      10  
# Expected tokenisation when the input is read through the plain
# shlex.shlex() interface (see ShlexTest.oldSplit / testCompat).
# Each line has the form
#     input|token|token|...|
# i.e. the text to split followed by the tokens it must yield, with every
# field terminated by "|".  ShlexTest.setUp() splits each line on "|",
# drops the empty trailing field and turns a literal \n in the input
# column into a real newline.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
      73  
# Expected tokenisation when the input is split with
# shlex.split(..., comments=True) (see ShlexTest.testSplitPosix).
# Same layout as the table above: each line is
#     input|token|token|...|
# with every field terminated by "|"; an empty field between two "|" is an
# expected empty-string token.  ShlexTest.setUp() turns a literal \n in the
# input column into a real newline.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
     137  
     138  class ESC[4;38;5;81mShlexTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     139      def setUp(self):
     140          self.data = [x.split("|")[:-1]
     141                       for x in data.splitlines()]
     142          self.posix_data = [x.split("|")[:-1]
     143                             for x in posix_data.splitlines()]
     144          for item in self.data:
     145              item[0] = item[0].replace(r"\n", "\n")
     146          for item in self.posix_data:
     147              item[0] = item[0].replace(r"\n", "\n")
     148  
     149      def splitTest(self, data, comments):
     150          for i in range(len(data)):
     151              l = shlex.split(data[i][0], comments=comments)
     152              self.assertEqual(l, data[i][1:],
     153                               "%s: %s != %s" %
     154                               (data[i][0], l, data[i][1:]))
     155  
     156      def oldSplit(self, s):
     157          ret = []
     158          lex = shlex.shlex(io.StringIO(s))
     159          tok = lex.get_token()
     160          while tok:
     161              ret.append(tok)
     162              tok = lex.get_token()
     163          return ret
     164  
     165      @mock.patch('sys.stdin', io.StringIO())
     166      def testSplitNoneDeprecation(self):
     167          with self.assertWarns(DeprecationWarning):
     168              shlex.split(None)
     169  
     170      def testSplitPosix(self):
     171          """Test data splitting with posix parser"""
     172          self.splitTest(self.posix_data, comments=True)
     173  
     174      def testCompat(self):
     175          """Test compatibility interface"""
     176          for i in range(len(self.data)):
     177              l = self.oldSplit(self.data[i][0])
     178              self.assertEqual(l, self.data[i][1:],
     179                               "%s: %s != %s" %
     180                               (self.data[i][0], l, self.data[i][1:]))
     181  
     182      def testSyntaxSplitAmpersandAndPipe(self):
     183          """Test handling of syntax splitting of &, |"""
     184          # Could take these forms: &&, &, |&, ;&, ;;&
     185          # of course, the same applies to | and ||
     186          # these should all parse to the same output
     187          for delimiter in ('&&', '&', '|&', ';&', ';;&',
     188                            '||', '|', '&|', ';|', ';;|'):
     189              src = ['echo hi %s echo bye' % delimiter,
     190                     'echo hi%secho bye' % delimiter]
     191              ref = ['echo', 'hi', delimiter, 'echo', 'bye']
     192              for ss, ws in itertools.product(src, (False, True)):
     193                  s = shlex.shlex(ss, punctuation_chars=True)
     194                  s.whitespace_split = ws
     195                  result = list(s)
     196                  self.assertEqual(ref, result,
     197                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     198  
     199      def testSyntaxSplitSemicolon(self):
     200          """Test handling of syntax splitting of ;"""
     201          # Could take these forms: ;, ;;, ;&, ;;&
     202          # these should all parse to the same output
     203          for delimiter in (';', ';;', ';&', ';;&'):
     204              src = ['echo hi %s echo bye' % delimiter,
     205                     'echo hi%s echo bye' % delimiter,
     206                     'echo hi%secho bye' % delimiter]
     207              ref = ['echo', 'hi', delimiter, 'echo', 'bye']
     208              for ss, ws in itertools.product(src, (False, True)):
     209                  s = shlex.shlex(ss, punctuation_chars=True)
     210                  s.whitespace_split = ws
     211                  result = list(s)
     212                  self.assertEqual(ref, result,
     213                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     214  
     215      def testSyntaxSplitRedirect(self):
     216          """Test handling of syntax splitting of >"""
     217          # of course, the same applies to <, |
     218          # these should all parse to the same output
     219          for delimiter in ('<', '|'):
     220              src = ['echo hi %s out' % delimiter,
     221                     'echo hi%s out' % delimiter,
     222                     'echo hi%sout' % delimiter]
     223              ref = ['echo', 'hi', delimiter, 'out']
     224              for ss, ws in itertools.product(src, (False, True)):
     225                  s = shlex.shlex(ss, punctuation_chars=True)
     226                  result = list(s)
     227                  self.assertEqual(ref, result,
     228                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     229  
     230      def testSyntaxSplitParen(self):
     231          """Test handling of syntax splitting of ()"""
     232          # these should all parse to the same output
     233          src = ['( echo hi )',
     234                 '(echo hi)']
     235          ref = ['(', 'echo', 'hi', ')']
     236          for ss, ws in itertools.product(src, (False, True)):
     237              s = shlex.shlex(ss, punctuation_chars=True)
     238              s.whitespace_split = ws
     239              result = list(s)
     240              self.assertEqual(ref, result,
     241                               "While splitting '%s' [ws=%s]" % (ss, ws))
     242  
     243      def testSyntaxSplitCustom(self):
     244          """Test handling of syntax splitting with custom chars"""
     245          ss = "~/a&&b-c --color=auto||d *.py?"
     246          ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
     247          s = shlex.shlex(ss, punctuation_chars="|")
     248          result = list(s)
     249          self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
     250          ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
     251          s = shlex.shlex(ss, punctuation_chars="|")
     252          s.whitespace_split = True
     253          result = list(s)
     254          self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)
     255  
     256      def testTokenTypes(self):
     257          """Test that tokens are split with types as expected."""
     258          for source, expected in (
     259                                  ('a && b || c',
     260                                   [('a', 'a'), ('&&', 'c'), ('b', 'a'),
     261                                    ('||', 'c'), ('c', 'a')]),
     262                                ):
     263              s = shlex.shlex(source, punctuation_chars=True)
     264              observed = []
     265              while True:
     266                  t = s.get_token()
     267                  if t == s.eof:
     268                      break
     269                  if t[0] in s.punctuation_chars:
     270                      tt = 'c'
     271                  else:
     272                      tt = 'a'
     273                  observed.append((t, tt))
     274              self.assertEqual(observed, expected)
     275  
     276      def testPunctuationInWordChars(self):
     277          """Test that any punctuation chars are removed from wordchars"""
     278          s = shlex.shlex('a_b__c', punctuation_chars='_')
     279          self.assertNotIn('_', s.wordchars)
     280          self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
     281  
     282      def testPunctuationWithWhitespaceSplit(self):
     283          """Test that with whitespace_split, behaviour is as expected"""
     284          s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
     285          # whitespace_split is False, so splitting will be based on
     286          # punctuation_chars
     287          self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
     288          s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
     289          s.whitespace_split = True
     290          # whitespace_split is True, so splitting will be based on
     291          # white space
     292          self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
     293  
     294      def testPunctuationWithPosix(self):
     295          """Test that punctuation_chars and posix behave correctly together."""
     296          # see Issue #29132
     297          s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
     298          self.assertEqual(list(s), ['f', '>', 'abc'])
     299          s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
     300          self.assertEqual(list(s), ['f', '>', '"abc"'])
     301  
     302      def testEmptyStringHandling(self):
     303          """Test that parsing of empty strings is correctly handled."""
     304          # see Issue #21999
     305          expected = ['', ')', 'abc']
     306          for punct in (False, True):
     307              s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
     308              slist = list(s)
     309              self.assertEqual(slist, expected)
     310          expected = ["''", ')', 'abc']
     311          s = shlex.shlex("'')abc", punctuation_chars=True)
     312          self.assertEqual(list(s), expected)
     313  
     314      def testUnicodeHandling(self):
     315          """Test punctuation_chars and whitespace_split handle unicode."""
     316          ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
     317          # Should be parsed as one complete token (whitespace_split=True).
     318          ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
     319          s = shlex.shlex(ss, punctuation_chars=True)
     320          s.whitespace_split = True
     321          self.assertEqual(list(s), ref)
     322          # Without whitespace_split, uses wordchars and splits on all.
     323          ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
     324          s = shlex.shlex(ss, punctuation_chars=True)
     325          self.assertEqual(list(s), ref)
     326  
     327      def testQuote(self):
     328          safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
     329          unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
     330          unsafe = '"`$\\!' + unicode_sample
     331  
     332          self.assertEqual(shlex.quote(''), "''")
     333          self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
     334          self.assertEqual(shlex.quote('test file name'), "'test file name'")
     335          for u in unsafe:
     336              self.assertEqual(shlex.quote('test%sname' % u),
     337                               "'test%sname'" % u)
     338          for u in unsafe:
     339              self.assertEqual(shlex.quote("test%s'name'" % u),
     340                               "'test%s'\"'\"'name'\"'\"''" % u)
     341  
     342      def testJoin(self):
     343          for split_command, command in [
     344              (['a ', 'b'], "'a ' b"),
     345              (['a', ' b'], "a ' b'"),
     346              (['a', ' ', 'b'], "a ' ' b"),
     347              (['"a', 'b"'], '\'"a\' \'b"\''),
     348          ]:
     349              with self.subTest(command=command):
     350                  joined = shlex.join(split_command)
     351                  self.assertEqual(joined, command)
     352  
     353      def testJoinRoundtrip(self):
     354          all_data = self.data + self.posix_data
     355          for command, *split_command in all_data:
     356              with self.subTest(command=command):
     357                  joined = shlex.join(split_command)
     358                  resplit = shlex.split(joined)
     359                  self.assertEqual(split_command, resplit)
     360  
     361      def testPunctuationCharsReadOnly(self):
     362          punctuation_chars = "/|$%^"
     363          shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
     364          self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
     365          with self.assertRaises(AttributeError):
     366              shlex_instance.punctuation_chars = False
     367  
     368  
     369  # Allow this test to be used with old shlex.py
     370  if not getattr(shlex, "split", None):
     371      for methname in dir(ShlexTest):
     372          if methname.startswith("test") and methname != "testCompat":
     373              delattr(ShlexTest, methname)
     374  
     375  if __name__ == "__main__":
     376      unittest.main()