python (3.12.0)

(root)/
lib/
python3.12/
test/
test_shlex.py
       1  import io
       2  import itertools
       3  import shlex
       4  import string
       5  import unittest
       6  
       7  
# The original test data set was from shellwords, by Hartmut Goebel.

# Expectation table read by ShlexTest.setUp, one test case per line:
#
#     input|token1|token2|...|
#
# setUp splits each line on "|" and discards the last field, so every line
# must end with "|".  The first field is the text handed to the lexer (a
# literal backslash-n pair in it is turned into a real newline by setUp);
# the remaining fields are the tokens expected back.  ShlexTest.testCompat
# checks these rows with a default shlex.shlex() reading from an
# io.StringIO.  The string is raw, so backslashes below are literal.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
      72  
# Expectation table read by ShlexTest.setUp, one test case per line:
#
#     input|token1|token2|...|
#
# setUp splits each line on "|" and discards the last field, so every line
# must end with "|" (an expected empty token therefore shows up as two
# adjacent "|").  The first field is the text to split (a literal
# backslash-n pair in it is turned into a real newline by setUp); the
# remaining fields are the tokens expected back.  ShlexTest.testSplitPosix
# checks these rows with shlex.split(..., comments=True).  The string is
# raw, so backslashes below are literal.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
     136  
     137  class ESC[4;38;5;81mShlexTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     138      def setUp(self):
     139          self.data = [x.split("|")[:-1]
     140                       for x in data.splitlines()]
     141          self.posix_data = [x.split("|")[:-1]
     142                             for x in posix_data.splitlines()]
     143          for item in self.data:
     144              item[0] = item[0].replace(r"\n", "\n")
     145          for item in self.posix_data:
     146              item[0] = item[0].replace(r"\n", "\n")
     147  
     148      def splitTest(self, data, comments):
     149          for i in range(len(data)):
     150              l = shlex.split(data[i][0], comments=comments)
     151              self.assertEqual(l, data[i][1:],
     152                               "%s: %s != %s" %
     153                               (data[i][0], l, data[i][1:]))
     154  
     155      def oldSplit(self, s):
     156          ret = []
     157          lex = shlex.shlex(io.StringIO(s))
     158          tok = lex.get_token()
     159          while tok:
     160              ret.append(tok)
     161              tok = lex.get_token()
     162          return ret
     163  
     164      def testSplitNone(self):
     165          with self.assertRaises(ValueError):
     166              shlex.split(None)
     167  
     168      def testSplitPosix(self):
     169          """Test data splitting with posix parser"""
     170          self.splitTest(self.posix_data, comments=True)
     171  
     172      def testCompat(self):
     173          """Test compatibility interface"""
     174          for i in range(len(self.data)):
     175              l = self.oldSplit(self.data[i][0])
     176              self.assertEqual(l, self.data[i][1:],
     177                               "%s: %s != %s" %
     178                               (self.data[i][0], l, self.data[i][1:]))
     179  
     180      def testSyntaxSplitAmpersandAndPipe(self):
     181          """Test handling of syntax splitting of &, |"""
     182          # Could take these forms: &&, &, |&, ;&, ;;&
     183          # of course, the same applies to | and ||
     184          # these should all parse to the same output
     185          for delimiter in ('&&', '&', '|&', ';&', ';;&',
     186                            '||', '|', '&|', ';|', ';;|'):
     187              src = ['echo hi %s echo bye' % delimiter,
     188                     'echo hi%secho bye' % delimiter]
     189              ref = ['echo', 'hi', delimiter, 'echo', 'bye']
     190              for ss, ws in itertools.product(src, (False, True)):
     191                  s = shlex.shlex(ss, punctuation_chars=True)
     192                  s.whitespace_split = ws
     193                  result = list(s)
     194                  self.assertEqual(ref, result,
     195                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     196  
     197      def testSyntaxSplitSemicolon(self):
     198          """Test handling of syntax splitting of ;"""
     199          # Could take these forms: ;, ;;, ;&, ;;&
     200          # these should all parse to the same output
     201          for delimiter in (';', ';;', ';&', ';;&'):
     202              src = ['echo hi %s echo bye' % delimiter,
     203                     'echo hi%s echo bye' % delimiter,
     204                     'echo hi%secho bye' % delimiter]
     205              ref = ['echo', 'hi', delimiter, 'echo', 'bye']
     206              for ss, ws in itertools.product(src, (False, True)):
     207                  s = shlex.shlex(ss, punctuation_chars=True)
     208                  s.whitespace_split = ws
     209                  result = list(s)
     210                  self.assertEqual(ref, result,
     211                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     212  
     213      def testSyntaxSplitRedirect(self):
     214          """Test handling of syntax splitting of >"""
     215          # of course, the same applies to <, |
     216          # these should all parse to the same output
     217          for delimiter in ('<', '|'):
     218              src = ['echo hi %s out' % delimiter,
     219                     'echo hi%s out' % delimiter,
     220                     'echo hi%sout' % delimiter]
     221              ref = ['echo', 'hi', delimiter, 'out']
     222              for ss, ws in itertools.product(src, (False, True)):
     223                  s = shlex.shlex(ss, punctuation_chars=True)
     224                  result = list(s)
     225                  self.assertEqual(ref, result,
     226                                   "While splitting '%s' [ws=%s]" % (ss, ws))
     227  
     228      def testSyntaxSplitParen(self):
     229          """Test handling of syntax splitting of ()"""
     230          # these should all parse to the same output
     231          src = ['( echo hi )',
     232                 '(echo hi)']
     233          ref = ['(', 'echo', 'hi', ')']
     234          for ss, ws in itertools.product(src, (False, True)):
     235              s = shlex.shlex(ss, punctuation_chars=True)
     236              s.whitespace_split = ws
     237              result = list(s)
     238              self.assertEqual(ref, result,
     239                               "While splitting '%s' [ws=%s]" % (ss, ws))
     240  
     241      def testSyntaxSplitCustom(self):
     242          """Test handling of syntax splitting with custom chars"""
     243          ss = "~/a&&b-c --color=auto||d *.py?"
     244          ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
     245          s = shlex.shlex(ss, punctuation_chars="|")
     246          result = list(s)
     247          self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
     248          ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
     249          s = shlex.shlex(ss, punctuation_chars="|")
     250          s.whitespace_split = True
     251          result = list(s)
     252          self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)
     253  
     254      def testTokenTypes(self):
     255          """Test that tokens are split with types as expected."""
     256          for source, expected in (
     257                                  ('a && b || c',
     258                                   [('a', 'a'), ('&&', 'c'), ('b', 'a'),
     259                                    ('||', 'c'), ('c', 'a')]),
     260                                ):
     261              s = shlex.shlex(source, punctuation_chars=True)
     262              observed = []
     263              while True:
     264                  t = s.get_token()
     265                  if t == s.eof:
     266                      break
     267                  if t[0] in s.punctuation_chars:
     268                      tt = 'c'
     269                  else:
     270                      tt = 'a'
     271                  observed.append((t, tt))
     272              self.assertEqual(observed, expected)
     273  
     274      def testPunctuationInWordChars(self):
     275          """Test that any punctuation chars are removed from wordchars"""
     276          s = shlex.shlex('a_b__c', punctuation_chars='_')
     277          self.assertNotIn('_', s.wordchars)
     278          self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
     279  
     280      def testPunctuationWithWhitespaceSplit(self):
     281          """Test that with whitespace_split, behaviour is as expected"""
     282          s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
     283          # whitespace_split is False, so splitting will be based on
     284          # punctuation_chars
     285          self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
     286          s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
     287          s.whitespace_split = True
     288          # whitespace_split is True, so splitting will be based on
     289          # white space
     290          self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
     291  
     292      def testPunctuationWithPosix(self):
     293          """Test that punctuation_chars and posix behave correctly together."""
     294          # see Issue #29132
     295          s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
     296          self.assertEqual(list(s), ['f', '>', 'abc'])
     297          s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
     298          self.assertEqual(list(s), ['f', '>', '"abc"'])
     299  
     300      def testEmptyStringHandling(self):
     301          """Test that parsing of empty strings is correctly handled."""
     302          # see Issue #21999
     303          expected = ['', ')', 'abc']
     304          for punct in (False, True):
     305              s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
     306              slist = list(s)
     307              self.assertEqual(slist, expected)
     308          expected = ["''", ')', 'abc']
     309          s = shlex.shlex("'')abc", punctuation_chars=True)
     310          self.assertEqual(list(s), expected)
     311  
     312      def testUnicodeHandling(self):
     313          """Test punctuation_chars and whitespace_split handle unicode."""
     314          ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
     315          # Should be parsed as one complete token (whitespace_split=True).
     316          ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
     317          s = shlex.shlex(ss, punctuation_chars=True)
     318          s.whitespace_split = True
     319          self.assertEqual(list(s), ref)
     320          # Without whitespace_split, uses wordchars and splits on all.
     321          ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
     322          s = shlex.shlex(ss, punctuation_chars=True)
     323          self.assertEqual(list(s), ref)
     324  
     325      def testQuote(self):
     326          safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
     327          unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
     328          unsafe = '"`$\\!' + unicode_sample
     329  
     330          self.assertEqual(shlex.quote(''), "''")
     331          self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
     332          self.assertEqual(shlex.quote('test file name'), "'test file name'")
     333          for u in unsafe:
     334              self.assertEqual(shlex.quote('test%sname' % u),
     335                               "'test%sname'" % u)
     336          for u in unsafe:
     337              self.assertEqual(shlex.quote("test%s'name'" % u),
     338                               "'test%s'\"'\"'name'\"'\"''" % u)
     339  
     340      def testJoin(self):
     341          for split_command, command in [
     342              (['a ', 'b'], "'a ' b"),
     343              (['a', ' b'], "a ' b'"),
     344              (['a', ' ', 'b'], "a ' ' b"),
     345              (['"a', 'b"'], '\'"a\' \'b"\''),
     346          ]:
     347              with self.subTest(command=command):
     348                  joined = shlex.join(split_command)
     349                  self.assertEqual(joined, command)
     350  
     351      def testJoinRoundtrip(self):
     352          all_data = self.data + self.posix_data
     353          for command, *split_command in all_data:
     354              with self.subTest(command=command):
     355                  joined = shlex.join(split_command)
     356                  resplit = shlex.split(joined)
     357                  self.assertEqual(split_command, resplit)
     358  
     359      def testPunctuationCharsReadOnly(self):
     360          punctuation_chars = "/|$%^"
     361          shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
     362          self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
     363          with self.assertRaises(AttributeError):
     364              shlex_instance.punctuation_chars = False
     365  
     366  
     367  # Allow this test to be used with old shlex.py
     368  if not getattr(shlex, "split", None):
     369      for methname in dir(ShlexTest):
     370          if methname.startswith("test") and methname != "testCompat":
     371              delattr(ShlexTest, methname)
     372  
     373  if __name__ == "__main__":
     374      unittest.main()