python (3.11.7)

(root)/
lib/
python3.11/
site-packages/
pip/
_vendor/
pyparsing/
helpers.py
       1  # helpers.py
       2  import html.entities
       3  import re
       4  import sys
       5  import typing
       6  
       7  from . import __diag__
       8  from .core import *
       9  from .util import (
      10      _bslash,
      11      _flatten,
      12      _escape_regex_range_chars,
      13      replaced_by_pep8,
      14  )
      15  
      16  
      17  #
      18  # global helpers
      19  #
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # the pre-PEP8 keyword argument wins if supplied; otherwise use int_expr
    intExpr = intExpr or int_expr
    # placeholder for the repeated-expr portion; it is (re)bound each time a
    # count is parsed, in the parse action below
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # bind array_expr to exactly n copies of expr (Empty for a zero count);
        # assumes the count expression's parse action produced an int in t[0]
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a run of digits converted to int
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the actions below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the count is captured even during lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
      78  
      79  
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    # placeholder that is re-bound each time expr matches, to a literal
    # matcher for the text that expr just matched
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        if t:
            if len(t) == 1:
                # single token - '<<' auto-converts the string to a Literal
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.as_list())
                rep << And(Literal(tt) for tt in tflat)
        else:
            # expr matched zero tokens - the "repeat" matches nothing as well
            rep << Empty()

    # callDuringTry=True so the repeater is updated even inside lookaheads
    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
     111  
     112  
     113  def match_previous_expr(expr: ParserElement) -> ParserElement:
     114      """Helper to define an expression that is indirectly defined from
     115      the tokens matched in a previous expression, that is, it looks for
     116      a 'repeat' of a previous expression.  For example::
     117  
     118          first = Word(nums)
     119          second = match_previous_expr(first)
     120          match_expr = first + ":" + second
     121  
     122      will match ``"1:1"``, but not ``"1:2"``.  Because this
     123      matches by expressions, will *not* match the leading ``"1:1"``
     124      in ``"1:10"``; the expressions are evaluated first, and then
     125      compared, so ``"1"`` is compared with ``"10"``. Do *not* use
     126      with packrat parsing enabled.
     127      """
     128      rep = Forward()
     129      e2 = expr.copy()
     130      rep <<= e2
     131  
     132      def copy_token_to_repeater(s, l, t):
     133          matchTokens = _flatten(t.as_list())
     134  
     135          def must_match_these_tokens(s, l, t):
     136              theseTokens = _flatten(t.as_list())
     137              if theseTokens != matchTokens:
     138                  raise ParseException(
     139                      s, l, f"Expected {matchTokens}, found{theseTokens}"
     140                  )
     141  
     142          rep.set_parse_action(must_match_these_tokens, callDuringTry=True)
     143  
     144      expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
     145      rep.set_name("(prev) " + str(expr))
     146      return rep
     147  
     148  
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge pre-PEP8 synonym arguments with their snake_case equivalents
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string in the `caseless` slot usually means the caller wrote
    # one_of("a", "b") instead of one_of("a b") - warn if diagnostics enabled
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # pick equality/prefix tests and the element class for the MatchFirst
    # fallback, according to the caseless/keyword options
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    # normalize strs to a list of symbol strings
    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop duplicate of an earlier symbol
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # `cur` is a prefix of `other` and would mask it in a
                    # first-match test - move the longer symbol ahead of it
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst construction below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
     273  
     274  
     275  def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
     276      """Helper to easily and clearly define a dictionary by specifying
     277      the respective patterns for the key and value.  Takes care of
     278      defining the :class:`Dict`, :class:`ZeroOrMore`, and
     279      :class:`Group` tokens in the proper order.  The key pattern
     280      can include delimiting markers or punctuation, as long as they are
     281      suppressed, thereby leaving the significant key text.  The value
     282      pattern can include named results, so that the :class:`Dict` results
     283      can include named token fields.
     284  
     285      Example::
     286  
     287          text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
     288          attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
     289          print(attr_expr[1, ...].parse_string(text).dump())
     290  
     291          attr_label = label
     292          attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
     293  
     294          # similar to Dict, but simpler call format
     295          result = dict_of(attr_label, attr_value).parse_string(text)
     296          print(result.dump())
     297          print(result['shape'])
     298          print(result.shape)  # object attribute access works too
     299          print(result.as_dict())
     300  
     301      prints::
     302  
     303          [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
     304          - color: 'light blue'
     305          - posn: 'upper left'
     306          - shape: 'SQUARE'
     307          - texture: 'burlap'
     308          SQUARE
     309          SQUARE
     310          {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
     311      """
     312      return Dict(OneOrMore(Group(key + value)))
     313  
     314  
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # merge pre-PEP8 synonym: string output only if both flags are True
    asString = asString and as_string

    # zero-width markers that record the current parse location as a token
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # NOTE(review): callPreparse=False appears intended to take the end
    # location immediately after the match, without any pre-skip - confirm
    # against ParserElement.preParse semantics
    endlocMarker.callPreparse = False
    # bracket expr with the two location markers, saved under private names
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # replace all tokens with the raw slice of the input string
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # keep named results, but replace the token list with the raw
            # slice; pop() removes the private marker names from the results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # propagate expr's ignore expressions so skipped content matches the same
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
     366  
     367  
     368  def ungroup(expr: ParserElement) -> ParserElement:
     369      """Helper to undo pyparsing's default grouping of And expressions,
     370      even if all but one are non-empty.
     371      """
     372      return TokenConverter(expr).add_parse_action(lambda t: t[0])
     373  
     374  
     375  def locatedExpr(expr: ParserElement) -> ParserElement:
     376      """
     377      (DEPRECATED - future code should use the :class:`Located` class)
     378      Helper to decorate a returned token with its starting and ending
     379      locations in the input string.
     380  
     381      This helper adds the following results names:
     382  
     383      - ``locn_start`` - location where matched expression begins
     384      - ``locn_end`` - location where matched expression ends
     385      - ``value`` - the actual parsed results
     386  
     387      Be careful if the input text contains ``<TAB>`` characters, you
     388      may want to call :class:`ParserElement.parse_with_tabs`
     389  
     390      Example::
     391  
     392          wd = Word(alphas)
     393          for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
     394              print(match)
     395  
     396      prints::
     397  
     398          [[0, 'ljsdf', 5]]
     399          [[8, 'lksdjjf', 15]]
     400          [[18, 'lkkjj', 23]]
     401      """
     402      locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
     403      return Group(
     404          locator("locn_start")
     405          + expr("value")
     406          + locator.copy().leaveWhitespace()("locn_end")
     407      )
     408  
     409  
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 ignoreExpr with ignore_expr: prefer ignore_expr when
    # ignoreExpr still looks like the default quoted_string.
    # NOTE(review): this relies on ParserElement equality semantics when
    # comparing against a freshly constructed quoted_string() - confirm
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiters; only
        # possible when both delimiters are plain strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of
                # characters that are not a delimiter or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: use negative lookahead on the
                # delimiter literals, consuming one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # the nested expression refers to itself, so build it via a Forward
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
     539  
     540  
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name. Returns an (openTag, closeTag) pair; HTML tags
    (``xml=False``) match caselessly and accept unquoted attribute values,
    XML tags (``xml=True``) match exactly and require double-quoted values.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names match exactly
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # "empty" results name is True for self-closing tags (<tag ... />)
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: attribute values may be quoted or unquoted, and attributes
        # may be valueless; attribute names are lower-cased for results names
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            # "empty" results name is True for self-closing tags (<tag ... />)
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    # matching end<tagname> results name on the close tag
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    # expose the tag name and a convenience SkipTo expression for the body
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
     597  
     598  
     599  def make_html_tags(
     600      tag_str: Union[str, ParserElement]
     601  ) -> Tuple[ParserElement, ParserElement]:
     602      """Helper to construct opening and closing tag expressions for HTML,
     603      given a tag name. Matches tags in either upper or lower case,
     604      attributes with namespaces and with quoted or unquoted values.
     605  
     606      Example::
     607  
     608          text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
     609          # make_html_tags returns pyparsing expressions for the opening and
     610          # closing tags as a 2-tuple
     611          a, a_end = make_html_tags("A")
     612          link_expr = a + SkipTo(a_end)("link_text") + a_end
     613  
     614          for link in link_expr.search_string(text):
     615              # attributes in the <A> tag (like "href" shown here) are
     616              # also accessible as named results
     617              print(link.link_text, '->', link.href)
     618  
     619      prints::
     620  
     621          pyparsing -> https://github.com/pyparsing/pyparsing/wiki
     622      """
     623      return _makeTags(tag_str, False)
     624  
     625  
     626  def make_xml_tags(
     627      tag_str: Union[str, ParserElement]
     628  ) -> Tuple[ParserElement, ParserElement]:
     629      """Helper to construct opening and closing tag expressions for XML,
     630      given a tag name. Matches tags only in the given upper/lower case.
     631  
     632      Example: similar to :class:`make_html_tags`
     633      """
     634      return _makeTags(tag_str, True)
     635  
     636  
# expressions matching any opening or closing HTML tag (tag name is any
# word of alphas/alphanums/underscore/colon characters)
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their
# replacement characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any known entity reference (e.g. "&nbsp;"); the entity name is
# available under the results name "entity"
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
     647  
     648  
     649  def replace_html_entity(s, l, t):
     650      """Helper parser action to replace common HTML entities with their special characters"""
     651      return _htmlEntityMap.get(t.entity)
     652  
     653  
     654  class ESC[4;38;5;81mOpAssoc(ESC[4;38;5;149mEnum):
     655      """Enumeration of operator associativity
     656      - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""
     657  
     658      LEFT = 1
     659      RIGHT = 2
     660  
     661  
# the "operator expression" element of an infix_notation operator spec:
# a single expression (or string), or a pair of expressions/strings for
# operators that separate three operands
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one operator-precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action]) - the parse
# action element is optional
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
     678  
     679  
     680  def infix_notation(
     681      base_expr: ParserElement,
     682      op_list: List[InfixNotationOperatorSpec],
     683      lpar: Union[str, ParserElement] = Suppress("("),
     684      rpar: Union[str, ParserElement] = Suppress(")"),
     685  ) -> ParserElement:
     686      """Helper method for constructing grammars of expressions made up of
     687      operators working in a precedence hierarchy.  Operators may be unary
     688      or binary, left- or right-associative.  Parse actions can also be
     689      attached to operator expressions. The generated parser will also
     690      recognize the use of parentheses to override operator precedences
     691      (see example below).
     692  
     693      Note: if you define a deep operator list, you may see performance
     694      issues when using infix_notation. See
     695      :class:`ParserElement.enable_packrat` for a mechanism to potentially
     696      improve your parser performance.
     697  
     698      Parameters:
     699  
     700      - ``base_expr`` - expression representing the most basic operand to
     701        be used in the expression
     702      - ``op_list`` - list of tuples, one for each operator precedence level
     703        in the expression grammar; each tuple is of the form ``(op_expr,
     704        num_operands, right_left_assoc, (optional)parse_action)``, where:
     705  
     706        - ``op_expr`` is the pyparsing expression for the operator; may also
     707          be a string, which will be converted to a Literal; if ``num_operands``
     708          is 3, ``op_expr`` is a tuple of two expressions, for the two
     709          operators separating the 3 terms
     710        - ``num_operands`` is the number of terms for this operator (must be 1,
     711          2, or 3)
     712        - ``right_left_assoc`` is the indicator whether the operator is right
     713          or left associative, using the pyparsing-defined constants
     714          ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
     715        - ``parse_action`` is the parse action to be associated with
     716          expressions matching this operator expression (the parse action
     717          tuple member may be omitted); if the parse action is passed
     718          a tuple or list of functions, this is equivalent to calling
     719          ``set_parse_action(*fn)``
     720          (:class:`ParserElement.set_parse_action`)
     721      - ``lpar`` - expression for matching left-parentheses; if passed as a
     722        str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
     723        an expression (such as ``Literal('(')``), then it will be kept in
     724        the parsed results, and grouped with them. (default= ``Suppress('(')``)
     725      - ``rpar`` - expression for matching right-parentheses; if passed as a
     726        str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
     727        an expression (such as ``Literal(')')``), then it will be kept in
     728        the parsed results, and grouped with them. (default= ``Suppress(')')``)
     729  
     730      Example::
     731  
     732          # simple example of four-function arithmetic with ints and
     733          # variable names
     734          integer = pyparsing_common.signed_integer
     735          varname = pyparsing_common.identifier
     736  
     737          arith_expr = infix_notation(integer | varname,
     738              [
     739              ('-', 1, OpAssoc.RIGHT),
     740              (one_of('* /'), 2, OpAssoc.LEFT),
     741              (one_of('+ -'), 2, OpAssoc.LEFT),
     742              ])
     743  
     744          arith_expr.run_tests('''
     745              5+3*6
     746              (5+3)*6
     747              -2--11
     748              ''', full_dump=False)
     749  
     750      prints::
     751  
     752          5+3*6
     753          [[5, '+', [3, '*', 6]]]
     754  
     755          (5+3)*6
     756          [[[5, '+', 3], '*', 6]]
     757  
     758          (5+x)*y
     759          [[[5, '+', 'x'], '*', 'y']]
     760  
     761          -2--11
     762          [[['-', 2], '-', ['-', 11]]]
     763      """
     764  
     765      # captive version of FollowedBy that does not do parse actions or capture results names
     766      class ESC[4;38;5;81m_FB(ESC[4;38;5;149mFollowedBy):
     767          def parseImpl(self, instring, loc, doActions=True):
     768              self.expr.try_parse(instring, loc)
     769              return loc, []
     770  
     771      _FB.__name__ = "FollowedBy>"
     772  
     773      ret = Forward()
     774      if isinstance(lpar, str):
     775          lpar = Suppress(lpar)
     776      if isinstance(rpar, str):
     777          rpar = Suppress(rpar)
     778  
     779      # if lpar and rpar are not suppressed, wrap in group
     780      if not (isinstance(rpar, Suppress) and isinstance(rpar, Suppress)):
     781          lastExpr = base_expr | Group(lpar + ret + rpar)
     782      else:
     783          lastExpr = base_expr | (lpar + ret + rpar)
     784  
     785      arity: int
     786      rightLeftAssoc: opAssoc
     787      pa: typing.Optional[ParseAction]
     788      opExpr1: ParserElement
     789      opExpr2: ParserElement
     790      for i, operDef in enumerate(op_list):
     791          opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
     792          if isinstance(opExpr, str_type):
     793              opExpr = ParserElement._literalStringClass(opExpr)
     794          opExpr = typing.cast(ParserElement, opExpr)
     795          if arity == 3:
     796              if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
     797                  raise ValueError(
     798                      "if numterms=3, opExpr must be a tuple or list of two expressions"
     799                  )
     800              opExpr1, opExpr2 = opExpr
     801              term_name = f"{opExpr1}{opExpr2} term"
     802          else:
     803              term_name = f"{opExpr} term"
     804  
     805          if not 1 <= arity <= 3:
     806              raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
     807  
     808          if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
     809              raise ValueError("operator must indicate right or left associativity")
     810  
     811          thisExpr: ParserElement = Forward().set_name(term_name)
     812          thisExpr = typing.cast(Forward, thisExpr)
     813          if rightLeftAssoc is OpAssoc.LEFT:
     814              if arity == 1:
     815                  matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
     816              elif arity == 2:
     817                  if opExpr is not None:
     818                      matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
     819                          lastExpr + (opExpr + lastExpr)[1, ...]
     820                      )
     821                  else:
     822                      matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
     823              elif arity == 3:
     824                  matchExpr = _FB(
     825                      lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
     826                  ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
     827          elif rightLeftAssoc is OpAssoc.RIGHT:
     828              if arity == 1:
     829                  # try to avoid LR with this extra test
     830                  if not isinstance(opExpr, Opt):
     831                      opExpr = Opt(opExpr)
     832                  matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
     833              elif arity == 2:
     834                  if opExpr is not None:
     835                      matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
     836                          lastExpr + (opExpr + thisExpr)[1, ...]
     837                      )
     838                  else:
     839                      matchExpr = _FB(lastExpr + thisExpr) + Group(
     840                          lastExpr + thisExpr[1, ...]
     841                      )
     842              elif arity == 3:
     843                  matchExpr = _FB(
     844                      lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
     845                  ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
     846          if pa:
     847              if isinstance(pa, (tuple, list)):
     848                  matchExpr.set_parse_action(*pa)
     849              else:
     850                  matchExpr.set_parse_action(pa)
     851          thisExpr <<= (matchExpr | lastExpr).setName(term_name)
     852          lastExpr = thisExpr
     853      ret <<= lastExpr
     854      return ret
     855  
     856  
# NOTE: backup_stacks=[] is a deliberately shared mutable default - it
# persists snapshots of indentStack across calls so that a failed parse
# can restore the stack (see reset_stack / the fail action below).
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot the caller's indent stack so it can be restored on failure
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore indentStack in place from the most recent snapshot
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # parse action: next statement must start at the current indent level
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # parse action: entering a block requires a strictly deeper indent,
        # which is pushed onto the stack
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # parse action: leaving a block must return to a previously seen
        # indent level; pop one level off the stack
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # skip newlines (but not indentation spaces/tabs) between statements
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most block: no INDENT required, UNDENT optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # on parse failure, roll the indent stack back to the snapshot
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash line continuations inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
     998  
     999  
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# matches everything up to (but not including) the next newline
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# NOTE(review): the /* ... */ regex here duplicates the one used in
# c_style_comment above - keep the two in sync if either changes
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
    1030  
    1031  
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    # NOTE(review): this name is rebound later in this module by the
    # @replaced_by_pep8 stub of the same name, so that wrapper is what
    # importers actually get.
    return DelimitedList(
        expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
    )
    1046  
    1047  
# pre-PEP8 compatible names
# (simple aliases for module-level expressions; the @replaced_by_pep8 stubs
# below are replaced at definition time by wrappers around the new names)
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment

@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on