python (3.11.7)

(root)/
lib/
python3.11/
site-packages/
pip/
_vendor/
pyparsing/
common.py
       1  # common.py
       2  from .core import *
       3  from .helpers import DelimitedList, any_open_tag, any_close_tag
       4  from datetime import datetime
       5  
       6  
       7  # some other useful expressions - using lower-case class name since we are really using this as a namespace
       8  class ESC[4;38;5;81mpyparsing_common:
       9      """Here are some common low-level expressions that may be useful in
      10      jump-starting parser development:
      11  
      12      - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      13        :class:`scientific notation<sci_real>`)
      14      - common :class:`programming identifiers<identifier>`
      15      - network addresses (:class:`MAC<mac_address>`,
      16        :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
      17      - ISO8601 :class:`dates<iso8601_date>` and
      18        :class:`datetime<iso8601_datetime>`
      19      - :class:`UUID<uuid>`
      20      - :class:`comma-separated list<comma_separated_list>`
      21      - :class:`url`
      22  
      23      Parse actions:
      24  
      25      - :class:`convert_to_integer`
      26      - :class:`convert_to_float`
      27      - :class:`convert_to_date`
      28      - :class:`convert_to_datetime`
      29      - :class:`strip_html_tags`
      30      - :class:`upcase_tokens`
      31      - :class:`downcase_tokens`
      32  
      33      Example::
      34  
      35          pyparsing_common.number.run_tests('''
      36              # any int or real number, returned as the appropriate type
      37              100
      38              -100
      39              +100
      40              3.14159
      41              6.02e23
      42              1e-12
      43              ''')
      44  
      45          pyparsing_common.fnumber.run_tests('''
      46              # any int or real number, returned as float
      47              100
      48              -100
      49              +100
      50              3.14159
      51              6.02e23
      52              1e-12
      53              ''')
      54  
      55          pyparsing_common.hex_integer.run_tests('''
      56              # hex numbers
      57              100
      58              FF
      59              ''')
      60  
      61          pyparsing_common.fraction.run_tests('''
      62              # fractions
      63              1/2
      64              -3/4
      65              ''')
      66  
      67          pyparsing_common.mixed_integer.run_tests('''
      68              # mixed fractions
      69              1
      70              1/2
      71              -3/4
      72              1-3/4
      73              ''')
      74  
      75          import uuid
      76          pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
      77          pyparsing_common.uuid.run_tests('''
      78              # uuid
      79              12345678-1234-5678-1234-567812345678
      80              ''')
      81  
      82      prints::
      83  
      84          # any int or real number, returned as the appropriate type
      85          100
      86          [100]
      87  
      88          -100
      89          [-100]
      90  
      91          +100
      92          [100]
      93  
      94          3.14159
      95          [3.14159]
      96  
      97          6.02e23
      98          [6.02e+23]
      99  
     100          1e-12
     101          [1e-12]
     102  
     103          # any int or real number, returned as float
     104          100
     105          [100.0]
     106  
     107          -100
     108          [-100.0]
     109  
     110          +100
     111          [100.0]
     112  
     113          3.14159
     114          [3.14159]
     115  
     116          6.02e23
     117          [6.02e+23]
     118  
     119          1e-12
     120          [1e-12]
     121  
     122          # hex numbers
     123          100
     124          [256]
     125  
     126          FF
     127          [255]
     128  
     129          # fractions
     130          1/2
     131          [0.5]
     132  
     133          -3/4
     134          [-0.75]
     135  
     136          # mixed fractions
     137          1
     138          [1]
     139  
     140          1/2
     141          [0.5]
     142  
     143          -3/4
     144          [-0.75]
     145  
     146          1-3/4
     147          [1.75]
     148  
     149          # uuid
     150          12345678-1234-5678-1234-567812345678
     151          [UUID('12345678-1234-5678-1234-567812345678')]
     152      """
     153  
     154      convert_to_integer = token_map(int)
     155      """
     156      Parse action for converting parsed integers to Python int
     157      """
     158  
     159      convert_to_float = token_map(float)
     160      """
     161      Parse action for converting parsed numbers to Python float
     162      """
     163  
     164      integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
     165      """expression that parses an unsigned integer, returns an int"""
     166  
     167      hex_integer = (
     168          Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
     169      )
     170      """expression that parses a hexadecimal integer, returns an int"""
     171  
     172      signed_integer = (
     173          Regex(r"[+-]?\d+")
     174          .set_name("signed integer")
     175          .set_parse_action(convert_to_integer)
     176      )
     177      """expression that parses an integer with optional leading sign, returns an int"""
     178  
     179      fraction = (
     180          signed_integer().set_parse_action(convert_to_float)
     181          + "/"
     182          + signed_integer().set_parse_action(convert_to_float)
     183      ).set_name("fraction")
     184      """fractional expression of an integer divided by an integer, returns a float"""
     185      fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
     186  
     187      mixed_integer = (
     188          fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
     189      ).set_name("fraction or mixed integer-fraction")
     190      """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
     191      mixed_integer.add_parse_action(sum)
     192  
     193      real = (
     194          Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
     195          .set_name("real number")
     196          .set_parse_action(convert_to_float)
     197      )
     198      """expression that parses a floating point number and returns a float"""
     199  
     200      sci_real = (
     201          Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
     202          .set_name("real number with scientific notation")
     203          .set_parse_action(convert_to_float)
     204      )
     205      """expression that parses a floating point number with optional
     206      scientific notation and returns a float"""
     207  
     208      # streamlining this expression makes the docs nicer-looking
     209      number = (sci_real | real | signed_integer).setName("number").streamline()
     210      """any numeric expression, returns the corresponding Python type"""
     211  
     212      fnumber = (
     213          Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
     214          .set_name("fnumber")
     215          .set_parse_action(convert_to_float)
     216      )
     217      """any int or real number, returned as float"""
     218  
     219      identifier = Word(identchars, identbodychars).set_name("identifier")
     220      """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
     221  
     222      ipv4_address = Regex(
     223          r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
     224      ).set_name("IPv4 address")
     225      "IPv4 address (``0.0.0.0 - 255.255.255.255``)"
     226  
     227      _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
     228      _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
     229          "full IPv6 address"
     230      )
     231      _short_ipv6_address = (
     232          Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
     233          + "::"
     234          + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
     235      ).set_name("short IPv6 address")
     236      _short_ipv6_address.add_condition(
     237          lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
     238      )
     239      _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
     240      ipv6_address = Combine(
     241          (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
     242              "IPv6 address"
     243          )
     244      ).set_name("IPv6 address")
     245      "IPv6 address (long, short, or mixed form)"
     246  
     247      mac_address = Regex(
     248          r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
     249      ).set_name("MAC address")
     250      "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
     251  
     252      @staticmethod
     253      def convert_to_date(fmt: str = "%Y-%m-%d"):
     254          """
     255          Helper to create a parse action for converting parsed date string to Python datetime.date
     256  
     257          Params -
     258          - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
     259  
     260          Example::
     261  
     262              date_expr = pyparsing_common.iso8601_date.copy()
     263              date_expr.set_parse_action(pyparsing_common.convert_to_date())
     264              print(date_expr.parse_string("1999-12-31"))
     265  
     266          prints::
     267  
     268              [datetime.date(1999, 12, 31)]
     269          """
     270  
     271          def cvt_fn(ss, ll, tt):
     272              try:
     273                  return datetime.strptime(tt[0], fmt).date()
     274              except ValueError as ve:
     275                  raise ParseException(ss, ll, str(ve))
     276  
     277          return cvt_fn
     278  
     279      @staticmethod
     280      def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
     281          """Helper to create a parse action for converting parsed
     282          datetime string to Python datetime.datetime
     283  
     284          Params -
     285          - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
     286  
     287          Example::
     288  
     289              dt_expr = pyparsing_common.iso8601_datetime.copy()
     290              dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
     291              print(dt_expr.parse_string("1999-12-31T23:59:59.999"))
     292  
     293          prints::
     294  
     295              [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
     296          """
     297  
     298          def cvt_fn(s, l, t):
     299              try:
     300                  return datetime.strptime(t[0], fmt)
     301              except ValueError as ve:
     302                  raise ParseException(s, l, str(ve))
     303  
     304          return cvt_fn
     305  
     306      iso8601_date = Regex(
     307          r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
     308      ).set_name("ISO8601 date")
     309      "ISO8601 date (``yyyy-mm-dd``)"
     310  
     311      iso8601_datetime = Regex(
     312          r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
     313      ).set_name("ISO8601 datetime")
     314      "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
     315  
     316      uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
     317      "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
     318  
     319      _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
     320  
     321      @staticmethod
     322      def strip_html_tags(s: str, l: int, tokens: ParseResults):
     323          """Parse action to remove HTML tags from web page HTML source
     324  
     325          Example::
     326  
     327              # strip HTML links from normal text
     328              text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
     329              td, td_end = make_html_tags("TD")
     330              table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end
     331              print(table_text.parse_string(text).body)
     332  
     333          Prints::
     334  
     335              More info at the pyparsing wiki page
     336          """
     337          return pyparsing_common._html_stripper.transform_string(tokens[0])
     338  
     339      _commasepitem = (
     340          Combine(
     341              OneOrMore(
     342                  ~Literal(",")
     343                  + ~LineEnd()
     344                  + Word(printables, exclude_chars=",")
     345                  + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
     346              )
     347          )
     348          .streamline()
     349          .set_name("commaItem")
     350      )
     351      comma_separated_list = DelimitedList(
     352          Opt(quoted_string.copy() | _commasepitem, default="")
     353      ).set_name("comma separated list")
     354      """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
     355  
     356      upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
     357      """Parse action to convert tokens to upper case."""
     358  
     359      downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
     360      """Parse action to convert tokens to lower case."""
     361  
     362      # fmt: off
     363      url = Regex(
     364          # https://mathiasbynens.be/demo/url-regex
     365          # https://gist.github.com/dperini/729294
     366          r"(?P<url>" +
     367          # protocol identifier (optional)
     368          # short syntax // still required
     369          r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
     370          # user:pass BasicAuth (optional)
     371          r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
     372          r"(?P<host>" +
     373          # IP address exclusion
     374          # private & local networks
     375          r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
     376          r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
     377          r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
     378          # IP address dotted notation octets
     379          # excludes loopback network 0.0.0.0
     380          # excludes reserved space >= 224.0.0.0
     381          # excludes network & broadcast addresses
     382          # (first & last IP address of each class)
     383          r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
     384          r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
     385          r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
     386          r"|" +
     387          # host & domain names, may end with dot
     388          # can be replaced by a shortest alternative
     389          # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
     390          r"(?:" +
     391          r"(?:" +
     392          r"[a-z0-9\u00a1-\uffff]" +
     393          r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
     394          r")?" +
     395          r"[a-z0-9\u00a1-\uffff]\." +
     396          r")+" +
     397          # TLD identifier name, may end with dot
     398          r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
     399          r")" +
     400          # port number (optional)
     401          r"(:(?P<port>\d{2,5}))?" +
     402          # resource path (optional)
     403          r"(?P<path>\/[^?# ]*)?" +
     404          # query string (optional)
     405          r"(\?(?P<query>[^#]*))?" +
     406          # fragment (optional)
     407          r"(#(?P<fragment>\S*))?" +
     408          r")"
     409      ).set_name("url")
     410      """URL (http/https/ftp scheme)"""
     411      # fmt: on
     412  
     413      # pre-PEP8 compatibility names
     414      convertToInteger = convert_to_integer
     415      """Deprecated - use :class:`convert_to_integer`"""
     416      convertToFloat = convert_to_float
     417      """Deprecated - use :class:`convert_to_float`"""
     418      convertToDate = convert_to_date
     419      """Deprecated - use :class:`convert_to_date`"""
     420      convertToDatetime = convert_to_datetime
     421      """Deprecated - use :class:`convert_to_datetime`"""
     422      stripHTMLTags = strip_html_tags
     423      """Deprecated - use :class:`strip_html_tags`"""
     424      upcaseTokens = upcase_tokens
     425      """Deprecated - use :class:`upcase_tokens`"""
     426      downcaseTokens = downcase_tokens
     427      """Deprecated - use :class:`downcase_tokens`"""
     428  
     429  
     430  _builtin_exprs = [
     431      v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
     432  ]