1  
       2  KINDS = [
       3      'section-major',
       4      'section-minor',
       5      'section-group',
       6      'row',
       7  ]
       8  
       9  
      10  def iter_clean_lines(lines):
      11      lines = iter(lines)
      12      for rawline in lines:
      13          line = rawline.strip()
      14          if line.startswith('#') and not rawline.startswith('##'):
      15              continue
      16          yield line, rawline
      17  
      18  
      19  def parse_table_lines(lines):
      20      lines = iter_clean_lines(lines)
      21  
      22      group = None
      23      prev = ''
      24      for line, rawline in lines:
      25          if line.startswith('## '):
      26              assert not rawline.startswith(' '), (line, rawline)
      27              if group:
      28                  assert prev, (line, rawline)
      29                  kind, after, _ = group
      30                  assert kind and kind != 'section-group', (group, line, rawline)
      31                  assert after is not None, (group, line, rawline)
      32              else:
      33                  assert not prev, (prev, line, rawline)
      34                  kind, after = group = ('section-group', None)
      35              title = line[3:].lstrip()
      36              assert title, (line, rawline)
      37              if after is not None:
      38                  try:
      39                      line, rawline = next(lines)
      40                  except StopIteration:
      41                      line = None
      42                  if line != after:
      43                      raise NotImplementedError((group, line, rawline))
      44              yield kind, title
      45              group = None
      46          elif group:
      47              raise NotImplementedError((group, line, rawline))
      48          elif line.startswith('##---'):
      49              assert line.rstrip('-') == '##', (line, rawline)
      50              group = ('section-minor', '', line)
      51          elif line.startswith('#####'):
      52              assert not line.strip('#'), (line, rawline)
      53              group = ('section-major', '', line)
      54          elif line:
      55              yield 'row', line
      56          prev = line
      57  
      58  
      59  def iter_sections(lines):
      60      header = None
      61      section = []
      62      for kind, value in parse_table_lines(lines):
      63          if kind == 'row':
      64              if not section:
      65                  if header is None:
      66                      header = value
      67                      continue
      68                  raise NotImplementedError(repr(value))
      69              yield tuple(section), value
      70          else:
      71              if header is None:
      72                  header = False
      73              start = KINDS.index(kind)
      74              section[start:] = [value]
      75  
      76  
      77  def collect_sections(lines):
      78      sections = {}
      79      for section, row in iter_sections(lines):
      80          if section not in sections:
      81              sections[section] = [row]
      82          else:
      83              sections[section].append(row)
      84      return sections
      85  
      86  
      87  def collate_sections(lines):
      88      collated = {}
      89      for section, rows in collect_sections(lines).items():
      90          parent = collated
      91          current = ()
      92          for name in section:
      93              current += (name,)
      94              try:
      95                  child, secrows, totalrows = parent[name]
      96              except KeyError:
      97                  child = {}
      98                  secrows = []
      99                  totalrows = []
     100                  parent[name] = (child, secrows, totalrows)
     101              parent = child
     102              if current == section:
     103                  secrows.extend(rows)
     104              totalrows.extend(rows)
     105      return collated
     106  
     107  
     108  #############################
     109  # the commands
     110  
     111  def cmd_count_by_section(lines):
     112      div = ' ' + '-' * 50
     113      total = 0
     114      def render_tree(root, depth=0):
     115          nonlocal total
     116          indent = '    ' * depth
     117          for name, data in root.items():
     118              subroot, rows, totalrows = data
     119              sectotal = f'({len(totalrows)})' if totalrows != rows else ''
     120              count = len(rows) if rows else ''
     121              if depth == 0:
     122                  yield div
     123              yield f'{sectotal:>7} {count:>4}  {indent}{name}'
     124              yield from render_tree(subroot, depth+1)
     125              total += len(rows)
     126      sections = collate_sections(lines)
     127      yield from render_tree(sections)
     128      yield div
     129      yield f'(total: {total})'
     130  
     131  
     132  #############################
     133  # the script
     134  
     135  def parse_args(argv=None, prog=None):
     136      import argparse
     137      parser = argparse.ArgumentParser(prog=prog)
     138      parser.add_argument('filename')
     139  
     140      args = parser.parse_args(argv)
     141      ns = vars(args)
     142  
     143      return ns
     144  
     145  
     146  def main(filename):
     147      with open(filename) as infile:
     148          for line in cmd_count_by_section(infile):
     149              print(line)
     150  
     151  
     152  if __name__ == '__main__':
     153      kwargs = parse_args()
     154      main(**kwargs)