1 #!/usr/bin/env python3.8
2
3 import argparse
4 import os
5 import glob
6 import tarfile
7 import zipfile
8 import shutil
9 import pathlib
10 import sys
11
12 from typing import Generator
13
14 sys.path.insert(0, ".")
15
16 from scripts import test_parse_directory
17
# Absolute path of the directory that contains this script.
HERE = pathlib.Path(__file__).resolve().parent

# Command-line interface: the only option is a repeatable ``-t/--tree`` flag
# whose occurrences are counted (0 when the flag is absent).
argparser = argparse.ArgumentParser(
    prog="test_pypi_packages",
    description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
    "-t",
    "--tree",
    action="count",
    default=0,
    help="Compare parse tree to official AST",
)
27
28
def get_packages() -> Generator[str, None, None]:
    """Yield the path of every package archive found under ``./data/pypi``.

    Archives are matched by extension, in this order: ``.tar.gz``, ``.zip``,
    ``.tgz``.  All matching is done before the first path is yielded, so
    files created while the caller iterates are not picked up.
    """
    archives = []
    for pattern in (
        "./data/pypi/*.tar.gz",
        "./data/pypi/*.zip",
        "./data/pypi/*.tgz",
    ):
        archives.extend(glob.glob(pattern))
    yield from archives
37
38
def extract_files(filename: str) -> None:
    """Unpack the archive *filename* into the ``data/pypi`` directory.

    The archive type is detected from the file contents (tar first, then
    zip), not from the file extension.

    Args:
        filename: Path of the archive to unpack.

    Raises:
        ValueError: If *filename* is neither a tar nor a zip archive.
    """
    savedir = os.path.join("data", "pypi")
    # NOTE(security): extractall() trusts the member paths stored in the
    # archive; a malicious archive can write outside ``savedir``.  Only run
    # this on archives from a source you trust.
    if tarfile.is_tarfile(filename):
        # Context managers make sure the archive handle is closed even when
        # extraction fails part-way through.
        with tarfile.open(filename) as tar_archive:
            tar_archive.extractall(savedir)
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as zip_archive:
            zip_archive.extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")
47
48
def find_dirname(package_name: str) -> str:
    """Return the directory under ``data/pypi`` that belongs to *package_name*.

    A directory is a candidate when its name occurs in the archive's file
    name (for example ``foo-1.0`` for ``./data/pypi/foo-1.0.tar.gz``).  Only
    the base name of *package_name* is inspected, so directories that happen
    to be called like a path component (``data``, ``pypi``) do not match
    every archive.  When several directories qualify, the longest name wins
    because it is the most specific match.

    Args:
        package_name: Path of the archive that was extracted.

    Returns:
        Path of the matching directory, relative to the working directory.

    Raises:
        AssertionError: If no directory matches.  Raised explicitly so the
            check is not stripped when Python runs with ``-O``.
    """
    base_dir = os.path.join("data", "pypi")
    archive_name = os.path.basename(package_name)
    # Sorted so that the result does not depend on os.listdir() ordering.
    candidates = [
        name
        for name in sorted(os.listdir(base_dir))
        if name in archive_name and os.path.isdir(os.path.join(base_dir, name))
    ]
    if not candidates:
        raise AssertionError(
            f"No extracted directory found for {package_name} in {base_dir}"
        )
    return os.path.join(base_dir, max(candidates, key=len))
55
56
def run_tests(dirname: str, tree: int) -> int:
    """Run the directory parser over *dirname* and return its status.

    Args:
        dirname: Directory whose files should be parsed.
        tree: Value of the ``--tree`` counter; forwarded as ``tree_arg`` and
            also used to pick ``mode`` (1 when non-zero, otherwise 0).

    Returns:
        Whatever ``test_parse_directory.parse_directory`` returns.
    """
    parse_mode = 1 if tree else 0
    options = dict(
        verbose=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=parse_mode,
        parser="pegen",
    )
    return test_parse_directory.parse_directory(dirname, **options)
67
68
def main() -> None:
    """Extract every package archive and try to parse the files it contains.

    For each archive returned by ``get_packages()``: extract it, locate the
    extracted directory, and run the parser over it.  The directory is
    removed when ``run_tests`` reports status 0; otherwise it is kept and a
    failure message is printed.  Archives of an unrecognised type are
    reported and skipped.
    """
    args = argparser.parse_args()
    tree = args.tree

    for package in get_packages():
        # Flush explicitly: the message has no trailing newline, so without
        # it the text may only appear after the extraction has finished.
        print(f"Extracting files from {package}... ", end="", flush=True)
        try:
            extract_files(package)
        except ValueError as e:
            print(e)
            continue
        else:
            print("Done")

        print("Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree)
        if status == 0:
            shutil.rmtree(dirname)
        else:
            print(f"Failed to parse {dirname}")
89
90
91 if __name__ == "__main__":
92 main()