//! pybuild-parser 0.0.1
//!
//! Python file parser.
use std::collections::HashSet;

use fancy_regex::Regex;
use lazy_static::lazy_static;

lazy_static! {
    /// Top-level module names of the Python standard library, taken from
    /// https://docs.python.org/3/py-modindex.html
    ///
    /// Entries are grouped by their first character purely for readability;
    /// the resulting set is unordered.
    static ref STD_LIBS: HashSet<&'static str> = [
        // dunder / underscore-prefixed
        "__future__", "__main__", "_thread",
        // a
        "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio", "asyncore",
        "atexit", "audioop",
        // b
        "base64", "bdb", "binascii", "binhex", "bisect", "builtins", "bz2",
        // c
        "cProfile", "calendar", "cgi", "cgitb", "chunk", "cmath", "cmd", "code", "codecs",
        "codeop", "collections", "colorsys", "compileall", "concurrent", "configparser",
        "contextlib", "contextvars", "copy", "copyreg", "crypt", "csv", "ctypes", "curses",
        // d
        "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis", "distutils", "doctest",
        // e
        "email", "encodings", "ensurepip", "enum", "errno",
        // f
        "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions", "ftplib",
        "functools",
        // g
        "gc", "getopt", "getpass", "gettext", "glob", "graphlib", "grp", "gzip",
        // h
        "hashlib", "heapq", "hmac", "html", "http",
        // i
        "imaplib", "imghdr", "imp", "importlib", "inspect", "io", "ipaddress", "itertools",
        // j, k
        "json", "keyword",
        // l
        "lib2to3", "linecache", "locale", "logging", "lzma",
        // m
        "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap", "modulefinder",
        "msilib", "msvcrt", "multiprocessing",
        // n
        "netrc", "nis", "nntplib", "numbers",
        // o
        "operator", "optparse", "os", "ossaudiodev",
        // p
        "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
        "plistlib", "poplib", "posix", "pprint", "profile", "pstats", "pty", "pwd",
        "py_compile", "pyclbr", "pydoc",
        // q
        "queue", "quopri",
        // r
        "random", "re", "readline", "reprlib", "resource", "rlcompleter", "runpy",
        // s
        "sched", "secrets", "select", "selectors", "shelve", "shlex", "shutil", "signal",
        "site", "smtpd", "smtplib", "sndhdr", "socket", "socketserver", "spwd", "sqlite3",
        "ssl", "stat", "statistics", "string", "stringprep", "struct", "subprocess", "sunau",
        "symtable", "sys", "sysconfig", "syslog",
        // t
        "tabnanny", "tarfile", "telnetlib", "tempfile", "termios", "test", "textwrap",
        "threading", "time", "timeit", "tkinter", "token", "tokenize", "trace", "traceback",
        "tracemalloc", "tty", "turtle", "turtledemo", "types", "typing",
        // u
        "unicodedata", "unittest", "urllib", "uu", "uuid",
        // v
        "venv",
        // w
        "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound", "wsgiref",
        // x
        "xdrlib", "xml", "xmlrpc",
        // z
        "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
    ]
    .iter()
    .copied()
    .collect();

    /// Matches any stretch of text containing the word `import`, together with
    /// whatever surrounds it as far as `.*` reaches on either side.
    static ref RE_IMPORTS: Regex = Regex::new(r#".*(import).*"#).unwrap();

    /// Group 1 is everything following an `import` keyword (plus one
    /// whitespace character) located at the very start of the input.
    static ref RE_IMPORT_STMT: Regex = Regex::new(r#"(?<=^import\s)(.*)"#).unwrap();

    /// Group 1 is the text between a `from` keyword (plus one whitespace
    /// character) at the very start of the input and the point where a single
    /// arbitrary character followed by `import` begins. The group is greedy, so
    /// it extends to the last such point.
    static ref RE_FROM_STMT: Regex = Regex::new(r#"(?<=^from\s)(.*)(?=.import)"#).unwrap();
}

// https://www.python.org/dev/peps/pep-0328/
// struct ImportStatement {
//     // Import statement as defined in the file it was extracted from
//     pub statement: String,
//     pub root_mod: String,
//     pub abs_path: String,
// }

// impl ImportStatement {
//     fn new(import_stmt: &str) -> ImportStatement {
//         ImportStatement {
//             statement: import_stmt.to_owned(),
//             root_mod: "x".to_string(),
//             abs_path: "y".to_string(),
//         }
//     }
// }

// TODO - might not be worthwhile, instead we should flatten and expand the import statement
pub(crate) fn get_root_module(import_stmt: &str) -> String {
    // `from` can be local or remote
    let from_stmt = RE_FROM_STMT.captures(import_stmt).unwrap();

    // `import` is remote only
    // TODO - import can be multiple via comma separated
    //  will most likely have to return Vec<String>
    let import_stmt = RE_IMPORT_STMT.captures(import_stmt).unwrap();

    let root = match from_stmt {
        Some(result) => result,
        None => import_stmt.unwrap(),
    };

    root.get(1).map_or("", |m| m.as_str()).to_owned()
}

// https://docs.python.org/3/reference/import.html
/// Collects every match of `RE_IMPORTS` found in `file_contents`.
///
/// Each returned string is the full text of one match (capture group 0).
/// Matches for which the regex engine reports an error are skipped.
pub(crate) fn extract_imports(file_contents: &str) -> Vec<String> {
    let mut statements: Vec<String> = Vec::new();

    // `flatten` on an iterator of `Result`s drops the `Err` items.
    for caps in RE_IMPORTS.captures_iter(file_contents).flatten() {
        let whole_match = match caps.get(0) {
            Some(m) => m.as_str(),
            None => "",
        };
        statements.push(whole_match.to_owned());
    }

    statements
}