provenant/parsers/python/
mod.rs1mod archive;
35mod pypi_json;
36mod pyproject;
37mod rfc822_meta;
38mod setup_cfg;
39mod setup_py;
40mod utils;
41
42#[cfg(test)]
43mod scan_test;
44#[cfg(test)]
45mod test;
46
47use super::PackageParser;
48use crate::models::{DatasourceId, PackageData, PackageType};
49use std::path::Path;
50
51pub(crate) use self::utils::build_pypi_urls;
52#[cfg(test)]
53pub(crate) use self::utils::extract_requires_dist_dependencies;
54pub(crate) use self::utils::read_toml_file;
55
/// The kind of Python packaging artifact a path was recognised as.
///
/// Produced by `classify_python_file` and used by `PythonParser` to pick the
/// extractor for a file. The derives make the classification cheap to copy,
/// comparable in assertions, and printable in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PythonFileKind {
    /// A file named `pyproject.toml`.
    PyprojectToml,
    /// A file named `setup.cfg`.
    SetupCfg,
    /// `setup.py`, or a file whose name ends in `_setup.py` / `-setup.py`.
    SetupPy,
    /// A file named `PKG-INFO`.
    PkgInfo,
    /// A `METADATA` file located directly inside a `*.dist-info` directory.
    WheelMetadata,
    /// A path accepted by `archive::is_pip_cache_origin_json`.
    PipOriginJson,
    /// A file named `pypi.json`.
    PypiJson,
    /// A file named `pip-inspect.deplock`.
    PipInspectDeplock,
    /// A path accepted by `archive::is_python_sdist_archive_path`.
    SdistArchive,
    /// A `.whl` file that also passes `archive::is_valid_wheel_archive_path`.
    WheelArchive,
    /// A file with an `.egg` extension (compared ASCII case-insensitively).
    EggArchive,
}
69
70fn classify_python_file(path: &Path) -> Option<PythonFileKind> {
71 let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
72 Some(match filename {
73 "pyproject.toml" => PythonFileKind::PyprojectToml,
74 "setup.cfg" => PythonFileKind::SetupCfg,
75 _ if is_setup_py_like_path(path) => PythonFileKind::SetupPy,
76 "PKG-INFO" => PythonFileKind::PkgInfo,
77 "METADATA" if is_installed_wheel_metadata_path(path) => PythonFileKind::WheelMetadata,
78 "pypi.json" => PythonFileKind::PypiJson,
79 "pip-inspect.deplock" => PythonFileKind::PipInspectDeplock,
80 _ => {
81 if archive::is_pip_cache_origin_json(path) {
82 PythonFileKind::PipOriginJson
83 } else if archive::is_python_sdist_archive_path(path) {
84 PythonFileKind::SdistArchive
85 } else if path
86 .extension()
87 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
88 && archive::is_valid_wheel_archive_path(path)
89 {
90 PythonFileKind::WheelArchive
91 } else if path
92 .extension()
93 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
94 {
95 PythonFileKind::EggArchive
96 } else {
97 return None;
98 }
99 }
100 })
101}
102
103pub struct PythonParser;
113
114impl PackageParser for PythonParser {
115 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
116
117 fn extract_packages(path: &Path) -> Vec<PackageData> {
118 match classify_python_file(path) {
119 Some(PythonFileKind::PyprojectToml) => pyproject::extract(path),
120 Some(PythonFileKind::SetupCfg) => setup_cfg::extract(path),
121 Some(PythonFileKind::SetupPy) => setup_py::extract(path),
122 Some(PythonFileKind::PkgInfo) => rfc822_meta::extract_from_rfc822_metadata(
123 path,
124 utils::detect_pkg_info_datasource_id(path),
125 ),
126 Some(PythonFileKind::WheelMetadata) => {
127 rfc822_meta::extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
128 }
129 Some(PythonFileKind::PipOriginJson) => archive::extract_from_pip_origin_json(path),
130 Some(PythonFileKind::PypiJson) => pypi_json::extract_from_pypi_json(path),
131 Some(PythonFileKind::PipInspectDeplock) => pypi_json::extract_from_pip_inspect(path),
132 Some(PythonFileKind::SdistArchive) => archive::extract_from_sdist_archive(path),
133 Some(PythonFileKind::WheelArchive) => archive::extract_from_wheel_archive(path),
134 Some(PythonFileKind::EggArchive) => archive::extract_from_egg_archive(path),
135 None => utils::default_package_data(path),
136 }
137 }
138
139 fn is_match(path: &Path) -> bool {
140 classify_python_file(path).is_some()
141 }
142}
143
/// Returns `true` when the final path component is `setup.py` or a suffixed
/// variant whose name ends in `_setup.py` or `-setup.py`.
///
/// Paths without a final component, or whose name is not valid UTF-8, never
/// match.
fn is_setup_py_like_path(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };

    match file_name.strip_suffix("setup.py") {
        // Nothing before the suffix means a plain `setup.py`; otherwise the
        // part before it must end in one of the accepted separators.
        Some(prefix) => prefix.is_empty() || prefix.ends_with('_') || prefix.ends_with('-'),
        None => false,
    }
}
151
152pub(super) fn is_installed_wheel_metadata_path(path: &Path) -> bool {
153 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
154 && path
155 .parent()
156 .and_then(|parent| parent.file_name())
157 .and_then(|name| name.to_str())
158 .is_some_and(|name| name.ends_with(".dist-info"))
159}
160
161crate::register_parser!(
162 "Python package manifests (pyproject.toml, setup.py, suffixed setup.py variants, setup.cfg, pypi.json, PKG-INFO, .dist-info/METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
163 &[
164 "**/pyproject.toml",
165 "**/setup.py",
166 "**/*_setup.py",
167 "**/*-setup.py",
168 "**/setup.cfg",
169 "**/pypi.json",
170 "**/PKG-INFO",
171 "**/*.dist-info/METADATA",
172 "**/origin.json",
173 "**/*.tar.gz",
174 "**/*.tgz",
175 "**/*.tar.bz2",
176 "**/*.tar.xz",
177 "**/*.zip",
178 "**/*.whl",
179 "**/*.egg"
180 ],
181 "pypi",
182 "Python",
183 Some("https://packaging.python.org/"),
184);