provenant/parsers/python/
mod.rs1mod archive;
38mod pypi_json;
39mod pyproject;
40mod rfc822_meta;
41mod setup_cfg;
42mod setup_py;
43mod utils;
44
45#[cfg(test)]
46mod scan_test;
47#[cfg(test)]
48mod test;
49
50use super::PackageParser;
51use crate::models::{DatasourceId, PackageData, PackageType};
52use std::path::Path;
53
54pub(crate) use self::utils::build_pypi_urls;
55#[cfg(test)]
56pub(crate) use self::utils::extract_requires_dist_dependencies;
57pub(crate) use self::utils::read_toml_file;
58
/// The kind of Python packaging artifact a path was recognised as.
///
/// Produced by `classify_python_file` and used to pick the extraction routine.
/// The derives make the value cheap to copy, comparable, and printable in
/// diagnostics and test assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PythonFileKind {
    /// `pyproject.toml` or a `-`/`_`/`.`-prefixed variant of that name.
    PyprojectToml,
    /// `setup.cfg` or a `_`/`-`/`.`-prefixed variant of that name.
    SetupCfg,
    /// `setup.py` or a `_`/`-`-prefixed variant of that name.
    SetupPy,
    /// A file named exactly `PKG-INFO`.
    PkgInfo,
    /// A `METADATA` file whose parent directory name ends with `.dist-info`.
    WheelMetadata,
    /// A path accepted by `archive::is_pip_cache_origin_json`.
    PipOriginJson,
    /// A file named exactly `pypi.json`.
    PypiJson,
    /// A file named exactly `pip-inspect.deplock`.
    PipInspectDeplock,
    /// A path accepted by `archive::is_python_sdist_archive_path`.
    SdistArchive,
    /// A `.whl` file that also passes `archive::is_valid_wheel_archive_path`.
    WheelArchive,
    /// A file with the `.egg` extension (case-insensitive).
    EggArchive,
}
72
73fn classify_python_file(path: &Path) -> Option<PythonFileKind> {
74 let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
75 Some(match filename {
76 _ if is_pyproject_toml_like_path(path) => PythonFileKind::PyprojectToml,
77 _ if is_setup_cfg_like_path(path) => PythonFileKind::SetupCfg,
78 _ if is_setup_py_like_path(path) => PythonFileKind::SetupPy,
79 "PKG-INFO" => PythonFileKind::PkgInfo,
80 "METADATA" if is_installed_wheel_metadata_path(path) => PythonFileKind::WheelMetadata,
81 "pypi.json" => PythonFileKind::PypiJson,
82 "pip-inspect.deplock" => PythonFileKind::PipInspectDeplock,
83 _ => {
84 if archive::is_pip_cache_origin_json(path) {
85 PythonFileKind::PipOriginJson
86 } else if archive::is_python_sdist_archive_path(path) {
87 PythonFileKind::SdistArchive
88 } else if path
89 .extension()
90 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
91 && archive::is_valid_wheel_archive_path(path)
92 {
93 PythonFileKind::WheelArchive
94 } else if path
95 .extension()
96 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
97 {
98 PythonFileKind::EggArchive
99 } else {
100 return None;
101 }
102 }
103 })
104}
105
/// Marker type that carries the `PackageParser` implementation for Python
/// packaging files.
///
/// It holds no data; the parser API consists of associated functions only.
#[derive(Debug, Clone, Copy, Default)]
pub struct PythonParser;
116
117impl PackageParser for PythonParser {
118 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
119
120 fn extract_packages(path: &Path) -> Vec<PackageData> {
121 match classify_python_file(path) {
122 Some(PythonFileKind::PyprojectToml) => pyproject::extract(path),
123 Some(PythonFileKind::SetupCfg) => setup_cfg::extract(path),
124 Some(PythonFileKind::SetupPy) => setup_py::extract(path),
125 Some(PythonFileKind::PkgInfo) => rfc822_meta::extract_from_rfc822_metadata(
126 path,
127 utils::detect_pkg_info_datasource_id(path),
128 ),
129 Some(PythonFileKind::WheelMetadata) => {
130 rfc822_meta::extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
131 }
132 Some(PythonFileKind::PipOriginJson) => archive::extract_from_pip_origin_json(path),
133 Some(PythonFileKind::PypiJson) => pypi_json::extract_from_pypi_json(path),
134 Some(PythonFileKind::PipInspectDeplock) => pypi_json::extract_from_pip_inspect(path),
135 Some(PythonFileKind::SdistArchive) => archive::extract_from_sdist_archive(path),
136 Some(PythonFileKind::WheelArchive) => archive::extract_from_wheel_archive(path),
137 Some(PythonFileKind::EggArchive) => archive::extract_from_egg_archive(path),
138 None => utils::default_package_data(path),
139 }
140 }
141
142 fn is_match(path: &Path) -> bool {
143 classify_python_file(path).is_some()
144 }
145}
146
/// Returns `true` when the final path component is `pyproject.toml`, or ends
/// with `pyproject.toml` immediately preceded by `-`, `_` or `.`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, yield
/// `false`.
fn is_pyproject_toml_like_path(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|raw| raw.to_str()) else {
        return false;
    };

    match file_name.strip_suffix("pyproject.toml") {
        // Exact name.
        Some("") => true,
        // Prefixed variant: the prefix must end in an accepted separator.
        Some(prefix) => prefix.ends_with(['-', '_', '.']),
        None => false,
    }
}
157
/// Returns `true` when the final path component is `setup.py`, or ends with
/// `setup.py` immediately preceded by `_` or `-`.
///
/// A `.`-prefixed form such as `foo.setup.py` is not accepted. Paths without
/// a file name, or whose file name is not valid UTF-8, yield `false`.
fn is_setup_py_like_path(path: &Path) -> bool {
    let prefix = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .map(|file_name| file_name.strip_suffix("setup.py"));

    // `prefix` is `Some(Some(p))` when the name ends in `setup.py`; `p` is
    // whatever precedes it (empty for the exact name).
    matches!(prefix, Some(Some(p)) if p.is_empty() || p.ends_with(['_', '-']))
}
165
/// Returns `true` when the final path component is `setup.cfg`, or ends with
/// `_setup.cfg`, `-setup.cfg` or `.setup.cfg`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, yield
/// `false`.
fn is_setup_cfg_like_path(path: &Path) -> bool {
    const PREFIXED_SUFFIXES: [&str; 3] = ["_setup.cfg", "-setup.cfg", ".setup.cfg"];

    match path.file_name().and_then(|raw| raw.to_str()) {
        Some("setup.cfg") => true,
        Some(file_name) => PREFIXED_SUFFIXES
            .iter()
            .any(|suffix| file_name.ends_with(*suffix)),
        None => false,
    }
}
176
177pub(super) fn is_installed_wheel_metadata_path(path: &Path) -> bool {
178 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
179 && path
180 .parent()
181 .and_then(|parent| parent.file_name())
182 .and_then(|name| name.to_str())
183 .is_some_and(|name| name.ends_with(".dist-info"))
184}
185
186crate::register_parser!(
187 "Python package manifests (pyproject.toml, setup.py, suffixed setup.py variants, setup.cfg, pypi.json, PKG-INFO, .dist-info/METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
188 &[
189 "**/pyproject.toml",
190 "**/setup.py",
191 "**/*_setup.py",
192 "**/*-setup.py",
193 "**/setup.cfg",
194 "**/pypi.json",
195 "**/PKG-INFO",
196 "**/*.dist-info/METADATA",
197 "**/origin.json",
198 "**/*.tar.gz",
199 "**/*.tgz",
200 "**/*.tar.bz2",
201 "**/*.tar.xz",
202 "**/*.zip",
203 "**/*.whl",
204 "**/*.egg"
205 ],
206 "pypi",
207 "Python",
208 Some("https://packaging.python.org/"),
209);