provenant/parsers/python/
mod.rs1mod archive;
38mod pypi_json;
39mod pyproject;
40mod rfc822_meta;
41mod setup_cfg;
42mod setup_py;
43mod utils;
44
45#[cfg(test)]
46mod scan_test;
47#[cfg(test)]
48mod test;
49
50use super::PackageParser;
51use super::metadata::ParserMetadata;
52use crate::models::{DatasourceId, PackageData, PackageType};
53use std::path::Path;
54
55pub(crate) use self::utils::build_pypi_urls;
56#[cfg(test)]
57pub(crate) use self::utils::extract_requires_dist_dependencies;
58pub(crate) use self::utils::read_toml_file;
59
/// The kinds of Python package inputs this module knows how to handle.
///
/// Produced by `classify_python_file` and consumed by
/// `PythonParser::extract_packages` to pick the matching extractor.
///
/// The derives make classification results cheap to copy and let tests compare
/// them with `assert_eq!` and print them with `{:?}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PythonFileKind {
    /// A `pyproject.toml`-like manifest.
    PyprojectToml,
    /// A `setup.cfg`-like manifest.
    SetupCfg,
    /// A `setup.py`-like manifest.
    SetupPy,
    /// A file named `PKG-INFO`.
    PkgInfo,
    /// A `METADATA` file located directly inside a `*.dist-info` directory.
    WheelMetadata,
    /// A file accepted by `archive::is_pip_cache_origin_json`.
    PipOriginJson,
    /// A file named `pypi.json`.
    PypiJson,
    /// A file named `pip-inspect.deplock`.
    PipInspectDeplock,
    /// A path accepted by `archive::is_python_sdist_archive_path`.
    SdistArchive,
    /// A `.whl` file that also passes `archive::is_valid_wheel_archive_path`.
    WheelArchive,
    /// A file with an `.egg` extension.
    EggArchive,
}
73
74fn classify_python_file(path: &Path) -> Option<PythonFileKind> {
75 let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
76 Some(match filename {
77 _ if is_pyproject_toml_like_path(path) => PythonFileKind::PyprojectToml,
78 _ if is_setup_cfg_like_path(path) => PythonFileKind::SetupCfg,
79 _ if is_setup_py_like_path(path) => PythonFileKind::SetupPy,
80 "PKG-INFO" => PythonFileKind::PkgInfo,
81 "METADATA" if is_installed_wheel_metadata_path(path) => PythonFileKind::WheelMetadata,
82 "pypi.json" => PythonFileKind::PypiJson,
83 "pip-inspect.deplock" => PythonFileKind::PipInspectDeplock,
84 _ => {
85 if archive::is_pip_cache_origin_json(path) {
86 PythonFileKind::PipOriginJson
87 } else if archive::is_python_sdist_archive_path(path) {
88 PythonFileKind::SdistArchive
89 } else if path
90 .extension()
91 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
92 && archive::is_valid_wheel_archive_path(path)
93 {
94 PythonFileKind::WheelArchive
95 } else if path
96 .extension()
97 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
98 {
99 PythonFileKind::EggArchive
100 } else {
101 return None;
102 }
103 }
104 })
105}
106
/// Parser for Python (PyPI) package manifests, metadata files, and archives.
///
/// A stateless unit type: all behaviour lives in its `PackageParser`
/// implementation.
pub struct PythonParser;
117
118impl PackageParser for PythonParser {
119 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
120
121 fn metadata() -> Vec<ParserMetadata> {
122 vec![ParserMetadata {
123 description: "Python package manifests (pyproject.toml, setup.py, suffixed setup.py variants, setup.cfg, pypi.json, PKG-INFO, .dist-info/METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
124 file_patterns: &[
125 "**/pyproject.toml",
126 "**/setup.py",
127 "**/*_setup.py",
128 "**/*-setup.py",
129 "**/setup.cfg",
130 "**/pypi.json",
131 "**/PKG-INFO",
132 "**/*.dist-info/METADATA",
133 "**/origin.json",
134 "**/*.tar.gz",
135 "**/*.tgz",
136 "**/*.tar.bz2",
137 "**/*.tar.xz",
138 "**/*.zip",
139 "**/*.whl",
140 "**/*.egg",
141 ],
142 package_type: "pypi",
143 primary_language: "Python",
144 documentation_url: Some("https://packaging.python.org/"),
145 }]
146 }
147
148 fn extract_packages(path: &Path) -> Vec<PackageData> {
149 match classify_python_file(path) {
150 Some(PythonFileKind::PyprojectToml) => pyproject::extract(path),
151 Some(PythonFileKind::SetupCfg) => setup_cfg::extract(path),
152 Some(PythonFileKind::SetupPy) => setup_py::extract(path),
153 Some(PythonFileKind::PkgInfo) => rfc822_meta::extract_from_rfc822_metadata(
154 path,
155 utils::detect_pkg_info_datasource_id(path),
156 ),
157 Some(PythonFileKind::WheelMetadata) => {
158 rfc822_meta::extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
159 }
160 Some(PythonFileKind::PipOriginJson) => archive::extract_from_pip_origin_json(path),
161 Some(PythonFileKind::PypiJson) => pypi_json::extract_from_pypi_json(path),
162 Some(PythonFileKind::PipInspectDeplock) => pypi_json::extract_from_pip_inspect(path),
163 Some(PythonFileKind::SdistArchive) => archive::extract_from_sdist_archive(path),
164 Some(PythonFileKind::WheelArchive) => archive::extract_from_wheel_archive(path),
165 Some(PythonFileKind::EggArchive) => archive::extract_from_egg_archive(path),
166 None => utils::default_package_data(path),
167 }
168 }
169
170 fn is_match(path: &Path) -> bool {
171 classify_python_file(path).is_some()
172 }
173}
174
/// Returns `true` when the final path component is `pyproject.toml` or ends
/// with `pyproject.toml` preceded by one of `-`, `_` or `.`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, never match.
fn is_pyproject_toml_like_path(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|os_name| os_name.to_str()) else {
        return false;
    };

    match file_name.strip_suffix("pyproject.toml") {
        // Exactly `pyproject.toml`.
        Some("") => true,
        // A prefixed variant must be joined by a recognised separator.
        Some(prefix) => matches!(prefix.chars().next_back(), Some('-' | '_' | '.')),
        None => false,
    }
}
185
/// Returns `true` when the final path component is `setup.py` or ends with
/// `_setup.py` / `-setup.py`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, never match.
fn is_setup_py_like_path(path: &Path) -> bool {
    // Suffixes that mark a prefixed `setup.py` variant.
    const VARIANT_SUFFIXES: [&str; 2] = ["_setup.py", "-setup.py"];

    let Some(file_name) = path.file_name().and_then(|os_name| os_name.to_str()) else {
        return false;
    };

    file_name == "setup.py"
        || VARIANT_SUFFIXES
            .iter()
            .any(|suffix| file_name.ends_with(*suffix))
}
193
/// Returns `true` when the final path component is `setup.cfg` or ends with
/// `setup.cfg` preceded by one of `_`, `-` or `.`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, never match.
fn is_setup_cfg_like_path(path: &Path) -> bool {
    let file_name = match path.file_name().and_then(|os_name| os_name.to_str()) {
        Some(text) => text,
        None => return false,
    };
    let Some(prefix) = file_name.strip_suffix("setup.cfg") else {
        return false;
    };

    // Either the bare name, or a prefix joined by a recognised separator.
    prefix.is_empty() || prefix.ends_with(|sep: char| matches!(sep, '_' | '-' | '.'))
}
204
205pub(super) fn is_installed_wheel_metadata_path(path: &Path) -> bool {
206 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
207 && path
208 .parent()
209 .and_then(|parent| parent.file_name())
210 .and_then(|name| name.to_str())
211 .is_some_and(|name| name.ends_with(".dist-info"))
212}