provenant/parsers/python/
mod.rs1mod archive;
35mod pypi_json;
36mod pyproject;
37mod rfc822_meta;
38mod setup_cfg;
39mod setup_py;
40mod utils;
41
42#[cfg(test)]
43mod scan_test;
44#[cfg(test)]
45mod test;
46
47use super::PackageParser;
48use crate::models::{DatasourceId, PackageData, PackageType};
49use std::path::Path;
50
51pub(crate) use self::utils::build_pypi_urls;
52#[cfg(test)]
53pub(crate) use self::utils::extract_requires_dist_dependencies;
54pub(crate) use self::utils::read_toml_file;
55
/// Parser for Python packaging metadata.
///
/// A field-less marker type: it carries no state and exists only as the
/// implementor of [`PackageParser`], through which path matching and package
/// extraction for the `Pypi` package type are exposed.
pub struct PythonParser;
66
67impl PackageParser for PythonParser {
68 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
69
70 fn extract_packages(path: &Path) -> Vec<PackageData> {
71 vec![
72 if path.file_name().unwrap_or_default() == "pyproject.toml" {
73 pyproject::extract_from_pyproject_toml(path)
74 } else if path.file_name().unwrap_or_default() == "setup.cfg" {
75 setup_cfg::extract_from_setup_cfg(path)
76 } else if is_setup_py_like_path(path) {
77 return setup_py::extract_setup_py_packages(path);
78 } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
79 rfc822_meta::extract_from_rfc822_metadata(
80 path,
81 utils::detect_pkg_info_datasource_id(path),
82 )
83 } else if is_installed_wheel_metadata_path(path) {
84 rfc822_meta::extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
85 } else if archive::is_pip_cache_origin_json(path) {
86 archive::extract_from_pip_origin_json(path)
87 } else if path.file_name().unwrap_or_default() == "pypi.json" {
88 pypi_json::extract_from_pypi_json(path)
89 } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
90 pypi_json::extract_from_pip_inspect(path)
91 } else if archive::is_python_sdist_archive_path(path) {
92 archive::extract_from_sdist_archive(path)
93 } else if path
94 .extension()
95 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
96 {
97 archive::extract_from_wheel_archive(path)
98 } else if path
99 .extension()
100 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
101 {
102 archive::extract_from_egg_archive(path)
103 } else {
104 utils::default_package_data(path)
105 },
106 ]
107 }
108
109 fn is_match(path: &Path) -> bool {
110 if let Some(filename) = path.file_name()
111 && (filename == "pyproject.toml"
112 || filename == "setup.cfg"
113 || is_setup_py_like_path(path)
114 || filename == "PKG-INFO"
115 || (filename == "METADATA" && is_installed_wheel_metadata_path(path))
116 || filename == "pypi.json"
117 || filename == "pip-inspect.deplock"
118 || archive::is_pip_cache_origin_json(path))
119 {
120 return true;
121 }
122
123 if let Some(extension) = path.extension() {
124 let ext = extension.to_string_lossy().to_lowercase();
125 if (ext == "whl" && archive::is_valid_wheel_archive_path(path))
126 || ext == "egg"
127 || archive::is_python_sdist_archive_path(path)
128 {
129 return true;
130 }
131 }
132
133 false
134 }
135}
136
/// Returns `true` when the final component of `path` is `setup.py` or a
/// suffixed variant ending in `_setup.py` or `-setup.py`.
///
/// Paths with no final component, or whose file name is not valid UTF-8,
/// never match.
fn is_setup_py_like_path(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => {
            name == "setup.py" || name.ends_with("_setup.py") || name.ends_with("-setup.py")
        }
        None => false,
    }
}
144
145pub(super) fn is_installed_wheel_metadata_path(path: &Path) -> bool {
146 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
147 && path
148 .parent()
149 .and_then(|parent| parent.file_name())
150 .and_then(|name| name.to_str())
151 .is_some_and(|name| name.ends_with(".dist-info"))
152}
153
154crate::register_parser!(
155 "Python package manifests (pyproject.toml, setup.py, suffixed setup.py variants, setup.cfg, pypi.json, PKG-INFO, .dist-info/METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
156 &[
157 "**/pyproject.toml",
158 "**/setup.py",
159 "**/*_setup.py",
160 "**/*-setup.py",
161 "**/setup.cfg",
162 "**/pypi.json",
163 "**/PKG-INFO",
164 "**/*.dist-info/METADATA",
165 "**/origin.json",
166 "**/*.tar.gz",
167 "**/*.tgz",
168 "**/*.tar.bz2",
169 "**/*.tar.xz",
170 "**/*.zip",
171 "**/*.whl",
172 "**/*.egg"
173 ],
174 "pypi",
175 "Python",
176 Some("https://packaging.python.org/"),
177);