1use crate::models::{
35 DatasourceId, Dependency, FileReference, PackageData, PackageType, Party, Sha256Digest,
36};
37use crate::parser_warn as warn;
38use crate::parsers::utils::{read_file_to_string, split_name_email};
39use base64::Engine;
40use base64::engine::general_purpose::URL_SAFE_NO_PAD;
41use bzip2::read::BzDecoder;
42use csv::ReaderBuilder;
43use flate2::read::GzDecoder;
44use liblzma::read::XzDecoder;
45use packageurl::PackageUrl;
46use regex::Regex;
47use ruff_python_ast as ast;
48use ruff_python_parser::parse_module;
49use serde_json::{Map as JsonMap, Value as JsonValue};
50use sha2::{Digest, Sha256};
51use std::collections::{HashMap, HashSet};
52use std::fs::File;
53use std::io::Read;
54use std::path::{Component, Path, PathBuf};
55use tar::Archive;
56use toml::Value as TomlValue;
57use toml::map::Map as TomlMap;
58use zip::ZipArchive;
59
60use super::PackageParser;
61use super::license_normalization::{
62 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
63 normalize_spdx_expression,
64};
65use super::pep508::parse_pep508_requirement;
66
// String keys used when looking up metadata fields.
// NOTE(review): the lookups that consume these keys are outside this part of
// the file; presumably they are pyproject.toml table/field names — confirm
// against the call sites.
const FIELD_PROJECT: &str = "project";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_EXTRAS: &str = "extras";
82
/// Five optional strings bundled into one tuple.
// NOTE(review): judging by the name these are project URLs, but which URL each
// slot holds is decided by code outside this view — confirm before relying on
// the positional order.
type ProjectUrls = (
    Option<String>,
    Option<String>,
    Option<String>,
    Option<String>,
    Option<String>,
);
// Additional metadata keys.
// NOTE(review): consumers are outside this view — confirm where they are read.
const FIELD_DEPENDENCY_GROUPS: &str = "dependency-groups";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";
// Limits for setup.py processing.
// NOTE(review): the code enforcing these three limits is outside this view.
const MAX_SETUP_PY_BYTES: usize = 1_048_576;
const MAX_SETUP_PY_AST_NODES: usize = 10_000;
const MAX_SETUP_PY_AST_DEPTH: usize = 50;
/// Upper bound (100 MiB) on an sdist archive's on-disk size and on the running
/// total of declared uncompressed entry sizes accepted from one archive.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Upper bound (50 MiB) on the declared uncompressed size of a single archive entry.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Largest uncompressed:compressed size ratio tolerated before an entry or
/// archive is treated as suspicious.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
/// Unit type that carries the `PackageParser` implementation for Python package metadata.
pub struct PythonParser;
109
/// Archive formats recognised for Python source distributions, selected from
/// the archive's (lower-cased) file-name suffix.
#[derive(Clone, Copy, Debug)]
enum PythonSdistArchiveFormat {
    /// File name ends with `.tar.gz`.
    TarGz,
    /// File name ends with `.tgz`.
    Tgz,
    /// File name ends with `.tar.bz2`.
    TarBz2,
    /// File name ends with `.tar.xz`.
    TarXz,
    /// File name ends with `.zip`.
    Zip,
}
118
/// A zip entry that passed the path, size and compression-ratio checks in
/// `collect_validated_zip_entries`.
#[derive(Clone, Debug)]
struct ValidatedZipEntry {
    /// Position of the entry inside the zip archive.
    index: usize,
    /// Entry path as returned by `normalize_archive_entry_path`.
    name: String,
}
124
125impl PackageParser for PythonParser {
126 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
127
128 fn extract_packages(path: &Path) -> Vec<PackageData> {
129 vec![
130 if path.file_name().unwrap_or_default() == "pyproject.toml" {
131 extract_from_pyproject_toml(path)
132 } else if path.file_name().unwrap_or_default() == "setup.cfg" {
133 extract_from_setup_cfg(path)
134 } else if is_setup_py_like_path(path) {
135 return extract_setup_py_packages(path);
136 } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
137 extract_from_rfc822_metadata(path, detect_pkg_info_datasource_id(path))
138 } else if is_installed_wheel_metadata_path(path) {
139 extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
140 } else if is_pip_cache_origin_json(path) {
141 extract_from_pip_origin_json(path)
142 } else if path.file_name().unwrap_or_default() == "pypi.json" {
143 extract_from_pypi_json(path)
144 } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
145 extract_from_pip_inspect(path)
146 } else if is_python_sdist_archive_path(path) {
147 extract_from_sdist_archive(path)
148 } else if path
149 .extension()
150 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
151 {
152 extract_from_wheel_archive(path)
153 } else if path
154 .extension()
155 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
156 {
157 extract_from_egg_archive(path)
158 } else {
159 default_package_data(path)
160 },
161 ]
162 }
163
164 fn is_match(path: &Path) -> bool {
165 if let Some(filename) = path.file_name()
166 && (filename == "pyproject.toml"
167 || filename == "setup.cfg"
168 || is_setup_py_like_path(path)
169 || filename == "PKG-INFO"
170 || (filename == "METADATA" && is_installed_wheel_metadata_path(path))
171 || filename == "pypi.json"
172 || filename == "pip-inspect.deplock"
173 || is_pip_cache_origin_json(path))
174 {
175 return true;
176 }
177
178 if let Some(extension) = path.extension() {
179 let ext = extension.to_string_lossy().to_lowercase();
180 if (ext == "whl" && is_valid_wheel_archive_path(path))
181 || ext == "egg"
182 || is_python_sdist_archive_path(path)
183 {
184 return true;
185 }
186 }
187
188 false
189 }
190}
191
/// Returns `true` when the file name is `setup.py` or ends with `_setup.py`.
///
/// Paths without a file name, or whose name is not valid UTF-8, never match.
fn is_setup_py_like_path(path: &Path) -> bool {
    match path.file_name().and_then(|raw| raw.to_str()) {
        Some(file_name) => file_name == "setup.py" || file_name.ends_with("_setup.py"),
        None => false,
    }
}
197
/// Returns `true` for a file named `METADATA` whose parent directory name
/// ends with `.dist-info`.
fn is_installed_wheel_metadata_path(path: &Path) -> bool {
    if path.file_name().and_then(|raw| raw.to_str()) != Some("METADATA") {
        return false;
    }

    let parent_dir_name = path
        .parent()
        .and_then(Path::file_name)
        .and_then(|raw| raw.to_str());

    match parent_dir_name {
        Some(dir_name) => dir_name.ends_with(".dist-info"),
        None => false,
    }
}
206
/// Values read from a `WHEEL` file by `parse_installed_wheel_metadata`.
#[derive(Debug, Clone)]
struct InstalledWheelMetadata {
    /// Every `tag` header value; never empty for a parsed instance.
    wheel_tags: Vec<String>,
    /// First `wheel-version` header value, if present.
    wheel_version: Option<String>,
    /// First `generator` header value, if present.
    wheel_generator: Option<String>,
    /// `root-is-purelib` header parsed as a boolean; `None` when the header is
    /// missing or is neither `true` nor `false` (case-insensitive).
    root_is_purelib: Option<bool>,
    /// Result of `compress_wheel_tags` over `wheel_tags`.
    compressed_tag: Option<String>,
}
215
216fn merge_sibling_wheel_metadata(path: &Path, package_data: &mut PackageData) {
217 let Some(parent) = path.parent() else {
218 return;
219 };
220
221 if !parent
222 .file_name()
223 .and_then(|name| name.to_str())
224 .is_some_and(|name| name.ends_with(".dist-info"))
225 {
226 return;
227 }
228
229 let wheel_path = parent.join("WHEEL");
230 if !wheel_path.exists() {
231 return;
232 }
233
234 let Ok(content) = read_file_to_string(&wheel_path) else {
235 warn!("Failed to read sibling WHEEL file at {:?}", wheel_path);
236 return;
237 };
238
239 let Some(wheel_metadata) = parse_installed_wheel_metadata(&content) else {
240 return;
241 };
242
243 apply_installed_wheel_metadata(package_data, &wheel_metadata);
244}
245
246fn parse_installed_wheel_metadata(content: &str) -> Option<InstalledWheelMetadata> {
247 use super::rfc822::{get_header_all, get_header_first};
248
249 let metadata = super::rfc822::parse_rfc822_content(content);
250 let wheel_tags = get_header_all(&metadata.headers, "tag");
251 if wheel_tags.is_empty() {
252 return None;
253 }
254
255 let wheel_version = get_header_first(&metadata.headers, "wheel-version");
256 let wheel_generator = get_header_first(&metadata.headers, "generator");
257 let root_is_purelib =
258 get_header_first(&metadata.headers, "root-is-purelib").and_then(|value| {
259 match value.to_ascii_lowercase().as_str() {
260 "true" => Some(true),
261 "false" => Some(false),
262 _ => None,
263 }
264 });
265
266 let compressed_tag = compress_wheel_tags(&wheel_tags);
267
268 Some(InstalledWheelMetadata {
269 wheel_tags,
270 wheel_version,
271 wheel_generator,
272 root_is_purelib,
273 compressed_tag,
274 })
275}
276
/// Collapses a list of `python-abi-platform` tags into one compressed tag.
///
/// * no tags: `None`
/// * one tag: that tag, returned verbatim (it is not validated)
/// * several tags: `py1.py2-abi-platform`, provided every tag has three
///   `-`-separated parts and all tags share the same abi and platform;
///   otherwise `None`.
fn compress_wheel_tags(tags: &[String]) -> Option<String> {
    /// Splits `tag` into (python, abi, platform); the platform keeps any further `-`.
    fn split_tag(tag: &str) -> Option<(&str, &str, &str)> {
        let mut pieces = tag.splitn(3, '-');
        Some((pieces.next()?, pieces.next()?, pieces.next()?))
    }

    match tags {
        [] => None,
        [only] => Some(only.clone()),
        [first, ..] => {
            let (_, shared_abi, shared_platform) = split_tag(first)?;
            let mut interpreters = Vec::with_capacity(tags.len());

            for tag in tags {
                let (python, abi, platform) = split_tag(tag)?;
                if abi != shared_abi || platform != shared_platform {
                    return None;
                }
                interpreters.push(python);
            }

            Some(format!(
                "{}-{}-{}",
                interpreters.join("."),
                shared_abi,
                shared_platform
            ))
        }
    }
}
314
315fn apply_installed_wheel_metadata(
316 package_data: &mut PackageData,
317 wheel_metadata: &InstalledWheelMetadata,
318) {
319 let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
320 extra_data.insert(
321 "wheel_tags".to_string(),
322 JsonValue::Array(
323 wheel_metadata
324 .wheel_tags
325 .iter()
326 .cloned()
327 .map(JsonValue::String)
328 .collect(),
329 ),
330 );
331
332 if let Some(wheel_version) = &wheel_metadata.wheel_version {
333 extra_data.insert(
334 "wheel_version".to_string(),
335 JsonValue::String(wheel_version.clone()),
336 );
337 }
338
339 if let Some(wheel_generator) = &wheel_metadata.wheel_generator {
340 extra_data.insert(
341 "wheel_generator".to_string(),
342 JsonValue::String(wheel_generator.clone()),
343 );
344 }
345
346 if let Some(root_is_purelib) = wheel_metadata.root_is_purelib {
347 extra_data.insert(
348 "root_is_purelib".to_string(),
349 JsonValue::Bool(root_is_purelib),
350 );
351 }
352
353 if let (Some(name), Some(version), Some(extension)) = (
354 package_data.name.as_deref(),
355 package_data.version.as_deref(),
356 wheel_metadata.compressed_tag.as_deref(),
357 ) {
358 package_data.purl = build_pypi_purl_with_extension(name, Some(version), extension);
359 }
360}
361
/// Returns `true` for a file named `origin.json` that has a directory named
/// `wheels` (compared ASCII case-insensitively) among its ancestors.
fn is_pip_cache_origin_json(path: &Path) -> bool {
    if path.file_name().and_then(|raw| raw.to_str()) != Some("origin.json") {
        return false;
    }

    // skip(1): the first ancestor is the path itself.
    for ancestor in path.ancestors().skip(1) {
        if let Some(dir_name) = ancestor.file_name().and_then(|raw| raw.to_str()) {
            if dir_name.eq_ignore_ascii_case("wheels") {
                return true;
            }
        }
    }

    false
}
371
372fn extract_from_pip_origin_json(path: &Path) -> PackageData {
373 let content = match read_file_to_string(path) {
374 Ok(content) => content,
375 Err(e) => {
376 warn!("Failed to read pip cache origin.json at {:?}: {}", path, e);
377 return default_package_data(path);
378 }
379 };
380
381 let root: JsonValue = match serde_json::from_str(&content) {
382 Ok(root) => root,
383 Err(e) => {
384 warn!("Failed to parse pip cache origin.json at {:?}: {}", path, e);
385 return default_package_data(path);
386 }
387 };
388
389 let Some(download_url) = root.get("url").and_then(|value| value.as_str()) else {
390 warn!("No url found in pip cache origin.json at {:?}", path);
391 return default_package_data(path);
392 };
393
394 let sibling_wheel = find_sibling_cached_wheel(path);
395 let name_version = parse_name_version_from_origin_url(download_url).or_else(|| {
396 sibling_wheel
397 .as_ref()
398 .map(|wheel_info| (wheel_info.name.clone(), wheel_info.version.clone()))
399 });
400
401 let Some((name, version)) = name_version else {
402 warn!(
403 "Failed to infer package name/version from pip cache origin.json at {:?}",
404 path
405 );
406 return default_package_data(path);
407 };
408
409 let (repository_homepage_url, repository_download_url, api_data_url, plain_purl) =
410 build_pypi_urls(Some(&name), Some(&version));
411 let purl = sibling_wheel
412 .as_ref()
413 .and_then(|wheel_info| build_wheel_purl(Some(&name), Some(&version), wheel_info))
414 .or(plain_purl);
415
416 PackageData {
417 package_type: Some(PythonParser::PACKAGE_TYPE),
418 primary_language: Some("Python".to_string()),
419 name: Some(name),
420 version: Some(version),
421 datasource_id: Some(DatasourceId::PypiPipOriginJson),
422 download_url: Some(download_url.to_string()),
423 sha256: extract_sha256_from_origin_json(&root)
424 .and_then(|h| Sha256Digest::from_hex(&h).ok()),
425 repository_homepage_url,
426 repository_download_url,
427 api_data_url,
428 purl,
429 ..Default::default()
430 }
431}
432
433fn find_sibling_cached_wheel(path: &Path) -> Option<WheelInfo> {
434 let parent = path.parent()?;
435 let entries = parent.read_dir().ok()?;
436
437 for entry in entries.flatten() {
438 let sibling_path = entry.path();
439 if sibling_path
440 .extension()
441 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
442 && let Some(wheel_info) = parse_wheel_filename(&sibling_path)
443 {
444 return Some(wheel_info);
445 }
446 }
447
448 None
449}
450
451fn parse_name_version_from_origin_url(url: &str) -> Option<(String, String)> {
452 let file_name = url.rsplit('/').next()?;
453
454 if file_name.ends_with(".whl") {
455 return parse_wheel_filename(Path::new(file_name))
456 .map(|wheel_info| (wheel_info.name, wheel_info.version));
457 }
458
459 let stem = strip_python_archive_extension(file_name)?;
460 let (name, version) = stem.rsplit_once('-')?;
461 if name.is_empty() || version.is_empty() {
462 return None;
463 }
464
465 Some((name.replace('_', "-"), version.to_string()))
466}
467
/// Removes a known Python archive extension from `file_name`.
///
/// The suffixes are tried in a fixed order and the match is case-sensitive.
/// Returns the remaining stem, or `None` if no suffix matches.
fn strip_python_archive_extension(file_name: &str) -> Option<&str> {
    const ARCHIVE_SUFFIXES: [&str; 6] =
        [".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".whl"];

    for suffix in ARCHIVE_SUFFIXES.iter() {
        if let Some(stem) = file_name.strip_suffix(*suffix) {
            return Some(stem);
        }
    }

    None
}
473
474fn extract_sha256_from_origin_json(root: &JsonValue) -> Option<String> {
475 root.pointer("/archive_info/hashes/sha256")
476 .and_then(|value| value.as_str())
477 .map(ToOwned::to_owned)
478 .or_else(|| {
479 root.pointer("/archive_info/hash")
480 .and_then(|value| value.as_str())
481 .and_then(normalize_origin_hash)
482 })
483}
484
/// Extracts the digest part of a hash string.
///
/// Accepts `sha256=<value>` and `sha256:<value>` (the value is returned
/// without validation) or a bare string of exactly 64 ASCII hex digits.
/// Anything else yields `None`.
fn normalize_origin_hash(hash: &str) -> Option<String> {
    let prefixed = hash
        .strip_prefix("sha256=")
        .or_else(|| hash.strip_prefix("sha256:"));

    match prefixed {
        Some(value) => Some(value.to_string()),
        None if hash.len() == 64 && hash.bytes().all(|byte| byte.is_ascii_hexdigit()) => {
            Some(hash.to_string())
        }
        None => None,
    }
}
497
498fn extract_from_rfc822_metadata(path: &Path, datasource_id: DatasourceId) -> PackageData {
499 let content = match read_file_to_string(path) {
500 Ok(content) => content,
501 Err(e) => {
502 warn!("Failed to read metadata at {:?}: {}", path, e);
503 return default_package_data(path);
504 }
505 };
506
507 let metadata = super::rfc822::parse_rfc822_content(&content);
508 let mut package_data = build_package_data_from_rfc822(&metadata, datasource_id);
509 merge_sibling_metadata_dependencies(path, &mut package_data);
510 merge_sibling_metadata_file_references(path, &mut package_data);
511 if datasource_id == DatasourceId::PypiWheelMetadata {
512 merge_sibling_wheel_metadata(path, &mut package_data);
513 }
514 package_data
515}
516
517fn merge_sibling_metadata_dependencies(path: &Path, package_data: &mut PackageData) {
518 let mut extra_dependencies = Vec::new();
519
520 if let Some(parent) = path.parent() {
521 let direct_requires = parent.join("requires.txt");
522 if direct_requires.exists()
523 && let Ok(content) = read_file_to_string(&direct_requires)
524 {
525 extra_dependencies.extend(parse_requires_txt(&content));
526 }
527
528 let sibling_egg_info_requires = parent
529 .read_dir()
530 .ok()
531 .into_iter()
532 .flatten()
533 .flatten()
534 .find_map(|entry| {
535 let child_path = entry.path();
536 if child_path.is_dir()
537 && child_path
538 .file_name()
539 .and_then(|name| name.to_str())
540 .is_some_and(|name| name.ends_with(".egg-info"))
541 {
542 let requires = child_path.join("requires.txt");
543 requires.exists().then_some(requires)
544 } else {
545 None
546 }
547 });
548
549 if let Some(requires_path) = sibling_egg_info_requires
550 && let Ok(content) = read_file_to_string(&requires_path)
551 {
552 extra_dependencies.extend(parse_requires_txt(&content));
553 }
554 }
555
556 for dependency in extra_dependencies {
557 if !package_data.dependencies.iter().any(|existing| {
558 existing.purl == dependency.purl
559 && existing.scope == dependency.scope
560 && existing.extracted_requirement == dependency.extracted_requirement
561 && existing.extra_data == dependency.extra_data
562 }) {
563 package_data.dependencies.push(dependency);
564 }
565 }
566}
567
568fn merge_sibling_metadata_file_references(path: &Path, package_data: &mut PackageData) {
569 let mut extra_refs = Vec::new();
570
571 if let Some(parent) = path.parent() {
572 let record_path = parent.join("RECORD");
573 if record_path.exists()
574 && let Ok(content) = read_file_to_string(&record_path)
575 {
576 extra_refs.extend(parse_record_csv(&content));
577 }
578
579 let installed_files_path = parent.join("installed-files.txt");
580 if installed_files_path.exists()
581 && let Ok(content) = read_file_to_string(&installed_files_path)
582 {
583 extra_refs.extend(parse_installed_files_txt(&content));
584 }
585
586 let sources_path = parent.join("SOURCES.txt");
587 if sources_path.exists()
588 && let Ok(content) = read_file_to_string(&sources_path)
589 {
590 extra_refs.extend(parse_sources_txt(&content));
591 }
592 }
593
594 for file_ref in extra_refs {
595 if !package_data
596 .file_references
597 .iter()
598 .any(|existing| existing.path == file_ref.path)
599 {
600 package_data.file_references.push(file_ref);
601 }
602 }
603}
604
605fn collect_validated_zip_entries<R: Read + std::io::Seek>(
606 archive: &mut ZipArchive<R>,
607 path: &Path,
608 archive_type: &str,
609) -> Result<Vec<ValidatedZipEntry>, String> {
610 let mut total_extracted = 0u64;
611 let mut entries = Vec::new();
612
613 for i in 0..archive.len() {
614 if let Ok(file) = archive.by_index_raw(i) {
615 let compressed_size = file.compressed_size();
616 let uncompressed_size = file.size();
617 let Some(entry_name) = normalize_archive_entry_path(file.name()) else {
618 warn!(
619 "Skipping unsafe path in {} {:?}: {}",
620 archive_type,
621 path,
622 file.name()
623 );
624 continue;
625 };
626
627 if compressed_size > 0 {
628 let ratio = uncompressed_size as f64 / compressed_size as f64;
629 if ratio > MAX_COMPRESSION_RATIO {
630 warn!(
631 "Suspicious compression ratio in {} {:?}: {:.2}:1",
632 archive_type, path, ratio
633 );
634 continue;
635 }
636 }
637
638 if uncompressed_size > MAX_FILE_SIZE {
639 warn!(
640 "File too large in {} {:?}: {} bytes (limit: {} bytes)",
641 archive_type, path, uncompressed_size, MAX_FILE_SIZE
642 );
643 continue;
644 }
645
646 total_extracted += uncompressed_size;
647 if total_extracted > MAX_ARCHIVE_SIZE {
648 let msg = format!(
649 "Total extracted size exceeds limit for {} {:?}",
650 archive_type, path
651 );
652 warn!("{}", msg);
653 return Err(msg);
654 }
655
656 entries.push(ValidatedZipEntry {
657 index: i,
658 name: entry_name,
659 });
660 }
661 }
662
663 Ok(entries)
664}
665
666fn is_python_sdist_archive_path(path: &Path) -> bool {
667 detect_python_sdist_archive_format(path).is_some()
668}
669
670fn is_valid_wheel_archive_path(path: &Path) -> bool {
671 if !path.is_file() {
672 return true;
673 }
674
675 let file = match File::open(path) {
676 Ok(file) => file,
677 Err(_) => return false,
678 };
679 let mut archive = match ZipArchive::new(file) {
680 Ok(archive) => archive,
681 Err(_) => return false,
682 };
683
684 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "wheel") {
685 Ok(entries) => entries,
686 Err(_) => return false,
687 };
688
689 find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/METADATA").is_some()
690}
691
692fn detect_python_sdist_archive_format(path: &Path) -> Option<PythonSdistArchiveFormat> {
693 let file_name = path.file_name()?.to_str()?.to_ascii_lowercase();
694
695 if !is_likely_python_sdist_filename(&file_name) {
696 return None;
697 }
698
699 if file_name.ends_with(".tar.gz") {
700 tar_gz_sdist_contains_pkg_info(path).then_some(PythonSdistArchiveFormat::TarGz)
701 } else if file_name.ends_with(".tgz") {
702 tgz_sdist_contains_pkg_info(path).then_some(PythonSdistArchiveFormat::Tgz)
703 } else if file_name.ends_with(".tar.bz2") {
704 tar_bz2_sdist_contains_pkg_info(path).then_some(PythonSdistArchiveFormat::TarBz2)
705 } else if file_name.ends_with(".tar.xz") {
706 tar_xz_sdist_contains_pkg_info(path).then_some(PythonSdistArchiveFormat::TarXz)
707 } else if file_name.ends_with(".zip") {
708 zip_sdist_contains_pkg_info(path).then_some(PythonSdistArchiveFormat::Zip)
709 } else {
710 None
711 }
712}
713
714fn tar_gz_sdist_contains_pkg_info(path: &Path) -> bool {
715 let Some(compressed_size) = compressed_archive_size(path) else {
716 return false;
717 };
718 let file = match File::open(path) {
719 Ok(file) => file,
720 Err(_) => return false,
721 };
722 let decoder = GzDecoder::new(file);
723 tar_sdist_contains_pkg_info(path, decoder, "tar.gz", compressed_size)
724}
725
726fn tar_bz2_sdist_contains_pkg_info(path: &Path) -> bool {
727 let Some(compressed_size) = compressed_archive_size(path) else {
728 return false;
729 };
730 let file = match File::open(path) {
731 Ok(file) => file,
732 Err(_) => return false,
733 };
734 let decoder = BzDecoder::new(file);
735 tar_sdist_contains_pkg_info(path, decoder, "tar.bz2", compressed_size)
736}
737
738fn tar_xz_sdist_contains_pkg_info(path: &Path) -> bool {
739 let Some(compressed_size) = compressed_archive_size(path) else {
740 return false;
741 };
742 let file = match File::open(path) {
743 Ok(file) => file,
744 Err(_) => return false,
745 };
746 let decoder = XzDecoder::new(file);
747 tar_sdist_contains_pkg_info(path, decoder, "tar.xz", compressed_size)
748}
749
/// Returns the length in bytes reported by the file system for `path`, or
/// `None` when its metadata cannot be read.
fn compressed_archive_size(path: &Path) -> Option<u64> {
    match std::fs::metadata(path) {
        Ok(file_metadata) => Some(file_metadata.len()),
        Err(_) => None,
    }
}
753
754fn tar_sdist_contains_pkg_info<R: Read>(
755 path: &Path,
756 reader: R,
757 archive_type: &str,
758 compressed_size: u64,
759) -> bool {
760 let Some(entries) = collect_tar_sdist_entries(path, reader, archive_type, compressed_size)
761 else {
762 return false;
763 };
764
765 select_sdist_pkginfo_entry(path, &entries).is_some()
766}
767
768fn tgz_sdist_contains_pkg_info(path: &Path) -> bool {
769 if !path.is_file() {
770 return true;
771 }
772
773 let Some(compressed_size) = compressed_archive_size(path) else {
774 return false;
775 };
776 let file = match File::open(path) {
777 Ok(file) => file,
778 Err(_) => return false,
779 };
780 let decoder = GzDecoder::new(file);
781 tar_sdist_contains_pkg_info(path, decoder, "tgz", compressed_size)
782}
783
784fn zip_sdist_contains_pkg_info(path: &Path) -> bool {
785 if !path.is_file() {
786 return true;
787 }
788
789 let file = match File::open(path) {
790 Ok(file) => file,
791 Err(_) => return false,
792 };
793 let mut archive = match ZipArchive::new(file) {
794 Ok(archive) => archive,
795 Err(_) => return false,
796 };
797
798 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "sdist zip") {
799 Ok(entries) => entries,
800 Err(_) => return false,
801 };
802 let metadata_entries: Vec<_> = validated_entries
803 .iter()
804 .filter(|entry| entry.name.ends_with("/PKG-INFO"))
805 .filter_map(|entry| {
806 read_validated_zip_entry(&mut archive, entry, path, "sdist zip")
807 .ok()
808 .map(|content| (entry.name.clone(), content))
809 })
810 .collect();
811
812 has_matching_sdist_pkginfo_candidate(path, &metadata_entries)
813}
814
815fn is_likely_python_sdist_filename(file_name: &str) -> bool {
816 let Some(stem) = strip_python_archive_extension(file_name) else {
817 return false;
818 };
819
820 let Some((name, version)) = stem.rsplit_once('-') else {
821 return false;
822 };
823
824 !name.is_empty()
825 && !version.is_empty()
826 && version.chars().any(|ch| ch.is_ascii_digit())
827 && name
828 .chars()
829 .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
830}
831
832fn extract_from_sdist_archive(path: &Path) -> PackageData {
833 let metadata = match std::fs::metadata(path) {
834 Ok(m) => m,
835 Err(e) => {
836 warn!(
837 "Failed to read metadata for sdist archive {:?}: {}",
838 path, e
839 );
840 return default_package_data(path);
841 }
842 };
843
844 if metadata.len() > MAX_ARCHIVE_SIZE {
845 warn!(
846 "sdist archive too large: {} bytes (limit: {} bytes)",
847 metadata.len(),
848 MAX_ARCHIVE_SIZE
849 );
850 return default_package_data(path);
851 }
852
853 let Some(format) = detect_python_sdist_archive_format(path) else {
854 return default_package_data(path);
855 };
856
857 let mut package_data = match format {
858 PythonSdistArchiveFormat::TarGz | PythonSdistArchiveFormat::Tgz => {
859 let file = match File::open(path) {
860 Ok(file) => file,
861 Err(e) => {
862 warn!("Failed to open sdist archive {:?}: {}", path, e);
863 return default_package_data(path);
864 }
865 };
866 let decoder = GzDecoder::new(file);
867 extract_from_tar_sdist_archive(path, decoder, "tar.gz", metadata.len())
868 }
869 PythonSdistArchiveFormat::TarBz2 => {
870 let file = match File::open(path) {
871 Ok(file) => file,
872 Err(e) => {
873 warn!("Failed to open sdist archive {:?}: {}", path, e);
874 return default_package_data(path);
875 }
876 };
877 let decoder = BzDecoder::new(file);
878 extract_from_tar_sdist_archive(path, decoder, "tar.bz2", metadata.len())
879 }
880 PythonSdistArchiveFormat::TarXz => {
881 let file = match File::open(path) {
882 Ok(file) => file,
883 Err(e) => {
884 warn!("Failed to open sdist archive {:?}: {}", path, e);
885 return default_package_data(path);
886 }
887 };
888 let decoder = XzDecoder::new(file);
889 extract_from_tar_sdist_archive(path, decoder, "tar.xz", metadata.len())
890 }
891 PythonSdistArchiveFormat::Zip => extract_from_zip_sdist_archive(path),
892 };
893
894 if package_data.package_type.is_some() {
895 let (size, sha256) = calculate_file_checksums(path);
896 package_data.size = size;
897 package_data.sha256 = sha256;
898 }
899
900 package_data
901}
902
903fn extract_from_tar_sdist_archive<R: Read>(
904 path: &Path,
905 reader: R,
906 archive_type: &str,
907 compressed_size: u64,
908) -> PackageData {
909 let Some(entries) = collect_tar_sdist_entries(path, reader, archive_type, compressed_size)
910 else {
911 return default_package_data(path);
912 };
913
914 build_sdist_package_data(path, entries)
915}
916
/// Streams a tar archive from `reader` and collects the text of the entries
/// relevant to sdist metadata as `(normalized entry path, content)` pairs.
///
/// Returns `None` when the archive cannot be iterated, when the running total
/// of declared entry sizes exceeds `MAX_ARCHIVE_SIZE`, or when that total
/// divided by `compressed_size` exceeds `MAX_COMPRESSION_RATIO`.
/// Individual entries that are unreadable, too large, have an unusable or
/// unsafe path, are irrelevant, or fail the limited UTF-8 read are skipped.
///
/// `path` and `archive_type` are used for log messages only.
fn collect_tar_sdist_entries<R: Read>(
    path: &Path,
    reader: R,
    archive_type: &str,
    compressed_size: u64,
) -> Option<Vec<(String, String)>> {
    let mut archive = Archive::new(reader);
    let archive_entries = match archive.entries() {
        Ok(entries) => entries,
        Err(e) => {
            warn!(
                "Failed to read {} sdist archive {:?}: {}",
                archive_type, path, e
            );
            return None;
        }
    };

    let mut total_extracted = 0u64;
    let mut entries = Vec::new();

    for entry_result in archive_entries {
        let mut entry = match entry_result {
            Ok(entry) => entry,
            Err(e) => {
                warn!(
                    "Failed to read {} sdist entry from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        // Oversized entries are skipped before they are added to the total.
        let entry_size = entry.size();
        if entry_size > MAX_FILE_SIZE {
            warn!(
                "File too large in {} sdist {:?}: {} bytes (limit: {} bytes)",
                archive_type, path, entry_size, MAX_FILE_SIZE
            );
            continue;
        }

        // Every remaining entry counts towards the total, relevant or not.
        total_extracted += entry_size;
        if total_extracted > MAX_ARCHIVE_SIZE {
            warn!(
                "Total extracted size exceeds limit for {} sdist {:?}",
                archive_type, path
            );
            return None;
        }

        // The ratio uses the cumulative size against the whole compressed
        // file, not a per-entry figure.
        if compressed_size > 0 {
            let ratio = total_extracted as f64 / compressed_size as f64;
            if ratio > MAX_COMPRESSION_RATIO {
                warn!(
                    "Suspicious compression ratio in {} sdist {:?}: {:.2}:1",
                    archive_type, path, ratio
                );
                return None;
            }
        }

        // Backslashes are turned into forward slashes before normalization.
        let entry_path = match entry.path() {
            Ok(path) => path.to_string_lossy().replace('\\', "/"),
            Err(e) => {
                warn!(
                    "Failed to get {} sdist entry path from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        let Some(entry_path) = normalize_archive_entry_path(&entry_path) else {
            warn!("Skipping unsafe {} sdist path in {:?}", archive_type, path);
            continue;
        };

        if !is_relevant_sdist_text_entry(&entry_path) {
            continue;
        }

        // Entries whose limited read fails are dropped here.
        if let Ok(content) = read_limited_utf8(
            &mut entry,
            MAX_FILE_SIZE,
            &format!("{} entry {}", archive_type, entry_path),
        ) {
            entries.push((entry_path, content));
        }
    }

    Some(entries)
}
1010
1011fn extract_from_zip_sdist_archive(path: &Path) -> PackageData {
1012 let file = match File::open(path) {
1013 Ok(file) => file,
1014 Err(e) => {
1015 warn!("Failed to open zip sdist archive {:?}: {}", path, e);
1016 return default_package_data(path);
1017 }
1018 };
1019
1020 let mut archive = match ZipArchive::new(file) {
1021 Ok(archive) => archive,
1022 Err(e) => {
1023 warn!("Failed to read zip sdist archive {:?}: {}", path, e);
1024 return default_package_data(path);
1025 }
1026 };
1027
1028 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "sdist zip") {
1029 Ok(entries) => entries,
1030 Err(_) => return default_package_data(path),
1031 };
1032
1033 let mut entries = Vec::new();
1034 for entry in validated_entries.iter() {
1035 if !is_relevant_sdist_text_entry(&entry.name) {
1036 continue;
1037 }
1038
1039 if let Ok(content) = read_validated_zip_entry(&mut archive, entry, path, "sdist zip") {
1040 entries.push((entry.name.clone(), content));
1041 }
1042 }
1043
1044 build_sdist_package_data(path, entries)
1045}
1046
/// Returns `true` when the entry path ends with `/PKG-INFO`, `/requires.txt`
/// or `/SOURCES.txt`, i.e. the file sits inside at least one directory.
fn is_relevant_sdist_text_entry(entry_path: &str) -> bool {
    const RELEVANT_SUFFIXES: [&str; 3] = ["/PKG-INFO", "/requires.txt", "/SOURCES.txt"];

    RELEVANT_SUFFIXES
        .iter()
        .any(|suffix| entry_path.ends_with(*suffix))
}
1052
1053fn build_sdist_package_data(path: &Path, entries: Vec<(String, String)>) -> PackageData {
1054 let Some((metadata_path, metadata_content)) = select_sdist_pkginfo_entry(path, &entries) else {
1055 warn!("No PKG-INFO file found in sdist archive {:?}", path);
1056 return default_package_data(path);
1057 };
1058
1059 let mut package_data =
1060 python_parse_rfc822_content(&metadata_content, DatasourceId::PypiSdistPkginfo);
1061 merge_sdist_archive_dependencies(&entries, &metadata_path, &mut package_data);
1062 merge_sdist_archive_file_references(&entries, &metadata_path, &mut package_data);
1063 apply_sdist_name_version_fallback(path, &mut package_data);
1064 package_data.datasource_id = Some(DatasourceId::PypiSdist);
1065 package_data
1066}
1067
1068fn select_sdist_pkginfo_entry(
1069 archive_path: &Path,
1070 entries: &[(String, String)],
1071) -> Option<(String, String)> {
1072 let expected_name = sdist_archive_expected_name(archive_path);
1073
1074 entries
1075 .iter()
1076 .filter(|(entry_path, _)| entry_path.ends_with("/PKG-INFO"))
1077 .min_by_key(|(entry_path, content)| {
1078 let components: Vec<_> = entry_path
1079 .split('/')
1080 .filter(|part| !part.is_empty())
1081 .collect();
1082 let candidate_name = sdist_pkginfo_candidate_name(content);
1083 let name_rank = if candidate_name == expected_name {
1084 0
1085 } else {
1086 1
1087 };
1088 let kind_rank = sdist_pkginfo_kind_rank(entry_path);
1089
1090 (name_rank, kind_rank, components.len(), entry_path.clone())
1091 })
1092 .map(|(entry_path, content)| (entry_path.clone(), content.clone()))
1093}
1094
1095fn has_matching_sdist_pkginfo_candidate(archive_path: &Path, entries: &[(String, String)]) -> bool {
1096 let Some(expected_name) = sdist_archive_expected_name(archive_path) else {
1097 return false;
1098 };
1099
1100 entries.iter().any(|(entry_path, content)| {
1101 sdist_pkginfo_kind_rank(entry_path) < 3
1102 && sdist_pkginfo_candidate_name(content).as_deref() == Some(expected_name.as_str())
1103 })
1104}
1105
1106fn sdist_archive_expected_name(archive_path: &Path) -> Option<String> {
1107 archive_path
1108 .file_name()
1109 .and_then(|name| name.to_str())
1110 .and_then(strip_python_archive_extension)
1111 .and_then(|stem| {
1112 stem.rsplit_once('-')
1113 .map(|(name, _)| normalize_python_package_name(name))
1114 })
1115}
1116
1117fn sdist_pkginfo_candidate_name(content: &str) -> Option<String> {
1118 let metadata = super::rfc822::parse_rfc822_content(content);
1119 super::rfc822::get_header_first(&metadata.headers, "name")
1120 .map(|name| normalize_python_package_name(&name))
1121}
1122
/// Ranks a `PKG-INFO` entry path by its location; lower is preferred.
///
/// * `0`: `<root>/<x>.egg-info/PKG-INFO`
/// * `1`: `<root>/PKG-INFO`
/// * `2`: any other path ending in `.egg-info/PKG-INFO`
/// * `3`: everything else
///
/// Empty path segments are ignored when counting components.
fn sdist_pkginfo_kind_rank(entry_path: &str) -> usize {
    let segments: Vec<&str> = entry_path
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();

    match segments.as_slice() {
        [_, egg_info_dir, "PKG-INFO"] if egg_info_dir.ends_with(".egg-info") => 0,
        [_, "PKG-INFO"] => 1,
        _ if entry_path.ends_with(".egg-info/PKG-INFO") => 2,
        _ => 3,
    }
}
1140
1141fn merge_sdist_archive_dependencies(
1142 entries: &[(String, String)],
1143 metadata_path: &str,
1144 package_data: &mut PackageData,
1145) {
1146 let metadata_dir = metadata_path
1147 .rsplit_once('/')
1148 .map(|(dir, _)| dir)
1149 .unwrap_or("");
1150 let archive_root = metadata_path.split('/').next().unwrap_or("");
1151 let matched_egg_info_dir =
1152 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
1153 let mut extra_dependencies = Vec::new();
1154
1155 for (entry_path, content) in entries {
1156 let is_direct_requires =
1157 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/requires.txt");
1158 let is_egg_info_requires = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
1159 entry_path == &format!("{archive_root}/{egg_info_dir}/requires.txt")
1160 });
1161
1162 if is_direct_requires || is_egg_info_requires {
1163 extra_dependencies.extend(parse_requires_txt(content));
1164 }
1165 }
1166
1167 for dependency in extra_dependencies {
1168 if !package_data.dependencies.iter().any(|existing| {
1169 existing.purl == dependency.purl
1170 && existing.scope == dependency.scope
1171 && existing.extracted_requirement == dependency.extracted_requirement
1172 && existing.extra_data == dependency.extra_data
1173 }) {
1174 package_data.dependencies.push(dependency);
1175 }
1176 }
1177}
1178
1179fn merge_sdist_archive_file_references(
1180 entries: &[(String, String)],
1181 metadata_path: &str,
1182 package_data: &mut PackageData,
1183) {
1184 let metadata_dir = metadata_path
1185 .rsplit_once('/')
1186 .map(|(dir, _)| dir)
1187 .unwrap_or("");
1188 let archive_root = metadata_path.split('/').next().unwrap_or("");
1189 let matched_egg_info_dir =
1190 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
1191 let mut extra_refs = Vec::new();
1192
1193 for (entry_path, content) in entries {
1194 let is_direct_sources =
1195 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/SOURCES.txt");
1196 let is_egg_info_sources = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
1197 entry_path == &format!("{archive_root}/{egg_info_dir}/SOURCES.txt")
1198 });
1199
1200 if is_direct_sources || is_egg_info_sources {
1201 extra_refs.extend(parse_sources_txt(content));
1202 }
1203 }
1204
1205 for file_ref in extra_refs {
1206 if !package_data
1207 .file_references
1208 .iter()
1209 .any(|existing| existing.path == file_ref.path)
1210 {
1211 package_data.file_references.push(file_ref);
1212 }
1213 }
1214}
1215
1216fn select_matching_sdist_egg_info_dir(
1217 entries: &[(String, String)],
1218 archive_root: &str,
1219 package_name: Option<&str>,
1220) -> Option<String> {
1221 let normalized_package_name = package_name.map(normalize_python_package_name);
1222
1223 entries
1224 .iter()
1225 .filter_map(|(entry_path, _)| {
1226 let components: Vec<_> = entry_path
1227 .split('/')
1228 .filter(|part| !part.is_empty())
1229 .collect();
1230 if components.len() == 3
1231 && components[0] == archive_root
1232 && components[1].ends_with(".egg-info")
1233 {
1234 Some(components[1].to_string())
1235 } else {
1236 None
1237 }
1238 })
1239 .min_by_key(|egg_info_dir| {
1240 let normalized_dir_name =
1241 normalize_python_package_name(egg_info_dir.trim_end_matches(".egg-info"));
1242 let name_rank = if Some(normalized_dir_name.clone()) == normalized_package_name {
1243 0
1244 } else {
1245 1
1246 };
1247
1248 (name_rank, egg_info_dir.clone())
1249 })
1250}
1251
/// Normalises a Python project name for comparison, following PEP 503:
/// ASCII letters are lower-cased and every run of `-`, `_` or `.` collapses
/// to a single `-`.
///
/// Folding `.` and separator runs (not just `_`) lets names such as
/// `zope.interface` match archive or directory names spelled
/// `zope_interface`.
fn normalize_python_package_name(name: &str) -> String {
    let mut normalized = String::with_capacity(name.len());
    let mut in_separator_run = false;

    for ch in name.chars() {
        if matches!(ch, '-' | '_' | '.') {
            // Emit one dash per run of separators.
            if !in_separator_run {
                normalized.push('-');
            }
            in_separator_run = true;
        } else {
            normalized.push(ch.to_ascii_lowercase());
            in_separator_run = false;
        }
    }

    normalized
}
1255
1256fn apply_sdist_name_version_fallback(path: &Path, package_data: &mut PackageData) {
1257 let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
1258 return;
1259 };
1260
1261 let Some(stem) = strip_python_archive_extension(file_name) else {
1262 return;
1263 };
1264
1265 let Some((name, version)) = stem.rsplit_once('-') else {
1266 return;
1267 };
1268
1269 if package_data.name.is_none() {
1270 package_data.name = Some(name.replace('_', "-"));
1271 }
1272 if package_data.version.is_none() {
1273 package_data.version = Some(version.to_string());
1274 }
1275
1276 if package_data.purl.is_none()
1277 || package_data.repository_homepage_url.is_none()
1278 || package_data.repository_download_url.is_none()
1279 || package_data.api_data_url.is_none()
1280 {
1281 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
1282 build_pypi_urls(
1283 package_data.name.as_deref(),
1284 package_data.version.as_deref(),
1285 );
1286
1287 if package_data.repository_homepage_url.is_none() {
1288 package_data.repository_homepage_url = repository_homepage_url;
1289 }
1290 if package_data.repository_download_url.is_none() {
1291 package_data.repository_download_url = repository_download_url;
1292 }
1293 if package_data.api_data_url.is_none() {
1294 package_data.api_data_url = api_data_url;
1295 }
1296 if package_data.purl.is_none() {
1297 package_data.purl = purl;
1298 }
1299 }
1300}
1301
/// Extracts package data from a wheel (`.whl`) zip archive.
///
/// Reads the first entry ending in `.dist-info/METADATA` as RFC 822 metadata,
/// records the archive's size and SHA-256, loads file references from
/// `.dist-info/RECORD` when readable, and enriches the result with the tags
/// parsed from the wheel file name.
///
/// Any failure before the metadata has been parsed (stat, size limit, open,
/// zip parsing, entry validation, missing or unreadable `METADATA`) returns
/// `default_package_data(path)`.
fn extract_from_wheel_archive(path: &Path) -> PackageData {
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!(
                "Failed to read metadata for wheel archive {:?}: {}",
                path, e
            );
            return default_package_data(path);
        }
    };

    // Refuse oversized archives before opening them.
    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Wheel archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data(path);
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open wheel archive {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read wheel archive {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    // No warning is emitted here on failure; presumably the collector reports
    // its own errors (TODO confirm).
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "wheel") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(path),
    };

    let metadata_entry =
        match find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/METADATA") {
            Some(entry) => entry,
            None => {
                warn!("No METADATA file found in wheel archive {:?}", path);
                return default_package_data(path);
            }
        };

    let content = match read_validated_zip_entry(&mut archive, metadata_entry, path, "wheel") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read METADATA from {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiWheel);

    // Size and checksum describe the archive file itself, not the METADATA entry.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // RECORD is optional: a missing or unreadable RECORD leaves the file
    // references produced by the metadata parser untouched.
    if let Some(record_entry) =
        find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/RECORD")
        && let Ok(record_content) =
            read_validated_zip_entry(&mut archive, record_entry, path, "wheel")
    {
        package_data.file_references = parse_record_csv(&record_content);
    }

    if let Some(wheel_info) = parse_wheel_filename(path) {
        // The file name only fills gaps; values from METADATA take precedence.
        if package_data.name.is_none() {
            package_data.name = Some(wheel_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(wheel_info.version.clone());
        }

        package_data.qualifiers = Some(std::collections::HashMap::from([(
            "extension".to_string(),
            format!(
                "{}-{}-{}",
                wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
            ),
        )]));

        // Replaces the purl computed by the metadata parser with one that
        // carries the wheel tags.
        package_data.purl = build_wheel_purl(
            package_data.name.as_deref(),
            package_data.version.as_deref(),
            &wheel_info,
        );

        let mut extra_data = package_data.extra_data.unwrap_or_default();
        // NOTE(review): the wheel's python tag is stored under the
        // `python_requires` key — confirm this key name is intended.
        extra_data.insert(
            "python_requires".to_string(),
            serde_json::Value::String(wheel_info.python_tag.clone()),
        );
        extra_data.insert(
            "abi_tag".to_string(),
            serde_json::Value::String(wheel_info.abi_tag.clone()),
        );
        extra_data.insert(
            "platform_tag".to_string(),
            serde_json::Value::String(wheel_info.platform_tag.clone()),
        );
        package_data.extra_data = Some(extra_data);
    }

    package_data
}
1415
/// Extracts package data from an egg (`.egg`) zip archive.
///
/// Reads the first entry ending in `EGG-INFO/PKG-INFO` or
/// `.egg-info/PKG-INFO` as RFC 822 metadata, records the archive's size and
/// SHA-256, loads file references from `installed-files.txt` when readable,
/// fills missing name/version from the egg file name, and rebuilds the purl
/// via `build_egg_purl`.
///
/// Any failure before the metadata has been parsed (stat, size limit, open,
/// zip parsing, entry validation, missing or unreadable `PKG-INFO`) returns
/// `default_package_data(path)`.
fn extract_from_egg_archive(path: &Path) -> PackageData {
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!("Failed to read metadata for egg archive {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    // Refuse oversized archives before opening them.
    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Egg archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data(path);
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open egg archive {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read egg archive {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    // No warning is emitted here on failure; presumably the collector reports
    // its own errors (TODO confirm).
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "egg") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(path),
    };

    let pkginfo_entry = match find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &["EGG-INFO/PKG-INFO", ".egg-info/PKG-INFO"],
    ) {
        Some(entry) => entry,
        None => {
            warn!("No PKG-INFO file found in egg archive {:?}", path);
            return default_package_data(path);
        }
    };

    let content = match read_validated_zip_entry(&mut archive, pkginfo_entry, path, "egg") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read PKG-INFO from {:?}: {}", path, e);
            return default_package_data(path);
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiEgg);

    // Size and checksum describe the archive file itself, not the PKG-INFO entry.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // installed-files.txt is optional: when missing or unreadable the file
    // references produced by the metadata parser are kept.
    if let Some(installed_files_entry) = find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &[
            "EGG-INFO/installed-files.txt",
            ".egg-info/installed-files.txt",
        ],
    ) && let Ok(installed_files_content) =
        read_validated_zip_entry(&mut archive, installed_files_entry, path, "egg")
    {
        package_data.file_references = parse_installed_files_txt(&installed_files_content);
    }

    if let Some(egg_info) = parse_egg_filename(path) {
        // The file name only fills gaps; values from PKG-INFO take precedence.
        if package_data.name.is_none() {
            package_data.name = Some(egg_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(egg_info.version.clone());
        }

        if let Some(python_version) = &egg_info.python_version {
            let mut extra_data = package_data.extra_data.unwrap_or_default();
            extra_data.insert(
                "python_version".to_string(),
                serde_json::Value::String(python_version.clone()),
            );
            package_data.extra_data = Some(extra_data);
        }
    }

    // Always rebuilt, even when the file name could not be parsed, so the
    // purl carries the qualifier added by `build_egg_purl`.
    package_data.purl = build_egg_purl(
        package_data.name.as_deref(),
        package_data.version.as_deref(),
    );

    package_data
}
1517
1518fn find_validated_zip_entry_by_suffix<'a>(
1519 entries: &'a [ValidatedZipEntry],
1520 suffix: &str,
1521) -> Option<&'a ValidatedZipEntry> {
1522 entries.iter().find(|entry| entry.name.ends_with(suffix))
1523}
1524
1525fn find_validated_zip_entry_by_any_suffix<'a>(
1526 entries: &'a [ValidatedZipEntry],
1527 suffixes: &[&str],
1528) -> Option<&'a ValidatedZipEntry> {
1529 entries
1530 .iter()
1531 .find(|entry| suffixes.iter().any(|suffix| entry.name.ends_with(suffix)))
1532}
1533
1534fn read_validated_zip_entry<R: Read + std::io::Seek>(
1535 archive: &mut ZipArchive<R>,
1536 entry: &ValidatedZipEntry,
1537 path: &Path,
1538 archive_type: &str,
1539) -> Result<String, String> {
1540 let mut file = archive
1541 .by_index(entry.index)
1542 .map_err(|e| format!("Failed to find entry {}: {}", entry.name, e))?;
1543
1544 let compressed_size = file.compressed_size();
1545 let uncompressed_size = file.size();
1546
1547 if compressed_size > 0 {
1548 let ratio = uncompressed_size as f64 / compressed_size as f64;
1549 if ratio > MAX_COMPRESSION_RATIO {
1550 return Err(format!(
1551 "Rejected suspicious compression ratio in {} {:?}: {:.2}:1",
1552 archive_type, path, ratio
1553 ));
1554 }
1555 }
1556
1557 if uncompressed_size > MAX_FILE_SIZE {
1558 return Err(format!(
1559 "Rejected oversized entry in {} {:?}: {} bytes",
1560 archive_type, path, uncompressed_size
1561 ));
1562 }
1563
1564 read_limited_utf8(
1565 &mut file,
1566 MAX_FILE_SIZE,
1567 &format!("{} entry {}", archive_type, entry.name),
1568 )
1569}
1570
/// Reads at most `max_bytes` bytes from `reader` and decodes them as UTF-8.
///
/// One byte beyond the limit is requested so that an over-long input can be
/// told apart from one that is exactly `max_bytes` long.
///
/// # Errors
///
/// Returns a message prefixed with `context` when the read fails, the input
/// is longer than `max_bytes`, or the bytes are not valid UTF-8.
fn read_limited_utf8<R: Read>(
    reader: &mut R,
    max_bytes: u64,
    context: &str,
) -> Result<String, String> {
    // Saturate instead of `max_bytes + 1`: at `u64::MAX` the plain addition
    // overflows (panic in debug builds, a zero-length read in release builds).
    let mut limited = reader.take(max_bytes.saturating_add(1));
    let mut bytes = Vec::new();
    limited
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed to read {}: {}", context, e))?;

    if bytes.len() as u64 > max_bytes {
        return Err(format!(
            "{} exceeded {} byte limit while reading",
            context, max_bytes
        ));
    }

    String::from_utf8(bytes).map_err(|e| format!("{} is not valid UTF-8: {}", context, e))
}
1591
/// Normalises an archive member path to a safe, relative, `/`-separated form.
///
/// Backslashes are treated as separators and `.` components are dropped.
/// Returns `None` for paths that start with a drive prefix such as `C:/`,
/// contain a root, parent (`..`) or prefix component, or have no remaining
/// components.
fn normalize_archive_entry_path(entry_path: &str) -> Option<String> {
    let unified = entry_path.replace('\\', "/");

    // Explicit drive-letter check: `Path` only recognises such prefixes on
    // Windows.
    let has_drive_prefix = matches!(
        unified.as_bytes(),
        [drive, b':', b'/', ..] if drive.is_ascii_alphabetic()
    );
    if has_drive_prefix {
        return None;
    }

    // An inner `None` marks a forbidden component and aborts the collection.
    let segments = Path::new(&unified)
        .components()
        .filter_map(|part| match part {
            Component::CurDir => None,
            Component::Normal(segment) => Some(Some(segment.to_string_lossy().into_owned())),
            Component::RootDir | Component::ParentDir | Component::Prefix(_) => Some(None),
        })
        .collect::<Option<Vec<String>>>()?;

    if segments.is_empty() {
        None
    } else {
        Some(segments.join("/"))
    }
}
1613
1614pub fn parse_record_csv(content: &str) -> Vec<FileReference> {
1619 let mut reader = ReaderBuilder::new()
1620 .has_headers(false)
1621 .from_reader(content.as_bytes());
1622
1623 let mut file_references = Vec::new();
1624
1625 for result in reader.records() {
1626 match result {
1627 Ok(record) => {
1628 if record.len() < 3 {
1629 continue;
1630 }
1631
1632 let path = record.get(0).unwrap_or("").trim().to_string();
1633 if path.is_empty() {
1634 continue;
1635 }
1636
1637 let hash_field = record.get(1).unwrap_or("").trim();
1638 let size_field = record.get(2).unwrap_or("").trim();
1639
1640 let sha256 = if !hash_field.is_empty() && hash_field.contains('=') {
1642 let parts: Vec<&str> = hash_field.split('=').collect();
1643 if parts.len() == 2 && parts[0] == "sha256" {
1644 match URL_SAFE_NO_PAD.decode(parts[1]) {
1645 Ok(decoded) => {
1646 let hex = decoded
1647 .iter()
1648 .map(|b| format!("{:02x}", b))
1649 .collect::<String>();
1650 Sha256Digest::from_hex(&hex).ok()
1651 }
1652 Err(_) => None,
1653 }
1654 } else {
1655 None
1656 }
1657 } else {
1658 None
1659 };
1660
1661 let size = if !size_field.is_empty() && size_field != "-" {
1663 size_field.parse::<u64>().ok()
1664 } else {
1665 None
1666 };
1667
1668 file_references.push(FileReference {
1669 path,
1670 size,
1671 sha1: None,
1672 md5: None,
1673 sha256,
1674 sha512: None,
1675 extra_data: None,
1676 });
1677 }
1678 Err(e) => {
1679 warn!("Failed to parse RECORD CSV row: {}", e);
1680 continue;
1681 }
1682 }
1683 }
1684
1685 file_references
1686}
1687
1688pub fn parse_installed_files_txt(content: &str) -> Vec<FileReference> {
1691 content
1692 .lines()
1693 .map(|line| line.trim())
1694 .filter(|line| !line.is_empty())
1695 .map(|path| FileReference {
1696 path: path.to_string(),
1697 size: None,
1698 sha1: None,
1699 md5: None,
1700 sha256: None,
1701 sha512: None,
1702 extra_data: None,
1703 })
1704 .collect()
1705}
1706
1707pub fn parse_sources_txt(content: &str) -> Vec<FileReference> {
1708 content
1709 .lines()
1710 .map(str::trim)
1711 .filter(|line| !line.is_empty())
1712 .map(|path| FileReference {
1713 path: path.to_string(),
1714 size: None,
1715 sha1: None,
1716 md5: None,
1717 sha256: None,
1718 sha512: None,
1719 extra_data: None,
1720 })
1721 .collect()
1722}
1723
/// Components parsed from a wheel file name.
struct WheelInfo {
    /// Distribution name with `_` replaced by `-`.
    name: String,
    /// Version component, as written in the file name.
    version: String,
    /// Python tag, e.g. `py3` or `cp39`.
    python_tag: String,
    /// ABI tag, e.g. `none` or `cp39`.
    abi_tag: String,
    /// Platform tag, e.g. `any` or `manylinux1_x86_64`.
    platform_tag: String,
}

/// Parses a wheel file name of the form
/// `{name}-{version}(-{build})?-{python}-{abi}-{platform}.whl`.
///
/// Returns `None` when the stem has fewer than five `-`-separated parts.
/// The optional build tag is recognised by its leading digit (the wheel
/// specification requires one, and python tags never start with a digit) and
/// is skipped, so it no longer shifts the python/abi/platform tags. Any
/// further trailing parts are joined into the platform tag, as before.
fn parse_wheel_filename(path: &Path) -> Option<WheelInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let parts: Vec<&str> = stem.split('-').collect();

    if parts.len() < 5 {
        return None;
    }

    let has_build_tag =
        parts.len() >= 6 && parts[2].starts_with(|c: char| c.is_ascii_digit());
    // Index of the python tag; the abi and platform tags follow it.
    let tag_start = if has_build_tag { 3 } else { 2 };

    Some(WheelInfo {
        name: parts[0].replace('_', "-"),
        version: parts[1].to_string(),
        python_tag: parts[tag_start].to_string(),
        abi_tag: parts[tag_start + 1].to_string(),
        platform_tag: parts[tag_start + 2..].join("-"),
    })
}
1748
/// Components parsed from an egg file name.
struct EggInfo {
    /// Project name with `_` replaced by `-`.
    name: String,
    /// Version component, as written in the file name.
    version: String,
    /// Third `-`-separated component of the stem, when present.
    python_version: Option<String>,
}

/// Parses an egg file name of the form `{name}-{version}[-{python}...]`.
///
/// Only the final extension is removed before splitting on `-`. Returns
/// `None` when the stem has fewer than two parts; parts after the third are
/// ignored.
fn parse_egg_filename(path: &Path) -> Option<EggInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let mut pieces = stem.split('-');

    let project = pieces.next()?;
    let release = pieces.next()?;
    let python_version = pieces.next().map(String::from);

    Some(EggInfo {
        name: project.replace('_', "-"),
        version: release.to_string(),
        python_version,
    })
}
1769
1770fn build_wheel_purl(
1771 name: Option<&str>,
1772 version: Option<&str>,
1773 wheel_info: &WheelInfo,
1774) -> Option<String> {
1775 let name = name?;
1776 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1777
1778 if let Some(ver) = version {
1779 package_url.with_version(ver).ok()?;
1780 }
1781
1782 let extension = format!(
1783 "{}-{}-{}",
1784 wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
1785 );
1786 package_url.add_qualifier("extension", extension).ok()?;
1787
1788 Some(package_url.to_string())
1789}
1790
1791fn build_egg_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
1792 let name = name?;
1793 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1794
1795 if let Some(ver) = version {
1796 package_url.with_version(ver).ok()?;
1797 }
1798
1799 package_url.add_qualifier("type", "egg").ok()?;
1800
1801 Some(package_url.to_string())
1802}
1803
1804fn python_parse_rfc822_content(content: &str, datasource_id: DatasourceId) -> PackageData {
1805 let metadata = super::rfc822::parse_rfc822_content(content);
1806 build_package_data_from_rfc822(&metadata, datasource_id)
1807}
1808
/// Converts parsed RFC 822 style Python metadata (PKG-INFO / METADATA) into
/// `PackageData`.
///
/// Reads the name, version, summary, home page, author, license, download
/// URL, platform, `Requires-Python`, classifier, license-file and
/// `Project-URL` headers, derives the PyPI URLs and purl from name and
/// version, and tags the result with `datasource_id`. Size and checksum
/// fields are left unset for the caller to fill in.
fn build_package_data_from_rfc822(
    metadata: &super::rfc822::Rfc822Metadata,
    datasource_id: DatasourceId,
) -> PackageData {
    use super::rfc822::{get_header_all, get_header_first};

    let name = get_header_first(&metadata.headers, "name");
    let version = get_header_first(&metadata.headers, "version");
    let summary = get_header_first(&metadata.headers, "summary");
    let mut homepage_url = get_header_first(&metadata.headers, "home-page");
    let author = get_header_first(&metadata.headers, "author");
    let author_email = get_header_first(&metadata.headers, "author-email");
    let license = get_header_first(&metadata.headers, "license");
    let license_expression = get_header_first(&metadata.headers, "license-expression");
    let download_url = get_header_first(&metadata.headers, "download-url");
    let platform = get_header_first(&metadata.headers, "platform");
    let requires_python = get_header_first(&metadata.headers, "requires-python");
    let classifiers = get_header_all(&metadata.headers, "classifier");
    let license_files = get_header_all(&metadata.headers, "license-file");

    // The message body is the long description; fall back to a `description`
    // header when the body is empty.
    let description_body = if metadata.body.is_empty() {
        get_header_first(&metadata.headers, "description").unwrap_or_default()
    } else {
        metadata.body.clone()
    };

    let description = build_description(summary.as_deref(), &description_body);

    // A single author party is recorded when either a name or an email exists.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    // Non-license classifiers become keywords; license classifiers feed the
    // extracted license statement below.
    let (keywords, license_classifiers) = split_classifiers(&classifiers);
    let referenced_license_files: Vec<&str> = license_files.iter().map(String::as_str).collect();
    // Only the `license-expression` header drives the declared license. When
    // it cannot be normalised as an SPDX expression, fall back to
    // `normalize_spdx_declared_license` on the same raw value.
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        license_expression
            .as_deref()
            .and_then(normalize_spdx_expression)
            .map(|normalized| {
                build_declared_license_data(
                    normalized,
                    DeclaredLicenseMatchMetadata::single_line(
                        license_expression.as_deref().unwrap_or_default(),
                    )
                    .with_referenced_filenames(&referenced_license_files),
                )
            })
            .unwrap_or_else(|| normalize_spdx_declared_license(license_expression.as_deref()));

    // Prefer the raw license expression; otherwise build a statement from the
    // `license` header and the license classifiers.
    let extracted_license_statement = license_expression
        .clone()
        .or_else(|| build_extracted_license_statement(license.as_deref(), &license_classifiers));

    let mut extra_data = HashMap::new();
    // A platform of "unknown" (any ASCII case) or an empty value is not recorded.
    if let Some(platform_value) = platform
        && !platform_value.eq_ignore_ascii_case("unknown")
        && !platform_value.is_empty()
    {
        extra_data.insert(
            "platform".to_string(),
            serde_json::Value::String(platform_value),
        );
    }

    if let Some(requires_python_value) = requires_python
        && !requires_python_value.is_empty()
    {
        extra_data.insert(
            "requires_python".to_string(),
            serde_json::Value::String(requires_python_value),
        );
    }

    if !license_files.is_empty() {
        extra_data.insert(
            "license_files".to_string(),
            serde_json::Value::Array(
                license_files
                    .iter()
                    .cloned()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }

    // Every declared license file is also exposed as a path-only file reference.
    let file_references = license_files
        .iter()
        .map(|path| FileReference {
            path: path.clone(),
            size: None,
            sha1: None,
            md5: None,
            sha256: None,
            sha512: None,
            extra_data: None,
        })
        .collect();

    let project_urls = get_header_all(&metadata.headers, "project-url");
    let dependencies = extract_rfc822_dependencies(&metadata.headers);
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);

    if !project_urls.is_empty() {
        let parsed_urls = parse_project_urls(&project_urls);

        // Classify each labelled URL by its lower-cased label. The first URL
        // seen for a slot wins, and a label is tested against the branches in
        // order.
        for (label, url) in &parsed_urls {
            let label_lower = label.to_lowercase();

            if bug_tracking_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "tracker"
                        | "bug reports"
                        | "bug tracker"
                        | "issues"
                        | "issue tracker"
                        | "github: issues"
                )
            {
                bug_tracking_url = Some(url.clone());
            } else if code_view_url.is_none()
                && matches!(label_lower.as_str(), "source" | "source code" | "code")
            {
                code_view_url = Some(url.clone());
            } else if vcs_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "github" | "gitlab" | "github: repo" | "repository"
                )
            {
                vcs_url = Some(url.clone());
            } else if homepage_url.is_none()
                && matches!(label_lower.as_str(), "website" | "homepage" | "home")
            {
                // Only used when there was no `home-page` header.
                homepage_url = Some(url.clone());
            } else if label_lower == "changelog" {
                // Unlike the slots above, a later changelog entry overwrites
                // an earlier one.
                extra_data.insert(
                    "changelog_url".to_string(),
                    serde_json::Value::String(url.clone()),
                );
            }
        }

        // The complete label -> URL mapping is kept with its original labels.
        let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
            .iter()
            .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
            .collect();

        if !project_urls_json.is_empty() {
            extra_data.insert(
                "project_urls".to_string(),
                serde_json::Value::Object(project_urls_json),
            );
        }
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
        build_pypi_urls(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description,
        release_date: None,
        parties,
        keywords,
        homepage_url,
        download_url,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references,
        is_private: false,
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(datasource_id),
        purl,
    }
}
2034
2035fn parse_project_urls(project_urls: &[String]) -> Vec<(String, String)> {
2036 project_urls
2037 .iter()
2038 .filter_map(|url_entry| {
2039 if let Some((label, url)) = url_entry.split_once(", ") {
2040 let label_trimmed = label.trim();
2041 let url_trimmed = url.trim();
2042 if !label_trimmed.is_empty() && !url_trimmed.is_empty() {
2043 return Some((label_trimmed.to_string(), url_trimmed.to_string()));
2044 }
2045 }
2046 None
2047 })
2048 .collect()
2049}
2050
/// Combines a summary and a long-description body into one description.
///
/// Both parts are trimmed; blank parts are dropped. When both remain they are
/// joined with a single newline, summary first. Returns `None` when neither
/// part has content.
fn build_description(summary: Option<&str>, body: &str) -> Option<String> {
    let summary_text = summary.map(str::trim).filter(|text| !text.is_empty());
    let body_text = Some(body.trim()).filter(|text| !text.is_empty());

    match (summary_text, body_text) {
        (Some(head), Some(tail)) => Some(format!("{}\n{}", head, tail)),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    }
}
2069
/// Splits trove classifiers into `(keywords, license_classifiers)`.
///
/// Classifiers starting with `License ::` go to the second vector, all others
/// to the first. Input order is preserved within each group.
fn split_classifiers(classifiers: &[String]) -> (Vec<String>, Vec<String>) {
    let (license_classifiers, keywords): (Vec<String>, Vec<String>) = classifiers
        .iter()
        .cloned()
        .partition(|classifier| classifier.starts_with("License ::"));

    (keywords, license_classifiers)
}
2084
/// Renders the raw license information as a small multi-line text block.
///
/// Emits a `license: <value>` line when the trimmed license is non-empty,
/// then a `classifiers:` line followed by one quoted list item per license
/// classifier. Every line ends with a newline. Returns `None` when there is
/// nothing to report.
fn build_extracted_license_statement(
    license: Option<&str>,
    license_classifiers: &[String],
) -> Option<String> {
    let mut statement = String::new();

    if let Some(text) = license.map(str::trim).filter(|text| !text.is_empty()) {
        statement.push_str(&format!("license: {}", text));
        statement.push('\n');
    }

    if !license_classifiers.is_empty() {
        statement.push_str("classifiers:");
        statement.push('\n');
        for classifier in license_classifiers {
            statement.push_str(&format!(" - '{}'", classifier));
            statement.push('\n');
        }
    }

    if statement.is_empty() {
        None
    } else {
        Some(statement)
    }
}
2110
2111pub(crate) fn build_pypi_urls(
2112 name: Option<&str>,
2113 version: Option<&str>,
2114) -> (
2115 Option<String>,
2116 Option<String>,
2117 Option<String>,
2118 Option<String>,
2119) {
2120 let repository_homepage_url = name.map(|value| format!("https://pypi.org/project/{}", value));
2121
2122 let repository_download_url = name.and_then(|value| {
2123 version.map(|ver| {
2124 format!(
2125 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
2126 &value[..1.min(value.len())],
2127 value,
2128 value,
2129 ver
2130 )
2131 })
2132 });
2133
2134 let api_data_url = name.map(|value| {
2135 if let Some(ver) = version {
2136 format!("https://pypi.org/pypi/{}/{}/json", value, ver)
2137 } else {
2138 format!("https://pypi.org/pypi/{}/json", value)
2139 }
2140 });
2141
2142 let purl = name.and_then(|value| {
2143 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), value).ok()?;
2144 if let Some(ver) = version {
2145 package_url.with_version(ver).ok()?;
2146 }
2147 Some(package_url.to_string())
2148 });
2149
2150 (
2151 repository_homepage_url,
2152 repository_download_url,
2153 api_data_url,
2154 purl,
2155 )
2156}
2157
2158fn build_pypi_purl_with_extension(
2159 name: &str,
2160 version: Option<&str>,
2161 extension: &str,
2162) -> Option<String> {
2163 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2164 if let Some(ver) = version {
2165 package_url.with_version(ver).ok()?;
2166 }
2167 package_url.add_qualifier("extension", extension).ok()?;
2168 Some(package_url.to_string())
2169}
2170
2171fn extract_from_pyproject_toml(path: &Path) -> PackageData {
2172 let toml_content = match read_toml_file(path) {
2173 Ok(content) => content,
2174 Err(e) => {
2175 warn!(
2176 "Failed to read or parse pyproject.toml at {:?}: {}",
2177 path, e
2178 );
2179 return default_package_data(path);
2180 }
2181 };
2182
2183 let tool_table = toml_content.get("tool").and_then(|v| v.as_table());
2184 let is_poetry_pyproject = tool_table
2185 .and_then(|tool| tool.get("poetry"))
2186 .and_then(|value| value.as_table())
2187 .is_some();
2188
2189 let project_table =
2191 if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
2192 project.clone()
2194 } else if let Some(tool) = tool_table {
2195 if let Some(poetry) = tool.get("poetry").and_then(|v| v.as_table()) {
2196 poetry.clone()
2198 } else {
2199 return default_package_data(path);
2200 }
2201 } else if toml_content.get(FIELD_NAME).is_some() {
2202 match toml_content.as_table() {
2204 Some(table) => table.clone(),
2205 None => {
2206 warn!("Failed to convert TOML content to table in {:?}", path);
2207 return default_package_data(path);
2208 }
2209 }
2210 } else {
2211 return default_package_data(path);
2212 };
2213
2214 let name = project_table
2215 .get(FIELD_NAME)
2216 .and_then(|v| v.as_str())
2217 .map(String::from);
2218
2219 let version = project_table
2220 .get(FIELD_VERSION)
2221 .and_then(|v| v.as_str())
2222 .map(String::from);
2223 let classifiers = project_table
2224 .get("classifiers")
2225 .and_then(|value| value.as_array())
2226 .map(|values| {
2227 values
2228 .iter()
2229 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
2230 .collect::<Vec<_>>()
2231 })
2232 .unwrap_or_default();
2233 let (classifier_keywords, license_classifiers) = split_classifiers(&classifiers);
2234
2235 let extracted_license_statement = extract_raw_license_string(&project_table);
2236 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
2237 normalize_spdx_declared_license(extract_license_expression_candidate(&project_table));
2238
2239 let description = project_table
2240 .get(FIELD_DESCRIPTION)
2241 .and_then(|value| value.as_str())
2242 .map(|value| value.to_string());
2243 let mut keywords = project_table
2244 .get(FIELD_KEYWORDS)
2245 .and_then(|value| value.as_array())
2246 .map(|values| {
2247 values
2248 .iter()
2249 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
2250 .collect::<Vec<_>>()
2251 })
2252 .unwrap_or_default();
2253 for classifier in classifier_keywords {
2254 if !keywords.contains(&classifier) {
2255 keywords.push(classifier);
2256 }
2257 }
2258
2259 let mut extra_data = extract_pyproject_extra_data(&toml_content).unwrap_or_default();
2261 let (homepage_url, download_url, bug_tracking_url, code_view_url, repository_url) =
2262 extract_urls(&project_table, &mut extra_data);
2263
2264 let (dependencies, optional_dependencies) = extract_dependencies(&project_table, &toml_content);
2265
2266 let purl = name.as_ref().and_then(|n| {
2268 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n) {
2269 Ok(p) => p,
2270 Err(e) => {
2271 warn!(
2272 "Failed to create PackageUrl for Python package '{}': {}",
2273 n, e
2274 );
2275 return None;
2276 }
2277 };
2278
2279 if let Some(v) = &version
2280 && let Err(e) = package_url.with_version(v)
2281 {
2282 warn!(
2283 "Failed to set version '{}' for Python package '{}': {}",
2284 v, n, e
2285 );
2286 return None;
2287 }
2288
2289 Some(package_url.to_string())
2290 });
2291
2292 let api_data_url = name.as_ref().map(|n| {
2293 if let Some(v) = &version {
2294 format!("https://pypi.org/pypi/{}/{}/json", n, v)
2295 } else {
2296 format!("https://pypi.org/pypi/{}/json", n)
2297 }
2298 });
2299
2300 let pypi_homepage_url = name
2301 .as_ref()
2302 .map(|n| format!("https://pypi.org/project/{}", n));
2303
2304 let pypi_download_url = name.as_ref().and_then(|n| {
2305 version.as_ref().map(|v| {
2306 format!(
2307 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
2308 &n[..1.min(n.len())],
2309 n,
2310 n,
2311 v
2312 )
2313 })
2314 });
2315
2316 PackageData {
2317 package_type: Some(PythonParser::PACKAGE_TYPE),
2318 namespace: None,
2319 name,
2320 version,
2321 qualifiers: None,
2322 subpath: None,
2323 primary_language: None,
2324 description,
2325 release_date: None,
2326 parties: extract_parties(&project_table),
2327 keywords,
2328 homepage_url: homepage_url.or(pypi_homepage_url),
2329 download_url: download_url
2330 .or_else(|| repository_url.clone())
2331 .or(pypi_download_url),
2332 size: None,
2333 sha1: None,
2334 md5: None,
2335 sha256: None,
2336 sha512: None,
2337 bug_tracking_url,
2338 code_view_url,
2339 vcs_url: repository_url,
2340 copyright: None,
2341 holder: None,
2342 declared_license_expression,
2343 declared_license_expression_spdx,
2344 license_detections,
2345 other_license_expression: None,
2346 other_license_expression_spdx: None,
2347 other_license_detections: Vec::new(),
2348 extracted_license_statement: extracted_license_statement
2349 .or_else(|| build_extracted_license_statement(None, &license_classifiers)),
2350 notice_text: None,
2351 source_packages: Vec::new(),
2352 file_references: Vec::new(),
2353 is_private: has_private_classifier(&classifiers),
2354 is_virtual: false,
2355 extra_data: if extra_data.is_empty() {
2356 None
2357 } else {
2358 Some(extra_data)
2359 },
2360 dependencies: [dependencies, optional_dependencies].concat(),
2361 repository_homepage_url: None,
2362 repository_download_url: None,
2363 api_data_url,
2364 datasource_id: Some(if is_poetry_pyproject {
2365 DatasourceId::PypiPoetryPyprojectToml
2366 } else {
2367 DatasourceId::PypiPyprojectToml
2368 }),
2369 purl,
2370 }
2371}
2372
2373fn detect_pkg_info_datasource_id(path: &Path) -> DatasourceId {
2374 let path_str = path.to_string_lossy().replace('\\', "/");
2375 if path_str.contains("/EGG-INFO/PKG-INFO") {
2376 DatasourceId::PypiEggPkginfo
2377 } else if path_str.ends_with(".egg-info/PKG-INFO") {
2378 DatasourceId::PypiEditableEggPkginfo
2379 } else {
2380 DatasourceId::PypiSdistPkginfo
2381 }
2382}
2383
2384fn extract_raw_license_string(project: &TomlMap<String, TomlValue>) -> Option<String> {
2385 project
2386 .get(FIELD_LICENSE)
2387 .and_then(|license_value| match license_value {
2388 TomlValue::String(license_str) => Some(license_str.clone()),
2389 TomlValue::Table(license_table) => license_table
2390 .get("text")
2391 .and_then(|v| v.as_str())
2392 .map(|s| s.to_string())
2393 .or_else(|| {
2394 license_table
2395 .get("expression")
2396 .and_then(|v| v.as_str())
2397 .map(|expr| expr.to_string())
2398 }),
2399 _ => None,
2400 })
2401}
2402
2403fn extract_license_expression_candidate(project: &TomlMap<String, TomlValue>) -> Option<&str> {
2404 match project.get(FIELD_LICENSE) {
2405 Some(TomlValue::String(license_str)) => Some(license_str.as_str()),
2406 Some(TomlValue::Table(license_table)) => license_table
2407 .get("expression")
2408 .and_then(|value| value.as_str()),
2409 _ => None,
2410 }
2411}
2412
2413fn extract_urls(
2414 project: &TomlMap<String, TomlValue>,
2415 extra_data: &mut HashMap<String, serde_json::Value>,
2416) -> ProjectUrls {
2417 let mut homepage_url = None;
2418 let mut download_url = None;
2419 let mut bug_tracking_url = None;
2420 let mut code_view_url = None;
2421 let mut repository_url = None;
2422
2423 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
2425 let parsed_urls: Vec<(String, String)> = urls
2426 .iter()
2427 .filter_map(|(label, value)| {
2428 value
2429 .as_str()
2430 .map(|url| (label.to_string(), url.to_string()))
2431 })
2432 .collect();
2433 apply_project_url_mappings(
2434 &parsed_urls,
2435 &mut homepage_url,
2436 &mut bug_tracking_url,
2437 &mut code_view_url,
2438 &mut repository_url,
2439 extra_data,
2440 );
2441
2442 download_url = urls
2443 .get("Downloads")
2444 .or_else(|| urls.get("downloads"))
2445 .and_then(|v| v.as_str())
2446 .map(String::from);
2447
2448 if homepage_url.is_none() {
2449 homepage_url = urls
2450 .get(FIELD_HOMEPAGE)
2451 .and_then(|v| v.as_str())
2452 .map(String::from);
2453 }
2454 if repository_url.is_none() {
2455 repository_url = urls
2456 .get(FIELD_REPOSITORY)
2457 .and_then(|v| v.as_str())
2458 .map(String::from);
2459 }
2460 }
2461
2462 if homepage_url.is_none() {
2464 homepage_url = project
2465 .get(FIELD_HOMEPAGE)
2466 .and_then(|v| v.as_str())
2467 .map(String::from);
2468 }
2469
2470 if repository_url.is_none() {
2471 repository_url = project
2472 .get(FIELD_REPOSITORY)
2473 .and_then(|v| v.as_str())
2474 .map(String::from);
2475 }
2476
2477 (
2478 homepage_url,
2479 download_url,
2480 bug_tracking_url,
2481 code_view_url,
2482 repository_url,
2483 )
2484}
2485
2486fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
2487 let mut parties = Vec::new();
2488
2489 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
2490 for author in authors {
2491 if let Some(author_str) = author.as_str() {
2492 let (name, email) = split_name_email(author_str);
2493 parties.push(Party {
2494 r#type: None,
2495 role: Some("author".to_string()),
2496 name,
2497 email,
2498 url: None,
2499 organization: None,
2500 organization_url: None,
2501 timezone: None,
2502 });
2503 } else if let Some(author_table) = author.as_table() {
2504 let name = author_table
2505 .get("name")
2506 .and_then(|value| value.as_str())
2507 .map(|value| value.to_string());
2508 let email = author_table
2509 .get("email")
2510 .and_then(|value| value.as_str())
2511 .map(|value| value.to_string());
2512 if name.is_some() || email.is_some() {
2513 parties.push(Party {
2514 r#type: None,
2515 role: Some("author".to_string()),
2516 name,
2517 email,
2518 url: None,
2519 organization: None,
2520 organization_url: None,
2521 timezone: None,
2522 });
2523 }
2524 }
2525 }
2526 }
2527
2528 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
2529 for maintainer in maintainers {
2530 if let Some(maintainer_str) = maintainer.as_str() {
2531 let (name, email) = split_name_email(maintainer_str);
2532 parties.push(Party {
2533 r#type: None,
2534 role: Some("maintainer".to_string()),
2535 name,
2536 email,
2537 url: None,
2538 organization: None,
2539 organization_url: None,
2540 timezone: None,
2541 });
2542 } else if let Some(maintainer_table) = maintainer.as_table() {
2543 let name = maintainer_table
2544 .get("name")
2545 .and_then(|value| value.as_str())
2546 .map(|value| value.to_string());
2547 let email = maintainer_table
2548 .get("email")
2549 .and_then(|value| value.as_str())
2550 .map(|value| value.to_string());
2551 if name.is_some() || email.is_some() {
2552 parties.push(Party {
2553 r#type: None,
2554 role: Some("maintainer".to_string()),
2555 name,
2556 email,
2557 url: None,
2558 organization: None,
2559 organization_url: None,
2560 timezone: None,
2561 });
2562 }
2563 }
2564 }
2565 }
2566
2567 parties
2568}
2569
2570fn extract_dependencies(
2571 project: &TomlMap<String, TomlValue>,
2572 toml_content: &TomlValue,
2573) -> (Vec<Dependency>, Vec<Dependency>) {
2574 let mut dependencies = Vec::new();
2575 let mut optional_dependencies = Vec::new();
2576
2577 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
2579 match deps_value {
2580 TomlValue::Array(arr) => {
2581 dependencies = parse_dependency_array(arr, false, None);
2582 }
2583 TomlValue::Table(table) => {
2584 dependencies = parse_dependency_table(table, false, None);
2585 }
2586 _ => {}
2587 }
2588 }
2589
2590 if let Some(opt_deps_table) = project
2592 .get(FIELD_OPTIONAL_DEPENDENCIES)
2593 .and_then(|v| v.as_table())
2594 {
2595 for (extra_name, deps) in opt_deps_table {
2596 match deps {
2597 TomlValue::Array(arr) => {
2598 optional_dependencies.extend(parse_dependency_array(
2599 arr,
2600 true,
2601 Some(extra_name),
2602 ));
2603 }
2604 TomlValue::Table(table) => {
2605 optional_dependencies.extend(parse_dependency_table(
2606 table,
2607 true,
2608 Some(extra_name),
2609 ));
2610 }
2611 _ => {}
2612 }
2613 }
2614 }
2615
2616 if let Some(dev_deps_value) = project.get(FIELD_DEV_DEPENDENCIES) {
2618 match dev_deps_value {
2619 TomlValue::Array(arr) => {
2620 optional_dependencies.extend(parse_dependency_array(
2621 arr,
2622 true,
2623 Some(FIELD_DEV_DEPENDENCIES),
2624 ));
2625 }
2626 TomlValue::Table(table) => {
2627 optional_dependencies.extend(parse_dependency_table(
2628 table,
2629 true,
2630 Some(FIELD_DEV_DEPENDENCIES),
2631 ));
2632 }
2633 _ => {}
2634 }
2635 }
2636
2637 if let Some(groups_table) = toml_content
2639 .get("tool")
2640 .and_then(|value| value.as_table())
2641 .and_then(|tool| tool.get("poetry"))
2642 .and_then(|value| value.as_table())
2643 .and_then(|poetry| poetry.get("group"))
2644 .and_then(|value| value.as_table())
2645 {
2646 for (group_name, group_data) in groups_table {
2647 if let Some(group_deps) = group_data.as_table().and_then(|t| t.get("dependencies")) {
2648 match group_deps {
2649 TomlValue::Array(arr) => {
2650 optional_dependencies.extend(parse_dependency_array(
2651 arr,
2652 true,
2653 Some(group_name),
2654 ));
2655 }
2656 TomlValue::Table(table) => {
2657 optional_dependencies.extend(parse_poetry_group_dependency_table(
2658 table,
2659 true,
2660 Some(group_name),
2661 ));
2662 }
2663 _ => {}
2664 }
2665 }
2666 }
2667 }
2668
2669 if let Some(groups_table) = toml_content
2670 .get(FIELD_DEPENDENCY_GROUPS)
2671 .and_then(|value| value.as_table())
2672 {
2673 for (group_name, deps) in groups_table {
2674 match deps {
2675 TomlValue::Array(arr) => {
2676 optional_dependencies.extend(parse_dependency_array(
2677 arr,
2678 true,
2679 Some(group_name),
2680 ));
2681 }
2682 TomlValue::Table(table) => {
2683 optional_dependencies.extend(parse_dependency_table(
2684 table,
2685 true,
2686 Some(group_name),
2687 ));
2688 }
2689 _ => {}
2690 }
2691 }
2692 }
2693
2694 if let Some(dev_deps_value) = toml_content
2695 .get("tool")
2696 .and_then(|value| value.as_table())
2697 .and_then(|tool| tool.get("uv"))
2698 .and_then(|value| value.as_table())
2699 .and_then(|uv| uv.get(FIELD_DEV_DEPENDENCIES))
2700 {
2701 match dev_deps_value {
2702 TomlValue::Array(arr) => {
2703 optional_dependencies.extend(parse_dependency_array(arr, true, Some("dev")));
2704 }
2705 TomlValue::Table(table) => {
2706 optional_dependencies.extend(parse_dependency_table(table, true, Some("dev")));
2707 }
2708 _ => {}
2709 }
2710 }
2711
2712 (dependencies, optional_dependencies)
2713}
2714
2715fn extract_pyproject_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
2716 let mut extra_data = HashMap::new();
2717
2718 if let Some(tool_uv) = toml_content
2719 .get("tool")
2720 .and_then(|value| value.as_table())
2721 .and_then(|tool| tool.get("uv"))
2722 {
2723 extra_data.insert("tool_uv".to_string(), toml_value_to_json(tool_uv));
2724 }
2725
2726 if extra_data.is_empty() {
2727 None
2728 } else {
2729 Some(extra_data)
2730 }
2731}
2732
2733fn toml_value_to_json(value: &TomlValue) -> JsonValue {
2734 match value {
2735 TomlValue::String(value) => JsonValue::String(value.clone()),
2736 TomlValue::Integer(value) => JsonValue::String(value.to_string()),
2737 TomlValue::Float(value) => JsonValue::String(value.to_string()),
2738 TomlValue::Boolean(value) => JsonValue::Bool(*value),
2739 TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
2740 TomlValue::Array(values) => {
2741 JsonValue::Array(values.iter().map(toml_value_to_json).collect())
2742 }
2743 TomlValue::Table(values) => JsonValue::Object(
2744 values
2745 .iter()
2746 .map(|(key, value)| (key.clone(), toml_value_to_json(value)))
2747 .collect::<JsonMap<String, JsonValue>>(),
2748 ),
2749 }
2750}
2751
2752fn parse_dependency_table(
2753 table: &TomlMap<String, TomlValue>,
2754 is_optional: bool,
2755 scope: Option<&str>,
2756) -> Vec<Dependency> {
2757 table
2758 .iter()
2759 .filter_map(|(name, version)| {
2760 let version_str = version.as_str().map(|s| s.to_string());
2761 let mut package_url =
2762 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2763
2764 if let Some(v) = &version_str {
2765 package_url.with_version(v).ok()?;
2766 }
2767
2768 Some(Dependency {
2769 purl: Some(package_url.to_string()),
2770 extracted_requirement: None,
2771 scope: scope.map(|s| s.to_string()),
2772 is_runtime: Some(!is_optional),
2773 is_optional: Some(is_optional),
2774 is_pinned: None,
2775 is_direct: Some(true),
2776 resolved_package: None,
2777 extra_data: None,
2778 })
2779 })
2780 .collect()
2781}
2782
2783fn parse_poetry_group_dependency_table(
2784 table: &TomlMap<String, TomlValue>,
2785 is_optional: bool,
2786 scope: Option<&str>,
2787) -> Vec<Dependency> {
2788 table
2789 .iter()
2790 .filter_map(|(name, value)| build_poetry_group_dependency(name, value, is_optional, scope))
2791 .collect()
2792}
2793
2794fn build_poetry_group_dependency(
2795 name: &str,
2796 value: &TomlValue,
2797 is_optional: bool,
2798 scope: Option<&str>,
2799) -> Option<Dependency> {
2800 let normalized_name = normalize_python_dependency_name(name);
2801 let (version_spec, extras, marker) = match value {
2802 TomlValue::String(spec) => (Some(spec.trim().to_string()), Vec::new(), None),
2803 TomlValue::Table(table) => {
2804 let version_spec = table
2805 .get(FIELD_VERSION)
2806 .and_then(|value| value.as_str())
2807 .map(str::trim)
2808 .filter(|value| !value.is_empty())
2809 .map(ToOwned::to_owned);
2810 let extras = table
2811 .get(FIELD_EXTRAS)
2812 .and_then(|value| value.as_array())
2813 .map(|values| {
2814 values
2815 .iter()
2816 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
2817 .collect::<Vec<_>>()
2818 })
2819 .unwrap_or_default();
2820 let marker = table
2821 .get("markers")
2822 .and_then(|value| value.as_str())
2823 .map(str::trim)
2824 .filter(|value| !value.is_empty())
2825 .map(ToOwned::to_owned);
2826
2827 (version_spec, extras, marker)
2828 }
2829 _ => return None,
2830 };
2831
2832 let pinned_version = version_spec
2833 .as_deref()
2834 .and_then(extract_exact_pinned_version);
2835 let purl = build_python_dependency_purl(&normalized_name, pinned_version.as_deref())?;
2836
2837 let mut extra_data = HashMap::new();
2838 if let Some(marker) = marker {
2839 extra_data.insert("marker".to_string(), JsonValue::String(marker));
2840 }
2841 if !extras.is_empty() {
2842 extra_data.insert(
2843 "extras".to_string(),
2844 JsonValue::Array(extras.into_iter().map(JsonValue::String).collect()),
2845 );
2846 }
2847
2848 Some(Dependency {
2849 purl: Some(purl),
2850 extracted_requirement: version_spec,
2851 scope: scope.map(|value| value.to_string()),
2852 is_runtime: Some(!is_optional),
2853 is_optional: Some(is_optional),
2854 is_pinned: Some(pinned_version.is_some()),
2855 is_direct: Some(true),
2856 resolved_package: None,
2857 extra_data: if extra_data.is_empty() {
2858 None
2859 } else {
2860 Some(extra_data)
2861 },
2862 })
2863}
2864
2865fn parse_dependency_array(
2866 array: &[TomlValue],
2867 is_optional: bool,
2868 scope: Option<&str>,
2869) -> Vec<Dependency> {
2870 array
2871 .iter()
2872 .filter_map(|dep| {
2873 let dep_str = dep.as_str()?;
2874 build_pyproject_array_dependency(dep_str, is_optional, scope)
2875 })
2876 .collect()
2877}
2878
2879fn build_pyproject_array_dependency(
2880 dep_str: &str,
2881 is_optional: bool,
2882 scope: Option<&str>,
2883) -> Option<Dependency> {
2884 let parsed = parse_pep508_requirement(dep_str)?;
2885 let name = normalize_python_package_name(&parsed.name);
2886 let pinned_version = parsed
2887 .specifiers
2888 .as_deref()
2889 .and_then(extract_exact_pinned_version);
2890
2891 let purl = build_python_dependency_purl(&name, pinned_version.as_deref())?;
2892
2893 let mut extra_data = HashMap::new();
2894 if let Some(marker) = parsed.marker {
2895 extra_data.insert("marker".to_string(), JsonValue::String(marker));
2896 }
2897 if !parsed.extras.is_empty() {
2898 extra_data.insert(
2899 "extras".to_string(),
2900 JsonValue::Array(parsed.extras.into_iter().map(JsonValue::String).collect()),
2901 );
2902 }
2903
2904 let extracted_requirement = parsed.specifiers.or(parsed.url);
2905
2906 Some(Dependency {
2907 purl: Some(purl),
2908 extracted_requirement: extracted_requirement.clone(),
2909 scope: scope.map(|s| s.to_string()),
2910 is_runtime: Some(!is_optional),
2911 is_optional: Some(is_optional),
2912 is_pinned: Some(pinned_version.is_some()),
2913 is_direct: Some(true),
2914 resolved_package: None,
2915 extra_data: if extra_data.is_empty() {
2916 None
2917 } else {
2918 Some(extra_data)
2919 },
2920 })
2921}
2922
/// Returns the version a specifier string pins exactly, if it does so.
///
/// A specifier is an exact pin when it is a single clause (no `,`) using the
/// `==` or `===` operator with a non-empty version. Surrounding whitespace,
/// and whitespace between operator and version, is ignored.
///
/// `==` clauses whose version ends in `.*` (for example `==1.4.*`) are *not*
/// pins: PEP 440 defines them as prefix matches that accept a whole release
/// series, so there is no single version to report. `===` compares strings
/// verbatim, so its version is returned unchanged.
fn extract_exact_pinned_version(specifiers: &str) -> Option<String> {
    let spec = specifiers.trim();
    if spec.contains(',') {
        return None;
    }

    // `===` has to be tried first because it also starts with `==`.
    let (version, is_arbitrary_equality) = match spec.strip_prefix("===") {
        Some(rest) => (rest.trim(), true),
        None => (spec.strip_prefix("==")?.trim(), false),
    };

    if version.is_empty() {
        return None;
    }

    // `==X.*` is a prefix match over many versions, not a pin.
    if !is_arbitrary_equality && version.ends_with(".*") {
        return None;
    }

    Some(version.to_string())
}
2944
/// A Python literal value as understood by the `setup.py` literal evaluator.
#[derive(Debug, Clone)]
enum Value {
    /// A string literal.
    String(String),
    /// A numeric literal; integers and floats are both held as `f64`.
    Number(f64),
    /// `True` or `False`.
    Bool(bool),
    /// Python's `None`.
    None,
    /// A list literal with its evaluated elements, in source order.
    List(Vec<Value>),
    /// A tuple literal with its evaluated elements, in source order.
    Tuple(Vec<Value>),
    /// A dictionary keyed by the string form of each key. Insertion order is
    /// not retained.
    Dict(HashMap<String, Value>),
}
2955
2956struct LiteralEvaluator {
2957 constants: HashMap<String, Value>,
2958 max_depth: usize,
2959 max_nodes: usize,
2960 nodes_visited: usize,
2961}
2962
2963impl LiteralEvaluator {
2964 fn new(constants: HashMap<String, Value>) -> Self {
2965 Self {
2966 constants,
2967 max_depth: MAX_SETUP_PY_AST_DEPTH,
2968 max_nodes: MAX_SETUP_PY_AST_NODES,
2969 nodes_visited: 0,
2970 }
2971 }
2972
2973 fn insert_constant(&mut self, name: String, value: Value) {
2974 self.constants.insert(name, value);
2975 }
2976
2977 fn evaluate_expr(&mut self, expr: &ast::Expr, depth: usize) -> Option<Value> {
2978 if depth >= self.max_depth || self.nodes_visited >= self.max_nodes {
2979 return None;
2980 }
2981 self.nodes_visited += 1;
2982
2983 match expr {
2984 ast::Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => {
2985 Some(Value::String(value.to_str().to_string()))
2986 }
2987 ast::Expr::BooleanLiteral(ast::ExprBooleanLiteral { value, .. }) => {
2988 Some(Value::Bool(*value))
2989 }
2990 ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. }) => {
2991 self.evaluate_number(value)
2992 }
2993 ast::Expr::NoneLiteral(_) => Some(Value::None),
2994 ast::Expr::Name(ast::ExprName { id, .. }) => self.constants.get(id.as_str()).cloned(),
2995 ast::Expr::List(ast::ExprList { elts, .. }) => {
2996 let mut values = Vec::new();
2997 for elt in elts {
2998 values.push(self.evaluate_expr(elt, depth + 1)?);
2999 }
3000 Some(Value::List(values))
3001 }
3002 ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
3003 let mut values = Vec::new();
3004 for elt in elts {
3005 values.push(self.evaluate_expr(elt, depth + 1)?);
3006 }
3007 Some(Value::Tuple(values))
3008 }
3009 ast::Expr::Dict(ast::ExprDict { items, .. }) => {
3010 let mut dict = HashMap::new();
3011 for item in items {
3012 let key_expr = item.key.as_ref()?;
3013 let key_value = self.evaluate_expr(key_expr, depth + 1)?;
3014 let key = value_to_string(&key_value)?;
3015 let value = self.evaluate_expr(&item.value, depth + 1)?;
3016 dict.insert(key, value);
3017 }
3018 Some(Value::Dict(dict))
3019 }
3020 ast::Expr::Call(ast::ExprCall {
3021 func, arguments, ..
3022 }) => {
3023 let args = arguments.args.as_ref();
3024 let keywords = arguments.keywords.as_ref();
3025 if keywords.is_empty()
3026 && let Some(name) = dotted_name(func.as_ref(), depth + 1)
3027 && matches!(name.as_str(), "OrderedDict" | "collections.OrderedDict")
3028 {
3029 return self.evaluate_ordered_dict(args, depth + 1);
3030 }
3031
3032 if !args.is_empty() {
3033 return None;
3034 }
3035
3036 if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref()
3037 && id == "dict"
3038 {
3039 let mut dict = HashMap::new();
3040 for keyword in keywords {
3041 let key = keyword.arg.as_ref().map(ast::Identifier::as_str)?;
3042 let value = self.evaluate_expr(&keyword.value, depth + 1)?;
3043 dict.insert(key.to_string(), value);
3044 }
3045 return Some(Value::Dict(dict));
3046 }
3047
3048 None
3049 }
3050 _ => None,
3051 }
3052 }
3053
3054 fn evaluate_number(&self, number: &ast::Number) -> Option<Value> {
3055 match number {
3056 ast::Number::Int(value) => value.to_string().parse::<f64>().ok().map(Value::Number),
3057 ast::Number::Float(value) => Some(Value::Number(*value)),
3058 ast::Number::Complex { .. } => None,
3059 }
3060 }
3061
3062 fn evaluate_ordered_dict(&mut self, args: &[ast::Expr], depth: usize) -> Option<Value> {
3063 if args.len() != 1 {
3064 return None;
3065 }
3066
3067 let items = match self.evaluate_expr(&args[0], depth)? {
3068 Value::List(items) | Value::Tuple(items) => items,
3069 _ => return None,
3070 };
3071
3072 let mut dict = HashMap::new();
3073 for item in items {
3074 let Value::Tuple(values) = item else {
3075 return None;
3076 };
3077 if values.len() != 2 {
3078 return None;
3079 }
3080 let key = value_to_string(&values[0])?;
3081 dict.insert(key, values[1].clone());
3082 }
3083
3084 Some(Value::Dict(dict))
3085 }
3086}
3087
/// Names under which `setup` and its providing modules are reachable in a
/// `setup.py` file.
#[derive(Default)]
struct SetupAliases {
    /// Local names bound to the `setup` function.
    setup_names: HashSet<String>,
    /// Local module names mapped to the setup module they were imported from.
    module_aliases: HashMap<String, String>,
}
3093
3094fn extract_setup_py_packages(path: &Path) -> Vec<PackageData> {
3095 extract_from_setup_py(path).into_iter().collect()
3096}
3097
3098fn extract_from_setup_py(path: &Path) -> Option<PackageData> {
3099 let content = match read_file_to_string(path) {
3100 Ok(content) => content,
3101 Err(e) => {
3102 warn!("Failed to read setup.py at {:?}: {}", path, e);
3103 return Some(default_package_data(path));
3104 }
3105 };
3106
3107 if content.len() > MAX_SETUP_PY_BYTES {
3108 warn!("setup.py too large at {:?}: {} bytes", path, content.len());
3109 let package_data = extract_from_setup_py_regex(&content);
3110 return should_emit_setup_py_package(&package_data).then_some(package_data);
3111 }
3112
3113 let mut package_data = match extract_from_setup_py_ast(&content) {
3114 Ok(Some(data)) => data,
3115 Ok(None) => return Some(default_package_data(path)),
3116 Err(e) => {
3117 warn!("Failed to parse setup.py AST at {:?}: {}", path, e);
3118 extract_from_setup_py_regex(&content)
3119 }
3120 };
3121
3122 if package_data.name.is_none() {
3123 package_data.name = extract_setup_value(&content, "name");
3124 }
3125
3126 if package_data.version.is_none() {
3127 package_data.version = extract_setup_value(&content, "version");
3128 }
3129
3130 fill_from_sibling_dunder_metadata(path, &content, &mut package_data);
3131
3132 if package_data.purl.is_none() {
3133 package_data.purl = build_setup_py_purl(
3134 package_data.name.as_deref(),
3135 package_data.version.as_deref(),
3136 );
3137 }
3138
3139 if should_emit_setup_py_package(&package_data) {
3140 Some(package_data)
3141 } else {
3142 Some(default_package_data(path))
3143 }
3144}
3145
3146fn should_emit_setup_py_package(package_data: &PackageData) -> bool {
3147 package_data.name.is_some()
3148 || package_data.version.is_some()
3149 || package_data.purl.is_some()
3150 || !package_data.dependencies.is_empty()
3151 || package_data.extracted_license_statement.is_some()
3152 || !package_data.license_detections.is_empty()
3153 || !package_data.parties.is_empty()
3154 || package_data.description.is_some()
3155 || package_data.homepage_url.is_some()
3156 || package_data.bug_tracking_url.is_some()
3157 || package_data.code_view_url.is_some()
3158 || package_data.vcs_url.is_some()
3159}
3160
3161fn fill_from_sibling_dunder_metadata(path: &Path, content: &str, package_data: &mut PackageData) {
3162 if package_data.version.is_some()
3163 && package_data.extracted_license_statement.is_some()
3164 && package_data
3165 .parties
3166 .iter()
3167 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some())
3168 {
3169 return;
3170 }
3171
3172 let Some(root) = path.parent() else {
3173 return;
3174 };
3175
3176 let dunder_metadata = collect_sibling_dunder_metadata(root, content);
3177
3178 if package_data.version.is_none() {
3179 package_data.version = dunder_metadata.version;
3180 }
3181
3182 if package_data.extracted_license_statement.is_none() {
3183 package_data.extracted_license_statement = dunder_metadata.license;
3184 }
3185
3186 let has_author = package_data
3187 .parties
3188 .iter()
3189 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some());
3190
3191 if !has_author && let Some(author) = dunder_metadata.author {
3192 package_data.parties.push(Party {
3193 r#type: Some("person".to_string()),
3194 role: Some("author".to_string()),
3195 name: Some(author),
3196 email: None,
3197 url: None,
3198 organization: None,
3199 organization_url: None,
3200 timezone: None,
3201 });
3202 }
3203}
3204
/// Values of module-level dunder attributes found in imported modules.
#[derive(Default)]
struct DunderMetadata {
    /// Value assigned to `__version__`, if found.
    version: Option<String>,
    /// Value assigned to `__author__`, if found.
    author: Option<String>,
    /// Value assigned to `__license__`, if found.
    license: Option<String>,
}
3211
3212fn collect_sibling_dunder_metadata(root: &Path, content: &str) -> DunderMetadata {
3213 let statements = match parse_module(content) {
3214 Ok(parsed) => parsed.into_suite(),
3215 Err(_) => return DunderMetadata::default(),
3216 };
3217
3218 let version_re = Regex::new(r#"(?m)^\s*__version__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
3219 let author_re = Regex::new(r#"(?m)^\s*__author__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
3220 let license_re = Regex::new(r#"(?m)^\s*__license__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
3221 let mut metadata = DunderMetadata::default();
3222
3223 for module in imported_dunder_modules(&statements) {
3224 let Some(path) = resolve_imported_module_path(root, &module) else {
3225 continue;
3226 };
3227 let Ok(module_content) = read_file_to_string(&path) else {
3228 continue;
3229 };
3230
3231 if metadata.version.is_none() {
3232 metadata.version = version_re
3233 .as_ref()
3234 .and_then(|regex| regex.captures(&module_content))
3235 .and_then(|captures| captures.get(1))
3236 .map(|match_| match_.as_str().to_string());
3237 }
3238
3239 if metadata.author.is_none() {
3240 metadata.author = author_re
3241 .as_ref()
3242 .and_then(|regex| regex.captures(&module_content))
3243 .and_then(|captures| captures.get(1))
3244 .map(|match_| match_.as_str().to_string());
3245 }
3246
3247 if metadata.license.is_none() {
3248 metadata.license = license_re
3249 .as_ref()
3250 .and_then(|regex| regex.captures(&module_content))
3251 .and_then(|captures| captures.get(1))
3252 .map(|match_| match_.as_str().to_string());
3253 }
3254
3255 if metadata.version.is_some() && metadata.author.is_some() && metadata.license.is_some() {
3256 return metadata;
3257 }
3258 }
3259
3260 metadata
3261}
3262
3263fn imported_dunder_modules(statements: &[ast::Stmt]) -> Vec<String> {
3264 let mut modules = Vec::new();
3265
3266 for statement in statements {
3267 let ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) = statement else {
3268 continue;
3269 };
3270 let Some(module) = module.as_ref().map(|name| name.as_str()) else {
3271 continue;
3272 };
3273 let imports_dunder = names.iter().any(|alias| {
3274 matches!(
3275 alias.name.as_str(),
3276 "__version__" | "__author__" | "__license__"
3277 )
3278 });
3279 if imports_dunder {
3280 modules.push(module.to_string());
3281 }
3282 }
3283
3284 modules
3285}
3286
/// Finds the source file of a dotted Python module name relative to `root`.
///
/// For module `a.b` the candidates are tried in this order and the first one
/// that exists is returned:
///
/// 1. `<root>/a/b.py`
/// 2. `<root>/a/b/__init__.py`
/// 3. `<root>/src/a/b.py`
/// 4. `<root>/src/a/b/__init__.py`
fn resolve_imported_module_path(root: &Path, module: &str) -> Option<PathBuf> {
    let mut package_dir = PathBuf::new();
    for segment in module.split('.') {
        package_dir.push(segment);
    }
    let module_file = package_dir.with_extension("py");

    for base in [root.to_path_buf(), root.join("src")] {
        let candidates = [
            base.join(&module_file),
            base.join(&package_dir).join("__init__.py"),
        ];
        for candidate in candidates {
            if candidate.exists() {
                return Some(candidate);
            }
        }
    }

    None
}
3298
3299fn extract_from_setup_py_ast(content: &str) -> Result<Option<PackageData>, String> {
3315 let statements = parse_module(content)
3316 .map(|parsed| parsed.into_suite())
3317 .map_err(|e| e.to_string())?;
3318 let aliases = collect_setup_aliases(&statements);
3319 let mut evaluator = LiteralEvaluator::new(HashMap::new());
3320 build_setup_py_constants(&statements, &mut evaluator);
3321
3322 let setup_call = find_setup_call(&statements, &aliases);
3323 let Some(call_expr) = setup_call else {
3324 return Ok(None);
3325 };
3326
3327 let setup_values = extract_setup_keywords(call_expr, &mut evaluator);
3328 Ok(Some(build_setup_py_package_data(&setup_values)))
3329}
3330
3331fn build_setup_py_constants(statements: &[ast::Stmt], evaluator: &mut LiteralEvaluator) {
3332 for stmt in statements {
3333 if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = stmt {
3334 if targets.len() != 1 {
3335 continue;
3336 }
3337
3338 let Some(name) = extract_assign_name(&targets[0]) else {
3339 continue;
3340 };
3341
3342 if let Some(value) = evaluator.evaluate_expr(value.as_ref(), 0) {
3343 evaluator.insert_constant(name, value);
3344 }
3345 }
3346 }
3347}
3348
3349fn extract_assign_name(target: &ast::Expr) -> Option<String> {
3350 match target {
3351 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
3352 _ => None,
3353 }
3354}
3355
3356fn collect_setup_aliases(statements: &[ast::Stmt]) -> SetupAliases {
3357 let mut aliases = SetupAliases::default();
3358 aliases.setup_names.insert("setup".to_string());
3359
3360 for stmt in statements {
3361 match stmt {
3362 ast::Stmt::Import(ast::StmtImport { names, .. }) => {
3363 for alias in names {
3364 let module_name = alias.name.as_str();
3365 if !is_setup_module(module_name) {
3366 continue;
3367 }
3368 let alias_name = alias
3369 .asname
3370 .as_ref()
3371 .map(|name| name.as_str())
3372 .unwrap_or(module_name);
3373 aliases
3374 .module_aliases
3375 .insert(alias_name.to_string(), module_name.to_string());
3376 }
3377 }
3378 ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
3379 let Some(module_name) = module.as_ref().map(|name| name.as_str()) else {
3380 continue;
3381 };
3382 if !is_setup_module(module_name) {
3383 continue;
3384 }
3385 for alias in names {
3386 if alias.name.as_str() != "setup" {
3387 continue;
3388 }
3389 let alias_name = alias
3390 .asname
3391 .as_ref()
3392 .map(|name| name.as_str())
3393 .unwrap_or("setup");
3394 aliases.setup_names.insert(alias_name.to_string());
3395 }
3396 }
3397 _ => {}
3398 }
3399 }
3400
3401 aliases
3402}
3403
/// Reports whether `module_name` is one of the modules recognised as
/// providing `setup`: `setuptools`, `distutils` or `distutils.core`.
fn is_setup_module(module_name: &str) -> bool {
    const SETUP_MODULES: [&str; 3] = ["setuptools", "distutils", "distutils.core"];
    SETUP_MODULES.contains(&module_name)
}
3407
3408fn find_setup_call<'a>(
3409 statements: &'a [ast::Stmt],
3410 aliases: &'a SetupAliases,
3411) -> Option<&'a ast::Expr> {
3412 let mut finder = SetupCallFinder {
3413 aliases,
3414 called_function_names: collect_top_level_called_function_names(statements),
3415 nodes_visited: 0,
3416 };
3417 finder.find_in_statements(statements)
3418}
3419
3420fn collect_top_level_called_function_names(statements: &[ast::Stmt]) -> HashSet<String> {
3421 let mut called = HashSet::new();
3422 collect_called_function_names_in_statements(statements, &mut called);
3423 called
3424}
3425
3426fn collect_called_function_names_in_statements(
3427 statements: &[ast::Stmt],
3428 called: &mut HashSet<String>,
3429) {
3430 for stmt in statements {
3431 match stmt {
3432 ast::Stmt::Expr(ast::StmtExpr { value, .. })
3433 | ast::Stmt::Assign(ast::StmtAssign { value, .. }) => {
3434 collect_called_function_names_in_expr(value.as_ref(), called);
3435 }
3436 ast::Stmt::If(ast::StmtIf {
3437 body,
3438 elif_else_clauses,
3439 ..
3440 }) => {
3441 collect_called_function_names_in_statements(body, called);
3442 for clause in elif_else_clauses {
3443 collect_called_function_names_in_statements(&clause.body, called);
3444 }
3445 }
3446 ast::Stmt::For(ast::StmtFor { body, orelse, .. })
3447 | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => {
3448 collect_called_function_names_in_statements(body, called);
3449 collect_called_function_names_in_statements(orelse, called);
3450 }
3451 ast::Stmt::With(ast::StmtWith { body, .. }) => {
3452 collect_called_function_names_in_statements(body, called);
3453 }
3454 ast::Stmt::Try(ast::StmtTry {
3455 body,
3456 orelse,
3457 finalbody,
3458 handlers,
3459 ..
3460 }) => {
3461 collect_called_function_names_in_statements(body, called);
3462 collect_called_function_names_in_statements(orelse, called);
3463 collect_called_function_names_in_statements(finalbody, called);
3464 for handler in handlers {
3465 let ast::ExceptHandler::ExceptHandler(ast::ExceptHandlerExceptHandler {
3466 body,
3467 ..
3468 }) = handler;
3469 collect_called_function_names_in_statements(body, called);
3470 }
3471 }
3472 _ => {}
3473 }
3474 }
3475}
3476
3477fn collect_called_function_names_in_expr(expr: &ast::Expr, called: &mut HashSet<String>) {
3478 if let ast::Expr::Call(ast::ExprCall {
3479 func, arguments, ..
3480 }) = expr
3481 {
3482 if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref() {
3483 called.insert(id.as_str().to_string());
3484 }
3485
3486 for arg in arguments.args.iter() {
3487 collect_called_function_names_in_expr(arg, called);
3488 }
3489 for keyword in arguments.keywords.iter() {
3490 collect_called_function_names_in_expr(&keyword.value, called);
3491 }
3492 }
3493}
3494
3495struct SetupCallFinder<'a> {
3496 aliases: &'a SetupAliases,
3497 called_function_names: HashSet<String>,
3498 nodes_visited: usize,
3499}
3500
3501impl<'a> SetupCallFinder<'a> {
3502 fn find_in_statements(&mut self, statements: &'a [ast::Stmt]) -> Option<&'a ast::Expr> {
3503 for stmt in statements {
3504 if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
3505 return None;
3506 }
3507 self.nodes_visited += 1;
3508
3509 let found = match stmt {
3510 ast::Stmt::Expr(ast::StmtExpr { value, .. }) => self.visit_expr(value.as_ref()),
3511 ast::Stmt::Assign(ast::StmtAssign { value, .. }) => self.visit_expr(value.as_ref()),
3512 ast::Stmt::If(ast::StmtIf {
3513 body,
3514 elif_else_clauses,
3515 ..
3516 }) => self.find_in_statements(body).or_else(|| {
3517 for clause in elif_else_clauses {
3518 if let Some(found) = self.find_in_statements(&clause.body) {
3519 return Some(found);
3520 }
3521 }
3522 None
3523 }),
3524 ast::Stmt::For(ast::StmtFor { body, orelse, .. })
3525 | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => self
3526 .find_in_statements(body)
3527 .or_else(|| self.find_in_statements(orelse)),
3528 ast::Stmt::FunctionDef(ast::StmtFunctionDef { name, body, .. }) => self
3529 .called_function_names
3530 .contains(name.as_str())
3531 .then(|| self.find_in_statements(body))
3532 .flatten(),
3533 ast::Stmt::With(ast::StmtWith { body, .. }) => self.find_in_statements(body),
3534 ast::Stmt::Try(ast::StmtTry {
3535 body,
3536 orelse,
3537 finalbody,
3538 handlers,
3539 ..
3540 }) => self
3541 .find_in_statements(body)
3542 .or_else(|| self.find_in_statements(orelse))
3543 .or_else(|| self.find_in_statements(finalbody))
3544 .or_else(|| {
3545 for handler in handlers {
3546 let ast::ExceptHandler::ExceptHandler(
3547 ast::ExceptHandlerExceptHandler { body, .. },
3548 ) = handler;
3549 if let Some(found) = self.find_in_statements(body) {
3550 return Some(found);
3551 }
3552 }
3553 None
3554 }),
3555 _ => None,
3556 };
3557
3558 if found.is_some() {
3559 return found;
3560 }
3561 }
3562
3563 None
3564 }
3565
3566 fn visit_expr(&mut self, expr: &'a ast::Expr) -> Option<&'a ast::Expr> {
3567 if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
3568 return None;
3569 }
3570 self.nodes_visited += 1;
3571
3572 match expr {
3573 ast::Expr::Call(ast::ExprCall { func, .. })
3574 if is_setup_call(func.as_ref(), self.aliases) =>
3575 {
3576 Some(expr)
3577 }
3578 _ => None,
3579 }
3580 }
3581}
3582
3583fn is_setup_call(func: &ast::Expr, aliases: &SetupAliases) -> bool {
3584 let Some(dotted) = dotted_name(func, 0) else {
3585 return false;
3586 };
3587
3588 if aliases.setup_names.contains(&dotted) {
3589 return true;
3590 }
3591
3592 let Some(module) = dotted.strip_suffix(".setup") else {
3593 return false;
3594 };
3595
3596 let resolved = resolve_module_alias(module, aliases);
3597 is_setup_module(&resolved)
3598}
3599
3600fn dotted_name(expr: &ast::Expr, depth: usize) -> Option<String> {
3601 if depth >= MAX_SETUP_PY_AST_DEPTH {
3602 return None;
3603 }
3604
3605 match expr {
3606 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
3607 ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
3608 let base = dotted_name(value.as_ref(), depth + 1)?;
3609 Some(format!("{}.{}", base, attr.as_str()))
3610 }
3611 _ => None,
3612 }
3613}
3614
3615fn resolve_module_alias(module: &str, aliases: &SetupAliases) -> String {
3616 if let Some(mapped) = aliases.module_aliases.get(module) {
3617 return mapped.clone();
3618 }
3619
3620 let Some((base, rest)) = module.split_once('.') else {
3621 return module.to_string();
3622 };
3623
3624 if let Some(mapped) = aliases.module_aliases.get(base) {
3625 return format!("{}.{}", mapped, rest);
3626 }
3627
3628 module.to_string()
3629}
3630
3631fn extract_setup_keywords(
3632 call_expr: &ast::Expr,
3633 evaluator: &mut LiteralEvaluator,
3634) -> HashMap<String, Value> {
3635 let mut values = HashMap::new();
3636 let ast::Expr::Call(ast::ExprCall { arguments, .. }) = call_expr else {
3637 return values;
3638 };
3639
3640 for keyword in arguments.keywords.iter() {
3641 if let Some(arg) = keyword.arg.as_ref().map(ast::Identifier::as_str) {
3642 if let Some(value) = evaluator.evaluate_expr(&keyword.value, 0) {
3643 values.insert(arg.to_string(), value);
3644 }
3645 } else if let Some(Value::Dict(dict)) = evaluator.evaluate_expr(&keyword.value, 0) {
3646 for (key, value) in dict {
3647 values.insert(key, value);
3648 }
3649 }
3650 }
3651
3652 values
3653}
3654
3655fn build_setup_py_package_data(values: &HashMap<String, Value>) -> PackageData {
3656 let name = get_value_string(values, "name");
3657 let version = get_value_string(values, "version");
3658 let description =
3659 get_value_string(values, "description").or_else(|| get_value_string(values, "summary"));
3660 let homepage_url =
3661 get_value_string(values, "url").or_else(|| get_value_string(values, "home_page"));
3662 let author = get_value_string(values, "author");
3663 let author_email = get_value_string(values, "author_email");
3664 let maintainer = get_value_string(values, "maintainer");
3665 let maintainer_email = get_value_string(values, "maintainer_email");
3666 let license = get_value_string(values, "license");
3667 let classifiers = values
3668 .get("classifiers")
3669 .and_then(value_to_string_list)
3670 .unwrap_or_default();
3671
3672 let mut parties = Vec::new();
3673 if author.is_some() || author_email.is_some() {
3674 parties.push(Party {
3675 r#type: Some("person".to_string()),
3676 role: Some("author".to_string()),
3677 name: author,
3678 email: author_email,
3679 url: None,
3680 organization: None,
3681 organization_url: None,
3682 timezone: None,
3683 });
3684 }
3685
3686 if maintainer.is_some() || maintainer_email.is_some() {
3687 parties.push(Party {
3688 r#type: Some("person".to_string()),
3689 role: Some("maintainer".to_string()),
3690 name: maintainer,
3691 email: maintainer_email,
3692 url: None,
3693 organization: None,
3694 organization_url: None,
3695 timezone: None,
3696 });
3697 }
3698
3699 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
3700 normalize_spdx_declared_license(license.as_deref());
3701 let extracted_license_statement = license.clone();
3702
3703 let dependencies = build_setup_py_dependencies(values);
3704 let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
3705 let mut homepage_from_project_urls = None;
3706 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
3707 let mut extra_data = HashMap::new();
3708
3709 if let Some(parsed_project_urls) = values.get("project_urls").and_then(value_to_string_pairs) {
3710 apply_project_url_mappings(
3711 &parsed_project_urls,
3712 &mut homepage_from_project_urls,
3713 &mut bug_tracking_url,
3714 &mut code_view_url,
3715 &mut vcs_url,
3716 &mut extra_data,
3717 );
3718 }
3719
3720 let extra_data = if extra_data.is_empty() {
3721 None
3722 } else {
3723 Some(extra_data)
3724 };
3725
3726 PackageData {
3727 package_type: Some(PythonParser::PACKAGE_TYPE),
3728 namespace: None,
3729 name,
3730 version,
3731 qualifiers: None,
3732 subpath: None,
3733 primary_language: Some("Python".to_string()),
3734 description,
3735 release_date: None,
3736 parties,
3737 keywords: Vec::new(),
3738 homepage_url: homepage_url.or(homepage_from_project_urls),
3739 download_url: None,
3740 size: None,
3741 sha1: None,
3742 md5: None,
3743 sha256: None,
3744 sha512: None,
3745 bug_tracking_url,
3746 code_view_url,
3747 vcs_url,
3748 copyright: None,
3749 holder: None,
3750 declared_license_expression,
3751 declared_license_expression_spdx,
3752 license_detections,
3753 other_license_expression: None,
3754 other_license_expression_spdx: None,
3755 other_license_detections: Vec::new(),
3756 extracted_license_statement,
3757 notice_text: None,
3758 source_packages: Vec::new(),
3759 file_references: Vec::new(),
3760 is_private: has_private_classifier(&classifiers),
3761 is_virtual: false,
3762 extra_data,
3763 dependencies,
3764 repository_homepage_url: None,
3765 repository_download_url: None,
3766 api_data_url: None,
3767 datasource_id: Some(DatasourceId::PypiSetupPy),
3768 purl,
3769 }
3770}
3771
3772fn build_setup_py_dependencies(values: &HashMap<String, Value>) -> Vec<Dependency> {
3773 let mut dependencies = Vec::new();
3774
3775 if let Some(reqs) = values
3776 .get("install_requires")
3777 .and_then(value_to_string_list)
3778 {
3779 dependencies.extend(build_setup_py_dependency_list(&reqs, "install", false));
3780 }
3781
3782 if let Some(reqs) = values.get("tests_require").and_then(value_to_string_list) {
3783 dependencies.extend(build_setup_py_dependency_list(&reqs, "test", true));
3784 }
3785
3786 if let Some(Value::Dict(extras)) = values.get("extras_require") {
3787 let mut extra_items: Vec<_> = extras.iter().collect();
3788 extra_items.sort_by_key(|(name, _)| *name);
3789 for (extra_name, extra_value) in extra_items {
3790 if let Some(reqs) = value_to_string_list(extra_value) {
3791 dependencies.extend(build_setup_py_dependency_list(
3792 reqs.as_slice(),
3793 extra_name,
3794 true,
3795 ));
3796 }
3797 }
3798 }
3799
3800 dependencies
3801}
3802
3803fn build_setup_py_dependency_list(
3804 reqs: &[String],
3805 scope: &str,
3806 is_optional: bool,
3807) -> Vec<Dependency> {
3808 reqs.iter()
3809 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
3810 .collect()
3811}
3812
3813fn get_value_string(values: &HashMap<String, Value>, key: &str) -> Option<String> {
3814 values.get(key).and_then(value_to_string)
3815}
3816
3817fn value_to_string(value: &Value) -> Option<String> {
3818 match value {
3819 Value::String(value) => Some(value.clone()),
3820 Value::Number(value) => Some(value.to_string()),
3821 Value::Bool(value) => Some(value.to_string()),
3822 _ => None,
3823 }
3824}
3825
3826fn value_to_string_list(value: &Value) -> Option<Vec<String>> {
3827 match value {
3828 Value::String(value) => Some(vec![value.clone()]),
3829 Value::List(values) | Value::Tuple(values) => {
3830 let mut items = Vec::new();
3831 for item in values {
3832 items.push(value_to_string(item)?);
3833 }
3834 Some(items)
3835 }
3836 _ => None,
3837 }
3838}
3839
3840fn value_to_string_pairs(value: &Value) -> Option<Vec<(String, String)>> {
3841 let Value::Dict(dict) = value else {
3842 return None;
3843 };
3844
3845 let mut pairs: Vec<(String, String)> = dict
3846 .iter()
3847 .map(|(key, value)| Some((key.clone(), value_to_string(value)?)))
3848 .collect::<Option<Vec<_>>>()?;
3849 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3850 Some(pairs)
3851}
3852
3853fn extract_rfc822_dependencies(headers: &HashMap<String, Vec<String>>) -> Vec<Dependency> {
3854 let requires_dist = super::rfc822::get_header_all(headers, "requires-dist");
3855 extract_requires_dist_dependencies(&requires_dist)
3856}
3857
3858pub(crate) fn extract_requires_dist_dependencies(requires_dist: &[String]) -> Vec<Dependency> {
3859 requires_dist
3860 .iter()
3861 .filter_map(|entry| build_rfc822_dependency(entry))
3862 .collect()
3863}
3864
3865fn build_rfc822_dependency(entry: &str) -> Option<Dependency> {
3866 build_python_dependency(entry, "install", false, None)
3867}
3868
3869fn build_python_dependency(
3870 entry: &str,
3871 default_scope: &str,
3872 default_optional: bool,
3873 marker_override: Option<&str>,
3874) -> Option<Dependency> {
3875 let (requirement_part, marker_part) = entry
3876 .split_once(';')
3877 .map(|(req, marker)| (req.trim(), Some(marker.trim())))
3878 .unwrap_or((entry.trim(), None));
3879
3880 let name = extract_setup_cfg_dependency_name(requirement_part)?;
3881 let requirement = normalize_rfc822_requirement(requirement_part);
3882 let (scope, is_optional, marker, marker_data) = parse_rfc822_marker(
3883 marker_part.or(marker_override),
3884 default_scope,
3885 default_optional,
3886 );
3887 let purl = build_python_dependency_purl(&name, None)?;
3888
3889 let is_pinned = requirement
3890 .as_deref()
3891 .is_some_and(|req| req.starts_with("==") || req.starts_with("==="));
3892 let purl = if is_pinned {
3893 requirement
3894 .as_deref()
3895 .map(|req| req.trim_start_matches('='))
3896 .and_then(|version| build_python_dependency_purl(&name, Some(version)))
3897 .unwrap_or(purl)
3898 } else {
3899 purl
3900 };
3901
3902 let mut extra_data = HashMap::new();
3903 extra_data.extend(marker_data);
3904 if let Some(marker) = marker {
3905 extra_data.insert("marker".to_string(), serde_json::Value::String(marker));
3906 }
3907
3908 Some(Dependency {
3909 purl: Some(purl),
3910 extracted_requirement: requirement,
3911 scope: Some(scope),
3912 is_runtime: Some(true),
3913 is_optional: Some(is_optional),
3914 is_pinned: Some(is_pinned),
3915 is_direct: Some(true),
3916 resolved_package: None,
3917 extra_data: if extra_data.is_empty() {
3918 None
3919 } else {
3920 Some(extra_data)
3921 },
3922 })
3923}
3924
3925fn normalize_rfc822_requirement(requirement_part: &str) -> Option<String> {
3926 let name = extract_setup_cfg_dependency_name(requirement_part)?;
3927 let trimmed = requirement_part.trim();
3928 let mut remainder = trimmed[name.len()..].trim();
3929
3930 if let Some(stripped) = remainder.strip_prefix('[')
3931 && let Some(end_idx) = stripped.find(']')
3932 {
3933 remainder = stripped[end_idx + 1..].trim();
3934 }
3935
3936 let remainder = remainder
3937 .strip_prefix('(')
3938 .and_then(|value| value.strip_suffix(')'))
3939 .unwrap_or(remainder)
3940 .trim();
3941
3942 if remainder.is_empty() {
3943 return None;
3944 }
3945
3946 let mut specifiers: Vec<String> = remainder
3947 .split(',')
3948 .map(|specifier| specifier.trim().replace(' ', ""))
3949 .filter(|specifier| !specifier.is_empty())
3950 .collect();
3951 specifiers.sort();
3952 Some(specifiers.join(","))
3953}
3954
/// Percent-encodes `*` (as `%2A`) in a version so it can be embedded in a purl.
fn encode_python_dependency_purl_version(version: &str) -> String {
    let pieces: Vec<&str> = version.split('*').collect();
    pieces.join("%2A")
}
3958
3959fn build_python_dependency_purl(name: &str, version: Option<&str>) -> Option<String> {
3960 let normalized_name = normalize_python_dependency_name(name);
3961
3962 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &normalized_name)
3963 .ok()
3964 .map(|_| match version {
3965 Some(version) => {
3966 format!(
3967 "pkg:pypi/{normalized_name}@{}",
3968 encode_python_dependency_purl_version(version)
3969 )
3970 }
3971 None => format!("pkg:pypi/{normalized_name}"),
3972 })
3973}
3974
/// Normalizes a dependency name: surrounding whitespace removed, ASCII letters
/// lowercased and underscores replaced by hyphens.
fn normalize_python_dependency_name(name: &str) -> String {
    name.trim()
        .chars()
        .map(|ch| match ch {
            '_' => '-',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
3978
3979fn parse_rfc822_marker(
3980 marker_part: Option<&str>,
3981 default_scope: &str,
3982 default_optional: bool,
3983) -> (
3984 String,
3985 bool,
3986 Option<String>,
3987 HashMap<String, serde_json::Value>,
3988) {
3989 let Some(marker) = marker_part.filter(|marker| !marker.trim().is_empty()) else {
3990 return (
3991 default_scope.to_string(),
3992 default_optional,
3993 None,
3994 HashMap::new(),
3995 );
3996 };
3997
3998 let extra_re = Regex::new(r#"extra\s*==\s*['\"]([^'\"]+)['\"]"#)
3999 .expect("extra marker regex should compile");
4000 let mut extra_data = HashMap::new();
4001
4002 if let Some(python_version) = extract_marker_field(marker, "python_version") {
4003 extra_data.insert(
4004 "python_version".to_string(),
4005 serde_json::Value::String(python_version),
4006 );
4007 }
4008 if let Some(sys_platform) = extract_marker_field(marker, "sys_platform") {
4009 extra_data.insert(
4010 "sys_platform".to_string(),
4011 serde_json::Value::String(sys_platform),
4012 );
4013 }
4014
4015 if let Some(captures) = extra_re.captures(marker)
4016 && let Some(scope) = captures.get(1)
4017 {
4018 return (
4019 scope.as_str().to_string(),
4020 true,
4021 Some(marker.trim().to_string()),
4022 extra_data,
4023 );
4024 }
4025
4026 (
4027 default_scope.to_string(),
4028 default_optional,
4029 Some(marker.trim().to_string()),
4030 extra_data,
4031 )
4032}
4033
4034fn extract_marker_field(marker: &str, field: &str) -> Option<String> {
4035 let re = Regex::new(&format!(
4036 r#"{}\s*(==|!=|<=|>=|<|>)\s*['\"]([^'\"]+)['\"]"#,
4037 field
4038 ))
4039 .ok()?;
4040 let captures = re.captures(marker)?;
4041 let operator = captures.get(1)?.as_str();
4042 let value = captures.get(2)?.as_str();
4043 Some(format!("{} {}", operator, value))
4044}
4045
4046fn parse_requires_txt(content: &str) -> Vec<Dependency> {
4047 let mut dependencies = Vec::new();
4048 let mut current_scope = "install".to_string();
4049 let mut current_optional = false;
4050 let mut current_marker: Option<String> = None;
4051
4052 for line in content.lines() {
4053 let trimmed = line.trim();
4054 if trimmed.is_empty() || trimmed.starts_with('#') {
4055 continue;
4056 }
4057
4058 if trimmed.starts_with('[') && trimmed.ends_with(']') {
4059 let inner = &trimmed[1..trimmed.len() - 1];
4060 if let Some(rest) = inner.strip_prefix(':') {
4061 current_scope = "install".to_string();
4062 current_optional = false;
4063 current_marker = Some(rest.trim().to_string());
4064 } else if let Some((scope, marker)) = inner.split_once(':') {
4065 current_scope = scope.trim().to_string();
4066 current_optional = true;
4067 current_marker = Some(marker.trim().to_string());
4068 } else {
4069 current_scope = inner.trim().to_string();
4070 current_optional = true;
4071 current_marker = None;
4072 }
4073 continue;
4074 }
4075
4076 if let Some(dependency) = build_python_dependency(
4077 trimmed,
4078 ¤t_scope,
4079 current_optional,
4080 current_marker.as_deref(),
4081 ) {
4082 dependencies.push(dependency);
4083 }
4084 }
4085
4086 dependencies
4087}
4088
/// Reports whether any classifier equals `Private :: Do Not Upload`,
/// ignoring ASCII case.
fn has_private_classifier(classifiers: &[String]) -> bool {
    const PRIVATE_CLASSIFIER: &str = "Private :: Do Not Upload";

    for classifier in classifiers {
        if classifier.eq_ignore_ascii_case(PRIVATE_CLASSIFIER) {
            return true;
        }
    }
    false
}
4094
4095fn build_setup_py_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
4096 let name = name?;
4097 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
4098 if let Some(version) = version {
4099 package_url.with_version(version).ok()?;
4100 }
4101 Some(package_url.to_string())
4102}
4103
4104fn extract_from_setup_py_regex(content: &str) -> PackageData {
4105 let name = extract_setup_value(content, "name");
4106 let version = extract_setup_value(content, "version");
4107 let license_expression = extract_setup_value(content, "license");
4108
4109 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
4110 normalize_spdx_declared_license(license_expression.as_deref());
4111 let extracted_license_statement = license_expression.clone();
4112
4113 let dependencies = extract_setup_py_dependencies(content);
4114 let homepage_url = extract_setup_value(content, "url");
4115 let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
4116
4117 PackageData {
4118 package_type: Some(PythonParser::PACKAGE_TYPE),
4119 namespace: None,
4120 name,
4121 version,
4122 qualifiers: None,
4123 subpath: None,
4124 primary_language: Some("Python".to_string()),
4125 description: None,
4126 release_date: None,
4127 parties: Vec::new(),
4128 keywords: Vec::new(),
4129 homepage_url,
4130 download_url: None,
4131 size: None,
4132 sha1: None,
4133 md5: None,
4134 sha256: None,
4135 sha512: None,
4136 bug_tracking_url: None,
4137 code_view_url: None,
4138 vcs_url: None,
4139 copyright: None,
4140 holder: None,
4141 declared_license_expression,
4142 declared_license_expression_spdx,
4143 license_detections,
4144 other_license_expression: None,
4145 other_license_expression_spdx: None,
4146 other_license_detections: Vec::new(),
4147 extracted_license_statement,
4148 notice_text: None,
4149 source_packages: Vec::new(),
4150 file_references: Vec::new(),
4151 is_private: false,
4152 is_virtual: false,
4153 extra_data: None,
4154 dependencies,
4155 repository_homepage_url: None,
4156 repository_download_url: None,
4157 api_data_url: None,
4158 datasource_id: Some(DatasourceId::PypiSetupPy),
4159 purl,
4160 }
4161}
4162
4163fn package_data_to_resolved(pkg: &PackageData) -> crate::models::ResolvedPackage {
4164 crate::models::ResolvedPackage::from_package_data(pkg, PackageType::Pypi)
4165}
4166
4167fn extract_from_pypi_json(path: &Path) -> PackageData {
4168 let default = PackageData {
4169 package_type: Some(PythonParser::PACKAGE_TYPE),
4170 datasource_id: Some(DatasourceId::PypiJson),
4171 ..Default::default()
4172 };
4173
4174 let content = match read_file_to_string(path) {
4175 Ok(content) => content,
4176 Err(error) => {
4177 warn!("Failed to read pypi.json at {:?}: {}", path, error);
4178 return default;
4179 }
4180 };
4181
4182 let root: serde_json::Value = match serde_json::from_str(&content) {
4183 Ok(value) => value,
4184 Err(error) => {
4185 warn!("Failed to parse pypi.json at {:?}: {}", path, error);
4186 return default;
4187 }
4188 };
4189
4190 let Some(info) = root.get("info").and_then(|value| value.as_object()) else {
4191 warn!("No info object found in pypi.json at {:?}", path);
4192 return default;
4193 };
4194
4195 let name = info
4196 .get("name")
4197 .and_then(|value| value.as_str())
4198 .map(ToOwned::to_owned);
4199 let version = info
4200 .get("version")
4201 .and_then(|value| value.as_str())
4202 .map(ToOwned::to_owned);
4203 let summary = info
4204 .get("summary")
4205 .and_then(|value| value.as_str())
4206 .map(ToOwned::to_owned);
4207 let description = info
4208 .get("description")
4209 .and_then(|value| value.as_str())
4210 .filter(|value| !value.trim().is_empty())
4211 .map(ToOwned::to_owned)
4212 .or(summary);
4213 let mut homepage_url = info
4214 .get("home_page")
4215 .and_then(|value| value.as_str())
4216 .map(ToOwned::to_owned);
4217 let author = info
4218 .get("author")
4219 .and_then(|value| value.as_str())
4220 .filter(|value| !value.trim().is_empty())
4221 .map(ToOwned::to_owned);
4222 let author_email = info
4223 .get("author_email")
4224 .and_then(|value| value.as_str())
4225 .filter(|value| !value.trim().is_empty())
4226 .map(ToOwned::to_owned);
4227 let license = info
4228 .get("license")
4229 .and_then(|value| value.as_str())
4230 .filter(|value| !value.trim().is_empty())
4231 .map(ToOwned::to_owned);
4232 let keywords = parse_setup_cfg_keywords(
4233 info.get("keywords")
4234 .and_then(|value| value.as_str())
4235 .map(ToOwned::to_owned),
4236 );
4237 let classifiers = info
4238 .get("classifiers")
4239 .and_then(|value| value.as_array())
4240 .map(|values| {
4241 values
4242 .iter()
4243 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
4244 .collect::<Vec<_>>()
4245 })
4246 .unwrap_or_default();
4247
4248 let mut parties = Vec::new();
4249 if author.is_some() || author_email.is_some() {
4250 parties.push(Party {
4251 r#type: Some("person".to_string()),
4252 role: Some("author".to_string()),
4253 name: author,
4254 email: author_email,
4255 url: None,
4256 organization: None,
4257 organization_url: None,
4258 timezone: None,
4259 });
4260 }
4261
4262 let mut bug_tracking_url = None;
4263 let mut code_view_url = None;
4264 let mut vcs_url = None;
4265 let mut extra_data = HashMap::new();
4266
4267 let parsed_project_urls = info
4268 .get("project_urls")
4269 .and_then(|value| value.as_object())
4270 .map(|map| {
4271 let mut pairs: Vec<(String, String)> = map
4272 .iter()
4273 .filter_map(|(key, value)| Some((key.clone(), value.as_str()?.to_string())))
4274 .collect();
4275 pairs.sort_by(|left, right| left.0.cmp(&right.0));
4276 pairs
4277 })
4278 .unwrap_or_default();
4279
4280 apply_project_url_mappings(
4281 &parsed_project_urls,
4282 &mut homepage_url,
4283 &mut bug_tracking_url,
4284 &mut code_view_url,
4285 &mut vcs_url,
4286 &mut extra_data,
4287 );
4288
4289 let (download_url, size, sha256) = root
4290 .get("urls")
4291 .and_then(|value| value.as_array())
4292 .map(|urls| select_pypi_json_artifact(urls))
4293 .unwrap_or((None, None, None));
4294
4295 let sha256 = sha256.and_then(|h| Sha256Digest::from_hex(&h).ok());
4296
4297 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
4298 normalize_spdx_declared_license(license.as_deref());
4299 let dependencies = info
4300 .get("requires_dist")
4301 .and_then(|value| value.as_array())
4302 .map(|entries| {
4303 entries
4304 .iter()
4305 .filter_map(|entry| entry.as_str().map(ToOwned::to_owned))
4306 .collect::<Vec<_>>()
4307 })
4308 .map(|entries| extract_requires_dist_dependencies(&entries))
4309 .unwrap_or_default();
4310
4311 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
4312 build_pypi_urls(name.as_deref(), version.as_deref());
4313
4314 PackageData {
4315 package_type: Some(PythonParser::PACKAGE_TYPE),
4316 namespace: None,
4317 name,
4318 version,
4319 qualifiers: None,
4320 subpath: None,
4321 primary_language: None,
4322 description,
4323 release_date: None,
4324 parties,
4325 keywords,
4326 homepage_url: homepage_url.or(repository_homepage_url.clone()),
4327 download_url,
4328 size,
4329 sha1: None,
4330 md5: None,
4331 sha256,
4332 sha512: None,
4333 bug_tracking_url,
4334 code_view_url,
4335 vcs_url,
4336 copyright: None,
4337 holder: None,
4338 declared_license_expression,
4339 declared_license_expression_spdx,
4340 license_detections,
4341 other_license_expression: None,
4342 other_license_expression_spdx: None,
4343 other_license_detections: Vec::new(),
4344 extracted_license_statement: license,
4345 notice_text: None,
4346 source_packages: Vec::new(),
4347 file_references: Vec::new(),
4348 is_private: has_private_classifier(&classifiers),
4349 is_virtual: false,
4350 extra_data: if extra_data.is_empty() {
4351 None
4352 } else {
4353 Some(extra_data)
4354 },
4355 dependencies,
4356 repository_homepage_url,
4357 repository_download_url,
4358 api_data_url,
4359 datasource_id: Some(DatasourceId::PypiJson),
4360 purl,
4361 }
4362}
4363
4364fn select_pypi_json_artifact(
4365 urls: &[serde_json::Value],
4366) -> (Option<String>, Option<u64>, Option<String>) {
4367 let selected = urls
4368 .iter()
4369 .find(|entry| entry.get("packagetype").and_then(|value| value.as_str()) == Some("sdist"))
4370 .or_else(|| urls.first());
4371
4372 let Some(entry) = selected else {
4373 return (None, None, None);
4374 };
4375
4376 let download_url = entry
4377 .get("url")
4378 .and_then(|value| value.as_str())
4379 .map(ToOwned::to_owned);
4380 let size = entry.get("size").and_then(|value| value.as_u64());
4381 let sha256 = entry
4382 .get("digests")
4383 .and_then(|value| value.as_object())
4384 .and_then(|digests| digests.get("sha256"))
4385 .and_then(|value| value.as_str())
4386 .map(ToOwned::to_owned);
4387
4388 (download_url, size, sha256)
4389}
4390
4391fn extract_from_pip_inspect(path: &Path) -> PackageData {
4392 let content = match read_file_to_string(path) {
4393 Ok(content) => content,
4394 Err(e) => {
4395 warn!("Failed to read pip-inspect.deplock at {:?}: {}", path, e);
4396 return default_package_data(path);
4397 }
4398 };
4399
4400 let root: serde_json::Value = match serde_json::from_str(&content) {
4401 Ok(value) => value,
4402 Err(e) => {
4403 warn!(
4404 "Failed to parse pip-inspect.deplock JSON at {:?}: {}",
4405 path, e
4406 );
4407 return default_package_data(path);
4408 }
4409 };
4410
4411 let installed = match root.get("installed").and_then(|v| v.as_array()) {
4412 Some(arr) => arr,
4413 None => {
4414 warn!(
4415 "No 'installed' array found in pip-inspect.deplock at {:?}",
4416 path
4417 );
4418 return default_package_data(path);
4419 }
4420 };
4421
4422 let pip_version = root
4423 .get("pip_version")
4424 .and_then(|v| v.as_str())
4425 .map(String::from);
4426 let inspect_version = root
4427 .get("version")
4428 .and_then(|v| v.as_str())
4429 .map(String::from);
4430
4431 let mut main_package: Option<PackageData> = None;
4432 let mut dependencies: Vec<Dependency> = Vec::new();
4433
4434 for package_entry in installed {
4435 let metadata = match package_entry.get("metadata") {
4436 Some(m) => m,
4437 None => continue,
4438 };
4439
4440 let is_requested = package_entry
4441 .get("requested")
4442 .and_then(|v| v.as_bool())
4443 .unwrap_or(false);
4444 let has_direct_url = package_entry.get("direct_url").is_some();
4445
4446 let name = metadata
4447 .get("name")
4448 .and_then(|v| v.as_str())
4449 .map(String::from);
4450 let version = metadata
4451 .get("version")
4452 .and_then(|v| v.as_str())
4453 .map(String::from);
4454 let summary = metadata
4455 .get("summary")
4456 .and_then(|v| v.as_str())
4457 .map(String::from);
4458 let home_page = metadata
4459 .get("home_page")
4460 .and_then(|v| v.as_str())
4461 .map(String::from);
4462 let author = metadata
4463 .get("author")
4464 .and_then(|v| v.as_str())
4465 .map(String::from);
4466 let author_email = metadata
4467 .get("author_email")
4468 .and_then(|v| v.as_str())
4469 .map(String::from);
4470 let license = metadata
4471 .get("license")
4472 .and_then(|v| v.as_str())
4473 .map(String::from);
4474 let description = metadata
4475 .get("description")
4476 .and_then(|v| v.as_str())
4477 .map(String::from);
4478 let keywords = metadata
4479 .get("keywords")
4480 .and_then(|v| v.as_array())
4481 .map(|arr| {
4482 arr.iter()
4483 .filter_map(|k| k.as_str().map(String::from))
4484 .collect::<Vec<_>>()
4485 })
4486 .unwrap_or_default();
4487
4488 let mut parties = Vec::new();
4489 if author.is_some() || author_email.is_some() {
4490 parties.push(Party {
4491 r#type: Some("person".to_string()),
4492 role: Some("author".to_string()),
4493 name: author,
4494 email: author_email,
4495 url: None,
4496 organization: None,
4497 organization_url: None,
4498 timezone: None,
4499 });
4500 }
4501
4502 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
4503 normalize_spdx_declared_license(license.as_deref());
4504 let extracted_license_statement = license.clone();
4505 let requires_dist = metadata
4506 .get("requires_dist")
4507 .and_then(|v| v.as_array())
4508 .map(|entries| {
4509 entries
4510 .iter()
4511 .filter_map(|entry| entry.as_str().map(ToOwned::to_owned))
4512 .collect::<Vec<_>>()
4513 })
4514 .unwrap_or_default();
4515 let parsed_dependencies = extract_requires_dist_dependencies(&requires_dist);
4516
4517 let purl = name.as_ref().and_then(|n| {
4518 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
4519 if let Some(v) = &version {
4520 package_url.with_version(v).ok()?;
4521 }
4522 Some(package_url.to_string())
4523 });
4524
4525 if is_requested && has_direct_url {
4526 let mut extra_data = HashMap::new();
4527 if let Some(pv) = &pip_version {
4528 extra_data.insert(
4529 "pip_version".to_string(),
4530 serde_json::Value::String(pv.clone()),
4531 );
4532 }
4533 if let Some(iv) = &inspect_version {
4534 extra_data.insert(
4535 "inspect_version".to_string(),
4536 serde_json::Value::String(iv.clone()),
4537 );
4538 }
4539
4540 main_package = Some(PackageData {
4541 package_type: Some(PythonParser::PACKAGE_TYPE),
4542 namespace: None,
4543 name,
4544 version,
4545 qualifiers: None,
4546 subpath: None,
4547 primary_language: Some("Python".to_string()),
4548 description: description.or(summary),
4549 release_date: None,
4550 parties,
4551 keywords,
4552 homepage_url: home_page,
4553 download_url: None,
4554 size: None,
4555 sha1: None,
4556 md5: None,
4557 sha256: None,
4558 sha512: None,
4559 bug_tracking_url: None,
4560 code_view_url: None,
4561 vcs_url: None,
4562 copyright: None,
4563 holder: None,
4564 declared_license_expression,
4565 declared_license_expression_spdx,
4566 license_detections,
4567 other_license_expression: None,
4568 other_license_expression_spdx: None,
4569 other_license_detections: Vec::new(),
4570 extracted_license_statement,
4571 notice_text: None,
4572 source_packages: Vec::new(),
4573 file_references: Vec::new(),
4574 is_private: false,
4575 is_virtual: true,
4576 extra_data: if extra_data.is_empty() {
4577 None
4578 } else {
4579 Some(extra_data)
4580 },
4581 dependencies: parsed_dependencies,
4582 repository_homepage_url: None,
4583 repository_download_url: None,
4584 api_data_url: None,
4585 datasource_id: Some(DatasourceId::PypiInspectDeplock),
4586 purl,
4587 });
4588 } else {
4589 let resolved_package = PackageData {
4590 package_type: Some(PythonParser::PACKAGE_TYPE),
4591 namespace: None,
4592 name: name.clone(),
4593 version: version.clone(),
4594 qualifiers: None,
4595 subpath: None,
4596 primary_language: Some("Python".to_string()),
4597 description: description.or(summary),
4598 release_date: None,
4599 parties,
4600 keywords,
4601 homepage_url: home_page,
4602 download_url: None,
4603 size: None,
4604 sha1: None,
4605 md5: None,
4606 sha256: None,
4607 sha512: None,
4608 bug_tracking_url: None,
4609 code_view_url: None,
4610 vcs_url: None,
4611 copyright: None,
4612 holder: None,
4613 declared_license_expression,
4614 declared_license_expression_spdx,
4615 license_detections,
4616 other_license_expression: None,
4617 other_license_expression_spdx: None,
4618 other_license_detections: Vec::new(),
4619 extracted_license_statement,
4620 notice_text: None,
4621 source_packages: Vec::new(),
4622 file_references: Vec::new(),
4623 is_private: false,
4624 is_virtual: true,
4625 extra_data: None,
4626 dependencies: parsed_dependencies,
4627 repository_homepage_url: None,
4628 repository_download_url: None,
4629 api_data_url: None,
4630 datasource_id: Some(DatasourceId::PypiInspectDeplock),
4631 purl: purl.clone(),
4632 };
4633
4634 let resolved = package_data_to_resolved(&resolved_package);
4635 dependencies.push(Dependency {
4636 purl,
4637 extracted_requirement: None,
4638 scope: None,
4639 is_runtime: Some(true),
4640 is_optional: Some(false),
4641 is_pinned: Some(true),
4642 is_direct: Some(is_requested),
4643 resolved_package: Some(Box::new(resolved)),
4644 extra_data: None,
4645 });
4646 }
4647 }
4648
4649 if let Some(mut main_pkg) = main_package {
4650 let direct_requirement_purls: HashSet<String> = main_pkg
4651 .dependencies
4652 .iter()
4653 .filter_map(|dep| dep.purl.as_deref().map(base_dependency_purl))
4654 .collect();
4655
4656 let resolved_requirement_purls: HashSet<String> = dependencies
4657 .iter()
4658 .filter_map(|dep| dep.purl.as_deref().map(base_dependency_purl))
4659 .collect();
4660
4661 let unresolved_dependencies = main_pkg
4662 .dependencies
4663 .iter()
4664 .filter(|dep| {
4665 dep.purl.as_ref().is_some_and(|purl| {
4666 !resolved_requirement_purls.contains(&base_dependency_purl(purl))
4667 })
4668 })
4669 .cloned()
4670 .collect::<Vec<_>>();
4671
4672 for dependency in &mut dependencies {
4673 if dependency
4674 .purl
4675 .as_ref()
4676 .is_some_and(|purl| direct_requirement_purls.contains(&base_dependency_purl(purl)))
4677 {
4678 dependency.is_direct = Some(true);
4679 }
4680 }
4681
4682 main_pkg.dependencies = dependencies;
4683 main_pkg.dependencies.extend(unresolved_dependencies);
4684 main_pkg
4685 } else {
4686 default_package_data(path)
4687 }
4688}
4689
/// Returns the part of `purl` that precedes the first `@`.
///
/// When `purl` contains no `@`, the whole string is returned unchanged.
fn base_dependency_purl(purl: &str) -> String {
    let base = match purl.find('@') {
        Some(at) => &purl[..at],
        None => purl,
    };
    base.to_owned()
}
4695
/// Parsed `setup.cfg` contents as produced by `parse_setup_cfg`:
/// lowercased section name -> lowercased option name -> value lines in file order.
type IniSections = HashMap<String, HashMap<String, Vec<String>>>;
4697
4698fn extract_from_setup_cfg(path: &Path) -> PackageData {
4699 let content = match read_file_to_string(path) {
4700 Ok(content) => content,
4701 Err(e) => {
4702 warn!("Failed to read setup.cfg at {:?}: {}", path, e);
4703 return default_package_data(path);
4704 }
4705 };
4706
4707 let sections = parse_setup_cfg(&content);
4708 let name = get_ini_value(§ions, "metadata", "name");
4709 let version = get_ini_value(§ions, "metadata", "version");
4710 let description = get_ini_value(§ions, "metadata", "description");
4711 let author = get_ini_value(§ions, "metadata", "author");
4712 let author_email = get_ini_value(§ions, "metadata", "author_email");
4713 let maintainer = get_ini_value(§ions, "metadata", "maintainer");
4714 let maintainer_email = get_ini_value(§ions, "metadata", "maintainer_email");
4715 let license = get_ini_value(§ions, "metadata", "license");
4716 let mut homepage_url = get_ini_value(§ions, "metadata", "url");
4717 let classifiers = get_ini_values(§ions, "metadata", "classifiers");
4718 let keywords = parse_setup_cfg_keywords(get_ini_value(§ions, "metadata", "keywords"));
4719 let python_requires = get_ini_value(§ions, "options", "python_requires");
4720 let parsed_project_urls =
4721 parse_setup_cfg_project_urls(&get_ini_values(§ions, "metadata", "project_urls"));
4722 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
4723 let mut extra_data = HashMap::new();
4724
4725 let mut parties = Vec::new();
4726 if author.is_some() || author_email.is_some() {
4727 parties.push(Party {
4728 r#type: Some("person".to_string()),
4729 role: Some("author".to_string()),
4730 name: author,
4731 email: author_email,
4732 url: None,
4733 organization: None,
4734 organization_url: None,
4735 timezone: None,
4736 });
4737 }
4738
4739 if maintainer.is_some() || maintainer_email.is_some() {
4740 parties.push(Party {
4741 r#type: Some("person".to_string()),
4742 role: Some("maintainer".to_string()),
4743 name: maintainer,
4744 email: maintainer_email,
4745 url: None,
4746 organization: None,
4747 organization_url: None,
4748 timezone: None,
4749 });
4750 }
4751
4752 let declared_license_expression = None;
4753 let declared_license_expression_spdx = None;
4754 let license_detections = Vec::new();
4755 let extracted_license_statement = license.clone();
4756
4757 let dependencies = extract_setup_cfg_dependencies(§ions);
4758
4759 if let Some(value) = python_requires {
4760 extra_data.insert(
4761 "python_requires".to_string(),
4762 serde_json::Value::String(value),
4763 );
4764 }
4765
4766 apply_project_url_mappings(
4767 &parsed_project_urls,
4768 &mut homepage_url,
4769 &mut bug_tracking_url,
4770 &mut code_view_url,
4771 &mut vcs_url,
4772 &mut extra_data,
4773 );
4774
4775 let extra_data = if extra_data.is_empty() {
4776 None
4777 } else {
4778 Some(extra_data)
4779 };
4780
4781 let purl = name.as_ref().and_then(|n| {
4782 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
4783 if let Some(v) = &version {
4784 package_url.with_version(v).ok()?;
4785 }
4786 Some(package_url.to_string())
4787 });
4788
4789 PackageData {
4790 package_type: Some(PythonParser::PACKAGE_TYPE),
4791 namespace: None,
4792 name,
4793 version,
4794 qualifiers: None,
4795 subpath: None,
4796 primary_language: Some("Python".to_string()),
4797 description,
4798 release_date: None,
4799 parties,
4800 keywords,
4801 homepage_url,
4802 download_url: None,
4803 size: None,
4804 sha1: None,
4805 md5: None,
4806 sha256: None,
4807 sha512: None,
4808 bug_tracking_url,
4809 code_view_url,
4810 vcs_url,
4811 copyright: None,
4812 holder: None,
4813 declared_license_expression,
4814 declared_license_expression_spdx,
4815 license_detections,
4816 other_license_expression: None,
4817 other_license_expression_spdx: None,
4818 other_license_detections: Vec::new(),
4819 extracted_license_statement,
4820 notice_text: None,
4821 source_packages: Vec::new(),
4822 file_references: Vec::new(),
4823 is_private: has_private_classifier(&classifiers),
4824 is_virtual: false,
4825 extra_data,
4826 dependencies,
4827 repository_homepage_url: None,
4828 repository_download_url: None,
4829 api_data_url: None,
4830 datasource_id: Some(DatasourceId::PypiSetupCfg),
4831 purl,
4832 }
4833}
4834
/// Splits a comma-separated keyword string into trimmed, non-empty keywords.
///
/// Returns an empty vector when `value` is `None`.
fn parse_setup_cfg_keywords(value: Option<String>) -> Vec<String> {
    let mut parsed = Vec::new();
    if let Some(raw) = value {
        for piece in raw.split(',') {
            let piece = piece.trim();
            if !piece.is_empty() {
                parsed.push(piece.to_string());
            }
        }
    }
    parsed
}
4847
/// Parses `label = url` entries into `(label, url)` pairs.
///
/// Each entry is split at its first `=`; both halves are trimmed. Entries
/// without `=`, or with an empty label or URL, are skipped.
fn parse_setup_cfg_project_urls(entries: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for entry in entries {
        let Some((raw_label, raw_url)) = entry.split_once('=') else {
            continue;
        };
        let (label, url) = (raw_label.trim(), raw_url.trim());
        if label.is_empty() || url.is_empty() {
            continue;
        }
        pairs.push((label.to_owned(), url.to_owned()));
    }
    pairs
}
4863
4864fn apply_project_url_mappings(
4865 parsed_urls: &[(String, String)],
4866 homepage_url: &mut Option<String>,
4867 bug_tracking_url: &mut Option<String>,
4868 code_view_url: &mut Option<String>,
4869 vcs_url: &mut Option<String>,
4870 extra_data: &mut HashMap<String, serde_json::Value>,
4871) {
4872 for (label, url) in parsed_urls {
4873 let label_lower = label.to_lowercase();
4874
4875 if bug_tracking_url.is_none()
4876 && matches!(
4877 label_lower.as_str(),
4878 "tracker"
4879 | "bug reports"
4880 | "bug tracker"
4881 | "issues"
4882 | "issue tracker"
4883 | "github: issues"
4884 )
4885 {
4886 *bug_tracking_url = Some(url.clone());
4887 } else if code_view_url.is_none()
4888 && matches!(label_lower.as_str(), "source" | "source code" | "code")
4889 {
4890 *code_view_url = Some(url.clone());
4891 } else if vcs_url.is_none()
4892 && matches!(
4893 label_lower.as_str(),
4894 "github" | "gitlab" | "github: repo" | "repository"
4895 )
4896 {
4897 *vcs_url = Some(url.clone());
4898 } else if homepage_url.is_none()
4899 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
4900 {
4901 *homepage_url = Some(url.clone());
4902 } else if label_lower == "changelog" {
4903 extra_data.insert(
4904 "changelog_url".to_string(),
4905 serde_json::Value::String(url.clone()),
4906 );
4907 }
4908 }
4909
4910 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
4911 .iter()
4912 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
4913 .collect();
4914
4915 if !project_urls_json.is_empty() {
4916 extra_data.insert(
4917 "project_urls".to_string(),
4918 serde_json::Value::Object(project_urls_json),
4919 );
4920 }
4921}
4922
4923fn parse_setup_cfg(content: &str) -> IniSections {
4924 let mut sections: IniSections = HashMap::new();
4925 let mut current_section: Option<String> = None;
4926 let mut current_key: Option<String> = None;
4927
4928 for raw_line in content.lines() {
4929 let line = raw_line.trim_end_matches('\r');
4930 let trimmed = line.trim();
4931 if trimmed.is_empty() {
4932 continue;
4933 }
4934
4935 let stripped = line.trim_start();
4936 if stripped.starts_with('#') || stripped.starts_with(';') {
4937 continue;
4938 }
4939
4940 if stripped.starts_with('[') && stripped.ends_with(']') {
4941 let section_name = stripped
4942 .trim_start_matches('[')
4943 .trim_end_matches(']')
4944 .trim()
4945 .to_ascii_lowercase();
4946 current_section = if section_name.is_empty() {
4947 None
4948 } else {
4949 Some(section_name)
4950 };
4951 current_key = None;
4952 continue;
4953 }
4954
4955 if (line.starts_with(' ') || line.starts_with('\t')) && current_key.is_some() {
4956 if let (Some(section), Some(key)) = (current_section.as_ref(), current_key.as_ref()) {
4957 let value = stripped.trim();
4958 if !value.is_empty() {
4959 sections
4960 .entry(section.clone())
4961 .or_default()
4962 .entry(key.clone())
4963 .or_default()
4964 .push(value.to_string());
4965 }
4966 }
4967 continue;
4968 }
4969
4970 if let Some((key, value)) = stripped.split_once('=')
4971 && let Some(section) = current_section.as_ref()
4972 {
4973 let key_name = key.trim().to_ascii_lowercase();
4974 let value_trimmed = value.trim();
4975 let entry = sections
4976 .entry(section.clone())
4977 .or_default()
4978 .entry(key_name.clone())
4979 .or_default();
4980 if !value_trimmed.is_empty() {
4981 entry.push(value_trimmed.to_string());
4982 }
4983 current_key = Some(key_name);
4984 }
4985 }
4986
4987 sections
4988}
4989
4990fn get_ini_value(sections: &IniSections, section: &str, key: &str) -> Option<String> {
4991 sections
4992 .get(§ion.to_ascii_lowercase())
4993 .and_then(|values| values.get(&key.to_ascii_lowercase()))
4994 .and_then(|entries| entries.first())
4995 .map(|value| value.trim().to_string())
4996}
4997
4998fn get_ini_values(sections: &IniSections, section: &str, key: &str) -> Vec<String> {
4999 sections
5000 .get(§ion.to_ascii_lowercase())
5001 .and_then(|values| values.get(&key.to_ascii_lowercase()))
5002 .cloned()
5003 .unwrap_or_default()
5004}
5005
5006fn extract_setup_cfg_dependencies(sections: &IniSections) -> Vec<Dependency> {
5007 let mut dependencies = Vec::new();
5008
5009 for (sub_section, scope) in [
5010 ("install_requires", "install"),
5011 ("tests_require", "test"),
5012 ("setup_requires", "setup"),
5013 ] {
5014 let reqs = get_ini_values(sections, "options", sub_section);
5015 dependencies.extend(parse_setup_cfg_requirements(&reqs, scope, false));
5016 }
5017
5018 if let Some(extras) = sections.get("options.extras_require") {
5019 let mut extra_items: Vec<_> = extras.iter().collect();
5020 extra_items.sort_by_key(|(name, _)| *name);
5021 for (extra_name, reqs) in extra_items {
5022 dependencies.extend(parse_setup_cfg_requirements(reqs, extra_name, true));
5023 }
5024 }
5025
5026 dependencies
5027}
5028
5029fn parse_setup_cfg_requirements(
5030 reqs: &[String],
5031 scope: &str,
5032 is_optional: bool,
5033) -> Vec<Dependency> {
5034 reqs.iter()
5035 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
5036 .collect()
5037}
5038
5039fn build_setup_cfg_dependency(req: &str, scope: &str, is_optional: bool) -> Option<Dependency> {
5040 let trimmed = req.trim();
5041 if trimmed.is_empty() || trimmed.starts_with('#') {
5042 return None;
5043 }
5044
5045 let name = extract_setup_cfg_dependency_name(trimmed)?;
5046 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
5047
5048 Some(Dependency {
5049 purl: Some(purl.to_string()),
5050 extracted_requirement: Some(normalize_setup_cfg_requirement(trimmed)),
5051 scope: Some(scope.to_string()),
5052 is_runtime: Some(true),
5053 is_optional: Some(is_optional),
5054 is_pinned: Some(false),
5055 is_direct: Some(true),
5056 resolved_package: None,
5057 extra_data: None,
5058 })
5059}
5060
/// Extracts the distribution name from a requirement string.
///
/// The name is the leading run of characters up to the first whitespace or
/// the first character that starts another part of a requirement: a version
/// operator (`<`, `>`, `=`, `!`, `~`), a parenthesised version spec (`(`),
/// extras (`[`), an environment marker (`;`) or a direct-reference URL (`@`).
///
/// Returns `None` when the input is blank or begins with one of those
/// characters.
fn extract_setup_cfg_dependency_name(req: &str) -> Option<String> {
    let trimmed = req.trim();

    let end = trimmed
        .find(|c: char| {
            c.is_whitespace()
                || matches!(c, '<' | '>' | '=' | '!' | '~' | ';' | '[' | '(' | '@')
        })
        .unwrap_or(trimmed.len());

    // `trimmed` has no leading whitespace and `end` stops at the first
    // whitespace, so the slice needs no further trimming.
    let name = &trimmed[..end];
    (!name.is_empty()).then(|| name.to_string())
}
5077
/// Returns `req` with every whitespace character removed.
fn normalize_setup_cfg_requirement(req: &str) -> String {
    req.split_whitespace().collect()
}
5081
/// Finds a quoted string assigned to `key` in `content`, e.g. `name="demo"`.
///
/// Looks for `key`, an `=` with at most one space on either side, and an
/// opening quote. Double-quoted forms are tried before single-quoted ones. The
/// value runs up to the next occurrence of the *same* quote character, so a
/// value such as `"it's"` is returned whole.
///
/// Matching is a plain substring search: `key` may also match as the tail of
/// a longer identifier. Returns `None` when no terminated value is found.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    // (text before `=`, text after `=`), in lookup priority order.
    const SPACINGS: [(&str, &str); 4] = [("", ""), (" ", ""), ("", " "), (" ", " ")];

    for quote in ['"', '\''] {
        for (before, after) in SPACINGS {
            let pattern = format!("{key}{before}={after}{quote}");
            let Some(start) = content.find(&pattern) else {
                continue;
            };

            let rest = &content[start + pattern.len()..];
            if let Some(end) = rest.find(quote) {
                return Some(rest[..end].to_string());
            }
            // Unterminated value: fall through to the remaining patterns.
        }
    }

    None
}
5107
5108fn extract_setup_py_dependencies(content: &str) -> Vec<Dependency> {
5109 let mut dependencies = Vec::new();
5110
5111 if let Some(tests_deps) = extract_tests_require(content) {
5112 dependencies.extend(tests_deps);
5113 }
5114
5115 if let Some(extras_deps) = extract_extras_require(content) {
5116 dependencies.extend(extras_deps);
5117 }
5118
5119 dependencies
5120}
5121
5122fn extract_tests_require(content: &str) -> Option<Vec<Dependency>> {
5123 let pattern = r"tests_require\s*=\s*\[([^\]]+)\]";
5124 let re = Regex::new(pattern).ok()?;
5125 let captures = re.captures(content)?;
5126 let deps_str = captures.get(1)?.as_str();
5127
5128 let deps = parse_setup_py_dep_list(deps_str, "test", true);
5129 if deps.is_empty() { None } else { Some(deps) }
5130}
5131
5132fn extract_extras_require(content: &str) -> Option<Vec<Dependency>> {
5133 let pattern = r"extras_require\s*=\s*\{([^}]+)\}";
5134 let re = Regex::new(pattern).ok()?;
5135 let captures = re.captures(content)?;
5136 let dict_content = captures.get(1)?.as_str();
5137
5138 let mut all_deps = Vec::new();
5139
5140 let entry_pattern = r#"['"]([^'"]+)['"]\s*:\s*\[([^\]]+)\]"#;
5141 let entry_re = Regex::new(entry_pattern).ok()?;
5142
5143 for entry_cap in entry_re.captures_iter(dict_content) {
5144 if let (Some(extra_name), Some(deps_str)) = (entry_cap.get(1), entry_cap.get(2)) {
5145 let deps = parse_setup_py_dep_list(deps_str.as_str(), extra_name.as_str(), true);
5146 all_deps.extend(deps);
5147 }
5148 }
5149
5150 if all_deps.is_empty() {
5151 None
5152 } else {
5153 Some(all_deps)
5154 }
5155}
5156
5157fn parse_setup_py_dep_list(deps_str: &str, scope: &str, is_optional: bool) -> Vec<Dependency> {
5158 let dep_pattern = r#"['"]([^'"]+)['"]"#;
5159 let re = match Regex::new(dep_pattern) {
5160 Ok(r) => r,
5161 Err(_) => return Vec::new(),
5162 };
5163
5164 re.captures_iter(deps_str)
5165 .filter_map(|cap| {
5166 let dep_str = cap.get(1)?.as_str().trim();
5167 if dep_str.is_empty() {
5168 return None;
5169 }
5170
5171 let name = extract_setup_cfg_dependency_name(dep_str)?;
5172 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
5173
5174 Some(Dependency {
5175 purl: Some(purl.to_string()),
5176 extracted_requirement: Some(dep_str.to_string()),
5177 scope: Some(scope.to_string()),
5178 is_runtime: Some(true),
5179 is_optional: Some(is_optional),
5180 is_pinned: Some(false),
5181 is_direct: Some(true),
5182 resolved_package: None,
5183 extra_data: None,
5184 })
5185 })
5186 .collect()
5187}
5188
5189pub(crate) fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
5191 let content = read_file_to_string(path).map_err(|e| e.to_string())?;
5192 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
5193}
5194
5195fn calculate_file_checksums(path: &Path) -> (Option<u64>, Option<Sha256Digest>) {
5206 let mut file = match File::open(path) {
5207 Ok(f) => f,
5208 Err(_) => return (None, None),
5209 };
5210
5211 let metadata = match file.metadata() {
5212 Ok(m) => m,
5213 Err(_) => return (None, None),
5214 };
5215 let size = metadata.len();
5216
5217 let mut hasher = Sha256::new();
5218 let mut buffer = vec![0; 8192];
5219
5220 loop {
5221 match file.read(&mut buffer) {
5222 Ok(0) => break,
5223 Ok(n) => hasher.update(&buffer[..n]),
5224 Err(_) => return (Some(size), None),
5225 }
5226 }
5227
5228 let hash = Sha256Digest::from_bytes(hasher.finalize().into());
5229 (Some(size), Some(hash))
5230}
5231
5232fn default_package_data(path: &Path) -> PackageData {
5233 PackageData {
5234 package_type: Some(PythonParser::PACKAGE_TYPE),
5235 primary_language: Some("Python".to_string()),
5236 datasource_id: infer_python_datasource_id(path),
5237 ..Default::default()
5238 }
5239}
5240
5241fn infer_python_datasource_id(path: &Path) -> Option<DatasourceId> {
5242 let file_name = path.file_name().and_then(|name| name.to_str());
5243
5244 match file_name {
5245 Some("pyproject.toml") => {
5246 if read_toml_file(path)
5247 .ok()
5248 .and_then(|content| content.get("tool").and_then(|v| v.as_table()).cloned())
5249 .and_then(|tool| tool.get("poetry").and_then(|v| v.as_table()).cloned())
5250 .is_some()
5251 {
5252 Some(DatasourceId::PypiPoetryPyprojectToml)
5253 } else {
5254 Some(DatasourceId::PypiPyprojectToml)
5255 }
5256 }
5257 Some(name) if name == "setup.py" || name.ends_with("_setup.py") => {
5258 Some(DatasourceId::PypiSetupPy)
5259 }
5260 Some("setup.cfg") => Some(DatasourceId::PypiSetupCfg),
5261 Some("PKG-INFO") => Some(detect_pkg_info_datasource_id(path)),
5262 Some("METADATA") if is_installed_wheel_metadata_path(path) => {
5263 Some(DatasourceId::PypiWheelMetadata)
5264 }
5265 Some("pypi.json") => Some(DatasourceId::PypiJson),
5266 Some("pip-inspect.deplock") => Some(DatasourceId::PypiInspectDeplock),
5267 Some("origin.json") if is_pip_cache_origin_json(path) => {
5268 Some(DatasourceId::PypiPipOriginJson)
5269 }
5270 _ if file_name.is_some_and(is_likely_python_sdist_filename) => {
5271 Some(DatasourceId::PypiSdist)
5272 }
5273 _ if path
5274 .extension()
5275 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) =>
5276 {
5277 Some(DatasourceId::PypiWheel)
5278 }
5279 _ if path
5280 .extension()
5281 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg")) =>
5282 {
5283 Some(DatasourceId::PypiEgg)
5284 }
5285 _ => None,
5286 }
5287}
5288
// Registers this parser's metadata: a human-readable description, the glob
// patterns of the files it handles, and then "pypi", "Python" and a
// documentation URL (presumably the package type and primary language —
// confirm against the `register_parser!` definition).
//
// NOTE(review): `infer_python_datasource_id` recognises `pip-inspect.deplock`,
// but no pattern below matches that file name — confirm whether it is
// registered elsewhere.
crate::register_parser!(
    "Python package manifests (pyproject.toml, setup.py, *_setup.py, setup.cfg, pypi.json, PKG-INFO, .dist-info/METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
    &[
        "**/pyproject.toml",
        "**/setup.py",
        "**/*_setup.py",
        "**/setup.cfg",
        "**/pypi.json",
        "**/PKG-INFO",
        "**/*.dist-info/METADATA",
        "**/origin.json",
        "**/*.tar.gz",
        "**/*.tgz",
        "**/*.tar.bz2",
        "**/*.tar.xz",
        "**/*.zip",
        "**/*.whl",
        "**/*.egg"
    ],
    "pypi",
    "Python",
    Some("https://packaging.python.org/"),
);