1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parsers::utils::{read_file_to_string, split_name_email};
36use base64::Engine;
37use base64::engine::general_purpose::URL_SAFE_NO_PAD;
38use bzip2::read::BzDecoder;
39use csv::ReaderBuilder;
40use flate2::read::GzDecoder;
41use liblzma::read::XzDecoder;
42use log::warn;
43use packageurl::PackageUrl;
44use regex::Regex;
45use rustpython_parser::{Parse, ast};
46use serde_json::{Map as JsonMap, Value as JsonValue};
47use sha2::{Digest, Sha256};
48use std::collections::{HashMap, HashSet};
49use std::fs::File;
50use std::io::Read;
51use std::path::{Component, Path, PathBuf};
52use tar::Archive;
53use toml::Value as TomlValue;
54use toml::map::Map as TomlMap;
55use zip::ZipArchive;
56
57use super::PackageParser;
58use super::license_normalization::normalize_spdx_declared_license;
59
// Metadata field/table keys shared by the parsers in this file.
const FIELD_PROJECT: &str = "project";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_DEPENDENCY_GROUPS: &str = "dependency-groups";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";

// Guard rails for parsing setup.py sources with the Python AST parser.
const MAX_SETUP_PY_BYTES: usize = 1_048_576; // 1 MiB of source text
const MAX_SETUP_PY_AST_NODES: usize = 10_000;
const MAX_SETUP_PY_AST_DEPTH: usize = 50;

// Archive-bomb protection limits used by the wheel/sdist/egg extractors below.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024; // total uncompressed bytes
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; // per-entry uncompressed bytes
const MAX_COMPRESSION_RATIO: f64 = 100.0; // uncompressed:compressed heuristic

/// Parser for Python (PyPI) package metadata files and archives.
pub struct PythonParser;
91
/// On-disk archive formats recognized as Python source distributions.
#[derive(Clone, Copy, Debug)]
enum PythonSdistArchiveFormat {
    TarGz,
    Tgz,
    TarBz2,
    TarXz,
    Zip,
}
100
/// A zip entry that passed the safety checks in `collect_validated_zip_entries`.
#[derive(Clone, Debug)]
struct ValidatedZipEntry {
    // Index into the zip central directory, usable with `by_index`.
    index: usize,
    // Entry path as produced by `normalize_archive_entry_path`.
    name: String,
}
106
107impl PackageParser for PythonParser {
108 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
109
110 fn extract_packages(path: &Path) -> Vec<PackageData> {
111 vec![
112 if path.file_name().unwrap_or_default() == "pyproject.toml" {
113 extract_from_pyproject_toml(path)
114 } else if path.file_name().unwrap_or_default() == "setup.cfg" {
115 extract_from_setup_cfg(path)
116 } else if path.file_name().unwrap_or_default() == "setup.py" {
117 extract_from_setup_py(path)
118 } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
119 extract_from_rfc822_metadata(path, DatasourceId::PypiSdistPkginfo)
120 } else if path.file_name().unwrap_or_default() == "METADATA" {
121 extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
122 } else if is_pip_cache_origin_json(path) {
123 extract_from_pip_origin_json(path)
124 } else if path.file_name().unwrap_or_default() == "pypi.json" {
125 extract_from_pypi_json(path)
126 } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
127 extract_from_pip_inspect(path)
128 } else if is_python_sdist_archive_path(path) {
129 extract_from_sdist_archive(path)
130 } else if path
131 .extension()
132 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
133 {
134 extract_from_wheel_archive(path)
135 } else if path
136 .extension()
137 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
138 {
139 extract_from_egg_archive(path)
140 } else {
141 default_package_data()
142 },
143 ]
144 }
145
146 fn is_match(path: &Path) -> bool {
147 if let Some(filename) = path.file_name()
148 && (filename == "pyproject.toml"
149 || filename == "setup.cfg"
150 || filename == "setup.py"
151 || filename == "PKG-INFO"
152 || filename == "METADATA"
153 || filename == "pypi.json"
154 || filename == "pip-inspect.deplock"
155 || is_pip_cache_origin_json(path))
156 {
157 return true;
158 }
159
160 if let Some(extension) = path.extension() {
161 let ext = extension.to_string_lossy().to_lowercase();
162 if ext == "whl" || ext == "egg" || is_python_sdist_archive_path(path) {
163 return true;
164 }
165 }
166
167 false
168 }
169}
170
/// Data parsed from an installed wheel's `WHEEL` metadata file.
#[derive(Debug, Clone)]
struct InstalledWheelMetadata {
    // All `Tag` header values (python-abi-platform triples).
    wheel_tags: Vec<String>,
    // `Wheel-Version` header, when present.
    wheel_version: Option<String>,
    // `Generator` header, when present.
    wheel_generator: Option<String>,
    // `Root-Is-Purelib` header parsed as a boolean, when present and valid.
    root_is_purelib: Option<bool>,
    // Single tag combining all tags when they share the same abi/platform.
    compressed_tag: Option<String>,
}
179
180fn merge_sibling_wheel_metadata(path: &Path, package_data: &mut PackageData) {
181 let Some(parent) = path.parent() else {
182 return;
183 };
184
185 if !parent
186 .file_name()
187 .and_then(|name| name.to_str())
188 .is_some_and(|name| name.ends_with(".dist-info"))
189 {
190 return;
191 }
192
193 let wheel_path = parent.join("WHEEL");
194 if !wheel_path.exists() {
195 return;
196 }
197
198 let Ok(content) = read_file_to_string(&wheel_path) else {
199 warn!("Failed to read sibling WHEEL file at {:?}", wheel_path);
200 return;
201 };
202
203 let Some(wheel_metadata) = parse_installed_wheel_metadata(&content) else {
204 return;
205 };
206
207 apply_installed_wheel_metadata(package_data, &wheel_metadata);
208}
209
210fn parse_installed_wheel_metadata(content: &str) -> Option<InstalledWheelMetadata> {
211 use super::rfc822::{get_header_all, get_header_first};
212
213 let metadata = super::rfc822::parse_rfc822_content(content);
214 let wheel_tags = get_header_all(&metadata.headers, "tag");
215 if wheel_tags.is_empty() {
216 return None;
217 }
218
219 let wheel_version = get_header_first(&metadata.headers, "wheel-version");
220 let wheel_generator = get_header_first(&metadata.headers, "generator");
221 let root_is_purelib =
222 get_header_first(&metadata.headers, "root-is-purelib").and_then(|value| {
223 match value.to_ascii_lowercase().as_str() {
224 "true" => Some(true),
225 "false" => Some(false),
226 _ => None,
227 }
228 });
229
230 let compressed_tag = compress_wheel_tags(&wheel_tags);
231
232 Some(InstalledWheelMetadata {
233 wheel_tags,
234 wheel_version,
235 wheel_generator,
236 root_is_purelib,
237 compressed_tag,
238 })
239}
240
/// Collapses several `python-abi-platform` wheel tags into the compressed
/// `py1.py2-abi-platform` form.
///
/// Returns `None` when the list is empty, when any tag is malformed, or when
/// the tags do not all share the same abi and platform.
fn compress_wheel_tags(tags: &[String]) -> Option<String> {
    match tags {
        [] => None,
        // A single tag needs no compression.
        [only] => Some(only.clone()),
        _ => {
            let mut python_tags = Vec::with_capacity(tags.len());
            let mut shared: Option<(&str, &str)> = None;

            for tag in tags {
                let mut pieces = tag.splitn(3, '-');
                let python = pieces.next()?;
                let abi = pieces.next()?;
                let platform = pieces.next()?;

                // All tags must agree on abi and platform to be compressible.
                match shared {
                    Some((seen_abi, seen_platform))
                        if seen_abi != abi || seen_platform != platform =>
                    {
                        return None;
                    }
                    _ => shared = Some((abi, platform)),
                }
                python_tags.push(python);
            }

            let (abi, platform) = shared?;
            Some(format!("{}-{}-{}", python_tags.join("."), abi, platform))
        }
    }
}
278
279fn apply_installed_wheel_metadata(
280 package_data: &mut PackageData,
281 wheel_metadata: &InstalledWheelMetadata,
282) {
283 let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
284 extra_data.insert(
285 "wheel_tags".to_string(),
286 JsonValue::Array(
287 wheel_metadata
288 .wheel_tags
289 .iter()
290 .cloned()
291 .map(JsonValue::String)
292 .collect(),
293 ),
294 );
295
296 if let Some(wheel_version) = &wheel_metadata.wheel_version {
297 extra_data.insert(
298 "wheel_version".to_string(),
299 JsonValue::String(wheel_version.clone()),
300 );
301 }
302
303 if let Some(wheel_generator) = &wheel_metadata.wheel_generator {
304 extra_data.insert(
305 "wheel_generator".to_string(),
306 JsonValue::String(wheel_generator.clone()),
307 );
308 }
309
310 if let Some(root_is_purelib) = wheel_metadata.root_is_purelib {
311 extra_data.insert(
312 "root_is_purelib".to_string(),
313 JsonValue::Bool(root_is_purelib),
314 );
315 }
316
317 if let (Some(name), Some(version), Some(extension)) = (
318 package_data.name.as_deref(),
319 package_data.version.as_deref(),
320 wheel_metadata.compressed_tag.as_deref(),
321 ) {
322 package_data.purl = build_pypi_purl_with_extension(name, Some(version), extension);
323 }
324}
325
/// True for a file literally named `origin.json` that sits anywhere below a
/// directory named `wheels` (case-insensitive), as in pip's HTTP cache layout.
fn is_pip_cache_origin_json(path: &Path) -> bool {
    let named_origin_json = path
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name == "origin.json");

    named_origin_json
        && path.ancestors().skip(1).any(|ancestor| {
            matches!(
                ancestor.file_name().and_then(|name| name.to_str()),
                Some(name) if name.eq_ignore_ascii_case("wheels")
            )
        })
}
335
/// Builds package data from a pip HTTP-cache `origin.json` file.
///
/// Reads the recorded download `url`, infers name/version from that URL's
/// trailing filename (falling back to a sibling cached `.whl` in the same
/// directory), and records the archive's sha256 hash when one is present.
fn extract_from_pip_origin_json(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: JsonValue = match serde_json::from_str(&content) {
        Ok(root) => root,
        Err(e) => {
            warn!("Failed to parse pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Without a download URL there is nothing reliable to report.
    let Some(download_url) = root.get("url").and_then(|value| value.as_str()) else {
        warn!("No url found in pip cache origin.json at {:?}", path);
        return default_package_data();
    };

    // Prefer name/version parsed from the URL; fall back to a cached wheel
    // file sitting next to origin.json.
    let sibling_wheel = find_sibling_cached_wheel(path);
    let name_version = parse_name_version_from_origin_url(download_url).or_else(|| {
        sibling_wheel
            .as_ref()
            .map(|wheel_info| (wheel_info.name.clone(), wheel_info.version.clone()))
    });

    let Some((name, version)) = name_version else {
        warn!(
            "Failed to infer package name/version from pip cache origin.json at {:?}",
            path
        );
        return default_package_data();
    };

    // A wheel-specific purl (carrying tags) beats the plain pypi purl when a
    // sibling wheel is available.
    let (repository_homepage_url, repository_download_url, api_data_url, plain_purl) =
        build_pypi_urls(Some(&name), Some(&version));
    let purl = sibling_wheel
        .as_ref()
        .and_then(|wheel_info| build_wheel_purl(Some(&name), Some(&version), wheel_info))
        .or(plain_purl);

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        primary_language: Some("Python".to_string()),
        name: Some(name),
        version: Some(version),
        datasource_id: Some(DatasourceId::PypiPipOriginJson),
        download_url: Some(download_url.to_string()),
        sha256: extract_sha256_from_origin_json(&root),
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        purl,
        ..Default::default()
    }
}
395
396fn find_sibling_cached_wheel(path: &Path) -> Option<WheelInfo> {
397 let parent = path.parent()?;
398 let entries = parent.read_dir().ok()?;
399
400 for entry in entries.flatten() {
401 let sibling_path = entry.path();
402 if sibling_path
403 .extension()
404 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
405 && let Some(wheel_info) = parse_wheel_filename(&sibling_path)
406 {
407 return Some(wheel_info);
408 }
409 }
410
411 None
412}
413
414fn parse_name_version_from_origin_url(url: &str) -> Option<(String, String)> {
415 let file_name = url.rsplit('/').next()?;
416
417 if file_name.ends_with(".whl") {
418 return parse_wheel_filename(Path::new(file_name))
419 .map(|wheel_info| (wheel_info.name, wheel_info.version));
420 }
421
422 let stem = strip_python_archive_extension(file_name)?;
423 let (name, version) = stem.rsplit_once('-')?;
424 if name.is_empty() || version.is_empty() {
425 return None;
426 }
427
428 Some((name.replace('_', "-"), version.to_string()))
429}
430
/// Strips a known Python archive extension from a filename, returning the
/// remaining stem, or `None` when no known extension matches.
fn strip_python_archive_extension(file_name: &str) -> Option<&str> {
    const ARCHIVE_SUFFIXES: [&str; 6] = [".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".whl"];

    for suffix in ARCHIVE_SUFFIXES {
        if let Some(stem) = file_name.strip_suffix(suffix) {
            return Some(stem);
        }
    }
    None
}
436
437fn extract_sha256_from_origin_json(root: &JsonValue) -> Option<String> {
438 root.pointer("/archive_info/hashes/sha256")
439 .and_then(|value| value.as_str())
440 .map(ToOwned::to_owned)
441 .or_else(|| {
442 root.pointer("/archive_info/hash")
443 .and_then(|value| value.as_str())
444 .and_then(normalize_origin_hash)
445 })
446}
447
/// Normalizes a legacy origin.json hash string to a bare sha256 hex digest.
///
/// Accepts `sha256=…`, `sha256:…`, or a bare 64-character hex string;
/// anything else yields `None`.
fn normalize_origin_hash(hash: &str) -> Option<String> {
    let stripped = hash
        .strip_prefix("sha256=")
        .or_else(|| hash.strip_prefix("sha256:"));
    if let Some(value) = stripped {
        return Some(value.to_string());
    }

    let looks_like_digest = hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit());
    looks_like_digest.then(|| hash.to_string())
}
460
461fn extract_from_rfc822_metadata(path: &Path, datasource_id: DatasourceId) -> PackageData {
462 let content = match read_file_to_string(path) {
463 Ok(content) => content,
464 Err(e) => {
465 warn!("Failed to read metadata at {:?}: {}", path, e);
466 return default_package_data();
467 }
468 };
469
470 let metadata = super::rfc822::parse_rfc822_content(&content);
471 let mut package_data = build_package_data_from_rfc822(&metadata, datasource_id);
472 merge_sibling_metadata_dependencies(path, &mut package_data);
473 merge_sibling_metadata_file_references(path, &mut package_data);
474 if datasource_id == DatasourceId::PypiWheelMetadata {
475 merge_sibling_wheel_metadata(path, &mut package_data);
476 }
477 package_data
478}
479
480fn merge_sibling_metadata_dependencies(path: &Path, package_data: &mut PackageData) {
481 let mut extra_dependencies = Vec::new();
482
483 if let Some(parent) = path.parent() {
484 let direct_requires = parent.join("requires.txt");
485 if direct_requires.exists()
486 && let Ok(content) = read_file_to_string(&direct_requires)
487 {
488 extra_dependencies.extend(parse_requires_txt(&content));
489 }
490
491 let sibling_egg_info_requires = parent
492 .read_dir()
493 .ok()
494 .into_iter()
495 .flatten()
496 .flatten()
497 .find_map(|entry| {
498 let child_path = entry.path();
499 if child_path.is_dir()
500 && child_path
501 .file_name()
502 .and_then(|name| name.to_str())
503 .is_some_and(|name| name.ends_with(".egg-info"))
504 {
505 let requires = child_path.join("requires.txt");
506 requires.exists().then_some(requires)
507 } else {
508 None
509 }
510 });
511
512 if let Some(requires_path) = sibling_egg_info_requires
513 && let Ok(content) = read_file_to_string(&requires_path)
514 {
515 extra_dependencies.extend(parse_requires_txt(&content));
516 }
517 }
518
519 for dependency in extra_dependencies {
520 if !package_data.dependencies.iter().any(|existing| {
521 existing.purl == dependency.purl
522 && existing.scope == dependency.scope
523 && existing.extracted_requirement == dependency.extracted_requirement
524 && existing.extra_data == dependency.extra_data
525 }) {
526 package_data.dependencies.push(dependency);
527 }
528 }
529}
530
531fn merge_sibling_metadata_file_references(path: &Path, package_data: &mut PackageData) {
532 let mut extra_refs = Vec::new();
533
534 if let Some(parent) = path.parent() {
535 let record_path = parent.join("RECORD");
536 if record_path.exists()
537 && let Ok(content) = read_file_to_string(&record_path)
538 {
539 extra_refs.extend(parse_record_csv(&content));
540 }
541
542 let installed_files_path = parent.join("installed-files.txt");
543 if installed_files_path.exists()
544 && let Ok(content) = read_file_to_string(&installed_files_path)
545 {
546 extra_refs.extend(parse_installed_files_txt(&content));
547 }
548
549 let sources_path = parent.join("SOURCES.txt");
550 if sources_path.exists()
551 && let Ok(content) = read_file_to_string(&sources_path)
552 {
553 extra_refs.extend(parse_sources_txt(&content));
554 }
555 }
556
557 for file_ref in extra_refs {
558 if !package_data
559 .file_references
560 .iter()
561 .any(|existing| existing.path == file_ref.path)
562 {
563 package_data.file_references.push(file_ref);
564 }
565 }
566}
567
/// Validates every entry of a zip archive without decompressing any data.
///
/// Returns the indices and normalized names of entries that pass the checks.
/// Entries with unsafe paths, a per-entry compression ratio above
/// `MAX_COMPRESSION_RATIO`, or an uncompressed size above `MAX_FILE_SIZE` are
/// skipped with a warning; exceeding `MAX_ARCHIVE_SIZE` in total is a hard
/// error because the archive as a whole looks like a zip bomb.
fn collect_validated_zip_entries<R: Read + std::io::Seek>(
    archive: &mut ZipArchive<R>,
    path: &Path,
    archive_type: &str,
) -> Result<Vec<ValidatedZipEntry>, String> {
    // Running total of accepted entries' header-declared uncompressed sizes.
    let mut total_extracted = 0u64;
    let mut entries = Vec::new();

    for i in 0..archive.len() {
        // by_index_raw reads the entry header without decompressing its data.
        if let Ok(file) = archive.by_index_raw(i) {
            let compressed_size = file.compressed_size();
            let uncompressed_size = file.size();
            let Some(entry_name) = normalize_archive_entry_path(file.name()) else {
                warn!(
                    "Skipping unsafe path in {} {:?}: {}",
                    archive_type,
                    path,
                    file.name()
                );
                continue;
            };

            // Per-entry zip-bomb heuristic based on header-declared sizes.
            if compressed_size > 0 {
                let ratio = uncompressed_size as f64 / compressed_size as f64;
                if ratio > MAX_COMPRESSION_RATIO {
                    warn!(
                        "Suspicious compression ratio in {} {:?}: {:.2}:1",
                        archive_type, path, ratio
                    );
                    continue;
                }
            }

            if uncompressed_size > MAX_FILE_SIZE {
                warn!(
                    "File too large in {} {:?}: {} bytes (limit: {} bytes)",
                    archive_type, path, uncompressed_size, MAX_FILE_SIZE
                );
                continue;
            }

            // Blowing the total budget fails the whole archive, not just
            // the entry — large totals indicate a deliberately hostile file.
            total_extracted += uncompressed_size;
            if total_extracted > MAX_ARCHIVE_SIZE {
                let msg = format!(
                    "Total extracted size exceeds limit for {} {:?}",
                    archive_type, path
                );
                warn!("{}", msg);
                return Err(msg);
            }

            entries.push(ValidatedZipEntry {
                index: i,
                name: entry_name,
            });
        }
    }

    Ok(entries)
}
628
/// True when the path's filename matches a recognized Python sdist archive
/// naming pattern and extension.
fn is_python_sdist_archive_path(path: &Path) -> bool {
    detect_python_sdist_archive_format(path).is_some()
}
632
633fn detect_python_sdist_archive_format(path: &Path) -> Option<PythonSdistArchiveFormat> {
634 let file_name = path.file_name()?.to_str()?.to_ascii_lowercase();
635
636 if !is_likely_python_sdist_filename(&file_name) {
637 return None;
638 }
639
640 if file_name.ends_with(".tar.gz") {
641 Some(PythonSdistArchiveFormat::TarGz)
642 } else if file_name.ends_with(".tgz") {
643 Some(PythonSdistArchiveFormat::Tgz)
644 } else if file_name.ends_with(".tar.bz2") {
645 Some(PythonSdistArchiveFormat::TarBz2)
646 } else if file_name.ends_with(".tar.xz") {
647 Some(PythonSdistArchiveFormat::TarXz)
648 } else if file_name.ends_with(".zip") {
649 Some(PythonSdistArchiveFormat::Zip)
650 } else {
651 None
652 }
653}
654
655fn is_likely_python_sdist_filename(file_name: &str) -> bool {
656 let Some(stem) = strip_python_archive_extension(file_name) else {
657 return false;
658 };
659
660 let Some((name, version)) = stem.rsplit_once('-') else {
661 return false;
662 };
663
664 !name.is_empty()
665 && !version.is_empty()
666 && version.chars().any(|ch| ch.is_ascii_digit())
667 && name
668 .chars()
669 .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
670}
671
672fn extract_from_sdist_archive(path: &Path) -> PackageData {
673 let metadata = match std::fs::metadata(path) {
674 Ok(m) => m,
675 Err(e) => {
676 warn!(
677 "Failed to read metadata for sdist archive {:?}: {}",
678 path, e
679 );
680 return default_package_data();
681 }
682 };
683
684 if metadata.len() > MAX_ARCHIVE_SIZE {
685 warn!(
686 "sdist archive too large: {} bytes (limit: {} bytes)",
687 metadata.len(),
688 MAX_ARCHIVE_SIZE
689 );
690 return default_package_data();
691 }
692
693 let Some(format) = detect_python_sdist_archive_format(path) else {
694 return default_package_data();
695 };
696
697 let mut package_data = match format {
698 PythonSdistArchiveFormat::TarGz | PythonSdistArchiveFormat::Tgz => {
699 let file = match File::open(path) {
700 Ok(file) => file,
701 Err(e) => {
702 warn!("Failed to open sdist archive {:?}: {}", path, e);
703 return default_package_data();
704 }
705 };
706 let decoder = GzDecoder::new(file);
707 extract_from_tar_sdist_archive(path, decoder, "tar.gz", metadata.len())
708 }
709 PythonSdistArchiveFormat::TarBz2 => {
710 let file = match File::open(path) {
711 Ok(file) => file,
712 Err(e) => {
713 warn!("Failed to open sdist archive {:?}: {}", path, e);
714 return default_package_data();
715 }
716 };
717 let decoder = BzDecoder::new(file);
718 extract_from_tar_sdist_archive(path, decoder, "tar.bz2", metadata.len())
719 }
720 PythonSdistArchiveFormat::TarXz => {
721 let file = match File::open(path) {
722 Ok(file) => file,
723 Err(e) => {
724 warn!("Failed to open sdist archive {:?}: {}", path, e);
725 return default_package_data();
726 }
727 };
728 let decoder = XzDecoder::new(file);
729 extract_from_tar_sdist_archive(path, decoder, "tar.xz", metadata.len())
730 }
731 PythonSdistArchiveFormat::Zip => extract_from_zip_sdist_archive(path),
732 };
733
734 if package_data.package_type.is_some() {
735 let (size, sha256) = calculate_file_checksums(path);
736 package_data.size = size;
737 package_data.sha256 = sha256;
738 }
739
740 package_data
741}
742
/// Streams a compressed tar sdist and collects the text contents of relevant
/// metadata entries (PKG-INFO, requires.txt, SOURCES.txt) while enforcing
/// tar-bomb limits, then builds package data from what was collected.
///
/// `compressed_size` is the on-disk archive size; it feeds the whole-archive
/// compression-ratio check.
fn extract_from_tar_sdist_archive<R: Read>(
    path: &Path,
    reader: R,
    archive_type: &str,
    compressed_size: u64,
) -> PackageData {
    let mut archive = Archive::new(reader);
    let archive_entries = match archive.entries() {
        Ok(entries) => entries,
        Err(e) => {
            warn!(
                "Failed to read {} sdist archive {:?}: {}",
                archive_type, path, e
            );
            return default_package_data();
        }
    };

    // Running total of header-declared entry sizes across kept entries.
    let mut total_extracted = 0u64;
    let mut entries = Vec::new();

    for entry_result in archive_entries {
        let mut entry = match entry_result {
            Ok(entry) => entry,
            Err(e) => {
                warn!(
                    "Failed to read {} sdist entry from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        // An oversized single entry is skipped; the rest is still processed.
        let entry_size = entry.size();
        if entry_size > MAX_FILE_SIZE {
            warn!(
                "File too large in {} sdist {:?}: {} bytes (limit: {} bytes)",
                archive_type, path, entry_size, MAX_FILE_SIZE
            );
            continue;
        }

        // Exceeding the total budget aborts extraction entirely.
        total_extracted += entry_size;
        if total_extracted > MAX_ARCHIVE_SIZE {
            warn!(
                "Total extracted size exceeds limit for {} sdist {:?}",
                archive_type, path
            );
            return default_package_data();
        }

        // Whole-archive compression-ratio check (tar-bomb heuristic).
        if compressed_size > 0 {
            let ratio = total_extracted as f64 / compressed_size as f64;
            if ratio > MAX_COMPRESSION_RATIO {
                warn!(
                    "Suspicious compression ratio in {} sdist {:?}: {:.2}:1",
                    archive_type, path, ratio
                );
                return default_package_data();
            }
        }

        // Normalize entry paths to forward slashes before validation.
        let entry_path = match entry.path() {
            Ok(path) => path.to_string_lossy().replace('\\', "/"),
            Err(e) => {
                warn!(
                    "Failed to get {} sdist entry path from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        // Paths rejected by normalize_archive_entry_path are treated as unsafe.
        let Some(entry_path) = normalize_archive_entry_path(&entry_path) else {
            warn!("Skipping unsafe {} sdist path in {:?}", archive_type, path);
            continue;
        };

        // Only a handful of metadata files are worth reading into memory.
        if !is_relevant_sdist_text_entry(&entry_path) {
            continue;
        }

        if let Ok(content) = read_limited_utf8(
            &mut entry,
            MAX_FILE_SIZE,
            &format!("{} entry {}", archive_type, entry_path),
        ) {
            entries.push((entry_path, content));
        }
    }

    build_sdist_package_data(path, entries)
}
836
837fn extract_from_zip_sdist_archive(path: &Path) -> PackageData {
838 let file = match File::open(path) {
839 Ok(file) => file,
840 Err(e) => {
841 warn!("Failed to open zip sdist archive {:?}: {}", path, e);
842 return default_package_data();
843 }
844 };
845
846 let mut archive = match ZipArchive::new(file) {
847 Ok(archive) => archive,
848 Err(e) => {
849 warn!("Failed to read zip sdist archive {:?}: {}", path, e);
850 return default_package_data();
851 }
852 };
853
854 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "sdist zip") {
855 Ok(entries) => entries,
856 Err(_) => return default_package_data(),
857 };
858
859 let mut entries = Vec::new();
860 for entry in validated_entries.iter() {
861 if !is_relevant_sdist_text_entry(&entry.name) {
862 continue;
863 }
864
865 if let Ok(content) = read_validated_zip_entry(&mut archive, entry, path, "sdist zip") {
866 entries.push((entry.name.clone(), content));
867 }
868 }
869
870 build_sdist_package_data(path, entries)
871}
872
/// True for the archive entries whose text content the sdist extractors
/// read: PKG-INFO, requires.txt, and SOURCES.txt (each inside a directory —
/// the leading slash in the suffix excludes bare top-level names).
fn is_relevant_sdist_text_entry(entry_path: &str) -> bool {
    ["/PKG-INFO", "/requires.txt", "/SOURCES.txt"]
        .iter()
        .any(|suffix| entry_path.ends_with(suffix))
}
878
879fn build_sdist_package_data(path: &Path, entries: Vec<(String, String)>) -> PackageData {
880 let Some((metadata_path, metadata_content)) = select_sdist_pkginfo_entry(path, &entries) else {
881 warn!("No PKG-INFO file found in sdist archive {:?}", path);
882 return default_package_data();
883 };
884
885 let mut package_data =
886 python_parse_rfc822_content(&metadata_content, DatasourceId::PypiSdistPkginfo);
887 merge_sdist_archive_dependencies(&entries, &metadata_path, &mut package_data);
888 merge_sdist_archive_file_references(&entries, &metadata_path, &mut package_data);
889 apply_sdist_name_version_fallback(path, &mut package_data);
890 package_data
891}
892
/// Picks the best PKG-INFO entry out of an sdist's collected text entries.
///
/// Candidates are ranked (lower wins) by:
/// 1. whether their `Name` header matches the package name implied by the
///    archive filename,
/// 2. their location: `<root>/<pkg>.egg-info/PKG-INFO`, then
///    `<root>/PKG-INFO`, then any other `.egg-info/PKG-INFO`, then the rest,
/// 3. path depth, then the path itself as a deterministic tie-breaker.
fn select_sdist_pkginfo_entry(
    archive_path: &Path,
    entries: &[(String, String)],
) -> Option<(String, String)> {
    // Package name implied by the archive filename, e.g. "foo" in foo-1.0.tar.gz.
    let expected_name = archive_path
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(strip_python_archive_extension)
        .and_then(|stem| {
            stem.rsplit_once('-')
                .map(|(name, _)| normalize_python_package_name(name))
        });

    entries
        .iter()
        .filter(|(entry_path, _)| entry_path.ends_with("/PKG-INFO"))
        .min_by_key(|(entry_path, content)| {
            let components: Vec<_> = entry_path
                .split('/')
                .filter(|part| !part.is_empty())
                .collect();
            // Rank 0 when the candidate's Name header matches the expected name.
            let metadata = super::rfc822::parse_rfc822_content(content);
            let candidate_name = super::rfc822::get_header_first(&metadata.headers, "name")
                .map(|name| normalize_python_package_name(&name));
            let name_rank = if candidate_name == expected_name {
                0
            } else {
                1
            };
            // Rank the entry's location within the archive (see doc above).
            let kind_rank = if components.len() == 3
                && components[1].ends_with(".egg-info")
                && components[2] == "PKG-INFO"
            {
                0
            } else if components.len() == 2 && components[1] == "PKG-INFO" {
                1
            } else if entry_path.ends_with(".egg-info/PKG-INFO") {
                2
            } else {
                3
            };

            (name_rank, kind_rank, components.len(), entry_path.clone())
        })
        .map(|(entry_path, content)| (entry_path.clone(), content.clone()))
}
939
940fn merge_sdist_archive_dependencies(
941 entries: &[(String, String)],
942 metadata_path: &str,
943 package_data: &mut PackageData,
944) {
945 let metadata_dir = metadata_path
946 .rsplit_once('/')
947 .map(|(dir, _)| dir)
948 .unwrap_or("");
949 let archive_root = metadata_path.split('/').next().unwrap_or("");
950 let matched_egg_info_dir =
951 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
952 let mut extra_dependencies = Vec::new();
953
954 for (entry_path, content) in entries {
955 let is_direct_requires =
956 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/requires.txt");
957 let is_egg_info_requires = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
958 entry_path == &format!("{archive_root}/{egg_info_dir}/requires.txt")
959 });
960
961 if is_direct_requires || is_egg_info_requires {
962 extra_dependencies.extend(parse_requires_txt(content));
963 }
964 }
965
966 for dependency in extra_dependencies {
967 if !package_data.dependencies.iter().any(|existing| {
968 existing.purl == dependency.purl
969 && existing.scope == dependency.scope
970 && existing.extracted_requirement == dependency.extracted_requirement
971 && existing.extra_data == dependency.extra_data
972 }) {
973 package_data.dependencies.push(dependency);
974 }
975 }
976}
977
978fn merge_sdist_archive_file_references(
979 entries: &[(String, String)],
980 metadata_path: &str,
981 package_data: &mut PackageData,
982) {
983 let metadata_dir = metadata_path
984 .rsplit_once('/')
985 .map(|(dir, _)| dir)
986 .unwrap_or("");
987 let archive_root = metadata_path.split('/').next().unwrap_or("");
988 let matched_egg_info_dir =
989 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
990 let mut extra_refs = Vec::new();
991
992 for (entry_path, content) in entries {
993 let is_direct_sources =
994 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/SOURCES.txt");
995 let is_egg_info_sources = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
996 entry_path == &format!("{archive_root}/{egg_info_dir}/SOURCES.txt")
997 });
998
999 if is_direct_sources || is_egg_info_sources {
1000 extra_refs.extend(parse_sources_txt(content));
1001 }
1002 }
1003
1004 for file_ref in extra_refs {
1005 if !package_data
1006 .file_references
1007 .iter()
1008 .any(|existing| existing.path == file_ref.path)
1009 {
1010 package_data.file_references.push(file_ref);
1011 }
1012 }
1013}
1014
1015fn select_matching_sdist_egg_info_dir(
1016 entries: &[(String, String)],
1017 archive_root: &str,
1018 package_name: Option<&str>,
1019) -> Option<String> {
1020 let normalized_package_name = package_name.map(normalize_python_package_name);
1021
1022 entries
1023 .iter()
1024 .filter_map(|(entry_path, _)| {
1025 let components: Vec<_> = entry_path
1026 .split('/')
1027 .filter(|part| !part.is_empty())
1028 .collect();
1029 if components.len() == 3
1030 && components[0] == archive_root
1031 && components[1].ends_with(".egg-info")
1032 {
1033 Some(components[1].to_string())
1034 } else {
1035 None
1036 }
1037 })
1038 .min_by_key(|egg_info_dir| {
1039 let normalized_dir_name =
1040 normalize_python_package_name(egg_info_dir.trim_end_matches(".egg-info"));
1041 let name_rank = if Some(normalized_dir_name.clone()) == normalized_package_name {
1042 0
1043 } else {
1044 1
1045 };
1046
1047 (name_rank, egg_info_dir.clone())
1048 })
1049}
1050
/// Canonicalizes a Python package name for comparison: lowercase ASCII with
/// underscores folded to hyphens.
fn normalize_python_package_name(name: &str) -> String {
    name.chars()
        .map(|ch| match ch {
            '_' => '-',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
1054
1055fn apply_sdist_name_version_fallback(path: &Path, package_data: &mut PackageData) {
1056 let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
1057 return;
1058 };
1059
1060 let Some(stem) = strip_python_archive_extension(file_name) else {
1061 return;
1062 };
1063
1064 let Some((name, version)) = stem.rsplit_once('-') else {
1065 return;
1066 };
1067
1068 if package_data.name.is_none() {
1069 package_data.name = Some(name.replace('_', "-"));
1070 }
1071 if package_data.version.is_none() {
1072 package_data.version = Some(version.to_string());
1073 }
1074
1075 if package_data.purl.is_none()
1076 || package_data.repository_homepage_url.is_none()
1077 || package_data.repository_download_url.is_none()
1078 || package_data.api_data_url.is_none()
1079 {
1080 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
1081 build_pypi_urls(
1082 package_data.name.as_deref(),
1083 package_data.version.as_deref(),
1084 );
1085
1086 if package_data.repository_homepage_url.is_none() {
1087 package_data.repository_homepage_url = repository_homepage_url;
1088 }
1089 if package_data.repository_download_url.is_none() {
1090 package_data.repository_download_url = repository_download_url;
1091 }
1092 if package_data.api_data_url.is_none() {
1093 package_data.api_data_url = api_data_url;
1094 }
1095 if package_data.purl.is_none() {
1096 package_data.purl = purl;
1097 }
1098 }
1099}
1100
/// Extracts package metadata from a Python wheel (`.whl`) archive.
///
/// Reads the RFC 822 `*.dist-info/METADATA` entry from the zip, optionally
/// parses `*.dist-info/RECORD` into per-file references, and backfills
/// name/version/purl plus wheel tags from the filename when possible.
/// Every failure path logs a warning and returns `default_package_data()`
/// instead of erroring, so one corrupt wheel cannot abort a larger scan.
fn extract_from_wheel_archive(path: &Path) -> PackageData {
    // Stat first so oversized archives are rejected before any decompression.
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!(
                "Failed to read metadata for wheel archive {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Wheel archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Pre-validate all entry names/sizes once; lookups below use this list
    // so only sanitized entries are ever read back from the archive.
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "wheel") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(),
    };

    let metadata_entry =
        match find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/METADATA") {
            Some(entry) => entry,
            None => {
                warn!("No METADATA file found in wheel archive {:?}", path);
                return default_package_data();
            }
        };

    let content = match read_validated_zip_entry(&mut archive, metadata_entry, path, "wheel") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read METADATA from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiWheel);

    // Checksums are computed over the whole .whl file, not a single entry.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // RECORD is optional; silently skip it when absent or unreadable.
    if let Some(record_entry) =
        find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/RECORD")
        && let Ok(record_content) =
            read_validated_zip_entry(&mut archive, record_entry, path, "wheel")
    {
        package_data.file_references = parse_record_csv(&record_content);
    }

    // The wheel filename encodes name-version-python-abi-platform; use it to
    // fill gaps left by METADATA and to build a tag-qualified purl.
    if let Some(wheel_info) = parse_wheel_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(wheel_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(wheel_info.version.clone());
        }

        package_data.purl = build_wheel_purl(
            package_data.name.as_deref(),
            package_data.version.as_deref(),
            &wheel_info,
        );

        // NOTE(review): the "python_requires" key stores the wheel's python
        // tag (e.g. "py3"), not the Requires-Python specifier — confirm this
        // key name is intentional.
        let mut extra_data = package_data.extra_data.unwrap_or_default();
        extra_data.insert(
            "python_requires".to_string(),
            serde_json::Value::String(wheel_info.python_tag.clone()),
        );
        extra_data.insert(
            "abi_tag".to_string(),
            serde_json::Value::String(wheel_info.abi_tag.clone()),
        );
        extra_data.insert(
            "platform_tag".to_string(),
            serde_json::Value::String(wheel_info.platform_tag.clone()),
        );
        package_data.extra_data = Some(extra_data);
    }

    package_data
}
1206
/// Extracts package metadata from a Python egg (`.egg`) zip archive.
///
/// Reads `EGG-INFO/PKG-INFO` (or `*.egg-info/PKG-INFO`) in RFC 822 format,
/// optionally parses `installed-files.txt` for file references, and
/// backfills name/version from the egg filename. All failures are logged
/// and mapped to `default_package_data()` so scanning continues.
fn extract_from_egg_archive(path: &Path) -> PackageData {
    // Stat first so oversized archives are rejected before any decompression.
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!("Failed to read metadata for egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Egg archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Pre-validate all entry names/sizes once; only sanitized entries are
    // looked up and read below.
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "egg") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(),
    };

    // Eggs may store metadata under either layout depending on how they
    // were built.
    let pkginfo_entry = match find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &["EGG-INFO/PKG-INFO", ".egg-info/PKG-INFO"],
    ) {
        Some(entry) => entry,
        None => {
            warn!("No PKG-INFO file found in egg archive {:?}", path);
            return default_package_data();
        }
    };

    let content = match read_validated_zip_entry(&mut archive, pkginfo_entry, path, "egg") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read PKG-INFO from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiEgg);

    // Checksums are computed over the whole .egg file, not a single entry.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // installed-files.txt is optional; silently skip when absent/unreadable.
    if let Some(installed_files_entry) = find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &[
            "EGG-INFO/installed-files.txt",
            ".egg-info/installed-files.txt",
        ],
    ) && let Ok(installed_files_content) =
        read_validated_zip_entry(&mut archive, installed_files_entry, path, "egg")
    {
        package_data.file_references = parse_installed_files_txt(&installed_files_content);
    }

    // The egg filename encodes name-version[-pyver]; use it to fill gaps
    // left by PKG-INFO.
    if let Some(egg_info) = parse_egg_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(egg_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(egg_info.version.clone());
        }

        if let Some(python_version) = &egg_info.python_version {
            let mut extra_data = package_data.extra_data.unwrap_or_default();
            extra_data.insert(
                "python_version".to_string(),
                serde_json::Value::String(python_version.clone()),
            );
            package_data.extra_data = Some(extra_data);
        }
    }

    // Rebuilt unconditionally so the purl reflects any name/version that
    // was backfilled from the filename above.
    package_data.purl = build_egg_purl(
        package_data.name.as_deref(),
        package_data.version.as_deref(),
    );

    package_data
}
1308
1309fn find_validated_zip_entry_by_suffix<'a>(
1310 entries: &'a [ValidatedZipEntry],
1311 suffix: &str,
1312) -> Option<&'a ValidatedZipEntry> {
1313 entries.iter().find(|entry| entry.name.ends_with(suffix))
1314}
1315
1316fn find_validated_zip_entry_by_any_suffix<'a>(
1317 entries: &'a [ValidatedZipEntry],
1318 suffixes: &[&str],
1319) -> Option<&'a ValidatedZipEntry> {
1320 entries
1321 .iter()
1322 .find(|entry| suffixes.iter().any(|suffix| entry.name.ends_with(suffix)))
1323}
1324
1325fn read_validated_zip_entry<R: Read + std::io::Seek>(
1326 archive: &mut ZipArchive<R>,
1327 entry: &ValidatedZipEntry,
1328 path: &Path,
1329 archive_type: &str,
1330) -> Result<String, String> {
1331 let mut file = archive
1332 .by_index(entry.index)
1333 .map_err(|e| format!("Failed to find entry {}: {}", entry.name, e))?;
1334
1335 let compressed_size = file.compressed_size();
1336 let uncompressed_size = file.size();
1337
1338 if compressed_size > 0 {
1339 let ratio = uncompressed_size as f64 / compressed_size as f64;
1340 if ratio > MAX_COMPRESSION_RATIO {
1341 return Err(format!(
1342 "Rejected suspicious compression ratio in {} {:?}: {:.2}:1",
1343 archive_type, path, ratio
1344 ));
1345 }
1346 }
1347
1348 if uncompressed_size > MAX_FILE_SIZE {
1349 return Err(format!(
1350 "Rejected oversized entry in {} {:?}: {} bytes",
1351 archive_type, path, uncompressed_size
1352 ));
1353 }
1354
1355 read_limited_utf8(
1356 &mut file,
1357 MAX_FILE_SIZE,
1358 &format!("{} entry {}", archive_type, entry.name),
1359 )
1360}
1361
/// Reads at most `max_bytes` bytes from `reader` and decodes them as UTF-8.
///
/// Returns an error if the stream holds more than `max_bytes` bytes, if the
/// read itself fails, or if the data is not valid UTF-8. `context` is used
/// only for error messages.
fn read_limited_utf8<R: Read>(
    reader: &mut R,
    max_bytes: u64,
    context: &str,
) -> Result<String, String> {
    // Read one byte past the limit so "exactly at the limit" and "over the
    // limit" can be distinguished. `saturating_add` avoids overflow when
    // `max_bytes == u64::MAX` (plain `+ 1` panics in debug builds and wraps
    // to `take(0)` in release, silently reading nothing).
    let mut limited = reader.take(max_bytes.saturating_add(1));
    let mut bytes = Vec::new();
    limited
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed to read {}: {}", context, e))?;

    if bytes.len() as u64 > max_bytes {
        return Err(format!(
            "{} exceeded {} byte limit while reading",
            context, max_bytes
        ));
    }

    String::from_utf8(bytes).map_err(|e| format!("{} is not valid UTF-8: {}", context, e))
}
1382
/// Normalizes an archive entry path to safe, `/`-separated relative form.
///
/// Backslashes become forward slashes, `.` segments are dropped, and any
/// path that is absolute, contains `..`, or starts with a Windows drive
/// letter (`C:/...`) is rejected by returning `None`. An empty result is
/// also `None`.
fn normalize_archive_entry_path(entry_path: &str) -> Option<String> {
    let unified = entry_path.replace('\\', "/");

    // Reject drive-letter paths explicitly: on non-Windows hosts "C:" would
    // otherwise be treated as an ordinary path segment.
    let raw = unified.as_bytes();
    if raw.len() >= 3 && raw[0].is_ascii_alphabetic() && raw[1] == b':' && raw[2] == b'/' {
        return None;
    }

    let mut segments: Vec<String> = Vec::new();
    for part in Path::new(&unified).components() {
        match part {
            Component::Normal(name) => segments.push(name.to_string_lossy().into_owned()),
            Component::CurDir => continue,
            // Absolute paths, parent traversal, and prefixes are all unsafe.
            Component::RootDir | Component::ParentDir | Component::Prefix(_) => return None,
        }
    }

    if segments.is_empty() {
        None
    } else {
        Some(segments.join("/"))
    }
}
1404
1405pub fn parse_record_csv(content: &str) -> Vec<FileReference> {
1410 let mut reader = ReaderBuilder::new()
1411 .has_headers(false)
1412 .from_reader(content.as_bytes());
1413
1414 let mut file_references = Vec::new();
1415
1416 for result in reader.records() {
1417 match result {
1418 Ok(record) => {
1419 if record.len() < 3 {
1420 continue;
1421 }
1422
1423 let path = record.get(0).unwrap_or("").trim().to_string();
1424 if path.is_empty() {
1425 continue;
1426 }
1427
1428 let hash_field = record.get(1).unwrap_or("").trim();
1429 let size_field = record.get(2).unwrap_or("").trim();
1430
1431 let sha256 = if !hash_field.is_empty() && hash_field.contains('=') {
1433 let parts: Vec<&str> = hash_field.split('=').collect();
1434 if parts.len() == 2 && parts[0] == "sha256" {
1435 match URL_SAFE_NO_PAD.decode(parts[1]) {
1437 Ok(decoded) => {
1438 let hex = decoded
1439 .iter()
1440 .map(|b| format!("{:02x}", b))
1441 .collect::<String>();
1442 Some(hex)
1443 }
1444 Err(_) => None,
1445 }
1446 } else {
1447 None
1448 }
1449 } else {
1450 None
1451 };
1452
1453 let size = if !size_field.is_empty() && size_field != "-" {
1455 size_field.parse::<u64>().ok()
1456 } else {
1457 None
1458 };
1459
1460 file_references.push(FileReference {
1461 path,
1462 size,
1463 sha1: None,
1464 md5: None,
1465 sha256,
1466 sha512: None,
1467 extra_data: None,
1468 });
1469 }
1470 Err(e) => {
1471 warn!("Failed to parse RECORD CSV row: {}", e);
1472 continue;
1473 }
1474 }
1475 }
1476
1477 file_references
1478}
1479
1480pub fn parse_installed_files_txt(content: &str) -> Vec<FileReference> {
1483 content
1484 .lines()
1485 .map(|line| line.trim())
1486 .filter(|line| !line.is_empty())
1487 .map(|path| FileReference {
1488 path: path.to_string(),
1489 size: None,
1490 sha1: None,
1491 md5: None,
1492 sha256: None,
1493 sha512: None,
1494 extra_data: None,
1495 })
1496 .collect()
1497}
1498
1499pub fn parse_sources_txt(content: &str) -> Vec<FileReference> {
1500 content
1501 .lines()
1502 .map(str::trim)
1503 .filter(|line| !line.is_empty())
1504 .map(|path| FileReference {
1505 path: path.to_string(),
1506 size: None,
1507 sha1: None,
1508 md5: None,
1509 sha256: None,
1510 sha512: None,
1511 extra_data: None,
1512 })
1513 .collect()
1514}
1515
/// Components of a wheel filename, per the binary distribution format:
/// `{name}-{version}-{python tag}-{abi tag}-{platform tag}.whl`.
struct WheelInfo {
    name: String,
    version: String,
    python_tag: String,
    abi_tag: String,
    platform_tag: String,
}

/// Splits a wheel filename stem into its five dash-separated components.
///
/// Underscores in the name field are mapped back to dashes. Returns `None`
/// when the stem has fewer than five fields. Any fields beyond the fourth
/// are rejoined with `-` as the platform tag (compressed tag sets).
fn parse_wheel_filename(path: &Path) -> Option<WheelInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let mut fields = stem.split('-');

    let name = fields.next()?.replace('_', "-");
    let version = fields.next()?.to_string();
    let python_tag = fields.next()?.to_string();
    let abi_tag = fields.next()?.to_string();
    let platform_fields: Vec<&str> = fields.collect();
    if platform_fields.is_empty() {
        return None;
    }

    Some(WheelInfo {
        name,
        version,
        python_tag,
        abi_tag,
        platform_tag: platform_fields.join("-"),
    })
}
1540
/// Components of an egg filename: `{name}-{version}[-{python version}].egg`.
struct EggInfo {
    name: String,
    version: String,
    python_version: Option<String>,
}

/// Splits an egg filename stem into name, version, and optional Python
/// version. Underscores in the name field are mapped back to dashes.
/// Returns `None` when the stem lacks a version field.
fn parse_egg_filename(path: &Path) -> Option<EggInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let mut fields = stem.split('-');

    let name = fields.next()?.replace('_', "-");
    let version = fields.next()?.to_string();
    let python_version = fields.next().map(str::to_string);

    Some(EggInfo {
        name,
        version,
        python_version,
    })
}
1561
1562fn build_wheel_purl(
1563 name: Option<&str>,
1564 version: Option<&str>,
1565 wheel_info: &WheelInfo,
1566) -> Option<String> {
1567 let name = name?;
1568 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1569
1570 if let Some(ver) = version {
1571 package_url.with_version(ver).ok()?;
1572 }
1573
1574 let extension = format!(
1575 "{}-{}-{}",
1576 wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
1577 );
1578 package_url.add_qualifier("extension", extension).ok()?;
1579
1580 Some(package_url.to_string())
1581}
1582
1583fn build_egg_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
1584 let name = name?;
1585 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1586
1587 if let Some(ver) = version {
1588 package_url.with_version(ver).ok()?;
1589 }
1590
1591 package_url.add_qualifier("type", "egg").ok()?;
1592
1593 Some(package_url.to_string())
1594}
1595
1596fn python_parse_rfc822_content(content: &str, datasource_id: DatasourceId) -> PackageData {
1597 let metadata = super::rfc822::parse_rfc822_content(content);
1598 build_package_data_from_rfc822(&metadata, datasource_id)
1599}
1600
/// Maps parsed RFC 822 core-metadata headers (PKG-INFO / METADATA) into a
/// `PackageData`.
///
/// Pulls the standard single-value headers (name, version, summary,
/// license, ...), splits classifiers into keywords vs. license
/// classifiers, routes `Project-URL` entries to the matching URL fields,
/// and derives PyPI repository/API URLs plus a purl from name/version.
fn build_package_data_from_rfc822(
    metadata: &super::rfc822::Rfc822Metadata,
    datasource_id: DatasourceId,
) -> PackageData {
    use super::rfc822::{get_header_all, get_header_first};

    let name = get_header_first(&metadata.headers, "name");
    let version = get_header_first(&metadata.headers, "version");
    let summary = get_header_first(&metadata.headers, "summary");
    // Mutable: may be filled later from a "homepage"-labelled Project-URL.
    let mut homepage_url = get_header_first(&metadata.headers, "home-page");
    let author = get_header_first(&metadata.headers, "author");
    let author_email = get_header_first(&metadata.headers, "author-email");
    let license = get_header_first(&metadata.headers, "license");
    let license_expression = get_header_first(&metadata.headers, "license-expression");
    let download_url = get_header_first(&metadata.headers, "download-url");
    let platform = get_header_first(&metadata.headers, "platform");
    let requires_python = get_header_first(&metadata.headers, "requires-python");
    let classifiers = get_header_all(&metadata.headers, "classifier");
    let license_files = get_header_all(&metadata.headers, "license-file");

    // Older metadata versions carry the long description in a Description
    // header rather than the message body; prefer the body when present.
    let description_body = if metadata.body.is_empty() {
        get_header_first(&metadata.headers, "description").unwrap_or_default()
    } else {
        metadata.body.clone()
    };

    let description = build_description(summary.as_deref(), &description_body);

    // A single author party is recorded when either field is present.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    let (keywords, license_classifiers) = split_classifiers(&classifiers);
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license_expression.as_deref());

    // Prefer the SPDX License-Expression header verbatim; otherwise fall
    // back to a statement assembled from License + license classifiers.
    let extracted_license_statement = license_expression
        .clone()
        .or_else(|| build_extracted_license_statement(license.as_deref(), &license_classifiers));

    let mut extra_data = HashMap::new();
    // "UNKNOWN" is the setuptools placeholder for an unset platform.
    if let Some(platform_value) = platform
        && !platform_value.eq_ignore_ascii_case("unknown")
        && !platform_value.is_empty()
    {
        extra_data.insert(
            "platform".to_string(),
            serde_json::Value::String(platform_value),
        );
    }

    if let Some(requires_python_value) = requires_python
        && !requires_python_value.is_empty()
    {
        extra_data.insert(
            "requires_python".to_string(),
            serde_json::Value::String(requires_python_value),
        );
    }

    if !license_files.is_empty() {
        extra_data.insert(
            "license_files".to_string(),
            serde_json::Value::Array(
                license_files
                    .iter()
                    .cloned()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }

    // License files are also surfaced as file references (path only).
    let file_references = license_files
        .iter()
        .map(|path| FileReference {
            path: path.clone(),
            size: None,
            sha1: None,
            md5: None,
            sha256: None,
            sha512: None,
            extra_data: None,
        })
        .collect();

    let project_urls = get_header_all(&metadata.headers, "project-url");
    let dependencies = extract_rfc822_dependencies(&metadata.headers);
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);

    if !project_urls.is_empty() {
        let parsed_urls = parse_project_urls(&project_urls);

        // Route each labelled URL by common label conventions; only the
        // first match per category wins.
        for (label, url) in &parsed_urls {
            let label_lower = label.to_lowercase();

            if bug_tracking_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "tracker"
                        | "bug reports"
                        | "bug tracker"
                        | "issues"
                        | "issue tracker"
                        | "github: issues"
                )
            {
                bug_tracking_url = Some(url.clone());
            } else if code_view_url.is_none()
                && matches!(label_lower.as_str(), "source" | "source code" | "code")
            {
                code_view_url = Some(url.clone());
            } else if vcs_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "github" | "gitlab" | "github: repo" | "repository"
                )
            {
                vcs_url = Some(url.clone());
            } else if homepage_url.is_none()
                && matches!(label_lower.as_str(), "website" | "homepage" | "home")
            {
                homepage_url = Some(url.clone());
            } else if label_lower == "changelog" {
                extra_data.insert(
                    "changelog_url".to_string(),
                    serde_json::Value::String(url.clone()),
                );
            }
        }

        // All labelled URLs are also preserved verbatim in extra_data.
        let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
            .iter()
            .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
            .collect();

        if !project_urls_json.is_empty() {
            extra_data.insert(
                "project_urls".to_string(),
                serde_json::Value::Object(project_urls_json),
            );
        }
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
        build_pypi_urls(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description,
        release_date: None,
        parties,
        keywords,
        homepage_url,
        download_url,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references,
        is_private: false,
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(datasource_id),
        purl,
    }
}
1813
/// Parses `Project-URL` header values of the form `<label>, <url>` into
/// `(label, url)` pairs.
///
/// Per the core-metadata spec the label and URL are separated by a comma
/// (whitespace around either part is insignificant), so this splits at the
/// first comma and trims both sides — entries written without a space after
/// the comma (e.g. `Docs,https://...`) are accepted too. Entries with no
/// comma or an empty label/URL are dropped.
fn parse_project_urls(project_urls: &[String]) -> Vec<(String, String)> {
    project_urls
        .iter()
        .filter_map(|url_entry| {
            let (label, url) = url_entry.split_once(',')?;
            let label = label.trim();
            let url = url.trim();
            (!label.is_empty() && !url.is_empty())
                .then(|| (label.to_string(), url.to_string()))
        })
        .collect()
}
1829
/// Joins the trimmed summary and trimmed body with a newline, omitting
/// whichever parts are blank. Returns `None` when both are blank.
fn build_description(summary: Option<&str>, body: &str) -> Option<String> {
    let mut sections: Vec<String> = Vec::new();

    if let Some(text) = summary {
        let text = text.trim();
        if !text.is_empty() {
            sections.push(text.to_string());
        }
    }

    let trimmed_body = body.trim();
    if !trimmed_body.is_empty() {
        sections.push(trimmed_body.to_string());
    }

    (!sections.is_empty()).then(|| sections.join("\n"))
}
1848
/// Splits trove classifiers into `(keywords, license_classifiers)`:
/// entries under `License ::` go to the second list, everything else is
/// treated as a keyword. Relative order is preserved in both lists.
fn split_classifiers(classifiers: &[String]) -> (Vec<String>, Vec<String>) {
    let (license_classifiers, keywords): (Vec<String>, Vec<String>) = classifiers
        .iter()
        .cloned()
        .partition(|classifier| classifier.starts_with("License ::"));

    (keywords, license_classifiers)
}
1863
/// Assembles a YAML-like license statement from the raw `License` header
/// value and any `License ::` classifiers. Returns `None` when both inputs
/// are empty; otherwise the result always ends with a trailing newline.
fn build_extracted_license_statement(
    license: Option<&str>,
    license_classifiers: &[String],
) -> Option<String> {
    let mut lines: Vec<String> = Vec::new();

    if let Some(raw) = license {
        let raw = raw.trim();
        if !raw.is_empty() {
            lines.push(format!("license: {}", raw));
        }
    }

    if !license_classifiers.is_empty() {
        lines.push("classifiers:".to_string());
        lines.extend(
            license_classifiers
                .iter()
                .map(|classifier| format!(" - '{}'", classifier)),
        );
    }

    (!lines.is_empty()).then(|| format!("{}\n", lines.join("\n")))
}
1889
1890pub(crate) fn build_pypi_urls(
1891 name: Option<&str>,
1892 version: Option<&str>,
1893) -> (
1894 Option<String>,
1895 Option<String>,
1896 Option<String>,
1897 Option<String>,
1898) {
1899 let repository_homepage_url = name.map(|value| format!("https://pypi.org/project/{}", value));
1900
1901 let repository_download_url = name.and_then(|value| {
1902 version.map(|ver| {
1903 format!(
1904 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
1905 &value[..1.min(value.len())],
1906 value,
1907 value,
1908 ver
1909 )
1910 })
1911 });
1912
1913 let api_data_url = name.map(|value| {
1914 if let Some(ver) = version {
1915 format!("https://pypi.org/pypi/{}/{}/json", value, ver)
1916 } else {
1917 format!("https://pypi.org/pypi/{}/json", value)
1918 }
1919 });
1920
1921 let purl = name.and_then(|value| {
1922 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), value).ok()?;
1923 if let Some(ver) = version {
1924 package_url.with_version(ver).ok()?;
1925 }
1926 Some(package_url.to_string())
1927 });
1928
1929 (
1930 repository_homepage_url,
1931 repository_download_url,
1932 api_data_url,
1933 purl,
1934 )
1935}
1936
1937fn build_pypi_purl_with_extension(
1938 name: &str,
1939 version: Option<&str>,
1940 extension: &str,
1941) -> Option<String> {
1942 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1943 if let Some(ver) = version {
1944 package_url.with_version(ver).ok()?;
1945 }
1946 package_url.add_qualifier("extension", extension).ok()?;
1947 Some(package_url.to_string())
1948}
1949
/// Extracts package metadata from a `pyproject.toml` file.
///
/// The metadata table is resolved in priority order: `[project]` (PEP 621),
/// then `[tool.poetry]`, then — when a top-level `name` key exists — the
/// document root itself (legacy flat layouts). Failures to read, parse, or
/// locate any metadata table are logged and yield `default_package_data()`.
fn extract_from_pyproject_toml(path: &Path) -> PackageData {
    let toml_content = match read_toml_file(path) {
        Ok(content) => content,
        Err(e) => {
            warn!(
                "Failed to read or parse pyproject.toml at {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    let tool_table = toml_content.get("tool").and_then(|v| v.as_table());

    // Resolve the table that carries the package fields (see doc comment).
    let project_table =
        if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
            project.clone()
        } else if let Some(tool) = tool_table {
            if let Some(poetry) = tool.get("poetry").and_then(|v| v.as_table()) {
                poetry.clone()
            } else {
                warn!(
                    "No project or tool.poetry data found in pyproject.toml at {:?}",
                    path
                );
                return default_package_data();
            }
        } else if toml_content.get(FIELD_NAME).is_some() {
            match toml_content.as_table() {
                Some(table) => table.clone(),
                None => {
                    warn!("Failed to convert TOML content to table in {:?}", path);
                    return default_package_data();
                }
            }
        } else {
            warn!("No project data found in pyproject.toml at {:?}", path);
            return default_package_data();
        };

    let name = project_table
        .get(FIELD_NAME)
        .and_then(|v| v.as_str())
        .map(String::from);

    let version = project_table
        .get(FIELD_VERSION)
        .and_then(|v| v.as_str())
        .map(String::from);
    // Non-string classifier entries are silently dropped.
    let classifiers = project_table
        .get("classifiers")
        .and_then(|value| value.as_array())
        .map(|values| {
            values
                .iter()
                .filter_map(|value| value.as_str().map(ToOwned::to_owned))
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    let extracted_license_statement = extract_raw_license_string(&project_table);
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(extract_license_expression_candidate(&project_table));

    let (homepage_url, repository_url) = extract_urls(&project_table);

    let (dependencies, optional_dependencies) = extract_dependencies(&project_table, &toml_content);
    let extra_data = extract_pyproject_extra_data(&toml_content);

    // purl construction failures are logged and degrade to None rather than
    // failing the whole extraction.
    let purl = name.as_ref().and_then(|n| {
        let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n) {
            Ok(p) => p,
            Err(e) => {
                warn!(
                    "Failed to create PackageUrl for Python package '{}': {}",
                    n, e
                );
                return None;
            }
        };

        if let Some(v) = &version
            && let Err(e) = package_url.with_version(v)
        {
            warn!(
                "Failed to set version '{}' for Python package '{}': {}",
                v, n, e
            );
            return None;
        }

        Some(package_url.to_string())
    });

    let api_data_url = name.as_ref().map(|n| {
        if let Some(v) = &version {
            format!("https://pypi.org/pypi/{}/{}/json", n, v)
        } else {
            format!("https://pypi.org/pypi/{}/json", n)
        }
    });

    // PyPI-derived fallbacks, used only when the TOML supplied no URL.
    let pypi_homepage_url = name
        .as_ref()
        .map(|n| format!("https://pypi.org/project/{}", n));

    let pypi_download_url = name.as_ref().and_then(|n| {
        version.as_ref().map(|v| {
            // NOTE(review): `&n[..1]` is a byte slice and would panic if the
            // name started with a multi-byte character — confirm names here
            // are always ASCII, or switch to a char-based prefix.
            format!(
                "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
                &n[..1.min(n.len())],
                n,
                n,
                v
            )
        })
    });

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description: None,
        release_date: None,
        parties: extract_parties(&project_table),
        keywords: Vec::new(),
        homepage_url: homepage_url.or(pypi_homepage_url),
        download_url: repository_url.clone().or(pypi_download_url),
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url: None,
        code_view_url: None,
        vcs_url: repository_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data,
        dependencies: [dependencies, optional_dependencies].concat(),
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url,
        datasource_id: Some(DatasourceId::PypiPyprojectToml),
        purl,
    }
}
2119
2120fn extract_raw_license_string(project: &TomlMap<String, TomlValue>) -> Option<String> {
2121 project
2122 .get(FIELD_LICENSE)
2123 .and_then(|license_value| match license_value {
2124 TomlValue::String(license_str) => Some(license_str.clone()),
2125 TomlValue::Table(license_table) => license_table
2126 .get("text")
2127 .and_then(|v| v.as_str())
2128 .map(|s| s.to_string())
2129 .or_else(|| {
2130 license_table
2131 .get("expression")
2132 .and_then(|v| v.as_str())
2133 .map(|expr| expr.to_string())
2134 }),
2135 _ => None,
2136 })
2137}
2138
2139fn extract_license_expression_candidate(project: &TomlMap<String, TomlValue>) -> Option<&str> {
2140 match project.get(FIELD_LICENSE) {
2141 Some(TomlValue::String(license_str)) => Some(license_str.as_str()),
2142 Some(TomlValue::Table(license_table)) => license_table
2143 .get("expression")
2144 .and_then(|value| value.as_str()),
2145 _ => None,
2146 }
2147}
2148
2149fn extract_urls(project: &TomlMap<String, TomlValue>) -> (Option<String>, Option<String>) {
2150 let mut homepage_url = None;
2151 let mut repository_url = None;
2152
2153 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
2155 homepage_url = urls
2156 .get(FIELD_HOMEPAGE)
2157 .and_then(|v| v.as_str())
2158 .map(String::from);
2159 repository_url = urls
2160 .get(FIELD_REPOSITORY)
2161 .and_then(|v| v.as_str())
2162 .map(String::from);
2163 }
2164
2165 if homepage_url.is_none() {
2167 homepage_url = project
2168 .get(FIELD_HOMEPAGE)
2169 .and_then(|v| v.as_str())
2170 .map(String::from);
2171 }
2172
2173 if repository_url.is_none() {
2174 repository_url = project
2175 .get(FIELD_REPOSITORY)
2176 .and_then(|v| v.as_str())
2177 .map(String::from);
2178 }
2179
2180 (homepage_url, repository_url)
2181}
2182
2183fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
2184 let mut parties = Vec::new();
2185
2186 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
2187 for author in authors {
2188 if let Some(author_str) = author.as_str() {
2189 let (name, email) = split_name_email(author_str);
2190 parties.push(Party {
2191 r#type: None,
2192 role: Some("author".to_string()),
2193 name,
2194 email,
2195 url: None,
2196 organization: None,
2197 organization_url: None,
2198 timezone: None,
2199 });
2200 }
2201 }
2202 }
2203
2204 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
2205 for maintainer in maintainers {
2206 if let Some(maintainer_str) = maintainer.as_str() {
2207 let (name, email) = split_name_email(maintainer_str);
2208 parties.push(Party {
2209 r#type: None,
2210 role: Some("maintainer".to_string()),
2211 name,
2212 email,
2213 url: None,
2214 organization: None,
2215 organization_url: None,
2216 timezone: None,
2217 });
2218 }
2219 }
2220 }
2221
2222 parties
2223}
2224
2225fn extract_dependencies(
2226 project: &TomlMap<String, TomlValue>,
2227 toml_content: &TomlValue,
2228) -> (Vec<Dependency>, Vec<Dependency>) {
2229 let mut dependencies = Vec::new();
2230 let mut optional_dependencies = Vec::new();
2231
2232 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
2234 match deps_value {
2235 TomlValue::Array(arr) => {
2236 dependencies = parse_dependency_array(arr, false, None);
2237 }
2238 TomlValue::Table(table) => {
2239 dependencies = parse_dependency_table(table, false, None);
2240 }
2241 _ => {}
2242 }
2243 }
2244
2245 if let Some(opt_deps_table) = project
2247 .get(FIELD_OPTIONAL_DEPENDENCIES)
2248 .and_then(|v| v.as_table())
2249 {
2250 for (extra_name, deps) in opt_deps_table {
2251 match deps {
2252 TomlValue::Array(arr) => {
2253 optional_dependencies.extend(parse_dependency_array(
2254 arr,
2255 true,
2256 Some(extra_name),
2257 ));
2258 }
2259 TomlValue::Table(table) => {
2260 optional_dependencies.extend(parse_dependency_table(
2261 table,
2262 true,
2263 Some(extra_name),
2264 ));
2265 }
2266 _ => {}
2267 }
2268 }
2269 }
2270
2271 if let Some(dev_deps_value) = project.get(FIELD_DEV_DEPENDENCIES) {
2273 match dev_deps_value {
2274 TomlValue::Array(arr) => {
2275 optional_dependencies.extend(parse_dependency_array(
2276 arr,
2277 true,
2278 Some(FIELD_DEV_DEPENDENCIES),
2279 ));
2280 }
2281 TomlValue::Table(table) => {
2282 optional_dependencies.extend(parse_dependency_table(
2283 table,
2284 true,
2285 Some(FIELD_DEV_DEPENDENCIES),
2286 ));
2287 }
2288 _ => {}
2289 }
2290 }
2291
2292 if let Some(groups_table) = project.get("group").and_then(|v| v.as_table()) {
2294 for (group_name, group_data) in groups_table {
2295 if let Some(group_deps) = group_data.as_table().and_then(|t| t.get("dependencies")) {
2296 match group_deps {
2297 TomlValue::Array(arr) => {
2298 optional_dependencies.extend(parse_dependency_array(
2299 arr,
2300 true,
2301 Some(group_name),
2302 ));
2303 }
2304 TomlValue::Table(table) => {
2305 optional_dependencies.extend(parse_dependency_table(
2306 table,
2307 true,
2308 Some(group_name),
2309 ));
2310 }
2311 _ => {}
2312 }
2313 }
2314 }
2315 }
2316
2317 if let Some(groups_table) = toml_content
2318 .get(FIELD_DEPENDENCY_GROUPS)
2319 .and_then(|value| value.as_table())
2320 {
2321 for (group_name, deps) in groups_table {
2322 match deps {
2323 TomlValue::Array(arr) => {
2324 optional_dependencies.extend(parse_dependency_array(
2325 arr,
2326 true,
2327 Some(group_name),
2328 ));
2329 }
2330 TomlValue::Table(table) => {
2331 optional_dependencies.extend(parse_dependency_table(
2332 table,
2333 true,
2334 Some(group_name),
2335 ));
2336 }
2337 _ => {}
2338 }
2339 }
2340 }
2341
2342 if let Some(dev_deps_value) = toml_content
2343 .get("tool")
2344 .and_then(|value| value.as_table())
2345 .and_then(|tool| tool.get("uv"))
2346 .and_then(|value| value.as_table())
2347 .and_then(|uv| uv.get(FIELD_DEV_DEPENDENCIES))
2348 {
2349 match dev_deps_value {
2350 TomlValue::Array(arr) => {
2351 optional_dependencies.extend(parse_dependency_array(arr, true, Some("dev")));
2352 }
2353 TomlValue::Table(table) => {
2354 optional_dependencies.extend(parse_dependency_table(table, true, Some("dev")));
2355 }
2356 _ => {}
2357 }
2358 }
2359
2360 (dependencies, optional_dependencies)
2361}
2362
2363fn extract_pyproject_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
2364 let mut extra_data = HashMap::new();
2365
2366 if let Some(tool_uv) = toml_content
2367 .get("tool")
2368 .and_then(|value| value.as_table())
2369 .and_then(|tool| tool.get("uv"))
2370 {
2371 extra_data.insert("tool_uv".to_string(), toml_value_to_json(tool_uv));
2372 }
2373
2374 if extra_data.is_empty() {
2375 None
2376 } else {
2377 Some(extra_data)
2378 }
2379}
2380
2381fn toml_value_to_json(value: &TomlValue) -> JsonValue {
2382 match value {
2383 TomlValue::String(value) => JsonValue::String(value.clone()),
2384 TomlValue::Integer(value) => JsonValue::String(value.to_string()),
2385 TomlValue::Float(value) => JsonValue::String(value.to_string()),
2386 TomlValue::Boolean(value) => JsonValue::Bool(*value),
2387 TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
2388 TomlValue::Array(values) => {
2389 JsonValue::Array(values.iter().map(toml_value_to_json).collect())
2390 }
2391 TomlValue::Table(values) => JsonValue::Object(
2392 values
2393 .iter()
2394 .map(|(key, value)| (key.clone(), toml_value_to_json(value)))
2395 .collect::<JsonMap<String, JsonValue>>(),
2396 ),
2397 }
2398}
2399
2400fn parse_dependency_table(
2401 table: &TomlMap<String, TomlValue>,
2402 is_optional: bool,
2403 scope: Option<&str>,
2404) -> Vec<Dependency> {
2405 table
2406 .iter()
2407 .filter_map(|(name, version)| {
2408 let version_str = version.as_str().map(|s| s.to_string());
2409 let mut package_url =
2410 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2411
2412 if let Some(v) = &version_str {
2413 package_url.with_version(v).ok()?;
2414 }
2415
2416 Some(Dependency {
2417 purl: Some(package_url.to_string()),
2418 extracted_requirement: None,
2419 scope: scope.map(|s| s.to_string()),
2420 is_runtime: Some(!is_optional),
2421 is_optional: Some(is_optional),
2422 is_pinned: None,
2423 is_direct: Some(true),
2424 resolved_package: None,
2425 extra_data: None,
2426 })
2427 })
2428 .collect()
2429}
2430
2431fn parse_dependency_array(
2432 array: &[TomlValue],
2433 is_optional: bool,
2434 scope: Option<&str>,
2435) -> Vec<Dependency> {
2436 array
2437 .iter()
2438 .filter_map(|dep| {
2439 let dep_str = dep.as_str()?;
2440
2441 let mut parts = dep_str.split(['>', '=', '<', '~']);
2442 let name = parts.next()?.trim().to_string();
2443
2444 let version = parts.next().map(|v| v.trim().to_string());
2445
2446 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name)
2447 {
2448 Ok(purl) => purl,
2449 Err(_) => return None,
2450 };
2451
2452 if let Some(ref v) = version {
2453 package_url.with_version(v).ok()?;
2454 }
2455
2456 Some(Dependency {
2457 purl: Some(package_url.to_string()),
2458 extracted_requirement: None,
2459 scope: scope.map(|s| s.to_string()),
2460 is_runtime: Some(!is_optional),
2461 is_optional: Some(is_optional),
2462 is_pinned: None,
2463 is_direct: Some(true),
2464 resolved_package: None,
2465 extra_data: None,
2466 })
2467 })
2468 .collect()
2469}
2470
/// Subset of Python literal values that the setup.py evaluator understands.
///
/// All numeric constants are folded to `f64` (see
/// `LiteralEvaluator::evaluate_constant`); dict keys are stringified.
#[derive(Debug, Clone)]
enum Value {
    String(String),
    Number(f64),
    Bool(bool),
    // Python's `None` literal.
    None,
    List(Vec<Value>),
    Tuple(Vec<Value>),
    Dict(HashMap<String, Value>),
}
2481
/// Evaluates Python literal expressions from a setup.py AST.
///
/// `constants` holds previously assigned module-level names so that
/// `version = VERSION`-style indirection resolves. The depth and node
/// budgets bound the traversal so evaluation stays cheap on adversarial
/// or very large input.
struct LiteralEvaluator {
    // name -> evaluated literal, from module-level assignments
    constants: HashMap<String, Value>,
    // recursion limit (seeded from MAX_SETUP_PY_AST_DEPTH)
    max_depth: usize,
    // total node budget (seeded from MAX_SETUP_PY_AST_NODES)
    max_nodes: usize,
    // running count checked against max_nodes
    nodes_visited: usize,
}
2488
impl LiteralEvaluator {
    /// Create an evaluator seeded with already-known module constants.
    fn new(constants: HashMap<String, Value>) -> Self {
        Self {
            constants,
            max_depth: MAX_SETUP_PY_AST_DEPTH,
            max_nodes: MAX_SETUP_PY_AST_NODES,
            nodes_visited: 0,
        }
    }

    /// Record a module-level assignment for later name resolution.
    fn insert_constant(&mut self, name: String, value: Value) {
        self.constants.insert(name, value);
    }

    /// Evaluate a literal expression, returning `None` for anything dynamic.
    ///
    /// Supported forms: constants, previously assigned names, lists, tuples,
    /// dict literals, `dict(key=value, ...)` calls and `OrderedDict([...])`
    /// calls. Evaluation aborts (returns `None`) once the depth or node
    /// budget is exhausted.
    fn evaluate_expr(&mut self, expr: &ast::Expr, depth: usize) -> Option<Value> {
        if depth >= self.max_depth || self.nodes_visited >= self.max_nodes {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Constant(ast::ExprConstant { value, .. }) => self.evaluate_constant(value),
            ast::Expr::Name(ast::ExprName { id, .. }) => self.constants.get(id.as_str()).cloned(),
            ast::Expr::List(ast::ExprList { elts, .. }) => {
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::List(values))
            }
            ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::Tuple(values))
            }
            ast::Expr::Dict(ast::ExprDict { keys, values, .. }) => {
                let mut dict = HashMap::new();
                for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
                    // A missing key expression is a `**expansion`; bail out.
                    let key_expr = key_expr.as_ref()?;
                    let key_value = self.evaluate_expr(key_expr, depth + 1)?;
                    let key = value_to_string(&key_value)?;
                    let value = self.evaluate_expr(value_expr, depth + 1)?;
                    dict.insert(key, value);
                }
                Some(Value::Dict(dict))
            }
            ast::Expr::Call(ast::ExprCall {
                func,
                args,
                keywords,
                ..
            }) => {
                // `OrderedDict([(k, v), ...])` — positional-argument form only.
                if keywords.is_empty()
                    && let Some(name) = dotted_name(func.as_ref(), depth + 1)
                    && matches!(name.as_str(), "OrderedDict" | "collections.OrderedDict")
                {
                    return self.evaluate_ordered_dict(args, depth + 1);
                }

                if !args.is_empty() {
                    return None;
                }

                // `dict(k=v, ...)` — keyword-argument form only.
                if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref()
                    && id == "dict"
                {
                    let mut dict = HashMap::new();
                    for keyword in keywords {
                        let key = keyword.arg.as_ref().map(|name| name.as_str())?;
                        let value = self.evaluate_expr(&keyword.value, depth + 1)?;
                        dict.insert(key.to_string(), value);
                    }
                    return Some(Value::Dict(dict));
                }

                None
            }
            _ => None,
        }
    }

    /// Convert a Python AST constant into a `Value`.
    ///
    /// Integers are routed through string parsing into `f64`, so very large
    /// integers lose precision; other constant kinds (bytes, ellipsis, ...)
    /// fall through to `None`.
    fn evaluate_constant(&self, constant: &ast::Constant) -> Option<Value> {
        match constant {
            ast::Constant::Str(value) => Some(Value::String(value.clone())),
            ast::Constant::Bool(value) => Some(Value::Bool(*value)),
            ast::Constant::Int(value) => value.to_string().parse::<f64>().ok().map(Value::Number),
            ast::Constant::Float(value) => Some(Value::Number(*value)),
            ast::Constant::None => Some(Value::None),
            _ => None,
        }
    }

    /// Evaluate `OrderedDict(iterable)` where the iterable is a literal
    /// list/tuple of two-element `(key, value)` tuples.
    fn evaluate_ordered_dict(&mut self, args: &[ast::Expr], depth: usize) -> Option<Value> {
        if args.len() != 1 {
            return None;
        }

        let items = match self.evaluate_expr(&args[0], depth)? {
            Value::List(items) | Value::Tuple(items) => items,
            _ => return None,
        };

        let mut dict = HashMap::new();
        for item in items {
            let Value::Tuple(values) = item else {
                return None;
            };
            if values.len() != 2 {
                return None;
            }
            let key = value_to_string(&values[0])?;
            dict.insert(key, values[1].clone());
        }

        Some(Value::Dict(dict))
    }
}
2608
/// Names under which `setup()` may be invoked in a setup.py file.
#[derive(Default)]
struct SetupAliases {
    // Local names bound to the `setup` function itself
    // (e.g. `from setuptools import setup as s`).
    setup_names: HashSet<String>,
    // Local alias -> real module name (e.g. `import setuptools as st`).
    module_aliases: HashMap<String, String>,
}
2614
2615fn extract_from_setup_py(path: &Path) -> PackageData {
2616 let content = match read_file_to_string(path) {
2617 Ok(content) => content,
2618 Err(e) => {
2619 warn!("Failed to read setup.py at {:?}: {}", path, e);
2620 return default_package_data();
2621 }
2622 };
2623
2624 if content.len() > MAX_SETUP_PY_BYTES {
2625 warn!("setup.py too large at {:?}: {} bytes", path, content.len());
2626 return extract_from_setup_py_regex(&content);
2627 }
2628
2629 let mut package_data = match extract_from_setup_py_ast(&content) {
2630 Ok(Some(data)) => data,
2631 Ok(None) => extract_from_setup_py_regex(&content),
2632 Err(e) => {
2633 warn!("Failed to parse setup.py AST at {:?}: {}", path, e);
2634 extract_from_setup_py_regex(&content)
2635 }
2636 };
2637
2638 if package_data.name.is_none() {
2639 package_data.name = extract_setup_value(&content, "name");
2640 }
2641
2642 if package_data.version.is_none() {
2643 package_data.version = extract_setup_value(&content, "version");
2644 }
2645
2646 fill_from_sibling_dunder_metadata(path, &content, &mut package_data);
2647
2648 if package_data.purl.is_none() {
2649 package_data.purl = build_setup_py_purl(
2650 package_data.name.as_deref(),
2651 package_data.version.as_deref(),
2652 );
2653 }
2654
2655 package_data
2656}
2657
2658fn fill_from_sibling_dunder_metadata(path: &Path, content: &str, package_data: &mut PackageData) {
2659 if package_data.version.is_some()
2660 && package_data.extracted_license_statement.is_some()
2661 && package_data
2662 .parties
2663 .iter()
2664 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some())
2665 {
2666 return;
2667 }
2668
2669 let Some(root) = path.parent() else {
2670 return;
2671 };
2672
2673 let dunder_metadata = collect_sibling_dunder_metadata(root, content);
2674
2675 if package_data.version.is_none() {
2676 package_data.version = dunder_metadata.version;
2677 }
2678
2679 if package_data.extracted_license_statement.is_none() {
2680 package_data.extracted_license_statement = dunder_metadata.license;
2681 }
2682
2683 let has_author = package_data
2684 .parties
2685 .iter()
2686 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some());
2687
2688 if !has_author && let Some(author) = dunder_metadata.author {
2689 package_data.parties.push(Party {
2690 r#type: Some("person".to_string()),
2691 role: Some("author".to_string()),
2692 name: Some(author),
2693 email: None,
2694 url: None,
2695 organization: None,
2696 organization_url: None,
2697 timezone: None,
2698 });
2699 }
2700}
2701
/// Metadata scraped from `__version__`/`__author__`/`__license__`
/// assignments in modules imported by setup.py.
#[derive(Default)]
struct DunderMetadata {
    version: Option<String>,
    author: Option<String>,
    license: Option<String>,
}
2708
2709fn collect_sibling_dunder_metadata(root: &Path, content: &str) -> DunderMetadata {
2710 let statements = match ast::Suite::parse(content, "<setup.py>") {
2711 Ok(statements) => statements,
2712 Err(_) => return DunderMetadata::default(),
2713 };
2714
2715 let version_re = Regex::new(r#"(?m)^\s*__version__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2716 let author_re = Regex::new(r#"(?m)^\s*__author__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2717 let license_re = Regex::new(r#"(?m)^\s*__license__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2718 let mut metadata = DunderMetadata::default();
2719
2720 for module in imported_dunder_modules(&statements) {
2721 let Some(path) = resolve_imported_module_path(root, &module) else {
2722 continue;
2723 };
2724 let Ok(module_content) = read_file_to_string(&path) else {
2725 continue;
2726 };
2727
2728 if metadata.version.is_none() {
2729 metadata.version = version_re
2730 .as_ref()
2731 .and_then(|regex| regex.captures(&module_content))
2732 .and_then(|captures| captures.get(1))
2733 .map(|match_| match_.as_str().to_string());
2734 }
2735
2736 if metadata.author.is_none() {
2737 metadata.author = author_re
2738 .as_ref()
2739 .and_then(|regex| regex.captures(&module_content))
2740 .and_then(|captures| captures.get(1))
2741 .map(|match_| match_.as_str().to_string());
2742 }
2743
2744 if metadata.license.is_none() {
2745 metadata.license = license_re
2746 .as_ref()
2747 .and_then(|regex| regex.captures(&module_content))
2748 .and_then(|captures| captures.get(1))
2749 .map(|match_| match_.as_str().to_string());
2750 }
2751
2752 if metadata.version.is_some() && metadata.author.is_some() && metadata.license.is_some() {
2753 return metadata;
2754 }
2755 }
2756
2757 metadata
2758}
2759
2760fn imported_dunder_modules(statements: &[ast::Stmt]) -> Vec<String> {
2761 let mut modules = Vec::new();
2762
2763 for statement in statements {
2764 let ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) = statement else {
2765 continue;
2766 };
2767 let Some(module) = module.as_ref().map(|name| name.as_str()) else {
2768 continue;
2769 };
2770 let imports_dunder = names.iter().any(|alias| {
2771 matches!(
2772 alias.name.as_str(),
2773 "__version__" | "__author__" | "__license__"
2774 )
2775 });
2776 if imports_dunder {
2777 modules.push(module.to_string());
2778 }
2779 }
2780
2781 modules
2782}
2783
/// Resolve a dotted Python module name to an existing file under `root`,
/// trying flat-layout and `src/`-layout, as both module file and package
/// `__init__.py`.
fn resolve_imported_module_path(root: &Path, module: &str) -> Option<PathBuf> {
    let relative: PathBuf = module.split('.').collect();
    [
        root.join(relative.with_extension("py")),
        root.join(&relative).join("__init__.py"),
        root.join("src").join(relative.with_extension("py")),
        root.join("src").join(&relative).join("__init__.py"),
    ]
    .into_iter()
    .find(|candidate| candidate.exists())
}
2795
2796fn extract_from_setup_py_ast(content: &str) -> Result<Option<PackageData>, String> {
2812 let statements = ast::Suite::parse(content, "<setup.py>").map_err(|e| format!("{}", e))?;
2813 let aliases = collect_setup_aliases(&statements);
2814 let mut evaluator = LiteralEvaluator::new(HashMap::new());
2815 build_setup_py_constants(&statements, &mut evaluator);
2816
2817 let setup_call = find_setup_call(&statements, &aliases);
2818 let Some(call_expr) = setup_call else {
2819 return Ok(None);
2820 };
2821
2822 let setup_values = extract_setup_keywords(call_expr, &mut evaluator);
2823 Ok(Some(build_setup_py_package_data(&setup_values)))
2824}
2825
2826fn build_setup_py_constants(statements: &[ast::Stmt], evaluator: &mut LiteralEvaluator) {
2827 for stmt in statements {
2828 if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = stmt {
2829 if targets.len() != 1 {
2830 continue;
2831 }
2832
2833 let Some(name) = extract_assign_name(&targets[0]) else {
2834 continue;
2835 };
2836
2837 if let Some(value) = evaluator.evaluate_expr(value.as_ref(), 0) {
2838 evaluator.insert_constant(name, value);
2839 }
2840 }
2841 }
2842}
2843
2844fn extract_assign_name(target: &ast::Expr) -> Option<String> {
2845 match target {
2846 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
2847 _ => None,
2848 }
2849}
2850
2851fn collect_setup_aliases(statements: &[ast::Stmt]) -> SetupAliases {
2852 let mut aliases = SetupAliases::default();
2853 aliases.setup_names.insert("setup".to_string());
2854
2855 for stmt in statements {
2856 match stmt {
2857 ast::Stmt::Import(ast::StmtImport { names, .. }) => {
2858 for alias in names {
2859 let module_name = alias.name.as_str();
2860 if !is_setup_module(module_name) {
2861 continue;
2862 }
2863 let alias_name = alias
2864 .asname
2865 .as_ref()
2866 .map(|name| name.as_str())
2867 .unwrap_or(module_name);
2868 aliases
2869 .module_aliases
2870 .insert(alias_name.to_string(), module_name.to_string());
2871 }
2872 }
2873 ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
2874 let Some(module_name) = module.as_ref().map(|name| name.as_str()) else {
2875 continue;
2876 };
2877 if !is_setup_module(module_name) {
2878 continue;
2879 }
2880 for alias in names {
2881 if alias.name.as_str() != "setup" {
2882 continue;
2883 }
2884 let alias_name = alias
2885 .asname
2886 .as_ref()
2887 .map(|name| name.as_str())
2888 .unwrap_or("setup");
2889 aliases.setup_names.insert(alias_name.to_string());
2890 }
2891 }
2892 _ => {}
2893 }
2894 }
2895
2896 aliases
2897}
2898
/// Whether `module_name` is one of the modules that provide `setup()`.
fn is_setup_module(module_name: &str) -> bool {
    ["setuptools", "distutils", "distutils.core"]
        .iter()
        .any(|candidate| *candidate == module_name)
}
2902
/// Locate the first `setup(...)` call expression in the parsed module,
/// searching nested control-flow statements with a bounded node budget.
fn find_setup_call<'a>(
    statements: &'a [ast::Stmt],
    aliases: &'a SetupAliases,
) -> Option<&'a ast::Expr> {
    let mut finder = SetupCallFinder {
        aliases,
        nodes_visited: 0,
    };
    finder.find_in_statements(statements)
}
2913
/// Statement-tree walker that searches for a `setup(...)` call expression.
struct SetupCallFinder<'a> {
    // Known setup()/module aliases gathered by collect_setup_aliases.
    aliases: &'a SetupAliases,
    // Budget counter checked against MAX_SETUP_PY_AST_NODES.
    nodes_visited: usize,
}
2918
impl<'a> SetupCallFinder<'a> {
    /// Depth-first search over statements for a `setup(...)` call.
    ///
    /// Recurses into `if`/`for`/`while`/`with`/`try` bodies (the places a
    /// guarded `setup()` call typically hides) but not into function or
    /// class definitions. Aborts once the node budget is spent.
    fn find_in_statements(&mut self, statements: &'a [ast::Stmt]) -> Option<&'a ast::Expr> {
        for stmt in statements {
            if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
                return None;
            }
            self.nodes_visited += 1;

            let found = match stmt {
                ast::Stmt::Expr(ast::StmtExpr { value, .. }) => self.visit_expr(value.as_ref()),
                ast::Stmt::Assign(ast::StmtAssign { value, .. }) => self.visit_expr(value.as_ref()),
                ast::Stmt::If(ast::StmtIf { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::For(ast::StmtFor { body, orelse, .. })
                | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::With(ast::StmtWith { body, .. }) => self.find_in_statements(body),
                ast::Stmt::Try(ast::StmtTry {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                })
                | ast::Stmt::TryStar(ast::StmtTryStar {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse))
                    .or_else(|| self.find_in_statements(finalbody))
                    .or_else(|| {
                        // Also search each `except` handler body.
                        for handler in handlers {
                            let ast::ExceptHandler::ExceptHandler(
                                ast::ExceptHandlerExceptHandler { body, .. },
                            ) = handler;
                            if let Some(found) = self.find_in_statements(body) {
                                return Some(found);
                            }
                        }
                        None
                    }),
                _ => None,
            };

            if found.is_some() {
                return found;
            }
        }

        None
    }

    /// Return `expr` itself when it is a call to a known `setup` alias.
    fn visit_expr(&mut self, expr: &'a ast::Expr) -> Option<&'a ast::Expr> {
        if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Call(ast::ExprCall { func, .. })
                if is_setup_call(func.as_ref(), self.aliases) =>
            {
                Some(expr)
            }
            _ => None,
        }
    }
}
2993
2994fn is_setup_call(func: &ast::Expr, aliases: &SetupAliases) -> bool {
2995 let Some(dotted) = dotted_name(func, 0) else {
2996 return false;
2997 };
2998
2999 if aliases.setup_names.contains(&dotted) {
3000 return true;
3001 }
3002
3003 let Some(module) = dotted.strip_suffix(".setup") else {
3004 return false;
3005 };
3006
3007 let resolved = resolve_module_alias(module, aliases);
3008 is_setup_module(&resolved)
3009}
3010
3011fn dotted_name(expr: &ast::Expr, depth: usize) -> Option<String> {
3012 if depth >= MAX_SETUP_PY_AST_DEPTH {
3013 return None;
3014 }
3015
3016 match expr {
3017 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
3018 ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
3019 let base = dotted_name(value.as_ref(), depth + 1)?;
3020 Some(format!("{}.{}", base, attr.as_str()))
3021 }
3022 _ => None,
3023 }
3024}
3025
3026fn resolve_module_alias(module: &str, aliases: &SetupAliases) -> String {
3027 if let Some(mapped) = aliases.module_aliases.get(module) {
3028 return mapped.clone();
3029 }
3030
3031 let Some((base, rest)) = module.split_once('.') else {
3032 return module.to_string();
3033 };
3034
3035 if let Some(mapped) = aliases.module_aliases.get(base) {
3036 return format!("{}.{}", mapped, rest);
3037 }
3038
3039 module.to_string()
3040}
3041
3042fn extract_setup_keywords(
3043 call_expr: &ast::Expr,
3044 evaluator: &mut LiteralEvaluator,
3045) -> HashMap<String, Value> {
3046 let mut values = HashMap::new();
3047 let ast::Expr::Call(ast::ExprCall { keywords, .. }) = call_expr else {
3048 return values;
3049 };
3050
3051 for keyword in keywords {
3052 if let Some(arg) = keyword.arg.as_ref().map(|name| name.as_str()) {
3053 if let Some(value) = evaluator.evaluate_expr(&keyword.value, 0) {
3054 values.insert(arg.to_string(), value);
3055 }
3056 } else if let Some(Value::Dict(dict)) = evaluator.evaluate_expr(&keyword.value, 0) {
3057 for (key, value) in dict {
3058 values.insert(key, value);
3059 }
3060 }
3061 }
3062
3063 values
3064}
3065
/// Assemble a `PackageData` from the evaluated `setup(...)` keyword values.
///
/// Pulls the common setuptools/distutils keywords (`name`, `version`,
/// `description`/`summary`, `url`/`home_page`, author/maintainer fields,
/// `license`, `classifiers`, dependency keywords and `project_urls`) and
/// maps them onto the scancode-style package model.
fn build_setup_py_package_data(values: &HashMap<String, Value>) -> PackageData {
    let name = get_value_string(values, "name");
    let version = get_value_string(values, "version");
    // Both spellings appear in the wild; the first present wins.
    let description =
        get_value_string(values, "description").or_else(|| get_value_string(values, "summary"));
    let homepage_url =
        get_value_string(values, "url").or_else(|| get_value_string(values, "home_page"));
    let author = get_value_string(values, "author");
    let author_email = get_value_string(values, "author_email");
    let maintainer = get_value_string(values, "maintainer");
    let maintainer_email = get_value_string(values, "maintainer_email");
    let license = get_value_string(values, "license");
    let classifiers = values
        .get("classifiers")
        .and_then(value_to_string_list)
        .unwrap_or_default();

    // A party is recorded when either the name or the email is present.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    if maintainer.is_some() || maintainer_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("maintainer".to_string()),
            name: maintainer,
            email: maintainer_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license.as_deref());
    let extracted_license_statement = license.clone();

    let dependencies = build_setup_py_dependencies(values);
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
    let mut homepage_from_project_urls = None;
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
    let mut extra_data = HashMap::new();

    // `project_urls` can map onto several dedicated URL fields; anything
    // unrecognized lands in extra_data.
    if let Some(parsed_project_urls) = values.get("project_urls").and_then(value_to_string_pairs) {
        apply_project_url_mappings(
            &parsed_project_urls,
            &mut homepage_from_project_urls,
            &mut bug_tracking_url,
            &mut code_view_url,
            &mut vcs_url,
            &mut extra_data,
        );
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description,
        release_date: None,
        parties,
        keywords: Vec::new(),
        // The explicit `url`/`home_page` keyword wins over project_urls.
        homepage_url: homepage_url.or(homepage_from_project_urls),
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
3182
3183fn build_setup_py_dependencies(values: &HashMap<String, Value>) -> Vec<Dependency> {
3184 let mut dependencies = Vec::new();
3185
3186 if let Some(reqs) = values
3187 .get("install_requires")
3188 .and_then(value_to_string_list)
3189 {
3190 dependencies.extend(build_setup_py_dependency_list(&reqs, "install", false));
3191 }
3192
3193 if let Some(reqs) = values.get("tests_require").and_then(value_to_string_list) {
3194 dependencies.extend(build_setup_py_dependency_list(&reqs, "test", true));
3195 }
3196
3197 if let Some(Value::Dict(extras)) = values.get("extras_require") {
3198 let mut extra_items: Vec<_> = extras.iter().collect();
3199 extra_items.sort_by_key(|(name, _)| *name);
3200 for (extra_name, extra_value) in extra_items {
3201 if let Some(reqs) = value_to_string_list(extra_value) {
3202 dependencies.extend(build_setup_py_dependency_list(
3203 reqs.as_slice(),
3204 extra_name,
3205 true,
3206 ));
3207 }
3208 }
3209 }
3210
3211 dependencies
3212}
3213
3214fn build_setup_py_dependency_list(
3215 reqs: &[String],
3216 scope: &str,
3217 is_optional: bool,
3218) -> Vec<Dependency> {
3219 reqs.iter()
3220 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
3221 .collect()
3222}
3223
3224fn get_value_string(values: &HashMap<String, Value>, key: &str) -> Option<String> {
3225 values.get(key).and_then(value_to_string)
3226}
3227
3228fn value_to_string(value: &Value) -> Option<String> {
3229 match value {
3230 Value::String(value) => Some(value.clone()),
3231 Value::Number(value) => Some(value.to_string()),
3232 Value::Bool(value) => Some(value.to_string()),
3233 _ => None,
3234 }
3235}
3236
3237fn value_to_string_list(value: &Value) -> Option<Vec<String>> {
3238 match value {
3239 Value::String(value) => Some(vec![value.clone()]),
3240 Value::List(values) | Value::Tuple(values) => {
3241 let mut items = Vec::new();
3242 for item in values {
3243 items.push(value_to_string(item)?);
3244 }
3245 Some(items)
3246 }
3247 _ => None,
3248 }
3249}
3250
3251fn value_to_string_pairs(value: &Value) -> Option<Vec<(String, String)>> {
3252 let Value::Dict(dict) = value else {
3253 return None;
3254 };
3255
3256 let mut pairs: Vec<(String, String)> = dict
3257 .iter()
3258 .map(|(key, value)| Some((key.clone(), value_to_string(value)?)))
3259 .collect::<Option<Vec<_>>>()?;
3260 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3261 Some(pairs)
3262}
3263
3264fn extract_rfc822_dependencies(headers: &HashMap<String, Vec<String>>) -> Vec<Dependency> {
3265 let requires_dist = super::rfc822::get_header_all(headers, "requires-dist");
3266 requires_dist
3267 .iter()
3268 .filter_map(|entry| build_rfc822_dependency(entry))
3269 .collect()
3270}
3271
/// Build a dependency from a single `Requires-Dist` header entry.
///
/// Defaults to the runtime "install" scope and non-optional; the entry's
/// environment marker (if any) may override both.
fn build_rfc822_dependency(entry: &str) -> Option<Dependency> {
    build_python_dependency(entry, "install", false, None)
}
3275
/// Build a `Dependency` from a PEP 508-style requirement string
/// (e.g. `requests[security] (>=2.0) ; python_version >= "3.8"`).
///
/// `marker_override` supplies a marker from surrounding context (such as a
/// requires.txt section header) used when the entry itself carries none.
/// Returns `None` when no distribution name can be extracted or the purl
/// cannot be built.
fn build_python_dependency(
    entry: &str,
    default_scope: &str,
    default_optional: bool,
    marker_override: Option<&str>,
) -> Option<Dependency> {
    // Split `name-and-specifiers ; marker` on the first semicolon.
    let (requirement_part, marker_part) = entry
        .split_once(';')
        .map(|(req, marker)| (req.trim(), Some(marker.trim())))
        .unwrap_or((entry.trim(), None));

    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let requirement = normalize_rfc822_requirement(requirement_part);
    let (scope, is_optional, marker, marker_data) = parse_rfc822_marker(
        marker_part.or(marker_override),
        default_scope,
        default_optional,
    );
    let mut purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;

    // Only exact pins ("==" / "===") contribute a version to the purl.
    let is_pinned = requirement
        .as_deref()
        .is_some_and(|req| req.starts_with("==") || req.starts_with("==="));
    if is_pinned
        && let Some(version) = requirement
            .as_deref()
            .map(|req| req.trim_start_matches('='))
    {
        purl.with_version(version).ok()?;
    }

    // Marker details (python_version / sys_platform / the raw marker text)
    // travel in extra_data.
    let mut extra_data = HashMap::new();
    extra_data.extend(marker_data);
    if let Some(marker) = marker {
        extra_data.insert("marker".to_string(), serde_json::Value::String(marker));
    }

    Some(Dependency {
        purl: Some(purl.to_string()),
        extracted_requirement: requirement,
        scope: Some(scope),
        is_runtime: Some(true),
        is_optional: Some(is_optional),
        is_pinned: Some(is_pinned),
        is_direct: Some(true),
        resolved_package: None,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
    })
}
3329
/// Normalize the specifier portion of a requirement into a canonical string.
///
/// Strips the distribution name, any `[extras]` group and optional
/// parentheses, then sorts the comma-separated specifiers with internal
/// whitespace removed (so `foo (>= 1.0, <2)` becomes `"<2,>=1.0"`).
/// Returns `None` when no specifiers remain.
fn normalize_rfc822_requirement(requirement_part: &str) -> Option<String> {
    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let trimmed = requirement_part.trim();
    // NOTE(review): slicing at name.len() assumes the extracted name is a
    // prefix of the trimmed requirement — confirm against the helper.
    let mut remainder = trimmed[name.len()..].trim();

    // Skip an extras group such as `[security]` right after the name.
    if let Some(stripped) = remainder.strip_prefix('[')
        && let Some(end_idx) = stripped.find(']')
    {
        remainder = stripped[end_idx + 1..].trim();
    }

    // Specifiers may be wrapped in parentheses: `(>=1.0, <2)`.
    let remainder = remainder
        .strip_prefix('(')
        .and_then(|value| value.strip_suffix(')'))
        .unwrap_or(remainder)
        .trim();

    if remainder.is_empty() {
        return None;
    }

    // Sort so logically equal requirements compare equal as strings.
    let mut specifiers: Vec<String> = remainder
        .split(',')
        .map(|specifier| specifier.trim().replace(' ', ""))
        .filter(|specifier| !specifier.is_empty())
        .collect();
    specifiers.sort();
    Some(specifiers.join(","))
}
3359
3360fn parse_rfc822_marker(
3361 marker_part: Option<&str>,
3362 default_scope: &str,
3363 default_optional: bool,
3364) -> (
3365 String,
3366 bool,
3367 Option<String>,
3368 HashMap<String, serde_json::Value>,
3369) {
3370 let Some(marker) = marker_part.filter(|marker| !marker.trim().is_empty()) else {
3371 return (
3372 default_scope.to_string(),
3373 default_optional,
3374 None,
3375 HashMap::new(),
3376 );
3377 };
3378
3379 let extra_re = Regex::new(r#"extra\s*==\s*['\"]([^'\"]+)['\"]"#)
3380 .expect("extra marker regex should compile");
3381 let mut extra_data = HashMap::new();
3382
3383 if let Some(python_version) = extract_marker_field(marker, "python_version") {
3384 extra_data.insert(
3385 "python_version".to_string(),
3386 serde_json::Value::String(python_version),
3387 );
3388 }
3389 if let Some(sys_platform) = extract_marker_field(marker, "sys_platform") {
3390 extra_data.insert(
3391 "sys_platform".to_string(),
3392 serde_json::Value::String(sys_platform),
3393 );
3394 }
3395
3396 if let Some(captures) = extra_re.captures(marker)
3397 && let Some(scope) = captures.get(1)
3398 {
3399 return (
3400 scope.as_str().to_string(),
3401 true,
3402 Some(marker.trim().to_string()),
3403 extra_data,
3404 );
3405 }
3406
3407 (
3408 default_scope.to_string(),
3409 default_optional,
3410 Some(marker.trim().to_string()),
3411 extra_data,
3412 )
3413}
3414
3415fn extract_marker_field(marker: &str, field: &str) -> Option<String> {
3416 let re = Regex::new(&format!(
3417 r#"{}\s*(==|!=|<=|>=|<|>)\s*['\"]([^'\"]+)['\"]"#,
3418 field
3419 ))
3420 .ok()?;
3421 let captures = re.captures(marker)?;
3422 let operator = captures.get(1)?.as_str();
3423 let value = captures.get(2)?.as_str();
3424 Some(format!("{} {}", operator, value))
3425}
3426
/// Parse an egg-info `requires.txt` file into dependencies.
///
/// The file is a flat list of requirements, optionally grouped by section
/// headers: `[extra]` marks an optional extra scope, `[extra:marker]` an
/// extra gated by an environment marker, and `[:marker]` install-scope
/// requirements gated by a marker. A header's state applies to every
/// following line until the next header.
fn parse_requires_txt(content: &str) -> Vec<Dependency> {
    let mut dependencies = Vec::new();
    // Section state, updated whenever a `[...]` header line is seen.
    let mut current_scope = "install".to_string();
    let mut current_optional = false;
    let mut current_marker: Option<String> = None;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        if trimmed.starts_with('[') && trimmed.ends_with(']') {
            let inner = &trimmed[1..trimmed.len() - 1];
            if let Some(rest) = inner.strip_prefix(':') {
                // `[:marker]` — marker-gated install requirements.
                current_scope = "install".to_string();
                current_optional = false;
                current_marker = Some(rest.trim().to_string());
            } else if let Some((scope, marker)) = inner.split_once(':') {
                // `[extra:marker]` — marker-gated optional extra.
                current_scope = scope.trim().to_string();
                current_optional = true;
                current_marker = Some(marker.trim().to_string());
            } else {
                // `[extra]` — plain optional extra.
                current_scope = inner.trim().to_string();
                current_optional = true;
                current_marker = None;
            }
            continue;
        }

        if let Some(dependency) = build_python_dependency(
            trimmed,
            &current_scope,
            current_optional,
            current_marker.as_deref(),
        ) {
            dependencies.push(dependency);
        }
    }

    dependencies
}
3469
/// True when the trove classifiers contain the conventional
/// `Private :: Do Not Upload` marker (case-insensitive ASCII comparison).
fn has_private_classifier(classifiers: &[String]) -> bool {
    for classifier in classifiers {
        if classifier.eq_ignore_ascii_case("Private :: Do Not Upload") {
            return true;
        }
    }
    false
}
3475
3476fn build_setup_py_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
3477 let name = name?;
3478 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
3479 if let Some(version) = version {
3480 package_url.with_version(version).ok()?;
3481 }
3482 Some(package_url.to_string())
3483}
3484
/// Best-effort extraction of package metadata from `setup.py` source text
/// using plain string matching (see `extract_setup_value`) instead of an AST.
///
/// Captures only literal `name`, `version`, `license`, and `url` keyword
/// values plus `tests_require` / `extras_require` dependency lists; values
/// computed at runtime by the script are not detected.
fn extract_from_setup_py_regex(content: &str) -> PackageData {
    let name = extract_setup_value(content, "name");
    let version = extract_setup_value(content, "version");
    let license_expression = extract_setup_value(content, "license");

    // Normalize the raw license text to SPDX where possible, while keeping
    // the verbatim statement in `extracted_license_statement`.
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license_expression.as_deref());
    let extracted_license_statement = license_expression.clone();

    let dependencies = extract_setup_py_dependencies(content);
    let homepage_url = extract_setup_value(content, "url");
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description: None,
        release_date: None,
        parties: Vec::new(),
        keywords: Vec::new(),
        homepage_url,
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url: None,
        code_view_url: None,
        vcs_url: None,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: false,
        is_virtual: false,
        extra_data: None,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
3543
/// Project a full `PackageData` onto the narrower `ResolvedPackage` shape
/// used inside `Dependency::resolved_package` entries.
///
/// Optional identity fields are defaulted here: a missing package type
/// becomes `PackageType::Pypi`, and missing namespace/name/version become
/// empty strings. `extra_data` is deliberately dropped (set to `None`).
fn package_data_to_resolved(pkg: &PackageData) -> crate::models::ResolvedPackage {
    crate::models::ResolvedPackage {
        package_type: pkg.package_type.unwrap_or(PackageType::Pypi),
        namespace: pkg.namespace.clone().unwrap_or_default(),
        name: pkg.name.clone().unwrap_or_default(),
        version: pkg.version.clone().unwrap_or_default(),
        primary_language: pkg.primary_language.clone(),
        download_url: pkg.download_url.clone(),
        sha1: pkg.sha1.clone(),
        sha256: pkg.sha256.clone(),
        sha512: pkg.sha512.clone(),
        md5: pkg.md5.clone(),
        is_virtual: pkg.is_virtual,
        extra_data: None,
        dependencies: pkg.dependencies.clone(),
        repository_homepage_url: pkg.repository_homepage_url.clone(),
        repository_download_url: pkg.repository_download_url.clone(),
        api_data_url: pkg.api_data_url.clone(),
        datasource_id: pkg.datasource_id,
        purl: pkg.purl.clone(),
    }
}
3566
/// Extract package metadata from a PyPI API `pypi.json` response file.
///
/// Reads the `info` object for descriptive metadata, maps `project_urls`
/// onto the well-known URL slots, and picks an artifact from the `urls`
/// list (preferring the sdist) for download URL / size / sha256. Returns a
/// minimal default `PackageData` when the file cannot be read, is not valid
/// JSON, or has no `info` object.
fn extract_from_pypi_json(path: &Path) -> PackageData {
    // Minimal result used on every early-exit path below.
    let default = PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        datasource_id: Some(DatasourceId::PypiJson),
        ..Default::default()
    };

    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(error) => {
            warn!("Failed to read pypi.json at {:?}: {}", path, error);
            return default;
        }
    };

    let root: serde_json::Value = match serde_json::from_str(&content) {
        Ok(value) => value,
        Err(error) => {
            warn!("Failed to parse pypi.json at {:?}: {}", path, error);
            return default;
        }
    };

    let Some(info) = root.get("info").and_then(|value| value.as_object()) else {
        warn!("No info object found in pypi.json at {:?}", path);
        return default;
    };

    let name = info
        .get("name")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    let version = info
        .get("version")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    let summary = info
        .get("summary")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    // Prefer the long description; fall back to the one-line summary.
    let description = info
        .get("description")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned)
        .or(summary);
    let mut homepage_url = info
        .get("home_page")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    let author = info
        .get("author")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    let author_email = info
        .get("author_email")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    let license = info
        .get("license")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    // PyPI serves keywords as a single comma-separated string.
    let keywords = parse_setup_cfg_keywords(
        info.get("keywords")
            .and_then(|value| value.as_str())
            .map(ToOwned::to_owned),
    );
    let classifiers = info
        .get("classifiers")
        .and_then(|value| value.as_array())
        .map(|values| {
            values
                .iter()
                .filter_map(|value| value.as_str().map(ToOwned::to_owned))
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    let mut bug_tracking_url = None;
    let mut code_view_url = None;
    let mut vcs_url = None;
    let mut extra_data = HashMap::new();

    // `project_urls` is a JSON object; sort pairs by label so output is
    // deterministic regardless of map iteration order.
    let parsed_project_urls = info
        .get("project_urls")
        .and_then(|value| value.as_object())
        .map(|map| {
            let mut pairs: Vec<(String, String)> = map
                .iter()
                .filter_map(|(key, value)| Some((key.clone(), value.as_str()?.to_string())))
                .collect();
            pairs.sort_by(|left, right| left.0.cmp(&right.0));
            pairs
        })
        .unwrap_or_default();

    apply_project_url_mappings(
        &parsed_project_urls,
        &mut homepage_url,
        &mut bug_tracking_url,
        &mut code_view_url,
        &mut vcs_url,
        &mut extra_data,
    );

    // Pick a release artifact (sdist preferred) for download metadata.
    let (download_url, size, sha256) = root
        .get("urls")
        .and_then(|value| value.as_array())
        .map(|urls| select_pypi_json_artifact(urls))
        .unwrap_or((None, None, None));

    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
        build_pypi_urls(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description,
        release_date: None,
        parties,
        keywords,
        // No explicit homepage: fall back to the pypi.org project page.
        homepage_url: homepage_url.or(repository_homepage_url.clone()),
        download_url,
        size,
        sha1: None,
        md5: None,
        sha256,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression: None,
        declared_license_expression_spdx: None,
        license_detections: Vec::new(),
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement: license,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
        dependencies: Vec::new(),
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(DatasourceId::PypiJson),
        purl,
    }
}
3747
3748fn select_pypi_json_artifact(
3749 urls: &[serde_json::Value],
3750) -> (Option<String>, Option<u64>, Option<String>) {
3751 let selected = urls
3752 .iter()
3753 .find(|entry| entry.get("packagetype").and_then(|value| value.as_str()) == Some("sdist"))
3754 .or_else(|| urls.first());
3755
3756 let Some(entry) = selected else {
3757 return (None, None, None);
3758 };
3759
3760 let download_url = entry
3761 .get("url")
3762 .and_then(|value| value.as_str())
3763 .map(ToOwned::to_owned);
3764 let size = entry.get("size").and_then(|value| value.as_u64());
3765 let sha256 = entry
3766 .get("digests")
3767 .and_then(|value| value.as_object())
3768 .and_then(|digests| digests.get("sha256"))
3769 .and_then(|value| value.as_str())
3770 .map(ToOwned::to_owned);
3771
3772 (download_url, size, sha256)
3773}
3774
/// Extract package data from a `pip-inspect.deplock` file (the JSON output
/// of `pip inspect`).
///
/// The `installed` array is split in two:
/// - the "main" package: the entry that was explicitly `requested` AND has a
///   `direct_url` (i.e. the locally installed project itself);
/// - every other entry, recorded as a pinned, resolved dependency attached
///   to the main package.
///
/// Returns a default `PackageData` when the file cannot be read/parsed, when
/// `installed` is missing, or when no main package is found.
fn extract_from_pip_inspect(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip-inspect.deplock at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: serde_json::Value = match serde_json::from_str(&content) {
        Ok(value) => value,
        Err(e) => {
            warn!(
                "Failed to parse pip-inspect.deplock JSON at {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    let installed = match root.get("installed").and_then(|v| v.as_array()) {
        Some(arr) => arr,
        None => {
            warn!(
                "No 'installed' array found in pip-inspect.deplock at {:?}",
                path
            );
            return default_package_data();
        }
    };

    // Top-level inspect metadata, attached to the main package's extra_data.
    let pip_version = root
        .get("pip_version")
        .and_then(|v| v.as_str())
        .map(String::from);
    let inspect_version = root
        .get("version")
        .and_then(|v| v.as_str())
        .map(String::from);

    let mut main_package: Option<PackageData> = None;
    let mut dependencies: Vec<Dependency> = Vec::new();

    for package_entry in installed {
        // Entries without a metadata object carry nothing usable; skip them.
        let metadata = match package_entry.get("metadata") {
            Some(m) => m,
            None => continue,
        };

        let is_requested = package_entry
            .get("requested")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let has_direct_url = package_entry.get("direct_url").is_some();

        let name = metadata
            .get("name")
            .and_then(|v| v.as_str())
            .map(String::from);
        let version = metadata
            .get("version")
            .and_then(|v| v.as_str())
            .map(String::from);
        let summary = metadata
            .get("summary")
            .and_then(|v| v.as_str())
            .map(String::from);
        let home_page = metadata
            .get("home_page")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author = metadata
            .get("author")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author_email = metadata
            .get("author_email")
            .and_then(|v| v.as_str())
            .map(String::from);
        let license = metadata
            .get("license")
            .and_then(|v| v.as_str())
            .map(String::from);
        let description = metadata
            .get("description")
            .and_then(|v| v.as_str())
            .map(String::from);
        let keywords = metadata
            .get("keywords")
            .and_then(|v| v.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|k| k.as_str().map(String::from))
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();

        let mut parties = Vec::new();
        if author.is_some() || author_email.is_some() {
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: author,
                email: author_email,
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }

        // License text is kept verbatim; no SPDX normalization is done here.
        let license_detections = Vec::new();
        let declared_license_expression = None;
        let declared_license_expression_spdx = None;
        let extracted_license_statement = license.clone();

        let purl = name.as_ref().and_then(|n| {
            let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
            if let Some(v) = &version {
                package_url.with_version(v).ok()?;
            }
            Some(package_url.to_string())
        });

        if is_requested && has_direct_url {
            // This entry is the locally installed project itself.
            let mut extra_data = HashMap::new();
            if let Some(pv) = &pip_version {
                extra_data.insert(
                    "pip_version".to_string(),
                    serde_json::Value::String(pv.clone()),
                );
            }
            if let Some(iv) = &inspect_version {
                extra_data.insert(
                    "inspect_version".to_string(),
                    serde_json::Value::String(iv.clone()),
                );
            }

            main_package = Some(PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name,
                version,
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: if extra_data.is_empty() {
                    None
                } else {
                    Some(extra_data)
                },
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl,
            });
        } else {
            // Any other installed entry becomes a resolved dependency.
            let resolved_package = PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name: name.clone(),
                version: version.clone(),
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: None,
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl: purl.clone(),
            };

            let resolved = package_data_to_resolved(&resolved_package);
            // Installed versions are concrete, hence is_pinned = true;
            // is_direct mirrors whether pip recorded the entry as requested.
            dependencies.push(Dependency {
                purl,
                extracted_requirement: None,
                scope: None,
                is_runtime: Some(true),
                is_optional: Some(false),
                is_pinned: Some(true),
                is_direct: Some(is_requested),
                resolved_package: Some(Box::new(resolved)),
                extra_data: None,
            });
        }
    }

    if let Some(mut main_pkg) = main_package {
        main_pkg.dependencies = dependencies;
        main_pkg
    } else {
        default_package_data()
    }
}
4031
/// Parsed `setup.cfg`-style INI content: lowercase section name -> lowercase
/// key -> values (one entry per value line, preserving order).
type IniSections = HashMap<String, HashMap<String, Vec<String>>>;
4033
4034fn extract_from_setup_cfg(path: &Path) -> PackageData {
4035 let content = match read_file_to_string(path) {
4036 Ok(content) => content,
4037 Err(e) => {
4038 warn!("Failed to read setup.cfg at {:?}: {}", path, e);
4039 return default_package_data();
4040 }
4041 };
4042
4043 let sections = parse_setup_cfg(&content);
4044 let name = get_ini_value(§ions, "metadata", "name");
4045 let version = get_ini_value(§ions, "metadata", "version");
4046 let description = get_ini_value(§ions, "metadata", "description");
4047 let author = get_ini_value(§ions, "metadata", "author");
4048 let author_email = get_ini_value(§ions, "metadata", "author_email");
4049 let maintainer = get_ini_value(§ions, "metadata", "maintainer");
4050 let maintainer_email = get_ini_value(§ions, "metadata", "maintainer_email");
4051 let license = get_ini_value(§ions, "metadata", "license");
4052 let mut homepage_url = get_ini_value(§ions, "metadata", "url");
4053 let classifiers = get_ini_values(§ions, "metadata", "classifiers");
4054 let keywords = parse_setup_cfg_keywords(get_ini_value(§ions, "metadata", "keywords"));
4055 let python_requires = get_ini_value(§ions, "options", "python_requires");
4056 let parsed_project_urls =
4057 parse_setup_cfg_project_urls(&get_ini_values(§ions, "metadata", "project_urls"));
4058 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
4059 let mut extra_data = HashMap::new();
4060
4061 let mut parties = Vec::new();
4062 if author.is_some() || author_email.is_some() {
4063 parties.push(Party {
4064 r#type: Some("person".to_string()),
4065 role: Some("author".to_string()),
4066 name: author,
4067 email: author_email,
4068 url: None,
4069 organization: None,
4070 organization_url: None,
4071 timezone: None,
4072 });
4073 }
4074
4075 if maintainer.is_some() || maintainer_email.is_some() {
4076 parties.push(Party {
4077 r#type: Some("person".to_string()),
4078 role: Some("maintainer".to_string()),
4079 name: maintainer,
4080 email: maintainer_email,
4081 url: None,
4082 organization: None,
4083 organization_url: None,
4084 timezone: None,
4085 });
4086 }
4087
4088 let declared_license_expression = None;
4089 let declared_license_expression_spdx = None;
4090 let license_detections = Vec::new();
4091 let extracted_license_statement = license.clone();
4092
4093 let dependencies = extract_setup_cfg_dependencies(§ions);
4094
4095 if let Some(value) = python_requires {
4096 extra_data.insert(
4097 "python_requires".to_string(),
4098 serde_json::Value::String(value),
4099 );
4100 }
4101
4102 apply_project_url_mappings(
4103 &parsed_project_urls,
4104 &mut homepage_url,
4105 &mut bug_tracking_url,
4106 &mut code_view_url,
4107 &mut vcs_url,
4108 &mut extra_data,
4109 );
4110
4111 let extra_data = if extra_data.is_empty() {
4112 None
4113 } else {
4114 Some(extra_data)
4115 };
4116
4117 let purl = name.as_ref().and_then(|n| {
4118 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
4119 if let Some(v) = &version {
4120 package_url.with_version(v).ok()?;
4121 }
4122 Some(package_url.to_string())
4123 });
4124
4125 PackageData {
4126 package_type: Some(PythonParser::PACKAGE_TYPE),
4127 namespace: None,
4128 name,
4129 version,
4130 qualifiers: None,
4131 subpath: None,
4132 primary_language: Some("Python".to_string()),
4133 description,
4134 release_date: None,
4135 parties,
4136 keywords,
4137 homepage_url,
4138 download_url: None,
4139 size: None,
4140 sha1: None,
4141 md5: None,
4142 sha256: None,
4143 sha512: None,
4144 bug_tracking_url,
4145 code_view_url,
4146 vcs_url,
4147 copyright: None,
4148 holder: None,
4149 declared_license_expression,
4150 declared_license_expression_spdx,
4151 license_detections,
4152 other_license_expression: None,
4153 other_license_expression_spdx: None,
4154 other_license_detections: Vec::new(),
4155 extracted_license_statement,
4156 notice_text: None,
4157 source_packages: Vec::new(),
4158 file_references: Vec::new(),
4159 is_private: has_private_classifier(&classifiers),
4160 is_virtual: false,
4161 extra_data,
4162 dependencies,
4163 repository_homepage_url: None,
4164 repository_download_url: None,
4165 api_data_url: None,
4166 datasource_id: Some(DatasourceId::PypiSetupCfg),
4167 purl,
4168 }
4169}
4170
/// Split a comma-separated keywords string into trimmed, non-empty keywords.
/// `None` (or an all-whitespace string) yields an empty list.
fn parse_setup_cfg_keywords(value: Option<String>) -> Vec<String> {
    match value {
        None => Vec::new(),
        Some(raw) => raw
            .split(',')
            .filter_map(|keyword| {
                let keyword = keyword.trim();
                if keyword.is_empty() {
                    None
                } else {
                    Some(keyword.to_string())
                }
            })
            .collect(),
    }
}
4183
/// Parse `project_urls` entries of the form `Label = URL` into
/// `(label, url)` pairs, dropping malformed or empty entries.
fn parse_setup_cfg_project_urls(entries: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for entry in entries {
        // Entries without an `=` separator are silently skipped.
        if let Some((label, url)) = entry.split_once('=') {
            let label = label.trim();
            let url = url.trim();
            if !label.is_empty() && !url.is_empty() {
                pairs.push((label.to_string(), url.to_string()));
            }
        }
    }
    pairs
}
4199
4200fn apply_project_url_mappings(
4201 parsed_urls: &[(String, String)],
4202 homepage_url: &mut Option<String>,
4203 bug_tracking_url: &mut Option<String>,
4204 code_view_url: &mut Option<String>,
4205 vcs_url: &mut Option<String>,
4206 extra_data: &mut HashMap<String, serde_json::Value>,
4207) {
4208 for (label, url) in parsed_urls {
4209 let label_lower = label.to_lowercase();
4210
4211 if bug_tracking_url.is_none()
4212 && matches!(
4213 label_lower.as_str(),
4214 "tracker"
4215 | "bug reports"
4216 | "bug tracker"
4217 | "issues"
4218 | "issue tracker"
4219 | "github: issues"
4220 )
4221 {
4222 *bug_tracking_url = Some(url.clone());
4223 } else if code_view_url.is_none()
4224 && matches!(label_lower.as_str(), "source" | "source code" | "code")
4225 {
4226 *code_view_url = Some(url.clone());
4227 } else if vcs_url.is_none()
4228 && matches!(
4229 label_lower.as_str(),
4230 "github" | "gitlab" | "github: repo" | "repository"
4231 )
4232 {
4233 *vcs_url = Some(url.clone());
4234 } else if homepage_url.is_none()
4235 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
4236 {
4237 *homepage_url = Some(url.clone());
4238 } else if label_lower == "changelog" {
4239 extra_data.insert(
4240 "changelog_url".to_string(),
4241 serde_json::Value::String(url.clone()),
4242 );
4243 }
4244 }
4245
4246 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
4247 .iter()
4248 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
4249 .collect();
4250
4251 if !project_urls_json.is_empty() {
4252 extra_data.insert(
4253 "project_urls".to_string(),
4254 serde_json::Value::Object(project_urls_json),
4255 );
4256 }
4257}
4258
4259fn parse_setup_cfg(content: &str) -> IniSections {
4260 let mut sections: IniSections = HashMap::new();
4261 let mut current_section: Option<String> = None;
4262 let mut current_key: Option<String> = None;
4263
4264 for raw_line in content.lines() {
4265 let line = raw_line.trim_end_matches('\r');
4266 let trimmed = line.trim();
4267 if trimmed.is_empty() {
4268 continue;
4269 }
4270
4271 let stripped = line.trim_start();
4272 if stripped.starts_with('#') || stripped.starts_with(';') {
4273 continue;
4274 }
4275
4276 if stripped.starts_with('[') && stripped.ends_with(']') {
4277 let section_name = stripped
4278 .trim_start_matches('[')
4279 .trim_end_matches(']')
4280 .trim()
4281 .to_ascii_lowercase();
4282 current_section = if section_name.is_empty() {
4283 None
4284 } else {
4285 Some(section_name)
4286 };
4287 current_key = None;
4288 continue;
4289 }
4290
4291 if (line.starts_with(' ') || line.starts_with('\t')) && current_key.is_some() {
4292 if let (Some(section), Some(key)) = (current_section.as_ref(), current_key.as_ref()) {
4293 let value = stripped.trim();
4294 if !value.is_empty() {
4295 sections
4296 .entry(section.clone())
4297 .or_default()
4298 .entry(key.clone())
4299 .or_default()
4300 .push(value.to_string());
4301 }
4302 }
4303 continue;
4304 }
4305
4306 if let Some((key, value)) = stripped.split_once('=')
4307 && let Some(section) = current_section.as_ref()
4308 {
4309 let key_name = key.trim().to_ascii_lowercase();
4310 let value_trimmed = value.trim();
4311 let entry = sections
4312 .entry(section.clone())
4313 .or_default()
4314 .entry(key_name.clone())
4315 .or_default();
4316 if !value_trimmed.is_empty() {
4317 entry.push(value_trimmed.to_string());
4318 }
4319 current_key = Some(key_name);
4320 }
4321 }
4322
4323 sections
4324}
4325
/// Look up the first value for `section`/`key` (both matched
/// case-insensitively via lowercasing) and return it trimmed.
///
/// Fix: restores the `&section` receiver that had been corrupted into the
/// mojibake `§ion` (an HTML-entity mangling of `&sect`), which did not
/// compile. The parameter type is the expansion of the `IniSections` alias
/// (identical type; callers are unaffected).
fn get_ini_value(
    sections: &HashMap<String, HashMap<String, Vec<String>>>,
    section: &str,
    key: &str,
) -> Option<String> {
    sections
        .get(&section.to_ascii_lowercase())
        .and_then(|values| values.get(&key.to_ascii_lowercase()))
        .and_then(|entries| entries.first())
        .map(|value| value.trim().to_string())
}
4333
/// Look up all values for `section`/`key` (both matched case-insensitively
/// via lowercasing); returns an empty vector when absent. Values are
/// returned untrimmed, exactly as parsed.
///
/// Fix: restores the `&section` receiver that had been corrupted into the
/// mojibake `§ion` (an HTML-entity mangling of `&sect`), which did not
/// compile. The parameter type is the expansion of the `IniSections` alias
/// (identical type; callers are unaffected).
fn get_ini_values(
    sections: &HashMap<String, HashMap<String, Vec<String>>>,
    section: &str,
    key: &str,
) -> Vec<String> {
    sections
        .get(&section.to_ascii_lowercase())
        .and_then(|values| values.get(&key.to_ascii_lowercase()))
        .cloned()
        .unwrap_or_default()
}
4341
4342fn extract_setup_cfg_dependencies(sections: &IniSections) -> Vec<Dependency> {
4343 let mut dependencies = Vec::new();
4344
4345 for (sub_section, scope) in [
4346 ("install_requires", "install"),
4347 ("tests_require", "test"),
4348 ("setup_requires", "setup"),
4349 ] {
4350 let reqs = get_ini_values(sections, "options", sub_section);
4351 dependencies.extend(parse_setup_cfg_requirements(&reqs, scope, false));
4352 }
4353
4354 if let Some(extras) = sections.get("options.extras_require") {
4355 let mut extra_items: Vec<_> = extras.iter().collect();
4356 extra_items.sort_by_key(|(name, _)| *name);
4357 for (extra_name, reqs) in extra_items {
4358 dependencies.extend(parse_setup_cfg_requirements(reqs, extra_name, true));
4359 }
4360 }
4361
4362 dependencies
4363}
4364
4365fn parse_setup_cfg_requirements(
4366 reqs: &[String],
4367 scope: &str,
4368 is_optional: bool,
4369) -> Vec<Dependency> {
4370 reqs.iter()
4371 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
4372 .collect()
4373}
4374
4375fn build_setup_cfg_dependency(req: &str, scope: &str, is_optional: bool) -> Option<Dependency> {
4376 let trimmed = req.trim();
4377 if trimmed.is_empty() || trimmed.starts_with('#') {
4378 return None;
4379 }
4380
4381 let name = extract_setup_cfg_dependency_name(trimmed)?;
4382 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4383
4384 Some(Dependency {
4385 purl: Some(purl.to_string()),
4386 extracted_requirement: Some(normalize_setup_cfg_requirement(trimmed)),
4387 scope: Some(scope.to_string()),
4388 is_runtime: Some(true),
4389 is_optional: Some(is_optional),
4390 is_pinned: Some(false),
4391 is_direct: Some(true),
4392 resolved_package: None,
4393 extra_data: None,
4394 })
4395}
4396
/// Extract the bare package name from a requirement specifier: everything
/// before the first whitespace, version operator (`< > = ! ~`), marker
/// separator (`;`), or extras bracket (`[`). `None` when nothing remains.
fn extract_setup_cfg_dependency_name(req: &str) -> Option<String> {
    let spec = req.trim();
    let is_delimiter =
        |c: char| c.is_whitespace() || matches!(c, '<' | '>' | '=' | '!' | '~' | ';' | '[');
    let boundary = spec.find(is_delimiter).unwrap_or(spec.len());
    let name = spec[..boundary].trim();
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
4413
/// Normalize a requirement string by removing every whitespace character
/// (e.g. `"foo >= 1.0"` becomes `"foo>=1.0"`).
fn normalize_setup_cfg_requirement(req: &str) -> String {
    req.split_whitespace().collect()
}
4417
/// Extract a quoted literal keyword value (e.g. `name="pkg"`) from
/// `setup.py`-style source text.
///
/// Tries the same eight spacing variants as before (`key="`, `key ="`,
/// `key= "`, `key = "`, and the single-quote equivalents, in that order) and
/// returns the text up to the next quote character.
///
/// Fix: a match is only accepted when the key is not the tail of a longer
/// identifier, so e.g. looking up `name` no longer matches inside
/// `filename="..."`; scanning continues past rejected occurrences.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    let patterns = vec![
        format!("{}=\"", key),
        format!("{} =\"", key),
        format!("{}= \"", key),
        format!("{} = \"", key),
        format!("{}='", key),
        format!("{} ='", key),
        format!("{}= '", key),
        format!("{} = '", key),
    ];

    for pattern in patterns {
        let mut search_from = 0;
        while let Some(found) = content[search_from..].find(&pattern) {
            let start_idx = search_from + found;

            // Word-boundary check: reject matches preceded by an identifier
            // character (alphanumeric or underscore).
            let at_word_boundary = match content[..start_idx].chars().next_back() {
                Some(prev) => !(prev.is_alphanumeric() || prev == '_'),
                None => true,
            };

            if at_word_boundary {
                let value_start = start_idx + pattern.len();
                let remaining = &content[value_start..];
                if let Some(end_idx) = remaining.find(['"', '\'']) {
                    return Some(remaining[..end_idx].to_string());
                }
            }

            // Keys are ASCII, so +1 stays on a char boundary.
            search_from = start_idx + 1;
        }
    }

    None
}
4443
4444fn extract_setup_py_dependencies(content: &str) -> Vec<Dependency> {
4445 let mut dependencies = Vec::new();
4446
4447 if let Some(tests_deps) = extract_tests_require(content) {
4448 dependencies.extend(tests_deps);
4449 }
4450
4451 if let Some(extras_deps) = extract_extras_require(content) {
4452 dependencies.extend(extras_deps);
4453 }
4454
4455 dependencies
4456}
4457
4458fn extract_tests_require(content: &str) -> Option<Vec<Dependency>> {
4459 let pattern = r"tests_require\s*=\s*\[([^\]]+)\]";
4460 let re = Regex::new(pattern).ok()?;
4461 let captures = re.captures(content)?;
4462 let deps_str = captures.get(1)?.as_str();
4463
4464 let deps = parse_setup_py_dep_list(deps_str, "test", true);
4465 if deps.is_empty() { None } else { Some(deps) }
4466}
4467
4468fn extract_extras_require(content: &str) -> Option<Vec<Dependency>> {
4469 let pattern = r"extras_require\s*=\s*\{([^}]+)\}";
4470 let re = Regex::new(pattern).ok()?;
4471 let captures = re.captures(content)?;
4472 let dict_content = captures.get(1)?.as_str();
4473
4474 let mut all_deps = Vec::new();
4475
4476 let entry_pattern = r#"['"]([^'"]+)['"]\s*:\s*\[([^\]]+)\]"#;
4477 let entry_re = Regex::new(entry_pattern).ok()?;
4478
4479 for entry_cap in entry_re.captures_iter(dict_content) {
4480 if let (Some(extra_name), Some(deps_str)) = (entry_cap.get(1), entry_cap.get(2)) {
4481 let deps = parse_setup_py_dep_list(deps_str.as_str(), extra_name.as_str(), true);
4482 all_deps.extend(deps);
4483 }
4484 }
4485
4486 if all_deps.is_empty() {
4487 None
4488 } else {
4489 Some(all_deps)
4490 }
4491}
4492
4493fn parse_setup_py_dep_list(deps_str: &str, scope: &str, is_optional: bool) -> Vec<Dependency> {
4494 let dep_pattern = r#"['"]([^'"]+)['"]"#;
4495 let re = match Regex::new(dep_pattern) {
4496 Ok(r) => r,
4497 Err(_) => return Vec::new(),
4498 };
4499
4500 re.captures_iter(deps_str)
4501 .filter_map(|cap| {
4502 let dep_str = cap.get(1)?.as_str().trim();
4503 if dep_str.is_empty() {
4504 return None;
4505 }
4506
4507 let name = extract_setup_cfg_dependency_name(dep_str)?;
4508 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4509
4510 Some(Dependency {
4511 purl: Some(purl.to_string()),
4512 extracted_requirement: Some(dep_str.to_string()),
4513 scope: Some(scope.to_string()),
4514 is_runtime: Some(true),
4515 is_optional: Some(is_optional),
4516 is_pinned: Some(false),
4517 is_direct: Some(true),
4518 resolved_package: None,
4519 extra_data: None,
4520 })
4521 })
4522 .collect()
4523}
4524
/// Read a file and parse it as TOML.
///
/// # Errors
/// Returns a string describing the failure when the file cannot be read or
/// its contents are not valid TOML.
pub(crate) fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
    let content = read_file_to_string(path).map_err(|e| e.to_string())?;
    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
}
4530
4531fn calculate_file_checksums(path: &Path) -> (Option<u64>, Option<String>) {
4542 let mut file = match File::open(path) {
4543 Ok(f) => f,
4544 Err(_) => return (None, None),
4545 };
4546
4547 let metadata = match file.metadata() {
4548 Ok(m) => m,
4549 Err(_) => return (None, None),
4550 };
4551 let size = metadata.len();
4552
4553 let mut hasher = Sha256::new();
4554 let mut buffer = vec![0; 8192];
4555
4556 loop {
4557 match file.read(&mut buffer) {
4558 Ok(0) => break,
4559 Ok(n) => hasher.update(&buffer[..n]),
4560 Err(_) => return (Some(size), None),
4561 }
4562 }
4563
4564 let hash = format!("{:x}", hasher.finalize());
4565 (Some(size), Some(hash))
4566}
4567
/// Fallback `PackageData` (all fields defaulted) returned when a manifest
/// cannot be read or parsed.
fn default_package_data() -> PackageData {
    PackageData::default()
}
4571
// Registers this module's parser with the crate-wide registry: a
// human-readable description, the glob patterns it claims, the package type,
// the primary language, and an upstream documentation URL.
crate::register_parser!(
    "Python package manifests (pyproject.toml, setup.py, setup.cfg, pypi.json, PKG-INFO, METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
    &[
        "**/pyproject.toml",
        "**/setup.py",
        "**/setup.cfg",
        "**/pypi.json",
        "**/PKG-INFO",
        "**/METADATA",
        "**/origin.json",
        "**/*.tar.gz",
        "**/*.tgz",
        "**/*.tar.bz2",
        "**/*.tar.xz",
        "**/*.zip",
        "**/*.whl",
        "**/*.egg"
    ],
    "pypi",
    "Python",
    Some("https://packaging.python.org/"),
);