1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parsers::utils::{read_file_to_string, split_name_email};
36use base64::Engine;
37use base64::engine::general_purpose::URL_SAFE_NO_PAD;
38use bzip2::read::BzDecoder;
39use csv::ReaderBuilder;
40use flate2::read::GzDecoder;
41use liblzma::read::XzDecoder;
42use log::warn;
43use packageurl::PackageUrl;
44use regex::Regex;
45use rustpython_parser::{Parse, ast};
46use serde_json::{Map as JsonMap, Value as JsonValue};
47use sha2::{Digest, Sha256};
48use std::collections::{HashMap, HashSet};
49use std::fs::File;
50use std::io::Read;
51use std::path::{Component, Path, PathBuf};
52use tar::Archive;
53use toml::Value as TomlValue;
54use toml::map::Map as TomlMap;
55use zip::ZipArchive;
56
57use super::PackageParser;
58use super::license_normalization::normalize_spdx_declared_license;
59
// Well-known pyproject.toml / setup.cfg field names used by the extractors
// in this module.
const FIELD_PROJECT: &str = "project";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_DEPENDENCY_GROUPS: &str = "dependency-groups";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";

// Guards for setup.py AST parsing: cap the file size, node count, and
// recursion depth so hostile input cannot exhaust memory or the stack.
const MAX_SETUP_PY_BYTES: usize = 1_048_576;
const MAX_SETUP_PY_AST_NODES: usize = 10_000;
const MAX_SETUP_PY_AST_DEPTH: usize = 50;

// Archive-extraction safety limits (decompression-bomb guards).
/// Maximum on-disk archive size and maximum cumulative extracted size.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Maximum extracted size for a single archive member.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Uncompressed-to-compressed ratio above which an entry is rejected.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for Python packaging metadata: pyproject.toml, setup.py/cfg,
/// PKG-INFO/METADATA, pip caches, wheels, eggs, and sdist archives.
pub struct PythonParser;
91
/// Supported container/compression formats for Python sdist archives,
/// detected from the file-name suffix in `detect_python_sdist_archive_format`.
#[derive(Clone, Copy, Debug)]
enum PythonSdistArchiveFormat {
    /// `.tar.gz`
    TarGz,
    /// `.tgz` (same encoding as `.tar.gz`, different suffix)
    Tgz,
    /// `.tar.bz2`
    TarBz2,
    /// `.tar.xz`
    TarXz,
    /// `.zip`
    Zip,
}
100
/// A zip entry that passed the safety checks in `collect_validated_zip_entries`
/// (normalized traversal-free path, sane size and compression ratio).
#[derive(Clone, Debug)]
struct ValidatedZipEntry {
    /// Index of the entry within the zip archive (for `by_index` lookups).
    index: usize,
    /// Normalized, forward-slash entry path.
    name: String,
}
106
impl PackageParser for PythonParser {
    const PACKAGE_TYPE: PackageType = PackageType::Pypi;

    /// Dispatch to the extractor matching the file name / extension.
    /// Always returns exactly one `PackageData` (a default one when no
    /// extractor matches). NOTE: `is_pip_cache_origin_json` must be checked
    /// before plain-name checks because it inspects ancestor directories.
    fn extract_packages(path: &Path) -> Vec<PackageData> {
        vec![
            if path.file_name().unwrap_or_default() == "pyproject.toml" {
                extract_from_pyproject_toml(path)
            } else if path.file_name().unwrap_or_default() == "setup.cfg" {
                extract_from_setup_cfg(path)
            } else if path.file_name().unwrap_or_default() == "setup.py" {
                extract_from_setup_py(path)
            } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
                extract_from_rfc822_metadata(path, DatasourceId::PypiSdistPkginfo)
            } else if path.file_name().unwrap_or_default() == "METADATA" {
                extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
            } else if is_pip_cache_origin_json(path) {
                extract_from_pip_origin_json(path)
            } else if path.file_name().unwrap_or_default() == "pypi.json" {
                extract_from_pypi_json(path)
            } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
                extract_from_pip_inspect(path)
            } else if is_python_sdist_archive_path(path) {
                extract_from_sdist_archive(path)
            } else if path
                .extension()
                .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
            {
                extract_from_wheel_archive(path)
            } else if path
                .extension()
                .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
            {
                extract_from_egg_archive(path)
            } else {
                // Unknown input: emit an empty, typed package record.
                default_package_data()
            },
        ]
    }

    /// Cheap recognizer mirroring the dispatch above: well-known metadata
    /// file names, pip-cache origin.json, or a Python archive extension.
    fn is_match(path: &Path) -> bool {
        if let Some(filename) = path.file_name()
            && (filename == "pyproject.toml"
                || filename == "setup.cfg"
                || filename == "setup.py"
                || filename == "PKG-INFO"
                || filename == "METADATA"
                || filename == "pypi.json"
                || filename == "pip-inspect.deplock"
                || is_pip_cache_origin_json(path))
        {
            return true;
        }

        if let Some(extension) = path.extension() {
            let ext = extension.to_string_lossy().to_lowercase();
            if ext == "whl" || ext == "egg" || is_python_sdist_archive_path(path) {
                return true;
            }
        }

        false
    }
}
170
/// Data parsed from a `.dist-info/WHEEL` file of an installed wheel
/// (see `parse_installed_wheel_metadata`).
#[derive(Debug, Clone)]
struct InstalledWheelMetadata {
    /// All `Tag:` header values (one per compatibility tag).
    wheel_tags: Vec<String>,
    /// `Wheel-Version:` header, if present.
    wheel_version: Option<String>,
    /// `Generator:` header, if present.
    wheel_generator: Option<String>,
    /// `Root-Is-Purelib:` header parsed as a bool, if present and valid.
    root_is_purelib: Option<bool>,
    /// Tags merged into a single `py.py2-abi-platform` string when every
    /// tag shares one abi/platform; `None` otherwise.
    compressed_tag: Option<String>,
}
179
180fn merge_sibling_wheel_metadata(path: &Path, package_data: &mut PackageData) {
181 let Some(parent) = path.parent() else {
182 return;
183 };
184
185 if !parent
186 .file_name()
187 .and_then(|name| name.to_str())
188 .is_some_and(|name| name.ends_with(".dist-info"))
189 {
190 return;
191 }
192
193 let wheel_path = parent.join("WHEEL");
194 if !wheel_path.exists() {
195 return;
196 }
197
198 let Ok(content) = read_file_to_string(&wheel_path) else {
199 warn!("Failed to read sibling WHEEL file at {:?}", wheel_path);
200 return;
201 };
202
203 let Some(wheel_metadata) = parse_installed_wheel_metadata(&content) else {
204 return;
205 };
206
207 apply_installed_wheel_metadata(package_data, &wheel_metadata);
208}
209
210fn parse_installed_wheel_metadata(content: &str) -> Option<InstalledWheelMetadata> {
211 use super::rfc822::{get_header_all, get_header_first};
212
213 let metadata = super::rfc822::parse_rfc822_content(content);
214 let wheel_tags = get_header_all(&metadata.headers, "tag");
215 if wheel_tags.is_empty() {
216 return None;
217 }
218
219 let wheel_version = get_header_first(&metadata.headers, "wheel-version");
220 let wheel_generator = get_header_first(&metadata.headers, "generator");
221 let root_is_purelib =
222 get_header_first(&metadata.headers, "root-is-purelib").and_then(|value| {
223 match value.to_ascii_lowercase().as_str() {
224 "true" => Some(true),
225 "false" => Some(false),
226 _ => None,
227 }
228 });
229
230 let compressed_tag = compress_wheel_tags(&wheel_tags);
231
232 Some(InstalledWheelMetadata {
233 wheel_tags,
234 wheel_version,
235 wheel_generator,
236 root_is_purelib,
237 compressed_tag,
238 })
239}
240
241fn compress_wheel_tags(tags: &[String]) -> Option<String> {
242 if tags.is_empty() {
243 return None;
244 }
245
246 if tags.len() == 1 {
247 return Some(tags[0].clone());
248 }
249
250 let mut python_tags = Vec::new();
251 let mut abi_tag: Option<&str> = None;
252 let mut platform_tag: Option<&str> = None;
253
254 for tag in tags {
255 let mut parts = tag.splitn(3, '-');
256 let python = parts.next()?;
257 let abi = parts.next()?;
258 let platform = parts.next()?;
259
260 if abi_tag.is_some_and(|existing| existing != abi)
261 || platform_tag.is_some_and(|existing| existing != platform)
262 {
263 return None;
264 }
265
266 abi_tag = Some(abi);
267 platform_tag = Some(platform);
268 python_tags.push(python.to_string());
269 }
270
271 Some(format!(
272 "{}-{}-{}",
273 python_tags.join("."),
274 abi_tag?,
275 platform_tag?
276 ))
277}
278
/// Copy parsed WHEEL-file fields into `package_data.extra_data` and, when a
/// single compressed tag plus name/version are available, rebuild the purl
/// with the tag as its `extension` qualifier.
fn apply_installed_wheel_metadata(
    package_data: &mut PackageData,
    wheel_metadata: &InstalledWheelMetadata,
) {
    // Lazily create the extra_data map; all WHEEL fields land in it.
    let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
    extra_data.insert(
        "wheel_tags".to_string(),
        JsonValue::Array(
            wheel_metadata
                .wheel_tags
                .iter()
                .cloned()
                .map(JsonValue::String)
                .collect(),
        ),
    );

    if let Some(wheel_version) = &wheel_metadata.wheel_version {
        extra_data.insert(
            "wheel_version".to_string(),
            JsonValue::String(wheel_version.clone()),
        );
    }

    if let Some(wheel_generator) = &wheel_metadata.wheel_generator {
        extra_data.insert(
            "wheel_generator".to_string(),
            JsonValue::String(wheel_generator.clone()),
        );
    }

    if let Some(root_is_purelib) = wheel_metadata.root_is_purelib {
        extra_data.insert(
            "root_is_purelib".to_string(),
            JsonValue::Bool(root_is_purelib),
        );
    }

    // Only rewrite the purl when name, version, and a merged tag all exist;
    // otherwise the previously computed purl is left untouched.
    if let (Some(name), Some(version), Some(extension)) = (
        package_data.name.as_deref(),
        package_data.version.as_deref(),
        wheel_metadata.compressed_tag.as_deref(),
    ) {
        package_data.purl = build_pypi_purl_with_extension(name, Some(version), extension);
    }
}
325
/// True for a file literally named `origin.json` that sits anywhere under a
/// directory named `wheels` (case-insensitive) — pip's HTTP cache layout.
fn is_pip_cache_origin_json(path: &Path) -> bool {
    if path.file_name().and_then(|name| name.to_str()) != Some("origin.json") {
        return false;
    }

    // skip(1) excludes the file's own path; only ancestor directories count.
    path.ancestors().skip(1).any(|ancestor| {
        matches!(
            ancestor.file_name().and_then(|name| name.to_str()),
            Some(name) if name.eq_ignore_ascii_case("wheels")
        )
    })
}
335
/// Build package data from a pip cache `origin.json` file, which records the
/// download URL (and optionally hashes) of a cached distribution.
fn extract_from_pip_origin_json(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: JsonValue = match serde_json::from_str(&content) {
        Ok(root) => root,
        Err(e) => {
            warn!("Failed to parse pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // `url` is the only field we strictly require.
    let Some(download_url) = root.get("url").and_then(|value| value.as_str()) else {
        warn!("No url found in pip cache origin.json at {:?}", path);
        return default_package_data();
    };

    // Prefer name/version parsed from the URL's file name; fall back to a
    // cached wheel file sitting next to origin.json.
    let sibling_wheel = find_sibling_cached_wheel(path);
    let name_version = parse_name_version_from_origin_url(download_url).or_else(|| {
        sibling_wheel
            .as_ref()
            .map(|wheel_info| (wheel_info.name.clone(), wheel_info.version.clone()))
    });

    let Some((name, version)) = name_version else {
        warn!(
            "Failed to infer package name/version from pip cache origin.json at {:?}",
            path
        );
        return default_package_data();
    };

    let (repository_homepage_url, repository_download_url, api_data_url, plain_purl) =
        build_pypi_urls(Some(&name), Some(&version));
    // A wheel-derived purl (carrying tag qualifiers) wins over the plain purl.
    let purl = sibling_wheel
        .as_ref()
        .and_then(|wheel_info| build_wheel_purl(Some(&name), Some(&version), wheel_info))
        .or(plain_purl);

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        primary_language: Some("Python".to_string()),
        name: Some(name),
        version: Some(version),
        datasource_id: Some(DatasourceId::PypiPipOriginJson),
        download_url: Some(download_url.to_string()),
        sha256: extract_sha256_from_origin_json(&root),
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        purl,
        ..Default::default()
    }
}
395
396fn find_sibling_cached_wheel(path: &Path) -> Option<WheelInfo> {
397 let parent = path.parent()?;
398 let entries = parent.read_dir().ok()?;
399
400 for entry in entries.flatten() {
401 let sibling_path = entry.path();
402 if sibling_path
403 .extension()
404 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
405 && let Some(wheel_info) = parse_wheel_filename(&sibling_path)
406 {
407 return Some(wheel_info);
408 }
409 }
410
411 None
412}
413
414fn parse_name_version_from_origin_url(url: &str) -> Option<(String, String)> {
415 let file_name = url.rsplit('/').next()?;
416
417 if file_name.ends_with(".whl") {
418 return parse_wheel_filename(Path::new(file_name))
419 .map(|wheel_info| (wheel_info.name, wheel_info.version));
420 }
421
422 let stem = strip_python_archive_extension(file_name)?;
423 let (name, version) = stem.rsplit_once('-')?;
424 if name.is_empty() || version.is_empty() {
425 return None;
426 }
427
428 Some((name.replace('_', "-"), version.to_string()))
429}
430
/// Strip a known Python distribution archive suffix, returning the stem,
/// or `None` when the name ends in none of the recognized suffixes.
fn strip_python_archive_extension(file_name: &str) -> Option<&str> {
    const SUFFIXES: [&str; 6] = [".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".whl"];

    SUFFIXES
        .into_iter()
        .find_map(|suffix| file_name.strip_suffix(suffix))
}
436
437fn extract_sha256_from_origin_json(root: &JsonValue) -> Option<String> {
438 root.pointer("/archive_info/hashes/sha256")
439 .and_then(|value| value.as_str())
440 .map(ToOwned::to_owned)
441 .or_else(|| {
442 root.pointer("/archive_info/hash")
443 .and_then(|value| value.as_str())
444 .and_then(normalize_origin_hash)
445 })
446}
447
/// Normalize a legacy origin.json hash value to a bare digest string.
/// Accepts `sha256=<hex>`, `sha256:<hex>`, or an unprefixed 64-char hex
/// digest; anything else yields `None`.
fn normalize_origin_hash(hash: &str) -> Option<String> {
    for prefix in ["sha256=", "sha256:"] {
        if let Some(value) = hash.strip_prefix(prefix) {
            return Some(value.to_string());
        }
    }

    let looks_like_bare_digest =
        hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit());
    looks_like_bare_digest.then(|| hash.to_string())
}
460
461fn extract_from_rfc822_metadata(path: &Path, datasource_id: DatasourceId) -> PackageData {
462 let content = match read_file_to_string(path) {
463 Ok(content) => content,
464 Err(e) => {
465 warn!("Failed to read metadata at {:?}: {}", path, e);
466 return default_package_data();
467 }
468 };
469
470 let metadata = super::rfc822::parse_rfc822_content(&content);
471 let mut package_data = build_package_data_from_rfc822(&metadata, datasource_id);
472 merge_sibling_metadata_dependencies(path, &mut package_data);
473 merge_sibling_metadata_file_references(path, &mut package_data);
474 if datasource_id == DatasourceId::PypiWheelMetadata {
475 merge_sibling_wheel_metadata(path, &mut package_data);
476 }
477 package_data
478}
479
/// Merge dependencies from `requires.txt` files found next to a metadata
/// file: either directly in the same directory, or inside the first sibling
/// `*.egg-info` directory that has one. Exact duplicates (same
/// purl/scope/requirement/extra data) are skipped.
fn merge_sibling_metadata_dependencies(path: &Path, package_data: &mut PackageData) {
    let mut extra_dependencies = Vec::new();

    if let Some(parent) = path.parent() {
        // Case 1: requires.txt directly beside the metadata file.
        let direct_requires = parent.join("requires.txt");
        if direct_requires.exists()
            && let Ok(content) = read_file_to_string(&direct_requires)
        {
            extra_dependencies.extend(parse_requires_txt(&content));
        }

        // Case 2: first sibling `*.egg-info` directory with a requires.txt.
        // The double `.flatten()` unwraps the Option<ReadDir> and then each
        // io::Result<DirEntry>.
        let sibling_egg_info_requires = parent
            .read_dir()
            .ok()
            .into_iter()
            .flatten()
            .flatten()
            .find_map(|entry| {
                let child_path = entry.path();
                if child_path.is_dir()
                    && child_path
                        .file_name()
                        .and_then(|name| name.to_str())
                        .is_some_and(|name| name.ends_with(".egg-info"))
                {
                    let requires = child_path.join("requires.txt");
                    requires.exists().then_some(requires)
                } else {
                    None
                }
            });

        if let Some(requires_path) = sibling_egg_info_requires
            && let Ok(content) = read_file_to_string(&requires_path)
        {
            extra_dependencies.extend(parse_requires_txt(&content));
        }
    }

    // De-duplicate against dependencies already present — including ones
    // pushed by earlier iterations of this very loop.
    for dependency in extra_dependencies {
        if !package_data.dependencies.iter().any(|existing| {
            existing.purl == dependency.purl
                && existing.scope == dependency.scope
                && existing.extracted_requirement == dependency.extracted_requirement
                && existing.extra_data == dependency.extra_data
        }) {
            package_data.dependencies.push(dependency);
        }
    }
}
530
531fn merge_sibling_metadata_file_references(path: &Path, package_data: &mut PackageData) {
532 let mut extra_refs = Vec::new();
533
534 if let Some(parent) = path.parent() {
535 let record_path = parent.join("RECORD");
536 if record_path.exists()
537 && let Ok(content) = read_file_to_string(&record_path)
538 {
539 extra_refs.extend(parse_record_csv(&content));
540 }
541
542 let installed_files_path = parent.join("installed-files.txt");
543 if installed_files_path.exists()
544 && let Ok(content) = read_file_to_string(&installed_files_path)
545 {
546 extra_refs.extend(parse_installed_files_txt(&content));
547 }
548
549 let sources_path = parent.join("SOURCES.txt");
550 if sources_path.exists()
551 && let Ok(content) = read_file_to_string(&sources_path)
552 {
553 extra_refs.extend(parse_sources_txt(&content));
554 }
555 }
556
557 for file_ref in extra_refs {
558 if !package_data
559 .file_references
560 .iter()
561 .any(|existing| existing.path == file_ref.path)
562 {
563 package_data.file_references.push(file_ref);
564 }
565 }
566}
567
/// Walk a zip archive's central directory (without decompressing data) and
/// return the entries that pass safety checks: a normalizable traversal-free
/// path, a sane per-entry compression ratio, and a per-file size cap.
/// Returns `Err` (aborting the whole archive) only when the cumulative
/// uncompressed size exceeds `MAX_ARCHIVE_SIZE`; per-entry violations are
/// logged and skipped.
fn collect_validated_zip_entries<R: Read + std::io::Seek>(
    archive: &mut ZipArchive<R>,
    path: &Path,
    archive_type: &str,
) -> Result<Vec<ValidatedZipEntry>, String> {
    let mut total_extracted = 0u64;
    let mut entries = Vec::new();

    for i in 0..archive.len() {
        // `by_index_raw` reads entry metadata without decompressing.
        if let Ok(file) = archive.by_index_raw(i) {
            let compressed_size = file.compressed_size();
            let uncompressed_size = file.size();
            let Some(entry_name) = normalize_archive_entry_path(file.name()) else {
                warn!(
                    "Skipping unsafe path in {} {:?}: {}",
                    archive_type,
                    path,
                    file.name()
                );
                continue;
            };

            // Zip-bomb guard: reject entries with an extreme
            // uncompressed:compressed ratio.
            if compressed_size > 0 {
                let ratio = uncompressed_size as f64 / compressed_size as f64;
                if ratio > MAX_COMPRESSION_RATIO {
                    warn!(
                        "Suspicious compression ratio in {} {:?}: {:.2}:1",
                        archive_type, path, ratio
                    );
                    continue;
                }
            }

            if uncompressed_size > MAX_FILE_SIZE {
                warn!(
                    "File too large in {} {:?}: {} bytes (limit: {} bytes)",
                    archive_type, path, uncompressed_size, MAX_FILE_SIZE
                );
                continue;
            }

            // Only entries that passed the per-entry checks count toward the
            // archive-wide budget; exceeding it aborts entirely.
            total_extracted += uncompressed_size;
            if total_extracted > MAX_ARCHIVE_SIZE {
                let msg = format!(
                    "Total extracted size exceeds limit for {} {:?}",
                    archive_type, path
                );
                warn!("{}", msg);
                return Err(msg);
            }

            entries.push(ValidatedZipEntry {
                index: i,
                name: entry_name,
            });
        }
    }

    Ok(entries)
}
628
629fn is_python_sdist_archive_path(path: &Path) -> bool {
630 detect_python_sdist_archive_format(path).is_some()
631}
632
633fn detect_python_sdist_archive_format(path: &Path) -> Option<PythonSdistArchiveFormat> {
634 let file_name = path.file_name()?.to_str()?.to_ascii_lowercase();
635
636 if !is_likely_python_sdist_filename(&file_name) {
637 return None;
638 }
639
640 if file_name.ends_with(".tar.gz") {
641 Some(PythonSdistArchiveFormat::TarGz)
642 } else if file_name.ends_with(".tgz") {
643 Some(PythonSdistArchiveFormat::Tgz)
644 } else if file_name.ends_with(".tar.bz2") {
645 Some(PythonSdistArchiveFormat::TarBz2)
646 } else if file_name.ends_with(".tar.xz") {
647 Some(PythonSdistArchiveFormat::TarXz)
648 } else if file_name.ends_with(".zip") {
649 Some(PythonSdistArchiveFormat::Zip)
650 } else {
651 None
652 }
653}
654
655fn is_likely_python_sdist_filename(file_name: &str) -> bool {
656 let Some(stem) = strip_python_archive_extension(file_name) else {
657 return false;
658 };
659
660 let Some((name, version)) = stem.rsplit_once('-') else {
661 return false;
662 };
663
664 !name.is_empty()
665 && !version.is_empty()
666 && version.chars().any(|ch| ch.is_ascii_digit())
667 && name
668 .chars()
669 .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
670}
671
672fn extract_from_sdist_archive(path: &Path) -> PackageData {
673 let metadata = match std::fs::metadata(path) {
674 Ok(m) => m,
675 Err(e) => {
676 warn!(
677 "Failed to read metadata for sdist archive {:?}: {}",
678 path, e
679 );
680 return default_package_data();
681 }
682 };
683
684 if metadata.len() > MAX_ARCHIVE_SIZE {
685 warn!(
686 "sdist archive too large: {} bytes (limit: {} bytes)",
687 metadata.len(),
688 MAX_ARCHIVE_SIZE
689 );
690 return default_package_data();
691 }
692
693 let Some(format) = detect_python_sdist_archive_format(path) else {
694 return default_package_data();
695 };
696
697 let mut package_data = match format {
698 PythonSdistArchiveFormat::TarGz | PythonSdistArchiveFormat::Tgz => {
699 let file = match File::open(path) {
700 Ok(file) => file,
701 Err(e) => {
702 warn!("Failed to open sdist archive {:?}: {}", path, e);
703 return default_package_data();
704 }
705 };
706 let decoder = GzDecoder::new(file);
707 extract_from_tar_sdist_archive(path, decoder, "tar.gz", metadata.len())
708 }
709 PythonSdistArchiveFormat::TarBz2 => {
710 let file = match File::open(path) {
711 Ok(file) => file,
712 Err(e) => {
713 warn!("Failed to open sdist archive {:?}: {}", path, e);
714 return default_package_data();
715 }
716 };
717 let decoder = BzDecoder::new(file);
718 extract_from_tar_sdist_archive(path, decoder, "tar.bz2", metadata.len())
719 }
720 PythonSdistArchiveFormat::TarXz => {
721 let file = match File::open(path) {
722 Ok(file) => file,
723 Err(e) => {
724 warn!("Failed to open sdist archive {:?}: {}", path, e);
725 return default_package_data();
726 }
727 };
728 let decoder = XzDecoder::new(file);
729 extract_from_tar_sdist_archive(path, decoder, "tar.xz", metadata.len())
730 }
731 PythonSdistArchiveFormat::Zip => extract_from_zip_sdist_archive(path),
732 };
733
734 if package_data.package_type.is_some() {
735 let (size, sha256) = calculate_file_checksums(path);
736 package_data.size = size;
737 package_data.sha256 = sha256;
738 }
739
740 package_data
741}
742
/// Stream a tar-based sdist and collect the text of relevant entries
/// (PKG-INFO, requires.txt, SOURCES.txt), enforcing decompression-bomb
/// guards as it goes: a per-file size cap, a cumulative extracted-size cap,
/// and a cumulative compression-ratio cap against `compressed_size` (the
/// on-disk archive size). Per-entry violations are skipped; archive-wide
/// violations abort with default package data.
fn extract_from_tar_sdist_archive<R: Read>(
    path: &Path,
    reader: R,
    archive_type: &str,
    compressed_size: u64,
) -> PackageData {
    let mut archive = Archive::new(reader);
    let archive_entries = match archive.entries() {
        Ok(entries) => entries,
        Err(e) => {
            warn!(
                "Failed to read {} sdist archive {:?}: {}",
                archive_type, path, e
            );
            return default_package_data();
        }
    };

    let mut total_extracted = 0u64;
    let mut entries = Vec::new();

    for entry_result in archive_entries {
        let mut entry = match entry_result {
            Ok(entry) => entry,
            Err(e) => {
                warn!(
                    "Failed to read {} sdist entry from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        let entry_size = entry.size();
        if entry_size > MAX_FILE_SIZE {
            warn!(
                "File too large in {} sdist {:?}: {} bytes (limit: {} bytes)",
                archive_type, path, entry_size, MAX_FILE_SIZE
            );
            continue;
        }

        // Every surviving entry counts toward the archive-wide budget,
        // whether or not its content is ultimately read below.
        total_extracted += entry_size;
        if total_extracted > MAX_ARCHIVE_SIZE {
            warn!(
                "Total extracted size exceeds limit for {} sdist {:?}",
                archive_type, path
            );
            return default_package_data();
        }

        // Ratio guard uses the cumulative extracted size vs. the whole
        // archive's on-disk size.
        if compressed_size > 0 {
            let ratio = total_extracted as f64 / compressed_size as f64;
            if ratio > MAX_COMPRESSION_RATIO {
                warn!(
                    "Suspicious compression ratio in {} sdist {:?}: {:.2}:1",
                    archive_type, path, ratio
                );
                return default_package_data();
            }
        }

        // Normalize to forward slashes before safety normalization.
        let entry_path = match entry.path() {
            Ok(path) => path.to_string_lossy().replace('\\', "/"),
            Err(e) => {
                warn!(
                    "Failed to get {} sdist entry path from {:?}: {}",
                    archive_type, path, e
                );
                continue;
            }
        };

        let Some(entry_path) = normalize_archive_entry_path(&entry_path) else {
            warn!("Skipping unsafe {} sdist path in {:?}", archive_type, path);
            continue;
        };

        if !is_relevant_sdist_text_entry(&entry_path) {
            continue;
        }

        // Read bounded UTF-8 content; unreadable entries are skipped.
        if let Ok(content) = read_limited_utf8(
            &mut entry,
            MAX_FILE_SIZE,
            &format!("{} entry {}", archive_type, entry_path),
        ) {
            entries.push((entry_path, content));
        }
    }

    build_sdist_package_data(path, entries)
}
836
837fn extract_from_zip_sdist_archive(path: &Path) -> PackageData {
838 let file = match File::open(path) {
839 Ok(file) => file,
840 Err(e) => {
841 warn!("Failed to open zip sdist archive {:?}: {}", path, e);
842 return default_package_data();
843 }
844 };
845
846 let mut archive = match ZipArchive::new(file) {
847 Ok(archive) => archive,
848 Err(e) => {
849 warn!("Failed to read zip sdist archive {:?}: {}", path, e);
850 return default_package_data();
851 }
852 };
853
854 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "sdist zip") {
855 Ok(entries) => entries,
856 Err(_) => return default_package_data(),
857 };
858
859 let mut entries = Vec::new();
860 for entry in validated_entries.iter() {
861 if !is_relevant_sdist_text_entry(&entry.name) {
862 continue;
863 }
864
865 if let Ok(content) = read_validated_zip_entry(&mut archive, entry, path, "sdist zip") {
866 entries.push((entry.name.clone(), content));
867 }
868 }
869
870 build_sdist_package_data(path, entries)
871}
872
/// True for archive entries whose content we parse out of an sdist:
/// PKG-INFO, requires.txt, and SOURCES.txt (always below the root directory,
/// hence the leading `/`).
fn is_relevant_sdist_text_entry(entry_path: &str) -> bool {
    ["/PKG-INFO", "/requires.txt", "/SOURCES.txt"]
        .into_iter()
        .any(|suffix| entry_path.ends_with(suffix))
}
878
879fn build_sdist_package_data(path: &Path, entries: Vec<(String, String)>) -> PackageData {
880 let Some((metadata_path, metadata_content)) = select_sdist_pkginfo_entry(path, &entries) else {
881 warn!("No PKG-INFO file found in sdist archive {:?}", path);
882 return default_package_data();
883 };
884
885 let mut package_data =
886 python_parse_rfc822_content(&metadata_content, DatasourceId::PypiSdistPkginfo);
887 merge_sdist_archive_dependencies(&entries, &metadata_path, &mut package_data);
888 merge_sdist_archive_file_references(&entries, &metadata_path, &mut package_data);
889 apply_sdist_name_version_fallback(path, &mut package_data);
890 package_data
891}
892
/// Choose the best PKG-INFO entry from an sdist. Candidates are ranked by:
/// (1) whether their `Name:` header matches the package name implied by the
/// archive file name, (2) layout quality — `root/x.egg-info/PKG-INFO` beats
/// `root/PKG-INFO` beats any other `*.egg-info/PKG-INFO` beats everything
/// else — then (3) path depth and (4) lexical path order as tie-breakers.
fn select_sdist_pkginfo_entry(
    archive_path: &Path,
    entries: &[(String, String)],
) -> Option<(String, String)> {
    // Expected package name, normalized, derived from `name-version.suffix`.
    let expected_name = archive_path
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(strip_python_archive_extension)
        .and_then(|stem| {
            stem.rsplit_once('-')
                .map(|(name, _)| normalize_python_package_name(name))
        });

    entries
        .iter()
        .filter(|(entry_path, _)| entry_path.ends_with("/PKG-INFO"))
        .min_by_key(|(entry_path, content)| {
            let components: Vec<_> = entry_path
                .split('/')
                .filter(|part| !part.is_empty())
                .collect();
            // Parse each candidate's headers to compare its declared name.
            let metadata = super::rfc822::parse_rfc822_content(content);
            let candidate_name = super::rfc822::get_header_first(&metadata.headers, "name")
                .map(|name| normalize_python_package_name(&name));
            let name_rank = if candidate_name == expected_name {
                0
            } else {
                1
            };
            let kind_rank = if components.len() == 3
                && components[1].ends_with(".egg-info")
                && components[2] == "PKG-INFO"
            {
                0
            } else if components.len() == 2 && components[1] == "PKG-INFO" {
                1
            } else if entry_path.ends_with(".egg-info/PKG-INFO") {
                2
            } else {
                3
            };

            (name_rank, kind_rank, components.len(), entry_path.clone())
        })
        .map(|(entry_path, content)| (entry_path.clone(), content.clone()))
}
939
/// Merge dependencies from `requires.txt` entries inside an sdist: the one
/// beside the selected PKG-INFO, and the one in the best-matching
/// `*.egg-info` directory under the archive root. Exact duplicates are not
/// added twice.
fn merge_sdist_archive_dependencies(
    entries: &[(String, String)],
    metadata_path: &str,
    package_data: &mut PackageData,
) {
    // Directory holding the selected PKG-INFO ("" when it is at top level).
    let metadata_dir = metadata_path
        .rsplit_once('/')
        .map(|(dir, _)| dir)
        .unwrap_or("");
    let archive_root = metadata_path.split('/').next().unwrap_or("");
    let matched_egg_info_dir =
        select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
    let mut extra_dependencies = Vec::new();

    for (entry_path, content) in entries {
        let is_direct_requires =
            !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/requires.txt");
        let is_egg_info_requires = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
            entry_path == &format!("{archive_root}/{egg_info_dir}/requires.txt")
        });

        if is_direct_requires || is_egg_info_requires {
            extra_dependencies.extend(parse_requires_txt(content));
        }
    }

    // De-duplicate against dependencies already present — including ones
    // pushed by earlier iterations of this loop.
    for dependency in extra_dependencies {
        if !package_data.dependencies.iter().any(|existing| {
            existing.purl == dependency.purl
                && existing.scope == dependency.scope
                && existing.extracted_requirement == dependency.extracted_requirement
                && existing.extra_data == dependency.extra_data
        }) {
            package_data.dependencies.push(dependency);
        }
    }
}
977
/// Merge file references from `SOURCES.txt` entries inside an sdist,
/// mirroring `merge_sdist_archive_dependencies`: the file beside the
/// selected PKG-INFO, plus the one in the best-matching `*.egg-info`
/// directory. Paths already referenced are skipped.
fn merge_sdist_archive_file_references(
    entries: &[(String, String)],
    metadata_path: &str,
    package_data: &mut PackageData,
) {
    // Directory holding the selected PKG-INFO ("" when it is at top level).
    let metadata_dir = metadata_path
        .rsplit_once('/')
        .map(|(dir, _)| dir)
        .unwrap_or("");
    let archive_root = metadata_path.split('/').next().unwrap_or("");
    let matched_egg_info_dir =
        select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
    let mut extra_refs = Vec::new();

    for (entry_path, content) in entries {
        let is_direct_sources =
            !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/SOURCES.txt");
        let is_egg_info_sources = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
            entry_path == &format!("{archive_root}/{egg_info_dir}/SOURCES.txt")
        });

        if is_direct_sources || is_egg_info_sources {
            extra_refs.extend(parse_sources_txt(content));
        }
    }

    // Deduplicate by path, including refs pushed earlier in this loop.
    for file_ref in extra_refs {
        if !package_data
            .file_references
            .iter()
            .any(|existing| existing.path == file_ref.path)
        {
            package_data.file_references.push(file_ref);
        }
    }
}
1014
/// Among `root/<dir>.egg-info/<file>` entries (exactly three path
/// components under the given archive root), pick the `*.egg-info`
/// directory whose normalized name matches the package name, preferring a
/// match, then lexical order for determinism.
fn select_matching_sdist_egg_info_dir(
    entries: &[(String, String)],
    archive_root: &str,
    package_name: Option<&str>,
) -> Option<String> {
    let normalized_package_name = package_name.map(normalize_python_package_name);

    entries
        .iter()
        .filter_map(|(entry_path, _)| {
            let components: Vec<_> = entry_path
                .split('/')
                .filter(|part| !part.is_empty())
                .collect();
            // Keep only `<archive_root>/<x>.egg-info/<file>` shapes.
            if components.len() == 3
                && components[0] == archive_root
                && components[1].ends_with(".egg-info")
            {
                Some(components[1].to_string())
            } else {
                None
            }
        })
        .min_by_key(|egg_info_dir| {
            let normalized_dir_name =
                normalize_python_package_name(egg_info_dir.trim_end_matches(".egg-info"));
            let name_rank = if Some(normalized_dir_name.clone()) == normalized_package_name {
                0
            } else {
                1
            };

            (name_rank, egg_info_dir.clone())
        })
}
1050
/// Loose PEP 503-style normalization: ASCII-lowercase the name and unify
/// `_` to `-` so spelling variants compare equal.
fn normalize_python_package_name(name: &str) -> String {
    name.chars()
        .map(|ch| match ch {
            '_' => '-',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
1054
/// Fill in name/version (and the derived PyPI URLs/purl) from the sdist
/// archive file name (`name-version.suffix`) for any of those fields that
/// metadata parsing left empty. Existing values are never overwritten.
fn apply_sdist_name_version_fallback(path: &Path, package_data: &mut PackageData) {
    let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
        return;
    };

    let Some(stem) = strip_python_archive_extension(file_name) else {
        return;
    };

    // Split at the LAST '-' — version must not contain one; the name may.
    let Some((name, version)) = stem.rsplit_once('-') else {
        return;
    };

    if package_data.name.is_none() {
        package_data.name = Some(name.replace('_', "-"));
    }
    if package_data.version.is_none() {
        package_data.version = Some(version.to_string());
    }

    // Recompute URLs/purl only if at least one is missing, and fill each
    // missing field individually so present values are preserved.
    if package_data.purl.is_none()
        || package_data.repository_homepage_url.is_none()
        || package_data.repository_download_url.is_none()
        || package_data.api_data_url.is_none()
    {
        let (repository_homepage_url, repository_download_url, api_data_url, purl) =
            build_pypi_urls(
                package_data.name.as_deref(),
                package_data.version.as_deref(),
            );

        if package_data.repository_homepage_url.is_none() {
            package_data.repository_homepage_url = repository_homepage_url;
        }
        if package_data.repository_download_url.is_none() {
            package_data.repository_download_url = repository_download_url;
        }
        if package_data.api_data_url.is_none() {
            package_data.api_data_url = api_data_url;
        }
        if package_data.purl.is_none() {
            package_data.purl = purl;
        }
    }
}
1100
/// Extracts `PackageData` from a built wheel (`.whl`) archive.
///
/// Steps: size-cap the archive, validate all zip entries, read the
/// `*.dist-info/METADATA` RFC 822 metadata, optionally read the
/// `*.dist-info/RECORD` CSV for per-file references, and finally fold in
/// name/version/tags parsed from the wheel filename itself. Every failure
/// path logs a `warn!` and returns `default_package_data()` instead of an
/// error, so a corrupt wheel degrades to an empty result.
fn extract_from_wheel_archive(path: &Path) -> PackageData {
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!(
                "Failed to read metadata for wheel archive {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    // Reject oversized archives up front to bound memory and I/O.
    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Wheel archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Validate every entry once; all lookups below only touch vetted entries.
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "wheel") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(),
    };

    let metadata_entry =
        match find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/METADATA") {
            Some(entry) => entry,
            None => {
                warn!("No METADATA file found in wheel archive {:?}", path);
                return default_package_data();
            }
        };

    let content = match read_validated_zip_entry(&mut archive, metadata_entry, path, "wheel") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read METADATA from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiWheel);

    // Checksums of the wheel file itself, not of its members.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // RECORD is optional; when present and readable it supplies per-file
    // references (paths, sha256, sizes).
    if let Some(record_entry) =
        find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/RECORD")
        && let Ok(record_content) =
            read_validated_zip_entry(&mut archive, record_entry, path, "wheel")
    {
        package_data.file_references = parse_record_csv(&record_content);
    }

    // Filename-derived data only fills gaps METADATA left empty, but the
    // qualifiers/purl/extra_data are always rebuilt from the tag triple.
    if let Some(wheel_info) = parse_wheel_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(wheel_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(wheel_info.version.clone());
        }

        package_data.qualifiers = Some(std::collections::HashMap::from([(
            "extension".to_string(),
            format!(
                "{}-{}-{}",
                wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
            ),
        )]));

        package_data.purl = build_wheel_purl(
            package_data.name.as_deref(),
            package_data.version.as_deref(),
            &wheel_info,
        );

        let mut extra_data = package_data.extra_data.unwrap_or_default();
        // NOTE(review): this stores the wheel's python tag (e.g. "py3") under
        // "python_requires"; the METADATA Requires-Python header is the usual
        // source for that key — confirm this is intentional.
        extra_data.insert(
            "python_requires".to_string(),
            serde_json::Value::String(wheel_info.python_tag.clone()),
        );
        extra_data.insert(
            "abi_tag".to_string(),
            serde_json::Value::String(wheel_info.abi_tag.clone()),
        );
        extra_data.insert(
            "platform_tag".to_string(),
            serde_json::Value::String(wheel_info.platform_tag.clone()),
        );
        package_data.extra_data = Some(extra_data);
    }

    package_data
}
1214
/// Extracts `PackageData` from a Python egg (`.egg`) zip archive.
///
/// Mirrors `extract_from_wheel_archive`: size-cap the archive, validate the
/// zip entries, parse `EGG-INFO/PKG-INFO` (or `*.egg-info/PKG-INFO`) RFC 822
/// metadata, optionally read `installed-files.txt` for file references, then
/// fill gaps from the egg filename. Failures log a `warn!` and return
/// `default_package_data()`.
fn extract_from_egg_archive(path: &Path) -> PackageData {
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!("Failed to read metadata for egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Reject oversized archives before opening them.
    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Egg archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Validate every entry once; all lookups below only touch vetted entries.
    let validated_entries = match collect_validated_zip_entries(&mut archive, path, "egg") {
        Ok(entries) => entries,
        Err(_) => return default_package_data(),
    };

    // Eggs place PKG-INFO either under EGG-INFO/ or a <name>.egg-info/ dir.
    let pkginfo_entry = match find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &["EGG-INFO/PKG-INFO", ".egg-info/PKG-INFO"],
    ) {
        Some(entry) => entry,
        None => {
            warn!("No PKG-INFO file found in egg archive {:?}", path);
            return default_package_data();
        }
    };

    let content = match read_validated_zip_entry(&mut archive, pkginfo_entry, path, "egg") {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read PKG-INFO from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiEgg);

    // Checksums of the egg file itself, not of its members.
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // installed-files.txt is optional; it lists one installed path per line.
    if let Some(installed_files_entry) = find_validated_zip_entry_by_any_suffix(
        &validated_entries,
        &[
            "EGG-INFO/installed-files.txt",
            ".egg-info/installed-files.txt",
        ],
    ) && let Ok(installed_files_content) =
        read_validated_zip_entry(&mut archive, installed_files_entry, path, "egg")
    {
        package_data.file_references = parse_installed_files_txt(&installed_files_content);
    }

    // Filename-derived data only fills gaps PKG-INFO left empty.
    if let Some(egg_info) = parse_egg_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(egg_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(egg_info.version.clone());
        }

        if let Some(python_version) = &egg_info.python_version {
            let mut extra_data = package_data.extra_data.unwrap_or_default();
            extra_data.insert(
                "python_version".to_string(),
                serde_json::Value::String(python_version.clone()),
            );
            package_data.extra_data = Some(extra_data);
        }
    }

    // The purl is rebuilt unconditionally from the final name/version.
    package_data.purl = build_egg_purl(
        package_data.name.as_deref(),
        package_data.version.as_deref(),
    );

    package_data
}
1316
1317fn find_validated_zip_entry_by_suffix<'a>(
1318 entries: &'a [ValidatedZipEntry],
1319 suffix: &str,
1320) -> Option<&'a ValidatedZipEntry> {
1321 entries.iter().find(|entry| entry.name.ends_with(suffix))
1322}
1323
1324fn find_validated_zip_entry_by_any_suffix<'a>(
1325 entries: &'a [ValidatedZipEntry],
1326 suffixes: &[&str],
1327) -> Option<&'a ValidatedZipEntry> {
1328 entries
1329 .iter()
1330 .find(|entry| suffixes.iter().any(|suffix| entry.name.ends_with(suffix)))
1331}
1332
1333fn read_validated_zip_entry<R: Read + std::io::Seek>(
1334 archive: &mut ZipArchive<R>,
1335 entry: &ValidatedZipEntry,
1336 path: &Path,
1337 archive_type: &str,
1338) -> Result<String, String> {
1339 let mut file = archive
1340 .by_index(entry.index)
1341 .map_err(|e| format!("Failed to find entry {}: {}", entry.name, e))?;
1342
1343 let compressed_size = file.compressed_size();
1344 let uncompressed_size = file.size();
1345
1346 if compressed_size > 0 {
1347 let ratio = uncompressed_size as f64 / compressed_size as f64;
1348 if ratio > MAX_COMPRESSION_RATIO {
1349 return Err(format!(
1350 "Rejected suspicious compression ratio in {} {:?}: {:.2}:1",
1351 archive_type, path, ratio
1352 ));
1353 }
1354 }
1355
1356 if uncompressed_size > MAX_FILE_SIZE {
1357 return Err(format!(
1358 "Rejected oversized entry in {} {:?}: {} bytes",
1359 archive_type, path, uncompressed_size
1360 ));
1361 }
1362
1363 read_limited_utf8(
1364 &mut file,
1365 MAX_FILE_SIZE,
1366 &format!("{} entry {}", archive_type, entry.name),
1367 )
1368}
1369
/// Reads at most `max_bytes` bytes from `reader` and decodes them as UTF-8.
///
/// Returns an error (tagged with `context`) when the stream exceeds the
/// limit, the read fails, or the bytes are not valid UTF-8.
fn read_limited_utf8<R: Read>(
    reader: &mut R,
    max_bytes: u64,
    context: &str,
) -> Result<String, String> {
    // Take one extra byte so "exactly max_bytes" can be told apart from
    // "more than max_bytes". saturating_add avoids the overflow that
    // `max_bytes + 1` hits at u64::MAX (panic in debug builds, wrap to a
    // zero-length take in release builds).
    let mut limited = reader.take(max_bytes.saturating_add(1));
    let mut bytes = Vec::new();
    limited
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed to read {}: {}", context, e))?;

    if bytes.len() as u64 > max_bytes {
        return Err(format!(
            "{} exceeded {} byte limit while reading",
            context, max_bytes
        ));
    }

    String::from_utf8(bytes).map_err(|e| format!("{} is not valid UTF-8: {}", context, e))
}
1390
/// Normalizes an archive member path to a safe, forward-slash relative path.
///
/// Returns `None` for anything that could escape the extraction root:
/// absolute paths, `..` traversal, Windows drive-letter prefixes, or paths
/// that normalize to nothing. `.` segments are dropped.
fn normalize_archive_entry_path(entry_path: &str) -> Option<String> {
    let unified = entry_path.replace('\\', "/");

    // Reject Windows drive-letter paths ("C:/...") explicitly; on Unix hosts
    // they would otherwise parse as a harmless-looking Normal component.
    let raw = unified.as_bytes();
    if raw.len() >= 3 && raw[0].is_ascii_alphabetic() && raw[1] == b':' && raw[2] == b'/' {
        return None;
    }

    let mut segments: Vec<String> = Vec::new();
    for part in Path::new(&unified).components() {
        match part {
            Component::Normal(segment) => segments.push(segment.to_string_lossy().into_owned()),
            Component::CurDir => continue,
            // RootDir, ParentDir and Prefix are all unsafe in an archive.
            _ => return None,
        }
    }

    if segments.is_empty() {
        None
    } else {
        Some(segments.join("/"))
    }
}
1412
1413pub fn parse_record_csv(content: &str) -> Vec<FileReference> {
1418 let mut reader = ReaderBuilder::new()
1419 .has_headers(false)
1420 .from_reader(content.as_bytes());
1421
1422 let mut file_references = Vec::new();
1423
1424 for result in reader.records() {
1425 match result {
1426 Ok(record) => {
1427 if record.len() < 3 {
1428 continue;
1429 }
1430
1431 let path = record.get(0).unwrap_or("").trim().to_string();
1432 if path.is_empty() {
1433 continue;
1434 }
1435
1436 let hash_field = record.get(1).unwrap_or("").trim();
1437 let size_field = record.get(2).unwrap_or("").trim();
1438
1439 let sha256 = if !hash_field.is_empty() && hash_field.contains('=') {
1441 let parts: Vec<&str> = hash_field.split('=').collect();
1442 if parts.len() == 2 && parts[0] == "sha256" {
1443 match URL_SAFE_NO_PAD.decode(parts[1]) {
1445 Ok(decoded) => {
1446 let hex = decoded
1447 .iter()
1448 .map(|b| format!("{:02x}", b))
1449 .collect::<String>();
1450 Some(hex)
1451 }
1452 Err(_) => None,
1453 }
1454 } else {
1455 None
1456 }
1457 } else {
1458 None
1459 };
1460
1461 let size = if !size_field.is_empty() && size_field != "-" {
1463 size_field.parse::<u64>().ok()
1464 } else {
1465 None
1466 };
1467
1468 file_references.push(FileReference {
1469 path,
1470 size,
1471 sha1: None,
1472 md5: None,
1473 sha256,
1474 sha512: None,
1475 extra_data: None,
1476 });
1477 }
1478 Err(e) => {
1479 warn!("Failed to parse RECORD CSV row: {}", e);
1480 continue;
1481 }
1482 }
1483 }
1484
1485 file_references
1486}
1487
1488pub fn parse_installed_files_txt(content: &str) -> Vec<FileReference> {
1491 content
1492 .lines()
1493 .map(|line| line.trim())
1494 .filter(|line| !line.is_empty())
1495 .map(|path| FileReference {
1496 path: path.to_string(),
1497 size: None,
1498 sha1: None,
1499 md5: None,
1500 sha256: None,
1501 sha512: None,
1502 extra_data: None,
1503 })
1504 .collect()
1505}
1506
1507pub fn parse_sources_txt(content: &str) -> Vec<FileReference> {
1508 content
1509 .lines()
1510 .map(str::trim)
1511 .filter(|line| !line.is_empty())
1512 .map(|path| FileReference {
1513 path: path.to_string(),
1514 size: None,
1515 sha1: None,
1516 md5: None,
1517 sha256: None,
1518 sha512: None,
1519 extra_data: None,
1520 })
1521 .collect()
1522}
1523
/// Components of a wheel filename per PEP 427:
/// `{name}-{version}(-{build})?-{python}-{abi}-{platform}.whl`.
struct WheelInfo {
    name: String,
    version: String,
    python_tag: String,
    abi_tag: String,
    platform_tag: String,
}

/// Parses a wheel filename into its PEP 427 components.
///
/// The three tags are anchored at the *end* of the stem so that the optional
/// build tag (`{name}-{version}-{build}-{py}-{abi}-{plat}.whl`) does not
/// shift them; previously a build tag was misread as the python tag.
/// Compliant names/versions escape `-` to `_`, and platform tags use `_`
/// separators, so the end-anchored split is unambiguous. Underscores in the
/// name are mapped back to hyphens. Returns `None` for stems with fewer
/// than five `-`-separated parts.
fn parse_wheel_filename(path: &Path) -> Option<WheelInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let parts: Vec<&str> = stem.split('-').collect();

    if parts.len() < 5 {
        return None;
    }

    let count = parts.len();
    Some(WheelInfo {
        name: parts[0].replace('_', "-"),
        version: parts[1].to_string(),
        python_tag: parts[count - 3].to_string(),
        abi_tag: parts[count - 2].to_string(),
        platform_tag: parts[count - 1].to_string(),
    })
}
1548
/// Components of an egg filename: `{name}-{version}(-py{X.Y})?.egg`.
struct EggInfo {
    name: String,
    version: String,
    python_version: Option<String>,
}

/// Splits an egg filename stem on `-` into name, version, and an optional
/// trailing python version tag (any parts beyond the third are ignored).
/// Underscores in the name become hyphens. Returns `None` when there is no
/// version part.
fn parse_egg_filename(path: &Path) -> Option<EggInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let mut pieces = stem.split('-');

    let name = pieces.next()?.replace('_', "-");
    let version = pieces.next()?.to_string();
    let python_version = pieces.next().map(str::to_string);

    Some(EggInfo {
        name,
        version,
        python_version,
    })
}
1569
1570fn build_wheel_purl(
1571 name: Option<&str>,
1572 version: Option<&str>,
1573 wheel_info: &WheelInfo,
1574) -> Option<String> {
1575 let name = name?;
1576 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1577
1578 if let Some(ver) = version {
1579 package_url.with_version(ver).ok()?;
1580 }
1581
1582 let extension = format!(
1583 "{}-{}-{}",
1584 wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
1585 );
1586 package_url.add_qualifier("extension", extension).ok()?;
1587
1588 Some(package_url.to_string())
1589}
1590
1591fn build_egg_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
1592 let name = name?;
1593 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1594
1595 if let Some(ver) = version {
1596 package_url.with_version(ver).ok()?;
1597 }
1598
1599 package_url.add_qualifier("type", "egg").ok()?;
1600
1601 Some(package_url.to_string())
1602}
1603
/// Parses RFC 822 style Python metadata text (PKG-INFO / METADATA) and maps
/// it onto a `PackageData` tagged with the given `datasource_id`.
fn python_parse_rfc822_content(content: &str, datasource_id: DatasourceId) -> PackageData {
    let metadata = super::rfc822::parse_rfc822_content(content);
    build_package_data_from_rfc822(&metadata, datasource_id)
}
1608
/// Maps parsed RFC 822 metadata headers (core Python package metadata) onto
/// a `PackageData`.
///
/// Pulls the standard headers (Name, Version, Summary, Home-page, Author,
/// License, License-Expression, Download-URL, Platform, Requires-Python,
/// Classifier, License-File, Project-URL), derives the description, parties,
/// keywords, license fields, dependencies, and canonical PyPI URLs, and
/// stashes platform / requires_python / license_files / project_urls in
/// `extra_data`.
fn build_package_data_from_rfc822(
    metadata: &super::rfc822::Rfc822Metadata,
    datasource_id: DatasourceId,
) -> PackageData {
    use super::rfc822::{get_header_all, get_header_first};

    let name = get_header_first(&metadata.headers, "name");
    let version = get_header_first(&metadata.headers, "version");
    let summary = get_header_first(&metadata.headers, "summary");
    let mut homepage_url = get_header_first(&metadata.headers, "home-page");
    let author = get_header_first(&metadata.headers, "author");
    let author_email = get_header_first(&metadata.headers, "author-email");
    let license = get_header_first(&metadata.headers, "license");
    let license_expression = get_header_first(&metadata.headers, "license-expression");
    let download_url = get_header_first(&metadata.headers, "download-url");
    let platform = get_header_first(&metadata.headers, "platform");
    let requires_python = get_header_first(&metadata.headers, "requires-python");
    let classifiers = get_header_all(&metadata.headers, "classifier");
    let license_files = get_header_all(&metadata.headers, "license-file");

    // Newer metadata puts the long description in the message body; older
    // metadata used a Description header. Prefer the body when non-empty.
    let description_body = if metadata.body.is_empty() {
        get_header_first(&metadata.headers, "description").unwrap_or_default()
    } else {
        metadata.body.clone()
    };

    let description = build_description(summary.as_deref(), &description_body);

    // A single author party is emitted when either name or email is present.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    // "License ::" classifiers feed the license statement; the rest become
    // keywords.
    let (keywords, license_classifiers) = split_classifiers(&classifiers);
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license_expression.as_deref());

    // Prefer the explicit License-Expression header; otherwise synthesize a
    // statement from the License header plus license classifiers.
    let extracted_license_statement = license_expression
        .clone()
        .or_else(|| build_extracted_license_statement(license.as_deref(), &license_classifiers));

    let mut extra_data = HashMap::new();
    // "UNKNOWN" is the historical distutils placeholder — treat it as absent.
    if let Some(platform_value) = platform
        && !platform_value.eq_ignore_ascii_case("unknown")
        && !platform_value.is_empty()
    {
        extra_data.insert(
            "platform".to_string(),
            serde_json::Value::String(platform_value),
        );
    }

    if let Some(requires_python_value) = requires_python
        && !requires_python_value.is_empty()
    {
        extra_data.insert(
            "requires_python".to_string(),
            serde_json::Value::String(requires_python_value),
        );
    }

    if !license_files.is_empty() {
        extra_data.insert(
            "license_files".to_string(),
            serde_json::Value::Array(
                license_files
                    .iter()
                    .cloned()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }

    // License files are the only per-file references this format declares.
    let file_references = license_files
        .iter()
        .map(|path| FileReference {
            path: path.clone(),
            size: None,
            sha1: None,
            md5: None,
            sha256: None,
            sha512: None,
            extra_data: None,
        })
        .collect();

    let project_urls = get_header_all(&metadata.headers, "project-url");
    let dependencies = extract_rfc822_dependencies(&metadata.headers);
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);

    // Classify Project-URL entries by their (case-insensitive) label into
    // the dedicated URL slots; only the first match per slot wins, and all
    // pairs are also preserved verbatim under extra_data["project_urls"].
    if !project_urls.is_empty() {
        let parsed_urls = parse_project_urls(&project_urls);

        for (label, url) in &parsed_urls {
            let label_lower = label.to_lowercase();

            if bug_tracking_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "tracker"
                        | "bug reports"
                        | "bug tracker"
                        | "issues"
                        | "issue tracker"
                        | "github: issues"
                )
            {
                bug_tracking_url = Some(url.clone());
            } else if code_view_url.is_none()
                && matches!(label_lower.as_str(), "source" | "source code" | "code")
            {
                code_view_url = Some(url.clone());
            } else if vcs_url.is_none()
                && matches!(
                    label_lower.as_str(),
                    "github" | "gitlab" | "github: repo" | "repository"
                )
            {
                vcs_url = Some(url.clone());
            } else if homepage_url.is_none()
                && matches!(label_lower.as_str(), "website" | "homepage" | "home")
            {
                homepage_url = Some(url.clone());
            } else if label_lower == "changelog" {
                extra_data.insert(
                    "changelog_url".to_string(),
                    serde_json::Value::String(url.clone()),
                );
            }
        }

        let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
            .iter()
            .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
            .collect();

        if !project_urls_json.is_empty() {
            extra_data.insert(
                "project_urls".to_string(),
                serde_json::Value::Object(project_urls_json),
            );
        }
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
        build_pypi_urls(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description,
        release_date: None,
        parties,
        keywords,
        homepage_url,
        download_url,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references,
        is_private: false,
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(datasource_id),
        purl,
    }
}
1821
/// Parses `Project-URL` header values of the form `"<label>, <url>"` into
/// `(label, url)` pairs.
///
/// Splits on the first comma and trims both sides, so entries written
/// without a space after the comma (`"Tracker,https://…"`) are also
/// accepted (previously they were silently dropped). Entries missing a
/// comma, label, or URL are skipped.
fn parse_project_urls(project_urls: &[String]) -> Vec<(String, String)> {
    project_urls
        .iter()
        .filter_map(|url_entry| {
            let (label, url) = url_entry.split_once(',')?;
            let label = label.trim();
            let url = url.trim();
            (!label.is_empty() && !url.is_empty())
                .then(|| (label.to_string(), url.to_string()))
        })
        .collect()
}
1837
/// Joins the one-line summary and the long description body (both trimmed)
/// with a newline; returns `None` when both are blank.
fn build_description(summary: Option<&str>, body: &str) -> Option<String> {
    let mut sections: Vec<String> = Vec::new();

    if let Some(text) = summary {
        let text = text.trim();
        if !text.is_empty() {
            sections.push(text.to_string());
        }
    }

    let body = body.trim();
    if !body.is_empty() {
        sections.push(body.to_string());
    }

    (!sections.is_empty()).then(|| sections.join("\n"))
}
1856
/// Separates trove classifiers into `(keywords, license_classifiers)`:
/// entries starting with `License ::` go to the license list, everything
/// else is treated as a keyword. Relative order is preserved in both lists.
fn split_classifiers(classifiers: &[String]) -> (Vec<String>, Vec<String>) {
    let (license_classifiers, keywords): (Vec<String>, Vec<String>) = classifiers
        .iter()
        .cloned()
        .partition(|classifier| classifier.starts_with("License ::"));

    (keywords, license_classifiers)
}
1871
/// Builds the YAML-ish `extracted_license_statement` text from a raw
/// `License` header value and the `License ::` trove classifiers.
/// Returns `None` when neither source has content; otherwise the result
/// always ends with a trailing newline.
fn build_extracted_license_statement(
    license: Option<&str>,
    license_classifiers: &[String],
) -> Option<String> {
    let mut statement = String::new();

    match license.map(str::trim) {
        Some(value) if !value.is_empty() => {
            statement.push_str(&format!("license: {}\n", value));
        }
        _ => {}
    }

    if !license_classifiers.is_empty() {
        statement.push_str("classifiers:\n");
        for classifier in license_classifiers {
            statement.push_str(&format!("  - '{}'\n", classifier));
        }
    }

    (!statement.is_empty()).then_some(statement)
}
1897
1898pub(crate) fn build_pypi_urls(
1899 name: Option<&str>,
1900 version: Option<&str>,
1901) -> (
1902 Option<String>,
1903 Option<String>,
1904 Option<String>,
1905 Option<String>,
1906) {
1907 let repository_homepage_url = name.map(|value| format!("https://pypi.org/project/{}", value));
1908
1909 let repository_download_url = name.and_then(|value| {
1910 version.map(|ver| {
1911 format!(
1912 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
1913 &value[..1.min(value.len())],
1914 value,
1915 value,
1916 ver
1917 )
1918 })
1919 });
1920
1921 let api_data_url = name.map(|value| {
1922 if let Some(ver) = version {
1923 format!("https://pypi.org/pypi/{}/{}/json", value, ver)
1924 } else {
1925 format!("https://pypi.org/pypi/{}/json", value)
1926 }
1927 });
1928
1929 let purl = name.and_then(|value| {
1930 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), value).ok()?;
1931 if let Some(ver) = version {
1932 package_url.with_version(ver).ok()?;
1933 }
1934 Some(package_url.to_string())
1935 });
1936
1937 (
1938 repository_homepage_url,
1939 repository_download_url,
1940 api_data_url,
1941 purl,
1942 )
1943}
1944
1945fn build_pypi_purl_with_extension(
1946 name: &str,
1947 version: Option<&str>,
1948 extension: &str,
1949) -> Option<String> {
1950 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1951 if let Some(ver) = version {
1952 package_url.with_version(ver).ok()?;
1953 }
1954 package_url.add_qualifier("extension", extension).ok()?;
1955 Some(package_url.to_string())
1956}
1957
/// Extracts `PackageData` from a `pyproject.toml`.
///
/// Looks for project metadata in three layouts, in order: the standard
/// `[project]` table (PEP 621), the `[tool.poetry]` table, or top-level
/// keys (a bare `name = …` at the document root). Pulls name/version,
/// license fields, URLs, parties, dependencies, and `[tool.uv]` extra data,
/// then derives PyPI fallback URLs and the purl. Failures log a `warn!` and
/// return `default_package_data()`.
fn extract_from_pyproject_toml(path: &Path) -> PackageData {
    let toml_content = match read_toml_file(path) {
        Ok(content) => content,
        Err(e) => {
            warn!(
                "Failed to read or parse pyproject.toml at {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    let tool_table = toml_content.get("tool").and_then(|v| v.as_table());

    // Resolve the metadata table: [project] → [tool.poetry] → document root.
    let project_table =
        if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
            project.clone()
        } else if let Some(tool) = tool_table {
            if let Some(poetry) = tool.get("poetry").and_then(|v| v.as_table()) {
                poetry.clone()
            } else {
                warn!(
                    "No project or tool.poetry data found in pyproject.toml at {:?}",
                    path
                );
                return default_package_data();
            }
        } else if toml_content.get(FIELD_NAME).is_some() {
            match toml_content.as_table() {
                Some(table) => table.clone(),
                None => {
                    warn!("Failed to convert TOML content to table in {:?}", path);
                    return default_package_data();
                }
            }
        } else {
            warn!("No project data found in pyproject.toml at {:?}", path);
            return default_package_data();
        };

    let name = project_table
        .get(FIELD_NAME)
        .and_then(|v| v.as_str())
        .map(String::from);

    let version = project_table
        .get(FIELD_VERSION)
        .and_then(|v| v.as_str())
        .map(String::from);
    let classifiers = project_table
        .get("classifiers")
        .and_then(|value| value.as_array())
        .map(|values| {
            values
                .iter()
                .filter_map(|value| value.as_str().map(ToOwned::to_owned))
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    let extracted_license_statement = extract_raw_license_string(&project_table);
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(extract_license_expression_candidate(&project_table));

    let (homepage_url, repository_url) = extract_urls(&project_table);

    let (dependencies, optional_dependencies) = extract_dependencies(&project_table, &toml_content);
    let extra_data = extract_pyproject_extra_data(&toml_content);

    // Build the purl from name (+ optional version); any purl failure is
    // logged and yields None rather than aborting the extraction.
    let purl = name.as_ref().and_then(|n| {
        let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n) {
            Ok(p) => p,
            Err(e) => {
                warn!(
                    "Failed to create PackageUrl for Python package '{}': {}",
                    n, e
                );
                return None;
            }
        };

        if let Some(v) = &version
            && let Err(e) = package_url.with_version(v)
        {
            warn!(
                "Failed to set version '{}' for Python package '{}': {}",
                v, n, e
            );
            return None;
        }

        Some(package_url.to_string())
    });

    let api_data_url = name.as_ref().map(|n| {
        if let Some(v) = &version {
            format!("https://pypi.org/pypi/{}/{}/json", n, v)
        } else {
            format!("https://pypi.org/pypi/{}/json", n)
        }
    });

    // PyPI fallbacks, used only when the pyproject itself declared no URLs.
    let pypi_homepage_url = name
        .as_ref()
        .map(|n| format!("https://pypi.org/project/{}", n));

    let pypi_download_url = name.as_ref().and_then(|n| {
        version.as_ref().map(|v| {
            format!(
                "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
                &n[..1.min(n.len())],
                n,
                n,
                v
            )
        })
    });

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description: None,
        release_date: None,
        parties: extract_parties(&project_table),
        keywords: Vec::new(),
        homepage_url: homepage_url.or(pypi_homepage_url),
        download_url: repository_url.clone().or(pypi_download_url),
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url: None,
        code_view_url: None,
        vcs_url: repository_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data,
        dependencies: [dependencies, optional_dependencies].concat(),
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url,
        datasource_id: Some(DatasourceId::PypiPyprojectToml),
        purl,
    }
}
2127
2128fn extract_raw_license_string(project: &TomlMap<String, TomlValue>) -> Option<String> {
2129 project
2130 .get(FIELD_LICENSE)
2131 .and_then(|license_value| match license_value {
2132 TomlValue::String(license_str) => Some(license_str.clone()),
2133 TomlValue::Table(license_table) => license_table
2134 .get("text")
2135 .and_then(|v| v.as_str())
2136 .map(|s| s.to_string())
2137 .or_else(|| {
2138 license_table
2139 .get("expression")
2140 .and_then(|v| v.as_str())
2141 .map(|expr| expr.to_string())
2142 }),
2143 _ => None,
2144 })
2145}
2146
2147fn extract_license_expression_candidate(project: &TomlMap<String, TomlValue>) -> Option<&str> {
2148 match project.get(FIELD_LICENSE) {
2149 Some(TomlValue::String(license_str)) => Some(license_str.as_str()),
2150 Some(TomlValue::Table(license_table)) => license_table
2151 .get("expression")
2152 .and_then(|value| value.as_str()),
2153 _ => None,
2154 }
2155}
2156
2157fn extract_urls(project: &TomlMap<String, TomlValue>) -> (Option<String>, Option<String>) {
2158 let mut homepage_url = None;
2159 let mut repository_url = None;
2160
2161 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
2163 homepage_url = urls
2164 .get(FIELD_HOMEPAGE)
2165 .and_then(|v| v.as_str())
2166 .map(String::from);
2167 repository_url = urls
2168 .get(FIELD_REPOSITORY)
2169 .and_then(|v| v.as_str())
2170 .map(String::from);
2171 }
2172
2173 if homepage_url.is_none() {
2175 homepage_url = project
2176 .get(FIELD_HOMEPAGE)
2177 .and_then(|v| v.as_str())
2178 .map(String::from);
2179 }
2180
2181 if repository_url.is_none() {
2182 repository_url = project
2183 .get(FIELD_REPOSITORY)
2184 .and_then(|v| v.as_str())
2185 .map(String::from);
2186 }
2187
2188 (homepage_url, repository_url)
2189}
2190
2191fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
2192 let mut parties = Vec::new();
2193
2194 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
2195 for author in authors {
2196 if let Some(author_str) = author.as_str() {
2197 let (name, email) = split_name_email(author_str);
2198 parties.push(Party {
2199 r#type: None,
2200 role: Some("author".to_string()),
2201 name,
2202 email,
2203 url: None,
2204 organization: None,
2205 organization_url: None,
2206 timezone: None,
2207 });
2208 }
2209 }
2210 }
2211
2212 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
2213 for maintainer in maintainers {
2214 if let Some(maintainer_str) = maintainer.as_str() {
2215 let (name, email) = split_name_email(maintainer_str);
2216 parties.push(Party {
2217 r#type: None,
2218 role: Some("maintainer".to_string()),
2219 name,
2220 email,
2221 url: None,
2222 organization: None,
2223 organization_url: None,
2224 timezone: None,
2225 });
2226 }
2227 }
2228 }
2229
2230 parties
2231}
2232
2233fn extract_dependencies(
2234 project: &TomlMap<String, TomlValue>,
2235 toml_content: &TomlValue,
2236) -> (Vec<Dependency>, Vec<Dependency>) {
2237 let mut dependencies = Vec::new();
2238 let mut optional_dependencies = Vec::new();
2239
2240 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
2242 match deps_value {
2243 TomlValue::Array(arr) => {
2244 dependencies = parse_dependency_array(arr, false, None);
2245 }
2246 TomlValue::Table(table) => {
2247 dependencies = parse_dependency_table(table, false, None);
2248 }
2249 _ => {}
2250 }
2251 }
2252
2253 if let Some(opt_deps_table) = project
2255 .get(FIELD_OPTIONAL_DEPENDENCIES)
2256 .and_then(|v| v.as_table())
2257 {
2258 for (extra_name, deps) in opt_deps_table {
2259 match deps {
2260 TomlValue::Array(arr) => {
2261 optional_dependencies.extend(parse_dependency_array(
2262 arr,
2263 true,
2264 Some(extra_name),
2265 ));
2266 }
2267 TomlValue::Table(table) => {
2268 optional_dependencies.extend(parse_dependency_table(
2269 table,
2270 true,
2271 Some(extra_name),
2272 ));
2273 }
2274 _ => {}
2275 }
2276 }
2277 }
2278
2279 if let Some(dev_deps_value) = project.get(FIELD_DEV_DEPENDENCIES) {
2281 match dev_deps_value {
2282 TomlValue::Array(arr) => {
2283 optional_dependencies.extend(parse_dependency_array(
2284 arr,
2285 true,
2286 Some(FIELD_DEV_DEPENDENCIES),
2287 ));
2288 }
2289 TomlValue::Table(table) => {
2290 optional_dependencies.extend(parse_dependency_table(
2291 table,
2292 true,
2293 Some(FIELD_DEV_DEPENDENCIES),
2294 ));
2295 }
2296 _ => {}
2297 }
2298 }
2299
2300 if let Some(groups_table) = project.get("group").and_then(|v| v.as_table()) {
2302 for (group_name, group_data) in groups_table {
2303 if let Some(group_deps) = group_data.as_table().and_then(|t| t.get("dependencies")) {
2304 match group_deps {
2305 TomlValue::Array(arr) => {
2306 optional_dependencies.extend(parse_dependency_array(
2307 arr,
2308 true,
2309 Some(group_name),
2310 ));
2311 }
2312 TomlValue::Table(table) => {
2313 optional_dependencies.extend(parse_dependency_table(
2314 table,
2315 true,
2316 Some(group_name),
2317 ));
2318 }
2319 _ => {}
2320 }
2321 }
2322 }
2323 }
2324
2325 if let Some(groups_table) = toml_content
2326 .get(FIELD_DEPENDENCY_GROUPS)
2327 .and_then(|value| value.as_table())
2328 {
2329 for (group_name, deps) in groups_table {
2330 match deps {
2331 TomlValue::Array(arr) => {
2332 optional_dependencies.extend(parse_dependency_array(
2333 arr,
2334 true,
2335 Some(group_name),
2336 ));
2337 }
2338 TomlValue::Table(table) => {
2339 optional_dependencies.extend(parse_dependency_table(
2340 table,
2341 true,
2342 Some(group_name),
2343 ));
2344 }
2345 _ => {}
2346 }
2347 }
2348 }
2349
2350 if let Some(dev_deps_value) = toml_content
2351 .get("tool")
2352 .and_then(|value| value.as_table())
2353 .and_then(|tool| tool.get("uv"))
2354 .and_then(|value| value.as_table())
2355 .and_then(|uv| uv.get(FIELD_DEV_DEPENDENCIES))
2356 {
2357 match dev_deps_value {
2358 TomlValue::Array(arr) => {
2359 optional_dependencies.extend(parse_dependency_array(arr, true, Some("dev")));
2360 }
2361 TomlValue::Table(table) => {
2362 optional_dependencies.extend(parse_dependency_table(table, true, Some("dev")));
2363 }
2364 _ => {}
2365 }
2366 }
2367
2368 (dependencies, optional_dependencies)
2369}
2370
2371fn extract_pyproject_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
2372 let mut extra_data = HashMap::new();
2373
2374 if let Some(tool_uv) = toml_content
2375 .get("tool")
2376 .and_then(|value| value.as_table())
2377 .and_then(|tool| tool.get("uv"))
2378 {
2379 extra_data.insert("tool_uv".to_string(), toml_value_to_json(tool_uv));
2380 }
2381
2382 if extra_data.is_empty() {
2383 None
2384 } else {
2385 Some(extra_data)
2386 }
2387}
2388
2389fn toml_value_to_json(value: &TomlValue) -> JsonValue {
2390 match value {
2391 TomlValue::String(value) => JsonValue::String(value.clone()),
2392 TomlValue::Integer(value) => JsonValue::String(value.to_string()),
2393 TomlValue::Float(value) => JsonValue::String(value.to_string()),
2394 TomlValue::Boolean(value) => JsonValue::Bool(*value),
2395 TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
2396 TomlValue::Array(values) => {
2397 JsonValue::Array(values.iter().map(toml_value_to_json).collect())
2398 }
2399 TomlValue::Table(values) => JsonValue::Object(
2400 values
2401 .iter()
2402 .map(|(key, value)| (key.clone(), toml_value_to_json(value)))
2403 .collect::<JsonMap<String, JsonValue>>(),
2404 ),
2405 }
2406}
2407
2408fn parse_dependency_table(
2409 table: &TomlMap<String, TomlValue>,
2410 is_optional: bool,
2411 scope: Option<&str>,
2412) -> Vec<Dependency> {
2413 table
2414 .iter()
2415 .filter_map(|(name, version)| {
2416 let version_str = version.as_str().map(|s| s.to_string());
2417 let mut package_url =
2418 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2419
2420 if let Some(v) = &version_str {
2421 package_url.with_version(v).ok()?;
2422 }
2423
2424 Some(Dependency {
2425 purl: Some(package_url.to_string()),
2426 extracted_requirement: None,
2427 scope: scope.map(|s| s.to_string()),
2428 is_runtime: Some(!is_optional),
2429 is_optional: Some(is_optional),
2430 is_pinned: None,
2431 is_direct: Some(true),
2432 resolved_package: None,
2433 extra_data: None,
2434 })
2435 })
2436 .collect()
2437}
2438
2439fn parse_dependency_array(
2440 array: &[TomlValue],
2441 is_optional: bool,
2442 scope: Option<&str>,
2443) -> Vec<Dependency> {
2444 array
2445 .iter()
2446 .filter_map(|dep| {
2447 let dep_str = dep.as_str()?;
2448
2449 let mut parts = dep_str.split(['>', '=', '<', '~']);
2450 let name = parts.next()?.trim().to_string();
2451
2452 let version = parts.next().map(|v| v.trim().to_string());
2453
2454 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name)
2455 {
2456 Ok(purl) => purl,
2457 Err(_) => return None,
2458 };
2459
2460 if let Some(ref v) = version {
2461 package_url.with_version(v).ok()?;
2462 }
2463
2464 Some(Dependency {
2465 purl: Some(package_url.to_string()),
2466 extracted_requirement: None,
2467 scope: scope.map(|s| s.to_string()),
2468 is_runtime: Some(!is_optional),
2469 is_optional: Some(is_optional),
2470 is_pinned: None,
2471 is_direct: Some(true),
2472 resolved_package: None,
2473 extra_data: None,
2474 })
2475 })
2476 .collect()
2477}
2478
/// A Python literal value recovered while evaluating a `setup.py` AST.
///
/// Only shapes the evaluator can compute without running Python are modeled;
/// unsupported expressions simply fail to evaluate (see `LiteralEvaluator`).
#[derive(Debug, Clone)]
enum Value {
    /// A string literal.
    String(String),
    /// A numeric literal; ints are funneled through f64 in
    /// `LiteralEvaluator::evaluate_constant`.
    Number(f64),
    /// A boolean literal.
    Bool(bool),
    /// Python's `None`.
    None,
    /// A list display whose elements all evaluated.
    List(Vec<Value>),
    /// A tuple display whose elements all evaluated.
    Tuple(Vec<Value>),
    /// A dict whose keys stringified and whose values all evaluated.
    Dict(HashMap<String, Value>),
}
2489
/// Evaluates constant expressions in a `setup.py` AST with hard budgets on
/// recursion depth and visited nodes so enormous or hostile files cannot
/// stall the parser.
struct LiteralEvaluator {
    /// Named values from earlier top-level assignments, used to resolve
    /// `Name` references inside `setup(...)` arguments.
    constants: HashMap<String, Value>,
    /// Maximum recursion depth (initialized from `MAX_SETUP_PY_AST_DEPTH`).
    max_depth: usize,
    /// Maximum expression nodes evaluated (from `MAX_SETUP_PY_AST_NODES`).
    max_nodes: usize,
    /// Running node count checked against `max_nodes`.
    nodes_visited: usize,
}
2496
impl LiteralEvaluator {
    /// Creates an evaluator seeded with `constants` and the module-level
    /// depth/node budgets.
    fn new(constants: HashMap<String, Value>) -> Self {
        Self {
            constants,
            max_depth: MAX_SETUP_PY_AST_DEPTH,
            max_nodes: MAX_SETUP_PY_AST_NODES,
            nodes_visited: 0,
        }
    }

    /// Registers a named constant for later `Name` lookups.
    fn insert_constant(&mut self, name: String, value: Value) {
        self.constants.insert(name, value);
    }

    /// Evaluates a restricted subset of Python literal expressions.
    ///
    /// Supported: constants, previously recorded names, list/tuple/dict
    /// displays, `dict(key=value, ...)` with keyword arguments only, and
    /// `OrderedDict([...])` with a single positional argument. Returns
    /// `None` for anything else or once the depth/node budget is exhausted.
    fn evaluate_expr(&mut self, expr: &ast::Expr, depth: usize) -> Option<Value> {
        // Budget check runs before counting this node so a deep or huge tree
        // bails out instead of recursing without bound.
        if depth >= self.max_depth || self.nodes_visited >= self.max_nodes {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Constant(ast::ExprConstant { value, .. }) => self.evaluate_constant(value),
            ast::Expr::Name(ast::ExprName { id, .. }) => self.constants.get(id.as_str()).cloned(),
            ast::Expr::List(ast::ExprList { elts, .. }) => {
                // Any non-evaluable element aborts the whole list.
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::List(values))
            }
            ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::Tuple(values))
            }
            ast::Expr::Dict(ast::ExprDict { keys, values, .. }) => {
                let mut dict = HashMap::new();
                for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
                    // An absent key (e.g. a `**expansion` entry) aborts
                    // evaluation; keys must also stringify.
                    let key_expr = key_expr.as_ref()?;
                    let key_value = self.evaluate_expr(key_expr, depth + 1)?;
                    let key = value_to_string(&key_value)?;
                    let value = self.evaluate_expr(value_expr, depth + 1)?;
                    dict.insert(key, value);
                }
                Some(Value::Dict(dict))
            }
            ast::Expr::Call(ast::ExprCall {
                func,
                args,
                keywords,
                ..
            }) => {
                // `OrderedDict([...])` with positional args only.
                if keywords.is_empty()
                    && let Some(name) = dotted_name(func.as_ref(), depth + 1)
                    && matches!(name.as_str(), "OrderedDict" | "collections.OrderedDict")
                {
                    return self.evaluate_ordered_dict(args, depth + 1);
                }

                // Beyond OrderedDict, only keyword-only calls are considered.
                if !args.is_empty() {
                    return None;
                }

                // `dict(key=value, ...)` with keyword arguments only.
                if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref()
                    && id == "dict"
                {
                    let mut dict = HashMap::new();
                    for keyword in keywords {
                        let key = keyword.arg.as_ref().map(|name| name.as_str())?;
                        let value = self.evaluate_expr(&keyword.value, depth + 1)?;
                        dict.insert(key.to_string(), value);
                    }
                    return Some(Value::Dict(dict));
                }

                None
            }
            _ => None,
        }
    }

    /// Maps a Python constant onto `Value`; bytes, ellipsis, and complex
    /// constants are unsupported.
    fn evaluate_constant(&self, constant: &ast::Constant) -> Option<Value> {
        match constant {
            ast::Constant::Str(value) => Some(Value::String(value.clone())),
            ast::Constant::Bool(value) => Some(Value::Bool(*value)),
            // Python ints are unbounded; go through the decimal string so
            // arbitrarily large values degrade to f64 instead of failing.
            ast::Constant::Int(value) => value.to_string().parse::<f64>().ok().map(Value::Number),
            ast::Constant::Float(value) => Some(Value::Number(*value)),
            ast::Constant::None => Some(Value::None),
            _ => None,
        }
    }

    /// Evaluates `OrderedDict([...])`: the single argument must be a
    /// list/tuple of two-element tuples, which become the dict entries.
    fn evaluate_ordered_dict(&mut self, args: &[ast::Expr], depth: usize) -> Option<Value> {
        if args.len() != 1 {
            return None;
        }

        let items = match self.evaluate_expr(&args[0], depth)? {
            Value::List(items) | Value::Tuple(items) => items,
            _ => return None,
        };

        let mut dict = HashMap::new();
        for item in items {
            let Value::Tuple(values) = item else {
                return None;
            };
            if values.len() != 2 {
                return None;
            }
            let key = value_to_string(&values[0])?;
            dict.insert(key, values[1].clone());
        }

        Some(Value::Dict(dict))
    }
}
2616
/// Names under which `setup` can be invoked in a `setup.py` file.
#[derive(Default)]
struct SetupAliases {
    /// Callable names bound to `setup` (the bare name "setup" is always
    /// included by `collect_setup_aliases`).
    setup_names: HashSet<String>,
    /// Local alias -> real module name for setup-providing module imports,
    /// e.g. `st` -> `setuptools` from `import setuptools as st`.
    module_aliases: HashMap<String, String>,
}
2622
2623fn extract_from_setup_py(path: &Path) -> PackageData {
2624 let content = match read_file_to_string(path) {
2625 Ok(content) => content,
2626 Err(e) => {
2627 warn!("Failed to read setup.py at {:?}: {}", path, e);
2628 return default_package_data();
2629 }
2630 };
2631
2632 if content.len() > MAX_SETUP_PY_BYTES {
2633 warn!("setup.py too large at {:?}: {} bytes", path, content.len());
2634 return extract_from_setup_py_regex(&content);
2635 }
2636
2637 let mut package_data = match extract_from_setup_py_ast(&content) {
2638 Ok(Some(data)) => data,
2639 Ok(None) => extract_from_setup_py_regex(&content),
2640 Err(e) => {
2641 warn!("Failed to parse setup.py AST at {:?}: {}", path, e);
2642 extract_from_setup_py_regex(&content)
2643 }
2644 };
2645
2646 if package_data.name.is_none() {
2647 package_data.name = extract_setup_value(&content, "name");
2648 }
2649
2650 if package_data.version.is_none() {
2651 package_data.version = extract_setup_value(&content, "version");
2652 }
2653
2654 fill_from_sibling_dunder_metadata(path, &content, &mut package_data);
2655
2656 if package_data.purl.is_none() {
2657 package_data.purl = build_setup_py_purl(
2658 package_data.name.as_deref(),
2659 package_data.version.as_deref(),
2660 );
2661 }
2662
2663 package_data
2664}
2665
2666fn fill_from_sibling_dunder_metadata(path: &Path, content: &str, package_data: &mut PackageData) {
2667 if package_data.version.is_some()
2668 && package_data.extracted_license_statement.is_some()
2669 && package_data
2670 .parties
2671 .iter()
2672 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some())
2673 {
2674 return;
2675 }
2676
2677 let Some(root) = path.parent() else {
2678 return;
2679 };
2680
2681 let dunder_metadata = collect_sibling_dunder_metadata(root, content);
2682
2683 if package_data.version.is_none() {
2684 package_data.version = dunder_metadata.version;
2685 }
2686
2687 if package_data.extracted_license_statement.is_none() {
2688 package_data.extracted_license_statement = dunder_metadata.license;
2689 }
2690
2691 let has_author = package_data
2692 .parties
2693 .iter()
2694 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some());
2695
2696 if !has_author && let Some(author) = dunder_metadata.author {
2697 package_data.parties.push(Party {
2698 r#type: Some("person".to_string()),
2699 role: Some("author".to_string()),
2700 name: Some(author),
2701 email: None,
2702 url: None,
2703 organization: None,
2704 organization_url: None,
2705 timezone: None,
2706 });
2707 }
2708}
2709
/// Metadata scraped from `__version__` / `__author__` / `__license__`
/// assignments in modules that `setup.py` imports from.
#[derive(Default)]
struct DunderMetadata {
    version: Option<String>,
    author: Option<String>,
    license: Option<String>,
}
2716
2717fn collect_sibling_dunder_metadata(root: &Path, content: &str) -> DunderMetadata {
2718 let statements = match ast::Suite::parse(content, "<setup.py>") {
2719 Ok(statements) => statements,
2720 Err(_) => return DunderMetadata::default(),
2721 };
2722
2723 let version_re = Regex::new(r#"(?m)^\s*__version__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2724 let author_re = Regex::new(r#"(?m)^\s*__author__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2725 let license_re = Regex::new(r#"(?m)^\s*__license__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2726 let mut metadata = DunderMetadata::default();
2727
2728 for module in imported_dunder_modules(&statements) {
2729 let Some(path) = resolve_imported_module_path(root, &module) else {
2730 continue;
2731 };
2732 let Ok(module_content) = read_file_to_string(&path) else {
2733 continue;
2734 };
2735
2736 if metadata.version.is_none() {
2737 metadata.version = version_re
2738 .as_ref()
2739 .and_then(|regex| regex.captures(&module_content))
2740 .and_then(|captures| captures.get(1))
2741 .map(|match_| match_.as_str().to_string());
2742 }
2743
2744 if metadata.author.is_none() {
2745 metadata.author = author_re
2746 .as_ref()
2747 .and_then(|regex| regex.captures(&module_content))
2748 .and_then(|captures| captures.get(1))
2749 .map(|match_| match_.as_str().to_string());
2750 }
2751
2752 if metadata.license.is_none() {
2753 metadata.license = license_re
2754 .as_ref()
2755 .and_then(|regex| regex.captures(&module_content))
2756 .and_then(|captures| captures.get(1))
2757 .map(|match_| match_.as_str().to_string());
2758 }
2759
2760 if metadata.version.is_some() && metadata.author.is_some() && metadata.license.is_some() {
2761 return metadata;
2762 }
2763 }
2764
2765 metadata
2766}
2767
2768fn imported_dunder_modules(statements: &[ast::Stmt]) -> Vec<String> {
2769 let mut modules = Vec::new();
2770
2771 for statement in statements {
2772 let ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) = statement else {
2773 continue;
2774 };
2775 let Some(module) = module.as_ref().map(|name| name.as_str()) else {
2776 continue;
2777 };
2778 let imports_dunder = names.iter().any(|alias| {
2779 matches!(
2780 alias.name.as_str(),
2781 "__version__" | "__author__" | "__license__"
2782 )
2783 });
2784 if imports_dunder {
2785 modules.push(module.to_string());
2786 }
2787 }
2788
2789 modules
2790}
2791
/// Resolves a dotted Python module name to an existing file under `root`.
///
/// Candidates are tried in order: `<module>.py`, `<module>/__init__.py`,
/// then the same two under a `src/` layout; the first that exists wins.
fn resolve_imported_module_path(root: &Path, module: &str) -> Option<PathBuf> {
    let relative: PathBuf = module.split('.').collect();
    let module_file = relative.with_extension("py");
    let init_file = relative.join("__init__.py");

    [
        root.join(&module_file),
        root.join(&init_file),
        root.join("src").join(&module_file),
        root.join("src").join(&init_file),
    ]
    .into_iter()
    .find(|candidate| candidate.exists())
}
2803
2804fn extract_from_setup_py_ast(content: &str) -> Result<Option<PackageData>, String> {
2820 let statements = ast::Suite::parse(content, "<setup.py>").map_err(|e| format!("{}", e))?;
2821 let aliases = collect_setup_aliases(&statements);
2822 let mut evaluator = LiteralEvaluator::new(HashMap::new());
2823 build_setup_py_constants(&statements, &mut evaluator);
2824
2825 let setup_call = find_setup_call(&statements, &aliases);
2826 let Some(call_expr) = setup_call else {
2827 return Ok(None);
2828 };
2829
2830 let setup_values = extract_setup_keywords(call_expr, &mut evaluator);
2831 Ok(Some(build_setup_py_package_data(&setup_values)))
2832}
2833
2834fn build_setup_py_constants(statements: &[ast::Stmt], evaluator: &mut LiteralEvaluator) {
2835 for stmt in statements {
2836 if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = stmt {
2837 if targets.len() != 1 {
2838 continue;
2839 }
2840
2841 let Some(name) = extract_assign_name(&targets[0]) else {
2842 continue;
2843 };
2844
2845 if let Some(value) = evaluator.evaluate_expr(value.as_ref(), 0) {
2846 evaluator.insert_constant(name, value);
2847 }
2848 }
2849 }
2850}
2851
2852fn extract_assign_name(target: &ast::Expr) -> Option<String> {
2853 match target {
2854 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
2855 _ => None,
2856 }
2857}
2858
2859fn collect_setup_aliases(statements: &[ast::Stmt]) -> SetupAliases {
2860 let mut aliases = SetupAliases::default();
2861 aliases.setup_names.insert("setup".to_string());
2862
2863 for stmt in statements {
2864 match stmt {
2865 ast::Stmt::Import(ast::StmtImport { names, .. }) => {
2866 for alias in names {
2867 let module_name = alias.name.as_str();
2868 if !is_setup_module(module_name) {
2869 continue;
2870 }
2871 let alias_name = alias
2872 .asname
2873 .as_ref()
2874 .map(|name| name.as_str())
2875 .unwrap_or(module_name);
2876 aliases
2877 .module_aliases
2878 .insert(alias_name.to_string(), module_name.to_string());
2879 }
2880 }
2881 ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
2882 let Some(module_name) = module.as_ref().map(|name| name.as_str()) else {
2883 continue;
2884 };
2885 if !is_setup_module(module_name) {
2886 continue;
2887 }
2888 for alias in names {
2889 if alias.name.as_str() != "setup" {
2890 continue;
2891 }
2892 let alias_name = alias
2893 .asname
2894 .as_ref()
2895 .map(|name| name.as_str())
2896 .unwrap_or("setup");
2897 aliases.setup_names.insert(alias_name.to_string());
2898 }
2899 }
2900 _ => {}
2901 }
2902 }
2903
2904 aliases
2905}
2906
/// Whether `module_name` is one of the modules that provide `setup()`.
fn is_setup_module(module_name: &str) -> bool {
    ["setuptools", "distutils", "distutils.core"].contains(&module_name)
}
2910
2911fn find_setup_call<'a>(
2912 statements: &'a [ast::Stmt],
2913 aliases: &'a SetupAliases,
2914) -> Option<&'a ast::Expr> {
2915 let mut finder = SetupCallFinder {
2916 aliases,
2917 nodes_visited: 0,
2918 };
2919 finder.find_in_statements(statements)
2920}
2921
/// Statement-tree walker that locates the first `setup(...)` call; bounded
/// by `MAX_SETUP_PY_AST_NODES` like the literal evaluator.
struct SetupCallFinder<'a> {
    /// Names/aliases that count as the `setup` callable.
    aliases: &'a SetupAliases,
    /// Statements and expressions visited so far, checked against the cap.
    nodes_visited: usize,
}
2926
impl<'a> SetupCallFinder<'a> {
    /// Depth-first search over `statements` for the first `setup(...)` call,
    /// descending into `if`/`for`/`while`/`with`/`try` bodies. Returns the
    /// call expression, or `None` when there is no match or the node budget
    /// is exhausted.
    fn find_in_statements(&mut self, statements: &'a [ast::Stmt]) -> Option<&'a ast::Expr> {
        for stmt in statements {
            if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
                return None;
            }
            self.nodes_visited += 1;

            let found = match stmt {
                // Bare expression statement, e.g. `setup(...)`.
                ast::Stmt::Expr(ast::StmtExpr { value, .. }) => self.visit_expr(value.as_ref()),
                // Assignment RHS, e.g. `result = setup(...)`.
                ast::Stmt::Assign(ast::StmtAssign { value, .. }) => self.visit_expr(value.as_ref()),
                ast::Stmt::If(ast::StmtIf { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::For(ast::StmtFor { body, orelse, .. })
                | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::With(ast::StmtWith { body, .. }) => self.find_in_statements(body),
                // try/try*: search body, else, finally, then every except
                // handler body in order.
                ast::Stmt::Try(ast::StmtTry {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                })
                | ast::Stmt::TryStar(ast::StmtTryStar {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse))
                    .or_else(|| self.find_in_statements(finalbody))
                    .or_else(|| {
                        for handler in handlers {
                            let ast::ExceptHandler::ExceptHandler(
                                ast::ExceptHandlerExceptHandler { body, .. },
                            ) = handler;
                            if let Some(found) = self.find_in_statements(body) {
                                return Some(found);
                            }
                        }
                        None
                    }),
                _ => None,
            };

            if found.is_some() {
                return found;
            }
        }

        None
    }

    /// Returns `expr` itself when it is a call whose callee resolves to a
    /// known `setup` name/alias.
    fn visit_expr(&mut self, expr: &'a ast::Expr) -> Option<&'a ast::Expr> {
        if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Call(ast::ExprCall { func, .. })
                if is_setup_call(func.as_ref(), self.aliases) =>
            {
                Some(expr)
            }
            _ => None,
        }
    }
}
3001
3002fn is_setup_call(func: &ast::Expr, aliases: &SetupAliases) -> bool {
3003 let Some(dotted) = dotted_name(func, 0) else {
3004 return false;
3005 };
3006
3007 if aliases.setup_names.contains(&dotted) {
3008 return true;
3009 }
3010
3011 let Some(module) = dotted.strip_suffix(".setup") else {
3012 return false;
3013 };
3014
3015 let resolved = resolve_module_alias(module, aliases);
3016 is_setup_module(&resolved)
3017}
3018
3019fn dotted_name(expr: &ast::Expr, depth: usize) -> Option<String> {
3020 if depth >= MAX_SETUP_PY_AST_DEPTH {
3021 return None;
3022 }
3023
3024 match expr {
3025 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
3026 ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
3027 let base = dotted_name(value.as_ref(), depth + 1)?;
3028 Some(format!("{}.{}", base, attr.as_str()))
3029 }
3030 _ => None,
3031 }
3032}
3033
3034fn resolve_module_alias(module: &str, aliases: &SetupAliases) -> String {
3035 if let Some(mapped) = aliases.module_aliases.get(module) {
3036 return mapped.clone();
3037 }
3038
3039 let Some((base, rest)) = module.split_once('.') else {
3040 return module.to_string();
3041 };
3042
3043 if let Some(mapped) = aliases.module_aliases.get(base) {
3044 return format!("{}.{}", mapped, rest);
3045 }
3046
3047 module.to_string()
3048}
3049
3050fn extract_setup_keywords(
3051 call_expr: &ast::Expr,
3052 evaluator: &mut LiteralEvaluator,
3053) -> HashMap<String, Value> {
3054 let mut values = HashMap::new();
3055 let ast::Expr::Call(ast::ExprCall { keywords, .. }) = call_expr else {
3056 return values;
3057 };
3058
3059 for keyword in keywords {
3060 if let Some(arg) = keyword.arg.as_ref().map(|name| name.as_str()) {
3061 if let Some(value) = evaluator.evaluate_expr(&keyword.value, 0) {
3062 values.insert(arg.to_string(), value);
3063 }
3064 } else if let Some(Value::Dict(dict)) = evaluator.evaluate_expr(&keyword.value, 0) {
3065 for (key, value) in dict {
3066 values.insert(key, value);
3067 }
3068 }
3069 }
3070
3071 values
3072}
3073
/// Assembles a `PackageData` record from evaluated `setup(...)` keyword
/// arguments.
///
/// Scalar fields (name, version, description, license, author/maintainer)
/// are read directly from `values`; `project_urls` entries are routed onto
/// the homepage, bug-tracking, code-view, and VCS URL fields plus
/// `extra_data`; classifiers drive the private-package flag.
fn build_setup_py_package_data(values: &HashMap<String, Value>) -> PackageData {
    let name = get_value_string(values, "name");
    let version = get_value_string(values, "version");
    // "summary" and "home_page" are accepted as alternate spellings.
    let description =
        get_value_string(values, "description").or_else(|| get_value_string(values, "summary"));
    let homepage_url =
        get_value_string(values, "url").or_else(|| get_value_string(values, "home_page"));
    let author = get_value_string(values, "author");
    let author_email = get_value_string(values, "author_email");
    let maintainer = get_value_string(values, "maintainer");
    let maintainer_email = get_value_string(values, "maintainer_email");
    let license = get_value_string(values, "license");
    let classifiers = values
        .get("classifiers")
        .and_then(value_to_string_list)
        .unwrap_or_default();

    // A party is recorded only when at least one of name/email is present.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    if maintainer.is_some() || maintainer_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("maintainer".to_string()),
            name: maintainer,
            email: maintainer_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    // Normalized SPDX expression plus the raw statement as written.
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license.as_deref());
    let extracted_license_statement = license.clone();

    let dependencies = build_setup_py_dependencies(values);
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
    let mut homepage_from_project_urls = None;
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
    let mut extra_data = HashMap::new();

    // Map `project_urls` entries onto the dedicated URL fields and
    // `extra_data` (mapping rules live in `apply_project_url_mappings`).
    if let Some(parsed_project_urls) = values.get("project_urls").and_then(value_to_string_pairs) {
        apply_project_url_mappings(
            &parsed_project_urls,
            &mut homepage_from_project_urls,
            &mut bug_tracking_url,
            &mut code_view_url,
            &mut vcs_url,
            &mut extra_data,
        );
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description,
        release_date: None,
        parties,
        keywords: Vec::new(),
        // An explicit `url=` keyword wins over a `project_urls` homepage.
        homepage_url: homepage_url.or(homepage_from_project_urls),
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
3190
3191fn build_setup_py_dependencies(values: &HashMap<String, Value>) -> Vec<Dependency> {
3192 let mut dependencies = Vec::new();
3193
3194 if let Some(reqs) = values
3195 .get("install_requires")
3196 .and_then(value_to_string_list)
3197 {
3198 dependencies.extend(build_setup_py_dependency_list(&reqs, "install", false));
3199 }
3200
3201 if let Some(reqs) = values.get("tests_require").and_then(value_to_string_list) {
3202 dependencies.extend(build_setup_py_dependency_list(&reqs, "test", true));
3203 }
3204
3205 if let Some(Value::Dict(extras)) = values.get("extras_require") {
3206 let mut extra_items: Vec<_> = extras.iter().collect();
3207 extra_items.sort_by_key(|(name, _)| *name);
3208 for (extra_name, extra_value) in extra_items {
3209 if let Some(reqs) = value_to_string_list(extra_value) {
3210 dependencies.extend(build_setup_py_dependency_list(
3211 reqs.as_slice(),
3212 extra_name,
3213 true,
3214 ));
3215 }
3216 }
3217 }
3218
3219 dependencies
3220}
3221
3222fn build_setup_py_dependency_list(
3223 reqs: &[String],
3224 scope: &str,
3225 is_optional: bool,
3226) -> Vec<Dependency> {
3227 reqs.iter()
3228 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
3229 .collect()
3230}
3231
3232fn get_value_string(values: &HashMap<String, Value>, key: &str) -> Option<String> {
3233 values.get(key).and_then(value_to_string)
3234}
3235
3236fn value_to_string(value: &Value) -> Option<String> {
3237 match value {
3238 Value::String(value) => Some(value.clone()),
3239 Value::Number(value) => Some(value.to_string()),
3240 Value::Bool(value) => Some(value.to_string()),
3241 _ => None,
3242 }
3243}
3244
3245fn value_to_string_list(value: &Value) -> Option<Vec<String>> {
3246 match value {
3247 Value::String(value) => Some(vec![value.clone()]),
3248 Value::List(values) | Value::Tuple(values) => {
3249 let mut items = Vec::new();
3250 for item in values {
3251 items.push(value_to_string(item)?);
3252 }
3253 Some(items)
3254 }
3255 _ => None,
3256 }
3257}
3258
3259fn value_to_string_pairs(value: &Value) -> Option<Vec<(String, String)>> {
3260 let Value::Dict(dict) = value else {
3261 return None;
3262 };
3263
3264 let mut pairs: Vec<(String, String)> = dict
3265 .iter()
3266 .map(|(key, value)| Some((key.clone(), value_to_string(value)?)))
3267 .collect::<Option<Vec<_>>>()?;
3268 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3269 Some(pairs)
3270}
3271
3272fn extract_rfc822_dependencies(headers: &HashMap<String, Vec<String>>) -> Vec<Dependency> {
3273 let requires_dist = super::rfc822::get_header_all(headers, "requires-dist");
3274 requires_dist
3275 .iter()
3276 .filter_map(|entry| build_rfc822_dependency(entry))
3277 .collect()
3278}
3279
/// Parses one `Requires-Dist` entry with the default "install" scope and no
/// marker override.
fn build_rfc822_dependency(entry: &str) -> Option<Dependency> {
    build_python_dependency(entry, "install", false, None)
}
3283
/// Builds a dependency from a single requirement line such as
/// `requests (>=2.0) ; python_version >= "3.8"`.
///
/// The marker after `;` (or `marker_override` when the line itself has none)
/// can switch scope/optionality — see `parse_rfc822_marker`. The purl only
/// carries a version when the requirement is pinned with `==`/`===`.
/// Returns `None` when no package name can be extracted or the name is not
/// purl-encodable.
fn build_python_dependency(
    entry: &str,
    default_scope: &str,
    default_optional: bool,
    marker_override: Option<&str>,
) -> Option<Dependency> {
    // Split `name-and-specifiers ; marker` on the first semicolon.
    let (requirement_part, marker_part) = entry
        .split_once(';')
        .map(|(req, marker)| (req.trim(), Some(marker.trim())))
        .unwrap_or((entry.trim(), None));

    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let requirement = normalize_rfc822_requirement(requirement_part);
    let (scope, is_optional, marker, marker_data) = parse_rfc822_marker(
        marker_part.or(marker_override),
        default_scope,
        default_optional,
    );
    let mut purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;

    // `starts_with("==")` already covers the `===` case.
    // NOTE(review): when the normalized requirement has multiple specifiers
    // (e.g. "==1.0,<2"), the whole tail after the leading '='s becomes the
    // purl version — confirm this is intended for multi-specifier pins.
    let is_pinned = requirement
        .as_deref()
        .is_some_and(|req| req.starts_with("==") || req.starts_with("==="));
    if is_pinned
        && let Some(version) = requirement
            .as_deref()
            .map(|req| req.trim_start_matches('='))
    {
        purl.with_version(version).ok()?;
    }

    // Marker-derived fields (python_version/sys_platform/extra scope) plus
    // the raw marker text travel in extra_data.
    let mut extra_data = HashMap::new();
    extra_data.extend(marker_data);
    if let Some(marker) = marker {
        extra_data.insert("marker".to_string(), serde_json::Value::String(marker));
    }

    Some(Dependency {
        purl: Some(purl.to_string()),
        extracted_requirement: requirement,
        scope: Some(scope),
        is_runtime: Some(true),
        is_optional: Some(is_optional),
        is_pinned: Some(is_pinned),
        is_direct: Some(true),
        resolved_package: None,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
    })
}
3337
/// Normalizes the specifier portion of a requirement into a canonical form:
/// extras (`[...]`) and wrapping parentheses are dropped, whitespace inside
/// each specifier is removed, and specifiers are sorted and comma-joined.
/// Returns `None` when the requirement carries no version specifiers.
///
/// NOTE(review): the `trimmed[name.len()..]` slice assumes
/// `extract_setup_cfg_dependency_name` returns a prefix of the trimmed
/// requirement — confirm against that helper's contract.
fn normalize_rfc822_requirement(requirement_part: &str) -> Option<String> {
    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let trimmed = requirement_part.trim();
    let mut remainder = trimmed[name.len()..].trim();

    // Skip an extras bracket such as `[security]`.
    if let Some(stripped) = remainder.strip_prefix('[')
        && let Some(end_idx) = stripped.find(']')
    {
        remainder = stripped[end_idx + 1..].trim();
    }

    // Unwrap the legacy parenthesized form `(>=1.0, <2)`.
    let remainder = remainder
        .strip_prefix('(')
        .and_then(|value| value.strip_suffix(')'))
        .unwrap_or(remainder)
        .trim();

    if remainder.is_empty() {
        return None;
    }

    // Sorting makes the normalized form order-independent.
    let mut specifiers: Vec<String> = remainder
        .split(',')
        .map(|specifier| specifier.trim().replace(' ', ""))
        .filter(|specifier| !specifier.is_empty())
        .collect();
    specifiers.sort();
    Some(specifiers.join(","))
}
3367
3368fn parse_rfc822_marker(
3369 marker_part: Option<&str>,
3370 default_scope: &str,
3371 default_optional: bool,
3372) -> (
3373 String,
3374 bool,
3375 Option<String>,
3376 HashMap<String, serde_json::Value>,
3377) {
3378 let Some(marker) = marker_part.filter(|marker| !marker.trim().is_empty()) else {
3379 return (
3380 default_scope.to_string(),
3381 default_optional,
3382 None,
3383 HashMap::new(),
3384 );
3385 };
3386
3387 let extra_re = Regex::new(r#"extra\s*==\s*['\"]([^'\"]+)['\"]"#)
3388 .expect("extra marker regex should compile");
3389 let mut extra_data = HashMap::new();
3390
3391 if let Some(python_version) = extract_marker_field(marker, "python_version") {
3392 extra_data.insert(
3393 "python_version".to_string(),
3394 serde_json::Value::String(python_version),
3395 );
3396 }
3397 if let Some(sys_platform) = extract_marker_field(marker, "sys_platform") {
3398 extra_data.insert(
3399 "sys_platform".to_string(),
3400 serde_json::Value::String(sys_platform),
3401 );
3402 }
3403
3404 if let Some(captures) = extra_re.captures(marker)
3405 && let Some(scope) = captures.get(1)
3406 {
3407 return (
3408 scope.as_str().to_string(),
3409 true,
3410 Some(marker.trim().to_string()),
3411 extra_data,
3412 );
3413 }
3414
3415 (
3416 default_scope.to_string(),
3417 default_optional,
3418 Some(marker.trim().to_string()),
3419 extra_data,
3420 )
3421}
3422
3423fn extract_marker_field(marker: &str, field: &str) -> Option<String> {
3424 let re = Regex::new(&format!(
3425 r#"{}\s*(==|!=|<=|>=|<|>)\s*['\"]([^'\"]+)['\"]"#,
3426 field
3427 ))
3428 .ok()?;
3429 let captures = re.captures(marker)?;
3430 let operator = captures.get(1)?.as_str();
3431 let value = captures.get(2)?.as_str();
3432 Some(format!("{} {}", operator, value))
3433}
3434
3435fn parse_requires_txt(content: &str) -> Vec<Dependency> {
3436 let mut dependencies = Vec::new();
3437 let mut current_scope = "install".to_string();
3438 let mut current_optional = false;
3439 let mut current_marker: Option<String> = None;
3440
3441 for line in content.lines() {
3442 let trimmed = line.trim();
3443 if trimmed.is_empty() || trimmed.starts_with('#') {
3444 continue;
3445 }
3446
3447 if trimmed.starts_with('[') && trimmed.ends_with(']') {
3448 let inner = &trimmed[1..trimmed.len() - 1];
3449 if let Some(rest) = inner.strip_prefix(':') {
3450 current_scope = "install".to_string();
3451 current_optional = false;
3452 current_marker = Some(rest.trim().to_string());
3453 } else if let Some((scope, marker)) = inner.split_once(':') {
3454 current_scope = scope.trim().to_string();
3455 current_optional = true;
3456 current_marker = Some(marker.trim().to_string());
3457 } else {
3458 current_scope = inner.trim().to_string();
3459 current_optional = true;
3460 current_marker = None;
3461 }
3462 continue;
3463 }
3464
3465 if let Some(dependency) = build_python_dependency(
3466 trimmed,
3467 ¤t_scope,
3468 current_optional,
3469 current_marker.as_deref(),
3470 ) {
3471 dependencies.push(dependency);
3472 }
3473 }
3474
3475 dependencies
3476}
3477
/// True when the trove classifiers mark the package as not intended for
/// upload (the conventional "Private :: Do Not Upload" classifier,
/// compared case-insensitively).
fn has_private_classifier(classifiers: &[String]) -> bool {
    const PRIVATE_MARKER: &str = "Private :: Do Not Upload";
    classifiers
        .iter()
        .any(|entry| entry.eq_ignore_ascii_case(PRIVATE_MARKER))
}
3483
3484fn build_setup_py_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
3485 let name = name?;
3486 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
3487 if let Some(version) = version {
3488 package_url.with_version(version).ok()?;
3489 }
3490 Some(package_url.to_string())
3491}
3492
/// Best-effort extraction of package metadata from `setup.py` source text
/// using string/regex scanning (the fallback when AST parsing is not used).
///
/// Recovers scalar keyword arguments (`name`, `version`, `license`, `url`)
/// plus regex-recognizable `tests_require` / `extras_require` lists; all
/// other fields are left empty. Tagged with the `PypiSetupPy` datasource.
fn extract_from_setup_py_regex(content: &str) -> PackageData {
    let name = extract_setup_value(content, "name");
    let version = extract_setup_value(content, "version");
    let license_expression = extract_setup_value(content, "license");

    // Normalize the raw license text to SPDX where possible while keeping the
    // original statement verbatim in `extracted_license_statement`.
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(license_expression.as_deref());
    let extracted_license_statement = license_expression.clone();

    let dependencies = extract_setup_py_dependencies(content);
    let homepage_url = extract_setup_value(content, "url");
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: Some("Python".to_string()),
        description: None,
        release_date: None,
        parties: Vec::new(),
        keywords: Vec::new(),
        homepage_url,
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url: None,
        code_view_url: None,
        vcs_url: None,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: false,
        is_virtual: false,
        extra_data: None,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
3551
/// Projects a `PackageData` onto the slimmer `ResolvedPackage` shape embedded
/// inside dependency entries.
///
/// Missing identity fields (namespace/name/version) become empty strings;
/// `extra_data` is intentionally dropped rather than copied over.
fn package_data_to_resolved(pkg: &PackageData) -> crate::models::ResolvedPackage {
    crate::models::ResolvedPackage {
        // Python manifests default to the Pypi package type when unset.
        package_type: pkg.package_type.unwrap_or(PackageType::Pypi),
        namespace: pkg.namespace.clone().unwrap_or_default(),
        name: pkg.name.clone().unwrap_or_default(),
        version: pkg.version.clone().unwrap_or_default(),
        primary_language: pkg.primary_language.clone(),
        download_url: pkg.download_url.clone(),
        sha1: pkg.sha1.clone(),
        sha256: pkg.sha256.clone(),
        sha512: pkg.sha512.clone(),
        md5: pkg.md5.clone(),
        is_virtual: pkg.is_virtual,
        extra_data: None,
        dependencies: pkg.dependencies.clone(),
        repository_homepage_url: pkg.repository_homepage_url.clone(),
        repository_download_url: pkg.repository_download_url.clone(),
        api_data_url: pkg.api_data_url.clone(),
        datasource_id: pkg.datasource_id,
        purl: pkg.purl.clone(),
    }
}
3574
3575fn extract_from_pypi_json(path: &Path) -> PackageData {
3576 let default = PackageData {
3577 package_type: Some(PythonParser::PACKAGE_TYPE),
3578 datasource_id: Some(DatasourceId::PypiJson),
3579 ..Default::default()
3580 };
3581
3582 let content = match read_file_to_string(path) {
3583 Ok(content) => content,
3584 Err(error) => {
3585 warn!("Failed to read pypi.json at {:?}: {}", path, error);
3586 return default;
3587 }
3588 };
3589
3590 let root: serde_json::Value = match serde_json::from_str(&content) {
3591 Ok(value) => value,
3592 Err(error) => {
3593 warn!("Failed to parse pypi.json at {:?}: {}", path, error);
3594 return default;
3595 }
3596 };
3597
3598 let Some(info) = root.get("info").and_then(|value| value.as_object()) else {
3599 warn!("No info object found in pypi.json at {:?}", path);
3600 return default;
3601 };
3602
3603 let name = info
3604 .get("name")
3605 .and_then(|value| value.as_str())
3606 .map(ToOwned::to_owned);
3607 let version = info
3608 .get("version")
3609 .and_then(|value| value.as_str())
3610 .map(ToOwned::to_owned);
3611 let summary = info
3612 .get("summary")
3613 .and_then(|value| value.as_str())
3614 .map(ToOwned::to_owned);
3615 let description = info
3616 .get("description")
3617 .and_then(|value| value.as_str())
3618 .filter(|value| !value.trim().is_empty())
3619 .map(ToOwned::to_owned)
3620 .or(summary);
3621 let mut homepage_url = info
3622 .get("home_page")
3623 .and_then(|value| value.as_str())
3624 .map(ToOwned::to_owned);
3625 let author = info
3626 .get("author")
3627 .and_then(|value| value.as_str())
3628 .filter(|value| !value.trim().is_empty())
3629 .map(ToOwned::to_owned);
3630 let author_email = info
3631 .get("author_email")
3632 .and_then(|value| value.as_str())
3633 .filter(|value| !value.trim().is_empty())
3634 .map(ToOwned::to_owned);
3635 let license = info
3636 .get("license")
3637 .and_then(|value| value.as_str())
3638 .filter(|value| !value.trim().is_empty())
3639 .map(ToOwned::to_owned);
3640 let keywords = parse_setup_cfg_keywords(
3641 info.get("keywords")
3642 .and_then(|value| value.as_str())
3643 .map(ToOwned::to_owned),
3644 );
3645 let classifiers = info
3646 .get("classifiers")
3647 .and_then(|value| value.as_array())
3648 .map(|values| {
3649 values
3650 .iter()
3651 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
3652 .collect::<Vec<_>>()
3653 })
3654 .unwrap_or_default();
3655
3656 let mut parties = Vec::new();
3657 if author.is_some() || author_email.is_some() {
3658 parties.push(Party {
3659 r#type: Some("person".to_string()),
3660 role: Some("author".to_string()),
3661 name: author,
3662 email: author_email,
3663 url: None,
3664 organization: None,
3665 organization_url: None,
3666 timezone: None,
3667 });
3668 }
3669
3670 let mut bug_tracking_url = None;
3671 let mut code_view_url = None;
3672 let mut vcs_url = None;
3673 let mut extra_data = HashMap::new();
3674
3675 let parsed_project_urls = info
3676 .get("project_urls")
3677 .and_then(|value| value.as_object())
3678 .map(|map| {
3679 let mut pairs: Vec<(String, String)> = map
3680 .iter()
3681 .filter_map(|(key, value)| Some((key.clone(), value.as_str()?.to_string())))
3682 .collect();
3683 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3684 pairs
3685 })
3686 .unwrap_or_default();
3687
3688 apply_project_url_mappings(
3689 &parsed_project_urls,
3690 &mut homepage_url,
3691 &mut bug_tracking_url,
3692 &mut code_view_url,
3693 &mut vcs_url,
3694 &mut extra_data,
3695 );
3696
3697 let (download_url, size, sha256) = root
3698 .get("urls")
3699 .and_then(|value| value.as_array())
3700 .map(|urls| select_pypi_json_artifact(urls))
3701 .unwrap_or((None, None, None));
3702
3703 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
3704 build_pypi_urls(name.as_deref(), version.as_deref());
3705
3706 PackageData {
3707 package_type: Some(PythonParser::PACKAGE_TYPE),
3708 namespace: None,
3709 name,
3710 version,
3711 qualifiers: None,
3712 subpath: None,
3713 primary_language: None,
3714 description,
3715 release_date: None,
3716 parties,
3717 keywords,
3718 homepage_url: homepage_url.or(repository_homepage_url.clone()),
3719 download_url,
3720 size,
3721 sha1: None,
3722 md5: None,
3723 sha256,
3724 sha512: None,
3725 bug_tracking_url,
3726 code_view_url,
3727 vcs_url,
3728 copyright: None,
3729 holder: None,
3730 declared_license_expression: None,
3731 declared_license_expression_spdx: None,
3732 license_detections: Vec::new(),
3733 other_license_expression: None,
3734 other_license_expression_spdx: None,
3735 other_license_detections: Vec::new(),
3736 extracted_license_statement: license,
3737 notice_text: None,
3738 source_packages: Vec::new(),
3739 file_references: Vec::new(),
3740 is_private: has_private_classifier(&classifiers),
3741 is_virtual: false,
3742 extra_data: if extra_data.is_empty() {
3743 None
3744 } else {
3745 Some(extra_data)
3746 },
3747 dependencies: Vec::new(),
3748 repository_homepage_url,
3749 repository_download_url,
3750 api_data_url,
3751 datasource_id: Some(DatasourceId::PypiJson),
3752 purl,
3753 }
3754}
3755
3756fn select_pypi_json_artifact(
3757 urls: &[serde_json::Value],
3758) -> (Option<String>, Option<u64>, Option<String>) {
3759 let selected = urls
3760 .iter()
3761 .find(|entry| entry.get("packagetype").and_then(|value| value.as_str()) == Some("sdist"))
3762 .or_else(|| urls.first());
3763
3764 let Some(entry) = selected else {
3765 return (None, None, None);
3766 };
3767
3768 let download_url = entry
3769 .get("url")
3770 .and_then(|value| value.as_str())
3771 .map(ToOwned::to_owned);
3772 let size = entry.get("size").and_then(|value| value.as_u64());
3773 let sha256 = entry
3774 .get("digests")
3775 .and_then(|value| value.as_object())
3776 .and_then(|digests| digests.get("sha256"))
3777 .and_then(|value| value.as_str())
3778 .map(ToOwned::to_owned);
3779
3780 (download_url, size, sha256)
3781}
3782
/// Parses a `pip-inspect.deplock` file (JSON output of `pip inspect`) into a
/// single main package plus its pinned dependency list.
///
/// The installed entry marked `requested` that also carries a `direct_url` is
/// treated as the main package; every other installed entry becomes a pinned
/// dependency with a resolved-package snapshot attached. Returns default
/// (empty) package data when the file is unreadable, malformed, or contains
/// no main package.
fn extract_from_pip_inspect(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip-inspect.deplock at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: serde_json::Value = match serde_json::from_str(&content) {
        Ok(value) => value,
        Err(e) => {
            warn!(
                "Failed to parse pip-inspect.deplock JSON at {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    let installed = match root.get("installed").and_then(|v| v.as_array()) {
        Some(arr) => arr,
        None => {
            warn!(
                "No 'installed' array found in pip-inspect.deplock at {:?}",
                path
            );
            return default_package_data();
        }
    };

    // Tool provenance; recorded on the main package's extra_data below.
    let pip_version = root
        .get("pip_version")
        .and_then(|v| v.as_str())
        .map(String::from);
    let inspect_version = root
        .get("version")
        .and_then(|v| v.as_str())
        .map(String::from);

    let mut main_package: Option<PackageData> = None;
    let mut dependencies: Vec<Dependency> = Vec::new();

    for package_entry in installed {
        // Entries without a metadata object carry nothing usable; skip them.
        let metadata = match package_entry.get("metadata") {
            Some(m) => m,
            None => continue,
        };

        // "requested" + "direct_url" together identify the directly-installed
        // package (the main package of this lockfile).
        let is_requested = package_entry
            .get("requested")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let has_direct_url = package_entry.get("direct_url").is_some();

        // Core metadata fields from the entry's "metadata" object.
        let name = metadata
            .get("name")
            .and_then(|v| v.as_str())
            .map(String::from);
        let version = metadata
            .get("version")
            .and_then(|v| v.as_str())
            .map(String::from);
        let summary = metadata
            .get("summary")
            .and_then(|v| v.as_str())
            .map(String::from);
        let home_page = metadata
            .get("home_page")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author = metadata
            .get("author")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author_email = metadata
            .get("author_email")
            .and_then(|v| v.as_str())
            .map(String::from);
        let license = metadata
            .get("license")
            .and_then(|v| v.as_str())
            .map(String::from);
        let description = metadata
            .get("description")
            .and_then(|v| v.as_str())
            .map(String::from);
        let keywords = metadata
            .get("keywords")
            .and_then(|v| v.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|k| k.as_str().map(String::from))
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();

        let mut parties = Vec::new();
        if author.is_some() || author_email.is_some() {
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: author,
                email: author_email,
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }

        // License text is kept verbatim; no SPDX normalization is attempted
        // for pip-inspect data.
        let license_detections = Vec::new();
        let declared_license_expression = None;
        let declared_license_expression_spdx = None;
        let extracted_license_statement = license.clone();

        let purl = name.as_ref().and_then(|n| {
            let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
            if let Some(v) = &version {
                package_url.with_version(v).ok()?;
            }
            Some(package_url.to_string())
        });

        if is_requested && has_direct_url {
            // Main package: carries tool provenance in extra_data.
            // NOTE(review): if several entries are requested+direct, the last
            // one wins — confirm this is the intended behavior.
            let mut extra_data = HashMap::new();
            if let Some(pv) = &pip_version {
                extra_data.insert(
                    "pip_version".to_string(),
                    serde_json::Value::String(pv.clone()),
                );
            }
            if let Some(iv) = &inspect_version {
                extra_data.insert(
                    "inspect_version".to_string(),
                    serde_json::Value::String(iv.clone()),
                );
            }

            main_package = Some(PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name,
                version,
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: if extra_data.is_empty() {
                    None
                } else {
                    Some(extra_data)
                },
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl,
            });
        } else {
            // Every other entry becomes a pinned dependency carrying a full
            // resolved-package snapshot.
            let resolved_package = PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name: name.clone(),
                version: version.clone(),
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: None,
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl: purl.clone(),
            };

            let resolved = package_data_to_resolved(&resolved_package);
            dependencies.push(Dependency {
                purl,
                extracted_requirement: None,
                scope: None,
                is_runtime: Some(true),
                is_optional: Some(false),
                is_pinned: Some(true),
                is_direct: Some(is_requested),
                resolved_package: Some(Box::new(resolved)),
                extra_data: None,
            });
        }
    }

    if let Some(mut main_pkg) = main_package {
        main_pkg.dependencies = dependencies;
        main_pkg
    } else {
        default_package_data()
    }
}
4039
/// Parsed INI content: section name -> key -> list of values (repeated keys
/// and continuation lines each contribute one entry).
type IniSections = HashMap<String, HashMap<String, Vec<String>>>;
4041
4042fn extract_from_setup_cfg(path: &Path) -> PackageData {
4043 let content = match read_file_to_string(path) {
4044 Ok(content) => content,
4045 Err(e) => {
4046 warn!("Failed to read setup.cfg at {:?}: {}", path, e);
4047 return default_package_data();
4048 }
4049 };
4050
4051 let sections = parse_setup_cfg(&content);
4052 let name = get_ini_value(§ions, "metadata", "name");
4053 let version = get_ini_value(§ions, "metadata", "version");
4054 let description = get_ini_value(§ions, "metadata", "description");
4055 let author = get_ini_value(§ions, "metadata", "author");
4056 let author_email = get_ini_value(§ions, "metadata", "author_email");
4057 let maintainer = get_ini_value(§ions, "metadata", "maintainer");
4058 let maintainer_email = get_ini_value(§ions, "metadata", "maintainer_email");
4059 let license = get_ini_value(§ions, "metadata", "license");
4060 let mut homepage_url = get_ini_value(§ions, "metadata", "url");
4061 let classifiers = get_ini_values(§ions, "metadata", "classifiers");
4062 let keywords = parse_setup_cfg_keywords(get_ini_value(§ions, "metadata", "keywords"));
4063 let python_requires = get_ini_value(§ions, "options", "python_requires");
4064 let parsed_project_urls =
4065 parse_setup_cfg_project_urls(&get_ini_values(§ions, "metadata", "project_urls"));
4066 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
4067 let mut extra_data = HashMap::new();
4068
4069 let mut parties = Vec::new();
4070 if author.is_some() || author_email.is_some() {
4071 parties.push(Party {
4072 r#type: Some("person".to_string()),
4073 role: Some("author".to_string()),
4074 name: author,
4075 email: author_email,
4076 url: None,
4077 organization: None,
4078 organization_url: None,
4079 timezone: None,
4080 });
4081 }
4082
4083 if maintainer.is_some() || maintainer_email.is_some() {
4084 parties.push(Party {
4085 r#type: Some("person".to_string()),
4086 role: Some("maintainer".to_string()),
4087 name: maintainer,
4088 email: maintainer_email,
4089 url: None,
4090 organization: None,
4091 organization_url: None,
4092 timezone: None,
4093 });
4094 }
4095
4096 let declared_license_expression = None;
4097 let declared_license_expression_spdx = None;
4098 let license_detections = Vec::new();
4099 let extracted_license_statement = license.clone();
4100
4101 let dependencies = extract_setup_cfg_dependencies(§ions);
4102
4103 if let Some(value) = python_requires {
4104 extra_data.insert(
4105 "python_requires".to_string(),
4106 serde_json::Value::String(value),
4107 );
4108 }
4109
4110 apply_project_url_mappings(
4111 &parsed_project_urls,
4112 &mut homepage_url,
4113 &mut bug_tracking_url,
4114 &mut code_view_url,
4115 &mut vcs_url,
4116 &mut extra_data,
4117 );
4118
4119 let extra_data = if extra_data.is_empty() {
4120 None
4121 } else {
4122 Some(extra_data)
4123 };
4124
4125 let purl = name.as_ref().and_then(|n| {
4126 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
4127 if let Some(v) = &version {
4128 package_url.with_version(v).ok()?;
4129 }
4130 Some(package_url.to_string())
4131 });
4132
4133 PackageData {
4134 package_type: Some(PythonParser::PACKAGE_TYPE),
4135 namespace: None,
4136 name,
4137 version,
4138 qualifiers: None,
4139 subpath: None,
4140 primary_language: Some("Python".to_string()),
4141 description,
4142 release_date: None,
4143 parties,
4144 keywords,
4145 homepage_url,
4146 download_url: None,
4147 size: None,
4148 sha1: None,
4149 md5: None,
4150 sha256: None,
4151 sha512: None,
4152 bug_tracking_url,
4153 code_view_url,
4154 vcs_url,
4155 copyright: None,
4156 holder: None,
4157 declared_license_expression,
4158 declared_license_expression_spdx,
4159 license_detections,
4160 other_license_expression: None,
4161 other_license_expression_spdx: None,
4162 other_license_detections: Vec::new(),
4163 extracted_license_statement,
4164 notice_text: None,
4165 source_packages: Vec::new(),
4166 file_references: Vec::new(),
4167 is_private: has_private_classifier(&classifiers),
4168 is_virtual: false,
4169 extra_data,
4170 dependencies,
4171 repository_homepage_url: None,
4172 repository_download_url: None,
4173 api_data_url: None,
4174 datasource_id: Some(DatasourceId::PypiSetupCfg),
4175 purl,
4176 }
4177}
4178
/// Splits a comma-separated keywords value into trimmed, non-empty entries.
fn parse_setup_cfg_keywords(value: Option<String>) -> Vec<String> {
    match value {
        None => Vec::new(),
        Some(keywords) => keywords
            .split(',')
            .filter_map(|keyword| {
                let keyword = keyword.trim();
                (!keyword.is_empty()).then(|| keyword.to_string())
            })
            .collect(),
    }
}
4191
/// Parses `label = url` lines (setup.cfg `project_urls` entries) into
/// `(label, url)` pairs, dropping malformed or empty entries.
fn parse_setup_cfg_project_urls(entries: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::with_capacity(entries.len());
    for entry in entries {
        if let Some((label, url)) = entry.split_once('=') {
            let (label, url) = (label.trim(), url.trim());
            if !label.is_empty() && !url.is_empty() {
                pairs.push((label.to_string(), url.to_string()));
            }
        }
    }
    pairs
}
4207
4208fn apply_project_url_mappings(
4209 parsed_urls: &[(String, String)],
4210 homepage_url: &mut Option<String>,
4211 bug_tracking_url: &mut Option<String>,
4212 code_view_url: &mut Option<String>,
4213 vcs_url: &mut Option<String>,
4214 extra_data: &mut HashMap<String, serde_json::Value>,
4215) {
4216 for (label, url) in parsed_urls {
4217 let label_lower = label.to_lowercase();
4218
4219 if bug_tracking_url.is_none()
4220 && matches!(
4221 label_lower.as_str(),
4222 "tracker"
4223 | "bug reports"
4224 | "bug tracker"
4225 | "issues"
4226 | "issue tracker"
4227 | "github: issues"
4228 )
4229 {
4230 *bug_tracking_url = Some(url.clone());
4231 } else if code_view_url.is_none()
4232 && matches!(label_lower.as_str(), "source" | "source code" | "code")
4233 {
4234 *code_view_url = Some(url.clone());
4235 } else if vcs_url.is_none()
4236 && matches!(
4237 label_lower.as_str(),
4238 "github" | "gitlab" | "github: repo" | "repository"
4239 )
4240 {
4241 *vcs_url = Some(url.clone());
4242 } else if homepage_url.is_none()
4243 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
4244 {
4245 *homepage_url = Some(url.clone());
4246 } else if label_lower == "changelog" {
4247 extra_data.insert(
4248 "changelog_url".to_string(),
4249 serde_json::Value::String(url.clone()),
4250 );
4251 }
4252 }
4253
4254 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
4255 .iter()
4256 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
4257 .collect();
4258
4259 if !project_urls_json.is_empty() {
4260 extra_data.insert(
4261 "project_urls".to_string(),
4262 serde_json::Value::Object(project_urls_json),
4263 );
4264 }
4265}
4266
/// Minimal INI-style parser for setup.cfg content.
///
/// Section and key names are lowercased; repeated keys accumulate values;
/// indented lines are treated as continuations of the most recent key (the
/// setup.cfg multi-line value convention). Only `=` is honored as the
/// key/value separator.
/// NOTE(review): setup.cfg also permits `:` as a separator — confirm whether
/// that form needs support.
fn parse_setup_cfg(content: &str) -> IniSections {
    let mut sections: IniSections = HashMap::new();
    let mut current_section: Option<String> = None;
    // The key that subsequent indented continuation lines append to.
    let mut current_key: Option<String> = None;

    for raw_line in content.lines() {
        let line = raw_line.trim_end_matches('\r');
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        let stripped = line.trim_start();
        // Comment lines use '#' or ';'.
        if stripped.starts_with('#') || stripped.starts_with(';') {
            continue;
        }

        // Section header: "[name]". An empty name clears the current section.
        if stripped.starts_with('[') && stripped.ends_with(']') {
            let section_name = stripped
                .trim_start_matches('[')
                .trim_end_matches(']')
                .trim()
                .to_ascii_lowercase();
            current_section = if section_name.is_empty() {
                None
            } else {
                Some(section_name)
            };
            current_key = None;
            continue;
        }

        // Indented line: continuation value for the most recent key.
        if (line.starts_with(' ') || line.starts_with('\t')) && current_key.is_some() {
            if let (Some(section), Some(key)) = (current_section.as_ref(), current_key.as_ref()) {
                let value = stripped.trim();
                if !value.is_empty() {
                    sections
                        .entry(section.clone())
                        .or_default()
                        .entry(key.clone())
                        .or_default()
                        .push(value.to_string());
                }
            }
            continue;
        }

        // "key = value" line. An empty value still registers the key so that
        // following continuation lines attach to it.
        if let Some((key, value)) = stripped.split_once('=')
            && let Some(section) = current_section.as_ref()
        {
            let key_name = key.trim().to_ascii_lowercase();
            let value_trimmed = value.trim();
            let entry = sections
                .entry(section.clone())
                .or_default()
                .entry(key_name.clone())
                .or_default();
            if !value_trimmed.is_empty() {
                entry.push(value_trimmed.to_string());
            }
            current_key = Some(key_name);
        }
    }

    sections
}
4333
4334fn get_ini_value(sections: &IniSections, section: &str, key: &str) -> Option<String> {
4335 sections
4336 .get(§ion.to_ascii_lowercase())
4337 .and_then(|values| values.get(&key.to_ascii_lowercase()))
4338 .and_then(|entries| entries.first())
4339 .map(|value| value.trim().to_string())
4340}
4341
4342fn get_ini_values(sections: &IniSections, section: &str, key: &str) -> Vec<String> {
4343 sections
4344 .get(§ion.to_ascii_lowercase())
4345 .and_then(|values| values.get(&key.to_ascii_lowercase()))
4346 .cloned()
4347 .unwrap_or_default()
4348}
4349
4350fn extract_setup_cfg_dependencies(sections: &IniSections) -> Vec<Dependency> {
4351 let mut dependencies = Vec::new();
4352
4353 for (sub_section, scope) in [
4354 ("install_requires", "install"),
4355 ("tests_require", "test"),
4356 ("setup_requires", "setup"),
4357 ] {
4358 let reqs = get_ini_values(sections, "options", sub_section);
4359 dependencies.extend(parse_setup_cfg_requirements(&reqs, scope, false));
4360 }
4361
4362 if let Some(extras) = sections.get("options.extras_require") {
4363 let mut extra_items: Vec<_> = extras.iter().collect();
4364 extra_items.sort_by_key(|(name, _)| *name);
4365 for (extra_name, reqs) in extra_items {
4366 dependencies.extend(parse_setup_cfg_requirements(reqs, extra_name, true));
4367 }
4368 }
4369
4370 dependencies
4371}
4372
4373fn parse_setup_cfg_requirements(
4374 reqs: &[String],
4375 scope: &str,
4376 is_optional: bool,
4377) -> Vec<Dependency> {
4378 reqs.iter()
4379 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
4380 .collect()
4381}
4382
4383fn build_setup_cfg_dependency(req: &str, scope: &str, is_optional: bool) -> Option<Dependency> {
4384 let trimmed = req.trim();
4385 if trimmed.is_empty() || trimmed.starts_with('#') {
4386 return None;
4387 }
4388
4389 let name = extract_setup_cfg_dependency_name(trimmed)?;
4390 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4391
4392 Some(Dependency {
4393 purl: Some(purl.to_string()),
4394 extracted_requirement: Some(normalize_setup_cfg_requirement(trimmed)),
4395 scope: Some(scope.to_string()),
4396 is_runtime: Some(true),
4397 is_optional: Some(is_optional),
4398 is_pinned: Some(false),
4399 is_direct: Some(true),
4400 resolved_package: None,
4401 extra_data: None,
4402 })
4403}
4404
/// Extracts the bare package name from a requirement string by cutting at
/// the first whitespace or specifier character
/// (`<`, `>`, `=`, `!`, `~`, `;`, `[`). Returns `None` for empty input.
fn extract_setup_cfg_dependency_name(req: &str) -> Option<String> {
    let trimmed = req.trim();
    let is_delimiter =
        |c: char| c.is_whitespace() || matches!(c, '<' | '>' | '=' | '!' | '~' | ';' | '[');
    let name = trimmed
        .find(is_delimiter)
        .map_or(trimmed, |end| trimmed[..end].trim());
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
4421
/// Canonicalizes a requirement string by removing every whitespace character.
fn normalize_setup_cfg_requirement(req: &str) -> String {
    req.split_whitespace().collect()
}
4425
/// Scans setup.py source text for `key="value"` / `key = 'value'` keyword
/// arguments (common spacing variants) and returns the quoted value of the
/// first pattern that matches, searching double-quoted forms before
/// single-quoted ones.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    for quote in ['"', '\''] {
        for separator in ["=", " =", "= ", " = "] {
            let pattern = format!("{}{}{}", key, separator, quote);
            if let Some(start) = content.find(&pattern) {
                let rest = &content[start + pattern.len()..];
                // The value ends at the next quote of either kind; if none is
                // found, keep trying the remaining patterns.
                if let Some(end) = rest.find(['"', '\'']) {
                    return Some(rest[..end].to_string());
                }
            }
        }
    }

    None
}
4451
4452fn extract_setup_py_dependencies(content: &str) -> Vec<Dependency> {
4453 let mut dependencies = Vec::new();
4454
4455 if let Some(tests_deps) = extract_tests_require(content) {
4456 dependencies.extend(tests_deps);
4457 }
4458
4459 if let Some(extras_deps) = extract_extras_require(content) {
4460 dependencies.extend(extras_deps);
4461 }
4462
4463 dependencies
4464}
4465
4466fn extract_tests_require(content: &str) -> Option<Vec<Dependency>> {
4467 let pattern = r"tests_require\s*=\s*\[([^\]]+)\]";
4468 let re = Regex::new(pattern).ok()?;
4469 let captures = re.captures(content)?;
4470 let deps_str = captures.get(1)?.as_str();
4471
4472 let deps = parse_setup_py_dep_list(deps_str, "test", true);
4473 if deps.is_empty() { None } else { Some(deps) }
4474}
4475
4476fn extract_extras_require(content: &str) -> Option<Vec<Dependency>> {
4477 let pattern = r"extras_require\s*=\s*\{([^}]+)\}";
4478 let re = Regex::new(pattern).ok()?;
4479 let captures = re.captures(content)?;
4480 let dict_content = captures.get(1)?.as_str();
4481
4482 let mut all_deps = Vec::new();
4483
4484 let entry_pattern = r#"['"]([^'"]+)['"]\s*:\s*\[([^\]]+)\]"#;
4485 let entry_re = Regex::new(entry_pattern).ok()?;
4486
4487 for entry_cap in entry_re.captures_iter(dict_content) {
4488 if let (Some(extra_name), Some(deps_str)) = (entry_cap.get(1), entry_cap.get(2)) {
4489 let deps = parse_setup_py_dep_list(deps_str.as_str(), extra_name.as_str(), true);
4490 all_deps.extend(deps);
4491 }
4492 }
4493
4494 if all_deps.is_empty() {
4495 None
4496 } else {
4497 Some(all_deps)
4498 }
4499}
4500
4501fn parse_setup_py_dep_list(deps_str: &str, scope: &str, is_optional: bool) -> Vec<Dependency> {
4502 let dep_pattern = r#"['"]([^'"]+)['"]"#;
4503 let re = match Regex::new(dep_pattern) {
4504 Ok(r) => r,
4505 Err(_) => return Vec::new(),
4506 };
4507
4508 re.captures_iter(deps_str)
4509 .filter_map(|cap| {
4510 let dep_str = cap.get(1)?.as_str().trim();
4511 if dep_str.is_empty() {
4512 return None;
4513 }
4514
4515 let name = extract_setup_cfg_dependency_name(dep_str)?;
4516 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4517
4518 Some(Dependency {
4519 purl: Some(purl.to_string()),
4520 extracted_requirement: Some(dep_str.to_string()),
4521 scope: Some(scope.to_string()),
4522 is_runtime: Some(true),
4523 is_optional: Some(is_optional),
4524 is_pinned: Some(false),
4525 is_direct: Some(true),
4526 resolved_package: None,
4527 extra_data: None,
4528 })
4529 })
4530 .collect()
4531}
4532
4533pub(crate) fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
4535 let content = read_file_to_string(path).map_err(|e| e.to_string())?;
4536 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
4537}
4538
4539fn calculate_file_checksums(path: &Path) -> (Option<u64>, Option<String>) {
4550 let mut file = match File::open(path) {
4551 Ok(f) => f,
4552 Err(_) => return (None, None),
4553 };
4554
4555 let metadata = match file.metadata() {
4556 Ok(m) => m,
4557 Err(_) => return (None, None),
4558 };
4559 let size = metadata.len();
4560
4561 let mut hasher = Sha256::new();
4562 let mut buffer = vec![0; 8192];
4563
4564 loop {
4565 match file.read(&mut buffer) {
4566 Ok(0) => break,
4567 Ok(n) => hasher.update(&buffer[..n]),
4568 Err(_) => return (Some(size), None),
4569 }
4570 }
4571
4572 let hash = format!("{:x}", hasher.finalize());
4573 (Some(size), Some(hash))
4574}
4575
4576fn default_package_data() -> PackageData {
4577 PackageData::default()
4578}
4579
// Registers this parser with the crate's parser registry (macro defined
// elsewhere in the crate): a human-readable description, the glob patterns
// it claims, the package type, the primary language, and an ecosystem
// documentation URL.
crate::register_parser!(
    "Python package manifests (pyproject.toml, setup.py, setup.cfg, pypi.json, PKG-INFO, METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
    &[
        "**/pyproject.toml",
        "**/setup.py",
        "**/setup.cfg",
        "**/pypi.json",
        "**/PKG-INFO",
        "**/METADATA",
        "**/origin.json",
        "**/*.tar.gz",
        "**/*.tgz",
        "**/*.tar.bz2",
        "**/*.tar.xz",
        "**/*.zip",
        "**/*.whl",
        "**/*.egg"
    ],
    "pypi",
    "Python",
    Some("https://packaging.python.org/"),
);