1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parser_warn as warn;
36use crate::parsers::utils::{read_file_to_string, split_name_email};
37use base64::Engine;
38use base64::engine::general_purpose::URL_SAFE_NO_PAD;
39use bzip2::read::BzDecoder;
40use csv::ReaderBuilder;
41use flate2::read::GzDecoder;
42use liblzma::read::XzDecoder;
43use packageurl::PackageUrl;
44use regex::Regex;
45use rustpython_parser::{Parse, ast};
46use serde_json::{Map as JsonMap, Value as JsonValue};
47use sha2::{Digest, Sha256};
48use std::collections::{HashMap, HashSet};
49use std::fs::File;
50use std::io::Read;
51use std::path::{Component, Path, PathBuf};
52use tar::Archive;
53use toml::Value as TomlValue;
54use toml::map::Map as TomlMap;
55use zip::ZipArchive;
56
57use super::PackageParser;
58use super::license_normalization::{
59 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
60 normalize_spdx_expression,
61};
62
// Manifest key names. Their use sites are outside this part of the file;
// presumably they are looked up in `pyproject.toml` tables — confirm against
// the callers.
const FIELD_PROJECT: &str = "project";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_DEPENDENCY_GROUPS: &str = "dependency-groups";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";

// Limits for `setup.py` handling (1 MiB of source, 10,000 AST nodes, depth 50).
// NOTE(review): the code enforcing them is not visible here — confirm usage.
const MAX_SETUP_PY_BYTES: usize = 1_048_576;
const MAX_SETUP_PY_AST_NODES: usize = 10_000;
const MAX_SETUP_PY_AST_DEPTH: usize = 50;

/// Largest archive accepted, in bytes (100 MiB). Applied both to the archive
/// file's size on disk and to the running total of declared entry sizes.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Largest declared size, in bytes (50 MiB), of a single archive entry.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Uncompressed-to-compressed size ratio above which archive content is
/// treated as suspicious.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Unit type carrying the `PackageParser` implementation for PyPI packages.
pub struct PythonParser;
94
/// Container formats recognised for Python source-distribution archives,
/// chosen from the lower-cased file-name suffix by
/// `detect_python_sdist_archive_format`.
#[derive(Clone, Copy, Debug)]
enum PythonSdistArchiveFormat {
    /// File name ends with `.tar.gz`.
    TarGz,
    /// File name ends with `.tgz`; decoded the same way as `TarGz`.
    Tgz,
    /// File name ends with `.tar.bz2`.
    TarBz2,
    /// File name ends with `.tar.xz`.
    TarXz,
    /// File name ends with `.zip`.
    Zip,
}
103
/// A zip archive member that passed the path, compression-ratio and size
/// checks performed by `collect_validated_zip_entries`.
#[derive(Clone, Debug)]
struct ValidatedZipEntry {
    /// Index of the entry inside the archive (the value given to `by_index_raw`).
    index: usize,
    /// Entry path as returned by `normalize_archive_entry_path`.
    name: String,
}
109
110impl PackageParser for PythonParser {
111 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
112
113 fn extract_packages(path: &Path) -> Vec<PackageData> {
114 vec![
115 if path.file_name().unwrap_or_default() == "pyproject.toml" {
116 extract_from_pyproject_toml(path)
117 } else if path.file_name().unwrap_or_default() == "setup.cfg" {
118 extract_from_setup_cfg(path)
119 } else if path.file_name().unwrap_or_default() == "setup.py" {
120 extract_from_setup_py(path)
121 } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
122 extract_from_rfc822_metadata(path, DatasourceId::PypiSdistPkginfo)
123 } else if path.file_name().unwrap_or_default() == "METADATA" {
124 extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
125 } else if is_pip_cache_origin_json(path) {
126 extract_from_pip_origin_json(path)
127 } else if path.file_name().unwrap_or_default() == "pypi.json" {
128 extract_from_pypi_json(path)
129 } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
130 extract_from_pip_inspect(path)
131 } else if is_python_sdist_archive_path(path) {
132 extract_from_sdist_archive(path)
133 } else if path
134 .extension()
135 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
136 {
137 extract_from_wheel_archive(path)
138 } else if path
139 .extension()
140 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
141 {
142 extract_from_egg_archive(path)
143 } else {
144 default_package_data(path)
145 },
146 ]
147 }
148
149 fn is_match(path: &Path) -> bool {
150 if let Some(filename) = path.file_name()
151 && (filename == "pyproject.toml"
152 || filename == "setup.cfg"
153 || filename == "setup.py"
154 || filename == "PKG-INFO"
155 || filename == "METADATA"
156 || filename == "pypi.json"
157 || filename == "pip-inspect.deplock"
158 || is_pip_cache_origin_json(path))
159 {
160 return true;
161 }
162
163 if let Some(extension) = path.extension() {
164 let ext = extension.to_string_lossy().to_lowercase();
165 if ext == "whl" || ext == "egg" || is_python_sdist_archive_path(path) {
166 return true;
167 }
168 }
169
170 false
171 }
172}
173
/// Values read from a `WHEEL` file by `parse_installed_wheel_metadata`.
#[derive(Debug, Clone)]
struct InstalledWheelMetadata {
    /// Every value of the `tag` header; never empty when produced by the parser.
    wheel_tags: Vec<String>,
    /// First value of the `wheel-version` header, if present.
    wheel_version: Option<String>,
    /// First value of the `generator` header, if present.
    wheel_generator: Option<String>,
    /// `root-is-purelib` header parsed as a boolean; `None` when the header is
    /// missing or is neither `true` nor `false` (ASCII case-insensitive).
    root_is_purelib: Option<bool>,
    /// Result of `compress_wheel_tags` over `wheel_tags`.
    compressed_tag: Option<String>,
}
182
183fn merge_sibling_wheel_metadata(path: &Path, package_data: &mut PackageData) {
184 let Some(parent) = path.parent() else {
185 return;
186 };
187
188 if !parent
189 .file_name()
190 .and_then(|name| name.to_str())
191 .is_some_and(|name| name.ends_with(".dist-info"))
192 {
193 return;
194 }
195
196 let wheel_path = parent.join("WHEEL");
197 if !wheel_path.exists() {
198 return;
199 }
200
201 let Ok(content) = read_file_to_string(&wheel_path) else {
202 warn!("Failed to read sibling WHEEL file at {:?}", wheel_path);
203 return;
204 };
205
206 let Some(wheel_metadata) = parse_installed_wheel_metadata(&content) else {
207 return;
208 };
209
210 apply_installed_wheel_metadata(package_data, &wheel_metadata);
211}
212
213fn parse_installed_wheel_metadata(content: &str) -> Option<InstalledWheelMetadata> {
214 use super::rfc822::{get_header_all, get_header_first};
215
216 let metadata = super::rfc822::parse_rfc822_content(content);
217 let wheel_tags = get_header_all(&metadata.headers, "tag");
218 if wheel_tags.is_empty() {
219 return None;
220 }
221
222 let wheel_version = get_header_first(&metadata.headers, "wheel-version");
223 let wheel_generator = get_header_first(&metadata.headers, "generator");
224 let root_is_purelib =
225 get_header_first(&metadata.headers, "root-is-purelib").and_then(|value| {
226 match value.to_ascii_lowercase().as_str() {
227 "true" => Some(true),
228 "false" => Some(false),
229 _ => None,
230 }
231 });
232
233 let compressed_tag = compress_wheel_tags(&wheel_tags);
234
235 Some(InstalledWheelMetadata {
236 wheel_tags,
237 wheel_version,
238 wheel_generator,
239 root_is_purelib,
240 compressed_tag,
241 })
242}
243
/// Folds several `python-abi-platform` tags into one compressed tag.
///
/// * No tags: `None`.
/// * Exactly one tag: that tag, returned verbatim without inspection.
/// * Several tags: the python parts joined with `.` followed by the shared
///   abi and platform parts. `None` if any tag has fewer than three
///   `-`-separated parts or if the abi/platform parts are not all identical.
fn compress_wheel_tags(tags: &[String]) -> Option<String> {
    /// Splits a tag into (python, abi, platform); the platform part keeps any
    /// further dashes.
    fn split_tag(tag: &str) -> Option<(&str, &str, &str)> {
        let mut pieces = tag.splitn(3, '-');
        Some((pieces.next()?, pieces.next()?, pieces.next()?))
    }

    match tags {
        [] => None,
        [only] => Some(only.clone()),
        [first, ..] => {
            // Every tag must agree with the first one on abi and platform.
            let (_, shared_abi, shared_platform) = split_tag(first)?;

            let mut interpreters = Vec::with_capacity(tags.len());
            for tag in tags {
                let (python, abi, platform) = split_tag(tag)?;
                if abi != shared_abi || platform != shared_platform {
                    return None;
                }
                interpreters.push(python);
            }

            Some(format!(
                "{}-{}-{}",
                interpreters.join("."),
                shared_abi,
                shared_platform
            ))
        }
    }
}
281
282fn apply_installed_wheel_metadata(
283 package_data: &mut PackageData,
284 wheel_metadata: &InstalledWheelMetadata,
285) {
286 let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
287 extra_data.insert(
288 "wheel_tags".to_string(),
289 JsonValue::Array(
290 wheel_metadata
291 .wheel_tags
292 .iter()
293 .cloned()
294 .map(JsonValue::String)
295 .collect(),
296 ),
297 );
298
299 if let Some(wheel_version) = &wheel_metadata.wheel_version {
300 extra_data.insert(
301 "wheel_version".to_string(),
302 JsonValue::String(wheel_version.clone()),
303 );
304 }
305
306 if let Some(wheel_generator) = &wheel_metadata.wheel_generator {
307 extra_data.insert(
308 "wheel_generator".to_string(),
309 JsonValue::String(wheel_generator.clone()),
310 );
311 }
312
313 if let Some(root_is_purelib) = wheel_metadata.root_is_purelib {
314 extra_data.insert(
315 "root_is_purelib".to_string(),
316 JsonValue::Bool(root_is_purelib),
317 );
318 }
319
320 if let (Some(name), Some(version), Some(extension)) = (
321 package_data.name.as_deref(),
322 package_data.version.as_deref(),
323 wheel_metadata.compressed_tag.as_deref(),
324 ) {
325 package_data.purl = build_pypi_purl_with_extension(name, Some(version), extension);
326 }
327}
328
/// Returns `true` when `path` is a file named exactly `origin.json` that has
/// an ancestor directory called `wheels` (ASCII case-insensitive).
fn is_pip_cache_origin_json(path: &Path) -> bool {
    if path.file_name().and_then(|name| name.to_str()) != Some("origin.json") {
        return false;
    }

    // Walk upwards from the containing directory looking for `wheels`.
    let mut current = path.parent();
    while let Some(dir) = current {
        let is_wheels_dir = dir
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.eq_ignore_ascii_case("wheels"));
        if is_wheels_dir {
            return true;
        }
        current = dir.parent();
    }

    false
}
338
339fn extract_from_pip_origin_json(path: &Path) -> PackageData {
340 let content = match read_file_to_string(path) {
341 Ok(content) => content,
342 Err(e) => {
343 warn!("Failed to read pip cache origin.json at {:?}: {}", path, e);
344 return default_package_data(path);
345 }
346 };
347
348 let root: JsonValue = match serde_json::from_str(&content) {
349 Ok(root) => root,
350 Err(e) => {
351 warn!("Failed to parse pip cache origin.json at {:?}: {}", path, e);
352 return default_package_data(path);
353 }
354 };
355
356 let Some(download_url) = root.get("url").and_then(|value| value.as_str()) else {
357 warn!("No url found in pip cache origin.json at {:?}", path);
358 return default_package_data(path);
359 };
360
361 let sibling_wheel = find_sibling_cached_wheel(path);
362 let name_version = parse_name_version_from_origin_url(download_url).or_else(|| {
363 sibling_wheel
364 .as_ref()
365 .map(|wheel_info| (wheel_info.name.clone(), wheel_info.version.clone()))
366 });
367
368 let Some((name, version)) = name_version else {
369 warn!(
370 "Failed to infer package name/version from pip cache origin.json at {:?}",
371 path
372 );
373 return default_package_data(path);
374 };
375
376 let (repository_homepage_url, repository_download_url, api_data_url, plain_purl) =
377 build_pypi_urls(Some(&name), Some(&version));
378 let purl = sibling_wheel
379 .as_ref()
380 .and_then(|wheel_info| build_wheel_purl(Some(&name), Some(&version), wheel_info))
381 .or(plain_purl);
382
383 PackageData {
384 package_type: Some(PythonParser::PACKAGE_TYPE),
385 primary_language: Some("Python".to_string()),
386 name: Some(name),
387 version: Some(version),
388 datasource_id: Some(DatasourceId::PypiPipOriginJson),
389 download_url: Some(download_url.to_string()),
390 sha256: extract_sha256_from_origin_json(&root),
391 repository_homepage_url,
392 repository_download_url,
393 api_data_url,
394 purl,
395 ..Default::default()
396 }
397}
398
399fn find_sibling_cached_wheel(path: &Path) -> Option<WheelInfo> {
400 let parent = path.parent()?;
401 let entries = parent.read_dir().ok()?;
402
403 for entry in entries.flatten() {
404 let sibling_path = entry.path();
405 if sibling_path
406 .extension()
407 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
408 && let Some(wheel_info) = parse_wheel_filename(&sibling_path)
409 {
410 return Some(wheel_info);
411 }
412 }
413
414 None
415}
416
417fn parse_name_version_from_origin_url(url: &str) -> Option<(String, String)> {
418 let file_name = url.rsplit('/').next()?;
419
420 if file_name.ends_with(".whl") {
421 return parse_wheel_filename(Path::new(file_name))
422 .map(|wheel_info| (wheel_info.name, wheel_info.version));
423 }
424
425 let stem = strip_python_archive_extension(file_name)?;
426 let (name, version) = stem.rsplit_once('-')?;
427 if name.is_empty() || version.is_empty() {
428 return None;
429 }
430
431 Some((name.replace('_', "-"), version.to_string()))
432}
433
/// Removes a trailing Python archive extension (`.tar.gz`, `.tar.bz2`,
/// `.tar.xz`, `.tgz`, `.zip` or `.whl`) from `file_name`.
///
/// The extension is matched ASCII case-insensitively, in line with how
/// archive names are detected elsewhere in this file, and the returned stem
/// keeps its original casing. Returns `None` when `file_name` ends with none
/// of the known extensions.
fn strip_python_archive_extension(file_name: &str) -> Option<&str> {
    const ARCHIVE_SUFFIXES: [&str; 6] =
        [".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".whl"];

    ARCHIVE_SUFFIXES.iter().find_map(|suffix| {
        let stem_len = file_name.len().checked_sub(suffix.len())?;
        // `get` yields `None` when the split point falls inside a multi-byte
        // character, in which case the tail cannot be an ASCII suffix.
        let stem = file_name.get(..stem_len)?;
        let tail = &file_name[stem_len..];
        tail.eq_ignore_ascii_case(suffix).then_some(stem)
    })
}
439
440fn extract_sha256_from_origin_json(root: &JsonValue) -> Option<String> {
441 root.pointer("/archive_info/hashes/sha256")
442 .and_then(|value| value.as_str())
443 .map(ToOwned::to_owned)
444 .or_else(|| {
445 root.pointer("/archive_info/hash")
446 .and_then(|value| value.as_str())
447 .and_then(normalize_origin_hash)
448 })
449}
450
/// Extracts a SHA-256 digest from a hash string.
///
/// Accepts `sha256=<value>` and `sha256:<value>` (the value is returned
/// without further validation) as well as a bare string of exactly 64 ASCII
/// hex digits. Everything else yields `None`.
fn normalize_origin_hash(hash: &str) -> Option<String> {
    let prefixed = ["sha256=", "sha256:"]
        .iter()
        .find_map(|prefix| hash.strip_prefix(*prefix));
    if let Some(digest) = prefixed {
        return Some(digest.to_string());
    }

    let is_bare_sha256 = hash.len() == 64 && hash.bytes().all(|byte| byte.is_ascii_hexdigit());
    is_bare_sha256.then(|| hash.to_string())
}
463
464fn extract_from_rfc822_metadata(path: &Path, datasource_id: DatasourceId) -> PackageData {
465 let content = match read_file_to_string(path) {
466 Ok(content) => content,
467 Err(e) => {
468 warn!("Failed to read metadata at {:?}: {}", path, e);
469 return default_package_data(path);
470 }
471 };
472
473 let metadata = super::rfc822::parse_rfc822_content(&content);
474 let mut package_data = build_package_data_from_rfc822(&metadata, datasource_id);
475 merge_sibling_metadata_dependencies(path, &mut package_data);
476 merge_sibling_metadata_file_references(path, &mut package_data);
477 if datasource_id == DatasourceId::PypiWheelMetadata {
478 merge_sibling_wheel_metadata(path, &mut package_data);
479 }
480 package_data
481}
482
483fn merge_sibling_metadata_dependencies(path: &Path, package_data: &mut PackageData) {
484 let mut extra_dependencies = Vec::new();
485
486 if let Some(parent) = path.parent() {
487 let direct_requires = parent.join("requires.txt");
488 if direct_requires.exists()
489 && let Ok(content) = read_file_to_string(&direct_requires)
490 {
491 extra_dependencies.extend(parse_requires_txt(&content));
492 }
493
494 let sibling_egg_info_requires = parent
495 .read_dir()
496 .ok()
497 .into_iter()
498 .flatten()
499 .flatten()
500 .find_map(|entry| {
501 let child_path = entry.path();
502 if child_path.is_dir()
503 && child_path
504 .file_name()
505 .and_then(|name| name.to_str())
506 .is_some_and(|name| name.ends_with(".egg-info"))
507 {
508 let requires = child_path.join("requires.txt");
509 requires.exists().then_some(requires)
510 } else {
511 None
512 }
513 });
514
515 if let Some(requires_path) = sibling_egg_info_requires
516 && let Ok(content) = read_file_to_string(&requires_path)
517 {
518 extra_dependencies.extend(parse_requires_txt(&content));
519 }
520 }
521
522 for dependency in extra_dependencies {
523 if !package_data.dependencies.iter().any(|existing| {
524 existing.purl == dependency.purl
525 && existing.scope == dependency.scope
526 && existing.extracted_requirement == dependency.extracted_requirement
527 && existing.extra_data == dependency.extra_data
528 }) {
529 package_data.dependencies.push(dependency);
530 }
531 }
532}
533
534fn merge_sibling_metadata_file_references(path: &Path, package_data: &mut PackageData) {
535 let mut extra_refs = Vec::new();
536
537 if let Some(parent) = path.parent() {
538 let record_path = parent.join("RECORD");
539 if record_path.exists()
540 && let Ok(content) = read_file_to_string(&record_path)
541 {
542 extra_refs.extend(parse_record_csv(&content));
543 }
544
545 let installed_files_path = parent.join("installed-files.txt");
546 if installed_files_path.exists()
547 && let Ok(content) = read_file_to_string(&installed_files_path)
548 {
549 extra_refs.extend(parse_installed_files_txt(&content));
550 }
551
552 let sources_path = parent.join("SOURCES.txt");
553 if sources_path.exists()
554 && let Ok(content) = read_file_to_string(&sources_path)
555 {
556 extra_refs.extend(parse_sources_txt(&content));
557 }
558 }
559
560 for file_ref in extra_refs {
561 if !package_data
562 .file_references
563 .iter()
564 .any(|existing| existing.path == file_ref.path)
565 {
566 package_data.file_references.push(file_ref);
567 }
568 }
569}
570
571fn collect_validated_zip_entries<R: Read + std::io::Seek>(
572 archive: &mut ZipArchive<R>,
573 path: &Path,
574 archive_type: &str,
575) -> Result<Vec<ValidatedZipEntry>, String> {
576 let mut total_extracted = 0u64;
577 let mut entries = Vec::new();
578
579 for i in 0..archive.len() {
580 if let Ok(file) = archive.by_index_raw(i) {
581 let compressed_size = file.compressed_size();
582 let uncompressed_size = file.size();
583 let Some(entry_name) = normalize_archive_entry_path(file.name()) else {
584 warn!(
585 "Skipping unsafe path in {} {:?}: {}",
586 archive_type,
587 path,
588 file.name()
589 );
590 continue;
591 };
592
593 if compressed_size > 0 {
594 let ratio = uncompressed_size as f64 / compressed_size as f64;
595 if ratio > MAX_COMPRESSION_RATIO {
596 warn!(
597 "Suspicious compression ratio in {} {:?}: {:.2}:1",
598 archive_type, path, ratio
599 );
600 continue;
601 }
602 }
603
604 if uncompressed_size > MAX_FILE_SIZE {
605 warn!(
606 "File too large in {} {:?}: {} bytes (limit: {} bytes)",
607 archive_type, path, uncompressed_size, MAX_FILE_SIZE
608 );
609 continue;
610 }
611
612 total_extracted += uncompressed_size;
613 if total_extracted > MAX_ARCHIVE_SIZE {
614 let msg = format!(
615 "Total extracted size exceeds limit for {} {:?}",
616 archive_type, path
617 );
618 warn!("{}", msg);
619 return Err(msg);
620 }
621
622 entries.push(ValidatedZipEntry {
623 index: i,
624 name: entry_name,
625 });
626 }
627 }
628
629 Ok(entries)
630}
631
632fn is_python_sdist_archive_path(path: &Path) -> bool {
633 detect_python_sdist_archive_format(path).is_some()
634}
635
636fn detect_python_sdist_archive_format(path: &Path) -> Option<PythonSdistArchiveFormat> {
637 let file_name = path.file_name()?.to_str()?.to_ascii_lowercase();
638
639 if !is_likely_python_sdist_filename(&file_name) {
640 return None;
641 }
642
643 if file_name.ends_with(".tar.gz") {
644 Some(PythonSdistArchiveFormat::TarGz)
645 } else if file_name.ends_with(".tgz") {
646 Some(PythonSdistArchiveFormat::Tgz)
647 } else if file_name.ends_with(".tar.bz2") {
648 Some(PythonSdistArchiveFormat::TarBz2)
649 } else if file_name.ends_with(".tar.xz") {
650 Some(PythonSdistArchiveFormat::TarXz)
651 } else if file_name.ends_with(".zip") {
652 Some(PythonSdistArchiveFormat::Zip)
653 } else {
654 None
655 }
656}
657
658fn is_likely_python_sdist_filename(file_name: &str) -> bool {
659 let Some(stem) = strip_python_archive_extension(file_name) else {
660 return false;
661 };
662
663 let Some((name, version)) = stem.rsplit_once('-') else {
664 return false;
665 };
666
667 !name.is_empty()
668 && !version.is_empty()
669 && version.chars().any(|ch| ch.is_ascii_digit())
670 && name
671 .chars()
672 .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
673}
674
675fn extract_from_sdist_archive(path: &Path) -> PackageData {
676 let metadata = match std::fs::metadata(path) {
677 Ok(m) => m,
678 Err(e) => {
679 warn!(
680 "Failed to read metadata for sdist archive {:?}: {}",
681 path, e
682 );
683 return default_package_data(path);
684 }
685 };
686
687 if metadata.len() > MAX_ARCHIVE_SIZE {
688 warn!(
689 "sdist archive too large: {} bytes (limit: {} bytes)",
690 metadata.len(),
691 MAX_ARCHIVE_SIZE
692 );
693 return default_package_data(path);
694 }
695
696 let Some(format) = detect_python_sdist_archive_format(path) else {
697 return default_package_data(path);
698 };
699
700 let mut package_data = match format {
701 PythonSdistArchiveFormat::TarGz | PythonSdistArchiveFormat::Tgz => {
702 let file = match File::open(path) {
703 Ok(file) => file,
704 Err(e) => {
705 warn!("Failed to open sdist archive {:?}: {}", path, e);
706 return default_package_data(path);
707 }
708 };
709 let decoder = GzDecoder::new(file);
710 extract_from_tar_sdist_archive(path, decoder, "tar.gz", metadata.len())
711 }
712 PythonSdistArchiveFormat::TarBz2 => {
713 let file = match File::open(path) {
714 Ok(file) => file,
715 Err(e) => {
716 warn!("Failed to open sdist archive {:?}: {}", path, e);
717 return default_package_data(path);
718 }
719 };
720 let decoder = BzDecoder::new(file);
721 extract_from_tar_sdist_archive(path, decoder, "tar.bz2", metadata.len())
722 }
723 PythonSdistArchiveFormat::TarXz => {
724 let file = match File::open(path) {
725 Ok(file) => file,
726 Err(e) => {
727 warn!("Failed to open sdist archive {:?}: {}", path, e);
728 return default_package_data(path);
729 }
730 };
731 let decoder = XzDecoder::new(file);
732 extract_from_tar_sdist_archive(path, decoder, "tar.xz", metadata.len())
733 }
734 PythonSdistArchiveFormat::Zip => extract_from_zip_sdist_archive(path),
735 };
736
737 if package_data.package_type.is_some() {
738 let (size, sha256) = calculate_file_checksums(path);
739 package_data.size = size;
740 package_data.sha256 = sha256;
741 }
742
743 package_data
744}
745
746fn extract_from_tar_sdist_archive<R: Read>(
747 path: &Path,
748 reader: R,
749 archive_type: &str,
750 compressed_size: u64,
751) -> PackageData {
752 let mut archive = Archive::new(reader);
753 let archive_entries = match archive.entries() {
754 Ok(entries) => entries,
755 Err(e) => {
756 warn!(
757 "Failed to read {} sdist archive {:?}: {}",
758 archive_type, path, e
759 );
760 return default_package_data(path);
761 }
762 };
763
764 let mut total_extracted = 0u64;
765 let mut entries = Vec::new();
766
767 for entry_result in archive_entries {
768 let mut entry = match entry_result {
769 Ok(entry) => entry,
770 Err(e) => {
771 warn!(
772 "Failed to read {} sdist entry from {:?}: {}",
773 archive_type, path, e
774 );
775 continue;
776 }
777 };
778
779 let entry_size = entry.size();
780 if entry_size > MAX_FILE_SIZE {
781 warn!(
782 "File too large in {} sdist {:?}: {} bytes (limit: {} bytes)",
783 archive_type, path, entry_size, MAX_FILE_SIZE
784 );
785 continue;
786 }
787
788 total_extracted += entry_size;
789 if total_extracted > MAX_ARCHIVE_SIZE {
790 warn!(
791 "Total extracted size exceeds limit for {} sdist {:?}",
792 archive_type, path
793 );
794 return default_package_data(path);
795 }
796
797 if compressed_size > 0 {
798 let ratio = total_extracted as f64 / compressed_size as f64;
799 if ratio > MAX_COMPRESSION_RATIO {
800 warn!(
801 "Suspicious compression ratio in {} sdist {:?}: {:.2}:1",
802 archive_type, path, ratio
803 );
804 return default_package_data(path);
805 }
806 }
807
808 let entry_path = match entry.path() {
809 Ok(path) => path.to_string_lossy().replace('\\', "/"),
810 Err(e) => {
811 warn!(
812 "Failed to get {} sdist entry path from {:?}: {}",
813 archive_type, path, e
814 );
815 continue;
816 }
817 };
818
819 let Some(entry_path) = normalize_archive_entry_path(&entry_path) else {
820 warn!("Skipping unsafe {} sdist path in {:?}", archive_type, path);
821 continue;
822 };
823
824 if !is_relevant_sdist_text_entry(&entry_path) {
825 continue;
826 }
827
828 if let Ok(content) = read_limited_utf8(
829 &mut entry,
830 MAX_FILE_SIZE,
831 &format!("{} entry {}", archive_type, entry_path),
832 ) {
833 entries.push((entry_path, content));
834 }
835 }
836
837 build_sdist_package_data(path, entries)
838}
839
840fn extract_from_zip_sdist_archive(path: &Path) -> PackageData {
841 let file = match File::open(path) {
842 Ok(file) => file,
843 Err(e) => {
844 warn!("Failed to open zip sdist archive {:?}: {}", path, e);
845 return default_package_data(path);
846 }
847 };
848
849 let mut archive = match ZipArchive::new(file) {
850 Ok(archive) => archive,
851 Err(e) => {
852 warn!("Failed to read zip sdist archive {:?}: {}", path, e);
853 return default_package_data(path);
854 }
855 };
856
857 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "sdist zip") {
858 Ok(entries) => entries,
859 Err(_) => return default_package_data(path),
860 };
861
862 let mut entries = Vec::new();
863 for entry in validated_entries.iter() {
864 if !is_relevant_sdist_text_entry(&entry.name) {
865 continue;
866 }
867
868 if let Ok(content) = read_validated_zip_entry(&mut archive, entry, path, "sdist zip") {
869 entries.push((entry.name.clone(), content));
870 }
871 }
872
873 build_sdist_package_data(path, entries)
874}
875
/// Returns `true` for archive entries named `PKG-INFO`, `requires.txt` or
/// `SOURCES.txt` that sit inside at least one directory (the path must end
/// with `/<file name>`).
fn is_relevant_sdist_text_entry(entry_path: &str) -> bool {
    const RELEVANT_SUFFIXES: [&str; 3] = ["/PKG-INFO", "/requires.txt", "/SOURCES.txt"];

    RELEVANT_SUFFIXES
        .iter()
        .any(|suffix| entry_path.ends_with(*suffix))
}
881
882fn build_sdist_package_data(path: &Path, entries: Vec<(String, String)>) -> PackageData {
883 let Some((metadata_path, metadata_content)) = select_sdist_pkginfo_entry(path, &entries) else {
884 warn!("No PKG-INFO file found in sdist archive {:?}", path);
885 return default_package_data(path);
886 };
887
888 let mut package_data =
889 python_parse_rfc822_content(&metadata_content, DatasourceId::PypiSdistPkginfo);
890 merge_sdist_archive_dependencies(&entries, &metadata_path, &mut package_data);
891 merge_sdist_archive_file_references(&entries, &metadata_path, &mut package_data);
892 apply_sdist_name_version_fallback(path, &mut package_data);
893 package_data
894}
895
896fn select_sdist_pkginfo_entry(
897 archive_path: &Path,
898 entries: &[(String, String)],
899) -> Option<(String, String)> {
900 let expected_name = archive_path
901 .file_name()
902 .and_then(|name| name.to_str())
903 .and_then(strip_python_archive_extension)
904 .and_then(|stem| {
905 stem.rsplit_once('-')
906 .map(|(name, _)| normalize_python_package_name(name))
907 });
908
909 entries
910 .iter()
911 .filter(|(entry_path, _)| entry_path.ends_with("/PKG-INFO"))
912 .min_by_key(|(entry_path, content)| {
913 let components: Vec<_> = entry_path
914 .split('/')
915 .filter(|part| !part.is_empty())
916 .collect();
917 let metadata = super::rfc822::parse_rfc822_content(content);
918 let candidate_name = super::rfc822::get_header_first(&metadata.headers, "name")
919 .map(|name| normalize_python_package_name(&name));
920 let name_rank = if candidate_name == expected_name {
921 0
922 } else {
923 1
924 };
925 let kind_rank = if components.len() == 3
926 && components[1].ends_with(".egg-info")
927 && components[2] == "PKG-INFO"
928 {
929 0
930 } else if components.len() == 2 && components[1] == "PKG-INFO" {
931 1
932 } else if entry_path.ends_with(".egg-info/PKG-INFO") {
933 2
934 } else {
935 3
936 };
937
938 (name_rank, kind_rank, components.len(), entry_path.clone())
939 })
940 .map(|(entry_path, content)| (entry_path.clone(), content.clone()))
941}
942
943fn merge_sdist_archive_dependencies(
944 entries: &[(String, String)],
945 metadata_path: &str,
946 package_data: &mut PackageData,
947) {
948 let metadata_dir = metadata_path
949 .rsplit_once('/')
950 .map(|(dir, _)| dir)
951 .unwrap_or("");
952 let archive_root = metadata_path.split('/').next().unwrap_or("");
953 let matched_egg_info_dir =
954 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
955 let mut extra_dependencies = Vec::new();
956
957 for (entry_path, content) in entries {
958 let is_direct_requires =
959 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/requires.txt");
960 let is_egg_info_requires = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
961 entry_path == &format!("{archive_root}/{egg_info_dir}/requires.txt")
962 });
963
964 if is_direct_requires || is_egg_info_requires {
965 extra_dependencies.extend(parse_requires_txt(content));
966 }
967 }
968
969 for dependency in extra_dependencies {
970 if !package_data.dependencies.iter().any(|existing| {
971 existing.purl == dependency.purl
972 && existing.scope == dependency.scope
973 && existing.extracted_requirement == dependency.extracted_requirement
974 && existing.extra_data == dependency.extra_data
975 }) {
976 package_data.dependencies.push(dependency);
977 }
978 }
979}
980
981fn merge_sdist_archive_file_references(
982 entries: &[(String, String)],
983 metadata_path: &str,
984 package_data: &mut PackageData,
985) {
986 let metadata_dir = metadata_path
987 .rsplit_once('/')
988 .map(|(dir, _)| dir)
989 .unwrap_or("");
990 let archive_root = metadata_path.split('/').next().unwrap_or("");
991 let matched_egg_info_dir =
992 select_matching_sdist_egg_info_dir(entries, archive_root, package_data.name.as_deref());
993 let mut extra_refs = Vec::new();
994
995 for (entry_path, content) in entries {
996 let is_direct_sources =
997 !metadata_dir.is_empty() && entry_path == &format!("{metadata_dir}/SOURCES.txt");
998 let is_egg_info_sources = matched_egg_info_dir.as_ref().is_some_and(|egg_info_dir| {
999 entry_path == &format!("{archive_root}/{egg_info_dir}/SOURCES.txt")
1000 });
1001
1002 if is_direct_sources || is_egg_info_sources {
1003 extra_refs.extend(parse_sources_txt(content));
1004 }
1005 }
1006
1007 for file_ref in extra_refs {
1008 if !package_data
1009 .file_references
1010 .iter()
1011 .any(|existing| existing.path == file_ref.path)
1012 {
1013 package_data.file_references.push(file_ref);
1014 }
1015 }
1016}
1017
1018fn select_matching_sdist_egg_info_dir(
1019 entries: &[(String, String)],
1020 archive_root: &str,
1021 package_name: Option<&str>,
1022) -> Option<String> {
1023 let normalized_package_name = package_name.map(normalize_python_package_name);
1024
1025 entries
1026 .iter()
1027 .filter_map(|(entry_path, _)| {
1028 let components: Vec<_> = entry_path
1029 .split('/')
1030 .filter(|part| !part.is_empty())
1031 .collect();
1032 if components.len() == 3
1033 && components[0] == archive_root
1034 && components[1].ends_with(".egg-info")
1035 {
1036 Some(components[1].to_string())
1037 } else {
1038 None
1039 }
1040 })
1041 .min_by_key(|egg_info_dir| {
1042 let normalized_dir_name =
1043 normalize_python_package_name(egg_info_dir.trim_end_matches(".egg-info"));
1044 let name_rank = if Some(normalized_dir_name.clone()) == normalized_package_name {
1045 0
1046 } else {
1047 1
1048 };
1049
1050 (name_rank, egg_info_dir.clone())
1051 })
1052}
1053
/// Lower-cases ASCII letters and replaces every `_` with `-`; all other
/// characters are kept as they are.
fn normalize_python_package_name(name: &str) -> String {
    name.chars()
        .map(|ch| match ch {
            '_' => '-',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
1057
1058fn apply_sdist_name_version_fallback(path: &Path, package_data: &mut PackageData) {
1059 let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
1060 return;
1061 };
1062
1063 let Some(stem) = strip_python_archive_extension(file_name) else {
1064 return;
1065 };
1066
1067 let Some((name, version)) = stem.rsplit_once('-') else {
1068 return;
1069 };
1070
1071 if package_data.name.is_none() {
1072 package_data.name = Some(name.replace('_', "-"));
1073 }
1074 if package_data.version.is_none() {
1075 package_data.version = Some(version.to_string());
1076 }
1077
1078 if package_data.purl.is_none()
1079 || package_data.repository_homepage_url.is_none()
1080 || package_data.repository_download_url.is_none()
1081 || package_data.api_data_url.is_none()
1082 {
1083 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
1084 build_pypi_urls(
1085 package_data.name.as_deref(),
1086 package_data.version.as_deref(),
1087 );
1088
1089 if package_data.repository_homepage_url.is_none() {
1090 package_data.repository_homepage_url = repository_homepage_url;
1091 }
1092 if package_data.repository_download_url.is_none() {
1093 package_data.repository_download_url = repository_download_url;
1094 }
1095 if package_data.api_data_url.is_none() {
1096 package_data.api_data_url = api_data_url;
1097 }
1098 if package_data.purl.is_none() {
1099 package_data.purl = purl;
1100 }
1101 }
1102}
1103
1104fn extract_from_wheel_archive(path: &Path) -> PackageData {
1105 let metadata = match std::fs::metadata(path) {
1106 Ok(m) => m,
1107 Err(e) => {
1108 warn!(
1109 "Failed to read metadata for wheel archive {:?}: {}",
1110 path, e
1111 );
1112 return default_package_data(path);
1113 }
1114 };
1115
1116 if metadata.len() > MAX_ARCHIVE_SIZE {
1117 warn!(
1118 "Wheel archive too large: {} bytes (limit: {} bytes)",
1119 metadata.len(),
1120 MAX_ARCHIVE_SIZE
1121 );
1122 return default_package_data(path);
1123 }
1124
1125 let file = match File::open(path) {
1126 Ok(f) => f,
1127 Err(e) => {
1128 warn!("Failed to open wheel archive {:?}: {}", path, e);
1129 return default_package_data(path);
1130 }
1131 };
1132
1133 let mut archive = match ZipArchive::new(file) {
1134 Ok(a) => a,
1135 Err(e) => {
1136 warn!("Failed to read wheel archive {:?}: {}", path, e);
1137 return default_package_data(path);
1138 }
1139 };
1140
1141 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "wheel") {
1142 Ok(entries) => entries,
1143 Err(_) => return default_package_data(path),
1144 };
1145
1146 let metadata_entry =
1147 match find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/METADATA") {
1148 Some(entry) => entry,
1149 None => {
1150 warn!("No METADATA file found in wheel archive {:?}", path);
1151 return default_package_data(path);
1152 }
1153 };
1154
1155 let content = match read_validated_zip_entry(&mut archive, metadata_entry, path, "wheel") {
1156 Ok(c) => c,
1157 Err(e) => {
1158 warn!("Failed to read METADATA from {:?}: {}", path, e);
1159 return default_package_data(path);
1160 }
1161 };
1162
1163 let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiWheel);
1164
1165 let (size, sha256) = calculate_file_checksums(path);
1166 package_data.size = size;
1167 package_data.sha256 = sha256;
1168
1169 if let Some(record_entry) =
1170 find_validated_zip_entry_by_suffix(&validated_entries, ".dist-info/RECORD")
1171 && let Ok(record_content) =
1172 read_validated_zip_entry(&mut archive, record_entry, path, "wheel")
1173 {
1174 package_data.file_references = parse_record_csv(&record_content);
1175 }
1176
1177 if let Some(wheel_info) = parse_wheel_filename(path) {
1178 if package_data.name.is_none() {
1179 package_data.name = Some(wheel_info.name.clone());
1180 }
1181 if package_data.version.is_none() {
1182 package_data.version = Some(wheel_info.version.clone());
1183 }
1184
1185 package_data.qualifiers = Some(std::collections::HashMap::from([(
1186 "extension".to_string(),
1187 format!(
1188 "{}-{}-{}",
1189 wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
1190 ),
1191 )]));
1192
1193 package_data.purl = build_wheel_purl(
1194 package_data.name.as_deref(),
1195 package_data.version.as_deref(),
1196 &wheel_info,
1197 );
1198
1199 let mut extra_data = package_data.extra_data.unwrap_or_default();
1200 extra_data.insert(
1201 "python_requires".to_string(),
1202 serde_json::Value::String(wheel_info.python_tag.clone()),
1203 );
1204 extra_data.insert(
1205 "abi_tag".to_string(),
1206 serde_json::Value::String(wheel_info.abi_tag.clone()),
1207 );
1208 extra_data.insert(
1209 "platform_tag".to_string(),
1210 serde_json::Value::String(wheel_info.platform_tag.clone()),
1211 );
1212 package_data.extra_data = Some(extra_data);
1213 }
1214
1215 package_data
1216}
1217
1218fn extract_from_egg_archive(path: &Path) -> PackageData {
1219 let metadata = match std::fs::metadata(path) {
1220 Ok(m) => m,
1221 Err(e) => {
1222 warn!("Failed to read metadata for egg archive {:?}: {}", path, e);
1223 return default_package_data(path);
1224 }
1225 };
1226
1227 if metadata.len() > MAX_ARCHIVE_SIZE {
1228 warn!(
1229 "Egg archive too large: {} bytes (limit: {} bytes)",
1230 metadata.len(),
1231 MAX_ARCHIVE_SIZE
1232 );
1233 return default_package_data(path);
1234 }
1235
1236 let file = match File::open(path) {
1237 Ok(f) => f,
1238 Err(e) => {
1239 warn!("Failed to open egg archive {:?}: {}", path, e);
1240 return default_package_data(path);
1241 }
1242 };
1243
1244 let mut archive = match ZipArchive::new(file) {
1245 Ok(a) => a,
1246 Err(e) => {
1247 warn!("Failed to read egg archive {:?}: {}", path, e);
1248 return default_package_data(path);
1249 }
1250 };
1251
1252 let validated_entries = match collect_validated_zip_entries(&mut archive, path, "egg") {
1253 Ok(entries) => entries,
1254 Err(_) => return default_package_data(path),
1255 };
1256
1257 let pkginfo_entry = match find_validated_zip_entry_by_any_suffix(
1258 &validated_entries,
1259 &["EGG-INFO/PKG-INFO", ".egg-info/PKG-INFO"],
1260 ) {
1261 Some(entry) => entry,
1262 None => {
1263 warn!("No PKG-INFO file found in egg archive {:?}", path);
1264 return default_package_data(path);
1265 }
1266 };
1267
1268 let content = match read_validated_zip_entry(&mut archive, pkginfo_entry, path, "egg") {
1269 Ok(c) => c,
1270 Err(e) => {
1271 warn!("Failed to read PKG-INFO from {:?}: {}", path, e);
1272 return default_package_data(path);
1273 }
1274 };
1275
1276 let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiEgg);
1277
1278 let (size, sha256) = calculate_file_checksums(path);
1279 package_data.size = size;
1280 package_data.sha256 = sha256;
1281
1282 if let Some(installed_files_entry) = find_validated_zip_entry_by_any_suffix(
1283 &validated_entries,
1284 &[
1285 "EGG-INFO/installed-files.txt",
1286 ".egg-info/installed-files.txt",
1287 ],
1288 ) && let Ok(installed_files_content) =
1289 read_validated_zip_entry(&mut archive, installed_files_entry, path, "egg")
1290 {
1291 package_data.file_references = parse_installed_files_txt(&installed_files_content);
1292 }
1293
1294 if let Some(egg_info) = parse_egg_filename(path) {
1295 if package_data.name.is_none() {
1296 package_data.name = Some(egg_info.name.clone());
1297 }
1298 if package_data.version.is_none() {
1299 package_data.version = Some(egg_info.version.clone());
1300 }
1301
1302 if let Some(python_version) = &egg_info.python_version {
1303 let mut extra_data = package_data.extra_data.unwrap_or_default();
1304 extra_data.insert(
1305 "python_version".to_string(),
1306 serde_json::Value::String(python_version.clone()),
1307 );
1308 package_data.extra_data = Some(extra_data);
1309 }
1310 }
1311
1312 package_data.purl = build_egg_purl(
1313 package_data.name.as_deref(),
1314 package_data.version.as_deref(),
1315 );
1316
1317 package_data
1318}
1319
1320fn find_validated_zip_entry_by_suffix<'a>(
1321 entries: &'a [ValidatedZipEntry],
1322 suffix: &str,
1323) -> Option<&'a ValidatedZipEntry> {
1324 entries.iter().find(|entry| entry.name.ends_with(suffix))
1325}
1326
1327fn find_validated_zip_entry_by_any_suffix<'a>(
1328 entries: &'a [ValidatedZipEntry],
1329 suffixes: &[&str],
1330) -> Option<&'a ValidatedZipEntry> {
1331 entries
1332 .iter()
1333 .find(|entry| suffixes.iter().any(|suffix| entry.name.ends_with(suffix)))
1334}
1335
1336fn read_validated_zip_entry<R: Read + std::io::Seek>(
1337 archive: &mut ZipArchive<R>,
1338 entry: &ValidatedZipEntry,
1339 path: &Path,
1340 archive_type: &str,
1341) -> Result<String, String> {
1342 let mut file = archive
1343 .by_index(entry.index)
1344 .map_err(|e| format!("Failed to find entry {}: {}", entry.name, e))?;
1345
1346 let compressed_size = file.compressed_size();
1347 let uncompressed_size = file.size();
1348
1349 if compressed_size > 0 {
1350 let ratio = uncompressed_size as f64 / compressed_size as f64;
1351 if ratio > MAX_COMPRESSION_RATIO {
1352 return Err(format!(
1353 "Rejected suspicious compression ratio in {} {:?}: {:.2}:1",
1354 archive_type, path, ratio
1355 ));
1356 }
1357 }
1358
1359 if uncompressed_size > MAX_FILE_SIZE {
1360 return Err(format!(
1361 "Rejected oversized entry in {} {:?}: {} bytes",
1362 archive_type, path, uncompressed_size
1363 ));
1364 }
1365
1366 read_limited_utf8(
1367 &mut file,
1368 MAX_FILE_SIZE,
1369 &format!("{} entry {}", archive_type, entry.name),
1370 )
1371}
1372
/// Reads at most `max_bytes` bytes from `reader` and decodes them as UTF-8.
///
/// `context` is a human-readable label used only in error messages.
///
/// # Errors
///
/// Returns an error string when the underlying read fails, when the stream
/// holds more than `max_bytes` bytes, or when the bytes are not valid UTF-8.
fn read_limited_utf8<R: Read>(
    reader: &mut R,
    max_bytes: u64,
    context: &str,
) -> Result<String, String> {
    // Read one byte past the limit so an oversized stream can be detected.
    // `saturating_add` keeps `max_bytes == u64::MAX` from overflowing, which
    // would panic in debug builds and wrap to a zero-byte read in release.
    let mut limited = reader.take(max_bytes.saturating_add(1));
    let mut bytes = Vec::new();
    limited
        .read_to_end(&mut bytes)
        .map_err(|e| format!("Failed to read {}: {}", context, e))?;

    if bytes.len() as u64 > max_bytes {
        return Err(format!(
            "{} exceeded {} byte limit while reading",
            context, max_bytes
        ));
    }

    String::from_utf8(bytes).map_err(|e| format!("{} is not valid UTF-8: {}", context, e))
}
1393
/// Normalizes an archive entry path to a relative, `/`-separated form.
///
/// Backslashes are treated as separators and `.` components are dropped.
/// Returns `None` for paths starting with a drive prefix such as `C:/`, for
/// paths containing a root, parent (`..`) or prefix component, and for paths
/// with no remaining components.
fn normalize_archive_entry_path(entry_path: &str) -> Option<String> {
    let unified = entry_path.replace('\\', "/");

    // Reject `X:/...` explicitly: on non-Windows hosts `Path` would otherwise
    // treat the drive letter as an ordinary path segment.
    if matches!(unified.as_bytes(), [drive, b':', b'/', ..] if drive.is_ascii_alphabetic()) {
        return None;
    }

    // Collecting into `Option<Vec<_>>` stops at the first disallowed component.
    let segments = Path::new(&unified)
        .components()
        .filter_map(|component| match component {
            Component::Normal(segment) => Some(Some(segment.to_string_lossy().to_string())),
            Component::CurDir => None,
            Component::RootDir | Component::ParentDir | Component::Prefix(_) => Some(None),
        })
        .collect::<Option<Vec<String>>>()?;

    if segments.is_empty() {
        None
    } else {
        Some(segments.join("/"))
    }
}
1415
1416pub fn parse_record_csv(content: &str) -> Vec<FileReference> {
1421 let mut reader = ReaderBuilder::new()
1422 .has_headers(false)
1423 .from_reader(content.as_bytes());
1424
1425 let mut file_references = Vec::new();
1426
1427 for result in reader.records() {
1428 match result {
1429 Ok(record) => {
1430 if record.len() < 3 {
1431 continue;
1432 }
1433
1434 let path = record.get(0).unwrap_or("").trim().to_string();
1435 if path.is_empty() {
1436 continue;
1437 }
1438
1439 let hash_field = record.get(1).unwrap_or("").trim();
1440 let size_field = record.get(2).unwrap_or("").trim();
1441
1442 let sha256 = if !hash_field.is_empty() && hash_field.contains('=') {
1444 let parts: Vec<&str> = hash_field.split('=').collect();
1445 if parts.len() == 2 && parts[0] == "sha256" {
1446 match URL_SAFE_NO_PAD.decode(parts[1]) {
1448 Ok(decoded) => {
1449 let hex = decoded
1450 .iter()
1451 .map(|b| format!("{:02x}", b))
1452 .collect::<String>();
1453 Some(hex)
1454 }
1455 Err(_) => None,
1456 }
1457 } else {
1458 None
1459 }
1460 } else {
1461 None
1462 };
1463
1464 let size = if !size_field.is_empty() && size_field != "-" {
1466 size_field.parse::<u64>().ok()
1467 } else {
1468 None
1469 };
1470
1471 file_references.push(FileReference {
1472 path,
1473 size,
1474 sha1: None,
1475 md5: None,
1476 sha256,
1477 sha512: None,
1478 extra_data: None,
1479 });
1480 }
1481 Err(e) => {
1482 warn!("Failed to parse RECORD CSV row: {}", e);
1483 continue;
1484 }
1485 }
1486 }
1487
1488 file_references
1489}
1490
1491pub fn parse_installed_files_txt(content: &str) -> Vec<FileReference> {
1494 content
1495 .lines()
1496 .map(|line| line.trim())
1497 .filter(|line| !line.is_empty())
1498 .map(|path| FileReference {
1499 path: path.to_string(),
1500 size: None,
1501 sha1: None,
1502 md5: None,
1503 sha256: None,
1504 sha512: None,
1505 extra_data: None,
1506 })
1507 .collect()
1508}
1509
1510pub fn parse_sources_txt(content: &str) -> Vec<FileReference> {
1511 content
1512 .lines()
1513 .map(str::trim)
1514 .filter(|line| !line.is_empty())
1515 .map(|path| FileReference {
1516 path: path.to_string(),
1517 size: None,
1518 sha1: None,
1519 md5: None,
1520 sha256: None,
1521 sha512: None,
1522 extra_data: None,
1523 })
1524 .collect()
1525}
1526
/// Components parsed from a wheel filename.
struct WheelInfo {
    /// Distribution name, with `_` replaced by `-`.
    name: String,
    /// Version component, copied verbatim from the filename.
    version: String,
    /// Python tag component (e.g. `py3`, `cp39`).
    python_tag: String,
    /// ABI tag component (e.g. `none`, `cp39`).
    abi_tag: String,
    /// Platform tag component; any surplus `-`-separated parts are joined here.
    platform_tag: String,
}

/// Parses `{name}-{version}[-{build}]-{python}-{abi}-{platform}.whl`.
///
/// Returns `None` when the path has no file stem or the stem has fewer than
/// five `-`-separated parts. The optional build tag is recognized by its
/// leading ASCII digit and skipped, so it is no longer mistaken for the Python
/// tag. Filenames without a build tag are parsed exactly as before.
fn parse_wheel_filename(path: &Path) -> Option<WheelInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let parts: Vec<&str> = stem.split('-').collect();

    if parts.len() < 5 {
        return None;
    }

    // A build tag needs a sixth part and must start with a digit; Python tags
    // start with a letter, so the two cannot be confused.
    let has_build_tag =
        parts.len() >= 6 && parts[2].starts_with(|c: char| c.is_ascii_digit());
    let tags = if has_build_tag {
        &parts[3..]
    } else {
        &parts[2..]
    };

    // `tags` holds at least three elements in both branches above.
    Some(WheelInfo {
        name: parts[0].replace('_', "-"),
        version: parts[1].to_string(),
        python_tag: tags[0].to_string(),
        abi_tag: tags[1].to_string(),
        platform_tag: tags[2..].join("-"),
    })
}
1551
/// Components parsed from an egg filename.
struct EggInfo {
    /// Distribution name, with `_` replaced by `-`.
    name: String,
    /// Version component, copied verbatim from the filename.
    version: String,
    /// Third `-`-separated component of the stem, when present.
    python_version: Option<String>,
}

/// Splits an egg file stem on `-` into name, version and optional Python
/// version.
///
/// Returns `None` when the path has no file stem or the stem has fewer than
/// two `-`-separated parts. Parts after the third are ignored.
fn parse_egg_filename(path: &Path) -> Option<EggInfo> {
    let stem = path.file_stem()?.to_string_lossy();
    let mut fields = stem.split('-');

    let raw_name = fields.next()?;
    let raw_version = fields.next()?;

    Some(EggInfo {
        name: raw_name.replace('_', "-"),
        version: raw_version.to_string(),
        python_version: fields.next().map(str::to_string),
    })
}
1572
1573fn build_wheel_purl(
1574 name: Option<&str>,
1575 version: Option<&str>,
1576 wheel_info: &WheelInfo,
1577) -> Option<String> {
1578 let name = name?;
1579 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1580
1581 if let Some(ver) = version {
1582 package_url.with_version(ver).ok()?;
1583 }
1584
1585 let extension = format!(
1586 "{}-{}-{}",
1587 wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
1588 );
1589 package_url.add_qualifier("extension", extension).ok()?;
1590
1591 Some(package_url.to_string())
1592}
1593
1594fn build_egg_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
1595 let name = name?;
1596 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1597
1598 if let Some(ver) = version {
1599 package_url.with_version(ver).ok()?;
1600 }
1601
1602 package_url.add_qualifier("type", "egg").ok()?;
1603
1604 Some(package_url.to_string())
1605}
1606
1607fn python_parse_rfc822_content(content: &str, datasource_id: DatasourceId) -> PackageData {
1608 let metadata = super::rfc822::parse_rfc822_content(content);
1609 build_package_data_from_rfc822(&metadata, datasource_id)
1610}
1611
1612fn build_package_data_from_rfc822(
1617 metadata: &super::rfc822::Rfc822Metadata,
1618 datasource_id: DatasourceId,
1619) -> PackageData {
1620 use super::rfc822::{get_header_all, get_header_first};
1621
1622 let name = get_header_first(&metadata.headers, "name");
1623 let version = get_header_first(&metadata.headers, "version");
1624 let summary = get_header_first(&metadata.headers, "summary");
1625 let mut homepage_url = get_header_first(&metadata.headers, "home-page");
1626 let author = get_header_first(&metadata.headers, "author");
1627 let author_email = get_header_first(&metadata.headers, "author-email");
1628 let license = get_header_first(&metadata.headers, "license");
1629 let license_expression = get_header_first(&metadata.headers, "license-expression");
1630 let download_url = get_header_first(&metadata.headers, "download-url");
1631 let platform = get_header_first(&metadata.headers, "platform");
1632 let requires_python = get_header_first(&metadata.headers, "requires-python");
1633 let classifiers = get_header_all(&metadata.headers, "classifier");
1634 let license_files = get_header_all(&metadata.headers, "license-file");
1635
1636 let description_body = if metadata.body.is_empty() {
1637 get_header_first(&metadata.headers, "description").unwrap_or_default()
1638 } else {
1639 metadata.body.clone()
1640 };
1641
1642 let description = build_description(summary.as_deref(), &description_body);
1643
1644 let mut parties = Vec::new();
1645 if author.is_some() || author_email.is_some() {
1646 parties.push(Party {
1647 r#type: Some("person".to_string()),
1648 role: Some("author".to_string()),
1649 name: author,
1650 email: author_email,
1651 url: None,
1652 organization: None,
1653 organization_url: None,
1654 timezone: None,
1655 });
1656 }
1657
1658 let (keywords, license_classifiers) = split_classifiers(&classifiers);
1659 let referenced_license_files: Vec<&str> = license_files.iter().map(String::as_str).collect();
1660 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1661 license_expression
1662 .as_deref()
1663 .and_then(normalize_spdx_expression)
1664 .map(|normalized| {
1665 build_declared_license_data(
1666 normalized,
1667 DeclaredLicenseMatchMetadata::single_line(
1668 license_expression.as_deref().unwrap_or_default(),
1669 )
1670 .with_referenced_filenames(&referenced_license_files),
1671 )
1672 })
1673 .unwrap_or_else(|| normalize_spdx_declared_license(license_expression.as_deref()));
1674
1675 let extracted_license_statement = license_expression
1676 .clone()
1677 .or_else(|| build_extracted_license_statement(license.as_deref(), &license_classifiers));
1678
1679 let mut extra_data = HashMap::new();
1680 if let Some(platform_value) = platform
1681 && !platform_value.eq_ignore_ascii_case("unknown")
1682 && !platform_value.is_empty()
1683 {
1684 extra_data.insert(
1685 "platform".to_string(),
1686 serde_json::Value::String(platform_value),
1687 );
1688 }
1689
1690 if let Some(requires_python_value) = requires_python
1691 && !requires_python_value.is_empty()
1692 {
1693 extra_data.insert(
1694 "requires_python".to_string(),
1695 serde_json::Value::String(requires_python_value),
1696 );
1697 }
1698
1699 if !license_files.is_empty() {
1700 extra_data.insert(
1701 "license_files".to_string(),
1702 serde_json::Value::Array(
1703 license_files
1704 .iter()
1705 .cloned()
1706 .map(serde_json::Value::String)
1707 .collect(),
1708 ),
1709 );
1710 }
1711
1712 let file_references = license_files
1713 .iter()
1714 .map(|path| FileReference {
1715 path: path.clone(),
1716 size: None,
1717 sha1: None,
1718 md5: None,
1719 sha256: None,
1720 sha512: None,
1721 extra_data: None,
1722 })
1723 .collect();
1724
1725 let project_urls = get_header_all(&metadata.headers, "project-url");
1726 let dependencies = extract_rfc822_dependencies(&metadata.headers);
1727 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
1728
1729 if !project_urls.is_empty() {
1730 let parsed_urls = parse_project_urls(&project_urls);
1731
1732 for (label, url) in &parsed_urls {
1733 let label_lower = label.to_lowercase();
1734
1735 if bug_tracking_url.is_none()
1736 && matches!(
1737 label_lower.as_str(),
1738 "tracker"
1739 | "bug reports"
1740 | "bug tracker"
1741 | "issues"
1742 | "issue tracker"
1743 | "github: issues"
1744 )
1745 {
1746 bug_tracking_url = Some(url.clone());
1747 } else if code_view_url.is_none()
1748 && matches!(label_lower.as_str(), "source" | "source code" | "code")
1749 {
1750 code_view_url = Some(url.clone());
1751 } else if vcs_url.is_none()
1752 && matches!(
1753 label_lower.as_str(),
1754 "github" | "gitlab" | "github: repo" | "repository"
1755 )
1756 {
1757 vcs_url = Some(url.clone());
1758 } else if homepage_url.is_none()
1759 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
1760 {
1761 homepage_url = Some(url.clone());
1762 } else if label_lower == "changelog" {
1763 extra_data.insert(
1764 "changelog_url".to_string(),
1765 serde_json::Value::String(url.clone()),
1766 );
1767 }
1768 }
1769
1770 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
1771 .iter()
1772 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
1773 .collect();
1774
1775 if !project_urls_json.is_empty() {
1776 extra_data.insert(
1777 "project_urls".to_string(),
1778 serde_json::Value::Object(project_urls_json),
1779 );
1780 }
1781 }
1782
1783 let extra_data = if extra_data.is_empty() {
1784 None
1785 } else {
1786 Some(extra_data)
1787 };
1788
1789 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
1790 build_pypi_urls(name.as_deref(), version.as_deref());
1791
1792 PackageData {
1793 package_type: Some(PythonParser::PACKAGE_TYPE),
1794 namespace: None,
1795 name,
1796 version,
1797 qualifiers: None,
1798 subpath: None,
1799 primary_language: Some("Python".to_string()),
1800 description,
1801 release_date: None,
1802 parties,
1803 keywords,
1804 homepage_url,
1805 download_url,
1806 size: None,
1807 sha1: None,
1808 md5: None,
1809 sha256: None,
1810 sha512: None,
1811 bug_tracking_url,
1812 code_view_url,
1813 vcs_url,
1814 copyright: None,
1815 holder: None,
1816 declared_license_expression,
1817 declared_license_expression_spdx,
1818 license_detections,
1819 other_license_expression: None,
1820 other_license_expression_spdx: None,
1821 other_license_detections: Vec::new(),
1822 extracted_license_statement,
1823 notice_text: None,
1824 source_packages: Vec::new(),
1825 file_references,
1826 is_private: false,
1827 is_virtual: false,
1828 extra_data,
1829 dependencies,
1830 repository_homepage_url,
1831 repository_download_url,
1832 api_data_url,
1833 datasource_id: Some(datasource_id),
1834 purl,
1835 }
1836}
1837
/// Splits `Project-URL` header values of the form `Label, URL` into
/// `(label, url)` pairs.
///
/// Each entry is split on its first comma and both halves are trimmed, so
/// `Label,URL` (no space after the comma) is accepted as well as `Label, URL`.
/// Entries without a comma, or with an empty label or URL, are skipped.
fn parse_project_urls(project_urls: &[String]) -> Vec<(String, String)> {
    project_urls
        .iter()
        .filter_map(|url_entry| {
            let (label, url) = url_entry.split_once(',')?;
            let (label, url) = (label.trim(), url.trim());
            (!label.is_empty() && !url.is_empty()).then(|| (label.to_string(), url.to_string()))
        })
        .collect()
}
1853
/// Joins the trimmed summary and the trimmed body with a newline.
///
/// A part that is absent or blank after trimming is left out; returns `None`
/// when both are.
fn build_description(summary: Option<&str>, body: &str) -> Option<String> {
    let pieces: Vec<&str> = summary
        .into_iter()
        .chain(std::iter::once(body))
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .collect();

    (!pieces.is_empty()).then(|| pieces.join("\n"))
}
1872
/// Splits classifiers into `(keywords, license_classifiers)`.
///
/// Classifiers starting with `License ::` go to the second vector; all others
/// go to the first. Relative order is preserved within each.
fn split_classifiers(classifiers: &[String]) -> (Vec<String>, Vec<String>) {
    let (license_classifiers, keywords): (Vec<String>, Vec<String>) = classifiers
        .iter()
        .cloned()
        .partition(|classifier| classifier.starts_with("License ::"));

    (keywords, license_classifiers)
}
1887
/// Renders the declared license and license classifiers as a newline-terminated
/// text block.
///
/// Emits a `license: <value>` line when `license` is non-blank, then a
/// `classifiers:` header followed by one quoted list item per classifier when
/// any are given. Returns `None` when neither part produces output.
fn build_extracted_license_statement(
    license: Option<&str>,
    license_classifiers: &[String],
) -> Option<String> {
    let mut statement = String::new();

    if let Some(value) = license.map(str::trim).filter(|value| !value.is_empty()) {
        statement.push_str(&format!("license: {}", value));
        statement.push('\n');
    }

    if !license_classifiers.is_empty() {
        statement.push_str("classifiers:");
        statement.push('\n');
        for classifier in license_classifiers {
            statement.push_str(&format!(" - '{}'", classifier));
            statement.push('\n');
        }
    }

    if statement.is_empty() {
        None
    } else {
        Some(statement)
    }
}
1913
1914pub(crate) fn build_pypi_urls(
1915 name: Option<&str>,
1916 version: Option<&str>,
1917) -> (
1918 Option<String>,
1919 Option<String>,
1920 Option<String>,
1921 Option<String>,
1922) {
1923 let repository_homepage_url = name.map(|value| format!("https://pypi.org/project/{}", value));
1924
1925 let repository_download_url = name.and_then(|value| {
1926 version.map(|ver| {
1927 format!(
1928 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
1929 &value[..1.min(value.len())],
1930 value,
1931 value,
1932 ver
1933 )
1934 })
1935 });
1936
1937 let api_data_url = name.map(|value| {
1938 if let Some(ver) = version {
1939 format!("https://pypi.org/pypi/{}/{}/json", value, ver)
1940 } else {
1941 format!("https://pypi.org/pypi/{}/json", value)
1942 }
1943 });
1944
1945 let purl = name.and_then(|value| {
1946 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), value).ok()?;
1947 if let Some(ver) = version {
1948 package_url.with_version(ver).ok()?;
1949 }
1950 Some(package_url.to_string())
1951 });
1952
1953 (
1954 repository_homepage_url,
1955 repository_download_url,
1956 api_data_url,
1957 purl,
1958 )
1959}
1960
1961fn build_pypi_purl_with_extension(
1962 name: &str,
1963 version: Option<&str>,
1964 extension: &str,
1965) -> Option<String> {
1966 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1967 if let Some(ver) = version {
1968 package_url.with_version(ver).ok()?;
1969 }
1970 package_url.add_qualifier("extension", extension).ok()?;
1971 Some(package_url.to_string())
1972}
1973
1974fn extract_from_pyproject_toml(path: &Path) -> PackageData {
1975 let toml_content = match read_toml_file(path) {
1976 Ok(content) => content,
1977 Err(e) => {
1978 warn!(
1979 "Failed to read or parse pyproject.toml at {:?}: {}",
1980 path, e
1981 );
1982 return default_package_data(path);
1983 }
1984 };
1985
1986 let tool_table = toml_content.get("tool").and_then(|v| v.as_table());
1987
1988 let project_table =
1990 if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
1991 project.clone()
1993 } else if let Some(tool) = tool_table {
1994 if let Some(poetry) = tool.get("poetry").and_then(|v| v.as_table()) {
1995 poetry.clone()
1997 } else {
1998 warn!(
1999 "No project or tool.poetry data found in pyproject.toml at {:?}",
2000 path
2001 );
2002 return default_package_data(path);
2003 }
2004 } else if toml_content.get(FIELD_NAME).is_some() {
2005 match toml_content.as_table() {
2007 Some(table) => table.clone(),
2008 None => {
2009 warn!("Failed to convert TOML content to table in {:?}", path);
2010 return default_package_data(path);
2011 }
2012 }
2013 } else {
2014 warn!("No project data found in pyproject.toml at {:?}", path);
2015 return default_package_data(path);
2016 };
2017
2018 let name = project_table
2019 .get(FIELD_NAME)
2020 .and_then(|v| v.as_str())
2021 .map(String::from);
2022
2023 let version = project_table
2024 .get(FIELD_VERSION)
2025 .and_then(|v| v.as_str())
2026 .map(String::from);
2027 let classifiers = project_table
2028 .get("classifiers")
2029 .and_then(|value| value.as_array())
2030 .map(|values| {
2031 values
2032 .iter()
2033 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
2034 .collect::<Vec<_>>()
2035 })
2036 .unwrap_or_default();
2037
2038 let extracted_license_statement = extract_raw_license_string(&project_table);
2039 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
2040 normalize_spdx_declared_license(extract_license_expression_candidate(&project_table));
2041
2042 let (homepage_url, repository_url) = extract_urls(&project_table);
2044
2045 let (dependencies, optional_dependencies) = extract_dependencies(&project_table, &toml_content);
2046 let extra_data = extract_pyproject_extra_data(&toml_content);
2047
2048 let purl = name.as_ref().and_then(|n| {
2050 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n) {
2051 Ok(p) => p,
2052 Err(e) => {
2053 warn!(
2054 "Failed to create PackageUrl for Python package '{}': {}",
2055 n, e
2056 );
2057 return None;
2058 }
2059 };
2060
2061 if let Some(v) = &version
2062 && let Err(e) = package_url.with_version(v)
2063 {
2064 warn!(
2065 "Failed to set version '{}' for Python package '{}': {}",
2066 v, n, e
2067 );
2068 return None;
2069 }
2070
2071 Some(package_url.to_string())
2072 });
2073
2074 let api_data_url = name.as_ref().map(|n| {
2075 if let Some(v) = &version {
2076 format!("https://pypi.org/pypi/{}/{}/json", n, v)
2077 } else {
2078 format!("https://pypi.org/pypi/{}/json", n)
2079 }
2080 });
2081
2082 let pypi_homepage_url = name
2083 .as_ref()
2084 .map(|n| format!("https://pypi.org/project/{}", n));
2085
2086 let pypi_download_url = name.as_ref().and_then(|n| {
2087 version.as_ref().map(|v| {
2088 format!(
2089 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
2090 &n[..1.min(n.len())],
2091 n,
2092 n,
2093 v
2094 )
2095 })
2096 });
2097
2098 PackageData {
2099 package_type: Some(PythonParser::PACKAGE_TYPE),
2100 namespace: None,
2101 name,
2102 version,
2103 qualifiers: None,
2104 subpath: None,
2105 primary_language: None,
2106 description: None,
2107 release_date: None,
2108 parties: extract_parties(&project_table),
2109 keywords: Vec::new(),
2110 homepage_url: homepage_url.or(pypi_homepage_url),
2111 download_url: repository_url.clone().or(pypi_download_url),
2112 size: None,
2113 sha1: None,
2114 md5: None,
2115 sha256: None,
2116 sha512: None,
2117 bug_tracking_url: None,
2118 code_view_url: None,
2119 vcs_url: repository_url,
2120 copyright: None,
2121 holder: None,
2122 declared_license_expression,
2123 declared_license_expression_spdx,
2124 license_detections,
2125 other_license_expression: None,
2126 other_license_expression_spdx: None,
2127 other_license_detections: Vec::new(),
2128 extracted_license_statement,
2129 notice_text: None,
2130 source_packages: Vec::new(),
2131 file_references: Vec::new(),
2132 is_private: has_private_classifier(&classifiers),
2133 is_virtual: false,
2134 extra_data,
2135 dependencies: [dependencies, optional_dependencies].concat(),
2136 repository_homepage_url: None,
2137 repository_download_url: None,
2138 api_data_url,
2139 datasource_id: Some(DatasourceId::PypiPyprojectToml),
2140 purl,
2141 }
2142}
2143
2144fn extract_raw_license_string(project: &TomlMap<String, TomlValue>) -> Option<String> {
2145 project
2146 .get(FIELD_LICENSE)
2147 .and_then(|license_value| match license_value {
2148 TomlValue::String(license_str) => Some(license_str.clone()),
2149 TomlValue::Table(license_table) => license_table
2150 .get("text")
2151 .and_then(|v| v.as_str())
2152 .map(|s| s.to_string())
2153 .or_else(|| {
2154 license_table
2155 .get("expression")
2156 .and_then(|v| v.as_str())
2157 .map(|expr| expr.to_string())
2158 }),
2159 _ => None,
2160 })
2161}
2162
2163fn extract_license_expression_candidate(project: &TomlMap<String, TomlValue>) -> Option<&str> {
2164 match project.get(FIELD_LICENSE) {
2165 Some(TomlValue::String(license_str)) => Some(license_str.as_str()),
2166 Some(TomlValue::Table(license_table)) => license_table
2167 .get("expression")
2168 .and_then(|value| value.as_str()),
2169 _ => None,
2170 }
2171}
2172
2173fn extract_urls(project: &TomlMap<String, TomlValue>) -> (Option<String>, Option<String>) {
2174 let mut homepage_url = None;
2175 let mut repository_url = None;
2176
2177 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
2179 homepage_url = urls
2180 .get(FIELD_HOMEPAGE)
2181 .and_then(|v| v.as_str())
2182 .map(String::from);
2183 repository_url = urls
2184 .get(FIELD_REPOSITORY)
2185 .and_then(|v| v.as_str())
2186 .map(String::from);
2187 }
2188
2189 if homepage_url.is_none() {
2191 homepage_url = project
2192 .get(FIELD_HOMEPAGE)
2193 .and_then(|v| v.as_str())
2194 .map(String::from);
2195 }
2196
2197 if repository_url.is_none() {
2198 repository_url = project
2199 .get(FIELD_REPOSITORY)
2200 .and_then(|v| v.as_str())
2201 .map(String::from);
2202 }
2203
2204 (homepage_url, repository_url)
2205}
2206
2207fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
2208 let mut parties = Vec::new();
2209
2210 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
2211 for author in authors {
2212 if let Some(author_str) = author.as_str() {
2213 let (name, email) = split_name_email(author_str);
2214 parties.push(Party {
2215 r#type: None,
2216 role: Some("author".to_string()),
2217 name,
2218 email,
2219 url: None,
2220 organization: None,
2221 organization_url: None,
2222 timezone: None,
2223 });
2224 }
2225 }
2226 }
2227
2228 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
2229 for maintainer in maintainers {
2230 if let Some(maintainer_str) = maintainer.as_str() {
2231 let (name, email) = split_name_email(maintainer_str);
2232 parties.push(Party {
2233 r#type: None,
2234 role: Some("maintainer".to_string()),
2235 name,
2236 email,
2237 url: None,
2238 organization: None,
2239 organization_url: None,
2240 timezone: None,
2241 });
2242 }
2243 }
2244 }
2245
2246 parties
2247}
2248
2249fn extract_dependencies(
2250 project: &TomlMap<String, TomlValue>,
2251 toml_content: &TomlValue,
2252) -> (Vec<Dependency>, Vec<Dependency>) {
2253 let mut dependencies = Vec::new();
2254 let mut optional_dependencies = Vec::new();
2255
2256 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
2258 match deps_value {
2259 TomlValue::Array(arr) => {
2260 dependencies = parse_dependency_array(arr, false, None);
2261 }
2262 TomlValue::Table(table) => {
2263 dependencies = parse_dependency_table(table, false, None);
2264 }
2265 _ => {}
2266 }
2267 }
2268
2269 if let Some(opt_deps_table) = project
2271 .get(FIELD_OPTIONAL_DEPENDENCIES)
2272 .and_then(|v| v.as_table())
2273 {
2274 for (extra_name, deps) in opt_deps_table {
2275 match deps {
2276 TomlValue::Array(arr) => {
2277 optional_dependencies.extend(parse_dependency_array(
2278 arr,
2279 true,
2280 Some(extra_name),
2281 ));
2282 }
2283 TomlValue::Table(table) => {
2284 optional_dependencies.extend(parse_dependency_table(
2285 table,
2286 true,
2287 Some(extra_name),
2288 ));
2289 }
2290 _ => {}
2291 }
2292 }
2293 }
2294
2295 if let Some(dev_deps_value) = project.get(FIELD_DEV_DEPENDENCIES) {
2297 match dev_deps_value {
2298 TomlValue::Array(arr) => {
2299 optional_dependencies.extend(parse_dependency_array(
2300 arr,
2301 true,
2302 Some(FIELD_DEV_DEPENDENCIES),
2303 ));
2304 }
2305 TomlValue::Table(table) => {
2306 optional_dependencies.extend(parse_dependency_table(
2307 table,
2308 true,
2309 Some(FIELD_DEV_DEPENDENCIES),
2310 ));
2311 }
2312 _ => {}
2313 }
2314 }
2315
2316 if let Some(groups_table) = project.get("group").and_then(|v| v.as_table()) {
2318 for (group_name, group_data) in groups_table {
2319 if let Some(group_deps) = group_data.as_table().and_then(|t| t.get("dependencies")) {
2320 match group_deps {
2321 TomlValue::Array(arr) => {
2322 optional_dependencies.extend(parse_dependency_array(
2323 arr,
2324 true,
2325 Some(group_name),
2326 ));
2327 }
2328 TomlValue::Table(table) => {
2329 optional_dependencies.extend(parse_dependency_table(
2330 table,
2331 true,
2332 Some(group_name),
2333 ));
2334 }
2335 _ => {}
2336 }
2337 }
2338 }
2339 }
2340
2341 if let Some(groups_table) = toml_content
2342 .get(FIELD_DEPENDENCY_GROUPS)
2343 .and_then(|value| value.as_table())
2344 {
2345 for (group_name, deps) in groups_table {
2346 match deps {
2347 TomlValue::Array(arr) => {
2348 optional_dependencies.extend(parse_dependency_array(
2349 arr,
2350 true,
2351 Some(group_name),
2352 ));
2353 }
2354 TomlValue::Table(table) => {
2355 optional_dependencies.extend(parse_dependency_table(
2356 table,
2357 true,
2358 Some(group_name),
2359 ));
2360 }
2361 _ => {}
2362 }
2363 }
2364 }
2365
2366 if let Some(dev_deps_value) = toml_content
2367 .get("tool")
2368 .and_then(|value| value.as_table())
2369 .and_then(|tool| tool.get("uv"))
2370 .and_then(|value| value.as_table())
2371 .and_then(|uv| uv.get(FIELD_DEV_DEPENDENCIES))
2372 {
2373 match dev_deps_value {
2374 TomlValue::Array(arr) => {
2375 optional_dependencies.extend(parse_dependency_array(arr, true, Some("dev")));
2376 }
2377 TomlValue::Table(table) => {
2378 optional_dependencies.extend(parse_dependency_table(table, true, Some("dev")));
2379 }
2380 _ => {}
2381 }
2382 }
2383
2384 (dependencies, optional_dependencies)
2385}
2386
2387fn extract_pyproject_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
2388 let mut extra_data = HashMap::new();
2389
2390 if let Some(tool_uv) = toml_content
2391 .get("tool")
2392 .and_then(|value| value.as_table())
2393 .and_then(|tool| tool.get("uv"))
2394 {
2395 extra_data.insert("tool_uv".to_string(), toml_value_to_json(tool_uv));
2396 }
2397
2398 if extra_data.is_empty() {
2399 None
2400 } else {
2401 Some(extra_data)
2402 }
2403}
2404
2405fn toml_value_to_json(value: &TomlValue) -> JsonValue {
2406 match value {
2407 TomlValue::String(value) => JsonValue::String(value.clone()),
2408 TomlValue::Integer(value) => JsonValue::String(value.to_string()),
2409 TomlValue::Float(value) => JsonValue::String(value.to_string()),
2410 TomlValue::Boolean(value) => JsonValue::Bool(*value),
2411 TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
2412 TomlValue::Array(values) => {
2413 JsonValue::Array(values.iter().map(toml_value_to_json).collect())
2414 }
2415 TomlValue::Table(values) => JsonValue::Object(
2416 values
2417 .iter()
2418 .map(|(key, value)| (key.clone(), toml_value_to_json(value)))
2419 .collect::<JsonMap<String, JsonValue>>(),
2420 ),
2421 }
2422}
2423
2424fn parse_dependency_table(
2425 table: &TomlMap<String, TomlValue>,
2426 is_optional: bool,
2427 scope: Option<&str>,
2428) -> Vec<Dependency> {
2429 table
2430 .iter()
2431 .filter_map(|(name, version)| {
2432 let version_str = version.as_str().map(|s| s.to_string());
2433 let mut package_url =
2434 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2435
2436 if let Some(v) = &version_str {
2437 package_url.with_version(v).ok()?;
2438 }
2439
2440 Some(Dependency {
2441 purl: Some(package_url.to_string()),
2442 extracted_requirement: None,
2443 scope: scope.map(|s| s.to_string()),
2444 is_runtime: Some(!is_optional),
2445 is_optional: Some(is_optional),
2446 is_pinned: None,
2447 is_direct: Some(true),
2448 resolved_package: None,
2449 extra_data: None,
2450 })
2451 })
2452 .collect()
2453}
2454
2455fn parse_dependency_array(
2456 array: &[TomlValue],
2457 is_optional: bool,
2458 scope: Option<&str>,
2459) -> Vec<Dependency> {
2460 array
2461 .iter()
2462 .filter_map(|dep| {
2463 let dep_str = dep.as_str()?;
2464
2465 let mut parts = dep_str.split(['>', '=', '<', '~']);
2466 let name = parts.next()?.trim().to_string();
2467
2468 let version = parts.next().map(|v| v.trim().to_string());
2469
2470 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name)
2471 {
2472 Ok(purl) => purl,
2473 Err(_) => return None,
2474 };
2475
2476 if let Some(ref v) = version {
2477 package_url.with_version(v).ok()?;
2478 }
2479
2480 Some(Dependency {
2481 purl: Some(package_url.to_string()),
2482 extracted_requirement: None,
2483 scope: scope.map(|s| s.to_string()),
2484 is_runtime: Some(!is_optional),
2485 is_optional: Some(is_optional),
2486 is_pinned: None,
2487 is_direct: Some(true),
2488 resolved_package: None,
2489 extra_data: None,
2490 })
2491 })
2492 .collect()
2493}
2494
/// Literal value produced by [`LiteralEvaluator`] when statically evaluating
/// expressions from a `setup.py` AST.
#[derive(Debug, Clone)]
enum Value {
    /// A string constant.
    String(String),
    /// A numeric constant; integer and float constants are both stored as `f64`.
    Number(f64),
    /// A boolean constant.
    Bool(bool),
    /// The `None` constant.
    None,
    /// A list literal whose elements were all evaluable.
    List(Vec<Value>),
    /// A tuple literal whose elements were all evaluable.
    Tuple(Vec<Value>),
    /// A dict literal, `dict(...)` call or `OrderedDict(...)` call, keyed by
    /// the string form of each key.
    Dict(HashMap<String, Value>),
}
2505
/// Bounded evaluator for literal expressions in a `setup.py` AST.
struct LiteralEvaluator {
    /// Known name -> value bindings used to resolve `Name` expressions.
    constants: HashMap<String, Value>,
    /// Evaluation is refused once the recursion depth reaches this value.
    max_depth: usize,
    /// Evaluation is refused once this many nodes have been visited.
    max_nodes: usize,
    /// Running count of nodes visited across all `evaluate_expr` calls.
    nodes_visited: usize,
}
2512
2513impl LiteralEvaluator {
2514 fn new(constants: HashMap<String, Value>) -> Self {
2515 Self {
2516 constants,
2517 max_depth: MAX_SETUP_PY_AST_DEPTH,
2518 max_nodes: MAX_SETUP_PY_AST_NODES,
2519 nodes_visited: 0,
2520 }
2521 }
2522
2523 fn insert_constant(&mut self, name: String, value: Value) {
2524 self.constants.insert(name, value);
2525 }
2526
2527 fn evaluate_expr(&mut self, expr: &ast::Expr, depth: usize) -> Option<Value> {
2528 if depth >= self.max_depth || self.nodes_visited >= self.max_nodes {
2529 return None;
2530 }
2531 self.nodes_visited += 1;
2532
2533 match expr {
2534 ast::Expr::Constant(ast::ExprConstant { value, .. }) => self.evaluate_constant(value),
2535 ast::Expr::Name(ast::ExprName { id, .. }) => self.constants.get(id.as_str()).cloned(),
2536 ast::Expr::List(ast::ExprList { elts, .. }) => {
2537 let mut values = Vec::new();
2538 for elt in elts {
2539 values.push(self.evaluate_expr(elt, depth + 1)?);
2540 }
2541 Some(Value::List(values))
2542 }
2543 ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
2544 let mut values = Vec::new();
2545 for elt in elts {
2546 values.push(self.evaluate_expr(elt, depth + 1)?);
2547 }
2548 Some(Value::Tuple(values))
2549 }
2550 ast::Expr::Dict(ast::ExprDict { keys, values, .. }) => {
2551 let mut dict = HashMap::new();
2552 for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
2553 let key_expr = key_expr.as_ref()?;
2554 let key_value = self.evaluate_expr(key_expr, depth + 1)?;
2555 let key = value_to_string(&key_value)?;
2556 let value = self.evaluate_expr(value_expr, depth + 1)?;
2557 dict.insert(key, value);
2558 }
2559 Some(Value::Dict(dict))
2560 }
2561 ast::Expr::Call(ast::ExprCall {
2562 func,
2563 args,
2564 keywords,
2565 ..
2566 }) => {
2567 if keywords.is_empty()
2568 && let Some(name) = dotted_name(func.as_ref(), depth + 1)
2569 && matches!(name.as_str(), "OrderedDict" | "collections.OrderedDict")
2570 {
2571 return self.evaluate_ordered_dict(args, depth + 1);
2572 }
2573
2574 if !args.is_empty() {
2575 return None;
2576 }
2577
2578 if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref()
2579 && id == "dict"
2580 {
2581 let mut dict = HashMap::new();
2582 for keyword in keywords {
2583 let key = keyword.arg.as_ref().map(|name| name.as_str())?;
2584 let value = self.evaluate_expr(&keyword.value, depth + 1)?;
2585 dict.insert(key.to_string(), value);
2586 }
2587 return Some(Value::Dict(dict));
2588 }
2589
2590 None
2591 }
2592 _ => None,
2593 }
2594 }
2595
2596 fn evaluate_constant(&self, constant: &ast::Constant) -> Option<Value> {
2597 match constant {
2598 ast::Constant::Str(value) => Some(Value::String(value.clone())),
2599 ast::Constant::Bool(value) => Some(Value::Bool(*value)),
2600 ast::Constant::Int(value) => value.to_string().parse::<f64>().ok().map(Value::Number),
2601 ast::Constant::Float(value) => Some(Value::Number(*value)),
2602 ast::Constant::None => Some(Value::None),
2603 _ => None,
2604 }
2605 }
2606
2607 fn evaluate_ordered_dict(&mut self, args: &[ast::Expr], depth: usize) -> Option<Value> {
2608 if args.len() != 1 {
2609 return None;
2610 }
2611
2612 let items = match self.evaluate_expr(&args[0], depth)? {
2613 Value::List(items) | Value::Tuple(items) => items,
2614 _ => return None,
2615 };
2616
2617 let mut dict = HashMap::new();
2618 for item in items {
2619 let Value::Tuple(values) = item else {
2620 return None;
2621 };
2622 if values.len() != 2 {
2623 return None;
2624 }
2625 let key = value_to_string(&values[0])?;
2626 dict.insert(key, values[1].clone());
2627 }
2628
2629 Some(Value::Dict(dict))
2630 }
2631}
2632
/// Names under which `setup()` and its providing modules are reachable in a
/// `setup.py`, as gathered from its import statements.
#[derive(Default)]
struct SetupAliases {
    /// Bare names that refer to the `setup` function (always includes `"setup"`
    /// once populated by `collect_setup_aliases`).
    setup_names: HashSet<String>,
    /// Local module alias -> real module name, for imported setup modules.
    module_aliases: HashMap<String, String>,
}
2638
2639fn extract_from_setup_py(path: &Path) -> PackageData {
2640 let content = match read_file_to_string(path) {
2641 Ok(content) => content,
2642 Err(e) => {
2643 warn!("Failed to read setup.py at {:?}: {}", path, e);
2644 return default_package_data(path);
2645 }
2646 };
2647
2648 if content.len() > MAX_SETUP_PY_BYTES {
2649 warn!("setup.py too large at {:?}: {} bytes", path, content.len());
2650 return extract_from_setup_py_regex(&content);
2651 }
2652
2653 let mut package_data = match extract_from_setup_py_ast(&content) {
2654 Ok(Some(data)) => data,
2655 Ok(None) => extract_from_setup_py_regex(&content),
2656 Err(e) => {
2657 warn!("Failed to parse setup.py AST at {:?}: {}", path, e);
2658 extract_from_setup_py_regex(&content)
2659 }
2660 };
2661
2662 if package_data.name.is_none() {
2663 package_data.name = extract_setup_value(&content, "name");
2664 }
2665
2666 if package_data.version.is_none() {
2667 package_data.version = extract_setup_value(&content, "version");
2668 }
2669
2670 fill_from_sibling_dunder_metadata(path, &content, &mut package_data);
2671
2672 if package_data.purl.is_none() {
2673 package_data.purl = build_setup_py_purl(
2674 package_data.name.as_deref(),
2675 package_data.version.as_deref(),
2676 );
2677 }
2678
2679 package_data
2680}
2681
2682fn fill_from_sibling_dunder_metadata(path: &Path, content: &str, package_data: &mut PackageData) {
2683 if package_data.version.is_some()
2684 && package_data.extracted_license_statement.is_some()
2685 && package_data
2686 .parties
2687 .iter()
2688 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some())
2689 {
2690 return;
2691 }
2692
2693 let Some(root) = path.parent() else {
2694 return;
2695 };
2696
2697 let dunder_metadata = collect_sibling_dunder_metadata(root, content);
2698
2699 if package_data.version.is_none() {
2700 package_data.version = dunder_metadata.version;
2701 }
2702
2703 if package_data.extracted_license_statement.is_none() {
2704 package_data.extracted_license_statement = dunder_metadata.license;
2705 }
2706
2707 let has_author = package_data
2708 .parties
2709 .iter()
2710 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some());
2711
2712 if !has_author && let Some(author) = dunder_metadata.author {
2713 package_data.parties.push(Party {
2714 r#type: Some("person".to_string()),
2715 role: Some("author".to_string()),
2716 name: Some(author),
2717 email: None,
2718 url: None,
2719 organization: None,
2720 organization_url: None,
2721 timezone: None,
2722 });
2723 }
2724}
2725
/// Values of module-level dunder assignments found in modules imported by a
/// `setup.py`; each field is `None` when no matching assignment was found.
#[derive(Default)]
struct DunderMetadata {
    /// Captured value of a `__version__ = "..."` assignment.
    version: Option<String>,
    /// Captured value of an `__author__ = "..."` assignment.
    author: Option<String>,
    /// Captured value of a `__license__ = "..."` assignment.
    license: Option<String>,
}
2732
2733fn collect_sibling_dunder_metadata(root: &Path, content: &str) -> DunderMetadata {
2734 let statements = match ast::Suite::parse(content, "<setup.py>") {
2735 Ok(statements) => statements,
2736 Err(_) => return DunderMetadata::default(),
2737 };
2738
2739 let version_re = Regex::new(r#"(?m)^\s*__version__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2740 let author_re = Regex::new(r#"(?m)^\s*__author__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2741 let license_re = Regex::new(r#"(?m)^\s*__license__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2742 let mut metadata = DunderMetadata::default();
2743
2744 for module in imported_dunder_modules(&statements) {
2745 let Some(path) = resolve_imported_module_path(root, &module) else {
2746 continue;
2747 };
2748 let Ok(module_content) = read_file_to_string(&path) else {
2749 continue;
2750 };
2751
2752 if metadata.version.is_none() {
2753 metadata.version = version_re
2754 .as_ref()
2755 .and_then(|regex| regex.captures(&module_content))
2756 .and_then(|captures| captures.get(1))
2757 .map(|match_| match_.as_str().to_string());
2758 }
2759
2760 if metadata.author.is_none() {
2761 metadata.author = author_re
2762 .as_ref()
2763 .and_then(|regex| regex.captures(&module_content))
2764 .and_then(|captures| captures.get(1))
2765 .map(|match_| match_.as_str().to_string());
2766 }
2767
2768 if metadata.license.is_none() {
2769 metadata.license = license_re
2770 .as_ref()
2771 .and_then(|regex| regex.captures(&module_content))
2772 .and_then(|captures| captures.get(1))
2773 .map(|match_| match_.as_str().to_string());
2774 }
2775
2776 if metadata.version.is_some() && metadata.author.is_some() && metadata.license.is_some() {
2777 return metadata;
2778 }
2779 }
2780
2781 metadata
2782}
2783
2784fn imported_dunder_modules(statements: &[ast::Stmt]) -> Vec<String> {
2785 let mut modules = Vec::new();
2786
2787 for statement in statements {
2788 let ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) = statement else {
2789 continue;
2790 };
2791 let Some(module) = module.as_ref().map(|name| name.as_str()) else {
2792 continue;
2793 };
2794 let imports_dunder = names.iter().any(|alias| {
2795 matches!(
2796 alias.name.as_str(),
2797 "__version__" | "__author__" | "__license__"
2798 )
2799 });
2800 if imports_dunder {
2801 modules.push(module.to_string());
2802 }
2803 }
2804
2805 modules
2806}
2807
/// Maps a dotted module name to an existing path under `root`.
///
/// Candidates are probed in this order and the first one that exists wins:
/// `root/<module>.py`, `root/<module>/__init__.py`, `root/src/<module>.py`,
/// `root/src/<module>/__init__.py` (dots in `module` become path separators).
/// Returns `None` when none of them exists.
fn resolve_imported_module_path(root: &Path, module: &str) -> Option<PathBuf> {
    let mut module_path = PathBuf::new();
    for segment in module.split('.') {
        module_path.push(segment);
    }

    let as_file = module_path.with_extension("py");
    let as_package = module_path.join("__init__.py");
    let src_root = root.join("src");

    for base in [root, src_root.as_path()] {
        for layout in [&as_file, &as_package] {
            let candidate = base.join(layout);
            if candidate.exists() {
                return Some(candidate);
            }
        }
    }

    None
}
2819
2820fn extract_from_setup_py_ast(content: &str) -> Result<Option<PackageData>, String> {
2836 let statements = ast::Suite::parse(content, "<setup.py>").map_err(|e| format!("{}", e))?;
2837 let aliases = collect_setup_aliases(&statements);
2838 let mut evaluator = LiteralEvaluator::new(HashMap::new());
2839 build_setup_py_constants(&statements, &mut evaluator);
2840
2841 let setup_call = find_setup_call(&statements, &aliases);
2842 let Some(call_expr) = setup_call else {
2843 return Ok(None);
2844 };
2845
2846 let setup_values = extract_setup_keywords(call_expr, &mut evaluator);
2847 Ok(Some(build_setup_py_package_data(&setup_values)))
2848}
2849
2850fn build_setup_py_constants(statements: &[ast::Stmt], evaluator: &mut LiteralEvaluator) {
2851 for stmt in statements {
2852 if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = stmt {
2853 if targets.len() != 1 {
2854 continue;
2855 }
2856
2857 let Some(name) = extract_assign_name(&targets[0]) else {
2858 continue;
2859 };
2860
2861 if let Some(value) = evaluator.evaluate_expr(value.as_ref(), 0) {
2862 evaluator.insert_constant(name, value);
2863 }
2864 }
2865 }
2866}
2867
2868fn extract_assign_name(target: &ast::Expr) -> Option<String> {
2869 match target {
2870 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
2871 _ => None,
2872 }
2873}
2874
2875fn collect_setup_aliases(statements: &[ast::Stmt]) -> SetupAliases {
2876 let mut aliases = SetupAliases::default();
2877 aliases.setup_names.insert("setup".to_string());
2878
2879 for stmt in statements {
2880 match stmt {
2881 ast::Stmt::Import(ast::StmtImport { names, .. }) => {
2882 for alias in names {
2883 let module_name = alias.name.as_str();
2884 if !is_setup_module(module_name) {
2885 continue;
2886 }
2887 let alias_name = alias
2888 .asname
2889 .as_ref()
2890 .map(|name| name.as_str())
2891 .unwrap_or(module_name);
2892 aliases
2893 .module_aliases
2894 .insert(alias_name.to_string(), module_name.to_string());
2895 }
2896 }
2897 ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
2898 let Some(module_name) = module.as_ref().map(|name| name.as_str()) else {
2899 continue;
2900 };
2901 if !is_setup_module(module_name) {
2902 continue;
2903 }
2904 for alias in names {
2905 if alias.name.as_str() != "setup" {
2906 continue;
2907 }
2908 let alias_name = alias
2909 .asname
2910 .as_ref()
2911 .map(|name| name.as_str())
2912 .unwrap_or("setup");
2913 aliases.setup_names.insert(alias_name.to_string());
2914 }
2915 }
2916 _ => {}
2917 }
2918 }
2919
2920 aliases
2921}
2922
/// Reports whether `module_name` is exactly one of the modules recognised as
/// providing `setup()`: `setuptools`, `distutils` or `distutils.core`.
fn is_setup_module(module_name: &str) -> bool {
    const SETUP_MODULES: [&str; 3] = ["setuptools", "distutils", "distutils.core"];
    SETUP_MODULES.contains(&module_name)
}
2926
2927fn find_setup_call<'a>(
2928 statements: &'a [ast::Stmt],
2929 aliases: &'a SetupAliases,
2930) -> Option<&'a ast::Expr> {
2931 let mut finder = SetupCallFinder {
2932 aliases,
2933 nodes_visited: 0,
2934 };
2935 finder.find_in_statements(statements)
2936}
2937
/// Walks `setup.py` statements looking for a `setup(...)` call, giving up
/// once `MAX_SETUP_PY_AST_NODES` nodes have been visited.
struct SetupCallFinder<'a> {
    /// Names and module aliases under which `setup` may be referenced.
    aliases: &'a SetupAliases,
    /// Number of statements and expressions visited so far.
    nodes_visited: usize,
}
2942
2943impl<'a> SetupCallFinder<'a> {
2944 fn find_in_statements(&mut self, statements: &'a [ast::Stmt]) -> Option<&'a ast::Expr> {
2945 for stmt in statements {
2946 if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
2947 return None;
2948 }
2949 self.nodes_visited += 1;
2950
2951 let found = match stmt {
2952 ast::Stmt::Expr(ast::StmtExpr { value, .. }) => self.visit_expr(value.as_ref()),
2953 ast::Stmt::Assign(ast::StmtAssign { value, .. }) => self.visit_expr(value.as_ref()),
2954 ast::Stmt::If(ast::StmtIf { body, orelse, .. }) => self
2955 .find_in_statements(body)
2956 .or_else(|| self.find_in_statements(orelse)),
2957 ast::Stmt::For(ast::StmtFor { body, orelse, .. })
2958 | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => self
2959 .find_in_statements(body)
2960 .or_else(|| self.find_in_statements(orelse)),
2961 ast::Stmt::With(ast::StmtWith { body, .. }) => self.find_in_statements(body),
2962 ast::Stmt::Try(ast::StmtTry {
2963 body,
2964 orelse,
2965 finalbody,
2966 handlers,
2967 ..
2968 })
2969 | ast::Stmt::TryStar(ast::StmtTryStar {
2970 body,
2971 orelse,
2972 finalbody,
2973 handlers,
2974 ..
2975 }) => self
2976 .find_in_statements(body)
2977 .or_else(|| self.find_in_statements(orelse))
2978 .or_else(|| self.find_in_statements(finalbody))
2979 .or_else(|| {
2980 for handler in handlers {
2981 let ast::ExceptHandler::ExceptHandler(
2982 ast::ExceptHandlerExceptHandler { body, .. },
2983 ) = handler;
2984 if let Some(found) = self.find_in_statements(body) {
2985 return Some(found);
2986 }
2987 }
2988 None
2989 }),
2990 _ => None,
2991 };
2992
2993 if found.is_some() {
2994 return found;
2995 }
2996 }
2997
2998 None
2999 }
3000
3001 fn visit_expr(&mut self, expr: &'a ast::Expr) -> Option<&'a ast::Expr> {
3002 if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
3003 return None;
3004 }
3005 self.nodes_visited += 1;
3006
3007 match expr {
3008 ast::Expr::Call(ast::ExprCall { func, .. })
3009 if is_setup_call(func.as_ref(), self.aliases) =>
3010 {
3011 Some(expr)
3012 }
3013 _ => None,
3014 }
3015 }
3016}
3017
3018fn is_setup_call(func: &ast::Expr, aliases: &SetupAliases) -> bool {
3019 let Some(dotted) = dotted_name(func, 0) else {
3020 return false;
3021 };
3022
3023 if aliases.setup_names.contains(&dotted) {
3024 return true;
3025 }
3026
3027 let Some(module) = dotted.strip_suffix(".setup") else {
3028 return false;
3029 };
3030
3031 let resolved = resolve_module_alias(module, aliases);
3032 is_setup_module(&resolved)
3033}
3034
3035fn dotted_name(expr: &ast::Expr, depth: usize) -> Option<String> {
3036 if depth >= MAX_SETUP_PY_AST_DEPTH {
3037 return None;
3038 }
3039
3040 match expr {
3041 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
3042 ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
3043 let base = dotted_name(value.as_ref(), depth + 1)?;
3044 Some(format!("{}.{}", base, attr.as_str()))
3045 }
3046 _ => None,
3047 }
3048}
3049
3050fn resolve_module_alias(module: &str, aliases: &SetupAliases) -> String {
3051 if let Some(mapped) = aliases.module_aliases.get(module) {
3052 return mapped.clone();
3053 }
3054
3055 let Some((base, rest)) = module.split_once('.') else {
3056 return module.to_string();
3057 };
3058
3059 if let Some(mapped) = aliases.module_aliases.get(base) {
3060 return format!("{}.{}", mapped, rest);
3061 }
3062
3063 module.to_string()
3064}
3065
3066fn extract_setup_keywords(
3067 call_expr: &ast::Expr,
3068 evaluator: &mut LiteralEvaluator,
3069) -> HashMap<String, Value> {
3070 let mut values = HashMap::new();
3071 let ast::Expr::Call(ast::ExprCall { keywords, .. }) = call_expr else {
3072 return values;
3073 };
3074
3075 for keyword in keywords {
3076 if let Some(arg) = keyword.arg.as_ref().map(|name| name.as_str()) {
3077 if let Some(value) = evaluator.evaluate_expr(&keyword.value, 0) {
3078 values.insert(arg.to_string(), value);
3079 }
3080 } else if let Some(Value::Dict(dict)) = evaluator.evaluate_expr(&keyword.value, 0) {
3081 for (key, value) in dict {
3082 values.insert(key, value);
3083 }
3084 }
3085 }
3086
3087 values
3088}
3089
3090fn build_setup_py_package_data(values: &HashMap<String, Value>) -> PackageData {
3091 let name = get_value_string(values, "name");
3092 let version = get_value_string(values, "version");
3093 let description =
3094 get_value_string(values, "description").or_else(|| get_value_string(values, "summary"));
3095 let homepage_url =
3096 get_value_string(values, "url").or_else(|| get_value_string(values, "home_page"));
3097 let author = get_value_string(values, "author");
3098 let author_email = get_value_string(values, "author_email");
3099 let maintainer = get_value_string(values, "maintainer");
3100 let maintainer_email = get_value_string(values, "maintainer_email");
3101 let license = get_value_string(values, "license");
3102 let classifiers = values
3103 .get("classifiers")
3104 .and_then(value_to_string_list)
3105 .unwrap_or_default();
3106
3107 let mut parties = Vec::new();
3108 if author.is_some() || author_email.is_some() {
3109 parties.push(Party {
3110 r#type: Some("person".to_string()),
3111 role: Some("author".to_string()),
3112 name: author,
3113 email: author_email,
3114 url: None,
3115 organization: None,
3116 organization_url: None,
3117 timezone: None,
3118 });
3119 }
3120
3121 if maintainer.is_some() || maintainer_email.is_some() {
3122 parties.push(Party {
3123 r#type: Some("person".to_string()),
3124 role: Some("maintainer".to_string()),
3125 name: maintainer,
3126 email: maintainer_email,
3127 url: None,
3128 organization: None,
3129 organization_url: None,
3130 timezone: None,
3131 });
3132 }
3133
3134 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
3135 normalize_spdx_declared_license(license.as_deref());
3136 let extracted_license_statement = license.clone();
3137
3138 let dependencies = build_setup_py_dependencies(values);
3139 let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
3140 let mut homepage_from_project_urls = None;
3141 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
3142 let mut extra_data = HashMap::new();
3143
3144 if let Some(parsed_project_urls) = values.get("project_urls").and_then(value_to_string_pairs) {
3145 apply_project_url_mappings(
3146 &parsed_project_urls,
3147 &mut homepage_from_project_urls,
3148 &mut bug_tracking_url,
3149 &mut code_view_url,
3150 &mut vcs_url,
3151 &mut extra_data,
3152 );
3153 }
3154
3155 let extra_data = if extra_data.is_empty() {
3156 None
3157 } else {
3158 Some(extra_data)
3159 };
3160
3161 PackageData {
3162 package_type: Some(PythonParser::PACKAGE_TYPE),
3163 namespace: None,
3164 name,
3165 version,
3166 qualifiers: None,
3167 subpath: None,
3168 primary_language: Some("Python".to_string()),
3169 description,
3170 release_date: None,
3171 parties,
3172 keywords: Vec::new(),
3173 homepage_url: homepage_url.or(homepage_from_project_urls),
3174 download_url: None,
3175 size: None,
3176 sha1: None,
3177 md5: None,
3178 sha256: None,
3179 sha512: None,
3180 bug_tracking_url,
3181 code_view_url,
3182 vcs_url,
3183 copyright: None,
3184 holder: None,
3185 declared_license_expression,
3186 declared_license_expression_spdx,
3187 license_detections,
3188 other_license_expression: None,
3189 other_license_expression_spdx: None,
3190 other_license_detections: Vec::new(),
3191 extracted_license_statement,
3192 notice_text: None,
3193 source_packages: Vec::new(),
3194 file_references: Vec::new(),
3195 is_private: has_private_classifier(&classifiers),
3196 is_virtual: false,
3197 extra_data,
3198 dependencies,
3199 repository_homepage_url: None,
3200 repository_download_url: None,
3201 api_data_url: None,
3202 datasource_id: Some(DatasourceId::PypiSetupPy),
3203 purl,
3204 }
3205}
3206
3207fn build_setup_py_dependencies(values: &HashMap<String, Value>) -> Vec<Dependency> {
3208 let mut dependencies = Vec::new();
3209
3210 if let Some(reqs) = values
3211 .get("install_requires")
3212 .and_then(value_to_string_list)
3213 {
3214 dependencies.extend(build_setup_py_dependency_list(&reqs, "install", false));
3215 }
3216
3217 if let Some(reqs) = values.get("tests_require").and_then(value_to_string_list) {
3218 dependencies.extend(build_setup_py_dependency_list(&reqs, "test", true));
3219 }
3220
3221 if let Some(Value::Dict(extras)) = values.get("extras_require") {
3222 let mut extra_items: Vec<_> = extras.iter().collect();
3223 extra_items.sort_by_key(|(name, _)| *name);
3224 for (extra_name, extra_value) in extra_items {
3225 if let Some(reqs) = value_to_string_list(extra_value) {
3226 dependencies.extend(build_setup_py_dependency_list(
3227 reqs.as_slice(),
3228 extra_name,
3229 true,
3230 ));
3231 }
3232 }
3233 }
3234
3235 dependencies
3236}
3237
3238fn build_setup_py_dependency_list(
3239 reqs: &[String],
3240 scope: &str,
3241 is_optional: bool,
3242) -> Vec<Dependency> {
3243 reqs.iter()
3244 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
3245 .collect()
3246}
3247
3248fn get_value_string(values: &HashMap<String, Value>, key: &str) -> Option<String> {
3249 values.get(key).and_then(value_to_string)
3250}
3251
3252fn value_to_string(value: &Value) -> Option<String> {
3253 match value {
3254 Value::String(value) => Some(value.clone()),
3255 Value::Number(value) => Some(value.to_string()),
3256 Value::Bool(value) => Some(value.to_string()),
3257 _ => None,
3258 }
3259}
3260
3261fn value_to_string_list(value: &Value) -> Option<Vec<String>> {
3262 match value {
3263 Value::String(value) => Some(vec![value.clone()]),
3264 Value::List(values) | Value::Tuple(values) => {
3265 let mut items = Vec::new();
3266 for item in values {
3267 items.push(value_to_string(item)?);
3268 }
3269 Some(items)
3270 }
3271 _ => None,
3272 }
3273}
3274
3275fn value_to_string_pairs(value: &Value) -> Option<Vec<(String, String)>> {
3276 let Value::Dict(dict) = value else {
3277 return None;
3278 };
3279
3280 let mut pairs: Vec<(String, String)> = dict
3281 .iter()
3282 .map(|(key, value)| Some((key.clone(), value_to_string(value)?)))
3283 .collect::<Option<Vec<_>>>()?;
3284 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3285 Some(pairs)
3286}
3287
3288fn extract_rfc822_dependencies(headers: &HashMap<String, Vec<String>>) -> Vec<Dependency> {
3289 let requires_dist = super::rfc822::get_header_all(headers, "requires-dist");
3290 extract_requires_dist_dependencies(&requires_dist)
3291}
3292
3293pub(crate) fn extract_requires_dist_dependencies(requires_dist: &[String]) -> Vec<Dependency> {
3294 requires_dist
3295 .iter()
3296 .filter_map(|entry| build_rfc822_dependency(entry))
3297 .collect()
3298}
3299
3300fn build_rfc822_dependency(entry: &str) -> Option<Dependency> {
3301 build_python_dependency(entry, "install", false, None)
3302}
3303
3304fn build_python_dependency(
3305 entry: &str,
3306 default_scope: &str,
3307 default_optional: bool,
3308 marker_override: Option<&str>,
3309) -> Option<Dependency> {
3310 let (requirement_part, marker_part) = entry
3311 .split_once(';')
3312 .map(|(req, marker)| (req.trim(), Some(marker.trim())))
3313 .unwrap_or((entry.trim(), None));
3314
3315 let name = extract_setup_cfg_dependency_name(requirement_part)?;
3316 let requirement = normalize_rfc822_requirement(requirement_part);
3317 let (scope, is_optional, marker, marker_data) = parse_rfc822_marker(
3318 marker_part.or(marker_override),
3319 default_scope,
3320 default_optional,
3321 );
3322 let mut purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
3323
3324 let is_pinned = requirement
3325 .as_deref()
3326 .is_some_and(|req| req.starts_with("==") || req.starts_with("==="));
3327 if is_pinned
3328 && let Some(version) = requirement
3329 .as_deref()
3330 .map(|req| req.trim_start_matches('='))
3331 {
3332 purl.with_version(version).ok()?;
3333 }
3334
3335 let mut extra_data = HashMap::new();
3336 extra_data.extend(marker_data);
3337 if let Some(marker) = marker {
3338 extra_data.insert("marker".to_string(), serde_json::Value::String(marker));
3339 }
3340
3341 Some(Dependency {
3342 purl: Some(purl.to_string()),
3343 extracted_requirement: requirement,
3344 scope: Some(scope),
3345 is_runtime: Some(true),
3346 is_optional: Some(is_optional),
3347 is_pinned: Some(is_pinned),
3348 is_direct: Some(true),
3349 resolved_package: None,
3350 extra_data: if extra_data.is_empty() {
3351 None
3352 } else {
3353 Some(extra_data)
3354 },
3355 })
3356}
3357
3358fn normalize_rfc822_requirement(requirement_part: &str) -> Option<String> {
3359 let name = extract_setup_cfg_dependency_name(requirement_part)?;
3360 let trimmed = requirement_part.trim();
3361 let mut remainder = trimmed[name.len()..].trim();
3362
3363 if let Some(stripped) = remainder.strip_prefix('[')
3364 && let Some(end_idx) = stripped.find(']')
3365 {
3366 remainder = stripped[end_idx + 1..].trim();
3367 }
3368
3369 let remainder = remainder
3370 .strip_prefix('(')
3371 .and_then(|value| value.strip_suffix(')'))
3372 .unwrap_or(remainder)
3373 .trim();
3374
3375 if remainder.is_empty() {
3376 return None;
3377 }
3378
3379 let mut specifiers: Vec<String> = remainder
3380 .split(',')
3381 .map(|specifier| specifier.trim().replace(' ', ""))
3382 .filter(|specifier| !specifier.is_empty())
3383 .collect();
3384 specifiers.sort();
3385 Some(specifiers.join(","))
3386}
3387
3388fn parse_rfc822_marker(
3389 marker_part: Option<&str>,
3390 default_scope: &str,
3391 default_optional: bool,
3392) -> (
3393 String,
3394 bool,
3395 Option<String>,
3396 HashMap<String, serde_json::Value>,
3397) {
3398 let Some(marker) = marker_part.filter(|marker| !marker.trim().is_empty()) else {
3399 return (
3400 default_scope.to_string(),
3401 default_optional,
3402 None,
3403 HashMap::new(),
3404 );
3405 };
3406
3407 let extra_re = Regex::new(r#"extra\s*==\s*['\"]([^'\"]+)['\"]"#)
3408 .expect("extra marker regex should compile");
3409 let mut extra_data = HashMap::new();
3410
3411 if let Some(python_version) = extract_marker_field(marker, "python_version") {
3412 extra_data.insert(
3413 "python_version".to_string(),
3414 serde_json::Value::String(python_version),
3415 );
3416 }
3417 if let Some(sys_platform) = extract_marker_field(marker, "sys_platform") {
3418 extra_data.insert(
3419 "sys_platform".to_string(),
3420 serde_json::Value::String(sys_platform),
3421 );
3422 }
3423
3424 if let Some(captures) = extra_re.captures(marker)
3425 && let Some(scope) = captures.get(1)
3426 {
3427 return (
3428 scope.as_str().to_string(),
3429 true,
3430 Some(marker.trim().to_string()),
3431 extra_data,
3432 );
3433 }
3434
3435 (
3436 default_scope.to_string(),
3437 default_optional,
3438 Some(marker.trim().to_string()),
3439 extra_data,
3440 )
3441}
3442
3443fn extract_marker_field(marker: &str, field: &str) -> Option<String> {
3444 let re = Regex::new(&format!(
3445 r#"{}\s*(==|!=|<=|>=|<|>)\s*['\"]([^'\"]+)['\"]"#,
3446 field
3447 ))
3448 .ok()?;
3449 let captures = re.captures(marker)?;
3450 let operator = captures.get(1)?.as_str();
3451 let value = captures.get(2)?.as_str();
3452 Some(format!("{} {}", operator, value))
3453}
3454
3455fn parse_requires_txt(content: &str) -> Vec<Dependency> {
3456 let mut dependencies = Vec::new();
3457 let mut current_scope = "install".to_string();
3458 let mut current_optional = false;
3459 let mut current_marker: Option<String> = None;
3460
3461 for line in content.lines() {
3462 let trimmed = line.trim();
3463 if trimmed.is_empty() || trimmed.starts_with('#') {
3464 continue;
3465 }
3466
3467 if trimmed.starts_with('[') && trimmed.ends_with(']') {
3468 let inner = &trimmed[1..trimmed.len() - 1];
3469 if let Some(rest) = inner.strip_prefix(':') {
3470 current_scope = "install".to_string();
3471 current_optional = false;
3472 current_marker = Some(rest.trim().to_string());
3473 } else if let Some((scope, marker)) = inner.split_once(':') {
3474 current_scope = scope.trim().to_string();
3475 current_optional = true;
3476 current_marker = Some(marker.trim().to_string());
3477 } else {
3478 current_scope = inner.trim().to_string();
3479 current_optional = true;
3480 current_marker = None;
3481 }
3482 continue;
3483 }
3484
3485 if let Some(dependency) = build_python_dependency(
3486 trimmed,
3487 ¤t_scope,
3488 current_optional,
3489 current_marker.as_deref(),
3490 ) {
3491 dependencies.push(dependency);
3492 }
3493 }
3494
3495 dependencies
3496}
3497
/// Returns `true` when any classifier equals `Private :: Do Not Upload`,
/// compared ASCII-case-insensitively and without trimming.
fn has_private_classifier(classifiers: &[String]) -> bool {
    const PRIVATE_CLASSIFIER: &str = "Private :: Do Not Upload";

    for classifier in classifiers {
        if classifier.eq_ignore_ascii_case(PRIVATE_CLASSIFIER) {
            return true;
        }
    }
    false
}
3503
3504fn build_setup_py_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
3505 let name = name?;
3506 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
3507 if let Some(version) = version {
3508 package_url.with_version(version).ok()?;
3509 }
3510 Some(package_url.to_string())
3511}
3512
3513fn extract_from_setup_py_regex(content: &str) -> PackageData {
3514 let name = extract_setup_value(content, "name");
3515 let version = extract_setup_value(content, "version");
3516 let license_expression = extract_setup_value(content, "license");
3517
3518 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
3519 normalize_spdx_declared_license(license_expression.as_deref());
3520 let extracted_license_statement = license_expression.clone();
3521
3522 let dependencies = extract_setup_py_dependencies(content);
3523 let homepage_url = extract_setup_value(content, "url");
3524 let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
3525
3526 PackageData {
3527 package_type: Some(PythonParser::PACKAGE_TYPE),
3528 namespace: None,
3529 name,
3530 version,
3531 qualifiers: None,
3532 subpath: None,
3533 primary_language: Some("Python".to_string()),
3534 description: None,
3535 release_date: None,
3536 parties: Vec::new(),
3537 keywords: Vec::new(),
3538 homepage_url,
3539 download_url: None,
3540 size: None,
3541 sha1: None,
3542 md5: None,
3543 sha256: None,
3544 sha512: None,
3545 bug_tracking_url: None,
3546 code_view_url: None,
3547 vcs_url: None,
3548 copyright: None,
3549 holder: None,
3550 declared_license_expression,
3551 declared_license_expression_spdx,
3552 license_detections,
3553 other_license_expression: None,
3554 other_license_expression_spdx: None,
3555 other_license_detections: Vec::new(),
3556 extracted_license_statement,
3557 notice_text: None,
3558 source_packages: Vec::new(),
3559 file_references: Vec::new(),
3560 is_private: false,
3561 is_virtual: false,
3562 extra_data: None,
3563 dependencies,
3564 repository_homepage_url: None,
3565 repository_download_url: None,
3566 api_data_url: None,
3567 datasource_id: Some(DatasourceId::PypiSetupPy),
3568 purl,
3569 }
3570}
3571
3572fn package_data_to_resolved(pkg: &PackageData) -> crate::models::ResolvedPackage {
3573 crate::models::ResolvedPackage {
3574 package_type: pkg.package_type.unwrap_or(PackageType::Pypi),
3575 namespace: pkg.namespace.clone().unwrap_or_default(),
3576 name: pkg.name.clone().unwrap_or_default(),
3577 version: pkg.version.clone().unwrap_or_default(),
3578 primary_language: pkg.primary_language.clone(),
3579 download_url: pkg.download_url.clone(),
3580 sha1: pkg.sha1.clone(),
3581 sha256: pkg.sha256.clone(),
3582 sha512: pkg.sha512.clone(),
3583 md5: pkg.md5.clone(),
3584 is_virtual: pkg.is_virtual,
3585 extra_data: None,
3586 dependencies: pkg.dependencies.clone(),
3587 repository_homepage_url: pkg.repository_homepage_url.clone(),
3588 repository_download_url: pkg.repository_download_url.clone(),
3589 api_data_url: pkg.api_data_url.clone(),
3590 datasource_id: pkg.datasource_id,
3591 purl: pkg.purl.clone(),
3592 }
3593}
3594
3595fn extract_from_pypi_json(path: &Path) -> PackageData {
3596 let default = PackageData {
3597 package_type: Some(PythonParser::PACKAGE_TYPE),
3598 datasource_id: Some(DatasourceId::PypiJson),
3599 ..Default::default()
3600 };
3601
3602 let content = match read_file_to_string(path) {
3603 Ok(content) => content,
3604 Err(error) => {
3605 warn!("Failed to read pypi.json at {:?}: {}", path, error);
3606 return default;
3607 }
3608 };
3609
3610 let root: serde_json::Value = match serde_json::from_str(&content) {
3611 Ok(value) => value,
3612 Err(error) => {
3613 warn!("Failed to parse pypi.json at {:?}: {}", path, error);
3614 return default;
3615 }
3616 };
3617
3618 let Some(info) = root.get("info").and_then(|value| value.as_object()) else {
3619 warn!("No info object found in pypi.json at {:?}", path);
3620 return default;
3621 };
3622
3623 let name = info
3624 .get("name")
3625 .and_then(|value| value.as_str())
3626 .map(ToOwned::to_owned);
3627 let version = info
3628 .get("version")
3629 .and_then(|value| value.as_str())
3630 .map(ToOwned::to_owned);
3631 let summary = info
3632 .get("summary")
3633 .and_then(|value| value.as_str())
3634 .map(ToOwned::to_owned);
3635 let description = info
3636 .get("description")
3637 .and_then(|value| value.as_str())
3638 .filter(|value| !value.trim().is_empty())
3639 .map(ToOwned::to_owned)
3640 .or(summary);
3641 let mut homepage_url = info
3642 .get("home_page")
3643 .and_then(|value| value.as_str())
3644 .map(ToOwned::to_owned);
3645 let author = info
3646 .get("author")
3647 .and_then(|value| value.as_str())
3648 .filter(|value| !value.trim().is_empty())
3649 .map(ToOwned::to_owned);
3650 let author_email = info
3651 .get("author_email")
3652 .and_then(|value| value.as_str())
3653 .filter(|value| !value.trim().is_empty())
3654 .map(ToOwned::to_owned);
3655 let license = info
3656 .get("license")
3657 .and_then(|value| value.as_str())
3658 .filter(|value| !value.trim().is_empty())
3659 .map(ToOwned::to_owned);
3660 let keywords = parse_setup_cfg_keywords(
3661 info.get("keywords")
3662 .and_then(|value| value.as_str())
3663 .map(ToOwned::to_owned),
3664 );
3665 let classifiers = info
3666 .get("classifiers")
3667 .and_then(|value| value.as_array())
3668 .map(|values| {
3669 values
3670 .iter()
3671 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
3672 .collect::<Vec<_>>()
3673 })
3674 .unwrap_or_default();
3675
3676 let mut parties = Vec::new();
3677 if author.is_some() || author_email.is_some() {
3678 parties.push(Party {
3679 r#type: Some("person".to_string()),
3680 role: Some("author".to_string()),
3681 name: author,
3682 email: author_email,
3683 url: None,
3684 organization: None,
3685 organization_url: None,
3686 timezone: None,
3687 });
3688 }
3689
3690 let mut bug_tracking_url = None;
3691 let mut code_view_url = None;
3692 let mut vcs_url = None;
3693 let mut extra_data = HashMap::new();
3694
3695 let parsed_project_urls = info
3696 .get("project_urls")
3697 .and_then(|value| value.as_object())
3698 .map(|map| {
3699 let mut pairs: Vec<(String, String)> = map
3700 .iter()
3701 .filter_map(|(key, value)| Some((key.clone(), value.as_str()?.to_string())))
3702 .collect();
3703 pairs.sort_by(|left, right| left.0.cmp(&right.0));
3704 pairs
3705 })
3706 .unwrap_or_default();
3707
3708 apply_project_url_mappings(
3709 &parsed_project_urls,
3710 &mut homepage_url,
3711 &mut bug_tracking_url,
3712 &mut code_view_url,
3713 &mut vcs_url,
3714 &mut extra_data,
3715 );
3716
3717 let (download_url, size, sha256) = root
3718 .get("urls")
3719 .and_then(|value| value.as_array())
3720 .map(|urls| select_pypi_json_artifact(urls))
3721 .unwrap_or((None, None, None));
3722
3723 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
3724 normalize_spdx_declared_license(license.as_deref());
3725 let dependencies = info
3726 .get("requires_dist")
3727 .and_then(|value| value.as_array())
3728 .map(|entries| {
3729 entries
3730 .iter()
3731 .filter_map(|entry| entry.as_str().map(ToOwned::to_owned))
3732 .collect::<Vec<_>>()
3733 })
3734 .map(|entries| extract_requires_dist_dependencies(&entries))
3735 .unwrap_or_default();
3736
3737 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
3738 build_pypi_urls(name.as_deref(), version.as_deref());
3739
3740 PackageData {
3741 package_type: Some(PythonParser::PACKAGE_TYPE),
3742 namespace: None,
3743 name,
3744 version,
3745 qualifiers: None,
3746 subpath: None,
3747 primary_language: None,
3748 description,
3749 release_date: None,
3750 parties,
3751 keywords,
3752 homepage_url: homepage_url.or(repository_homepage_url.clone()),
3753 download_url,
3754 size,
3755 sha1: None,
3756 md5: None,
3757 sha256,
3758 sha512: None,
3759 bug_tracking_url,
3760 code_view_url,
3761 vcs_url,
3762 copyright: None,
3763 holder: None,
3764 declared_license_expression,
3765 declared_license_expression_spdx,
3766 license_detections,
3767 other_license_expression: None,
3768 other_license_expression_spdx: None,
3769 other_license_detections: Vec::new(),
3770 extracted_license_statement: license,
3771 notice_text: None,
3772 source_packages: Vec::new(),
3773 file_references: Vec::new(),
3774 is_private: has_private_classifier(&classifiers),
3775 is_virtual: false,
3776 extra_data: if extra_data.is_empty() {
3777 None
3778 } else {
3779 Some(extra_data)
3780 },
3781 dependencies,
3782 repository_homepage_url,
3783 repository_download_url,
3784 api_data_url,
3785 datasource_id: Some(DatasourceId::PypiJson),
3786 purl,
3787 }
3788}
3789
3790fn select_pypi_json_artifact(
3791 urls: &[serde_json::Value],
3792) -> (Option<String>, Option<u64>, Option<String>) {
3793 let selected = urls
3794 .iter()
3795 .find(|entry| entry.get("packagetype").and_then(|value| value.as_str()) == Some("sdist"))
3796 .or_else(|| urls.first());
3797
3798 let Some(entry) = selected else {
3799 return (None, None, None);
3800 };
3801
3802 let download_url = entry
3803 .get("url")
3804 .and_then(|value| value.as_str())
3805 .map(ToOwned::to_owned);
3806 let size = entry.get("size").and_then(|value| value.as_u64());
3807 let sha256 = entry
3808 .get("digests")
3809 .and_then(|value| value.as_object())
3810 .and_then(|digests| digests.get("sha256"))
3811 .and_then(|value| value.as_str())
3812 .map(ToOwned::to_owned);
3813
3814 (download_url, size, sha256)
3815}
3816
3817fn extract_from_pip_inspect(path: &Path) -> PackageData {
3818 let content = match read_file_to_string(path) {
3819 Ok(content) => content,
3820 Err(e) => {
3821 warn!("Failed to read pip-inspect.deplock at {:?}: {}", path, e);
3822 return default_package_data(path);
3823 }
3824 };
3825
3826 let root: serde_json::Value = match serde_json::from_str(&content) {
3827 Ok(value) => value,
3828 Err(e) => {
3829 warn!(
3830 "Failed to parse pip-inspect.deplock JSON at {:?}: {}",
3831 path, e
3832 );
3833 return default_package_data(path);
3834 }
3835 };
3836
3837 let installed = match root.get("installed").and_then(|v| v.as_array()) {
3838 Some(arr) => arr,
3839 None => {
3840 warn!(
3841 "No 'installed' array found in pip-inspect.deplock at {:?}",
3842 path
3843 );
3844 return default_package_data(path);
3845 }
3846 };
3847
3848 let pip_version = root
3849 .get("pip_version")
3850 .and_then(|v| v.as_str())
3851 .map(String::from);
3852 let inspect_version = root
3853 .get("version")
3854 .and_then(|v| v.as_str())
3855 .map(String::from);
3856
3857 let mut main_package: Option<PackageData> = None;
3858 let mut dependencies: Vec<Dependency> = Vec::new();
3859
3860 for package_entry in installed {
3861 let metadata = match package_entry.get("metadata") {
3862 Some(m) => m,
3863 None => continue,
3864 };
3865
3866 let is_requested = package_entry
3867 .get("requested")
3868 .and_then(|v| v.as_bool())
3869 .unwrap_or(false);
3870 let has_direct_url = package_entry.get("direct_url").is_some();
3871
3872 let name = metadata
3873 .get("name")
3874 .and_then(|v| v.as_str())
3875 .map(String::from);
3876 let version = metadata
3877 .get("version")
3878 .and_then(|v| v.as_str())
3879 .map(String::from);
3880 let summary = metadata
3881 .get("summary")
3882 .and_then(|v| v.as_str())
3883 .map(String::from);
3884 let home_page = metadata
3885 .get("home_page")
3886 .and_then(|v| v.as_str())
3887 .map(String::from);
3888 let author = metadata
3889 .get("author")
3890 .and_then(|v| v.as_str())
3891 .map(String::from);
3892 let author_email = metadata
3893 .get("author_email")
3894 .and_then(|v| v.as_str())
3895 .map(String::from);
3896 let license = metadata
3897 .get("license")
3898 .and_then(|v| v.as_str())
3899 .map(String::from);
3900 let description = metadata
3901 .get("description")
3902 .and_then(|v| v.as_str())
3903 .map(String::from);
3904 let keywords = metadata
3905 .get("keywords")
3906 .and_then(|v| v.as_array())
3907 .map(|arr| {
3908 arr.iter()
3909 .filter_map(|k| k.as_str().map(String::from))
3910 .collect::<Vec<_>>()
3911 })
3912 .unwrap_or_default();
3913
3914 let mut parties = Vec::new();
3915 if author.is_some() || author_email.is_some() {
3916 parties.push(Party {
3917 r#type: Some("person".to_string()),
3918 role: Some("author".to_string()),
3919 name: author,
3920 email: author_email,
3921 url: None,
3922 organization: None,
3923 organization_url: None,
3924 timezone: None,
3925 });
3926 }
3927
3928 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
3929 normalize_spdx_declared_license(license.as_deref());
3930 let extracted_license_statement = license.clone();
3931 let requires_dist = metadata
3932 .get("requires_dist")
3933 .and_then(|v| v.as_array())
3934 .map(|entries| {
3935 entries
3936 .iter()
3937 .filter_map(|entry| entry.as_str().map(ToOwned::to_owned))
3938 .collect::<Vec<_>>()
3939 })
3940 .unwrap_or_default();
3941 let parsed_dependencies = extract_requires_dist_dependencies(&requires_dist);
3942
3943 let purl = name.as_ref().and_then(|n| {
3944 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
3945 if let Some(v) = &version {
3946 package_url.with_version(v).ok()?;
3947 }
3948 Some(package_url.to_string())
3949 });
3950
3951 if is_requested && has_direct_url {
3952 let mut extra_data = HashMap::new();
3953 if let Some(pv) = &pip_version {
3954 extra_data.insert(
3955 "pip_version".to_string(),
3956 serde_json::Value::String(pv.clone()),
3957 );
3958 }
3959 if let Some(iv) = &inspect_version {
3960 extra_data.insert(
3961 "inspect_version".to_string(),
3962 serde_json::Value::String(iv.clone()),
3963 );
3964 }
3965
3966 main_package = Some(PackageData {
3967 package_type: Some(PythonParser::PACKAGE_TYPE),
3968 namespace: None,
3969 name,
3970 version,
3971 qualifiers: None,
3972 subpath: None,
3973 primary_language: Some("Python".to_string()),
3974 description: description.or(summary),
3975 release_date: None,
3976 parties,
3977 keywords,
3978 homepage_url: home_page,
3979 download_url: None,
3980 size: None,
3981 sha1: None,
3982 md5: None,
3983 sha256: None,
3984 sha512: None,
3985 bug_tracking_url: None,
3986 code_view_url: None,
3987 vcs_url: None,
3988 copyright: None,
3989 holder: None,
3990 declared_license_expression,
3991 declared_license_expression_spdx,
3992 license_detections,
3993 other_license_expression: None,
3994 other_license_expression_spdx: None,
3995 other_license_detections: Vec::new(),
3996 extracted_license_statement,
3997 notice_text: None,
3998 source_packages: Vec::new(),
3999 file_references: Vec::new(),
4000 is_private: false,
4001 is_virtual: true,
4002 extra_data: if extra_data.is_empty() {
4003 None
4004 } else {
4005 Some(extra_data)
4006 },
4007 dependencies: parsed_dependencies,
4008 repository_homepage_url: None,
4009 repository_download_url: None,
4010 api_data_url: None,
4011 datasource_id: Some(DatasourceId::PypiInspectDeplock),
4012 purl,
4013 });
4014 } else {
4015 let resolved_package = PackageData {
4016 package_type: Some(PythonParser::PACKAGE_TYPE),
4017 namespace: None,
4018 name: name.clone(),
4019 version: version.clone(),
4020 qualifiers: None,
4021 subpath: None,
4022 primary_language: Some("Python".to_string()),
4023 description: description.or(summary),
4024 release_date: None,
4025 parties,
4026 keywords,
4027 homepage_url: home_page,
4028 download_url: None,
4029 size: None,
4030 sha1: None,
4031 md5: None,
4032 sha256: None,
4033 sha512: None,
4034 bug_tracking_url: None,
4035 code_view_url: None,
4036 vcs_url: None,
4037 copyright: None,
4038 holder: None,
4039 declared_license_expression,
4040 declared_license_expression_spdx,
4041 license_detections,
4042 other_license_expression: None,
4043 other_license_expression_spdx: None,
4044 other_license_detections: Vec::new(),
4045 extracted_license_statement,
4046 notice_text: None,
4047 source_packages: Vec::new(),
4048 file_references: Vec::new(),
4049 is_private: false,
4050 is_virtual: true,
4051 extra_data: None,
4052 dependencies: parsed_dependencies,
4053 repository_homepage_url: None,
4054 repository_download_url: None,
4055 api_data_url: None,
4056 datasource_id: Some(DatasourceId::PypiInspectDeplock),
4057 purl: purl.clone(),
4058 };
4059
4060 let resolved = package_data_to_resolved(&resolved_package);
4061 dependencies.push(Dependency {
4062 purl,
4063 extracted_requirement: None,
4064 scope: None,
4065 is_runtime: Some(true),
4066 is_optional: Some(false),
4067 is_pinned: Some(true),
4068 is_direct: Some(is_requested),
4069 resolved_package: Some(Box::new(resolved)),
4070 extra_data: None,
4071 });
4072 }
4073 }
4074
4075 if let Some(mut main_pkg) = main_package {
4076 let direct_requirement_purls: HashSet<String> = main_pkg
4077 .dependencies
4078 .iter()
4079 .filter_map(|dep| dep.purl.as_deref().map(base_dependency_purl))
4080 .collect();
4081
4082 let resolved_requirement_purls: HashSet<String> = dependencies
4083 .iter()
4084 .filter_map(|dep| dep.purl.as_deref().map(base_dependency_purl))
4085 .collect();
4086
4087 let unresolved_dependencies = main_pkg
4088 .dependencies
4089 .iter()
4090 .filter(|dep| {
4091 dep.purl.as_ref().is_some_and(|purl| {
4092 !resolved_requirement_purls.contains(&base_dependency_purl(purl))
4093 })
4094 })
4095 .cloned()
4096 .collect::<Vec<_>>();
4097
4098 for dependency in &mut dependencies {
4099 if dependency
4100 .purl
4101 .as_ref()
4102 .is_some_and(|purl| direct_requirement_purls.contains(&base_dependency_purl(purl)))
4103 {
4104 dependency.is_direct = Some(true);
4105 }
4106 }
4107
4108 main_pkg.dependencies = dependencies;
4109 main_pkg.dependencies.extend(unresolved_dependencies);
4110 main_pkg
4111 } else {
4112 default_package_data(path)
4113 }
4114}
4115
/// Returns the part of `purl` before its first `@` (the purl without version),
/// or the whole string when it contains no `@`.
fn base_dependency_purl(purl: &str) -> String {
    let base = match purl.split_once('@') {
        Some((before_version, _)) => before_version,
        None => purl,
    };
    base.to_owned()
}
4121
/// Parsed `setup.cfg` contents: lower-cased section name -> lower-cased key ->
/// the values collected for that key (the inline value, if any, followed by
/// its continuation lines).
type IniSections = HashMap<String, HashMap<String, Vec<String>>>;
4123
4124fn extract_from_setup_cfg(path: &Path) -> PackageData {
4125 let content = match read_file_to_string(path) {
4126 Ok(content) => content,
4127 Err(e) => {
4128 warn!("Failed to read setup.cfg at {:?}: {}", path, e);
4129 return default_package_data(path);
4130 }
4131 };
4132
4133 let sections = parse_setup_cfg(&content);
4134 let name = get_ini_value(§ions, "metadata", "name");
4135 let version = get_ini_value(§ions, "metadata", "version");
4136 let description = get_ini_value(§ions, "metadata", "description");
4137 let author = get_ini_value(§ions, "metadata", "author");
4138 let author_email = get_ini_value(§ions, "metadata", "author_email");
4139 let maintainer = get_ini_value(§ions, "metadata", "maintainer");
4140 let maintainer_email = get_ini_value(§ions, "metadata", "maintainer_email");
4141 let license = get_ini_value(§ions, "metadata", "license");
4142 let mut homepage_url = get_ini_value(§ions, "metadata", "url");
4143 let classifiers = get_ini_values(§ions, "metadata", "classifiers");
4144 let keywords = parse_setup_cfg_keywords(get_ini_value(§ions, "metadata", "keywords"));
4145 let python_requires = get_ini_value(§ions, "options", "python_requires");
4146 let parsed_project_urls =
4147 parse_setup_cfg_project_urls(&get_ini_values(§ions, "metadata", "project_urls"));
4148 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
4149 let mut extra_data = HashMap::new();
4150
4151 let mut parties = Vec::new();
4152 if author.is_some() || author_email.is_some() {
4153 parties.push(Party {
4154 r#type: Some("person".to_string()),
4155 role: Some("author".to_string()),
4156 name: author,
4157 email: author_email,
4158 url: None,
4159 organization: None,
4160 organization_url: None,
4161 timezone: None,
4162 });
4163 }
4164
4165 if maintainer.is_some() || maintainer_email.is_some() {
4166 parties.push(Party {
4167 r#type: Some("person".to_string()),
4168 role: Some("maintainer".to_string()),
4169 name: maintainer,
4170 email: maintainer_email,
4171 url: None,
4172 organization: None,
4173 organization_url: None,
4174 timezone: None,
4175 });
4176 }
4177
4178 let declared_license_expression = None;
4179 let declared_license_expression_spdx = None;
4180 let license_detections = Vec::new();
4181 let extracted_license_statement = license.clone();
4182
4183 let dependencies = extract_setup_cfg_dependencies(§ions);
4184
4185 if let Some(value) = python_requires {
4186 extra_data.insert(
4187 "python_requires".to_string(),
4188 serde_json::Value::String(value),
4189 );
4190 }
4191
4192 apply_project_url_mappings(
4193 &parsed_project_urls,
4194 &mut homepage_url,
4195 &mut bug_tracking_url,
4196 &mut code_view_url,
4197 &mut vcs_url,
4198 &mut extra_data,
4199 );
4200
4201 let extra_data = if extra_data.is_empty() {
4202 None
4203 } else {
4204 Some(extra_data)
4205 };
4206
4207 let purl = name.as_ref().and_then(|n| {
4208 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
4209 if let Some(v) = &version {
4210 package_url.with_version(v).ok()?;
4211 }
4212 Some(package_url.to_string())
4213 });
4214
4215 PackageData {
4216 package_type: Some(PythonParser::PACKAGE_TYPE),
4217 namespace: None,
4218 name,
4219 version,
4220 qualifiers: None,
4221 subpath: None,
4222 primary_language: Some("Python".to_string()),
4223 description,
4224 release_date: None,
4225 parties,
4226 keywords,
4227 homepage_url,
4228 download_url: None,
4229 size: None,
4230 sha1: None,
4231 md5: None,
4232 sha256: None,
4233 sha512: None,
4234 bug_tracking_url,
4235 code_view_url,
4236 vcs_url,
4237 copyright: None,
4238 holder: None,
4239 declared_license_expression,
4240 declared_license_expression_spdx,
4241 license_detections,
4242 other_license_expression: None,
4243 other_license_expression_spdx: None,
4244 other_license_detections: Vec::new(),
4245 extracted_license_statement,
4246 notice_text: None,
4247 source_packages: Vec::new(),
4248 file_references: Vec::new(),
4249 is_private: has_private_classifier(&classifiers),
4250 is_virtual: false,
4251 extra_data,
4252 dependencies,
4253 repository_homepage_url: None,
4254 repository_download_url: None,
4255 api_data_url: None,
4256 datasource_id: Some(DatasourceId::PypiSetupCfg),
4257 purl,
4258 }
4259}
4260
/// Splits a comma-separated keyword string into trimmed, non-empty keywords.
///
/// `None` yields an empty list.
fn parse_setup_cfg_keywords(value: Option<String>) -> Vec<String> {
    let mut parsed = Vec::new();

    if let Some(raw) = value {
        for candidate in raw.split(',') {
            let keyword = candidate.trim();
            if !keyword.is_empty() {
                parsed.push(keyword.to_owned());
            }
        }
    }

    parsed
}
4273
/// Turns `label = url` entries into `(label, url)` pairs.
///
/// Each entry is split at its first `=`; both halves are trimmed. Entries
/// without `=`, or with an empty label or URL, are skipped. Input order is
/// preserved.
fn parse_setup_cfg_project_urls(entries: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::new();

    for entry in entries {
        let Some((raw_label, raw_url)) = entry.split_once('=') else {
            continue;
        };
        let (label, url) = (raw_label.trim(), raw_url.trim());
        if !label.is_empty() && !url.is_empty() {
            pairs.push((label.to_owned(), url.to_owned()));
        }
    }

    pairs
}
4289
4290fn apply_project_url_mappings(
4291 parsed_urls: &[(String, String)],
4292 homepage_url: &mut Option<String>,
4293 bug_tracking_url: &mut Option<String>,
4294 code_view_url: &mut Option<String>,
4295 vcs_url: &mut Option<String>,
4296 extra_data: &mut HashMap<String, serde_json::Value>,
4297) {
4298 for (label, url) in parsed_urls {
4299 let label_lower = label.to_lowercase();
4300
4301 if bug_tracking_url.is_none()
4302 && matches!(
4303 label_lower.as_str(),
4304 "tracker"
4305 | "bug reports"
4306 | "bug tracker"
4307 | "issues"
4308 | "issue tracker"
4309 | "github: issues"
4310 )
4311 {
4312 *bug_tracking_url = Some(url.clone());
4313 } else if code_view_url.is_none()
4314 && matches!(label_lower.as_str(), "source" | "source code" | "code")
4315 {
4316 *code_view_url = Some(url.clone());
4317 } else if vcs_url.is_none()
4318 && matches!(
4319 label_lower.as_str(),
4320 "github" | "gitlab" | "github: repo" | "repository"
4321 )
4322 {
4323 *vcs_url = Some(url.clone());
4324 } else if homepage_url.is_none()
4325 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
4326 {
4327 *homepage_url = Some(url.clone());
4328 } else if label_lower == "changelog" {
4329 extra_data.insert(
4330 "changelog_url".to_string(),
4331 serde_json::Value::String(url.clone()),
4332 );
4333 }
4334 }
4335
4336 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
4337 .iter()
4338 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
4339 .collect();
4340
4341 if !project_urls_json.is_empty() {
4342 extra_data.insert(
4343 "project_urls".to_string(),
4344 serde_json::Value::Object(project_urls_json),
4345 );
4346 }
4347}
4348
4349fn parse_setup_cfg(content: &str) -> IniSections {
4350 let mut sections: IniSections = HashMap::new();
4351 let mut current_section: Option<String> = None;
4352 let mut current_key: Option<String> = None;
4353
4354 for raw_line in content.lines() {
4355 let line = raw_line.trim_end_matches('\r');
4356 let trimmed = line.trim();
4357 if trimmed.is_empty() {
4358 continue;
4359 }
4360
4361 let stripped = line.trim_start();
4362 if stripped.starts_with('#') || stripped.starts_with(';') {
4363 continue;
4364 }
4365
4366 if stripped.starts_with('[') && stripped.ends_with(']') {
4367 let section_name = stripped
4368 .trim_start_matches('[')
4369 .trim_end_matches(']')
4370 .trim()
4371 .to_ascii_lowercase();
4372 current_section = if section_name.is_empty() {
4373 None
4374 } else {
4375 Some(section_name)
4376 };
4377 current_key = None;
4378 continue;
4379 }
4380
4381 if (line.starts_with(' ') || line.starts_with('\t')) && current_key.is_some() {
4382 if let (Some(section), Some(key)) = (current_section.as_ref(), current_key.as_ref()) {
4383 let value = stripped.trim();
4384 if !value.is_empty() {
4385 sections
4386 .entry(section.clone())
4387 .or_default()
4388 .entry(key.clone())
4389 .or_default()
4390 .push(value.to_string());
4391 }
4392 }
4393 continue;
4394 }
4395
4396 if let Some((key, value)) = stripped.split_once('=')
4397 && let Some(section) = current_section.as_ref()
4398 {
4399 let key_name = key.trim().to_ascii_lowercase();
4400 let value_trimmed = value.trim();
4401 let entry = sections
4402 .entry(section.clone())
4403 .or_default()
4404 .entry(key_name.clone())
4405 .or_default();
4406 if !value_trimmed.is_empty() {
4407 entry.push(value_trimmed.to_string());
4408 }
4409 current_key = Some(key_name);
4410 }
4411 }
4412
4413 sections
4414}
4415
4416fn get_ini_value(sections: &IniSections, section: &str, key: &str) -> Option<String> {
4417 sections
4418 .get(§ion.to_ascii_lowercase())
4419 .and_then(|values| values.get(&key.to_ascii_lowercase()))
4420 .and_then(|entries| entries.first())
4421 .map(|value| value.trim().to_string())
4422}
4423
4424fn get_ini_values(sections: &IniSections, section: &str, key: &str) -> Vec<String> {
4425 sections
4426 .get(§ion.to_ascii_lowercase())
4427 .and_then(|values| values.get(&key.to_ascii_lowercase()))
4428 .cloned()
4429 .unwrap_or_default()
4430}
4431
4432fn extract_setup_cfg_dependencies(sections: &IniSections) -> Vec<Dependency> {
4433 let mut dependencies = Vec::new();
4434
4435 for (sub_section, scope) in [
4436 ("install_requires", "install"),
4437 ("tests_require", "test"),
4438 ("setup_requires", "setup"),
4439 ] {
4440 let reqs = get_ini_values(sections, "options", sub_section);
4441 dependencies.extend(parse_setup_cfg_requirements(&reqs, scope, false));
4442 }
4443
4444 if let Some(extras) = sections.get("options.extras_require") {
4445 let mut extra_items: Vec<_> = extras.iter().collect();
4446 extra_items.sort_by_key(|(name, _)| *name);
4447 for (extra_name, reqs) in extra_items {
4448 dependencies.extend(parse_setup_cfg_requirements(reqs, extra_name, true));
4449 }
4450 }
4451
4452 dependencies
4453}
4454
4455fn parse_setup_cfg_requirements(
4456 reqs: &[String],
4457 scope: &str,
4458 is_optional: bool,
4459) -> Vec<Dependency> {
4460 reqs.iter()
4461 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
4462 .collect()
4463}
4464
4465fn build_setup_cfg_dependency(req: &str, scope: &str, is_optional: bool) -> Option<Dependency> {
4466 let trimmed = req.trim();
4467 if trimmed.is_empty() || trimmed.starts_with('#') {
4468 return None;
4469 }
4470
4471 let name = extract_setup_cfg_dependency_name(trimmed)?;
4472 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4473
4474 Some(Dependency {
4475 purl: Some(purl.to_string()),
4476 extracted_requirement: Some(normalize_setup_cfg_requirement(trimmed)),
4477 scope: Some(scope.to_string()),
4478 is_runtime: Some(true),
4479 is_optional: Some(is_optional),
4480 is_pinned: Some(false),
4481 is_direct: Some(true),
4482 resolved_package: None,
4483 extra_data: None,
4484 })
4485}
4486
/// Extracts the leading package name from a requirement string.
///
/// After trimming, the name runs up to the first whitespace character or the
/// first of `<`, `>`, `=`, `!`, `~`, `;`, `[`, `(`, `@`. `(` and `@` are
/// included so that `foo(>=1.0)` and `pip@https://...` (no space before the
/// delimiter) yield `foo` and `pip` instead of swallowing the version or URL
/// part.
///
/// Returns `None` when the input is blank or starts with a delimiter.
fn extract_setup_cfg_dependency_name(req: &str) -> Option<String> {
    let trimmed = req.trim();

    let end = trimmed
        .find(|c: char| {
            c.is_whitespace()
                || matches!(c, '<' | '>' | '=' | '!' | '~' | ';' | '[' | '(' | '@')
        })
        .unwrap_or(trimmed.len());

    // `trimmed` has no leading whitespace and `end` stops at the first
    // whitespace, so the slice needs no further trimming.
    let name = &trimmed[..end];
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
4503
/// Returns `req` with every whitespace character removed.
fn normalize_setup_cfg_requirement(req: &str) -> String {
    req.split_whitespace().collect()
}
4507
/// Finds a `key = "value"` / `key = 'value'` assignment in `setup.py` source
/// text and returns the quoted value.
///
/// Forms are tried in a fixed order: double-quoted before single-quoted, and
/// within each quote style `key=`, `key =`, `key= `, `key = `. For each form
/// the first usable occurrence wins.
///
/// An occurrence is skipped when the key is the tail of a longer identifier
/// (`url` does not match inside `download_url=`). The value ends at the next
/// occurrence of the quote character that opened it, so `"it's"` is returned
/// whole. Escaped quotes inside the value are not interpreted.
///
/// Returns `None` when no form matches.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    const QUOTES: [char; 2] = ['"', '\''];
    const SEPARATORS: [&str; 4] = ["=", " =", "= ", " = "];

    for quote in QUOTES {
        for separator in SEPARATORS {
            let pattern = format!("{key}{separator}{quote}");

            for (start_idx, _) in content.match_indices(pattern.as_str()) {
                // Reject matches where `key` merely ends a longer identifier.
                let inside_identifier = content[..start_idx]
                    .chars()
                    .next_back()
                    .is_some_and(|c| c.is_alphanumeric() || c == '_');
                if inside_identifier {
                    continue;
                }

                let remaining = &content[start_idx + pattern.len()..];
                // Close on the same quote character that opened the value.
                if let Some(end_idx) = remaining.find(quote) {
                    return Some(remaining[..end_idx].to_string());
                }
            }
        }
    }

    None
}
4533
4534fn extract_setup_py_dependencies(content: &str) -> Vec<Dependency> {
4535 let mut dependencies = Vec::new();
4536
4537 if let Some(tests_deps) = extract_tests_require(content) {
4538 dependencies.extend(tests_deps);
4539 }
4540
4541 if let Some(extras_deps) = extract_extras_require(content) {
4542 dependencies.extend(extras_deps);
4543 }
4544
4545 dependencies
4546}
4547
4548fn extract_tests_require(content: &str) -> Option<Vec<Dependency>> {
4549 let pattern = r"tests_require\s*=\s*\[([^\]]+)\]";
4550 let re = Regex::new(pattern).ok()?;
4551 let captures = re.captures(content)?;
4552 let deps_str = captures.get(1)?.as_str();
4553
4554 let deps = parse_setup_py_dep_list(deps_str, "test", true);
4555 if deps.is_empty() { None } else { Some(deps) }
4556}
4557
4558fn extract_extras_require(content: &str) -> Option<Vec<Dependency>> {
4559 let pattern = r"extras_require\s*=\s*\{([^}]+)\}";
4560 let re = Regex::new(pattern).ok()?;
4561 let captures = re.captures(content)?;
4562 let dict_content = captures.get(1)?.as_str();
4563
4564 let mut all_deps = Vec::new();
4565
4566 let entry_pattern = r#"['"]([^'"]+)['"]\s*:\s*\[([^\]]+)\]"#;
4567 let entry_re = Regex::new(entry_pattern).ok()?;
4568
4569 for entry_cap in entry_re.captures_iter(dict_content) {
4570 if let (Some(extra_name), Some(deps_str)) = (entry_cap.get(1), entry_cap.get(2)) {
4571 let deps = parse_setup_py_dep_list(deps_str.as_str(), extra_name.as_str(), true);
4572 all_deps.extend(deps);
4573 }
4574 }
4575
4576 if all_deps.is_empty() {
4577 None
4578 } else {
4579 Some(all_deps)
4580 }
4581}
4582
4583fn parse_setup_py_dep_list(deps_str: &str, scope: &str, is_optional: bool) -> Vec<Dependency> {
4584 let dep_pattern = r#"['"]([^'"]+)['"]"#;
4585 let re = match Regex::new(dep_pattern) {
4586 Ok(r) => r,
4587 Err(_) => return Vec::new(),
4588 };
4589
4590 re.captures_iter(deps_str)
4591 .filter_map(|cap| {
4592 let dep_str = cap.get(1)?.as_str().trim();
4593 if dep_str.is_empty() {
4594 return None;
4595 }
4596
4597 let name = extract_setup_cfg_dependency_name(dep_str)?;
4598 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
4599
4600 Some(Dependency {
4601 purl: Some(purl.to_string()),
4602 extracted_requirement: Some(dep_str.to_string()),
4603 scope: Some(scope.to_string()),
4604 is_runtime: Some(true),
4605 is_optional: Some(is_optional),
4606 is_pinned: Some(false),
4607 is_direct: Some(true),
4608 resolved_package: None,
4609 extra_data: None,
4610 })
4611 })
4612 .collect()
4613}
4614
4615pub(crate) fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
4617 let content = read_file_to_string(path).map_err(|e| e.to_string())?;
4618 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
4619}
4620
4621fn calculate_file_checksums(path: &Path) -> (Option<u64>, Option<String>) {
4632 let mut file = match File::open(path) {
4633 Ok(f) => f,
4634 Err(_) => return (None, None),
4635 };
4636
4637 let metadata = match file.metadata() {
4638 Ok(m) => m,
4639 Err(_) => return (None, None),
4640 };
4641 let size = metadata.len();
4642
4643 let mut hasher = Sha256::new();
4644 let mut buffer = vec![0; 8192];
4645
4646 loop {
4647 match file.read(&mut buffer) {
4648 Ok(0) => break,
4649 Ok(n) => hasher.update(&buffer[..n]),
4650 Err(_) => return (Some(size), None),
4651 }
4652 }
4653
4654 let hash = hex::encode(hasher.finalize());
4655 (Some(size), Some(hash))
4656}
4657
4658fn default_package_data(path: &Path) -> PackageData {
4659 PackageData {
4660 package_type: Some(PythonParser::PACKAGE_TYPE),
4661 primary_language: Some("Python".to_string()),
4662 datasource_id: infer_python_datasource_id(path),
4663 ..Default::default()
4664 }
4665}
4666
4667fn infer_python_datasource_id(path: &Path) -> Option<DatasourceId> {
4668 let file_name = path.file_name().and_then(|name| name.to_str());
4669
4670 match file_name {
4671 Some("pyproject.toml") => Some(DatasourceId::PypiPyprojectToml),
4672 Some("setup.py") => Some(DatasourceId::PypiSetupPy),
4673 Some("setup.cfg") => Some(DatasourceId::PypiSetupCfg),
4674 Some("PKG-INFO") => Some(DatasourceId::PypiSdistPkginfo),
4675 Some("METADATA") => Some(DatasourceId::PypiWheelMetadata),
4676 Some("pypi.json") => Some(DatasourceId::PypiJson),
4677 Some("pip-inspect.deplock") => Some(DatasourceId::PypiInspectDeplock),
4678 Some("origin.json") if is_pip_cache_origin_json(path) => {
4679 Some(DatasourceId::PypiPipOriginJson)
4680 }
4681 _ if is_python_sdist_archive_path(path) => Some(DatasourceId::PypiSdistPkginfo),
4682 _ if path
4683 .extension()
4684 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) =>
4685 {
4686 Some(DatasourceId::PypiWheel)
4687 }
4688 _ if path
4689 .extension()
4690 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg")) =>
4691 {
4692 Some(DatasourceId::PypiEgg)
4693 }
4694 _ => None,
4695 }
4696}
4697
// Invokes the crate's `register_parser!` macro for the Python parser.
// NOTE(review): the role of each positional argument is inferred from its
// value (description, path globs, package type, language, docs URL) — confirm
// against the macro definition.
// NOTE(review): `infer_python_datasource_id` also recognizes
// `pip-inspect.deplock`, which has no glob below — presumably it is
// registered elsewhere; verify.
crate::register_parser!(
    // Human-readable summary of the supported inputs.
    "Python package manifests (pyproject.toml, setup.py, setup.cfg, pypi.json, PKG-INFO, METADATA, pip cache origin.json, sdist archives, .whl, .egg)",
    // Glob patterns: manifest file names first, then archive extensions.
    &[
        "**/pyproject.toml",
        "**/setup.py",
        "**/setup.cfg",
        "**/pypi.json",
        "**/PKG-INFO",
        "**/METADATA",
        "**/origin.json",
        "**/*.tar.gz",
        "**/*.tgz",
        "**/*.tar.bz2",
        "**/*.tar.xz",
        "**/*.zip",
        "**/*.whl",
        "**/*.egg"
    ],
    "pypi",
    "Python",
    Some("https://packaging.python.org/"),
);