1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parsers::utils::{read_file_to_string, split_name_email};
36use base64::Engine;
37use base64::engine::general_purpose::URL_SAFE_NO_PAD;
38use csv::ReaderBuilder;
39use log::warn;
40use packageurl::PackageUrl;
41use regex::Regex;
42use rustpython_parser::{Parse, ast};
43use serde_json::{Map as JsonMap, Value as JsonValue};
44use sha2::{Digest, Sha256};
45use std::collections::{HashMap, HashSet};
46use std::fs::File;
47use std::io::Read;
48use std::path::{Path, PathBuf};
49use toml::Value as TomlValue;
50use toml::map::Map as TomlMap;
51use zip::ZipArchive;
52
53use super::PackageParser;
54
// Well-known table/field names in `pyproject.toml`-style metadata, used by
// the TOML extractors in this module.
const FIELD_PROJECT: &str = "project";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_DEPENDENCY_GROUPS: &str = "dependency-groups";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";

/// Maximum number of `setup.py` bytes read for parsing (1 MiB).
const MAX_SETUP_PY_BYTES: usize = 1_048_576;
/// Maximum number of AST nodes visited when analyzing `setup.py`.
const MAX_SETUP_PY_AST_NODES: usize = 10_000;
/// Maximum AST nesting depth accepted when analyzing `setup.py`.
const MAX_SETUP_PY_AST_DEPTH: usize = 50;
/// Maximum total uncompressed size accepted from a wheel/egg archive (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Maximum uncompressed size of a single archive member (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Uncompressed:compressed ratio above which an entry is treated as a
/// zip-bomb candidate and skipped.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for Python package metadata: `pyproject.toml`, `setup.py`/`setup.cfg`,
/// `PKG-INFO`/`METADATA`, pip cache files, and `.whl`/`.egg` archives.
pub struct PythonParser;
86
87impl PackageParser for PythonParser {
88 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
89
90 fn extract_packages(path: &Path) -> Vec<PackageData> {
91 vec![
92 if path.file_name().unwrap_or_default() == "pyproject.toml" {
93 extract_from_pyproject_toml(path)
94 } else if path.file_name().unwrap_or_default() == "setup.cfg" {
95 extract_from_setup_cfg(path)
96 } else if path.file_name().unwrap_or_default() == "setup.py" {
97 extract_from_setup_py(path)
98 } else if path.file_name().unwrap_or_default() == "PKG-INFO" {
99 extract_from_rfc822_metadata(path, DatasourceId::PypiSdistPkginfo)
100 } else if path.file_name().unwrap_or_default() == "METADATA" {
101 extract_from_rfc822_metadata(path, DatasourceId::PypiWheelMetadata)
102 } else if is_pip_cache_origin_json(path) {
103 extract_from_pip_origin_json(path)
104 } else if path.file_name().unwrap_or_default() == "pypi.json" {
105 extract_from_pypi_json(path)
106 } else if path.file_name().unwrap_or_default() == "pip-inspect.deplock" {
107 extract_from_pip_inspect(path)
108 } else if path
109 .extension()
110 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
111 {
112 extract_from_wheel_archive(path)
113 } else if path
114 .extension()
115 .is_some_and(|ext| ext.eq_ignore_ascii_case("egg"))
116 {
117 extract_from_egg_archive(path)
118 } else {
119 default_package_data()
120 },
121 ]
122 }
123
124 fn is_match(path: &Path) -> bool {
125 if let Some(filename) = path.file_name()
126 && (filename == "pyproject.toml"
127 || filename == "setup.cfg"
128 || filename == "setup.py"
129 || filename == "PKG-INFO"
130 || filename == "METADATA"
131 || filename == "pypi.json"
132 || filename == "pip-inspect.deplock"
133 || is_pip_cache_origin_json(path))
134 {
135 return true;
136 }
137
138 if let Some(extension) = path.extension() {
139 let ext = extension.to_string_lossy().to_lowercase();
140 if ext == "whl" || ext == "egg" {
141 return true;
142 }
143 }
144
145 false
146 }
147}
148
/// Metadata parsed from an installed wheel's `.dist-info/WHEEL` file.
#[derive(Debug, Clone)]
struct InstalledWheelMetadata {
    /// All `Tag:` header values (e.g. `py3-none-any`); never empty.
    wheel_tags: Vec<String>,
    /// `Wheel-Version:` header, if present.
    wheel_version: Option<String>,
    /// `Generator:` header (tool that built the wheel), if present.
    wheel_generator: Option<String>,
    /// `Root-Is-Purelib:` header parsed as a boolean, if present and valid.
    root_is_purelib: Option<bool>,
    /// All tags collapsed into one compressed tag string, when they share a
    /// single ABI/platform pair (see `compress_wheel_tags`).
    compressed_tag: Option<String>,
}
157
158fn merge_sibling_wheel_metadata(path: &Path, package_data: &mut PackageData) {
159 let Some(parent) = path.parent() else {
160 return;
161 };
162
163 if !parent
164 .file_name()
165 .and_then(|name| name.to_str())
166 .is_some_and(|name| name.ends_with(".dist-info"))
167 {
168 return;
169 }
170
171 let wheel_path = parent.join("WHEEL");
172 if !wheel_path.exists() {
173 return;
174 }
175
176 let Ok(content) = read_file_to_string(&wheel_path) else {
177 warn!("Failed to read sibling WHEEL file at {:?}", wheel_path);
178 return;
179 };
180
181 let Some(wheel_metadata) = parse_installed_wheel_metadata(&content) else {
182 return;
183 };
184
185 apply_installed_wheel_metadata(package_data, &wheel_metadata);
186}
187
/// Parse the RFC 822-style contents of a `WHEEL` file into
/// [`InstalledWheelMetadata`]. Returns `None` when no `Tag:` headers exist.
fn parse_installed_wheel_metadata(content: &str) -> Option<InstalledWheelMetadata> {
    use super::rfc822::{get_header_all, get_header_first};

    let metadata = super::rfc822::parse_rfc822_content(content);
    // At least one `Tag:` header is required; without tags there is nothing
    // worth merging.
    let wheel_tags = get_header_all(&metadata.headers, "tag");
    if wheel_tags.is_empty() {
        return None;
    }

    let wheel_version = get_header_first(&metadata.headers, "wheel-version");
    let wheel_generator = get_header_first(&metadata.headers, "generator");
    // `Root-Is-Purelib` is only accepted as literal "true"/"false"
    // (case-insensitive); any other value is treated as absent.
    let root_is_purelib =
        get_header_first(&metadata.headers, "root-is-purelib").and_then(|value| {
            match value.to_ascii_lowercase().as_str() {
                "true" => Some(true),
                "false" => Some(false),
                _ => None,
            }
        });

    let compressed_tag = compress_wheel_tags(&wheel_tags);

    Some(InstalledWheelMetadata {
        wheel_tags,
        wheel_version,
        wheel_generator,
        root_is_purelib,
        compressed_tag,
    })
}
218
/// Collapse expanded wheel tags back into the compressed
/// `py1.py2-abi-platform` form, e.g. `["py2-none-any", "py3-none-any"]`
/// becomes `"py2.py3-none-any"`.
///
/// Returns `None` for an empty list, for a malformed tag (fewer than three
/// `-`-separated pieces), or when the tags do not all share one ABI/platform
/// pair. A single tag is returned unchanged.
fn compress_wheel_tags(tags: &[String]) -> Option<String> {
    let (first, rest) = tags.split_first()?;
    if rest.is_empty() {
        return Some(first.clone());
    }

    let mut pythons: Vec<String> = Vec::with_capacity(tags.len());
    // ABI/platform pair shared by every tag seen so far.
    let mut shared: Option<(&str, &str)> = None;

    for tag in tags {
        let mut pieces = tag.splitn(3, '-');
        let (python, abi, platform) = (pieces.next()?, pieces.next()?, pieces.next()?);

        match shared {
            Some((seen_abi, seen_platform)) if seen_abi != abi || seen_platform != platform => {
                // Mixed ABI or platform: no single compressed form exists.
                return None;
            }
            _ => shared = Some((abi, platform)),
        }
        pythons.push(python.to_string());
    }

    let (abi, platform) = shared?;
    Some(format!("{}-{}-{}", pythons.join("."), abi, platform))
}
256
/// Record WHEEL-file metadata into `package_data.extra_data` and, when name,
/// version and a compressed tag are all known, rebuild the purl with the tag
/// as its `extension` qualifier.
fn apply_installed_wheel_metadata(
    package_data: &mut PackageData,
    wheel_metadata: &InstalledWheelMetadata,
) {
    // Lazily create the extra_data map; tags are always recorded.
    let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
    extra_data.insert(
        "wheel_tags".to_string(),
        JsonValue::Array(
            wheel_metadata
                .wheel_tags
                .iter()
                .cloned()
                .map(JsonValue::String)
                .collect(),
        ),
    );

    if let Some(wheel_version) = &wheel_metadata.wheel_version {
        extra_data.insert(
            "wheel_version".to_string(),
            JsonValue::String(wheel_version.clone()),
        );
    }

    if let Some(wheel_generator) = &wheel_metadata.wheel_generator {
        extra_data.insert(
            "wheel_generator".to_string(),
            JsonValue::String(wheel_generator.clone()),
        );
    }

    if let Some(root_is_purelib) = wheel_metadata.root_is_purelib {
        extra_data.insert(
            "root_is_purelib".to_string(),
            JsonValue::Bool(root_is_purelib),
        );
    }

    // NOTE(review): `build_pypi_purl_with_extension` is defined elsewhere in
    // this module; presumably it mirrors `build_wheel_purl`'s `extension`
    // qualifier handling — confirm against its definition.
    if let (Some(name), Some(version), Some(extension)) = (
        package_data.name.as_deref(),
        package_data.version.as_deref(),
        wheel_metadata.compressed_tag.as_deref(),
    ) {
        package_data.purl = build_pypi_purl_with_extension(name, Some(version), extension);
    }
}
303
/// True when `path` looks like a pip download-cache `origin.json`: the file
/// itself is named `origin.json` and some ancestor directory is named
/// `wheels` (case-insensitively).
fn is_pip_cache_origin_json(path: &Path) -> bool {
    let is_origin_json = path
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name == "origin.json");
    if !is_origin_json {
        return false;
    }

    // skip(1) skips the path itself; only directory ancestors are checked.
    path.ancestors().skip(1).any(|ancestor| {
        matches!(
            ancestor.file_name().and_then(|name| name.to_str()),
            Some(name) if name.eq_ignore_ascii_case("wheels")
        )
    })
}
313
/// Build package data from a pip download-cache `origin.json` file.
///
/// The JSON records the original download URL (and hashes); name/version are
/// inferred from that URL's filename, falling back to a cached `.whl` sitting
/// in the same directory. Any failure returns a default (empty) package.
fn extract_from_pip_origin_json(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: JsonValue = match serde_json::from_str(&content) {
        Ok(root) => root,
        Err(e) => {
            warn!("Failed to parse pip cache origin.json at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // The download URL is the one mandatory field.
    let Some(download_url) = root.get("url").and_then(|value| value.as_str()) else {
        warn!("No url found in pip cache origin.json at {:?}", path);
        return default_package_data();
    };

    // Prefer name/version parsed from the URL; fall back to a sibling cached
    // wheel's filename.
    let sibling_wheel = find_sibling_cached_wheel(path);
    let name_version = parse_name_version_from_origin_url(download_url).or_else(|| {
        sibling_wheel
            .as_ref()
            .map(|wheel_info| (wheel_info.name.clone(), wheel_info.version.clone()))
    });

    let Some((name, version)) = name_version else {
        warn!(
            "Failed to infer package name/version from pip cache origin.json at {:?}",
            path
        );
        return default_package_data();
    };

    let (repository_homepage_url, repository_download_url, api_data_url, plain_purl) =
        build_pypi_urls(Some(&name), Some(&version));
    // A wheel-specific purl (with the `extension` qualifier) wins over the
    // plain pypi purl when a cached wheel is available.
    let purl = sibling_wheel
        .as_ref()
        .and_then(|wheel_info| build_wheel_purl(Some(&name), Some(&version), wheel_info))
        .or(plain_purl);

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        primary_language: Some("Python".to_string()),
        name: Some(name),
        version: Some(version),
        datasource_id: Some(DatasourceId::PypiPipOriginJson),
        download_url: Some(download_url.to_string()),
        sha256: extract_sha256_from_origin_json(&root),
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        purl,
        ..Default::default()
    }
}
373
374fn find_sibling_cached_wheel(path: &Path) -> Option<WheelInfo> {
375 let parent = path.parent()?;
376 let entries = parent.read_dir().ok()?;
377
378 for entry in entries.flatten() {
379 let sibling_path = entry.path();
380 if sibling_path
381 .extension()
382 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
383 && let Some(wheel_info) = parse_wheel_filename(&sibling_path)
384 {
385 return Some(wheel_info);
386 }
387 }
388
389 None
390}
391
/// Infer `(name, version)` from the filename component of a pip download URL.
///
/// Wheel filenames are delegated to `parse_wheel_filename`; other recognized
/// archives (`.tar.gz`, `.zip`, …) are split on the LAST `-` of the stem,
/// e.g. `pkg-1.0.tar.gz` -> (`pkg`, `1.0`).
fn parse_name_version_from_origin_url(url: &str) -> Option<(String, String)> {
    // Last URL path segment is the filename.
    let file_name = url.rsplit('/').next()?;

    if file_name.ends_with(".whl") {
        return parse_wheel_filename(Path::new(file_name))
            .map(|wheel_info| (wheel_info.name, wheel_info.version));
    }

    let stem = strip_python_archive_extension(file_name)?;
    // Split on the last hyphen so hyphenated project names stay intact.
    let (name, version) = stem.rsplit_once('-')?;
    if name.is_empty() || version.is_empty() {
        return None;
    }

    // Normalize sdist-style underscores back to hyphens in the name.
    Some((name.replace('_', "-"), version.to_string()))
}
408
/// Strip a known Python distribution-archive extension from `file_name` and
/// return the remaining stem; `None` for unrecognized extensions.
fn strip_python_archive_extension(file_name: &str) -> Option<&str> {
    for suffix in [".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".whl"] {
        if let Some(stem) = file_name.strip_suffix(suffix) {
            return Some(stem);
        }
    }
    None
}
414
/// Pull a sha256 digest out of pip's origin.json structure: try the modern
/// `archive_info.hashes.sha256` field first, then fall back to the legacy
/// `archive_info.hash` field (which needs prefix normalization).
fn extract_sha256_from_origin_json(root: &JsonValue) -> Option<String> {
    root.pointer("/archive_info/hashes/sha256")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned)
        .or_else(|| {
            root.pointer("/archive_info/hash")
                .and_then(|value| value.as_str())
                .and_then(normalize_origin_hash)
        })
}
425
/// Normalize a pip `archive_info.hash` value to a bare sha256 hex digest.
///
/// Accepts `sha256=<digest>`, `sha256:<digest>`, or a bare 64-character hex
/// string; anything else yields `None`.
fn normalize_origin_hash(hash: &str) -> Option<String> {
    for prefix in ["sha256=", "sha256:"] {
        if let Some(digest) = hash.strip_prefix(prefix) {
            return Some(digest.to_string());
        }
    }

    // str::len is a byte count, but non-ASCII bytes can never all pass the
    // hex-digit test, so this matches exactly 64 ASCII hex characters.
    let is_bare_hex_digest = hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit());
    is_bare_hex_digest.then(|| hash.to_string())
}
438
/// Extract package data from an RFC 822-style metadata file (`PKG-INFO` or
/// `METADATA`), then enrich it from sibling files in the same directory
/// (requires.txt, RECORD/installed-files.txt/SOURCES.txt, and — for wheel
/// metadata only — the WHEEL file).
fn extract_from_rfc822_metadata(path: &Path, datasource_id: DatasourceId) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read metadata at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let metadata = super::rfc822::parse_rfc822_content(&content);
    let mut package_data = build_package_data_from_rfc822(&metadata, datasource_id);
    merge_sibling_metadata_dependencies(path, &mut package_data);
    merge_sibling_metadata_file_references(path, &mut package_data);
    // Only installed wheel metadata has a sibling WHEEL file worth merging.
    if datasource_id == DatasourceId::PypiWheelMetadata {
        merge_sibling_wheel_metadata(path, &mut package_data);
    }
    package_data
}
457
/// Merge dependencies declared in sibling `requires.txt` files — either
/// directly next to `path` or inside a sibling `*.egg-info` directory — into
/// `package_data.dependencies`, skipping entries already present.
fn merge_sibling_metadata_dependencies(path: &Path, package_data: &mut PackageData) {
    let mut extra_dependencies = Vec::new();

    if let Some(parent) = path.parent() {
        // Case 1: a requires.txt directly next to the metadata file.
        let direct_requires = parent.join("requires.txt");
        if direct_requires.exists()
            && let Ok(content) = read_file_to_string(&direct_requires)
        {
            extra_dependencies.extend(parse_requires_txt(&content));
        }

        // Case 2: a requires.txt inside a sibling `*.egg-info` directory.
        // Only the first matching directory (in read_dir order) is used.
        let sibling_egg_info_requires = parent
            .read_dir()
            .ok()
            .into_iter()
            .flatten()
            .flatten()
            .find_map(|entry| {
                let child_path = entry.path();
                if child_path.is_dir()
                    && child_path
                        .file_name()
                        .and_then(|name| name.to_str())
                        .is_some_and(|name| name.ends_with(".egg-info"))
                {
                    let requires = child_path.join("requires.txt");
                    requires.exists().then_some(requires)
                } else {
                    None
                }
            });

        if let Some(requires_path) = sibling_egg_info_requires
            && let Ok(content) = read_file_to_string(&requires_path)
        {
            extra_dependencies.extend(parse_requires_txt(&content));
        }
    }

    // Deduplicate: a dependency is "already present" only when purl, scope,
    // requirement, and extra_data all match an existing entry.
    for dependency in extra_dependencies {
        if !package_data.dependencies.iter().any(|existing| {
            existing.purl == dependency.purl
                && existing.scope == dependency.scope
                && existing.extracted_requirement == dependency.extracted_requirement
                && existing.extra_data == dependency.extra_data
        }) {
            package_data.dependencies.push(dependency);
        }
    }
}
508
509fn merge_sibling_metadata_file_references(path: &Path, package_data: &mut PackageData) {
510 let mut extra_refs = Vec::new();
511
512 if let Some(parent) = path.parent() {
513 let record_path = parent.join("RECORD");
514 if record_path.exists()
515 && let Ok(content) = read_file_to_string(&record_path)
516 {
517 extra_refs.extend(parse_record_csv(&content));
518 }
519
520 let installed_files_path = parent.join("installed-files.txt");
521 if installed_files_path.exists()
522 && let Ok(content) = read_file_to_string(&installed_files_path)
523 {
524 extra_refs.extend(parse_installed_files_txt(&content));
525 }
526
527 let sources_path = parent.join("SOURCES.txt");
528 if sources_path.exists()
529 && let Ok(content) = read_file_to_string(&sources_path)
530 {
531 extra_refs.extend(parse_sources_txt(&content));
532 }
533 }
534
535 for file_ref in extra_refs {
536 if !package_data
537 .file_references
538 .iter()
539 .any(|existing| existing.path == file_ref.path)
540 {
541 package_data.file_references.push(file_ref);
542 }
543 }
544}
545
/// Defensive pre-scan of a zip archive against zip-bomb-style attacks.
///
/// Per-entry violations (compression ratio above `MAX_COMPRESSION_RATIO`, or
/// uncompressed size above `MAX_FILE_SIZE`) are logged and the entry skipped;
/// exceeding `MAX_ARCHIVE_SIZE` in accumulated total is a hard error.
/// Returns the total uncompressed size of the entries that passed.
fn validate_zip_archive<R: Read + std::io::Seek>(
    archive: &mut ZipArchive<R>,
    path: &Path,
    archive_type: &str,
) -> Result<u64, String> {
    let mut total_extracted = 0u64;

    for i in 0..archive.len() {
        if let Ok(file) = archive.by_index(i) {
            let compressed_size = file.compressed_size();
            let uncompressed_size = file.size();

            // Ratio check only for nonzero compressed sizes (avoids division
            // by zero on stored/empty entries).
            if compressed_size > 0 {
                let ratio = uncompressed_size as f64 / compressed_size as f64;
                if ratio > MAX_COMPRESSION_RATIO {
                    warn!(
                        "Suspicious compression ratio in {} {:?}: {:.2}:1",
                        archive_type, path, ratio
                    );
                    continue;
                }
            }

            if uncompressed_size > MAX_FILE_SIZE {
                warn!(
                    "File too large in {} {:?}: {} bytes (limit: {} bytes)",
                    archive_type, path, uncompressed_size, MAX_FILE_SIZE
                );
                continue;
            }

            total_extracted += uncompressed_size;
            if total_extracted > MAX_ARCHIVE_SIZE {
                let msg = format!(
                    "Total extracted size exceeds limit for {} {:?}",
                    archive_type, path
                );
                warn!("{}", msg);
                return Err(msg);
            }
        }
    }

    Ok(total_extracted)
}
591
/// Extract package data from a `.whl` zip archive.
///
/// Flow: file-size cap check -> open -> zip-bomb validation -> parse
/// `*.dist-info/METADATA` -> attach archive checksums, RECORD file
/// references, and filename-derived tags/purl. Any failure returns a
/// default (empty) package rather than an error.
fn extract_from_wheel_archive(path: &Path) -> PackageData {
    // Refuse to open archives above the global size cap.
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!(
                "Failed to read metadata for wheel archive {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Wheel archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read wheel archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Zip-bomb / oversize-entry scan before reading any entry contents.
    if validate_zip_archive(&mut archive, path, "wheel").is_err() {
        return default_package_data();
    }

    let metadata_path = find_wheel_metadata_path(&mut archive);
    let metadata_path = match metadata_path {
        Some(p) => p,
        None => {
            warn!("No METADATA file found in wheel archive {:?}", path);
            return default_package_data();
        }
    };

    let content = match read_zip_entry(&mut archive, &metadata_path) {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read METADATA from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiWheel);

    // Size/sha256 of the archive file itself (helper defined elsewhere in
    // this module).
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // RECORD, when present, lists every packaged file with hash and size.
    if let Some(record_path) = find_wheel_record_path(&mut archive)
        && let Ok(record_content) = read_zip_entry(&mut archive, &record_path)
    {
        package_data.file_references = parse_record_csv(&record_content);
    }

    // Fill gaps and record tag metadata from the wheel filename itself.
    if let Some(wheel_info) = parse_wheel_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(wheel_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(wheel_info.version.clone());
        }

        package_data.purl = build_wheel_purl(
            package_data.name.as_deref(),
            package_data.version.as_deref(),
            &wheel_info,
        );

        let mut extra_data = package_data.extra_data.unwrap_or_default();
        extra_data.insert(
            "python_requires".to_string(),
            serde_json::Value::String(wheel_info.python_tag.clone()),
        );
        extra_data.insert(
            "abi_tag".to_string(),
            serde_json::Value::String(wheel_info.abi_tag.clone()),
        );
        extra_data.insert(
            "platform_tag".to_string(),
            serde_json::Value::String(wheel_info.platform_tag.clone()),
        );
        package_data.extra_data = Some(extra_data);
    }

    package_data
}
694
/// Extract package data from a `.egg` zip archive via its `PKG-INFO` entry.
///
/// Mirrors `extract_from_wheel_archive`: size-cap check, zip-bomb
/// validation, metadata parse, archive checksums, file references, and
/// filename-derived fields; the purl is always rebuilt with a `type=egg`
/// qualifier at the end.
fn extract_from_egg_archive(path: &Path) -> PackageData {
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => {
            warn!("Failed to read metadata for egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    if metadata.len() > MAX_ARCHIVE_SIZE {
        warn!(
            "Egg archive too large: {} bytes (limit: {} bytes)",
            metadata.len(),
            MAX_ARCHIVE_SIZE
        );
        return default_package_data();
    }

    let file = match File::open(path) {
        Ok(f) => f,
        Err(e) => {
            warn!("Failed to open egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut archive = match ZipArchive::new(file) {
        Ok(a) => a,
        Err(e) => {
            warn!("Failed to read egg archive {:?}: {}", path, e);
            return default_package_data();
        }
    };

    // Zip-bomb / oversize-entry scan before reading any entry contents.
    if validate_zip_archive(&mut archive, path, "egg").is_err() {
        return default_package_data();
    }

    let pkginfo_path = find_egg_pkginfo_path(&mut archive);
    let pkginfo_path = match pkginfo_path {
        Some(p) => p,
        None => {
            warn!("No PKG-INFO file found in egg archive {:?}", path);
            return default_package_data();
        }
    };

    let content = match read_zip_entry(&mut archive, &pkginfo_path) {
        Ok(c) => c,
        Err(e) => {
            warn!("Failed to read PKG-INFO from {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let mut package_data = python_parse_rfc822_content(&content, DatasourceId::PypiEgg);

    // Size/sha256 of the archive file itself (helper defined elsewhere in
    // this module).
    let (size, sha256) = calculate_file_checksums(path);
    package_data.size = size;
    package_data.sha256 = sha256;

    // installed-files.txt, when present, lists the installed paths.
    if let Some(installed_files_path) = find_egg_installed_files_path(&mut archive)
        && let Ok(installed_files_content) = read_zip_entry(&mut archive, &installed_files_path)
    {
        package_data.file_references = parse_installed_files_txt(&installed_files_content);
    }

    // Fill gaps from the egg filename itself.
    if let Some(egg_info) = parse_egg_filename(path) {
        if package_data.name.is_none() {
            package_data.name = Some(egg_info.name.clone());
        }
        if package_data.version.is_none() {
            package_data.version = Some(egg_info.version.clone());
        }

        if let Some(python_version) = &egg_info.python_version {
            let mut extra_data = package_data.extra_data.unwrap_or_default();
            extra_data.insert(
                "python_version".to_string(),
                serde_json::Value::String(python_version.clone()),
            );
            package_data.extra_data = Some(extra_data);
        }
    }

    package_data.purl = build_egg_purl(
        package_data.name.as_deref(),
        package_data.version.as_deref(),
    );

    package_data
}
787
788fn find_wheel_metadata_path<R: Read + std::io::Seek>(
789 archive: &mut ZipArchive<R>,
790) -> Option<String> {
791 for i in 0..archive.len() {
792 if let Ok(file) = archive.by_index_raw(i) {
793 let name = file.name();
794 if name.ends_with(".dist-info/METADATA") {
795 return Some(name.to_string());
796 }
797 }
798 }
799 None
800}
801
802fn find_egg_pkginfo_path<R: Read + std::io::Seek>(archive: &mut ZipArchive<R>) -> Option<String> {
803 for i in 0..archive.len() {
804 if let Ok(file) = archive.by_index_raw(i) {
805 let name = file.name();
806 if name.ends_with("EGG-INFO/PKG-INFO") || name.ends_with(".egg-info/PKG-INFO") {
807 return Some(name.to_string());
808 }
809 }
810 }
811 None
812}
813
/// Read the named entry of `archive` into a UTF-8 `String`.
///
/// Failures (entry not found, or unreadable / not valid UTF-8) are returned
/// as human-readable error messages.
fn read_zip_entry<R: Read + std::io::Seek>(
    archive: &mut ZipArchive<R>,
    path: &str,
) -> Result<String, String> {
    let mut file = archive
        .by_name(path)
        .map_err(|e| format!("Failed to find entry {}: {}", path, e))?;
    let mut content = String::new();
    file.read_to_string(&mut content)
        .map_err(|e| format!("Failed to read {}: {}", path, e))?;
    Ok(content)
}
826
827fn find_wheel_record_path<R: Read + std::io::Seek>(archive: &mut ZipArchive<R>) -> Option<String> {
828 for i in 0..archive.len() {
829 if let Ok(file) = archive.by_index_raw(i) {
830 let name = file.name();
831 if name.ends_with(".dist-info/RECORD") {
832 return Some(name.to_string());
833 }
834 }
835 }
836 None
837}
838
839fn find_egg_installed_files_path<R: Read + std::io::Seek>(
840 archive: &mut ZipArchive<R>,
841) -> Option<String> {
842 for i in 0..archive.len() {
843 if let Ok(file) = archive.by_index_raw(i) {
844 let name = file.name();
845 if name.ends_with("EGG-INFO/installed-files.txt")
846 || name.ends_with(".egg-info/installed-files.txt")
847 {
848 return Some(name.to_string());
849 }
850 }
851 }
852 None
853}
854
855pub fn parse_record_csv(content: &str) -> Vec<FileReference> {
860 let mut reader = ReaderBuilder::new()
861 .has_headers(false)
862 .from_reader(content.as_bytes());
863
864 let mut file_references = Vec::new();
865
866 for result in reader.records() {
867 match result {
868 Ok(record) => {
869 if record.len() < 3 {
870 continue;
871 }
872
873 let path = record.get(0).unwrap_or("").trim().to_string();
874 if path.is_empty() {
875 continue;
876 }
877
878 let hash_field = record.get(1).unwrap_or("").trim();
879 let size_field = record.get(2).unwrap_or("").trim();
880
881 let sha256 = if !hash_field.is_empty() && hash_field.contains('=') {
883 let parts: Vec<&str> = hash_field.split('=').collect();
884 if parts.len() == 2 && parts[0] == "sha256" {
885 match URL_SAFE_NO_PAD.decode(parts[1]) {
887 Ok(decoded) => {
888 let hex = decoded
889 .iter()
890 .map(|b| format!("{:02x}", b))
891 .collect::<String>();
892 Some(hex)
893 }
894 Err(_) => None,
895 }
896 } else {
897 None
898 }
899 } else {
900 None
901 };
902
903 let size = if !size_field.is_empty() && size_field != "-" {
905 size_field.parse::<u64>().ok()
906 } else {
907 None
908 };
909
910 file_references.push(FileReference {
911 path,
912 size,
913 sha1: None,
914 md5: None,
915 sha256,
916 sha512: None,
917 extra_data: None,
918 });
919 }
920 Err(e) => {
921 warn!("Failed to parse RECORD CSV row: {}", e);
922 continue;
923 }
924 }
925 }
926
927 file_references
928}
929
930pub fn parse_installed_files_txt(content: &str) -> Vec<FileReference> {
933 content
934 .lines()
935 .map(|line| line.trim())
936 .filter(|line| !line.is_empty())
937 .map(|path| FileReference {
938 path: path.to_string(),
939 size: None,
940 sha1: None,
941 md5: None,
942 sha256: None,
943 sha512: None,
944 extra_data: None,
945 })
946 .collect()
947}
948
949pub fn parse_sources_txt(content: &str) -> Vec<FileReference> {
950 content
951 .lines()
952 .map(str::trim)
953 .filter(|line| !line.is_empty())
954 .map(|path| FileReference {
955 path: path.to_string(),
956 size: None,
957 sha1: None,
958 md5: None,
959 sha256: None,
960 sha512: None,
961 extra_data: None,
962 })
963 .collect()
964}
965
/// Components of a wheel filename: `name-version-python-abi-platform.whl`.
struct WheelInfo {
    // Distribution name with `_` normalized back to `-`.
    name: String,
    // Version string, taken verbatim from the filename.
    version: String,
    // Python tag, e.g. `py3` or `cp39`.
    python_tag: String,
    // ABI tag, e.g. `none`.
    abi_tag: String,
    // Platform tag, e.g. `any`; may itself contain `-` when joined.
    platform_tag: String,
}
973
974fn parse_wheel_filename(path: &Path) -> Option<WheelInfo> {
975 let stem = path.file_stem()?.to_string_lossy();
976 let parts: Vec<&str> = stem.split('-').collect();
977
978 if parts.len() >= 5 {
979 Some(WheelInfo {
980 name: parts[0].replace('_', "-"),
981 version: parts[1].to_string(),
982 python_tag: parts[2].to_string(),
983 abi_tag: parts[3].to_string(),
984 platform_tag: parts[4..].join("-"),
985 })
986 } else {
987 None
988 }
989}
990
/// Components of an egg filename: `name-version[-pyX.Y].egg`.
struct EggInfo {
    // Distribution name with `_` normalized back to `-`.
    name: String,
    // Version string, taken verbatim from the filename.
    version: String,
    // Optional third filename component (python version), when present.
    python_version: Option<String>,
}
996
997fn parse_egg_filename(path: &Path) -> Option<EggInfo> {
998 let stem = path.file_stem()?.to_string_lossy();
999 let parts: Vec<&str> = stem.split('-').collect();
1000
1001 if parts.len() >= 2 {
1002 Some(EggInfo {
1003 name: parts[0].replace('_', "-"),
1004 version: parts[1].to_string(),
1005 python_version: parts.get(2).map(|s| s.to_string()),
1006 })
1007 } else {
1008 None
1009 }
1010}
1011
/// Build a `pkg:pypi` purl carrying the wheel's full tag triple as an
/// `extension` qualifier. Returns `None` when `name` is absent or any purl
/// construction step fails.
fn build_wheel_purl(
    name: Option<&str>,
    version: Option<&str>,
    wheel_info: &WheelInfo,
) -> Option<String> {
    let name = name?;
    let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;

    if let Some(ver) = version {
        package_url.with_version(ver).ok()?;
    }

    // Encode `python-abi-platform` as a single `extension` qualifier.
    let extension = format!(
        "{}-{}-{}",
        wheel_info.python_tag, wheel_info.abi_tag, wheel_info.platform_tag
    );
    package_url.add_qualifier("extension", extension).ok()?;

    Some(package_url.to_string())
}
1032
/// Build a `pkg:pypi` purl with a `type=egg` qualifier marking the package as
/// an egg distribution. Returns `None` when `name` is absent or any purl
/// construction step fails.
fn build_egg_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;
    let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;

    if let Some(ver) = version {
        package_url.with_version(ver).ok()?;
    }

    package_url.add_qualifier("type", "egg").ok()?;

    Some(package_url.to_string())
}
1045
/// Convenience wrapper: parse RFC 822 metadata text and convert it straight
/// into `PackageData` for the given datasource.
fn python_parse_rfc822_content(content: &str, datasource_id: DatasourceId) -> PackageData {
    let metadata = super::rfc822::parse_rfc822_content(content);
    build_package_data_from_rfc822(&metadata, datasource_id)
}
1050
1051fn build_package_data_from_rfc822(
1056 metadata: &super::rfc822::Rfc822Metadata,
1057 datasource_id: DatasourceId,
1058) -> PackageData {
1059 use super::rfc822::{get_header_all, get_header_first};
1060
1061 let name = get_header_first(&metadata.headers, "name");
1062 let version = get_header_first(&metadata.headers, "version");
1063 let summary = get_header_first(&metadata.headers, "summary");
1064 let mut homepage_url = get_header_first(&metadata.headers, "home-page");
1065 let author = get_header_first(&metadata.headers, "author");
1066 let author_email = get_header_first(&metadata.headers, "author-email");
1067 let license = get_header_first(&metadata.headers, "license");
1068 let download_url = get_header_first(&metadata.headers, "download-url");
1069 let platform = get_header_first(&metadata.headers, "platform");
1070 let requires_python = get_header_first(&metadata.headers, "requires-python");
1071 let classifiers = get_header_all(&metadata.headers, "classifier");
1072 let license_files = get_header_all(&metadata.headers, "license-file");
1073
1074 let description_body = if metadata.body.is_empty() {
1075 get_header_first(&metadata.headers, "description").unwrap_or_default()
1076 } else {
1077 metadata.body.clone()
1078 };
1079
1080 let description = build_description(summary.as_deref(), &description_body);
1081
1082 let mut parties = Vec::new();
1083 if author.is_some() || author_email.is_some() {
1084 parties.push(Party {
1085 r#type: Some("person".to_string()),
1086 role: Some("author".to_string()),
1087 name: author,
1088 email: author_email,
1089 url: None,
1090 organization: None,
1091 organization_url: None,
1092 timezone: None,
1093 });
1094 }
1095
1096 let (keywords, license_classifiers) = split_classifiers(&classifiers);
1097 let license_detections = Vec::new();
1099 let declared_license_expression = None;
1100 let declared_license_expression_spdx = None;
1101
1102 let extracted_license_statement =
1103 build_extracted_license_statement(license.as_deref(), &license_classifiers);
1104
1105 let mut extra_data = HashMap::new();
1106 if let Some(platform_value) = platform
1107 && !platform_value.eq_ignore_ascii_case("unknown")
1108 && !platform_value.is_empty()
1109 {
1110 extra_data.insert(
1111 "platform".to_string(),
1112 serde_json::Value::String(platform_value),
1113 );
1114 }
1115
1116 if let Some(requires_python_value) = requires_python
1117 && !requires_python_value.is_empty()
1118 {
1119 extra_data.insert(
1120 "requires_python".to_string(),
1121 serde_json::Value::String(requires_python_value),
1122 );
1123 }
1124
1125 if !license_files.is_empty() {
1126 extra_data.insert(
1127 "license_files".to_string(),
1128 serde_json::Value::Array(
1129 license_files
1130 .iter()
1131 .cloned()
1132 .map(serde_json::Value::String)
1133 .collect(),
1134 ),
1135 );
1136 }
1137
1138 let file_references = license_files
1139 .iter()
1140 .map(|path| FileReference {
1141 path: path.clone(),
1142 size: None,
1143 sha1: None,
1144 md5: None,
1145 sha256: None,
1146 sha512: None,
1147 extra_data: None,
1148 })
1149 .collect();
1150
1151 let project_urls = get_header_all(&metadata.headers, "project-url");
1152 let dependencies = extract_rfc822_dependencies(&metadata.headers);
1153 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
1154
1155 if !project_urls.is_empty() {
1156 let parsed_urls = parse_project_urls(&project_urls);
1157
1158 for (label, url) in &parsed_urls {
1159 let label_lower = label.to_lowercase();
1160
1161 if bug_tracking_url.is_none()
1162 && matches!(
1163 label_lower.as_str(),
1164 "tracker"
1165 | "bug reports"
1166 | "bug tracker"
1167 | "issues"
1168 | "issue tracker"
1169 | "github: issues"
1170 )
1171 {
1172 bug_tracking_url = Some(url.clone());
1173 } else if code_view_url.is_none()
1174 && matches!(label_lower.as_str(), "source" | "source code" | "code")
1175 {
1176 code_view_url = Some(url.clone());
1177 } else if vcs_url.is_none()
1178 && matches!(
1179 label_lower.as_str(),
1180 "github" | "gitlab" | "github: repo" | "repository"
1181 )
1182 {
1183 vcs_url = Some(url.clone());
1184 } else if homepage_url.is_none()
1185 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
1186 {
1187 homepage_url = Some(url.clone());
1188 } else if label_lower == "changelog" {
1189 extra_data.insert(
1190 "changelog_url".to_string(),
1191 serde_json::Value::String(url.clone()),
1192 );
1193 }
1194 }
1195
1196 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
1197 .iter()
1198 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
1199 .collect();
1200
1201 if !project_urls_json.is_empty() {
1202 extra_data.insert(
1203 "project_urls".to_string(),
1204 serde_json::Value::Object(project_urls_json),
1205 );
1206 }
1207 }
1208
1209 let extra_data = if extra_data.is_empty() {
1210 None
1211 } else {
1212 Some(extra_data)
1213 };
1214
1215 let (repository_homepage_url, repository_download_url, api_data_url, purl) =
1216 build_pypi_urls(name.as_deref(), version.as_deref());
1217
1218 PackageData {
1219 package_type: Some(PythonParser::PACKAGE_TYPE),
1220 namespace: None,
1221 name,
1222 version,
1223 qualifiers: None,
1224 subpath: None,
1225 primary_language: Some("Python".to_string()),
1226 description,
1227 release_date: None,
1228 parties,
1229 keywords,
1230 homepage_url,
1231 download_url,
1232 size: None,
1233 sha1: None,
1234 md5: None,
1235 sha256: None,
1236 sha512: None,
1237 bug_tracking_url,
1238 code_view_url,
1239 vcs_url,
1240 copyright: None,
1241 holder: None,
1242 declared_license_expression,
1243 declared_license_expression_spdx,
1244 license_detections,
1245 other_license_expression: None,
1246 other_license_expression_spdx: None,
1247 other_license_detections: Vec::new(),
1248 extracted_license_statement,
1249 notice_text: None,
1250 source_packages: Vec::new(),
1251 file_references,
1252 is_private: false,
1253 is_virtual: false,
1254 extra_data,
1255 dependencies,
1256 repository_homepage_url,
1257 repository_download_url,
1258 api_data_url,
1259 datasource_id: Some(datasource_id),
1260 purl,
1261 }
1262}
1263
/// Parses "Label, URL" entries from Project-URL metadata headers into
/// `(label, url)` pairs. Entries without a ", " separator, or with an
/// empty label or URL after trimming, are dropped.
fn parse_project_urls(project_urls: &[String]) -> Vec<(String, String)> {
    let mut parsed = Vec::new();
    for entry in project_urls {
        let Some((raw_label, raw_url)) = entry.split_once(", ") else {
            continue;
        };
        let label = raw_label.trim();
        let url = raw_url.trim();
        if !label.is_empty() && !url.is_empty() {
            parsed.push((label.to_string(), url.to_string()));
        }
    }
    parsed
}
1279
/// Combines a one-line summary and a longer body into a single description.
/// The trimmed non-empty parts are joined with a newline (summary first);
/// returns None when both are empty or whitespace-only.
fn build_description(summary: Option<&str>, body: &str) -> Option<String> {
    let summary_part = summary
        .map(str::trim)
        .filter(|part| !part.is_empty())
        .map(|part| part.to_string());

    let body_trimmed = body.trim();
    let body_part = (!body_trimmed.is_empty()).then(|| body_trimmed.to_string());

    let parts: Vec<String> = summary_part.into_iter().chain(body_part).collect();
    (!parts.is_empty()).then(|| parts.join("\n"))
}
1298
/// Splits trove classifiers into `(keywords, license_classifiers)`:
/// classifiers beginning with "License ::" go to the license list, all
/// others are treated as keywords. Input order is preserved in each list.
fn split_classifiers(classifiers: &[String]) -> (Vec<String>, Vec<String>) {
    let (license_classifiers, keywords): (Vec<String>, Vec<String>) = classifiers
        .iter()
        .cloned()
        .partition(|classifier| classifier.starts_with("License ::"));

    (keywords, license_classifiers)
}
1313
/// Builds a YAML-like license statement from a raw license string and/or
/// "License ::" trove classifiers. Returns None when neither is present;
/// otherwise the result always ends with a trailing newline.
fn build_extracted_license_statement(
    license: Option<&str>,
    license_classifiers: &[String],
) -> Option<String> {
    let mut lines: Vec<String> = Vec::new();

    if let Some(raw) = license.map(str::trim).filter(|value| !value.is_empty()) {
        lines.push(format!("license: {}", raw));
    }

    if !license_classifiers.is_empty() {
        lines.push("classifiers:".to_string());
        lines.extend(
            license_classifiers
                .iter()
                .map(|classifier| format!(" - '{}'", classifier)),
        );
    }

    (!lines.is_empty()).then(|| format!("{}\n", lines.join("\n")))
}
1339
1340pub(crate) fn build_pypi_urls(
1341 name: Option<&str>,
1342 version: Option<&str>,
1343) -> (
1344 Option<String>,
1345 Option<String>,
1346 Option<String>,
1347 Option<String>,
1348) {
1349 let repository_homepage_url = name.map(|value| format!("https://pypi.org/project/{}", value));
1350
1351 let repository_download_url = name.and_then(|value| {
1352 version.map(|ver| {
1353 format!(
1354 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
1355 &value[..1.min(value.len())],
1356 value,
1357 value,
1358 ver
1359 )
1360 })
1361 });
1362
1363 let api_data_url = name.map(|value| {
1364 if let Some(ver) = version {
1365 format!("https://pypi.org/pypi/{}/{}/json", value, ver)
1366 } else {
1367 format!("https://pypi.org/pypi/{}/json", value)
1368 }
1369 });
1370
1371 let purl = name.and_then(|value| {
1372 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), value).ok()?;
1373 if let Some(ver) = version {
1374 package_url.with_version(ver).ok()?;
1375 }
1376 Some(package_url.to_string())
1377 });
1378
1379 (
1380 repository_homepage_url,
1381 repository_download_url,
1382 api_data_url,
1383 purl,
1384 )
1385}
1386
/// Builds a PyPI purl for `name`, optionally versioned, carrying an
/// `extension` qualifier (the archive type, e.g. a wheel or sdist suffix).
///
/// Returns `None` when any purl-construction step (name, version, or
/// qualifier) is rejected by the purl library.
fn build_pypi_purl_with_extension(
    name: &str,
    version: Option<&str>,
    extension: &str,
) -> Option<String> {
    let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
    if let Some(ver) = version {
        package_url.with_version(ver).ok()?;
    }
    package_url.add_qualifier("extension", extension).ok()?;
    Some(package_url.to_string())
}
1399
1400fn extract_from_pyproject_toml(path: &Path) -> PackageData {
1401 let toml_content = match read_toml_file(path) {
1402 Ok(content) => content,
1403 Err(e) => {
1404 warn!(
1405 "Failed to read or parse pyproject.toml at {:?}: {}",
1406 path, e
1407 );
1408 return default_package_data();
1409 }
1410 };
1411
1412 let tool_table = toml_content.get("tool").and_then(|v| v.as_table());
1413
1414 let project_table =
1416 if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
1417 project.clone()
1419 } else if let Some(tool) = tool_table {
1420 if let Some(poetry) = tool.get("poetry").and_then(|v| v.as_table()) {
1421 poetry.clone()
1423 } else {
1424 warn!(
1425 "No project or tool.poetry data found in pyproject.toml at {:?}",
1426 path
1427 );
1428 return default_package_data();
1429 }
1430 } else if toml_content.get(FIELD_NAME).is_some() {
1431 match toml_content.as_table() {
1433 Some(table) => table.clone(),
1434 None => {
1435 warn!("Failed to convert TOML content to table in {:?}", path);
1436 return default_package_data();
1437 }
1438 }
1439 } else {
1440 warn!("No project data found in pyproject.toml at {:?}", path);
1441 return default_package_data();
1442 };
1443
1444 let name = project_table
1445 .get(FIELD_NAME)
1446 .and_then(|v| v.as_str())
1447 .map(String::from);
1448
1449 let version = project_table
1450 .get(FIELD_VERSION)
1451 .and_then(|v| v.as_str())
1452 .map(String::from);
1453 let classifiers = project_table
1454 .get("classifiers")
1455 .and_then(|value| value.as_array())
1456 .map(|values| {
1457 values
1458 .iter()
1459 .filter_map(|value| value.as_str().map(ToOwned::to_owned))
1460 .collect::<Vec<_>>()
1461 })
1462 .unwrap_or_default();
1463
1464 let license_detections = Vec::new();
1466 let extracted_license_statement = extract_raw_license_string(&project_table);
1467 let declared_license_expression = None;
1468 let declared_license_expression_spdx = None;
1469
1470 let (homepage_url, repository_url) = extract_urls(&project_table);
1472
1473 let (dependencies, optional_dependencies) = extract_dependencies(&project_table, &toml_content);
1474 let extra_data = extract_pyproject_extra_data(&toml_content);
1475
1476 let purl = name.as_ref().and_then(|n| {
1478 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n) {
1479 Ok(p) => p,
1480 Err(e) => {
1481 warn!(
1482 "Failed to create PackageUrl for Python package '{}': {}",
1483 n, e
1484 );
1485 return None;
1486 }
1487 };
1488
1489 if let Some(v) = &version
1490 && let Err(e) = package_url.with_version(v)
1491 {
1492 warn!(
1493 "Failed to set version '{}' for Python package '{}': {}",
1494 v, n, e
1495 );
1496 return None;
1497 }
1498
1499 Some(package_url.to_string())
1500 });
1501
1502 let api_data_url = name.as_ref().map(|n| {
1503 if let Some(v) = &version {
1504 format!("https://pypi.org/pypi/{}/{}/json", n, v)
1505 } else {
1506 format!("https://pypi.org/pypi/{}/json", n)
1507 }
1508 });
1509
1510 let pypi_homepage_url = name
1511 .as_ref()
1512 .map(|n| format!("https://pypi.org/project/{}", n));
1513
1514 let pypi_download_url = name.as_ref().and_then(|n| {
1515 version.as_ref().map(|v| {
1516 format!(
1517 "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
1518 &n[..1.min(n.len())],
1519 n,
1520 n,
1521 v
1522 )
1523 })
1524 });
1525
1526 PackageData {
1527 package_type: Some(PythonParser::PACKAGE_TYPE),
1528 namespace: None,
1529 name,
1530 version,
1531 qualifiers: None,
1532 subpath: None,
1533 primary_language: None,
1534 description: None,
1535 release_date: None,
1536 parties: extract_parties(&project_table),
1537 keywords: Vec::new(),
1538 homepage_url: homepage_url.or(pypi_homepage_url),
1539 download_url: repository_url.clone().or(pypi_download_url),
1540 size: None,
1541 sha1: None,
1542 md5: None,
1543 sha256: None,
1544 sha512: None,
1545 bug_tracking_url: None,
1546 code_view_url: None,
1547 vcs_url: repository_url,
1548 copyright: None,
1549 holder: None,
1550 declared_license_expression,
1551 declared_license_expression_spdx,
1552 license_detections,
1553 other_license_expression: None,
1554 other_license_expression_spdx: None,
1555 other_license_detections: Vec::new(),
1556 extracted_license_statement,
1557 notice_text: None,
1558 source_packages: Vec::new(),
1559 file_references: Vec::new(),
1560 is_private: has_private_classifier(&classifiers),
1561 is_virtual: false,
1562 extra_data,
1563 dependencies: [dependencies, optional_dependencies].concat(),
1564 repository_homepage_url: None,
1565 repository_download_url: None,
1566 api_data_url,
1567 datasource_id: Some(DatasourceId::PypiPyprojectToml),
1568 purl,
1569 }
1570}
1571
1572fn extract_raw_license_string(project: &TomlMap<String, TomlValue>) -> Option<String> {
1573 project
1574 .get(FIELD_LICENSE)
1575 .and_then(|license_value| match license_value {
1576 TomlValue::String(license_str) => Some(license_str.clone()),
1577 TomlValue::Table(license_table) => license_table
1578 .get("text")
1579 .and_then(|v| v.as_str())
1580 .map(|s| s.to_string())
1581 .or_else(|| {
1582 license_table
1583 .get("expression")
1584 .and_then(|v| v.as_str())
1585 .map(|expr| expr.to_string())
1586 }),
1587 _ => None,
1588 })
1589}
1590
1591fn extract_urls(project: &TomlMap<String, TomlValue>) -> (Option<String>, Option<String>) {
1592 let mut homepage_url = None;
1593 let mut repository_url = None;
1594
1595 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
1597 homepage_url = urls
1598 .get(FIELD_HOMEPAGE)
1599 .and_then(|v| v.as_str())
1600 .map(String::from);
1601 repository_url = urls
1602 .get(FIELD_REPOSITORY)
1603 .and_then(|v| v.as_str())
1604 .map(String::from);
1605 }
1606
1607 if homepage_url.is_none() {
1609 homepage_url = project
1610 .get(FIELD_HOMEPAGE)
1611 .and_then(|v| v.as_str())
1612 .map(String::from);
1613 }
1614
1615 if repository_url.is_none() {
1616 repository_url = project
1617 .get(FIELD_REPOSITORY)
1618 .and_then(|v| v.as_str())
1619 .map(String::from);
1620 }
1621
1622 (homepage_url, repository_url)
1623}
1624
1625fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
1626 let mut parties = Vec::new();
1627
1628 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
1629 for author in authors {
1630 if let Some(author_str) = author.as_str() {
1631 let (name, email) = split_name_email(author_str);
1632 parties.push(Party {
1633 r#type: None,
1634 role: Some("author".to_string()),
1635 name,
1636 email,
1637 url: None,
1638 organization: None,
1639 organization_url: None,
1640 timezone: None,
1641 });
1642 }
1643 }
1644 }
1645
1646 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
1647 for maintainer in maintainers {
1648 if let Some(maintainer_str) = maintainer.as_str() {
1649 let (name, email) = split_name_email(maintainer_str);
1650 parties.push(Party {
1651 r#type: None,
1652 role: Some("maintainer".to_string()),
1653 name,
1654 email,
1655 url: None,
1656 organization: None,
1657 organization_url: None,
1658 timezone: None,
1659 });
1660 }
1661 }
1662 }
1663
1664 parties
1665}
1666
1667fn extract_dependencies(
1668 project: &TomlMap<String, TomlValue>,
1669 toml_content: &TomlValue,
1670) -> (Vec<Dependency>, Vec<Dependency>) {
1671 let mut dependencies = Vec::new();
1672 let mut optional_dependencies = Vec::new();
1673
1674 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
1676 match deps_value {
1677 TomlValue::Array(arr) => {
1678 dependencies = parse_dependency_array(arr, false, None);
1679 }
1680 TomlValue::Table(table) => {
1681 dependencies = parse_dependency_table(table, false, None);
1682 }
1683 _ => {}
1684 }
1685 }
1686
1687 if let Some(opt_deps_table) = project
1689 .get(FIELD_OPTIONAL_DEPENDENCIES)
1690 .and_then(|v| v.as_table())
1691 {
1692 for (extra_name, deps) in opt_deps_table {
1693 match deps {
1694 TomlValue::Array(arr) => {
1695 optional_dependencies.extend(parse_dependency_array(
1696 arr,
1697 true,
1698 Some(extra_name),
1699 ));
1700 }
1701 TomlValue::Table(table) => {
1702 optional_dependencies.extend(parse_dependency_table(
1703 table,
1704 true,
1705 Some(extra_name),
1706 ));
1707 }
1708 _ => {}
1709 }
1710 }
1711 }
1712
1713 if let Some(dev_deps_value) = project.get(FIELD_DEV_DEPENDENCIES) {
1715 match dev_deps_value {
1716 TomlValue::Array(arr) => {
1717 optional_dependencies.extend(parse_dependency_array(
1718 arr,
1719 true,
1720 Some(FIELD_DEV_DEPENDENCIES),
1721 ));
1722 }
1723 TomlValue::Table(table) => {
1724 optional_dependencies.extend(parse_dependency_table(
1725 table,
1726 true,
1727 Some(FIELD_DEV_DEPENDENCIES),
1728 ));
1729 }
1730 _ => {}
1731 }
1732 }
1733
1734 if let Some(groups_table) = project.get("group").and_then(|v| v.as_table()) {
1736 for (group_name, group_data) in groups_table {
1737 if let Some(group_deps) = group_data.as_table().and_then(|t| t.get("dependencies")) {
1738 match group_deps {
1739 TomlValue::Array(arr) => {
1740 optional_dependencies.extend(parse_dependency_array(
1741 arr,
1742 true,
1743 Some(group_name),
1744 ));
1745 }
1746 TomlValue::Table(table) => {
1747 optional_dependencies.extend(parse_dependency_table(
1748 table,
1749 true,
1750 Some(group_name),
1751 ));
1752 }
1753 _ => {}
1754 }
1755 }
1756 }
1757 }
1758
1759 if let Some(groups_table) = toml_content
1760 .get(FIELD_DEPENDENCY_GROUPS)
1761 .and_then(|value| value.as_table())
1762 {
1763 for (group_name, deps) in groups_table {
1764 match deps {
1765 TomlValue::Array(arr) => {
1766 optional_dependencies.extend(parse_dependency_array(
1767 arr,
1768 true,
1769 Some(group_name),
1770 ));
1771 }
1772 TomlValue::Table(table) => {
1773 optional_dependencies.extend(parse_dependency_table(
1774 table,
1775 true,
1776 Some(group_name),
1777 ));
1778 }
1779 _ => {}
1780 }
1781 }
1782 }
1783
1784 if let Some(dev_deps_value) = toml_content
1785 .get("tool")
1786 .and_then(|value| value.as_table())
1787 .and_then(|tool| tool.get("uv"))
1788 .and_then(|value| value.as_table())
1789 .and_then(|uv| uv.get(FIELD_DEV_DEPENDENCIES))
1790 {
1791 match dev_deps_value {
1792 TomlValue::Array(arr) => {
1793 optional_dependencies.extend(parse_dependency_array(arr, true, Some("dev")));
1794 }
1795 TomlValue::Table(table) => {
1796 optional_dependencies.extend(parse_dependency_table(table, true, Some("dev")));
1797 }
1798 _ => {}
1799 }
1800 }
1801
1802 (dependencies, optional_dependencies)
1803}
1804
1805fn extract_pyproject_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
1806 let mut extra_data = HashMap::new();
1807
1808 if let Some(tool_uv) = toml_content
1809 .get("tool")
1810 .and_then(|value| value.as_table())
1811 .and_then(|tool| tool.get("uv"))
1812 {
1813 extra_data.insert("tool_uv".to_string(), toml_value_to_json(tool_uv));
1814 }
1815
1816 if extra_data.is_empty() {
1817 None
1818 } else {
1819 Some(extra_data)
1820 }
1821}
1822
1823fn toml_value_to_json(value: &TomlValue) -> JsonValue {
1824 match value {
1825 TomlValue::String(value) => JsonValue::String(value.clone()),
1826 TomlValue::Integer(value) => JsonValue::String(value.to_string()),
1827 TomlValue::Float(value) => JsonValue::String(value.to_string()),
1828 TomlValue::Boolean(value) => JsonValue::Bool(*value),
1829 TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
1830 TomlValue::Array(values) => {
1831 JsonValue::Array(values.iter().map(toml_value_to_json).collect())
1832 }
1833 TomlValue::Table(values) => JsonValue::Object(
1834 values
1835 .iter()
1836 .map(|(key, value)| (key.clone(), toml_value_to_json(value)))
1837 .collect::<JsonMap<String, JsonValue>>(),
1838 ),
1839 }
1840}
1841
1842fn parse_dependency_table(
1843 table: &TomlMap<String, TomlValue>,
1844 is_optional: bool,
1845 scope: Option<&str>,
1846) -> Vec<Dependency> {
1847 table
1848 .iter()
1849 .filter_map(|(name, version)| {
1850 let version_str = version.as_str().map(|s| s.to_string());
1851 let mut package_url =
1852 PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
1853
1854 if let Some(v) = &version_str {
1855 package_url.with_version(v).ok()?;
1856 }
1857
1858 Some(Dependency {
1859 purl: Some(package_url.to_string()),
1860 extracted_requirement: None,
1861 scope: scope.map(|s| s.to_string()),
1862 is_runtime: Some(!is_optional),
1863 is_optional: Some(is_optional),
1864 is_pinned: None,
1865 is_direct: Some(true),
1866 resolved_package: None,
1867 extra_data: None,
1868 })
1869 })
1870 .collect()
1871}
1872
1873fn parse_dependency_array(
1874 array: &[TomlValue],
1875 is_optional: bool,
1876 scope: Option<&str>,
1877) -> Vec<Dependency> {
1878 array
1879 .iter()
1880 .filter_map(|dep| {
1881 let dep_str = dep.as_str()?;
1882
1883 let mut parts = dep_str.split(['>', '=', '<', '~']);
1884 let name = parts.next()?.trim().to_string();
1885
1886 let version = parts.next().map(|v| v.trim().to_string());
1887
1888 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name)
1889 {
1890 Ok(purl) => purl,
1891 Err(_) => return None,
1892 };
1893
1894 if let Some(ref v) = version {
1895 package_url.with_version(v).ok()?;
1896 }
1897
1898 Some(Dependency {
1899 purl: Some(package_url.to_string()),
1900 extracted_requirement: None,
1901 scope: scope.map(|s| s.to_string()),
1902 is_runtime: Some(!is_optional),
1903 is_optional: Some(is_optional),
1904 is_pinned: None,
1905 is_direct: Some(true),
1906 resolved_package: None,
1907 extra_data: None,
1908 })
1909 })
1910 .collect()
1911}
1912
/// A restricted Python literal value produced by `LiteralEvaluator`.
///
/// Covers only the literal shapes a statically-evaluable setup.py can
/// contain: strings, numbers (ints are widened to f64 by the evaluator),
/// bools, `None`, and list/tuple/dict containers of further literals.
#[derive(Debug, Clone)]
enum Value {
    String(String),
    Number(f64),
    Bool(bool),
    None,
    List(Vec<Value>),
    Tuple(Vec<Value>),
    Dict(HashMap<String, Value>),
}
1923
/// Statically evaluates simple Python literal expressions from a parsed
/// setup.py AST, with node/depth budgets to bound work on pathological
/// or hostile input.
struct LiteralEvaluator {
    // Known name -> literal bindings from earlier top-level assignments.
    constants: HashMap<String, Value>,
    // Maximum expression nesting depth (seeded from MAX_SETUP_PY_AST_DEPTH).
    max_depth: usize,
    // Maximum total AST nodes visited (seeded from MAX_SETUP_PY_AST_NODES).
    max_nodes: usize,
    // Running count of nodes visited across all evaluate_expr calls.
    nodes_visited: usize,
}
1930
impl LiteralEvaluator {
    /// Creates an evaluator seeded with the given constant bindings and
    /// the default node/depth budgets.
    fn new(constants: HashMap<String, Value>) -> Self {
        Self {
            constants,
            max_depth: MAX_SETUP_PY_AST_DEPTH,
            max_nodes: MAX_SETUP_PY_AST_NODES,
            nodes_visited: 0,
        }
    }

    /// Registers a name -> literal binding so later expressions can
    /// reference it by name.
    fn insert_constant(&mut self, name: String, value: Value) {
        self.constants.insert(name, value);
    }

    /// Attempts to statically evaluate `expr` to a literal `Value`.
    ///
    /// Returns `None` when the expression is not a supported literal form,
    /// references an unknown name, or when the depth/node budget runs out.
    /// Supported forms: constants, previously bound names, list/tuple/dict
    /// displays, `dict(key=value, ...)` calls with no positional args, and
    /// `OrderedDict([...])` / `collections.OrderedDict([...])` calls.
    fn evaluate_expr(&mut self, expr: &ast::Expr, depth: usize) -> Option<Value> {
        // Budget check first so recursion is bounded on hostile input.
        if depth >= self.max_depth || self.nodes_visited >= self.max_nodes {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Constant(ast::ExprConstant { value, .. }) => self.evaluate_constant(value),
            // Names resolve against previously recorded constants.
            ast::Expr::Name(ast::ExprName { id, .. }) => self.constants.get(id.as_str()).cloned(),
            ast::Expr::List(ast::ExprList { elts, .. }) => {
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::List(values))
            }
            ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
                let mut values = Vec::new();
                for elt in elts {
                    values.push(self.evaluate_expr(elt, depth + 1)?);
                }
                Some(Value::Tuple(values))
            }
            ast::Expr::Dict(ast::ExprDict { keys, values, .. }) => {
                let mut dict = HashMap::new();
                for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
                    // A None key means a `**spread` entry; give up on those.
                    let key_expr = key_expr.as_ref()?;
                    let key_value = self.evaluate_expr(key_expr, depth + 1)?;
                    let key = value_to_string(&key_value)?;
                    let value = self.evaluate_expr(value_expr, depth + 1)?;
                    dict.insert(key, value);
                }
                Some(Value::Dict(dict))
            }
            ast::Expr::Call(ast::ExprCall {
                func,
                args,
                keywords,
                ..
            }) => {
                // OrderedDict([...]) taking positional args only.
                if keywords.is_empty()
                    && let Some(name) = dotted_name(func.as_ref(), depth + 1)
                    && matches!(name.as_str(), "OrderedDict" | "collections.OrderedDict")
                {
                    return self.evaluate_ordered_dict(args, depth + 1);
                }

                // All remaining supported calls take keyword args only.
                if !args.is_empty() {
                    return None;
                }

                // dict(key=value, ...) builds a Dict from keyword args.
                if let ast::Expr::Name(ast::ExprName { id, .. }) = func.as_ref()
                    && id == "dict"
                {
                    let mut dict = HashMap::new();
                    for keyword in keywords {
                        // `**kwargs` has no arg name; bail out in that case.
                        let key = keyword.arg.as_ref().map(|name| name.as_str())?;
                        let value = self.evaluate_expr(&keyword.value, depth + 1)?;
                        dict.insert(key.to_string(), value);
                    }
                    return Some(Value::Dict(dict));
                }

                None
            }
            _ => None,
        }
    }

    /// Converts a Python AST constant into a `Value`.
    ///
    /// Ints are converted via their decimal string to f64 (very large ints
    /// lose precision); bytes/ellipsis/complex constants yield None.
    fn evaluate_constant(&self, constant: &ast::Constant) -> Option<Value> {
        match constant {
            ast::Constant::Str(value) => Some(Value::String(value.clone())),
            ast::Constant::Bool(value) => Some(Value::Bool(*value)),
            ast::Constant::Int(value) => value.to_string().parse::<f64>().ok().map(Value::Number),
            ast::Constant::Float(value) => Some(Value::Number(*value)),
            ast::Constant::None => Some(Value::None),
            _ => None,
        }
    }

    /// Evaluates `OrderedDict(iterable)` where `iterable` is a list or
    /// tuple of two-element `(key, value)` tuples; any other shape
    /// yields None.
    fn evaluate_ordered_dict(&mut self, args: &[ast::Expr], depth: usize) -> Option<Value> {
        if args.len() != 1 {
            return None;
        }

        let items = match self.evaluate_expr(&args[0], depth)? {
            Value::List(items) | Value::Tuple(items) => items,
            _ => return None,
        };

        let mut dict = HashMap::new();
        for item in items {
            let Value::Tuple(values) = item else {
                return None;
            };
            if values.len() != 2 {
                return None;
            }
            let key = value_to_string(&values[0])?;
            dict.insert(key, values[1].clone());
        }

        Some(Value::Dict(dict))
    }
}
2050
/// Names under which `setup()` may be invoked in a setup.py file.
#[derive(Default)]
struct SetupAliases {
    // Local names bound to the setup function itself
    // (e.g. `from setuptools import setup as s` -> "s").
    setup_names: HashSet<String>,
    // Local module alias -> canonical module name
    // (e.g. `import setuptools as st` -> "st" => "setuptools").
    module_aliases: HashMap<String, String>,
}
2056
2057fn extract_from_setup_py(path: &Path) -> PackageData {
2058 let content = match read_file_to_string(path) {
2059 Ok(content) => content,
2060 Err(e) => {
2061 warn!("Failed to read setup.py at {:?}: {}", path, e);
2062 return default_package_data();
2063 }
2064 };
2065
2066 if content.len() > MAX_SETUP_PY_BYTES {
2067 warn!("setup.py too large at {:?}: {} bytes", path, content.len());
2068 return extract_from_setup_py_regex(&content);
2069 }
2070
2071 let mut package_data = match extract_from_setup_py_ast(&content) {
2072 Ok(Some(data)) => data,
2073 Ok(None) => extract_from_setup_py_regex(&content),
2074 Err(e) => {
2075 warn!("Failed to parse setup.py AST at {:?}: {}", path, e);
2076 extract_from_setup_py_regex(&content)
2077 }
2078 };
2079
2080 if package_data.name.is_none() {
2081 package_data.name = extract_setup_value(&content, "name");
2082 }
2083
2084 if package_data.version.is_none() {
2085 package_data.version = extract_setup_value(&content, "version");
2086 }
2087
2088 fill_from_sibling_dunder_metadata(path, &content, &mut package_data);
2089
2090 if package_data.purl.is_none() {
2091 package_data.purl = build_setup_py_purl(
2092 package_data.name.as_deref(),
2093 package_data.version.as_deref(),
2094 );
2095 }
2096
2097 package_data
2098}
2099
2100fn fill_from_sibling_dunder_metadata(path: &Path, content: &str, package_data: &mut PackageData) {
2101 if package_data.version.is_some()
2102 && package_data.extracted_license_statement.is_some()
2103 && package_data
2104 .parties
2105 .iter()
2106 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some())
2107 {
2108 return;
2109 }
2110
2111 let Some(root) = path.parent() else {
2112 return;
2113 };
2114
2115 let dunder_metadata = collect_sibling_dunder_metadata(root, content);
2116
2117 if package_data.version.is_none() {
2118 package_data.version = dunder_metadata.version;
2119 }
2120
2121 if package_data.extracted_license_statement.is_none() {
2122 package_data.extracted_license_statement = dunder_metadata.license;
2123 }
2124
2125 let has_author = package_data
2126 .parties
2127 .iter()
2128 .any(|party| party.role.as_deref() == Some("author") && party.name.is_some());
2129
2130 if !has_author && let Some(author) = dunder_metadata.author {
2131 package_data.parties.push(Party {
2132 r#type: Some("person".to_string()),
2133 role: Some("author".to_string()),
2134 name: Some(author),
2135 email: None,
2136 url: None,
2137 organization: None,
2138 organization_url: None,
2139 timezone: None,
2140 });
2141 }
2142}
2143
/// Metadata scraped from sibling modules' `__version__`, `__author__`,
/// and `__license__` assignments.
#[derive(Default)]
struct DunderMetadata {
    version: Option<String>,
    author: Option<String>,
    license: Option<String>,
}
2150
2151fn collect_sibling_dunder_metadata(root: &Path, content: &str) -> DunderMetadata {
2152 let statements = match ast::Suite::parse(content, "<setup.py>") {
2153 Ok(statements) => statements,
2154 Err(_) => return DunderMetadata::default(),
2155 };
2156
2157 let version_re = Regex::new(r#"(?m)^\s*__version__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2158 let author_re = Regex::new(r#"(?m)^\s*__author__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2159 let license_re = Regex::new(r#"(?m)^\s*__license__\s*=\s*['\"]([^'\"]+)['\"]"#).ok();
2160 let mut metadata = DunderMetadata::default();
2161
2162 for module in imported_dunder_modules(&statements) {
2163 let Some(path) = resolve_imported_module_path(root, &module) else {
2164 continue;
2165 };
2166 let Ok(module_content) = read_file_to_string(&path) else {
2167 continue;
2168 };
2169
2170 if metadata.version.is_none() {
2171 metadata.version = version_re
2172 .as_ref()
2173 .and_then(|regex| regex.captures(&module_content))
2174 .and_then(|captures| captures.get(1))
2175 .map(|match_| match_.as_str().to_string());
2176 }
2177
2178 if metadata.author.is_none() {
2179 metadata.author = author_re
2180 .as_ref()
2181 .and_then(|regex| regex.captures(&module_content))
2182 .and_then(|captures| captures.get(1))
2183 .map(|match_| match_.as_str().to_string());
2184 }
2185
2186 if metadata.license.is_none() {
2187 metadata.license = license_re
2188 .as_ref()
2189 .and_then(|regex| regex.captures(&module_content))
2190 .and_then(|captures| captures.get(1))
2191 .map(|match_| match_.as_str().to_string());
2192 }
2193
2194 if metadata.version.is_some() && metadata.author.is_some() && metadata.license.is_some() {
2195 return metadata;
2196 }
2197 }
2198
2199 metadata
2200}
2201
2202fn imported_dunder_modules(statements: &[ast::Stmt]) -> Vec<String> {
2203 let mut modules = Vec::new();
2204
2205 for statement in statements {
2206 let ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) = statement else {
2207 continue;
2208 };
2209 let Some(module) = module.as_ref().map(|name| name.as_str()) else {
2210 continue;
2211 };
2212 let imports_dunder = names.iter().any(|alias| {
2213 matches!(
2214 alias.name.as_str(),
2215 "__version__" | "__author__" | "__license__"
2216 )
2217 });
2218 if imports_dunder {
2219 modules.push(module.to_string());
2220 }
2221 }
2222
2223 modules
2224}
2225
/// Maps a dotted Python module name to an existing file under `root`,
/// trying `<mod>.py` and `<mod>/__init__.py` both directly and inside a
/// `src/` layout. Returns the first candidate that exists on disk.
fn resolve_imported_module_path(root: &Path, module: &str) -> Option<PathBuf> {
    let relative: PathBuf = module.split('.').collect();

    [
        root.join(relative.with_extension("py")),
        root.join(&relative).join("__init__.py"),
        root.join("src").join(relative.with_extension("py")),
        root.join("src").join(&relative).join("__init__.py"),
    ]
    .into_iter()
    .find(|candidate| candidate.exists())
}
2237
2238fn extract_from_setup_py_ast(content: &str) -> Result<Option<PackageData>, String> {
2254 let statements = ast::Suite::parse(content, "<setup.py>").map_err(|e| format!("{}", e))?;
2255 let aliases = collect_setup_aliases(&statements);
2256 let mut evaluator = LiteralEvaluator::new(HashMap::new());
2257 build_setup_py_constants(&statements, &mut evaluator);
2258
2259 let setup_call = find_setup_call(&statements, &aliases);
2260 let Some(call_expr) = setup_call else {
2261 return Ok(None);
2262 };
2263
2264 let setup_values = extract_setup_keywords(call_expr, &mut evaluator);
2265 Ok(Some(build_setup_py_package_data(&setup_values)))
2266}
2267
2268fn build_setup_py_constants(statements: &[ast::Stmt], evaluator: &mut LiteralEvaluator) {
2269 for stmt in statements {
2270 if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = stmt {
2271 if targets.len() != 1 {
2272 continue;
2273 }
2274
2275 let Some(name) = extract_assign_name(&targets[0]) else {
2276 continue;
2277 };
2278
2279 if let Some(value) = evaluator.evaluate_expr(value.as_ref(), 0) {
2280 evaluator.insert_constant(name, value);
2281 }
2282 }
2283 }
2284}
2285
2286fn extract_assign_name(target: &ast::Expr) -> Option<String> {
2287 match target {
2288 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
2289 _ => None,
2290 }
2291}
2292
2293fn collect_setup_aliases(statements: &[ast::Stmt]) -> SetupAliases {
2294 let mut aliases = SetupAliases::default();
2295 aliases.setup_names.insert("setup".to_string());
2296
2297 for stmt in statements {
2298 match stmt {
2299 ast::Stmt::Import(ast::StmtImport { names, .. }) => {
2300 for alias in names {
2301 let module_name = alias.name.as_str();
2302 if !is_setup_module(module_name) {
2303 continue;
2304 }
2305 let alias_name = alias
2306 .asname
2307 .as_ref()
2308 .map(|name| name.as_str())
2309 .unwrap_or(module_name);
2310 aliases
2311 .module_aliases
2312 .insert(alias_name.to_string(), module_name.to_string());
2313 }
2314 }
2315 ast::Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
2316 let Some(module_name) = module.as_ref().map(|name| name.as_str()) else {
2317 continue;
2318 };
2319 if !is_setup_module(module_name) {
2320 continue;
2321 }
2322 for alias in names {
2323 if alias.name.as_str() != "setup" {
2324 continue;
2325 }
2326 let alias_name = alias
2327 .asname
2328 .as_ref()
2329 .map(|name| name.as_str())
2330 .unwrap_or("setup");
2331 aliases.setup_names.insert(alias_name.to_string());
2332 }
2333 }
2334 _ => {}
2335 }
2336 }
2337
2338 aliases
2339}
2340
/// Returns true for module names that provide a packaging `setup()` entry
/// point (setuptools and the distutils variants).
fn is_setup_module(module_name: &str) -> bool {
    ["setuptools", "distutils", "distutils.core"].contains(&module_name)
}
2344
2345fn find_setup_call<'a>(
2346 statements: &'a [ast::Stmt],
2347 aliases: &'a SetupAliases,
2348) -> Option<&'a ast::Expr> {
2349 let mut finder = SetupCallFinder {
2350 aliases,
2351 nodes_visited: 0,
2352 };
2353 finder.find_in_statements(statements)
2354}
2355
/// Traverses a Python AST looking for a `setup(...)` call, with a visit
/// budget so pathological files cannot cause unbounded work.
struct SetupCallFinder<'a> {
    // Known setup function names and module aliases for this file.
    aliases: &'a SetupAliases,
    // Nodes visited so far; traversal aborts at MAX_SETUP_PY_AST_NODES.
    nodes_visited: usize,
}
2360
impl<'a> SetupCallFinder<'a> {
    /// Walks the statements (descending into nested control-flow bodies)
    /// and returns the first `setup(...)` call expression found, or None
    /// when none exists or the node budget is exhausted first.
    fn find_in_statements(&mut self, statements: &'a [ast::Stmt]) -> Option<&'a ast::Expr> {
        for stmt in statements {
            if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
                return None;
            }
            self.nodes_visited += 1;

            let found = match stmt {
                // A bare expression statement or the right-hand side of an
                // assignment may itself be the setup(...) call.
                ast::Stmt::Expr(ast::StmtExpr { value, .. }) => self.visit_expr(value.as_ref()),
                ast::Stmt::Assign(ast::StmtAssign { value, .. }) => self.visit_expr(value.as_ref()),
                ast::Stmt::If(ast::StmtIf { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::For(ast::StmtFor { body, orelse, .. })
                | ast::Stmt::While(ast::StmtWhile { body, orelse, .. }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse)),
                ast::Stmt::With(ast::StmtWith { body, .. }) => self.find_in_statements(body),
                // try/except/else/finally: search each section, then every
                // exception handler body.
                ast::Stmt::Try(ast::StmtTry {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                })
                | ast::Stmt::TryStar(ast::StmtTryStar {
                    body,
                    orelse,
                    finalbody,
                    handlers,
                    ..
                }) => self
                    .find_in_statements(body)
                    .or_else(|| self.find_in_statements(orelse))
                    .or_else(|| self.find_in_statements(finalbody))
                    .or_else(|| {
                        for handler in handlers {
                            let ast::ExceptHandler::ExceptHandler(
                                ast::ExceptHandlerExceptHandler { body, .. },
                            ) = handler;
                            if let Some(found) = self.find_in_statements(body) {
                                return Some(found);
                            }
                        }
                        None
                    }),
                _ => None,
            };

            if found.is_some() {
                return found;
            }
        }

        None
    }

    /// Returns `expr` itself when it is a call whose callee matches one of
    /// the known setup names/aliases; counts against the node budget.
    fn visit_expr(&mut self, expr: &'a ast::Expr) -> Option<&'a ast::Expr> {
        if self.nodes_visited >= MAX_SETUP_PY_AST_NODES {
            return None;
        }
        self.nodes_visited += 1;

        match expr {
            ast::Expr::Call(ast::ExprCall { func, .. })
                if is_setup_call(func.as_ref(), self.aliases) =>
            {
                Some(expr)
            }
            _ => None,
        }
    }
}
2435
2436fn is_setup_call(func: &ast::Expr, aliases: &SetupAliases) -> bool {
2437 let Some(dotted) = dotted_name(func, 0) else {
2438 return false;
2439 };
2440
2441 if aliases.setup_names.contains(&dotted) {
2442 return true;
2443 }
2444
2445 let Some(module) = dotted.strip_suffix(".setup") else {
2446 return false;
2447 };
2448
2449 let resolved = resolve_module_alias(module, aliases);
2450 is_setup_module(&resolved)
2451}
2452
2453fn dotted_name(expr: &ast::Expr, depth: usize) -> Option<String> {
2454 if depth >= MAX_SETUP_PY_AST_DEPTH {
2455 return None;
2456 }
2457
2458 match expr {
2459 ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str().to_string()),
2460 ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
2461 let base = dotted_name(value.as_ref(), depth + 1)?;
2462 Some(format!("{}.{}", base, attr.as_str()))
2463 }
2464 _ => None,
2465 }
2466}
2467
2468fn resolve_module_alias(module: &str, aliases: &SetupAliases) -> String {
2469 if let Some(mapped) = aliases.module_aliases.get(module) {
2470 return mapped.clone();
2471 }
2472
2473 let Some((base, rest)) = module.split_once('.') else {
2474 return module.to_string();
2475 };
2476
2477 if let Some(mapped) = aliases.module_aliases.get(base) {
2478 return format!("{}.{}", mapped, rest);
2479 }
2480
2481 module.to_string()
2482}
2483
2484fn extract_setup_keywords(
2485 call_expr: &ast::Expr,
2486 evaluator: &mut LiteralEvaluator,
2487) -> HashMap<String, Value> {
2488 let mut values = HashMap::new();
2489 let ast::Expr::Call(ast::ExprCall { keywords, .. }) = call_expr else {
2490 return values;
2491 };
2492
2493 for keyword in keywords {
2494 if let Some(arg) = keyword.arg.as_ref().map(|name| name.as_str()) {
2495 if let Some(value) = evaluator.evaluate_expr(&keyword.value, 0) {
2496 values.insert(arg.to_string(), value);
2497 }
2498 } else if let Some(Value::Dict(dict)) = evaluator.evaluate_expr(&keyword.value, 0) {
2499 for (key, value) in dict {
2500 values.insert(key, value);
2501 }
2502 }
2503 }
2504
2505 values
2506}
2507
/// Assemble a [`PackageData`] from the literal keyword values of a parsed
/// `setup(...)` call (see `extract_setup_keywords`). Author/maintainer
/// fields become `Party` records, `project_urls` entries are mapped onto
/// the dedicated URL fields, and requirement keywords become dependencies.
fn build_setup_py_package_data(values: &HashMap<String, Value>) -> PackageData {
    let name = get_value_string(values, "name");
    let version = get_value_string(values, "version");
    // Older metadata may use "summary" instead of "description".
    let description =
        get_value_string(values, "description").or_else(|| get_value_string(values, "summary"));
    let homepage_url =
        get_value_string(values, "url").or_else(|| get_value_string(values, "home_page"));
    let author = get_value_string(values, "author");
    let author_email = get_value_string(values, "author_email");
    let maintainer = get_value_string(values, "maintainer");
    let maintainer_email = get_value_string(values, "maintainer_email");
    let license = get_value_string(values, "license");
    let classifiers = values
        .get("classifiers")
        .and_then(value_to_string_list)
        .unwrap_or_default();

    // A party is emitted when either the name or the email is present.
    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    if maintainer.is_some() || maintainer_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("maintainer".to_string()),
            name: maintainer,
            email: maintainer_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    // License text is recorded verbatim; no expression detection here.
    let declared_license_expression = None;
    let declared_license_expression_spdx = None;
    let license_detections = Vec::new();
    let extracted_license_statement = license.clone();

    let dependencies = build_setup_py_dependencies(values);
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());
    let mut homepage_from_project_urls = None;
    let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
    let mut extra_data = HashMap::new();

    // `project_urls` entries can populate homepage/bug-tracker/VCS fields;
    // unrecognized entries land in `extra_data`.
    if let Some(parsed_project_urls) = values.get("project_urls").and_then(value_to_string_pairs) {
        apply_project_url_mappings(
            &parsed_project_urls,
            &mut homepage_from_project_urls,
            &mut bug_tracking_url,
            &mut code_view_url,
            &mut vcs_url,
            &mut extra_data,
        );
    }

    let extra_data = if extra_data.is_empty() {
        None
    } else {
        Some(extra_data)
    };

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description,
        release_date: None,
        parties,
        keywords: Vec::new(),
        // Explicit `url=` keyword wins over a homepage from project_urls.
        homepage_url: homepage_url.or(homepage_from_project_urls),
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
2626
2627fn build_setup_py_dependencies(values: &HashMap<String, Value>) -> Vec<Dependency> {
2628 let mut dependencies = Vec::new();
2629
2630 if let Some(reqs) = values
2631 .get("install_requires")
2632 .and_then(value_to_string_list)
2633 {
2634 dependencies.extend(build_setup_py_dependency_list(&reqs, "install", false));
2635 }
2636
2637 if let Some(reqs) = values.get("tests_require").and_then(value_to_string_list) {
2638 dependencies.extend(build_setup_py_dependency_list(&reqs, "test", true));
2639 }
2640
2641 if let Some(Value::Dict(extras)) = values.get("extras_require") {
2642 let mut extra_items: Vec<_> = extras.iter().collect();
2643 extra_items.sort_by_key(|(name, _)| *name);
2644 for (extra_name, extra_value) in extra_items {
2645 if let Some(reqs) = value_to_string_list(extra_value) {
2646 dependencies.extend(build_setup_py_dependency_list(
2647 reqs.as_slice(),
2648 extra_name,
2649 true,
2650 ));
2651 }
2652 }
2653 }
2654
2655 dependencies
2656}
2657
2658fn build_setup_py_dependency_list(
2659 reqs: &[String],
2660 scope: &str,
2661 is_optional: bool,
2662) -> Vec<Dependency> {
2663 reqs.iter()
2664 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
2665 .collect()
2666}
2667
2668fn get_value_string(values: &HashMap<String, Value>, key: &str) -> Option<String> {
2669 values.get(key).and_then(value_to_string)
2670}
2671
2672fn value_to_string(value: &Value) -> Option<String> {
2673 match value {
2674 Value::String(value) => Some(value.clone()),
2675 Value::Number(value) => Some(value.to_string()),
2676 Value::Bool(value) => Some(value.to_string()),
2677 _ => None,
2678 }
2679}
2680
2681fn value_to_string_list(value: &Value) -> Option<Vec<String>> {
2682 match value {
2683 Value::String(value) => Some(vec![value.clone()]),
2684 Value::List(values) | Value::Tuple(values) => {
2685 let mut items = Vec::new();
2686 for item in values {
2687 items.push(value_to_string(item)?);
2688 }
2689 Some(items)
2690 }
2691 _ => None,
2692 }
2693}
2694
2695fn value_to_string_pairs(value: &Value) -> Option<Vec<(String, String)>> {
2696 let Value::Dict(dict) = value else {
2697 return None;
2698 };
2699
2700 let mut pairs: Vec<(String, String)> = dict
2701 .iter()
2702 .map(|(key, value)| Some((key.clone(), value_to_string(value)?)))
2703 .collect::<Option<Vec<_>>>()?;
2704 pairs.sort_by(|left, right| left.0.cmp(&right.0));
2705 Some(pairs)
2706}
2707
2708fn extract_rfc822_dependencies(headers: &HashMap<String, Vec<String>>) -> Vec<Dependency> {
2709 let requires_dist = super::rfc822::get_header_all(headers, "requires-dist");
2710 requires_dist
2711 .iter()
2712 .filter_map(|entry| build_rfc822_dependency(entry))
2713 .collect()
2714}
2715
/// Build a dependency from a single `Requires-Dist` metadata entry, using
/// the RFC 822 defaults: scope "install", not optional, no marker override.
fn build_rfc822_dependency(entry: &str) -> Option<Dependency> {
    build_python_dependency(entry, "install", false, None)
}
2719
/// Parse one PEP 508-style requirement line (`name[extras] specifiers ;
/// marker`) into a [`Dependency`].
///
/// * `default_scope` / `default_optional` apply when no `extra == "..."`
///   marker overrides them (see `parse_rfc822_marker`).
/// * `marker_override` is used only when the entry itself carries no
///   inline `; marker` part (requires.txt section markers).
/// * Returns `None` when no package name can be extracted or the purl
///   cannot be built.
fn build_python_dependency(
    entry: &str,
    default_scope: &str,
    default_optional: bool,
    marker_override: Option<&str>,
) -> Option<Dependency> {
    // Split `requirement ; marker` — the marker part is optional.
    let (requirement_part, marker_part) = entry
        .split_once(';')
        .map(|(req, marker)| (req.trim(), Some(marker.trim())))
        .unwrap_or((entry.trim(), None));

    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let requirement = normalize_rfc822_requirement(requirement_part);
    let (scope, is_optional, marker, marker_data) = parse_rfc822_marker(
        marker_part.or(marker_override),
        default_scope,
        default_optional,
    );
    let mut purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;

    // A requirement counts as pinned when its (sorted, comma-joined)
    // specifier string starts with `==` (or `===`).
    let is_pinned = requirement
        .as_deref()
        .is_some_and(|req| req.starts_with("==") || req.starts_with("==="));
    // NOTE(review): when several specifiers are present, the whole
    // comma-joined string minus leading '=' is used as the version (e.g.
    // "==1.0,>2" yields "1.0,>2") — confirm pinned entries are expected to
    // carry a single specifier.
    if is_pinned
        && let Some(version) = requirement
            .as_deref()
            .map(|req| req.trim_start_matches('='))
    {
        purl.with_version(version).ok()?;
    }

    // Marker-derived data (python_version, sys_platform, the raw marker).
    let mut extra_data = HashMap::new();
    extra_data.extend(marker_data);
    if let Some(marker) = marker {
        extra_data.insert("marker".to_string(), serde_json::Value::String(marker));
    }

    Some(Dependency {
        purl: Some(purl.to_string()),
        extracted_requirement: requirement,
        scope: Some(scope),
        is_runtime: Some(true),
        is_optional: Some(is_optional),
        is_pinned: Some(is_pinned),
        is_direct: Some(true),
        resolved_package: None,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
    })
}
2773
/// Normalize the specifier portion of a requirement line: strip the package
/// name, any `[extras]` list, and optional surrounding parentheses, then
/// return the remaining version specifiers with spaces removed, sorted, and
/// comma-joined. Returns `None` when the line carries no specifiers.
fn normalize_rfc822_requirement(requirement_part: &str) -> Option<String> {
    let name = extract_setup_cfg_dependency_name(requirement_part)?;
    let trimmed = requirement_part.trim();
    // Assumes `name` is a byte prefix of `trimmed`, so this slice cannot
    // split a char — TODO confirm the extractor guarantees this.
    let mut remainder = trimmed[name.len()..].trim();

    // Drop a `[extras]` segment immediately after the name, if present.
    if let Some(stripped) = remainder.strip_prefix('[')
        && let Some(end_idx) = stripped.find(']')
    {
        remainder = stripped[end_idx + 1..].trim();
    }

    // PEP 508 allows specifiers wrapped in parentheses: `name (>=1.0)`.
    let remainder = remainder
        .strip_prefix('(')
        .and_then(|value| value.strip_suffix(')'))
        .unwrap_or(remainder)
        .trim();

    if remainder.is_empty() {
        return None;
    }

    // Canonical form: per-specifier whitespace removed, sorted order.
    let mut specifiers: Vec<String> = remainder
        .split(',')
        .map(|specifier| specifier.trim().replace(' ', ""))
        .filter(|specifier| !specifier.is_empty())
        .collect();
    specifiers.sort();
    Some(specifiers.join(","))
}
2803
2804fn parse_rfc822_marker(
2805 marker_part: Option<&str>,
2806 default_scope: &str,
2807 default_optional: bool,
2808) -> (
2809 String,
2810 bool,
2811 Option<String>,
2812 HashMap<String, serde_json::Value>,
2813) {
2814 let Some(marker) = marker_part.filter(|marker| !marker.trim().is_empty()) else {
2815 return (
2816 default_scope.to_string(),
2817 default_optional,
2818 None,
2819 HashMap::new(),
2820 );
2821 };
2822
2823 let extra_re = Regex::new(r#"extra\s*==\s*['\"]([^'\"]+)['\"]"#)
2824 .expect("extra marker regex should compile");
2825 let mut extra_data = HashMap::new();
2826
2827 if let Some(python_version) = extract_marker_field(marker, "python_version") {
2828 extra_data.insert(
2829 "python_version".to_string(),
2830 serde_json::Value::String(python_version),
2831 );
2832 }
2833 if let Some(sys_platform) = extract_marker_field(marker, "sys_platform") {
2834 extra_data.insert(
2835 "sys_platform".to_string(),
2836 serde_json::Value::String(sys_platform),
2837 );
2838 }
2839
2840 if let Some(captures) = extra_re.captures(marker)
2841 && let Some(scope) = captures.get(1)
2842 {
2843 return (
2844 scope.as_str().to_string(),
2845 true,
2846 Some(marker.trim().to_string()),
2847 extra_data,
2848 );
2849 }
2850
2851 (
2852 default_scope.to_string(),
2853 default_optional,
2854 Some(marker.trim().to_string()),
2855 extra_data,
2856 )
2857}
2858
2859fn extract_marker_field(marker: &str, field: &str) -> Option<String> {
2860 let re = Regex::new(&format!(
2861 r#"{}\s*(==|!=|<=|>=|<|>)\s*['\"]([^'\"]+)['\"]"#,
2862 field
2863 ))
2864 .ok()?;
2865 let captures = re.captures(marker)?;
2866 let operator = captures.get(1)?.as_str();
2867 let value = captures.get(2)?.as_str();
2868 Some(format!("{} {}", operator, value))
2869}
2870
2871fn parse_requires_txt(content: &str) -> Vec<Dependency> {
2872 let mut dependencies = Vec::new();
2873 let mut current_scope = "install".to_string();
2874 let mut current_optional = false;
2875 let mut current_marker: Option<String> = None;
2876
2877 for line in content.lines() {
2878 let trimmed = line.trim();
2879 if trimmed.is_empty() || trimmed.starts_with('#') {
2880 continue;
2881 }
2882
2883 if trimmed.starts_with('[') && trimmed.ends_with(']') {
2884 let inner = &trimmed[1..trimmed.len() - 1];
2885 if let Some(rest) = inner.strip_prefix(':') {
2886 current_scope = "install".to_string();
2887 current_optional = false;
2888 current_marker = Some(rest.trim().to_string());
2889 } else if let Some((scope, marker)) = inner.split_once(':') {
2890 current_scope = scope.trim().to_string();
2891 current_optional = true;
2892 current_marker = Some(marker.trim().to_string());
2893 } else {
2894 current_scope = inner.trim().to_string();
2895 current_optional = true;
2896 current_marker = None;
2897 }
2898 continue;
2899 }
2900
2901 if let Some(dependency) = build_python_dependency(
2902 trimmed,
2903 ¤t_scope,
2904 current_optional,
2905 current_marker.as_deref(),
2906 ) {
2907 dependencies.push(dependency);
2908 }
2909 }
2910
2911 dependencies
2912}
2913
/// True when the trove classifiers contain the conventional marker used to
/// keep a package off public indexes (compared case-insensitively).
fn has_private_classifier(classifiers: &[String]) -> bool {
    const PRIVATE_MARKER: &str = "Private :: Do Not Upload";
    for classifier in classifiers {
        if classifier.eq_ignore_ascii_case(PRIVATE_MARKER) {
            return true;
        }
    }
    false
}
2919
2920fn build_setup_py_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
2921 let name = name?;
2922 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), name).ok()?;
2923 if let Some(version) = version {
2924 package_url.with_version(version).ok()?;
2925 }
2926 Some(package_url.to_string())
2927}
2928
/// Regex-based fallback extraction from setup.py source text, used when
/// AST-based parsing is not possible. Only simple string-valued keywords
/// (name, version, license, url) and dependencies are recovered.
fn extract_from_setup_py_regex(content: &str) -> PackageData {
    let name = extract_setup_value(content, "name");
    let version = extract_setup_value(content, "version");
    let license_expression = extract_setup_value(content, "license");

    // License text is recorded verbatim; no expression detection here.
    let declared_license_expression = None;
    let declared_license_expression_spdx = None;
    let license_detections = Vec::new();
    let extracted_license_statement = license_expression.clone();

    let dependencies = extract_setup_py_dependencies(content);
    let homepage_url = extract_setup_value(content, "url");
    let purl = build_setup_py_purl(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description: None,
        release_date: None,
        parties: Vec::new(),
        keywords: Vec::new(),
        homepage_url,
        download_url: None,
        size: None,
        sha1: None,
        md5: None,
        sha256: None,
        sha512: None,
        bug_tracking_url: None,
        code_view_url: None,
        vcs_url: None,
        copyright: None,
        holder: None,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: false,
        is_virtual: false,
        extra_data: None,
        dependencies,
        repository_homepage_url: None,
        repository_download_url: None,
        api_data_url: None,
        datasource_id: Some(DatasourceId::PypiSetupPy),
        purl,
    }
}
2989
/// Convert a [`PackageData`] into the lighter `ResolvedPackage` form used
/// inside dependency records. Missing identity fields collapse to empty
/// strings / the Pypi default type.
// NOTE(review): `extra_data` is deliberately set to None rather than
// copied from `pkg` — confirm resolved packages should not carry it.
fn package_data_to_resolved(pkg: &PackageData) -> crate::models::ResolvedPackage {
    crate::models::ResolvedPackage {
        package_type: pkg.package_type.unwrap_or(PackageType::Pypi),
        namespace: pkg.namespace.clone().unwrap_or_default(),
        name: pkg.name.clone().unwrap_or_default(),
        version: pkg.version.clone().unwrap_or_default(),
        primary_language: pkg.primary_language.clone(),
        download_url: pkg.download_url.clone(),
        sha1: pkg.sha1.clone(),
        sha256: pkg.sha256.clone(),
        sha512: pkg.sha512.clone(),
        md5: pkg.md5.clone(),
        is_virtual: pkg.is_virtual,
        extra_data: None,
        dependencies: pkg.dependencies.clone(),
        repository_homepage_url: pkg.repository_homepage_url.clone(),
        repository_download_url: pkg.repository_download_url.clone(),
        api_data_url: pkg.api_data_url.clone(),
        datasource_id: pkg.datasource_id,
        purl: pkg.purl.clone(),
    }
}
3012
/// Parse a PyPI API `pypi.json` document into a [`PackageData`].
///
/// Reads the `info` object for identity/metadata fields, maps
/// `project_urls` onto the dedicated URL fields, and selects a download
/// artifact from the `urls` array. On read/parse failure or a missing
/// `info` object, a minimal default record is returned and a warning is
/// logged.
fn extract_from_pypi_json(path: &Path) -> PackageData {
    // Minimal record returned on any failure below.
    let default = PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        datasource_id: Some(DatasourceId::PypiJson),
        ..Default::default()
    };

    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(error) => {
            warn!("Failed to read pypi.json at {:?}: {}", path, error);
            return default;
        }
    };

    let root: serde_json::Value = match serde_json::from_str(&content) {
        Ok(value) => value,
        Err(error) => {
            warn!("Failed to parse pypi.json at {:?}: {}", path, error);
            return default;
        }
    };

    let Some(info) = root.get("info").and_then(|value| value.as_object()) else {
        warn!("No info object found in pypi.json at {:?}", path);
        return default;
    };

    let name = info
        .get("name")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    let version = info
        .get("version")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    let summary = info
        .get("summary")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    // Prefer the long description; fall back to the summary when blank.
    let description = info
        .get("description")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned)
        .or(summary);
    let mut homepage_url = info
        .get("home_page")
        .and_then(|value| value.as_str())
        .map(ToOwned::to_owned);
    // Blank author/email/license strings are treated as absent.
    let author = info
        .get("author")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    let author_email = info
        .get("author_email")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    let license = info
        .get("license")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    let keywords = parse_setup_cfg_keywords(
        info.get("keywords")
            .and_then(|value| value.as_str())
            .map(ToOwned::to_owned),
    );
    let classifiers = info
        .get("classifiers")
        .and_then(|value| value.as_array())
        .map(|values| {
            values
                .iter()
                .filter_map(|value| value.as_str().map(ToOwned::to_owned))
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    let mut parties = Vec::new();
    if author.is_some() || author_email.is_some() {
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: author,
            email: author_email,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    let mut bug_tracking_url = None;
    let mut code_view_url = None;
    let mut vcs_url = None;
    let mut extra_data = HashMap::new();

    // `project_urls` pairs are sorted by key for deterministic mapping.
    let parsed_project_urls = info
        .get("project_urls")
        .and_then(|value| value.as_object())
        .map(|map| {
            let mut pairs: Vec<(String, String)> = map
                .iter()
                .filter_map(|(key, value)| Some((key.clone(), value.as_str()?.to_string())))
                .collect();
            pairs.sort_by(|left, right| left.0.cmp(&right.0));
            pairs
        })
        .unwrap_or_default();

    apply_project_url_mappings(
        &parsed_project_urls,
        &mut homepage_url,
        &mut bug_tracking_url,
        &mut code_view_url,
        &mut vcs_url,
        &mut extra_data,
    );

    // Pick one artifact (sdist preferred) for download URL/size/sha256.
    let (download_url, size, sha256) = root
        .get("urls")
        .and_then(|value| value.as_array())
        .map(|urls| select_pypi_json_artifact(urls))
        .unwrap_or((None, None, None));

    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
        build_pypi_urls(name.as_deref(), version.as_deref());

    PackageData {
        package_type: Some(PythonParser::PACKAGE_TYPE),
        namespace: None,
        name,
        version,
        qualifiers: None,
        subpath: None,
        primary_language: None,
        description,
        release_date: None,
        parties,
        keywords,
        // Fall back to the derived repository page when no homepage given.
        homepage_url: homepage_url.or(repository_homepage_url.clone()),
        download_url,
        size,
        sha1: None,
        md5: None,
        sha256,
        sha512: None,
        bug_tracking_url,
        code_view_url,
        vcs_url,
        copyright: None,
        holder: None,
        declared_license_expression: None,
        declared_license_expression_spdx: None,
        license_detections: Vec::new(),
        other_license_expression: None,
        other_license_expression_spdx: None,
        other_license_detections: Vec::new(),
        extracted_license_statement: license,
        notice_text: None,
        source_packages: Vec::new(),
        file_references: Vec::new(),
        is_private: has_private_classifier(&classifiers),
        is_virtual: false,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
        dependencies: Vec::new(),
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(DatasourceId::PypiJson),
        purl,
    }
}
3193
3194fn select_pypi_json_artifact(
3195 urls: &[serde_json::Value],
3196) -> (Option<String>, Option<u64>, Option<String>) {
3197 let selected = urls
3198 .iter()
3199 .find(|entry| entry.get("packagetype").and_then(|value| value.as_str()) == Some("sdist"))
3200 .or_else(|| urls.first());
3201
3202 let Some(entry) = selected else {
3203 return (None, None, None);
3204 };
3205
3206 let download_url = entry
3207 .get("url")
3208 .and_then(|value| value.as_str())
3209 .map(ToOwned::to_owned);
3210 let size = entry.get("size").and_then(|value| value.as_u64());
3211 let sha256 = entry
3212 .get("digests")
3213 .and_then(|value| value.as_object())
3214 .and_then(|digests| digests.get("sha256"))
3215 .and_then(|value| value.as_str())
3216 .map(ToOwned::to_owned);
3217
3218 (download_url, size, sha256)
3219}
3220
/// Parse a `pip inspect` JSON dump (`pip-inspect.deplock`).
///
/// Walks the `installed` array: the entry that was explicitly `requested`
/// AND has a `direct_url` becomes the main package; every other entry
/// becomes a pinned, resolved dependency attached to it. When no such main
/// entry exists, a default record is returned.
fn extract_from_pip_inspect(path: &Path) -> PackageData {
    let content = match read_file_to_string(path) {
        Ok(content) => content,
        Err(e) => {
            warn!("Failed to read pip-inspect.deplock at {:?}: {}", path, e);
            return default_package_data();
        }
    };

    let root: serde_json::Value = match serde_json::from_str(&content) {
        Ok(value) => value,
        Err(e) => {
            warn!(
                "Failed to parse pip-inspect.deplock JSON at {:?}: {}",
                path, e
            );
            return default_package_data();
        }
    };

    let installed = match root.get("installed").and_then(|v| v.as_array()) {
        Some(arr) => arr,
        None => {
            warn!(
                "No 'installed' array found in pip-inspect.deplock at {:?}",
                path
            );
            return default_package_data();
        }
    };

    // Tool/format versions are recorded on the main package's extra_data.
    let pip_version = root
        .get("pip_version")
        .and_then(|v| v.as_str())
        .map(String::from);
    let inspect_version = root
        .get("version")
        .and_then(|v| v.as_str())
        .map(String::from);

    let mut main_package: Option<PackageData> = None;
    let mut dependencies: Vec<Dependency> = Vec::new();

    for package_entry in installed {
        // Entries without metadata cannot be described; skip them.
        let metadata = match package_entry.get("metadata") {
            Some(m) => m,
            None => continue,
        };

        let is_requested = package_entry
            .get("requested")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let has_direct_url = package_entry.get("direct_url").is_some();

        let name = metadata
            .get("name")
            .and_then(|v| v.as_str())
            .map(String::from);
        let version = metadata
            .get("version")
            .and_then(|v| v.as_str())
            .map(String::from);
        let summary = metadata
            .get("summary")
            .and_then(|v| v.as_str())
            .map(String::from);
        let home_page = metadata
            .get("home_page")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author = metadata
            .get("author")
            .and_then(|v| v.as_str())
            .map(String::from);
        let author_email = metadata
            .get("author_email")
            .and_then(|v| v.as_str())
            .map(String::from);
        let license = metadata
            .get("license")
            .and_then(|v| v.as_str())
            .map(String::from);
        let description = metadata
            .get("description")
            .and_then(|v| v.as_str())
            .map(String::from);
        let keywords = metadata
            .get("keywords")
            .and_then(|v| v.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|k| k.as_str().map(String::from))
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();

        let mut parties = Vec::new();
        if author.is_some() || author_email.is_some() {
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: author,
                email: author_email,
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }

        // License text is recorded verbatim; no expression detection here.
        let license_detections = Vec::new();
        let declared_license_expression = None;
        let declared_license_expression_spdx = None;
        let extracted_license_statement = license.clone();

        let purl = name.as_ref().and_then(|n| {
            let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
            if let Some(v) = &version {
                package_url.with_version(v).ok()?;
            }
            Some(package_url.to_string())
        });

        if is_requested && has_direct_url {
            // This is the package the inspect run was performed for.
            let mut extra_data = HashMap::new();
            if let Some(pv) = &pip_version {
                extra_data.insert(
                    "pip_version".to_string(),
                    serde_json::Value::String(pv.clone()),
                );
            }
            if let Some(iv) = &inspect_version {
                extra_data.insert(
                    "inspect_version".to_string(),
                    serde_json::Value::String(iv.clone()),
                );
            }

            main_package = Some(PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name,
                version,
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: if extra_data.is_empty() {
                    None
                } else {
                    Some(extra_data)
                },
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl,
            });
        } else {
            // Every other installed entry is a resolved, pinned dependency.
            let resolved_package = PackageData {
                package_type: Some(PythonParser::PACKAGE_TYPE),
                namespace: None,
                name: name.clone(),
                version: version.clone(),
                qualifiers: None,
                subpath: None,
                primary_language: Some("Python".to_string()),
                description: description.or(summary),
                release_date: None,
                parties,
                keywords,
                homepage_url: home_page,
                download_url: None,
                size: None,
                sha1: None,
                md5: None,
                sha256: None,
                sha512: None,
                bug_tracking_url: None,
                code_view_url: None,
                vcs_url: None,
                copyright: None,
                holder: None,
                declared_license_expression,
                declared_license_expression_spdx,
                license_detections,
                other_license_expression: None,
                other_license_expression_spdx: None,
                other_license_detections: Vec::new(),
                extracted_license_statement,
                notice_text: None,
                source_packages: Vec::new(),
                file_references: Vec::new(),
                is_private: false,
                is_virtual: true,
                extra_data: None,
                dependencies: Vec::new(),
                repository_homepage_url: None,
                repository_download_url: None,
                api_data_url: None,
                datasource_id: Some(DatasourceId::PypiInspectDeplock),
                purl: purl.clone(),
            };

            let resolved = package_data_to_resolved(&resolved_package);
            dependencies.push(Dependency {
                purl,
                extracted_requirement: None,
                scope: None,
                is_runtime: Some(true),
                is_optional: Some(false),
                is_pinned: Some(true),
                // `requested` without a direct_url still counts as direct.
                is_direct: Some(is_requested),
                resolved_package: Some(Box::new(resolved)),
                extra_data: None,
            });
        }
    }

    // Attach collected dependencies to the main package, if one was found.
    if let Some(mut main_pkg) = main_package {
        main_pkg.dependencies = dependencies;
        main_pkg
    } else {
        default_package_data()
    }
}
3477
/// Parsed setup.cfg content: lowercased section name -> lowercased key ->
/// list of values (continuation lines each contribute one entry).
type IniSections = HashMap<String, HashMap<String, Vec<String>>>;
3479
3480fn extract_from_setup_cfg(path: &Path) -> PackageData {
3481 let content = match read_file_to_string(path) {
3482 Ok(content) => content,
3483 Err(e) => {
3484 warn!("Failed to read setup.cfg at {:?}: {}", path, e);
3485 return default_package_data();
3486 }
3487 };
3488
3489 let sections = parse_setup_cfg(&content);
3490 let name = get_ini_value(§ions, "metadata", "name");
3491 let version = get_ini_value(§ions, "metadata", "version");
3492 let description = get_ini_value(§ions, "metadata", "description");
3493 let author = get_ini_value(§ions, "metadata", "author");
3494 let author_email = get_ini_value(§ions, "metadata", "author_email");
3495 let maintainer = get_ini_value(§ions, "metadata", "maintainer");
3496 let maintainer_email = get_ini_value(§ions, "metadata", "maintainer_email");
3497 let license = get_ini_value(§ions, "metadata", "license");
3498 let mut homepage_url = get_ini_value(§ions, "metadata", "url");
3499 let classifiers = get_ini_values(§ions, "metadata", "classifiers");
3500 let keywords = parse_setup_cfg_keywords(get_ini_value(§ions, "metadata", "keywords"));
3501 let python_requires = get_ini_value(§ions, "options", "python_requires");
3502 let parsed_project_urls =
3503 parse_setup_cfg_project_urls(&get_ini_values(§ions, "metadata", "project_urls"));
3504 let (mut bug_tracking_url, mut code_view_url, mut vcs_url) = (None, None, None);
3505 let mut extra_data = HashMap::new();
3506
3507 let mut parties = Vec::new();
3508 if author.is_some() || author_email.is_some() {
3509 parties.push(Party {
3510 r#type: Some("person".to_string()),
3511 role: Some("author".to_string()),
3512 name: author,
3513 email: author_email,
3514 url: None,
3515 organization: None,
3516 organization_url: None,
3517 timezone: None,
3518 });
3519 }
3520
3521 if maintainer.is_some() || maintainer_email.is_some() {
3522 parties.push(Party {
3523 r#type: Some("person".to_string()),
3524 role: Some("maintainer".to_string()),
3525 name: maintainer,
3526 email: maintainer_email,
3527 url: None,
3528 organization: None,
3529 organization_url: None,
3530 timezone: None,
3531 });
3532 }
3533
3534 let declared_license_expression = None;
3536 let declared_license_expression_spdx = None;
3537 let license_detections = Vec::new();
3538 let extracted_license_statement = license.clone();
3539
3540 let dependencies = extract_setup_cfg_dependencies(§ions);
3541
3542 if let Some(value) = python_requires {
3543 extra_data.insert(
3544 "python_requires".to_string(),
3545 serde_json::Value::String(value),
3546 );
3547 }
3548
3549 apply_project_url_mappings(
3550 &parsed_project_urls,
3551 &mut homepage_url,
3552 &mut bug_tracking_url,
3553 &mut code_view_url,
3554 &mut vcs_url,
3555 &mut extra_data,
3556 );
3557
3558 let extra_data = if extra_data.is_empty() {
3559 None
3560 } else {
3561 Some(extra_data)
3562 };
3563
3564 let purl = name.as_ref().and_then(|n| {
3565 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), n).ok()?;
3566 if let Some(v) = &version {
3567 package_url.with_version(v).ok()?;
3568 }
3569 Some(package_url.to_string())
3570 });
3571
3572 PackageData {
3573 package_type: Some(PythonParser::PACKAGE_TYPE),
3574 namespace: None,
3575 name,
3576 version,
3577 qualifiers: None,
3578 subpath: None,
3579 primary_language: Some("Python".to_string()),
3580 description,
3581 release_date: None,
3582 parties,
3583 keywords,
3584 homepage_url,
3585 download_url: None,
3586 size: None,
3587 sha1: None,
3588 md5: None,
3589 sha256: None,
3590 sha512: None,
3591 bug_tracking_url,
3592 code_view_url,
3593 vcs_url,
3594 copyright: None,
3595 holder: None,
3596 declared_license_expression,
3597 declared_license_expression_spdx,
3598 license_detections,
3599 other_license_expression: None,
3600 other_license_expression_spdx: None,
3601 other_license_detections: Vec::new(),
3602 extracted_license_statement,
3603 notice_text: None,
3604 source_packages: Vec::new(),
3605 file_references: Vec::new(),
3606 is_private: has_private_classifier(&classifiers),
3607 is_virtual: false,
3608 extra_data,
3609 dependencies,
3610 repository_homepage_url: None,
3611 repository_download_url: None,
3612 api_data_url: None,
3613 datasource_id: Some(DatasourceId::PypiSetupCfg),
3614 purl,
3615 }
3616}
3617
/// Splits a comma-separated `keywords` value into trimmed, non-empty entries.
/// A missing value yields an empty list.
fn parse_setup_cfg_keywords(value: Option<String>) -> Vec<String> {
    match value {
        None => Vec::new(),
        Some(raw) => raw
            .split(',')
            .filter_map(|piece| {
                let piece = piece.trim();
                if piece.is_empty() {
                    None
                } else {
                    Some(piece.to_string())
                }
            })
            .collect(),
    }
}
3630
/// Parses `project_urls` entries of the form `Label = URL` into
/// `(label, url)` pairs, dropping malformed or empty entries.
fn parse_setup_cfg_project_urls(entries: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for entry in entries {
        if let Some((label, url)) = entry.split_once('=') {
            let label = label.trim();
            let url = url.trim();
            if !label.is_empty() && !url.is_empty() {
                pairs.push((label.to_string(), url.to_string()));
            }
        }
    }
    pairs
}
3646
3647fn apply_project_url_mappings(
3648 parsed_urls: &[(String, String)],
3649 homepage_url: &mut Option<String>,
3650 bug_tracking_url: &mut Option<String>,
3651 code_view_url: &mut Option<String>,
3652 vcs_url: &mut Option<String>,
3653 extra_data: &mut HashMap<String, serde_json::Value>,
3654) {
3655 for (label, url) in parsed_urls {
3656 let label_lower = label.to_lowercase();
3657
3658 if bug_tracking_url.is_none()
3659 && matches!(
3660 label_lower.as_str(),
3661 "tracker"
3662 | "bug reports"
3663 | "bug tracker"
3664 | "issues"
3665 | "issue tracker"
3666 | "github: issues"
3667 )
3668 {
3669 *bug_tracking_url = Some(url.clone());
3670 } else if code_view_url.is_none()
3671 && matches!(label_lower.as_str(), "source" | "source code" | "code")
3672 {
3673 *code_view_url = Some(url.clone());
3674 } else if vcs_url.is_none()
3675 && matches!(
3676 label_lower.as_str(),
3677 "github" | "gitlab" | "github: repo" | "repository"
3678 )
3679 {
3680 *vcs_url = Some(url.clone());
3681 } else if homepage_url.is_none()
3682 && matches!(label_lower.as_str(), "website" | "homepage" | "home")
3683 {
3684 *homepage_url = Some(url.clone());
3685 } else if label_lower == "changelog" {
3686 extra_data.insert(
3687 "changelog_url".to_string(),
3688 serde_json::Value::String(url.clone()),
3689 );
3690 }
3691 }
3692
3693 let project_urls_json: serde_json::Map<String, serde_json::Value> = parsed_urls
3694 .iter()
3695 .map(|(label, url)| (label.clone(), serde_json::Value::String(url.clone())))
3696 .collect();
3697
3698 if !project_urls_json.is_empty() {
3699 extra_data.insert(
3700 "project_urls".to_string(),
3701 serde_json::Value::Object(project_urls_json),
3702 );
3703 }
3704}
3705
3706fn parse_setup_cfg(content: &str) -> IniSections {
3707 let mut sections: IniSections = HashMap::new();
3708 let mut current_section: Option<String> = None;
3709 let mut current_key: Option<String> = None;
3710
3711 for raw_line in content.lines() {
3712 let line = raw_line.trim_end_matches('\r');
3713 let trimmed = line.trim();
3714 if trimmed.is_empty() {
3715 continue;
3716 }
3717
3718 let stripped = line.trim_start();
3719 if stripped.starts_with('#') || stripped.starts_with(';') {
3720 continue;
3721 }
3722
3723 if stripped.starts_with('[') && stripped.ends_with(']') {
3724 let section_name = stripped
3725 .trim_start_matches('[')
3726 .trim_end_matches(']')
3727 .trim()
3728 .to_ascii_lowercase();
3729 current_section = if section_name.is_empty() {
3730 None
3731 } else {
3732 Some(section_name)
3733 };
3734 current_key = None;
3735 continue;
3736 }
3737
3738 if (line.starts_with(' ') || line.starts_with('\t')) && current_key.is_some() {
3739 if let (Some(section), Some(key)) = (current_section.as_ref(), current_key.as_ref()) {
3740 let value = stripped.trim();
3741 if !value.is_empty() {
3742 sections
3743 .entry(section.clone())
3744 .or_default()
3745 .entry(key.clone())
3746 .or_default()
3747 .push(value.to_string());
3748 }
3749 }
3750 continue;
3751 }
3752
3753 if let Some((key, value)) = stripped.split_once('=')
3754 && let Some(section) = current_section.as_ref()
3755 {
3756 let key_name = key.trim().to_ascii_lowercase();
3757 let value_trimmed = value.trim();
3758 let entry = sections
3759 .entry(section.clone())
3760 .or_default()
3761 .entry(key_name.clone())
3762 .or_default();
3763 if !value_trimmed.is_empty() {
3764 entry.push(value_trimmed.to_string());
3765 }
3766 current_key = Some(key_name);
3767 }
3768 }
3769
3770 sections
3771}
3772
3773fn get_ini_value(sections: &IniSections, section: &str, key: &str) -> Option<String> {
3774 sections
3775 .get(§ion.to_ascii_lowercase())
3776 .and_then(|values| values.get(&key.to_ascii_lowercase()))
3777 .and_then(|entries| entries.first())
3778 .map(|value| value.trim().to_string())
3779}
3780
3781fn get_ini_values(sections: &IniSections, section: &str, key: &str) -> Vec<String> {
3782 sections
3783 .get(§ion.to_ascii_lowercase())
3784 .and_then(|values| values.get(&key.to_ascii_lowercase()))
3785 .cloned()
3786 .unwrap_or_default()
3787}
3788
3789fn extract_setup_cfg_dependencies(sections: &IniSections) -> Vec<Dependency> {
3790 let mut dependencies = Vec::new();
3791
3792 for (sub_section, scope) in [
3793 ("install_requires", "install"),
3794 ("tests_require", "test"),
3795 ("setup_requires", "setup"),
3796 ] {
3797 let reqs = get_ini_values(sections, "options", sub_section);
3798 dependencies.extend(parse_setup_cfg_requirements(&reqs, scope, false));
3799 }
3800
3801 if let Some(extras) = sections.get("options.extras_require") {
3802 let mut extra_items: Vec<_> = extras.iter().collect();
3803 extra_items.sort_by_key(|(name, _)| *name);
3804 for (extra_name, reqs) in extra_items {
3805 dependencies.extend(parse_setup_cfg_requirements(reqs, extra_name, true));
3806 }
3807 }
3808
3809 dependencies
3810}
3811
3812fn parse_setup_cfg_requirements(
3813 reqs: &[String],
3814 scope: &str,
3815 is_optional: bool,
3816) -> Vec<Dependency> {
3817 reqs.iter()
3818 .filter_map(|req| build_setup_cfg_dependency(req, scope, is_optional))
3819 .collect()
3820}
3821
3822fn build_setup_cfg_dependency(req: &str, scope: &str, is_optional: bool) -> Option<Dependency> {
3823 let trimmed = req.trim();
3824 if trimmed.is_empty() || trimmed.starts_with('#') {
3825 return None;
3826 }
3827
3828 let name = extract_setup_cfg_dependency_name(trimmed)?;
3829 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
3830
3831 Some(Dependency {
3832 purl: Some(purl.to_string()),
3833 extracted_requirement: Some(normalize_setup_cfg_requirement(trimmed)),
3834 scope: Some(scope.to_string()),
3835 is_runtime: Some(true),
3836 is_optional: Some(is_optional),
3837 is_pinned: Some(false),
3838 is_direct: Some(true),
3839 resolved_package: None,
3840 extra_data: None,
3841 })
3842}
3843
/// Extracts the bare package name from a requirement specifier by cutting at
/// the first whitespace, version operator, environment marker (`;`), or
/// extras bracket (`[`). Returns `None` when no name precedes the cut.
fn extract_setup_cfg_dependency_name(req: &str) -> Option<String> {
    let spec = req.trim();
    if spec.is_empty() {
        return None;
    }

    let is_delimiter = |c: char| c.is_whitespace() || "<>=!~;[".contains(c);
    let cut = spec.find(is_delimiter).unwrap_or(spec.len());
    let name = spec[..cut].trim();
    (!name.is_empty()).then(|| name.to_string())
}
3860
/// Removes every whitespace character so requirement strings compare in a
/// canonical compact form (e.g. `foo >= 1.0` -> `foo>=1.0`).
fn normalize_setup_cfg_requirement(req: &str) -> String {
    req.split_whitespace().collect()
}
3864
/// Extracts a quoted `key = "value"` assignment from setup.py source text.
///
/// Handles zero or one space on either side of `=` and both quote styles.
/// The closing delimiter must match the opening quote, so a double-quoted
/// value containing an apostrophe (e.g. `"it's fine"`) is returned intact
/// instead of being cut at the apostrophe (the previous implementation
/// stopped at whichever quote character appeared first).
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    // All (spacing, quote) combinations, double-quote patterns first to
    // preserve the historical precedence.
    let mut patterns = Vec::with_capacity(8);
    for quote in ['"', '\''] {
        for eq in ["=", " =", "= ", " = "] {
            patterns.push((format!("{key}{eq}{quote}"), quote));
        }
    }

    for (pattern, quote) in patterns {
        if let Some(start_idx) = content.find(&pattern) {
            let value_start = start_idx + pattern.len();
            let remaining = &content[value_start..];
            // Close on the same quote character that opened the value.
            if let Some(end_idx) = remaining.find(quote) {
                return Some(remaining[..end_idx].to_string());
            }
        }
    }

    None
}
3890
3891fn extract_setup_py_dependencies(content: &str) -> Vec<Dependency> {
3892 let mut dependencies = Vec::new();
3893
3894 if let Some(tests_deps) = extract_tests_require(content) {
3895 dependencies.extend(tests_deps);
3896 }
3897
3898 if let Some(extras_deps) = extract_extras_require(content) {
3899 dependencies.extend(extras_deps);
3900 }
3901
3902 dependencies
3903}
3904
3905fn extract_tests_require(content: &str) -> Option<Vec<Dependency>> {
3906 let pattern = r"tests_require\s*=\s*\[([^\]]+)\]";
3907 let re = Regex::new(pattern).ok()?;
3908 let captures = re.captures(content)?;
3909 let deps_str = captures.get(1)?.as_str();
3910
3911 let deps = parse_setup_py_dep_list(deps_str, "test", true);
3912 if deps.is_empty() { None } else { Some(deps) }
3913}
3914
3915fn extract_extras_require(content: &str) -> Option<Vec<Dependency>> {
3916 let pattern = r"extras_require\s*=\s*\{([^}]+)\}";
3917 let re = Regex::new(pattern).ok()?;
3918 let captures = re.captures(content)?;
3919 let dict_content = captures.get(1)?.as_str();
3920
3921 let mut all_deps = Vec::new();
3922
3923 let entry_pattern = r#"['"]([^'"]+)['"]\s*:\s*\[([^\]]+)\]"#;
3924 let entry_re = Regex::new(entry_pattern).ok()?;
3925
3926 for entry_cap in entry_re.captures_iter(dict_content) {
3927 if let (Some(extra_name), Some(deps_str)) = (entry_cap.get(1), entry_cap.get(2)) {
3928 let deps = parse_setup_py_dep_list(deps_str.as_str(), extra_name.as_str(), true);
3929 all_deps.extend(deps);
3930 }
3931 }
3932
3933 if all_deps.is_empty() {
3934 None
3935 } else {
3936 Some(all_deps)
3937 }
3938}
3939
3940fn parse_setup_py_dep_list(deps_str: &str, scope: &str, is_optional: bool) -> Vec<Dependency> {
3941 let dep_pattern = r#"['"]([^'"]+)['"]"#;
3942 let re = match Regex::new(dep_pattern) {
3943 Ok(r) => r,
3944 Err(_) => return Vec::new(),
3945 };
3946
3947 re.captures_iter(deps_str)
3948 .filter_map(|cap| {
3949 let dep_str = cap.get(1)?.as_str().trim();
3950 if dep_str.is_empty() {
3951 return None;
3952 }
3953
3954 let name = extract_setup_cfg_dependency_name(dep_str)?;
3955 let purl = PackageUrl::new(PythonParser::PACKAGE_TYPE.as_str(), &name).ok()?;
3956
3957 Some(Dependency {
3958 purl: Some(purl.to_string()),
3959 extracted_requirement: Some(dep_str.to_string()),
3960 scope: Some(scope.to_string()),
3961 is_runtime: Some(true),
3962 is_optional: Some(is_optional),
3963 is_pinned: Some(false),
3964 is_direct: Some(true),
3965 resolved_package: None,
3966 extra_data: None,
3967 })
3968 })
3969 .collect()
3970}
3971
3972pub(crate) fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
3974 let content = read_file_to_string(path).map_err(|e| e.to_string())?;
3975 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
3976}
3977
3978fn calculate_file_checksums(path: &Path) -> (Option<u64>, Option<String>) {
3989 let mut file = match File::open(path) {
3990 Ok(f) => f,
3991 Err(_) => return (None, None),
3992 };
3993
3994 let metadata = match file.metadata() {
3995 Ok(m) => m,
3996 Err(_) => return (None, None),
3997 };
3998 let size = metadata.len();
3999
4000 let mut hasher = Sha256::new();
4001 let mut buffer = vec![0; 8192];
4002
4003 loop {
4004 match file.read(&mut buffer) {
4005 Ok(0) => break,
4006 Ok(n) => hasher.update(&buffer[..n]),
4007 Err(_) => return (Some(size), None),
4008 }
4009 }
4010
4011 let hash = format!("{:x}", hasher.finalize());
4012 (Some(size), Some(hash))
4013}
4014
4015fn default_package_data() -> PackageData {
4016 PackageData::default()
4017}
4018
// Registers this parser with the crate-wide registry: a human-readable
// description, the glob patterns it claims, the purl type ("pypi"), the
// primary language, and an ecosystem documentation URL.
crate::register_parser!(
    "Python package manifests (pyproject.toml, setup.py, setup.cfg, pypi.json, PKG-INFO, METADATA, pip cache origin.json, .whl, .egg)",
    &[
        "**/pyproject.toml",
        "**/setup.py",
        "**/setup.cfg",
        "**/pypi.json",
        "**/PKG-INFO",
        "**/METADATA",
        "**/origin.json",
        "**/*.whl",
        "**/*.egg"
    ],
    "pypi",
    "Python",
    Some("https://packaging.python.org/"),
);