1use std::collections::HashMap;
34use std::path::Path;
35
36use log::warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41 DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
42};
43use crate::parsers::rfc822::{self, Rfc822Metadata};
44use crate::parsers::utils::{read_file_to_string, split_name_email};
45use crate::utils::spdx::combine_license_expressions;
46
47use super::PackageParser;
48use super::license_normalization::{
49 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
50 normalize_declared_license_key,
51};
52
53const PACKAGE_TYPE: PackageType = PackageType::Deb;
54
55fn default_package_data(datasource_id: DatasourceId) -> PackageData {
56 PackageData {
57 package_type: Some(PACKAGE_TYPE),
58 datasource_id: Some(datasource_id),
59 ..Default::default()
60 }
61}
62
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `debian` namespace.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `ubuntu` namespace.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that, when found in a lower-cased maintainer string, mark the
/// package as belonging to the `debian` namespace.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that, when found in a lower-cased maintainer string, mark the
/// package as belonging to the `ubuntu` namespace.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
76
/// Describes how one relationship header of a control paragraph is turned
/// into dependencies.
struct DepFieldSpec {
    /// Header name looked up in the paragraph (lower-case).
    field: &'static str,
    /// Scope string recorded on every dependency parsed from this header.
    scope: &'static str,
    /// Value recorded as `is_runtime` on the resulting dependencies.
    is_runtime: bool,
    /// Baseline `is_optional` value; alternatives (`a | b`) are always
    /// reported as optional regardless of this flag.
    is_optional: bool,
}
84
/// Every relationship header that `parse_all_dependencies` inspects, in the
/// order in which the resulting dependencies are emitted.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships that must be satisfied.
    DepFieldSpec {
        field: "depends",
        scope: "depends",
        is_runtime: true,
        is_optional: false,
    },
    DepFieldSpec {
        field: "pre-depends",
        scope: "pre-depends",
        is_runtime: true,
        is_optional: false,
    },
    // Runtime relationships flagged as optional.
    DepFieldSpec {
        field: "recommends",
        scope: "recommends",
        is_runtime: true,
        is_optional: true,
    },
    DepFieldSpec {
        field: "suggests",
        scope: "suggests",
        is_runtime: true,
        is_optional: true,
    },
    // Relationships recorded as neither runtime nor optional.
    DepFieldSpec {
        field: "breaks",
        scope: "breaks",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "conflicts",
        scope: "conflicts",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "replaces",
        scope: "replaces",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "provides",
        scope: "provides",
        is_runtime: false,
        is_optional: false,
    },
    // Build-time relationships.
    DepFieldSpec {
        field: "build-depends",
        scope: "build-depends",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "build-depends-indep",
        scope: "build-depends-indep",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "build-conflicts",
        scope: "build-conflicts",
        is_runtime: false,
        is_optional: false,
    },
];
153
154pub struct DebianControlParser;
159
160impl PackageParser for DebianControlParser {
161 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
162
163 fn is_match(path: &Path) -> bool {
164 if let Some(name) = path.file_name()
165 && name == "control"
166 && let Some(parent) = path.parent()
167 && let Some(parent_name) = parent.file_name()
168 {
169 return parent_name == "debian";
170 }
171 false
172 }
173
174 fn extract_packages(path: &Path) -> Vec<PackageData> {
175 let content = match read_file_to_string(path) {
176 Ok(c) => c,
177 Err(e) => {
178 warn!("Failed to read debian/control at {:?}: {}", path, e);
179 return Vec::new();
180 }
181 };
182
183 parse_debian_control(&content)
184 }
185}
186
187pub struct DebianInstalledParser;
192
193impl PackageParser for DebianInstalledParser {
194 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
195
196 fn is_match(path: &Path) -> bool {
197 let path_str = path.to_string_lossy();
198 path_str.ends_with("var/lib/dpkg/status")
199 }
200
201 fn extract_packages(path: &Path) -> Vec<PackageData> {
202 let content = match read_file_to_string(path) {
203 Ok(c) => c,
204 Err(e) => {
205 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
206 return Vec::new();
207 }
208 };
209
210 parse_dpkg_status(&content)
211 }
212}
213
214pub struct DebianDistrolessInstalledParser;
215
216impl PackageParser for DebianDistrolessInstalledParser {
217 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
218
219 fn is_match(path: &Path) -> bool {
220 let path_str = path.to_string_lossy();
221 path_str.contains("var/lib/dpkg/status.d/")
222 }
223
224 fn extract_packages(path: &Path) -> Vec<PackageData> {
225 let content = match read_file_to_string(path) {
226 Ok(c) => c,
227 Err(e) => {
228 warn!("Failed to read distroless status file at {:?}: {}", path, e);
229 return vec![default_package_data(
230 DatasourceId::DebianDistrolessInstalledDb,
231 )];
232 }
233 };
234
235 vec![parse_distroless_status(&content)]
236 }
237}
238
239fn parse_distroless_status(content: &str) -> PackageData {
240 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
241
242 if paragraphs.is_empty() {
243 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
244 }
245
246 build_package_from_paragraph(
247 ¶graphs[0],
248 None,
249 DatasourceId::DebianDistrolessInstalledDb,
250 )
251 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
252}
253
254fn parse_debian_control(content: &str) -> Vec<PackageData> {
264 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
265 if paragraphs.is_empty() {
266 return Vec::new();
267 }
268
269 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
271
272 let (source_paragraph, binary_start) = if has_source {
273 (Some(¶graphs[0]), 1)
274 } else {
275 (None, 0)
276 };
277
278 let source_meta = source_paragraph.map(extract_source_meta);
280
281 let mut packages = Vec::new();
282
283 for para in ¶graphs[binary_start..] {
284 if let Some(pkg) = build_package_from_paragraph(
285 para,
286 source_meta.as_ref(),
287 DatasourceId::DebianControlInSource,
288 ) {
289 packages.push(pkg);
290 }
291 }
292
293 if packages.is_empty()
294 && let Some(source_para) = source_paragraph
295 && let Some(pkg) = build_package_from_source_paragraph(source_para)
296 {
297 packages.push(pkg);
298 }
299
300 packages
301}
302
303fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
308 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
309 let mut packages = Vec::new();
310
311 for para in ¶graphs {
312 let status = rfc822::get_header_first(¶.headers, "status");
313 if status.as_deref() != Some("install ok installed") {
314 continue;
315 }
316
317 if let Some(pkg) =
318 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
319 {
320 packages.push(pkg);
321 }
322 }
323
324 packages
325}
326
/// Metadata taken from the source paragraph of a control file and shared
/// with the packages built from the remaining paragraphs.
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders of the source package.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
338
339fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
340 let mut parties = Vec::new();
341
342 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
344 let (name, email) = split_name_email(&maintainer);
345 parties.push(Party {
346 r#type: Some("person".to_string()),
347 role: Some("maintainer".to_string()),
348 name,
349 email,
350 url: None,
351 organization: None,
352 organization_url: None,
353 timezone: None,
354 });
355 }
356
357 if let Some(orig_maintainer) =
359 rfc822::get_header_first(¶graph.headers, "original-maintainer")
360 {
361 let (name, email) = split_name_email(&orig_maintainer);
362 parties.push(Party {
363 r#type: Some("person".to_string()),
364 role: Some("maintainer".to_string()),
365 name,
366 email,
367 url: None,
368 organization: None,
369 organization_url: None,
370 timezone: None,
371 });
372 }
373
374 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
376 for uploader in uploaders_str.split(',') {
377 let trimmed = uploader.trim();
378 if !trimmed.is_empty() {
379 let (name, email) = split_name_email(trimmed);
380 parties.push(Party {
381 r#type: Some("person".to_string()),
382 role: Some("uploader".to_string()),
383 name,
384 email,
385 url: None,
386 organization: None,
387 organization_url: None,
388 timezone: None,
389 });
390 }
391 }
392 }
393
394 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage");
395
396 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
398 .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
399
400 let code_view_url = rfc822::get_header_first(¶graph.headers, "vcs-browser");
401
402 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs");
403
404 SourceMeta {
405 parties,
406 homepage_url,
407 vcs_url,
408 code_view_url,
409 bug_tracking_url,
410 }
411}
412
413fn build_package_from_paragraph(
418 paragraph: &Rfc822Metadata,
419 source_meta: Option<&SourceMeta>,
420 datasource_id: DatasourceId,
421) -> Option<PackageData> {
422 let name = rfc822::get_header_first(¶graph.headers, "package")?;
423 let version = rfc822::get_header_first(¶graph.headers, "version");
424 let architecture = rfc822::get_header_first(¶graph.headers, "architecture");
425 let description = rfc822::get_header_first(¶graph.headers, "description");
426 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
427 let homepage = rfc822::get_header_first(¶graph.headers, "homepage");
428 let source_field = rfc822::get_header_first(¶graph.headers, "source");
429 let section = rfc822::get_header_first(¶graph.headers, "section");
430 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
431 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
432
433 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
434
435 let parties = if let Some(meta) = source_meta {
437 meta.parties.clone()
438 } else {
439 let mut p = Vec::new();
440 if let Some(m) = &maintainer_str {
441 let (n, e) = split_name_email(m);
442 p.push(Party {
443 r#type: Some("person".to_string()),
444 role: Some("maintainer".to_string()),
445 name: n,
446 email: e,
447 url: None,
448 organization: None,
449 organization_url: None,
450 timezone: None,
451 });
452 }
453 p
454 };
455
456 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
458 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
459 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
460 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
461
462 let purl = build_debian_purl(
464 &name,
465 version.as_deref(),
466 namespace.as_deref(),
467 architecture.as_deref(),
468 );
469
470 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
472
473 let keywords = section.into_iter().collect();
475
476 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
478
479 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
481 if let Some(ma) = &multi_arch
482 && !ma.is_empty()
483 {
484 extra_data.insert(
485 "multi_arch".to_string(),
486 serde_json::Value::String(ma.clone()),
487 );
488 }
489 if let Some(size_str) = &installed_size
490 && let Ok(size) = size_str.parse::<u64>()
491 {
492 extra_data.insert(
493 "installed_size".to_string(),
494 serde_json::Value::Number(serde_json::Number::from(size)),
495 );
496 }
497
498 let qualifiers = architecture.as_ref().map(|arch| {
500 let mut q = HashMap::new();
501 q.insert("arch".to_string(), arch.clone());
502 q
503 });
504
505 Some(PackageData {
506 package_type: Some(PACKAGE_TYPE),
507 namespace: namespace.clone(),
508 name: Some(name),
509 version,
510 qualifiers,
511 subpath: None,
512 primary_language: None,
513 description,
514 release_date: None,
515 parties,
516 keywords,
517 homepage_url,
518 download_url: None,
519 size: None,
520 sha1: None,
521 md5: None,
522 sha256: None,
523 sha512: None,
524 bug_tracking_url,
525 code_view_url,
526 vcs_url,
527 copyright: None,
528 holder: None,
529 declared_license_expression: None,
530 declared_license_expression_spdx: None,
531 license_detections: Vec::new(),
532 other_license_expression: None,
533 other_license_expression_spdx: None,
534 other_license_detections: Vec::new(),
535 extracted_license_statement: None,
536 notice_text: None,
537 source_packages,
538 file_references: Vec::new(),
539 is_private: false,
540 is_virtual: false,
541 extra_data: if extra_data.is_empty() {
542 None
543 } else {
544 Some(extra_data)
545 },
546 dependencies,
547 repository_homepage_url: None,
548 repository_download_url: None,
549 api_data_url: None,
550 datasource_id: Some(datasource_id),
551 purl,
552 })
553}
554
555fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
556 let name = rfc822::get_header_first(¶graph.headers, "source")?;
557 let version = rfc822::get_header_first(¶graph.headers, "version");
558 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
559
560 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
561 let source_meta = extract_source_meta(paragraph);
562
563 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
564 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
565
566 let section = rfc822::get_header_first(¶graph.headers, "section");
567 let keywords = section.into_iter().collect();
568
569 Some(PackageData {
570 package_type: Some(PACKAGE_TYPE),
571 namespace: namespace.clone(),
572 name: Some(name),
573 version,
574 qualifiers: None,
575 subpath: None,
576 primary_language: None,
577 description: None,
578 release_date: None,
579 parties: source_meta.parties,
580 keywords,
581 homepage_url: source_meta.homepage_url,
582 download_url: None,
583 size: None,
584 sha1: None,
585 md5: None,
586 sha256: None,
587 sha512: None,
588 bug_tracking_url: source_meta.bug_tracking_url,
589 code_view_url: source_meta.code_view_url,
590 vcs_url: source_meta.vcs_url,
591 copyright: None,
592 holder: None,
593 declared_license_expression: None,
594 declared_license_expression_spdx: None,
595 license_detections: Vec::new(),
596 other_license_expression: None,
597 other_license_expression_spdx: None,
598 other_license_detections: Vec::new(),
599 extracted_license_statement: None,
600 notice_text: None,
601 source_packages: Vec::new(),
602 file_references: Vec::new(),
603 is_private: false,
604 is_virtual: false,
605 extra_data: None,
606 dependencies,
607 repository_homepage_url: None,
608 repository_download_url: None,
609 api_data_url: None,
610 datasource_id: Some(DatasourceId::DebianControlInSource),
611 purl,
612 })
613}
614
615fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
620 if let Some(ver) = version {
622 let ver_lower = ver.to_lowercase();
623 for clue in VERSION_CLUES_UBUNTU {
624 if ver_lower.contains(clue) {
625 return Some("ubuntu".to_string());
626 }
627 }
628 for clue in VERSION_CLUES_DEBIAN {
629 if ver_lower.contains(clue) {
630 return Some("debian".to_string());
631 }
632 }
633 }
634
635 if let Some(maint) = maintainer {
637 let maint_lower = maint.to_lowercase();
638 for clue in MAINTAINER_CLUES_UBUNTU {
639 if maint_lower.contains(clue) {
640 return Some("ubuntu".to_string());
641 }
642 }
643 for clue in MAINTAINER_CLUES_DEBIAN {
644 if maint_lower.contains(clue) {
645 return Some("debian".to_string());
646 }
647 }
648 }
649
650 Some("debian".to_string())
652}
653
/// Builds the package URL string for a Debian package.
///
/// The purl type comes from `PACKAGE_TYPE`; `namespace`, `version` and the
/// `arch` qualifier are added only when supplied. Returns `None` as soon as
/// any step of the purl construction reports an error.
fn build_debian_purl(
    name: &str,
    version: Option<&str>,
    namespace: Option<&str>,
    architecture: Option<&str>,
) -> Option<String> {
    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;

    if let Some(ns) = namespace {
        purl.with_namespace(ns).ok()?;
    }

    if let Some(ver) = version {
        purl.with_version(ver).ok()?;
    }

    // The architecture is carried as the `arch` qualifier.
    if let Some(arch) = architecture {
        purl.add_qualifier("arch", arch).ok()?;
    }

    Some(purl.to_string())
}
680
681fn parse_all_dependencies(
686 headers: &HashMap<String, Vec<String>>,
687 namespace: Option<&str>,
688) -> Vec<Dependency> {
689 let mut dependencies = Vec::new();
690
691 for spec in DEP_FIELDS {
692 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
693 dependencies.extend(parse_dependency_field(
694 &dep_str,
695 spec.scope,
696 spec.is_runtime,
697 spec.is_optional,
698 namespace,
699 ));
700 }
701 }
702
703 dependencies
704}
705
706fn parse_dependency_field(
715 dep_str: &str,
716 scope: &str,
717 is_runtime: bool,
718 is_optional: bool,
719 namespace: Option<&str>,
720) -> Vec<Dependency> {
721 let mut deps = Vec::new();
722
723 let dep_re = Regex::new(
726 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
727 )
728 .unwrap();
729
730 for group in dep_str.split(',') {
731 let group = group.trim();
732 if group.is_empty() {
733 continue;
734 }
735
736 let alternatives: Vec<&str> = group.split('|').collect();
738 let has_alternatives = alternatives.len() > 1;
739
740 for alt in alternatives {
741 let alt = alt.trim();
742 if alt.is_empty() {
743 continue;
744 }
745
746 if let Some(caps) = dep_re.captures(alt) {
747 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
748 let operator = caps.get(2).map(|m| m.as_str().trim());
749 let version = caps.get(3).map(|m| m.as_str().trim());
750
751 if pkg_name.is_empty() {
752 continue;
753 }
754
755 if pkg_name.starts_with('$') {
757 continue;
758 }
759
760 let extracted_requirement = match (operator, version) {
761 (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
762 _ => None,
763 };
764
765 let is_pinned = operator.map(|op| op == "=");
766
767 let purl = build_debian_purl(pkg_name, None, namespace, None);
768
769 deps.push(Dependency {
770 purl,
771 extracted_requirement,
772 scope: Some(scope.to_string()),
773 is_runtime: Some(is_runtime),
774 is_optional: Some(is_optional || has_alternatives),
775 is_pinned,
776 is_direct: Some(true),
777 resolved_package: None,
778 extra_data: None,
779 });
780 }
781 }
782 }
783
784 deps
785}
786
787fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
795 let Some(source_str) = source else {
796 return Vec::new();
797 };
798
799 let trimmed = source_str.trim();
800 if trimmed.is_empty() {
801 return Vec::new();
802 }
803
804 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
806 let name = trimmed[..paren_start].trim();
807 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
808 (
809 name,
810 if version.is_empty() {
811 None
812 } else {
813 Some(version)
814 },
815 )
816 } else {
817 (trimmed, None)
818 };
819
820 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
821 vec![purl]
822 } else {
823 Vec::new()
824 }
825}
826
// Registers the `debian/control` parser with the crate-level parser registry.
// NOTE(review): the arguments appear to be description, path globs, package
// type, an empty string whose meaning is defined by `register_parser!`, and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
838
839pub struct DebianDscParser;
848
849impl PackageParser for DebianDscParser {
850 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
851
852 fn is_match(path: &Path) -> bool {
853 path.extension().and_then(|e| e.to_str()) == Some("dsc")
854 }
855
856 fn extract_packages(path: &Path) -> Vec<PackageData> {
857 let content = match read_file_to_string(path) {
858 Ok(c) => c,
859 Err(e) => {
860 warn!("Failed to read .dsc file {:?}: {}", path, e);
861 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
862 }
863 };
864
865 vec![parse_dsc_content(&content)]
866 }
867}
868
/// Removes the PGP clear-sign wrapper from `content`.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines and
/// the first blank line that follow it (while nothing has been emitted yet),
/// and everything from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`.
/// Every kept line is emitted followed by `\n`; unsigned input therefore
/// passes through with normalized line endings.
fn strip_pgp_signature(content: &str) -> String {
    let mut body = String::new();
    // True from the signed-message marker until the blank line ending its headers.
    let mut in_armor_headers = false;
    let mut in_signature = false;

    for line in content.lines() {
        if line.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") {
            in_armor_headers = true;
        } else if line.starts_with("-----BEGIN PGP SIGNATURE-----") {
            in_signature = true;
        } else if line.starts_with("-----END PGP SIGNATURE-----") {
            in_signature = false;
        } else if in_armor_headers && line.starts_with("Hash:") {
            // Armor header: not part of the signed payload.
        } else if in_armor_headers && line.is_empty() && body.is_empty() {
            in_armor_headers = false;
        } else if !in_signature {
            body.push_str(line);
            body.push('\n');
        }
    }

    body
}
902
903fn parse_dsc_content(content: &str) -> PackageData {
904 let clean_content = strip_pgp_signature(content);
905 let metadata = rfc822::parse_rfc822_content(&clean_content);
906 let headers = &metadata.headers;
907
908 let name = rfc822::get_header_first(headers, "source");
909 let version = rfc822::get_header_first(headers, "version");
910 let architecture = rfc822::get_header_first(headers, "architecture");
911 let namespace = Some("debian".to_string());
912
913 let mut package = PackageData {
914 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
915 package_type: Some(PACKAGE_TYPE),
916 namespace: namespace.clone(),
917 name: name.clone(),
918 version: version.clone(),
919 description: rfc822::get_header_first(headers, "description"),
920 homepage_url: rfc822::get_header_first(headers, "homepage"),
921 vcs_url: rfc822::get_header_first(headers, "vcs-git"),
922 code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
923 ..Default::default()
924 };
925
926 if let (Some(n), Some(v)) = (&name, &version) {
928 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
929 }
930
931 if let Some(n) = &name
933 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
934 {
935 package.source_packages.push(source_purl);
936 }
937
938 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
939 let (name_opt, email_opt) = split_name_email(&maintainer);
940 package.parties.push(Party {
941 r#type: None,
942 role: Some("maintainer".to_string()),
943 name: name_opt,
944 email: email_opt,
945 url: None,
946 organization: None,
947 organization_url: None,
948 timezone: None,
949 });
950 }
951
952 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
953 for uploader in uploaders_str.split(',') {
954 let uploader = uploader.trim();
955 if uploader.is_empty() {
956 continue;
957 }
958 let (name_opt, email_opt) = split_name_email(uploader);
959 package.parties.push(Party {
960 r#type: None,
961 role: Some("uploader".to_string()),
962 name: name_opt,
963 email: email_opt,
964 url: None,
965 organization: None,
966 organization_url: None,
967 timezone: None,
968 });
969 }
970 }
971
972 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
974 package.dependencies.extend(parse_dependency_field(
975 &build_deps,
976 "build",
977 false,
978 false,
979 namespace.as_deref(),
980 ));
981 }
982
983 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
985 let map = package.extra_data.get_or_insert_with(HashMap::new);
986 map.insert("standards_version".to_string(), standards.into());
987 }
988
989 package
990}
991
992pub struct DebianOrigTarParser;
994
995impl PackageParser for DebianOrigTarParser {
996 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
997
998 fn is_match(path: &Path) -> bool {
999 path.file_name()
1000 .and_then(|n| n.to_str())
1001 .map(|name| name.contains(".orig.tar."))
1002 .unwrap_or(false)
1003 }
1004
1005 fn extract_packages(path: &Path) -> Vec<PackageData> {
1006 let filename = match path.file_name().and_then(|n| n.to_str()) {
1007 Some(f) => f,
1008 None => {
1009 return vec![default_package_data(
1010 DatasourceId::DebianOriginalSourceTarball,
1011 )];
1012 }
1013 };
1014
1015 vec![parse_source_tarball_filename(
1016 filename,
1017 DatasourceId::DebianOriginalSourceTarball,
1018 )]
1019 }
1020}
1021
1022pub struct DebianDebianTarParser;
1024
1025impl PackageParser for DebianDebianTarParser {
1026 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1027
1028 fn is_match(path: &Path) -> bool {
1029 path.file_name()
1030 .and_then(|n| n.to_str())
1031 .map(|name| name.contains(".debian.tar."))
1032 .unwrap_or(false)
1033 }
1034
1035 fn extract_packages(path: &Path) -> Vec<PackageData> {
1036 let filename = match path.file_name().and_then(|n| n.to_str()) {
1037 Some(f) => f,
1038 None => {
1039 return vec![default_package_data(
1040 DatasourceId::DebianSourceMetadataTarball,
1041 )];
1042 }
1043 };
1044
1045 vec![parse_source_tarball_filename(
1046 filename,
1047 DatasourceId::DebianSourceMetadataTarball,
1048 )]
1049 }
1050}
1051
1052fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1053 let without_tar_ext = filename
1054 .trim_end_matches(".gz")
1055 .trim_end_matches(".xz")
1056 .trim_end_matches(".bz2")
1057 .trim_end_matches(".tar");
1058
1059 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1060 if parts.len() < 2 {
1061 return default_package_data(datasource_id);
1062 }
1063
1064 let name = parts[0].to_string();
1065 let version_with_suffix = parts[1];
1066
1067 let version = version_with_suffix
1068 .trim_end_matches(".orig")
1069 .trim_end_matches(".debian")
1070 .to_string();
1071
1072 let namespace = Some("debian".to_string());
1073
1074 PackageData {
1075 datasource_id: Some(datasource_id),
1076 package_type: Some(PACKAGE_TYPE),
1077 namespace: namespace.clone(),
1078 name: Some(name.clone()),
1079 version: Some(version.clone()),
1080 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1081 ..Default::default()
1082 }
1083}
1084
1085pub struct DebianInstalledListParser;
1087
1088impl PackageParser for DebianInstalledListParser {
1089 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1090
1091 fn is_match(path: &Path) -> bool {
1092 path.extension().and_then(|e| e.to_str()) == Some("list")
1093 && path
1094 .to_str()
1095 .map(|p| p.contains("/var/lib/dpkg/info/"))
1096 .unwrap_or(false)
1097 }
1098
1099 fn extract_packages(path: &Path) -> Vec<PackageData> {
1100 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1101 Some(f) => f,
1102 None => {
1103 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1104 }
1105 };
1106
1107 let content = match read_file_to_string(path) {
1108 Ok(c) => c,
1109 Err(e) => {
1110 warn!("Failed to read .list file {:?}: {}", path, e);
1111 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1112 }
1113 };
1114
1115 vec![parse_debian_file_list(
1116 &content,
1117 filename,
1118 DatasourceId::DebianInstalledFilesList,
1119 )]
1120 }
1121}
1122
1123pub struct DebianInstalledMd5sumsParser;
1125
1126impl PackageParser for DebianInstalledMd5sumsParser {
1127 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1128
1129 fn is_match(path: &Path) -> bool {
1130 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1131 && path
1132 .to_str()
1133 .map(|p| p.contains("/var/lib/dpkg/info/"))
1134 .unwrap_or(false)
1135 }
1136
1137 fn extract_packages(path: &Path) -> Vec<PackageData> {
1138 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1139 Some(f) => f,
1140 None => {
1141 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1142 }
1143 };
1144
1145 let content = match read_file_to_string(path) {
1146 Ok(c) => c,
1147 Err(e) => {
1148 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1149 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1150 }
1151 };
1152
1153 vec![parse_debian_file_list(
1154 &content,
1155 filename,
1156 DatasourceId::DebianInstalledMd5Sums,
1157 )]
1158 }
1159}
1160
1161const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1162
1163fn parse_debian_file_list(
1164 content: &str,
1165 filename: &str,
1166 datasource_id: DatasourceId,
1167) -> PackageData {
1168 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1169 (Some(pkg.to_string()), Some(arch.to_string()))
1170 } else if filename == "md5sums" {
1171 (None, None)
1172 } else {
1173 (Some(filename.to_string()), None)
1174 };
1175
1176 let mut file_references = Vec::new();
1177
1178 for line in content.lines() {
1179 let line = line.trim();
1180 if line.is_empty() || line.starts_with('#') {
1181 continue;
1182 }
1183
1184 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1185 (Some(hash.trim().to_string()), p.trim())
1186 } else {
1187 (None, line)
1188 };
1189
1190 if IGNORED_ROOT_DIRS.contains(&path) {
1191 continue;
1192 }
1193
1194 file_references.push(FileReference {
1195 path: path.to_string(),
1196 size: None,
1197 sha1: None,
1198 md5: md5sum,
1199 sha256: None,
1200 sha512: None,
1201 extra_data: None,
1202 });
1203 }
1204
1205 if file_references.is_empty() {
1206 return default_package_data(datasource_id);
1207 }
1208
1209 let namespace = Some("debian".to_string());
1210 let mut package = PackageData {
1211 datasource_id: Some(datasource_id),
1212 package_type: Some(PACKAGE_TYPE),
1213 namespace: namespace.clone(),
1214 name: name.clone(),
1215 file_references,
1216 ..Default::default()
1217 };
1218
1219 if let Some(n) = &name {
1220 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1221 }
1222
1223 package
1224}
1225
1226pub struct DebianCopyrightParser;
1228
1229impl PackageParser for DebianCopyrightParser {
1230 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1231
1232 fn is_match(path: &Path) -> bool {
1233 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1234 if filename != "copyright" {
1235 return false;
1236 }
1237 let path_str = path.to_string_lossy();
1238 path_str.contains("/debian/")
1239 || path_str.contains("/usr/share/doc/")
1240 || path_str.ends_with("debian/copyright")
1241 } else {
1242 false
1243 }
1244 }
1245
1246 fn extract_packages(path: &Path) -> Vec<PackageData> {
1247 let content = match read_file_to_string(path) {
1248 Ok(c) => c,
1249 Err(e) => {
1250 warn!("Failed to read copyright file {:?}: {}", path, e);
1251 return vec![default_package_data(DatasourceId::DebianCopyright)];
1252 }
1253 };
1254
1255 let package_name = extract_package_name_from_path(path);
1256 vec![parse_copyright_file(&content, package_name.as_deref())]
1257 }
1258}
1259
/// Returns the name of the directory that directly follows the first `doc`
/// directory in `path`, e.g. `bash` for `/usr/share/doc/bash/copyright`.
///
/// Only directory components are considered: the final component is the file
/// itself and is never reported as a package name, so `/usr/share/doc/copyright`
/// yields `None`. Returns `None` as well when there is no `doc` directory or
/// when the directory name after it is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let components: Vec<Component> = path.components().collect();
    // Drop the last component (the file name) so it cannot be mistaken for
    // the package directory.
    let (_, directories) = components.split_last()?;

    for pair in directories.windows(2) {
        match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(package_dir)) if dir == "doc" => {
                return package_dir.to_str().map(str::to_owned);
            }
            _ => {}
        }
    }

    None
}
1274
1275fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1276 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1277
1278 let is_dep5 = paragraphs
1279 .first()
1280 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1281 .is_some();
1282
1283 let namespace = Some("debian".to_string());
1284 let mut parties = Vec::new();
1285 let mut license_statements = Vec::new();
1286 let mut primary_license_detection = None;
1287 let mut header_license_detection = None;
1288 let mut other_license_detections = Vec::new();
1289
1290 if is_dep5 {
1291 for para in ¶graphs {
1292 if let Some(copyright_text) =
1293 rfc822::get_header_first(¶.metadata.headers, "copyright")
1294 {
1295 for holder in parse_copyright_holders(©right_text) {
1296 if !holder.is_empty() {
1297 parties.push(Party {
1298 r#type: None,
1299 role: Some("copyright-holder".to_string()),
1300 name: Some(holder),
1301 email: None,
1302 url: None,
1303 organization: None,
1304 organization_url: None,
1305 timezone: None,
1306 });
1307 }
1308 }
1309 }
1310
1311 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1312 let license_name = license.lines().next().unwrap_or(&license).trim();
1313 if !license_name.is_empty()
1314 && !license_statements.contains(&license_name.to_string())
1315 {
1316 license_statements.push(license_name.to_string());
1317 }
1318
1319 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1320 let detection =
1321 build_primary_license_detection(license_name, matched_text, line_no);
1322 let is_header_paragraph =
1323 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1324 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1325 == Some("*")
1326 {
1327 primary_license_detection = Some(detection);
1328 } else if is_header_paragraph {
1329 header_license_detection.get_or_insert(detection);
1330 } else {
1331 other_license_detections.push(detection);
1332 }
1333 }
1334 }
1335 }
1336
1337 if primary_license_detection.is_none() && header_license_detection.is_some() {
1338 primary_license_detection = header_license_detection;
1339 }
1340 } else {
1341 let copyright_block = extract_unstructured_field(content, "Copyright:");
1342 if let Some(text) = copyright_block {
1343 for holder in parse_copyright_holders(&text) {
1344 if !holder.is_empty() {
1345 parties.push(Party {
1346 r#type: None,
1347 role: Some("copyright-holder".to_string()),
1348 name: Some(holder),
1349 email: None,
1350 url: None,
1351 organization: None,
1352 organization_url: None,
1353 timezone: None,
1354 });
1355 }
1356 }
1357 }
1358
1359 let license_block = extract_unstructured_field(content, "License:");
1360 if let Some(text) = license_block {
1361 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1362 }
1363 }
1364
1365 let extracted_license_statement = if license_statements.is_empty() {
1366 None
1367 } else {
1368 Some(license_statements.join(" AND "))
1369 };
1370
1371 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1372 let declared_license_expression = license_detections
1373 .first()
1374 .map(|detection| detection.license_expression.clone());
1375 let declared_license_expression_spdx = license_detections
1376 .first()
1377 .map(|detection| detection.license_expression_spdx.clone());
1378 let other_license_expression = combine_license_expressions(
1379 other_license_detections
1380 .iter()
1381 .map(|detection| detection.license_expression.clone()),
1382 );
1383 let other_license_expression_spdx = combine_license_expressions(
1384 other_license_detections
1385 .iter()
1386 .map(|detection| detection.license_expression_spdx.clone()),
1387 );
1388
1389 PackageData {
1390 datasource_id: Some(DatasourceId::DebianCopyright),
1391 package_type: Some(PACKAGE_TYPE),
1392 namespace: namespace.clone(),
1393 name: package_name.map(|s| s.to_string()),
1394 parties,
1395 declared_license_expression,
1396 declared_license_expression_spdx,
1397 license_detections,
1398 other_license_expression,
1399 other_license_expression_spdx,
1400 other_license_detections,
1401 extracted_license_statement,
1402 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1403 ..Default::default()
1404 }
1405}
1406
/// One paragraph of a Debian copyright file, as produced by
/// `parse_copyright_paragraphs_with_lines`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Fields of the paragraph, keyed by lower-cased field name.
    metadata: Rfc822Metadata,
    /// The paragraph's first `License:` header line (trailing whitespace
    /// removed) together with its line number in the file, if there is one.
    license_header_line: Option<(String, usize)>,
}
1412
1413fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1414 let mut paragraphs = Vec::new();
1415 let mut current_lines = Vec::new();
1416 let mut current_start_line = 1usize;
1417
1418 for (idx, line) in content.lines().enumerate() {
1419 let line_no = idx + 1;
1420 if line.is_empty() {
1421 if !current_lines.is_empty() {
1422 paragraphs.push(finalize_copyright_paragraph(
1423 std::mem::take(&mut current_lines),
1424 current_start_line,
1425 ));
1426 }
1427 current_start_line = line_no + 1;
1428 } else {
1429 if current_lines.is_empty() {
1430 current_start_line = line_no;
1431 }
1432 current_lines.push(line.to_string());
1433 }
1434 }
1435
1436 if !current_lines.is_empty() {
1437 paragraphs.push(finalize_copyright_paragraph(
1438 current_lines,
1439 current_start_line,
1440 ));
1441 }
1442
1443 paragraphs
1444}
1445
1446fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1447 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1448 let mut current_name: Option<String> = None;
1449 let mut current_value = String::new();
1450 let mut license_header_line = None;
1451
1452 for (idx, line) in raw_lines.iter().enumerate() {
1453 if line.starts_with(' ') || line.starts_with('\t') {
1454 if current_name.is_some() {
1455 current_value.push('\n');
1456 current_value.push_str(line);
1457 }
1458 continue;
1459 }
1460
1461 if let Some(name) = current_name.take() {
1462 add_copyright_header_value(&mut headers, &name, ¤t_value);
1463 current_value.clear();
1464 }
1465
1466 if let Some((name, value)) = line.split_once(':') {
1467 let normalized_name = name.trim().to_ascii_lowercase();
1468 if normalized_name == "license" && license_header_line.is_none() {
1469 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1470 }
1471 current_name = Some(normalized_name);
1472 current_value = value.trim_start().to_string();
1473 }
1474 }
1475
1476 if let Some(name) = current_name.take() {
1477 add_copyright_header_value(&mut headers, &name, ¤t_value);
1478 }
1479
1480 CopyrightParagraph {
1481 metadata: Rfc822Metadata {
1482 headers,
1483 body: String::new(),
1484 },
1485 license_header_line,
1486 }
1487}
1488
/// Records `value` under `name` in `headers`.
///
/// Trailing whitespace is removed from the value first. The key is created
/// even when the remaining value is empty, in which case nothing is appended
/// to its list.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1496
1497fn build_primary_license_detection(
1498 license_name: &str,
1499 matched_text: String,
1500 line_no: usize,
1501) -> LicenseDetection {
1502 let normalized = normalize_debian_license_name(license_name);
1503
1504 build_declared_license_detection(
1505 &normalized,
1506 DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no),
1507 )
1508}
1509
1510fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1511 match license_name.trim() {
1512 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1513 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1514 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1515 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1516 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1517 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1518 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1519 "public-domain" => {
1520 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1521 }
1522 other => normalize_declared_license_key(other)
1523 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1524 }
1525}
1526
/// Extracts copyright holder names from copyright statement text.
///
/// Every non-empty line is treated as one statement. Leading copyright
/// markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`) are removed in any
/// order and with any whitespace between them, so `Copyright (C) 2024 Foo`
/// is handled the same way as `(C) 2024 Foo`. Everything before the first
/// alphabetic character (years, ranges, punctuation) is then skipped and the
/// rest of the line is the holder.
///
/// Holders of two bytes or fewer are discarded as noise. Lines that contain
/// no alphabetic character after the markers yield nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

    let mut holders = Vec::new();

    for line in text.lines() {
        let mut rest = line.trim();

        // Peel off markers one at a time, trimming the whitespace that
        // separates them, until none is left at the front.
        loop {
            let stripped = MARKERS
                .iter()
                .find_map(|marker| rest.strip_prefix(*marker));
            match stripped {
                Some(after_marker) => rest = after_marker.trim_start(),
                None => break,
            }
        }

        let Some(holder_start) = rest.find(char::is_alphabetic) else {
            continue;
        };

        let holder = rest[holder_start..].trim();
        if holder.len() > 2 {
            holders.push(holder.to_string());
        }
    }

    holders
}
1559
/// Pulls the value of a free-form field out of an unstructured copyright file.
///
/// Collection starts at the first line beginning with `field_name` (the
/// prefix is removed). Following lines that start with whitespace are
/// appended, blank lines are skipped, and the first other line that does not
/// itself begin with `field_name` ends the field. The collected pieces are
/// trimmed and joined with newlines.
///
/// Returns `None` when the field is absent or its value is empty.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut field_seen = false;

    for line in content.lines() {
        if line.starts_with(field_name) {
            field_seen = true;
            pieces.push(line.trim_start_matches(field_name).trim());
            continue;
        }
        if !field_seen {
            continue;
        }

        if line.starts_with(char::is_whitespace) {
            pieces.push(line.trim());
        } else if !line.trim().is_empty() {
            // An unindented, non-blank line starts something else.
            break;
        }
    }

    let joined = pieces.join("\n");
    let value = joined.trim();
    (!value.is_empty()).then(|| value.to_string())
}
1586
/// Parser for binary `.deb` package archives (matched by the `deb` file
/// extension).
pub struct DebianDebParser;
1589
1590impl PackageParser for DebianDebParser {
1591 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1592
1593 fn is_match(path: &Path) -> bool {
1594 path.extension().and_then(|e| e.to_str()) == Some("deb")
1595 }
1596
1597 fn extract_packages(path: &Path) -> Vec<PackageData> {
1598 if let Ok(data) = extract_deb_archive(path) {
1600 return vec![data];
1601 }
1602
1603 let filename = match path.file_name().and_then(|n| n.to_str()) {
1605 Some(f) => f,
1606 None => {
1607 return vec![default_package_data(DatasourceId::DebianDeb)];
1608 }
1609 };
1610
1611 vec![parse_deb_filename(filename)]
1612 }
1613}
1614
1615fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1616 use flate2::read::GzDecoder;
1617 use liblzma::read::XzDecoder;
1618 use std::io::{Cursor, Read};
1619
1620 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1621
1622 let mut archive = ar::Archive::new(file);
1623 let mut package: Option<PackageData> = None;
1624
1625 while let Some(entry_result) = archive.next_entry() {
1626 let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1627
1628 let entry_name = std::str::from_utf8(entry.header().identifier())
1629 .map_err(|e| format!("Invalid entry name: {}", e))?;
1630 let entry_name = entry_name.trim().to_string();
1631
1632 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1633 let mut control_data = Vec::new();
1634 entry
1635 .read_to_end(&mut control_data)
1636 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1637
1638 if entry_name.ends_with(".gz") {
1639 let decoder = GzDecoder::new(Cursor::new(control_data));
1640 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1641 package = Some(parsed_package);
1642 }
1643 } else if entry_name.ends_with(".xz") {
1644 let decoder = XzDecoder::new(Cursor::new(control_data));
1645 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1646 package = Some(parsed_package);
1647 }
1648 }
1649 } else if entry_name.starts_with("data.tar") {
1650 let mut data = Vec::new();
1651 entry
1652 .read_to_end(&mut data)
1653 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1654
1655 let Some(current_package) = package.as_mut() else {
1656 continue;
1657 };
1658
1659 if entry_name.ends_with(".gz") {
1660 let decoder = GzDecoder::new(Cursor::new(data));
1661 merge_deb_data_archive(decoder, current_package)?;
1662 } else if entry_name.ends_with(".xz") {
1663 let decoder = XzDecoder::new(Cursor::new(data));
1664 merge_deb_data_archive(decoder, current_package)?;
1665 }
1666 }
1667 }
1668
1669 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1670}
1671
1672fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1673 use std::io::Read;
1674
1675 let mut tar_archive = tar::Archive::new(reader);
1676
1677 for tar_entry_result in tar_archive
1678 .entries()
1679 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1680 {
1681 let mut tar_entry =
1682 tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1683
1684 let tar_path = tar_entry
1685 .path()
1686 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1687
1688 if tar_path.ends_with("control") {
1689 let mut control_content = String::new();
1690 tar_entry
1691 .read_to_string(&mut control_content)
1692 .map_err(|e| format!("Failed to read control file: {}", e))?;
1693
1694 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1695 if paragraphs.is_empty() {
1696 return Err("No paragraphs in control file".to_string());
1697 }
1698
1699 if let Some(package) =
1700 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
1701 {
1702 return Ok(Some(package));
1703 }
1704
1705 return Err("Failed to parse control file".to_string());
1706 }
1707 }
1708
1709 Ok(None)
1710}
1711
1712fn merge_deb_data_archive<R: std::io::Read>(
1713 reader: R,
1714 package: &mut PackageData,
1715) -> Result<(), String> {
1716 use std::io::Read;
1717
1718 let mut tar_archive = tar::Archive::new(reader);
1719
1720 for tar_entry_result in tar_archive
1721 .entries()
1722 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1723 {
1724 let mut tar_entry =
1725 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1726
1727 let tar_path = tar_entry
1728 .path()
1729 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1730 let tar_path_str = tar_path.to_string_lossy();
1731
1732 if tar_path_str.ends_with(&format!(
1733 "/usr/share/doc/{}/copyright",
1734 package.name.as_deref().unwrap_or_default()
1735 )) || tar_path_str.ends_with(&format!(
1736 "usr/share/doc/{}/copyright",
1737 package.name.as_deref().unwrap_or_default()
1738 )) {
1739 let mut copyright_content = String::new();
1740 tar_entry
1741 .read_to_string(&mut copyright_content)
1742 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1743
1744 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
1745 merge_debian_copyright_into_package(package, ©right_pkg);
1746 break;
1747 }
1748 }
1749
1750 Ok(())
1751}
1752
1753fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1754 if target.extracted_license_statement.is_none() {
1755 target.extracted_license_statement = copyright.extracted_license_statement.clone();
1756 }
1757
1758 for party in ©right.parties {
1759 if !target.parties.iter().any(|existing| {
1760 existing.r#type == party.r#type
1761 && existing.role == party.role
1762 && existing.name == party.name
1763 && existing.email == party.email
1764 && existing.url == party.url
1765 && existing.organization == party.organization
1766 && existing.organization_url == party.organization_url
1767 && existing.timezone == party.timezone
1768 }) {
1769 target.parties.push(party.clone());
1770 }
1771 }
1772}
1773
1774fn parse_deb_filename(filename: &str) -> PackageData {
1775 let without_ext = filename.trim_end_matches(".deb");
1776
1777 let parts: Vec<&str> = without_ext.split('_').collect();
1778 if parts.len() < 2 {
1779 return default_package_data(DatasourceId::DebianDeb);
1780 }
1781
1782 let name = parts[0].to_string();
1783 let version = parts[1].to_string();
1784 let architecture = if parts.len() >= 3 {
1785 Some(parts[2].to_string())
1786 } else {
1787 None
1788 };
1789
1790 let namespace = Some("debian".to_string());
1791
1792 PackageData {
1793 datasource_id: Some(DatasourceId::DebianDeb),
1794 package_type: Some(PACKAGE_TYPE),
1795 namespace: namespace.clone(),
1796 name: Some(name.clone()),
1797 version: Some(version.clone()),
1798 purl: build_debian_purl(
1799 &name,
1800 Some(&version),
1801 namespace.as_deref(),
1802 architecture.as_deref(),
1803 ),
1804 ..Default::default()
1805 }
1806}
1807
/// Parser for the `control` file found in an extracted `.deb` control
/// tarball (`control.tar.gz-extract/` or `control.tar.xz-extract/`).
pub struct DebianControlInExtractedDebParser;
1814
1815impl PackageParser for DebianControlInExtractedDebParser {
1816 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1817
1818 fn is_match(path: &Path) -> bool {
1819 path.file_name()
1820 .and_then(|n| n.to_str())
1821 .is_some_and(|name| name == "control")
1822 && path
1823 .to_str()
1824 .map(|p| {
1825 p.ends_with("control.tar.gz-extract/control")
1826 || p.ends_with("control.tar.xz-extract/control")
1827 })
1828 .unwrap_or(false)
1829 }
1830
1831 fn extract_packages(path: &Path) -> Vec<PackageData> {
1832 let content = match read_file_to_string(path) {
1833 Ok(c) => c,
1834 Err(e) => {
1835 warn!(
1836 "Failed to read control file in extracted deb {:?}: {}",
1837 path, e
1838 );
1839 return vec![default_package_data(
1840 DatasourceId::DebianControlExtractedDeb,
1841 )];
1842 }
1843 };
1844
1845 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1848 if paragraphs.is_empty() {
1849 return vec![default_package_data(
1850 DatasourceId::DebianControlExtractedDeb,
1851 )];
1852 }
1853
1854 if let Some(pkg) = build_package_from_paragraph(
1855 ¶graphs[0],
1856 None,
1857 DatasourceId::DebianControlExtractedDeb,
1858 ) {
1859 vec![pkg]
1860 } else {
1861 vec![default_package_data(
1862 DatasourceId::DebianControlExtractedDeb,
1863 )]
1864 }
1865 }
1866}
1867
/// Parser for the `md5sums` file found in an extracted `.deb` control
/// tarball (`control.tar.gz-extract/` or `control.tar.xz-extract/`).
pub struct DebianMd5sumInPackageParser;
1870
1871impl PackageParser for DebianMd5sumInPackageParser {
1872 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1873
1874 fn is_match(path: &Path) -> bool {
1875 path.file_name()
1876 .and_then(|n| n.to_str())
1877 .is_some_and(|name| name == "md5sums")
1878 && path
1879 .to_str()
1880 .map(|p| {
1881 p.ends_with("control.tar.gz-extract/md5sums")
1882 || p.ends_with("control.tar.xz-extract/md5sums")
1883 })
1884 .unwrap_or(false)
1885 }
1886
1887 fn extract_packages(path: &Path) -> Vec<PackageData> {
1888 let content = match read_file_to_string(path) {
1889 Ok(c) => c,
1890 Err(e) => {
1891 warn!("Failed to read md5sums file {:?}: {}", path, e);
1892 return vec![default_package_data(
1893 DatasourceId::DebianMd5SumsInExtractedDeb,
1894 )];
1895 }
1896 };
1897
1898 let package_name = extract_package_name_from_deb_path(path);
1899
1900 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
1901 }
1902}
1903
/// Derives the package name from the directory a `.deb` was extracted into.
///
/// The path is expected to look like
/// `<name>_<version>_<arch>.deb-extract/<control tarball dir>/<file>`: the
/// grandparent directory name must end in `.deb-extract`, and the text
/// before its first `_` is the package name.
///
/// Returns `None` when the path is too short, the directory name is not
/// UTF-8, or it lacks the `.deb-extract` suffix.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let extract_dir = path
        .parent()
        .and_then(Path::parent)?
        .file_name()?
        .to_str()?;

    let archive_stem = extract_dir
        .strip_suffix("-extract")
        .and_then(|without_extract| without_extract.strip_suffix(".deb"))?;

    archive_stem.split('_').next().map(str::to_owned)
}
1914
1915fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
1916 let mut file_references = Vec::new();
1917
1918 for line in content.lines() {
1919 let line = line.trim();
1920 if line.is_empty() || line.starts_with('#') {
1921 continue;
1922 }
1923
1924 let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find(" ") {
1925 (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
1926 } else if let Some((hash, path)) = line.split_once(' ') {
1927 (Some(hash.trim().to_string()), path.trim())
1928 } else {
1929 (None, line)
1930 };
1931
1932 if IGNORED_ROOT_DIRS.contains(&filepath) {
1933 continue;
1934 }
1935
1936 file_references.push(FileReference {
1937 path: filepath.to_string(),
1938 size: None,
1939 sha1: None,
1940 md5: md5sum,
1941 sha256: None,
1942 sha512: None,
1943 extra_data: None,
1944 });
1945 }
1946
1947 if file_references.is_empty() {
1948 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
1949 }
1950
1951 let namespace = Some("debian".to_string());
1952 let mut package = PackageData {
1953 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
1954 package_type: Some(PACKAGE_TYPE),
1955 namespace: namespace.clone(),
1956 name: package_name.map(|s| s.to_string()),
1957 file_references,
1958 ..Default::default()
1959 };
1960
1961 if let Some(n) = &package.name {
1962 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
1963 }
1964
1965 package
1966}
1967
// Registration entry for the `control` file of an extracted .deb control
// tarball: description, path globs, the `deb` package type, and a
// documentation URL.
// NOTE(review): the meaning of the empty fourth argument is defined by
// `register_parser!` (presumably the primary language) — confirm.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
1978
// Registration entry for the `md5sums` file of an extracted .deb control
// tarball: description, path globs, the `deb` package type, and a
// documentation URL.
// NOTE(review): the meaning of the empty fourth argument is defined by
// `register_parser!` (presumably the primary language) — confirm.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
1989
1990#[cfg(test)]
1991mod tests {
1992 use super::*;
1993 use crate::models::DatasourceId;
1994 use crate::models::PackageType;
1995 use ar::{Builder as ArBuilder, Header as ArHeader};
1996 use flate2::Compression;
1997 use flate2::write::GzEncoder;
1998 use liblzma::write::XzEncoder;
1999 use std::io::Cursor;
2000 use std::path::PathBuf;
2001 use tar::{Builder as TarBuilder, Header as TarHeader};
2002 use tempfile::NamedTempFile;
2003
    // Builds a temporary `.deb`-style `ar` archive that contains only a
    // `debian-binary` member and an xz-compressed `control.tar.xz` member
    // describing the package `synthetic` 1.2.3 (amd64).
    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
        let mut control_tar = Vec::new();
        {
            // Scoped so the encoder's mutable borrow of `control_tar` ends
            // before the buffer is read below.
            let encoder = XzEncoder::new(&mut control_tar, 6);
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            // Checksum is computed last, after all other header fields are set.
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.xz");
            tar_builder.finish().expect("control tar.xz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        {
            // Write the ar members through a second handle to the temp file.
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            let mut control_header =
                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.xz entry should be appended");
        }

        deb
    }
2049
    // Builds a temporary `.deb`-style `ar` archive with three members:
    // `debian-binary`, a gzip-compressed `control.tar.gz` describing the
    // package `synthetic` 9.9.9, and a gzip-compressed `data.tar.gz` holding
    // a machine-readable copyright file at
    // `./usr/share/doc/synthetic/copyright`.
    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
        let mut control_tar = Vec::new();
        {
            // Scoped so the encoder's mutable borrow of `control_tar` ends
            // before the buffer is read below.
            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.gz");
            tar_builder.finish().expect("control tar.gz should finish");
        }

        let mut data_tar = Vec::new();
        {
            // Same scoping as above, this time for the data tarball buffer.
            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("./usr/share/doc/synthetic/copyright")
                .expect("copyright path should be valid");
            header.set_size(copyright.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(copyright))
                .expect("copyright file should be appended to data tar");
            tar_builder.finish().expect("data tar.gz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        {
            // Write the ar members through a second handle to the temp file.
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            // The control member is appended before the data member.
            let mut control_header =
                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.gz entry should be appended");

            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
            data_header.set_mode(0o100644);
            builder
                .append(&data_header, Cursor::new(data_tar))
                .expect("data.tar.gz entry should be appended");
        }

        deb
    }
2120
2121 #[test]
2124 fn test_detect_namespace_from_ubuntu_version() {
2125 assert_eq!(
2126 detect_namespace(Some("1.0-1ubuntu1"), None),
2127 Some("ubuntu".to_string())
2128 );
2129 }
2130
2131 #[test]
2132 fn test_detect_namespace_from_debian_version() {
2133 assert_eq!(
2134 detect_namespace(Some("1.0-1+deb11u1"), None),
2135 Some("debian".to_string())
2136 );
2137 }
2138
2139 #[test]
2140 fn test_detect_namespace_from_ubuntu_maintainer() {
2141 assert_eq!(
2142 detect_namespace(
2143 None,
2144 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2145 ),
2146 Some("ubuntu".to_string())
2147 );
2148 }
2149
2150 #[test]
2151 fn test_detect_namespace_from_debian_maintainer() {
2152 assert_eq!(
2153 detect_namespace(None, Some("John Doe <john@debian.org>")),
2154 Some("debian".to_string())
2155 );
2156 }
2157
2158 #[test]
2159 fn test_detect_namespace_default() {
2160 assert_eq!(
2161 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2162 Some("debian".to_string())
2163 );
2164 }
2165
2166 #[test]
2167 fn test_detect_namespace_version_takes_priority() {
2168 assert_eq!(
2170 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2171 Some("ubuntu".to_string())
2172 );
2173 }
2174
2175 #[test]
2178 fn test_build_purl_basic() {
2179 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2180 assert_eq!(
2181 purl,
2182 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2183 );
2184 }
2185
2186 #[test]
2187 fn test_build_purl_no_version() {
2188 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2189 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2190 }
2191
2192 #[test]
2193 fn test_build_purl_no_arch() {
2194 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2195 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2196 }
2197
2198 #[test]
2199 fn test_build_purl_no_namespace() {
2200 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2201 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2202 }
2203
2204 #[test]
2207 fn test_parse_simple_dependency() {
2208 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2209 assert_eq!(deps.len(), 1);
2210 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2211 assert_eq!(deps[0].extracted_requirement, None);
2212 assert_eq!(deps[0].scope, Some("depends".to_string()));
2213 }
2214
2215 #[test]
2216 fn test_parse_dependency_with_version() {
2217 let deps =
2218 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2219 assert_eq!(deps.len(), 1);
2220 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2221 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2222 }
2223
2224 #[test]
2225 fn test_parse_dependency_exact_version() {
2226 let deps = parse_dependency_field(
2227 "libc6 (= 2.31-13+deb11u5)",
2228 "depends",
2229 true,
2230 false,
2231 Some("debian"),
2232 );
2233 assert_eq!(deps.len(), 1);
2234 assert_eq!(deps[0].is_pinned, Some(true));
2235 }
2236
2237 #[test]
2238 fn test_parse_dependency_strict_less() {
2239 let deps =
2240 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2241 assert_eq!(deps.len(), 1);
2242 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2243 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2244 }
2245
2246 #[test]
2247 fn test_parse_multiple_dependencies() {
2248 let deps = parse_dependency_field(
2249 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2250 "depends",
2251 true,
2252 false,
2253 Some("debian"),
2254 );
2255 assert_eq!(deps.len(), 3);
2256 }
2257
2258 #[test]
2259 fn test_parse_dependency_alternatives() {
2260 let deps = parse_dependency_field(
2261 "libssl1.1 | libssl3",
2262 "depends",
2263 true,
2264 false,
2265 Some("debian"),
2266 );
2267 assert_eq!(deps.len(), 2);
2268 assert_eq!(deps[0].is_optional, Some(true));
2270 assert_eq!(deps[1].is_optional, Some(true));
2271 }
2272
2273 #[test]
2274 fn test_parse_dependency_skips_substitutions() {
2275 let deps = parse_dependency_field(
2276 "${shlibs:Depends}, ${misc:Depends}, libc6",
2277 "depends",
2278 true,
2279 false,
2280 Some("debian"),
2281 );
2282 assert_eq!(deps.len(), 1);
2283 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2284 }
2285
2286 #[test]
2287 fn test_parse_dependency_with_arch_qualifier() {
2288 let deps = parse_dependency_field(
2290 "libc6 (>= 2.17) [amd64]",
2291 "depends",
2292 true,
2293 false,
2294 Some("debian"),
2295 );
2296 assert_eq!(deps.len(), 1);
2297 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2298 }
2299
2300 #[test]
2301 fn test_parse_empty_dependency() {
2302 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2303 assert!(deps.is_empty());
2304 }
2305
2306 #[test]
2309 fn test_parse_source_field_name_only() {
2310 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2311 assert_eq!(sources.len(), 1);
2312 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2313 }
2314
2315 #[test]
2316 fn test_parse_source_field_with_version() {
2317 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2318 assert_eq!(sources.len(), 1);
2319 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2320 }
2321
2322 #[test]
2323 fn test_parse_source_field_empty() {
2324 let sources = parse_source_field(None, Some("debian"));
2325 assert!(sources.is_empty());
2326 }
2327
2328 #[test]
2331 fn test_parse_debian_control_source_and_binary() {
2332 let content = "\
2333Source: curl
2334Section: web
2335Priority: optional
2336Maintainer: Alessandro Ghedini <ghedo@debian.org>
2337Homepage: https://curl.se/
2338Vcs-Browser: https://salsa.debian.org/debian/curl
2339Vcs-Git: https://salsa.debian.org/debian/curl.git
2340Build-Depends: debhelper (>= 12), libssl-dev
2341
2342Package: curl
2343Architecture: amd64
2344Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2345Description: command line tool for transferring data with URL syntax";
2346
2347 let packages = parse_debian_control(content);
2348 assert_eq!(packages.len(), 1);
2349
2350 let pkg = &packages[0];
2351 assert_eq!(pkg.name, Some("curl".to_string()));
2352 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2353 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2354 assert_eq!(
2355 pkg.vcs_url,
2356 Some("https://salsa.debian.org/debian/curl.git".to_string())
2357 );
2358 assert_eq!(
2359 pkg.code_view_url,
2360 Some("https://salsa.debian.org/debian/curl".to_string())
2361 );
2362
2363 assert_eq!(pkg.parties.len(), 1);
2365 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2366 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2367 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2368
2369 assert!(!pkg.dependencies.is_empty());
2371 }
2372
2373 #[test]
2374 fn test_parse_debian_control_multiple_binary() {
2375 let content = "\
2376Source: gzip
2377Maintainer: Debian Developer <dev@debian.org>
2378
2379Package: gzip
2380Architecture: any
2381Depends: libc6 (>= 2.17)
2382Description: GNU file compression
2383
2384Package: gzip-win32
2385Architecture: all
2386Description: gzip for Windows";
2387
2388 let packages = parse_debian_control(content);
2389 assert_eq!(packages.len(), 2);
2390 assert_eq!(packages[0].name, Some("gzip".to_string()));
2391 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2392
2393 assert_eq!(packages[0].parties.len(), 1);
2395 assert_eq!(packages[1].parties.len(), 1);
2396 }
2397
2398 #[test]
2399 fn test_parse_debian_control_source_only() {
2400 let content = "\
2401Source: my-package
2402Maintainer: Test User <test@debian.org>
2403Build-Depends: debhelper (>= 13)";
2404
2405 let packages = parse_debian_control(content);
2406 assert_eq!(packages.len(), 1);
2407 assert_eq!(packages[0].name, Some("my-package".to_string()));
2408 assert!(!packages[0].dependencies.is_empty());
2410 assert_eq!(
2411 packages[0].dependencies[0].scope,
2412 Some("build-depends".to_string())
2413 );
2414 }
2415
2416 #[test]
2417 fn test_parse_debian_control_with_uploaders() {
2418 let content = "\
2419Source: example
2420Maintainer: Main Dev <main@debian.org>
2421Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2422
2423Package: example
2424Architecture: any
2425Description: test package";
2426
2427 let packages = parse_debian_control(content);
2428 assert_eq!(packages.len(), 1);
2429 assert_eq!(packages[0].parties.len(), 3);
2431 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2432 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2433 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2434 }
2435
2436 #[test]
2437 fn test_parse_debian_control_vcs_git_with_branch() {
2438 let content = "\
2439Source: example
2440Maintainer: Dev <dev@debian.org>
2441Vcs-Git: https://salsa.debian.org/example.git -b main
2442
2443Package: example
2444Architecture: any
2445Description: test";
2446
2447 let packages = parse_debian_control(content);
2448 assert_eq!(packages.len(), 1);
2449 assert_eq!(
2451 packages[0].vcs_url,
2452 Some("https://salsa.debian.org/example.git".to_string())
2453 );
2454 }
2455
2456 #[test]
2457 fn test_parse_debian_control_multi_arch() {
2458 let content = "\
2459Source: example
2460Maintainer: Dev <dev@debian.org>
2461
2462Package: libexample
2463Architecture: any
2464Multi-Arch: same
2465Description: shared library";
2466
2467 let packages = parse_debian_control(content);
2468 assert_eq!(packages.len(), 1);
2469 let extra = packages[0].extra_data.as_ref().unwrap();
2470 assert_eq!(
2471 extra.get("multi_arch"),
2472 Some(&serde_json::Value::String("same".to_string()))
2473 );
2474 }
2475
2476 #[test]
2479 fn test_parse_dpkg_status_basic() {
2480 let content = "\
2481Package: base-files
2482Status: install ok installed
2483Priority: required
2484Section: admin
2485Installed-Size: 391
2486Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2487Architecture: amd64
2488Version: 11ubuntu5.6
2489Description: Debian base system miscellaneous files
2490Homepage: https://tracker.debian.org/pkg/base-files
2491
2492Package: not-installed
2493Status: deinstall ok config-files
2494Architecture: amd64
2495Version: 1.0
2496Description: This should be skipped";
2497
2498 let packages = parse_dpkg_status(content);
2499 assert_eq!(packages.len(), 1);
2500
2501 let pkg = &packages[0];
2502 assert_eq!(pkg.name, Some("base-files".to_string()));
2503 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2504 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2505 assert_eq!(
2506 pkg.datasource_id,
2507 Some(DatasourceId::DebianInstalledStatusDb)
2508 );
2509
2510 let extra = pkg.extra_data.as_ref().unwrap();
2512 assert_eq!(
2513 extra.get("installed_size"),
2514 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2515 );
2516 }
2517
2518 #[test]
2519 fn test_parse_dpkg_status_multiple_installed() {
2520 let content = "\
2521Package: libc6
2522Status: install ok installed
2523Architecture: amd64
2524Version: 2.31-13+deb11u5
2525Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2526Description: GNU C Library
2527
2528Package: zlib1g
2529Status: install ok installed
2530Architecture: amd64
2531Version: 1:1.2.11.dfsg-2+deb11u2
2532Maintainer: Mark Brown <broonie@debian.org>
2533Description: compression library";
2534
2535 let packages = parse_dpkg_status(content);
2536 assert_eq!(packages.len(), 2);
2537 assert_eq!(packages[0].name, Some("libc6".to_string()));
2538 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2539 }
2540
2541 #[test]
2542 fn test_parse_dpkg_status_with_dependencies() {
2543 let content = "\
2544Package: curl
2545Status: install ok installed
2546Architecture: amd64
2547Version: 7.74.0-1.3+deb11u7
2548Maintainer: Alessandro Ghedini <ghedo@debian.org>
2549Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2550Recommends: ca-certificates
2551Description: command line tool for transferring data with URL syntax";
2552
2553 let packages = parse_dpkg_status(content);
2554 assert_eq!(packages.len(), 1);
2555
2556 let deps = &packages[0].dependencies;
2557 assert_eq!(deps.len(), 3);
2559
2560 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2562 assert_eq!(deps[0].scope, Some("depends".to_string()));
2563 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2564
2565 assert_eq!(
2567 deps[2].purl,
2568 Some("pkg:deb/debian/ca-certificates".to_string())
2569 );
2570 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2571 assert_eq!(deps[2].is_optional, Some(true));
2572 }
2573
2574 #[test]
2575 fn test_parse_dpkg_status_with_source() {
2576 let content = "\
2577Package: libncurses6
2578Status: install ok installed
2579Architecture: amd64
2580Source: ncurses (6.2+20201114-2+deb11u1)
2581Version: 6.2+20201114-2+deb11u1
2582Maintainer: Craig Small <csmall@debian.org>
2583Description: shared libraries for terminal handling";
2584
2585 let packages = parse_dpkg_status(content);
2586 assert_eq!(packages.len(), 1);
2587 assert!(!packages[0].source_packages.is_empty());
2588 assert!(packages[0].source_packages[0].contains("ncurses"));
2590 }
2591
2592 #[test]
2593 fn test_parse_dpkg_status_filters_not_installed() {
2594 let content = "\
2595Package: installed-pkg
2596Status: install ok installed
2597Version: 1.0
2598Architecture: amd64
2599Description: installed
2600
2601Package: half-installed
2602Status: install ok half-installed
2603Version: 2.0
2604Architecture: amd64
2605Description: half installed
2606
2607Package: deinstall-pkg
2608Status: deinstall ok config-files
2609Version: 3.0
2610Architecture: amd64
2611Description: deinstalled
2612
2613Package: purge-pkg
2614Status: purge ok not-installed
2615Version: 4.0
2616Architecture: amd64
2617Description: purged";
2618
2619 let packages = parse_dpkg_status(content);
2620 assert_eq!(packages.len(), 1);
2621 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2622 }
2623
2624 #[test]
2625 fn test_parse_dpkg_status_empty() {
2626 let packages = parse_dpkg_status("");
2627 assert!(packages.is_empty());
2628 }
2629
2630 #[test]
2633 fn test_debian_control_is_match() {
2634 assert!(DebianControlParser::is_match(Path::new(
2635 "/path/to/debian/control"
2636 )));
2637 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2638 assert!(!DebianControlParser::is_match(Path::new(
2639 "/path/to/control"
2640 )));
2641 assert!(!DebianControlParser::is_match(Path::new(
2642 "/path/to/debian/changelog"
2643 )));
2644 }
2645
2646 #[test]
2647 fn test_debian_installed_is_match() {
2648 assert!(DebianInstalledParser::is_match(Path::new(
2649 "/var/lib/dpkg/status"
2650 )));
2651 assert!(DebianInstalledParser::is_match(Path::new(
2652 "some/root/var/lib/dpkg/status"
2653 )));
2654 assert!(!DebianInstalledParser::is_match(Path::new(
2655 "/var/lib/dpkg/status.d/something"
2656 )));
2657 assert!(!DebianInstalledParser::is_match(Path::new(
2658 "/var/lib/dpkg/available"
2659 )));
2660 }
2661
2662 #[test]
2665 fn test_parse_debian_control_empty_input() {
2666 let packages = parse_debian_control("");
2667 assert!(packages.is_empty());
2668 }
2669
2670 #[test]
2671 fn test_parse_debian_control_malformed_input() {
2672 let content = "this is not a valid control file\nwith random text";
2673 let packages = parse_debian_control(content);
2674 assert!(packages.is_empty());
2676 }
2677
2678 #[test]
2679 fn test_dependency_with_epoch_version() {
2680 let deps = parse_dependency_field(
2682 "zlib1g (>= 1:1.2.11)",
2683 "depends",
2684 true,
2685 false,
2686 Some("debian"),
2687 );
2688 assert_eq!(deps.len(), 1);
2689 assert_eq!(
2690 deps[0].extracted_requirement,
2691 Some(">= 1:1.2.11".to_string())
2692 );
2693 }
2694
2695 #[test]
2696 fn test_dependency_with_plus_in_name() {
2697 let deps =
2698 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2699 assert_eq!(deps.len(), 1);
2700 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2701 }
2702
2703 #[test]
2704 fn test_dsc_parser_is_match() {
2705 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2706 assert!(DebianDscParser::is_match(&PathBuf::from(
2707 "adduser_3.118+deb11u1.dsc"
2708 )));
2709 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2710 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2711 }
2712
2713 #[test]
2714 fn test_dsc_parser_adduser() {
2715 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2716 let package = DebianDscParser::extract_first_package(&path);
2717
2718 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2719 assert_eq!(package.namespace, Some("debian".to_string()));
2720 assert_eq!(package.name, Some("adduser".to_string()));
2721 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2722 assert_eq!(
2723 package.purl,
2724 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2725 );
2726 assert_eq!(
2727 package.vcs_url,
2728 Some("https://salsa.debian.org/debian/adduser.git".to_string())
2729 );
2730 assert_eq!(
2731 package.code_view_url,
2732 Some("https://salsa.debian.org/debian/adduser".to_string())
2733 );
2734 assert_eq!(
2735 package.datasource_id,
2736 Some(DatasourceId::DebianSourceControlDsc)
2737 );
2738
2739 assert_eq!(package.parties.len(), 2);
2740 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2741 assert_eq!(
2742 package.parties[0].name,
2743 Some("Debian Adduser Developers".to_string())
2744 );
2745 assert_eq!(
2746 package.parties[0].email,
2747 Some("adduser@packages.debian.org".to_string())
2748 );
2749 assert_eq!(package.parties[0].r#type, None);
2750
2751 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2752 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2753 assert_eq!(
2754 package.parties[1].email,
2755 Some("mh+debian-packages@zugschlus.de".to_string())
2756 );
2757 assert_eq!(package.parties[1].r#type, None);
2758
2759 assert_eq!(package.source_packages.len(), 1);
2760 assert_eq!(
2761 package.source_packages[0],
2762 "pkg:deb/debian/adduser".to_string()
2763 );
2764
2765 assert!(!package.dependencies.is_empty());
2766 let build_dep_names: Vec<String> = package
2767 .dependencies
2768 .iter()
2769 .filter_map(|d| d.purl.as_ref())
2770 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2771 .map(|p| p.to_string())
2772 .collect();
2773 assert!(build_dep_names.len() >= 2);
2774 }
2775
2776 #[test]
2777 fn test_dsc_parser_zsh() {
2778 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2779 let package = DebianDscParser::extract_first_package(&path);
2780
2781 assert_eq!(package.name, Some("zsh".to_string()));
2782 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2783 assert_eq!(package.namespace, Some("debian".to_string()));
2784 assert!(package.purl.is_some());
2785 assert!(package.purl.as_ref().unwrap().contains("zsh"));
2786 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2787 }
2788
2789 #[test]
2790 fn test_parse_dsc_content_basic() {
2791 let content = "Format: 3.0 (native)
2792Source: testpkg
2793Binary: testpkg
2794Architecture: amd64
2795Version: 1.0.0
2796Maintainer: Test User <test@example.com>
2797Standards-Version: 4.5.0
2798Build-Depends: debhelper (>= 12)
2799Files:
2800 abc123 1024 testpkg_1.0.0.tar.xz
2801";
2802
2803 let package = parse_dsc_content(content);
2804 assert_eq!(package.name, Some("testpkg".to_string()));
2805 assert_eq!(package.version, Some("1.0.0".to_string()));
2806 assert_eq!(package.namespace, Some("debian".to_string()));
2807 assert_eq!(package.parties.len(), 1);
2808 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2809 assert_eq!(
2810 package.parties[0].email,
2811 Some("test@example.com".to_string())
2812 );
2813 assert_eq!(package.dependencies.len(), 1);
2814 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2815 }
2816
2817 #[test]
2818 fn test_parse_dsc_content_with_uploaders() {
2819 let content = "Source: mypkg
2820Version: 2.0
2821Architecture: all
2822Maintainer: Main Dev <main@example.com>
2823Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2824";
2825
2826 let package = parse_dsc_content(content);
2827 assert_eq!(package.parties.len(), 3);
2828 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2829 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2830 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2831 }
2832
2833 #[test]
2834 fn test_orig_tar_parser_is_match() {
2835 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2836 "package_1.0.orig.tar.gz"
2837 )));
2838 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2839 "abseil_0~20200923.3.orig.tar.xz"
2840 )));
2841 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2842 "package.debian.tar.gz"
2843 )));
2844 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2845 }
2846
2847 #[test]
2848 fn test_debian_tar_parser_is_match() {
2849 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2850 "package_1.0-1.debian.tar.xz"
2851 )));
2852 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2853 "abseil_20220623.1-1.debian.tar.gz"
2854 )));
2855 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2856 "package.orig.tar.gz"
2857 )));
2858 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2859 }
2860
2861 #[test]
2862 fn test_parse_orig_tar_filename() {
2863 let pkg = parse_source_tarball_filename(
2864 "abseil_0~20200923.3.orig.tar.gz",
2865 DatasourceId::DebianOriginalSourceTarball,
2866 );
2867 assert_eq!(pkg.name, Some("abseil".to_string()));
2868 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2869 assert_eq!(pkg.namespace, Some("debian".to_string()));
2870 assert_eq!(
2871 pkg.purl,
2872 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2873 );
2874 assert_eq!(
2875 pkg.datasource_id,
2876 Some(DatasourceId::DebianOriginalSourceTarball)
2877 );
2878 }
2879
2880 #[test]
2881 fn test_parse_debian_tar_filename() {
2882 let pkg = parse_source_tarball_filename(
2883 "abseil_20220623.1-1.debian.tar.xz",
2884 DatasourceId::DebianSourceMetadataTarball,
2885 );
2886 assert_eq!(pkg.name, Some("abseil".to_string()));
2887 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2888 assert_eq!(pkg.namespace, Some("debian".to_string()));
2889 assert_eq!(
2890 pkg.purl,
2891 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2892 );
2893 }
2894
2895 #[test]
2896 fn test_parse_deb_filename() {
2897 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2898 assert_eq!(pkg.name, Some("nginx".to_string()));
2899 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
2900
2901 let pkg = parse_deb_filename("invalid.deb");
2902 assert!(pkg.name.is_none());
2903 assert!(pkg.version.is_none());
2904 }
2905
2906 #[test]
2907 fn test_parse_source_tarball_various_compressions() {
2908 let pkg_gz = parse_source_tarball_filename(
2909 "test_1.0.orig.tar.gz",
2910 DatasourceId::DebianOriginalSourceTarball,
2911 );
2912 let pkg_xz = parse_source_tarball_filename(
2913 "test_1.0.orig.tar.xz",
2914 DatasourceId::DebianOriginalSourceTarball,
2915 );
2916 let pkg_bz2 = parse_source_tarball_filename(
2917 "test_1.0.orig.tar.bz2",
2918 DatasourceId::DebianOriginalSourceTarball,
2919 );
2920
2921 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
2922 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
2923 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
2924 }
2925
2926 #[test]
2927 fn test_parse_source_tarball_invalid_format() {
2928 let pkg = parse_source_tarball_filename(
2929 "invalid-no-underscore.tar.gz",
2930 DatasourceId::DebianOriginalSourceTarball,
2931 );
2932 assert!(pkg.name.is_none());
2933 assert!(pkg.version.is_none());
2934 }
2935
2936 #[test]
2937 fn test_list_parser_is_match() {
2938 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2939 "/var/lib/dpkg/info/bash.list"
2940 )));
2941 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2942 "/var/lib/dpkg/info/package:amd64.list"
2943 )));
2944 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2945 "bash.list"
2946 )));
2947 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2948 "/var/lib/dpkg/info/bash.md5sums"
2949 )));
2950 }
2951
2952 #[test]
2953 fn test_md5sums_parser_is_match() {
2954 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2955 "/var/lib/dpkg/info/bash.md5sums"
2956 )));
2957 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2958 "/var/lib/dpkg/info/package:amd64.md5sums"
2959 )));
2960 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2961 "bash.md5sums"
2962 )));
2963 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2964 "/var/lib/dpkg/info/bash.list"
2965 )));
2966 }
2967
2968 #[test]
2969 fn test_parse_debian_file_list_plain_list() {
2970 let content = "/.
2971/bin
2972/bin/bash
2973/usr/bin/bashbug
2974/usr/share/doc/bash/README
2975";
2976 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
2977 assert_eq!(pkg.name, Some("bash".to_string()));
2978 assert_eq!(pkg.file_references.len(), 3);
2979 assert_eq!(pkg.file_references[0].path, "/bin/bash");
2980 assert_eq!(pkg.file_references[0].md5, None);
2981 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
2982 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
2983 }
2984
2985 #[test]
2986 fn test_parse_debian_file_list_md5sums() {
2987 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
29881c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
2989f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
2990";
2991 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
2992 assert_eq!(pkg.name, Some("bash".to_string()));
2993 assert_eq!(pkg.file_references.len(), 3);
2994 assert_eq!(pkg.file_references[0].path, "bin/bash");
2995 assert_eq!(
2996 pkg.file_references[0].md5,
2997 Some("77506afebd3b7e19e937a678a185b62e".to_string())
2998 );
2999 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3000 assert_eq!(
3001 pkg.file_references[1].md5,
3002 Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3003 );
3004 }
3005
3006 #[test]
3007 fn test_parse_debian_file_list_with_arch() {
3008 let content = "/usr/bin/foo
3009/usr/lib/x86_64-linux-gnu/libfoo.so
3010";
3011 let pkg = parse_debian_file_list(
3012 content,
3013 "libfoo:amd64",
3014 DatasourceId::DebianInstalledFilesList,
3015 );
3016 assert_eq!(pkg.name, Some("libfoo".to_string()));
3017 assert!(pkg.purl.is_some());
3018 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3019 assert_eq!(pkg.file_references.len(), 2);
3020 }
3021
3022 #[test]
3023 fn test_parse_debian_file_list_skips_comments_and_empty() {
3024 let content = "# This is a comment
3025/bin/bash
3026
3027/usr/bin/bashbug
3028
3029";
3030 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3031 assert_eq!(pkg.file_references.len(), 2);
3032 }
3033
3034 #[test]
3035 fn test_parse_debian_file_list_md5sums_only() {
3036 let content = "abc123 usr/bin/tool
3037";
3038 let pkg =
3039 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3040 assert_eq!(pkg.name, None);
3041 assert_eq!(pkg.file_references.len(), 1);
3042 }
3043
3044 #[test]
3045 fn test_parse_debian_file_list_ignores_root_dirs() {
3046 let content = "/.
3047/bin
3048/bin/bash
3049/etc
3050/usr
3051/var
3052";
3053 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3054 assert_eq!(pkg.file_references.len(), 1);
3055 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3056 }
3057
3058 #[test]
3059 fn test_copyright_parser_is_match() {
3060 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3061 "/usr/share/doc/bash/copyright"
3062 )));
3063 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3064 "debian/copyright"
3065 )));
3066 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3067 "copyright.txt"
3068 )));
3069 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3070 "/etc/copyright"
3071 )));
3072 }
3073
3074 #[test]
3075 fn test_extract_package_name_from_path() {
3076 assert_eq!(
3077 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3078 Some("bash".to_string())
3079 );
3080 assert_eq!(
3081 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3082 Some("libseccomp2".to_string())
3083 );
3084 assert_eq!(
3085 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3086 None
3087 );
3088 }
3089
3090 #[test]
3091 fn test_parse_copyright_dep5_format() {
3092 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3093Upstream-Name: libseccomp
3094Source: https://sourceforge.net/projects/libseccomp/
3095
3096Files: *
3097Copyright: 2012 Paul Moore <pmoore@redhat.com>
3098 2012 Ashley Lai <adlai@us.ibm.com>
3099License: LGPL-2.1
3100
3101License: LGPL-2.1
3102 This library is free software
3103";
3104 let pkg = parse_copyright_file(content, Some("libseccomp"));
3105 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3106 assert_eq!(pkg.namespace, Some("debian".to_string()));
3107 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3108 assert_eq!(
3109 pkg.extracted_license_statement,
3110 Some("LGPL-2.1".to_string())
3111 );
3112 assert!(pkg.parties.len() >= 2);
3113 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3114 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3115 }
3116
3117 #[test]
3118 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3119 let path = PathBuf::from(
3120 "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3121 );
3122 let pkg = DebianCopyrightParser::extract_first_package(&path);
3123
3124 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3125 let extracted = pkg
3126 .extracted_license_statement
3127 .as_deref()
3128 .expect("license statement should exist");
3129 assert!(extracted.contains("GPL-2+"));
3130 assert!(!pkg.license_detections.is_empty());
3131
3132 let primary = &pkg.license_detections[0];
3133 assert_eq!(
3134 primary.matches[0].matched_text.as_deref(),
3135 Some("License: GPL-2+")
3136 );
3137 assert_eq!(primary.matches[0].start_line, 47);
3138 assert_eq!(primary.matches[0].end_line, 47);
3139 }
3140
3141 #[test]
3142 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3143 let path = PathBuf::from("testdata/debian/copyright/copyright");
3144 let pkg = DebianCopyrightParser::extract_first_package(&path);
3145
3146 assert_eq!(pkg.license_detections.len(), 1);
3147 assert_eq!(pkg.other_license_detections.len(), 4);
3148
3149 let primary = &pkg.license_detections[0];
3150 assert_eq!(
3151 primary.matches[0].matched_text.as_deref(),
3152 Some("License: LGPL-2.1")
3153 );
3154 assert_eq!(primary.matches[0].start_line, 11);
3155
3156 let ordered_lines: Vec<usize> = pkg
3157 .other_license_detections
3158 .iter()
3159 .map(|detection| detection.matches[0].start_line)
3160 .collect();
3161 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3162
3163 let ordered_texts: Vec<&str> = pkg
3164 .other_license_detections
3165 .iter()
3166 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3167 .collect();
3168 assert_eq!(
3169 ordered_texts,
3170 vec![
3171 "License: LGPL-2.1",
3172 "License: LGPL-2.1",
3173 "License: LGPL-2.1",
3174 "License: LGPL-2.1",
3175 ]
3176 );
3177 }
3178
3179 #[test]
3180 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3181 let path = PathBuf::from(
3182 "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3183 );
3184 let pkg = DebianCopyrightParser::extract_first_package(&path);
3185
3186 let zlib = pkg
3187 .other_license_detections
3188 .iter()
3189 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3190 .expect("at least one Zlib license paragraph should be detected");
3191 assert_eq!(
3192 zlib.matches[0].matched_text.as_deref(),
3193 Some("License: Zlib")
3194 );
3195
3196 let last_zlib = pkg
3197 .other_license_detections
3198 .iter()
3199 .rev()
3200 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3201 .expect("bottom standalone Zlib license paragraph should be detected");
3202 assert_eq!(last_zlib.matches[0].start_line, 732);
3203 assert_eq!(last_zlib.matches[0].end_line, 732);
3204 }
3205
3206 #[test]
3207 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3208 let path =
3209 PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3210 let pkg = DebianCopyrightParser::extract_first_package(&path);
3211
3212 assert_eq!(pkg.license_detections.len(), 1);
3213 let primary = &pkg.license_detections[0];
3214 assert_eq!(
3215 primary.matches[0].matched_text.as_deref(),
3216 Some("License: LGPL-3+ or GPL-2+")
3217 );
3218 assert_eq!(primary.matches[0].start_line, 8);
3219 assert_eq!(primary.matches[0].end_line, 8);
3220
3221 assert!(pkg.other_license_detections.iter().any(|detection| {
3222 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3223 }));
3224 }
3225
3226 #[test]
3227 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3228 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3229 let pkg = parse_copyright_file(content, Some("foo"));
3230
3231 assert_eq!(pkg.license_detections.len(), 1);
3232 let primary = &pkg.license_detections[0];
3233 assert_eq!(
3234 primary.matches[0].matched_text.as_deref(),
3235 Some("License: GPL-2+")
3236 );
3237 assert_eq!(primary.matches[0].start_line, 7);
3238 }
3239
3240 #[test]
3241 fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3242 let raw_lines = vec![
3243 "Files: *".to_string(),
3244 "Copyright: 2024 Example Org".to_string(),
3245 "License: Apache-2.0".to_string(),
3246 " Licensed under the Apache License, Version 2.0.".to_string(),
3247 ];
3248
3249 let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3250 let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3251 .into_iter()
3252 .next()
3253 .expect("reference RFC822 paragraph should parse");
3254
3255 assert_eq!(paragraph.metadata.headers, expected.headers);
3256 assert_eq!(paragraph.metadata.body, expected.body);
3257 assert_eq!(
3258 paragraph.license_header_line,
3259 Some(("License: Apache-2.0".to_string(), 12))
3260 );
3261 }
3262
3263 #[test]
3264 fn test_parse_copyright_unstructured() {
3265 let content = "This package was debianized by John Doe.
3266
3267Upstream Authors:
3268 Jane Smith
3269
3270Copyright:
3271 2009 10gen
3272
3273License:
3274 SSPL
3275";
3276 let pkg = parse_copyright_file(content, Some("mongodb"));
3277 assert_eq!(pkg.name, Some("mongodb".to_string()));
3278 assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3279 assert!(!pkg.parties.is_empty());
3280 }
3281
3282 #[test]
3283 fn test_parse_copyright_holders() {
3284 let text = "2012 Paul Moore <pmoore@redhat.com>
32852012 Ashley Lai <adlai@us.ibm.com>
3286Copyright (C) 2015-2018 Example Corp";
3287 let holders = parse_copyright_holders(text);
3288 assert!(holders.len() >= 3);
3289 assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3290 assert!(holders.iter().any(|h| h.contains("Example Corp")));
3291 }
3292
3293 #[test]
3294 fn test_parse_copyright_empty() {
3295 let content = "This is just some text without proper copyright info.";
3296 let pkg = parse_copyright_file(content, Some("test"));
3297 assert_eq!(pkg.name, Some("test".to_string()));
3298 assert!(pkg.parties.is_empty());
3299 assert!(pkg.extracted_license_statement.is_none());
3300 }
3301
3302 #[test]
3303 fn test_deb_parser_is_match() {
3304 assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3305 assert!(DebianDebParser::is_match(&PathBuf::from(
3306 "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3307 )));
3308 assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3309 assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3310 }
3311
3312 #[test]
3313 fn test_parse_deb_filename_with_arch() {
3314 let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3315 assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3316 assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3317 assert_eq!(pkg.namespace, Some("debian".to_string()));
3318 assert_eq!(
3319 pkg.purl,
3320 Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3321 );
3322 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3323 }
3324
3325 #[test]
3326 fn test_parse_deb_filename_without_arch() {
3327 let pkg = parse_deb_filename("package_1.0-1_all.deb");
3328 assert_eq!(pkg.name, Some("package".to_string()));
3329 assert_eq!(pkg.version, Some("1.0-1".to_string()));
3330 assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3331 }
3332
3333 #[test]
3334 fn test_extract_deb_archive() {
3335 let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3336 if !test_path.exists() {
3337 return;
3338 }
3339
3340 let pkg = DebianDebParser::extract_first_package(&test_path);
3341
3342 assert_eq!(pkg.name, Some("adduser".to_string()));
3343 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3344 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3345 assert!(pkg.description.is_some());
3346 assert!(!pkg.parties.is_empty());
3347
3348 assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3349 assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3350 }
3351
// Builds a synthetic `.deb` via `create_synthetic_deb_with_control_tar_xz` and
// checks that the control fields are extracted from it.
#[test]
fn test_extract_deb_archive_with_control_tar_xz() {
    let archive = create_synthetic_deb_with_control_tar_xz();
    let extracted = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(extracted.name.as_deref(), Some("synthetic"));
    assert_eq!(extracted.version.as_deref(), Some("1.2.3"));
    assert_eq!(extracted.description.as_deref(), Some("Synthetic deb"));
    assert_eq!(
        extracted.homepage_url.as_deref(),
        Some("https://example.com")
    );
}
3363
// Builds a synthetic `.deb` via `create_synthetic_deb_with_copyright` and
// checks that the license statement and a copyright-holder party are present
// on the extracted package.
#[test]
fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
    let archive = create_synthetic_deb_with_copyright();
    let extracted = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(extracted.name.as_deref(), Some("synthetic"));
    assert_eq!(
        extracted.extracted_license_statement.as_deref(),
        Some("Apache-2.0")
    );

    // At least one party must be the expected copyright holder.
    let has_copyright_holder = extracted.parties.iter().any(|candidate| {
        candidate.role.as_deref() == Some("copyright-holder")
            && candidate.name.as_deref() == Some("Example Org")
    });
    assert!(has_copyright_holder);
}
3380
// Filename-only parsing of a simple `name_version_arch.deb` input.
#[test]
fn test_parse_deb_filename_simple() {
    let parsed = parse_deb_filename("adduser_3.112ubuntu1_all.deb");

    assert_eq!(parsed.name.as_deref(), Some("adduser"));
    assert_eq!(parsed.version.as_deref(), Some("3.112ubuntu1"));
    // The expected namespace here is "debian" even though the version string
    // contains "ubuntu".
    assert_eq!(parsed.namespace.as_deref(), Some("debian"));
}
3388
// A filename without the `name_version` structure must leave both name and
// version unset.
#[test]
fn test_parse_deb_filename_invalid() {
    let parsed = parse_deb_filename("invalid.deb");

    assert_eq!(parsed.name, None);
    assert_eq!(parsed.version, None);
}
3395
// Checks path matching for a `status.d` entry and, when the fixture exists on
// disk, the metadata extracted from it.
#[test]
fn test_distroless_parser() {
    let fixture = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");

    // Path matching is asserted before the existence check, so it runs even
    // when the fixture file is absent.
    assert!(DebianDistrolessInstalledParser::is_match(&fixture));

    if !fixture.exists() {
        eprintln!("Warning: Test file not found, skipping test");
        return;
    }

    let extracted = DebianDistrolessInstalledParser::extract_first_package(&fixture);

    assert_eq!(extracted.package_type, Some(PackageType::Deb));
    assert_eq!(
        extracted.datasource_id,
        Some(DatasourceId::DebianDistrolessInstalledDb)
    );
    assert_eq!(extracted.name.as_deref(), Some("base-files"));
    assert_eq!(extracted.version.as_deref(), Some("11.1+deb11u8"));
    assert_eq!(extracted.namespace.as_deref(), Some("debian"));

    let purl = extracted.purl.as_deref();
    assert!(purl.is_some());
    assert!(purl.unwrap().contains("pkg:deb/debian/base-files"));
}
3425}