1use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41 DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
42 PackageType, Party,
43};
44use crate::parsers::rfc822::{self, Rfc822Metadata};
45use crate::parsers::utils::{read_file_to_string, split_name_email};
46use crate::utils::spdx::combine_license_expressions;
47
48use super::PackageParser;
49use super::license_normalization::{
50 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
51 normalize_declared_license_key,
52};
53
54const PACKAGE_TYPE: PackageType = PackageType::Deb;
55
56fn default_package_data(datasource_id: DatasourceId) -> PackageData {
57 PackageData {
58 package_type: Some(PACKAGE_TYPE),
59 datasource_id: Some(datasource_id),
60 ..Default::default()
61 }
62}
63
// Substrings that `detect_namespace` looks for in the lowercased version
// string. Ubuntu clues are checked before Debian clues.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

// Substrings that `detect_namespace` looks for in the lowercased maintainer
// field when the version gave no answer. Ubuntu clues are checked first.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
77
/// Describes how one relationship header of a control paragraph is turned
/// into dependencies by `parse_all_dependencies`.
struct DepFieldSpec {
    /// Header name to look up in the paragraph (lowercase).
    field: &'static str,
    /// Value stored in `Dependency::scope` for entries from this header.
    scope: &'static str,
    /// Value stored in `Dependency::is_runtime`.
    is_runtime: bool,
    /// Baseline for `Dependency::is_optional`; `parse_dependency_field` also
    /// marks entries optional when they are part of a `|` alternative group.
    is_optional: bool,
}
85
/// Relationship headers scanned by `parse_all_dependencies`, in processing order.
///
/// In every entry the scope is the same string as the header name.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships that are not optional.
    DepFieldSpec {
        field: "depends",
        scope: "depends",
        is_runtime: true,
        is_optional: false,
    },
    DepFieldSpec {
        field: "pre-depends",
        scope: "pre-depends",
        is_runtime: true,
        is_optional: false,
    },
    // Runtime relationships flagged as optional.
    DepFieldSpec {
        field: "recommends",
        scope: "recommends",
        is_runtime: true,
        is_optional: true,
    },
    DepFieldSpec {
        field: "suggests",
        scope: "suggests",
        is_runtime: true,
        is_optional: true,
    },
    // Remaining relationship headers: recorded with `is_runtime: false`.
    DepFieldSpec {
        field: "breaks",
        scope: "breaks",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "conflicts",
        scope: "conflicts",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "replaces",
        scope: "replaces",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "provides",
        scope: "provides",
        is_runtime: false,
        is_optional: false,
    },
    // Build-time relationships.
    DepFieldSpec {
        field: "build-depends",
        scope: "build-depends",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "build-depends-indep",
        scope: "build-depends-indep",
        is_runtime: false,
        is_optional: false,
    },
    DepFieldSpec {
        field: "build-conflicts",
        scope: "build-conflicts",
        is_runtime: false,
        is_optional: false,
    },
];
154
155pub struct DebianControlParser;
160
161impl PackageParser for DebianControlParser {
162 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
163
164 fn is_match(path: &Path) -> bool {
165 if let Some(name) = path.file_name()
166 && name == "control"
167 && let Some(parent) = path.parent()
168 && let Some(parent_name) = parent.file_name()
169 {
170 return parent_name == "debian";
171 }
172 false
173 }
174
175 fn extract_packages(path: &Path) -> Vec<PackageData> {
176 let content = match read_file_to_string(path) {
177 Ok(c) => c,
178 Err(e) => {
179 warn!("Failed to read debian/control at {:?}: {}", path, e);
180 return vec![default_package_data(DatasourceId::DebianControlInSource)];
181 }
182 };
183
184 let packages = parse_debian_control(&content);
185 if packages.is_empty() {
186 vec![default_package_data(DatasourceId::DebianControlInSource)]
187 } else {
188 packages
189 }
190 }
191}
192
193pub struct DebianInstalledParser;
198
199impl PackageParser for DebianInstalledParser {
200 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
201
202 fn is_match(path: &Path) -> bool {
203 let path_str = path.to_string_lossy();
204 path_str.ends_with("var/lib/dpkg/status")
205 }
206
207 fn extract_packages(path: &Path) -> Vec<PackageData> {
208 let content = match read_file_to_string(path) {
209 Ok(c) => c,
210 Err(e) => {
211 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
212 return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
213 }
214 };
215
216 let packages = parse_dpkg_status(&content);
217 if packages.is_empty() {
218 vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
219 } else {
220 packages
221 }
222 }
223}
224
225pub struct DebianDistrolessInstalledParser;
226
227impl PackageParser for DebianDistrolessInstalledParser {
228 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
229
230 fn is_match(path: &Path) -> bool {
231 let path_str = path.to_string_lossy();
232 path_str.contains("var/lib/dpkg/status.d/")
233 }
234
235 fn extract_packages(path: &Path) -> Vec<PackageData> {
236 let content = match read_file_to_string(path) {
237 Ok(c) => c,
238 Err(e) => {
239 warn!("Failed to read distroless status file at {:?}: {}", path, e);
240 return vec![default_package_data(
241 DatasourceId::DebianDistrolessInstalledDb,
242 )];
243 }
244 };
245
246 vec![parse_distroless_status(&content)]
247 }
248}
249
250fn parse_distroless_status(content: &str) -> PackageData {
251 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
252
253 if paragraphs.is_empty() {
254 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
255 }
256
257 build_package_from_paragraph(
258 ¶graphs[0],
259 None,
260 DatasourceId::DebianDistrolessInstalledDb,
261 )
262 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
263}
264
265fn parse_debian_control(content: &str) -> Vec<PackageData> {
275 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
276 if paragraphs.is_empty() {
277 return Vec::new();
278 }
279
280 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
282
283 let (source_paragraph, binary_start) = if has_source {
284 (Some(¶graphs[0]), 1)
285 } else {
286 (None, 0)
287 };
288
289 let source_meta = source_paragraph.map(extract_source_meta);
291
292 let mut packages = Vec::new();
293
294 for para in ¶graphs[binary_start..] {
295 if let Some(pkg) = build_package_from_paragraph(
296 para,
297 source_meta.as_ref(),
298 DatasourceId::DebianControlInSource,
299 ) {
300 packages.push(pkg);
301 }
302 }
303
304 if packages.is_empty()
305 && let Some(source_para) = source_paragraph
306 && let Some(pkg) = build_package_from_source_paragraph(source_para)
307 {
308 packages.push(pkg);
309 }
310
311 packages
312}
313
314fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
319 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
320 let mut packages = Vec::new();
321
322 for para in ¶graphs {
323 let status = rfc822::get_header_first(¶.headers, "status");
324 if status.as_deref() != Some("install ok installed") {
325 continue;
326 }
327
328 if let Some(pkg) =
329 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
330 {
331 packages.push(pkg);
332 }
333 }
334
335 packages
336}
337
/// Metadata taken from the source paragraph of a control file and shared with
/// the packages built from the remaining paragraphs.
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders, in that order.
    parties: Vec<Party>,
    /// Value of the `Homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `Vcs-Git` header.
    vcs_url: Option<String>,
    /// Value of the `Vcs-Browser` header.
    code_view_url: Option<String>,
    /// Value of the `Bugs` header.
    bug_tracking_url: Option<String>,
}
349
350fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
351 let mut parties = Vec::new();
352
353 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
355 let (name, email) = split_name_email(&maintainer);
356 parties.push(Party {
357 r#type: Some("person".to_string()),
358 role: Some("maintainer".to_string()),
359 name,
360 email,
361 url: None,
362 organization: None,
363 organization_url: None,
364 timezone: None,
365 });
366 }
367
368 if let Some(orig_maintainer) =
370 rfc822::get_header_first(¶graph.headers, "original-maintainer")
371 {
372 let (name, email) = split_name_email(&orig_maintainer);
373 parties.push(Party {
374 r#type: Some("person".to_string()),
375 role: Some("maintainer".to_string()),
376 name,
377 email,
378 url: None,
379 organization: None,
380 organization_url: None,
381 timezone: None,
382 });
383 }
384
385 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
387 for uploader in uploaders_str.split(',') {
388 let trimmed = uploader.trim();
389 if !trimmed.is_empty() {
390 let (name, email) = split_name_email(trimmed);
391 parties.push(Party {
392 r#type: Some("person".to_string()),
393 role: Some("uploader".to_string()),
394 name,
395 email,
396 url: None,
397 organization: None,
398 organization_url: None,
399 timezone: None,
400 });
401 }
402 }
403 }
404
405 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage");
406
407 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
409 .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
410
411 let code_view_url = rfc822::get_header_first(¶graph.headers, "vcs-browser");
412
413 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs");
414
415 SourceMeta {
416 parties,
417 homepage_url,
418 vcs_url,
419 code_view_url,
420 bug_tracking_url,
421 }
422}
423
424fn build_package_from_paragraph(
429 paragraph: &Rfc822Metadata,
430 source_meta: Option<&SourceMeta>,
431 datasource_id: DatasourceId,
432) -> Option<PackageData> {
433 let name = rfc822::get_header_first(¶graph.headers, "package")?;
434 let version = rfc822::get_header_first(¶graph.headers, "version");
435 let architecture = rfc822::get_header_first(¶graph.headers, "architecture");
436 let description = rfc822::get_header_first(¶graph.headers, "description");
437 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
438 let homepage = rfc822::get_header_first(¶graph.headers, "homepage");
439 let source_field = rfc822::get_header_first(¶graph.headers, "source");
440 let section = rfc822::get_header_first(¶graph.headers, "section");
441 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
442 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
443
444 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
445
446 let parties = if let Some(meta) = source_meta {
448 meta.parties.clone()
449 } else {
450 let mut p = Vec::new();
451 if let Some(m) = &maintainer_str {
452 let (n, e) = split_name_email(m);
453 p.push(Party {
454 r#type: Some("person".to_string()),
455 role: Some("maintainer".to_string()),
456 name: n,
457 email: e,
458 url: None,
459 organization: None,
460 organization_url: None,
461 timezone: None,
462 });
463 }
464 p
465 };
466
467 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
469 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
470 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
471 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
472
473 let purl = build_debian_purl(
475 &name,
476 version.as_deref(),
477 namespace.as_deref(),
478 architecture.as_deref(),
479 );
480
481 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
483
484 let keywords = section.into_iter().collect();
486
487 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
489
490 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
492 if let Some(ma) = &multi_arch
493 && !ma.is_empty()
494 {
495 extra_data.insert(
496 "multi_arch".to_string(),
497 serde_json::Value::String(ma.clone()),
498 );
499 }
500 if let Some(size_str) = &installed_size
501 && let Ok(size) = size_str.parse::<u64>()
502 {
503 extra_data.insert(
504 "installed_size".to_string(),
505 serde_json::Value::Number(serde_json::Number::from(size)),
506 );
507 }
508
509 let qualifiers = architecture.as_ref().map(|arch| {
511 let mut q = HashMap::new();
512 q.insert("arch".to_string(), arch.clone());
513 q
514 });
515
516 Some(PackageData {
517 package_type: Some(PACKAGE_TYPE),
518 namespace: namespace.clone(),
519 name: Some(name),
520 version,
521 qualifiers,
522 subpath: None,
523 primary_language: None,
524 description,
525 release_date: None,
526 parties,
527 keywords,
528 homepage_url,
529 download_url: None,
530 size: None,
531 sha1: None,
532 md5: None,
533 sha256: None,
534 sha512: None,
535 bug_tracking_url,
536 code_view_url,
537 vcs_url,
538 copyright: None,
539 holder: None,
540 declared_license_expression: None,
541 declared_license_expression_spdx: None,
542 license_detections: Vec::new(),
543 other_license_expression: None,
544 other_license_expression_spdx: None,
545 other_license_detections: Vec::new(),
546 extracted_license_statement: None,
547 notice_text: None,
548 source_packages,
549 file_references: Vec::new(),
550 is_private: false,
551 is_virtual: false,
552 extra_data: if extra_data.is_empty() {
553 None
554 } else {
555 Some(extra_data)
556 },
557 dependencies,
558 repository_homepage_url: None,
559 repository_download_url: None,
560 api_data_url: None,
561 datasource_id: Some(datasource_id),
562 purl,
563 })
564}
565
566fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
567 let name = rfc822::get_header_first(¶graph.headers, "source")?;
568 let version = rfc822::get_header_first(¶graph.headers, "version");
569 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
570
571 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
572 let source_meta = extract_source_meta(paragraph);
573
574 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
575 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
576
577 let section = rfc822::get_header_first(¶graph.headers, "section");
578 let keywords = section.into_iter().collect();
579
580 Some(PackageData {
581 package_type: Some(PACKAGE_TYPE),
582 namespace: namespace.clone(),
583 name: Some(name),
584 version,
585 qualifiers: None,
586 subpath: None,
587 primary_language: None,
588 description: None,
589 release_date: None,
590 parties: source_meta.parties,
591 keywords,
592 homepage_url: source_meta.homepage_url,
593 download_url: None,
594 size: None,
595 sha1: None,
596 md5: None,
597 sha256: None,
598 sha512: None,
599 bug_tracking_url: source_meta.bug_tracking_url,
600 code_view_url: source_meta.code_view_url,
601 vcs_url: source_meta.vcs_url,
602 copyright: None,
603 holder: None,
604 declared_license_expression: None,
605 declared_license_expression_spdx: None,
606 license_detections: Vec::new(),
607 other_license_expression: None,
608 other_license_expression_spdx: None,
609 other_license_detections: Vec::new(),
610 extracted_license_statement: None,
611 notice_text: None,
612 source_packages: Vec::new(),
613 file_references: Vec::new(),
614 is_private: false,
615 is_virtual: false,
616 extra_data: None,
617 dependencies,
618 repository_homepage_url: None,
619 repository_download_url: None,
620 api_data_url: None,
621 datasource_id: Some(DatasourceId::DebianControlInSource),
622 purl,
623 })
624}
625
626fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
631 if let Some(ver) = version {
633 let ver_lower = ver.to_lowercase();
634 for clue in VERSION_CLUES_UBUNTU {
635 if ver_lower.contains(clue) {
636 return Some("ubuntu".to_string());
637 }
638 }
639 for clue in VERSION_CLUES_DEBIAN {
640 if ver_lower.contains(clue) {
641 return Some("debian".to_string());
642 }
643 }
644 }
645
646 if let Some(maint) = maintainer {
648 let maint_lower = maint.to_lowercase();
649 for clue in MAINTAINER_CLUES_UBUNTU {
650 if maint_lower.contains(clue) {
651 return Some("ubuntu".to_string());
652 }
653 }
654 for clue in MAINTAINER_CLUES_DEBIAN {
655 if maint_lower.contains(clue) {
656 return Some("debian".to_string());
657 }
658 }
659 }
660
661 Some("debian".to_string())
663}
664
665fn build_debian_purl(
670 name: &str,
671 version: Option<&str>,
672 namespace: Option<&str>,
673 architecture: Option<&str>,
674) -> Option<String> {
675 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
676
677 if let Some(ns) = namespace {
678 purl.with_namespace(ns).ok()?;
679 }
680
681 if let Some(ver) = version {
682 purl.with_version(ver).ok()?;
683 }
684
685 if let Some(arch) = architecture {
686 purl.add_qualifier("arch", arch).ok()?;
687 }
688
689 Some(purl.to_string())
690}
691
692fn parse_all_dependencies(
697 headers: &HashMap<String, Vec<String>>,
698 namespace: Option<&str>,
699) -> Vec<Dependency> {
700 let mut dependencies = Vec::new();
701
702 for spec in DEP_FIELDS {
703 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
704 dependencies.extend(parse_dependency_field(
705 &dep_str,
706 spec.scope,
707 spec.is_runtime,
708 spec.is_optional,
709 namespace,
710 ));
711 }
712 }
713
714 dependencies
715}
716
717fn parse_dependency_field(
726 dep_str: &str,
727 scope: &str,
728 is_runtime: bool,
729 is_optional: bool,
730 namespace: Option<&str>,
731) -> Vec<Dependency> {
732 let mut deps = Vec::new();
733
734 let dep_re = Regex::new(
737 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
738 )
739 .unwrap();
740
741 for group in dep_str.split(',') {
742 let group = group.trim();
743 if group.is_empty() {
744 continue;
745 }
746
747 let alternatives: Vec<&str> = group.split('|').collect();
749 let has_alternatives = alternatives.len() > 1;
750
751 for alt in alternatives {
752 let alt = alt.trim();
753 if alt.is_empty() {
754 continue;
755 }
756
757 if let Some(caps) = dep_re.captures(alt) {
758 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
759 let operator = caps.get(2).map(|m| m.as_str().trim());
760 let version = caps.get(3).map(|m| m.as_str().trim());
761
762 if pkg_name.is_empty() {
763 continue;
764 }
765
766 if pkg_name.starts_with('$') {
768 continue;
769 }
770
771 let extracted_requirement = match (operator, version) {
772 (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
773 _ => None,
774 };
775
776 let is_pinned = operator.map(|op| op == "=");
777
778 let purl = build_debian_purl(pkg_name, None, namespace, None);
779
780 deps.push(Dependency {
781 purl,
782 extracted_requirement,
783 scope: Some(scope.to_string()),
784 is_runtime: Some(is_runtime),
785 is_optional: Some(is_optional || has_alternatives),
786 is_pinned,
787 is_direct: Some(true),
788 resolved_package: None,
789 extra_data: None,
790 });
791 }
792 }
793 }
794
795 deps
796}
797
798fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
806 let Some(source_str) = source else {
807 return Vec::new();
808 };
809
810 let trimmed = source_str.trim();
811 if trimmed.is_empty() {
812 return Vec::new();
813 }
814
815 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
817 let name = trimmed[..paren_start].trim();
818 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
819 (
820 name,
821 if version.is_empty() {
822 None
823 } else {
824 Some(version)
825 },
826 )
827 } else {
828 (trimmed, None)
829 };
830
831 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
832 vec![purl]
833 } else {
834 Vec::new()
835 }
836}
837
// Registry entry for `debian/control` files.
// NOTE(review): the macro is defined elsewhere; the arguments look like
// (description, path globs, package type, primary language, docs URL) — confirm.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registry entry for the dpkg `status` database.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registry entry for the per-package files under `status.d/`.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
865
866pub struct DebianDscParser;
875
876impl PackageParser for DebianDscParser {
877 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
878
879 fn is_match(path: &Path) -> bool {
880 path.extension().and_then(|e| e.to_str()) == Some("dsc")
881 }
882
883 fn extract_packages(path: &Path) -> Vec<PackageData> {
884 let content = match read_file_to_string(path) {
885 Ok(c) => c,
886 Err(e) => {
887 warn!("Failed to read .dsc file {:?}: {}", path, e);
888 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
889 }
890 };
891
892 vec![parse_dsc_content(&content)]
893 }
894}
895
896crate::register_parser!(
897 "Debian source control file (.dsc)",
898 &["**/*.dsc"],
899 "deb",
900 "",
901 Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
902);
903
/// Removes the PGP clear-sign wrapper from `content`.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines and the
/// first blank line that follow it (while nothing has been emitted yet), and
/// everything from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`.
/// Every kept line is emitted followed by `\n`.
fn strip_pgp_signature(content: &str) -> String {
    const SIGNED_MESSAGE: &str = "-----BEGIN PGP SIGNED MESSAGE-----";
    const SIGNATURE_BEGIN: &str = "-----BEGIN PGP SIGNATURE-----";
    const SIGNATURE_END: &str = "-----END PGP SIGNATURE-----";

    let mut cleaned = String::new();
    // True from the signed-message marker until the blank line ending its headers.
    let mut in_armor_headers = false;
    let mut in_signature = false;

    for line in content.lines() {
        match line {
            l if l.starts_with(SIGNED_MESSAGE) => in_armor_headers = true,
            l if l.starts_with(SIGNATURE_BEGIN) => in_signature = true,
            l if l.starts_with(SIGNATURE_END) => in_signature = false,
            l if in_armor_headers && l.starts_with("Hash:") => {}
            l if in_armor_headers && l.is_empty() && cleaned.is_empty() => {
                in_armor_headers = false;
            }
            l if !in_signature => {
                cleaned.push_str(l);
                cleaned.push('\n');
            }
            _ => {}
        }
    }

    cleaned
}
937
938fn parse_dsc_content(content: &str) -> PackageData {
939 let clean_content = strip_pgp_signature(content);
940 let metadata = rfc822::parse_rfc822_content(&clean_content);
941 let headers = &metadata.headers;
942
943 let name = rfc822::get_header_first(headers, "source");
944 let version = rfc822::get_header_first(headers, "version");
945 let architecture = rfc822::get_header_first(headers, "architecture");
946 let namespace = Some("debian".to_string());
947
948 let mut package = PackageData {
949 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
950 package_type: Some(PACKAGE_TYPE),
951 namespace: namespace.clone(),
952 name: name.clone(),
953 version: version.clone(),
954 description: rfc822::get_header_first(headers, "description"),
955 homepage_url: rfc822::get_header_first(headers, "homepage"),
956 vcs_url: rfc822::get_header_first(headers, "vcs-git"),
957 code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
958 ..Default::default()
959 };
960
961 if let (Some(n), Some(v)) = (&name, &version) {
963 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
964 }
965
966 if let Some(n) = &name
968 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
969 {
970 package.source_packages.push(source_purl);
971 }
972
973 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
974 let (name_opt, email_opt) = split_name_email(&maintainer);
975 package.parties.push(Party {
976 r#type: None,
977 role: Some("maintainer".to_string()),
978 name: name_opt,
979 email: email_opt,
980 url: None,
981 organization: None,
982 organization_url: None,
983 timezone: None,
984 });
985 }
986
987 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
988 for uploader in uploaders_str.split(',') {
989 let uploader = uploader.trim();
990 if uploader.is_empty() {
991 continue;
992 }
993 let (name_opt, email_opt) = split_name_email(uploader);
994 package.parties.push(Party {
995 r#type: None,
996 role: Some("uploader".to_string()),
997 name: name_opt,
998 email: email_opt,
999 url: None,
1000 organization: None,
1001 organization_url: None,
1002 timezone: None,
1003 });
1004 }
1005 }
1006
1007 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1009 package.dependencies.extend(parse_dependency_field(
1010 &build_deps,
1011 "build",
1012 false,
1013 false,
1014 namespace.as_deref(),
1015 ));
1016 }
1017
1018 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1020 let map = package.extra_data.get_or_insert_with(HashMap::new);
1021 map.insert("standards_version".to_string(), standards.into());
1022 }
1023
1024 package
1025}
1026
1027pub struct DebianOrigTarParser;
1029
1030impl PackageParser for DebianOrigTarParser {
1031 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1032
1033 fn is_match(path: &Path) -> bool {
1034 path.file_name()
1035 .and_then(|n| n.to_str())
1036 .map(|name| name.contains(".orig.tar."))
1037 .unwrap_or(false)
1038 }
1039
1040 fn extract_packages(path: &Path) -> Vec<PackageData> {
1041 let filename = match path.file_name().and_then(|n| n.to_str()) {
1042 Some(f) => f,
1043 None => {
1044 return vec![default_package_data(
1045 DatasourceId::DebianOriginalSourceTarball,
1046 )];
1047 }
1048 };
1049
1050 vec![parse_source_tarball_filename(
1051 filename,
1052 DatasourceId::DebianOriginalSourceTarball,
1053 )]
1054 }
1055}
1056
1057crate::register_parser!(
1058 "Debian original source tarball",
1059 &["**/*.orig.tar.*"],
1060 "deb",
1061 "",
1062 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1063);
1064
1065pub struct DebianDebianTarParser;
1067
1068impl PackageParser for DebianDebianTarParser {
1069 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1070
1071 fn is_match(path: &Path) -> bool {
1072 path.file_name()
1073 .and_then(|n| n.to_str())
1074 .map(|name| name.contains(".debian.tar."))
1075 .unwrap_or(false)
1076 }
1077
1078 fn extract_packages(path: &Path) -> Vec<PackageData> {
1079 let filename = match path.file_name().and_then(|n| n.to_str()) {
1080 Some(f) => f,
1081 None => {
1082 return vec![default_package_data(
1083 DatasourceId::DebianSourceMetadataTarball,
1084 )];
1085 }
1086 };
1087
1088 vec![parse_source_tarball_filename(
1089 filename,
1090 DatasourceId::DebianSourceMetadataTarball,
1091 )]
1092 }
1093}
1094
1095crate::register_parser!(
1096 "Debian source metadata tarball",
1097 &["**/*.debian.tar.*"],
1098 "deb",
1099 "",
1100 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1101);
1102
1103fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1104 let without_tar_ext = filename
1105 .trim_end_matches(".gz")
1106 .trim_end_matches(".xz")
1107 .trim_end_matches(".bz2")
1108 .trim_end_matches(".tar");
1109
1110 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1111 if parts.len() < 2 {
1112 return default_package_data(datasource_id);
1113 }
1114
1115 let name = parts[0].to_string();
1116 let version_with_suffix = parts[1];
1117
1118 let version = version_with_suffix
1119 .trim_end_matches(".orig")
1120 .trim_end_matches(".debian")
1121 .to_string();
1122
1123 let namespace = Some("debian".to_string());
1124
1125 PackageData {
1126 datasource_id: Some(datasource_id),
1127 package_type: Some(PACKAGE_TYPE),
1128 namespace: namespace.clone(),
1129 name: Some(name.clone()),
1130 version: Some(version.clone()),
1131 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1132 ..Default::default()
1133 }
1134}
1135
1136pub struct DebianInstalledListParser;
1138
1139impl PackageParser for DebianInstalledListParser {
1140 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1141
1142 fn is_match(path: &Path) -> bool {
1143 path.extension().and_then(|e| e.to_str()) == Some("list")
1144 && path
1145 .to_str()
1146 .map(|p| p.contains("/var/lib/dpkg/info/"))
1147 .unwrap_or(false)
1148 }
1149
1150 fn extract_packages(path: &Path) -> Vec<PackageData> {
1151 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1152 Some(f) => f,
1153 None => {
1154 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1155 }
1156 };
1157
1158 let content = match read_file_to_string(path) {
1159 Ok(c) => c,
1160 Err(e) => {
1161 warn!("Failed to read .list file {:?}: {}", path, e);
1162 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1163 }
1164 };
1165
1166 vec![parse_debian_file_list(
1167 &content,
1168 filename,
1169 DatasourceId::DebianInstalledFilesList,
1170 )]
1171 }
1172}
1173
1174crate::register_parser!(
1175 "Debian installed files list",
1176 &["**/var/lib/dpkg/info/*.list"],
1177 "deb",
1178 "",
1179 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1180);
1181
1182pub struct DebianInstalledMd5sumsParser;
1184
1185impl PackageParser for DebianInstalledMd5sumsParser {
1186 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1187
1188 fn is_match(path: &Path) -> bool {
1189 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1190 && path
1191 .to_str()
1192 .map(|p| p.contains("/var/lib/dpkg/info/"))
1193 .unwrap_or(false)
1194 }
1195
1196 fn extract_packages(path: &Path) -> Vec<PackageData> {
1197 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1198 Some(f) => f,
1199 None => {
1200 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1201 }
1202 };
1203
1204 let content = match read_file_to_string(path) {
1205 Ok(c) => c,
1206 Err(e) => {
1207 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1208 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1209 }
1210 };
1211
1212 vec![parse_debian_file_list(
1213 &content,
1214 filename,
1215 DatasourceId::DebianInstalledMd5Sums,
1216 )]
1217 }
1218}
1219
1220crate::register_parser!(
1221 "Debian installed package md5sums",
1222 &["**/var/lib/dpkg/info/*.md5sums"],
1223 "deb",
1224 "",
1225 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1226);
1227
1228const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1229
1230fn parse_debian_file_list(
1231 content: &str,
1232 filename: &str,
1233 datasource_id: DatasourceId,
1234) -> PackageData {
1235 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1236 (Some(pkg.to_string()), Some(arch.to_string()))
1237 } else if filename == "md5sums" {
1238 (None, None)
1239 } else {
1240 (Some(filename.to_string()), None)
1241 };
1242
1243 let mut file_references = Vec::new();
1244
1245 for line in content.lines() {
1246 let line = line.trim();
1247 if line.is_empty() || line.starts_with('#') {
1248 continue;
1249 }
1250
1251 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1252 (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1253 } else {
1254 (None, line)
1255 };
1256
1257 if IGNORED_ROOT_DIRS.contains(&path) {
1258 continue;
1259 }
1260
1261 file_references.push(FileReference {
1262 path: path.to_string(),
1263 size: None,
1264 sha1: None,
1265 md5: md5sum,
1266 sha256: None,
1267 sha512: None,
1268 extra_data: None,
1269 });
1270 }
1271
1272 if file_references.is_empty() {
1273 return default_package_data(datasource_id);
1274 }
1275
1276 let namespace = Some("debian".to_string());
1277 let mut package = PackageData {
1278 datasource_id: Some(datasource_id),
1279 package_type: Some(PACKAGE_TYPE),
1280 namespace: namespace.clone(),
1281 name: name.clone(),
1282 file_references,
1283 ..Default::default()
1284 };
1285
1286 if let Some(n) = &name {
1287 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1288 }
1289
1290 package
1291}
1292
1293pub struct DebianCopyrightParser;
1295
1296impl PackageParser for DebianCopyrightParser {
1297 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1298
1299 fn is_match(path: &Path) -> bool {
1300 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1301 if filename != "copyright" {
1302 return filename.ends_with("_copyright");
1303 }
1304 let path_str = path.to_string_lossy();
1305 path_str.contains("/debian/")
1306 || path_str.contains("/packages/deb/")
1307 || path_str.contains("/usr/share/doc/")
1308 || path_str.ends_with("debian/copyright")
1309 } else {
1310 false
1311 }
1312 }
1313
1314 fn extract_packages(path: &Path) -> Vec<PackageData> {
1315 let datasource_id = detect_debian_copyright_datasource(path);
1316 let content = match read_file_to_string(path) {
1317 Ok(c) => c,
1318 Err(e) => {
1319 warn!("Failed to read copyright file {:?}: {}", path, e);
1320 return vec![default_package_data(datasource_id)];
1321 }
1322 };
1323
1324 let package_name = extract_package_name_from_path(path);
1325 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1326 package_data.datasource_id = Some(datasource_id);
1327 vec![package_data]
1328 }
1329}
1330
// Parser metadata registration for Debian copyright files: a human-readable
// description, the path globs the parser applies to, and a link to the
// copyright-format specification.
// NOTE(review): the `"deb"` and `""` arguments are presumably the package type
// and an unset primary language; confirm against the `register_parser!`
// definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
1343
1344fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1345 let path_str = path.to_string_lossy();
1346 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1347 DatasourceId::DebianCopyrightInSource
1348 } else if path_str.contains("/usr/share/doc/") {
1349 DatasourceId::DebianCopyrightInPackage
1350 } else {
1351 DatasourceId::DebianCopyrightStandalone
1352 }
1353}
1354
/// Derives a package name from a path of the form `.../doc/<package>/...`.
///
/// Scans the path components left to right and returns the component that
/// directly follows the first `doc` component which is itself followed by a
/// normal (named) component. Returns `None` when no such pair exists or when
/// the following component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let mut parts = path.components().peekable();
    while let Some(part) = parts.next() {
        let is_doc_dir = matches!(part, Component::Normal(name) if name.to_str() == Some("doc"));
        if !is_doc_dir {
            continue;
        }
        // Only a named component counts as the package directory; `..` and
        // friends make the scan move on to a later `doc` component.
        if let Some(Component::Normal(package_dir)) = parts.peek() {
            return package_dir.to_str().map(|s| s.to_string());
        }
    }

    None
}
1369
1370fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1371 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1372
1373 let is_dep5 = paragraphs
1374 .first()
1375 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1376 .is_some();
1377
1378 let namespace = Some("debian".to_string());
1379 let mut parties = Vec::new();
1380 let mut license_statements = Vec::new();
1381 let mut primary_license_detection = None;
1382 let mut header_license_detection = None;
1383 let mut other_license_detections = Vec::new();
1384
1385 if is_dep5 {
1386 for para in ¶graphs {
1387 if let Some(copyright_text) =
1388 rfc822::get_header_first(¶.metadata.headers, "copyright")
1389 {
1390 for holder in parse_copyright_holders(©right_text) {
1391 if !holder.is_empty() {
1392 parties.push(Party {
1393 r#type: None,
1394 role: Some("copyright-holder".to_string()),
1395 name: Some(holder),
1396 email: None,
1397 url: None,
1398 organization: None,
1399 organization_url: None,
1400 timezone: None,
1401 });
1402 }
1403 }
1404 }
1405
1406 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1407 let license_name = license.lines().next().unwrap_or(&license).trim();
1408 if !license_name.is_empty()
1409 && !license_statements.contains(&license_name.to_string())
1410 {
1411 license_statements.push(license_name.to_string());
1412 }
1413
1414 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1415 let detection =
1416 build_primary_license_detection(license_name, matched_text, line_no);
1417 let is_header_paragraph =
1418 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1419 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1420 == Some("*")
1421 {
1422 primary_license_detection = Some(detection);
1423 } else if is_header_paragraph {
1424 header_license_detection.get_or_insert(detection);
1425 } else {
1426 other_license_detections.push(detection);
1427 }
1428 }
1429 }
1430 }
1431
1432 if primary_license_detection.is_none() && header_license_detection.is_some() {
1433 primary_license_detection = header_license_detection;
1434 }
1435 } else {
1436 let copyright_block = extract_unstructured_field(content, "Copyright:");
1437 if let Some(text) = copyright_block {
1438 for holder in parse_copyright_holders(&text) {
1439 if !holder.is_empty() {
1440 parties.push(Party {
1441 r#type: None,
1442 role: Some("copyright-holder".to_string()),
1443 name: Some(holder),
1444 email: None,
1445 url: None,
1446 organization: None,
1447 organization_url: None,
1448 timezone: None,
1449 });
1450 }
1451 }
1452 }
1453
1454 let license_block = extract_unstructured_field(content, "License:");
1455 if let Some(text) = license_block {
1456 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1457 }
1458 }
1459
1460 let extracted_license_statement = if license_statements.is_empty() {
1461 None
1462 } else {
1463 Some(license_statements.join(" AND "))
1464 };
1465
1466 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1467 let declared_license_expression = license_detections
1468 .first()
1469 .map(|detection| detection.license_expression.clone());
1470 let declared_license_expression_spdx = license_detections
1471 .first()
1472 .map(|detection| detection.license_expression_spdx.clone());
1473 let other_license_expression = combine_license_expressions(
1474 other_license_detections
1475 .iter()
1476 .map(|detection| detection.license_expression.clone()),
1477 );
1478 let other_license_expression_spdx = combine_license_expressions(
1479 other_license_detections
1480 .iter()
1481 .map(|detection| detection.license_expression_spdx.clone()),
1482 );
1483
1484 PackageData {
1485 datasource_id: Some(DatasourceId::DebianCopyright),
1486 package_type: Some(PACKAGE_TYPE),
1487 namespace: namespace.clone(),
1488 name: package_name.map(|s| s.to_string()),
1489 parties,
1490 declared_license_expression,
1491 declared_license_expression_spdx,
1492 license_detections,
1493 other_license_expression,
1494 other_license_expression_spdx,
1495 other_license_detections,
1496 extracted_license_statement,
1497 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1498 ..Default::default()
1499 }
1500}
1501
/// A single paragraph of a Debian copyright file together with the location
/// of its `License:` header.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph.
    metadata: Rfc822Metadata,
    /// Text and line number of the paragraph's `License:` header line, when
    /// one was recorded.
    license_header_line: Option<(String, usize)>,
}
1507
1508fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1509 let mut paragraphs = Vec::new();
1510 let mut current_lines = Vec::new();
1511 let mut current_start_line = 1usize;
1512
1513 for (idx, line) in content.lines().enumerate() {
1514 let line_no = idx + 1;
1515 if line.is_empty() {
1516 if !current_lines.is_empty() {
1517 paragraphs.push(finalize_copyright_paragraph(
1518 std::mem::take(&mut current_lines),
1519 current_start_line,
1520 ));
1521 }
1522 current_start_line = line_no + 1;
1523 } else {
1524 if current_lines.is_empty() {
1525 current_start_line = line_no;
1526 }
1527 current_lines.push(line.to_string());
1528 }
1529 }
1530
1531 if !current_lines.is_empty() {
1532 paragraphs.push(finalize_copyright_paragraph(
1533 current_lines,
1534 current_start_line,
1535 ));
1536 }
1537
1538 paragraphs
1539}
1540
1541fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1542 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1543 let mut current_name: Option<String> = None;
1544 let mut current_value = String::new();
1545 let mut license_header_line = None;
1546
1547 for (idx, line) in raw_lines.iter().enumerate() {
1548 if line.starts_with(' ') || line.starts_with('\t') {
1549 if current_name.is_some() {
1550 current_value.push('\n');
1551 current_value.push_str(line);
1552 }
1553 continue;
1554 }
1555
1556 if let Some(name) = current_name.take() {
1557 add_copyright_header_value(&mut headers, &name, ¤t_value);
1558 current_value.clear();
1559 }
1560
1561 if let Some((name, value)) = line.split_once(':') {
1562 let normalized_name = name.trim().to_ascii_lowercase();
1563 if normalized_name == "license" && license_header_line.is_none() {
1564 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1565 }
1566 current_name = Some(normalized_name);
1567 current_value = value.trim_start().to_string();
1568 }
1569 }
1570
1571 if let Some(name) = current_name.take() {
1572 add_copyright_header_value(&mut headers, &name, ¤t_value);
1573 }
1574
1575 CopyrightParagraph {
1576 metadata: Rfc822Metadata {
1577 headers,
1578 body: String::new(),
1579 },
1580 license_header_line,
1581 }
1582}
1583
/// Records `value` under the header `name`.
///
/// The header key is created even when nothing is stored: a value that is
/// empty after removing trailing whitespace leaves the key with its existing
/// (possibly empty) list. Otherwise the value, without trailing whitespace,
/// is appended to the list.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_string()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_string()),
    }
}
1591
1592fn build_primary_license_detection(
1593 license_name: &str,
1594 matched_text: String,
1595 line_no: usize,
1596) -> LicenseDetection {
1597 let normalized = normalize_debian_license_name(license_name);
1598 let line = LineNumber::new(line_no).unwrap();
1599
1600 build_declared_license_detection(
1601 &normalized,
1602 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1603 )
1604}
1605
1606fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1607 match license_name.trim() {
1608 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1609 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1610 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1611 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1612 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1613 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1614 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1615 "public-domain" => {
1616 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1617 }
1618 other => normalize_declared_license_key(other)
1619 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1620 }
1621}
1622
/// Extracts copyright holder names from the text of a copyright field.
///
/// Every non-blank line is processed independently:
///
/// 1. Leading copyright markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`)
///    are removed. Markers may be combined and separated by whitespace, so
///    `Copyright (C) 2020 Foo` and `(c) Copyright 2020 Foo` both reduce to
///    `2020 Foo`.
/// 2. Everything before the first alphabetic character (years, ranges,
///    punctuation) is dropped.
/// 3. The remainder is kept as a holder when it is longer than two bytes.
///
/// Lines without any alphabetic character after the markers produce nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    /// Removes every leading copyright marker, skipping the whitespace
    /// between consecutive markers.
    fn strip_copyright_markers(line: &str) -> &str {
        const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

        let mut rest = line.trim();
        // Each round removes at least one marker, so the loop terminates.
        while let Some(after) = MARKERS.iter().find_map(|marker| rest.strip_prefix(*marker)) {
            rest = after.trim_start();
        }
        rest
    }

    text.lines()
        .filter_map(|line| {
            let cleaned = strip_copyright_markers(line);
            // Skip the year list: the holder starts at the first letter.
            let holder_start = cleaned.find(char::is_alphabetic)?;
            let holder = cleaned[holder_start..].trim();
            (holder.len() > 2).then(|| holder.to_string())
        })
        .collect()
}
1655
/// Extracts the value of a free-form field such as `Copyright:` from a
/// copyright file that does not use the machine-readable format.
///
/// Collection starts at the first line that begins with `field_name` (the
/// rest of that line is the first value line). Subsequent lines starting with
/// whitespace are appended, empty lines are skipped, another line starting
/// with `field_name` is appended too, and the first other non-blank line ends
/// the field. Collected lines are trimmed and joined with newlines.
///
/// Returns `None` when the field is absent or its collected value is blank.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut seen_field = false;

    for line in content.lines() {
        let fragment = if line.starts_with(field_name) {
            seen_field = true;
            line.trim_start_matches(field_name).trim()
        } else if !seen_field {
            // Still looking for the start of the field.
            continue;
        } else if line.starts_with(char::is_whitespace) {
            line.trim()
        } else if line.trim().is_empty() {
            continue;
        } else {
            // A non-indented, non-blank line starts something else.
            break;
        };

        collected.push_str(fragment);
        collected.push('\n');
    }

    match collected.trim() {
        "" => None,
        value => Some(value.to_string()),
    }
}
1682
1683pub struct DebianDebParser;
1685
1686impl PackageParser for DebianDebParser {
1687 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1688
1689 fn is_match(path: &Path) -> bool {
1690 path.extension().and_then(|e| e.to_str()) == Some("deb")
1691 }
1692
1693 fn extract_packages(path: &Path) -> Vec<PackageData> {
1694 if let Ok(data) = extract_deb_archive(path) {
1696 return vec![data];
1697 }
1698
1699 let filename = match path.file_name().and_then(|n| n.to_str()) {
1701 Some(f) => f,
1702 None => {
1703 return vec![default_package_data(DatasourceId::DebianDeb)];
1704 }
1705 };
1706
1707 vec![parse_deb_filename(filename)]
1708 }
1709}
1710
// Parser metadata registration for `.deb` archives: a human-readable
// description, the path glob the parser applies to, and a link to the Debian
// Policy chapter on binary packages.
// NOTE(review): the `"deb"` and `""` arguments are presumably the package type
// and an unset primary language; confirm against the `register_parser!`
// definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1718
1719fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1720 use flate2::read::GzDecoder;
1721 use liblzma::read::XzDecoder;
1722 use std::io::{Cursor, Read};
1723
1724 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1725
1726 let mut archive = ar::Archive::new(file);
1727 let mut package: Option<PackageData> = None;
1728
1729 while let Some(entry_result) = archive.next_entry() {
1730 let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1731
1732 let entry_name = std::str::from_utf8(entry.header().identifier())
1733 .map_err(|e| format!("Invalid entry name: {}", e))?;
1734 let entry_name = entry_name.trim().to_string();
1735
1736 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1737 let mut control_data = Vec::new();
1738 entry
1739 .read_to_end(&mut control_data)
1740 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1741
1742 if entry_name.ends_with(".gz") {
1743 let decoder = GzDecoder::new(Cursor::new(control_data));
1744 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1745 package = Some(parsed_package);
1746 }
1747 } else if entry_name.ends_with(".xz") {
1748 let decoder = XzDecoder::new(Cursor::new(control_data));
1749 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1750 package = Some(parsed_package);
1751 }
1752 }
1753 } else if entry_name.starts_with("data.tar") {
1754 let mut data = Vec::new();
1755 entry
1756 .read_to_end(&mut data)
1757 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1758
1759 let Some(current_package) = package.as_mut() else {
1760 continue;
1761 };
1762
1763 if entry_name.ends_with(".gz") {
1764 let decoder = GzDecoder::new(Cursor::new(data));
1765 merge_deb_data_archive(decoder, current_package)?;
1766 } else if entry_name.ends_with(".xz") {
1767 let decoder = XzDecoder::new(Cursor::new(data));
1768 merge_deb_data_archive(decoder, current_package)?;
1769 }
1770 }
1771 }
1772
1773 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1774}
1775
1776fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1777 use std::io::Read;
1778
1779 let mut tar_archive = tar::Archive::new(reader);
1780
1781 for tar_entry_result in tar_archive
1782 .entries()
1783 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1784 {
1785 let mut tar_entry =
1786 tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1787
1788 let tar_path = tar_entry
1789 .path()
1790 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1791
1792 if tar_path.ends_with("control") {
1793 let mut control_content = String::new();
1794 tar_entry
1795 .read_to_string(&mut control_content)
1796 .map_err(|e| format!("Failed to read control file: {}", e))?;
1797
1798 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1799 if paragraphs.is_empty() {
1800 return Err("No paragraphs in control file".to_string());
1801 }
1802
1803 if let Some(package) =
1804 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
1805 {
1806 return Ok(Some(package));
1807 }
1808
1809 return Err("Failed to parse control file".to_string());
1810 }
1811 }
1812
1813 Ok(None)
1814}
1815
1816fn merge_deb_data_archive<R: std::io::Read>(
1817 reader: R,
1818 package: &mut PackageData,
1819) -> Result<(), String> {
1820 use std::io::Read;
1821
1822 let mut tar_archive = tar::Archive::new(reader);
1823
1824 for tar_entry_result in tar_archive
1825 .entries()
1826 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1827 {
1828 let mut tar_entry =
1829 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1830
1831 let tar_path = tar_entry
1832 .path()
1833 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1834 let tar_path_str = tar_path.to_string_lossy();
1835
1836 if tar_path_str.ends_with(&format!(
1837 "/usr/share/doc/{}/copyright",
1838 package.name.as_deref().unwrap_or_default()
1839 )) || tar_path_str.ends_with(&format!(
1840 "usr/share/doc/{}/copyright",
1841 package.name.as_deref().unwrap_or_default()
1842 )) {
1843 let mut copyright_content = String::new();
1844 tar_entry
1845 .read_to_string(&mut copyright_content)
1846 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1847
1848 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
1849 merge_debian_copyright_into_package(package, ©right_pkg);
1850 break;
1851 }
1852 }
1853
1854 Ok(())
1855}
1856
1857fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1858 if target.extracted_license_statement.is_none() {
1859 target.extracted_license_statement = copyright.extracted_license_statement.clone();
1860 }
1861
1862 for party in ©right.parties {
1863 if !target.parties.iter().any(|existing| {
1864 existing.r#type == party.r#type
1865 && existing.role == party.role
1866 && existing.name == party.name
1867 && existing.email == party.email
1868 && existing.url == party.url
1869 && existing.organization == party.organization
1870 && existing.organization_url == party.organization_url
1871 && existing.timezone == party.timezone
1872 }) {
1873 target.parties.push(party.clone());
1874 }
1875 }
1876}
1877
1878fn parse_deb_filename(filename: &str) -> PackageData {
1879 let without_ext = filename.trim_end_matches(".deb");
1880
1881 let parts: Vec<&str> = without_ext.split('_').collect();
1882 if parts.len() < 2 {
1883 return default_package_data(DatasourceId::DebianDeb);
1884 }
1885
1886 let name = parts[0].to_string();
1887 let version = parts[1].to_string();
1888 let architecture = if parts.len() >= 3 {
1889 Some(parts[2].to_string())
1890 } else {
1891 None
1892 };
1893
1894 let namespace = Some("debian".to_string());
1895
1896 PackageData {
1897 datasource_id: Some(DatasourceId::DebianDeb),
1898 package_type: Some(PACKAGE_TYPE),
1899 namespace: namespace.clone(),
1900 name: Some(name.clone()),
1901 version: Some(version.clone()),
1902 purl: build_debian_purl(
1903 &name,
1904 Some(&version),
1905 namespace.as_deref(),
1906 architecture.as_deref(),
1907 ),
1908 ..Default::default()
1909 }
1910}
1911
1912pub struct DebianControlInExtractedDebParser;
1918
1919impl PackageParser for DebianControlInExtractedDebParser {
1920 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1921
1922 fn is_match(path: &Path) -> bool {
1923 path.file_name()
1924 .and_then(|n| n.to_str())
1925 .is_some_and(|name| name == "control")
1926 && path
1927 .to_str()
1928 .map(|p| {
1929 p.ends_with("control.tar.gz-extract/control")
1930 || p.ends_with("control.tar.xz-extract/control")
1931 })
1932 .unwrap_or(false)
1933 }
1934
1935 fn extract_packages(path: &Path) -> Vec<PackageData> {
1936 let content = match read_file_to_string(path) {
1937 Ok(c) => c,
1938 Err(e) => {
1939 warn!(
1940 "Failed to read control file in extracted deb {:?}: {}",
1941 path, e
1942 );
1943 return vec![default_package_data(
1944 DatasourceId::DebianControlExtractedDeb,
1945 )];
1946 }
1947 };
1948
1949 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1952 if paragraphs.is_empty() {
1953 return vec![default_package_data(
1954 DatasourceId::DebianControlExtractedDeb,
1955 )];
1956 }
1957
1958 if let Some(pkg) = build_package_from_paragraph(
1959 ¶graphs[0],
1960 None,
1961 DatasourceId::DebianControlExtractedDeb,
1962 ) {
1963 vec![pkg]
1964 } else {
1965 vec![default_package_data(
1966 DatasourceId::DebianControlExtractedDeb,
1967 )]
1968 }
1969 }
1970}
1971
1972pub struct DebianMd5sumInPackageParser;
1974
1975impl PackageParser for DebianMd5sumInPackageParser {
1976 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1977
1978 fn is_match(path: &Path) -> bool {
1979 path.file_name()
1980 .and_then(|n| n.to_str())
1981 .is_some_and(|name| name == "md5sums")
1982 && path
1983 .to_str()
1984 .map(|p| {
1985 p.ends_with("control.tar.gz-extract/md5sums")
1986 || p.ends_with("control.tar.xz-extract/md5sums")
1987 })
1988 .unwrap_or(false)
1989 }
1990
1991 fn extract_packages(path: &Path) -> Vec<PackageData> {
1992 let content = match read_file_to_string(path) {
1993 Ok(c) => c,
1994 Err(e) => {
1995 warn!("Failed to read md5sums file {:?}: {}", path, e);
1996 return vec![default_package_data(
1997 DatasourceId::DebianMd5SumsInExtractedDeb,
1998 )];
1999 }
2000 };
2001
2002 let package_name = extract_package_name_from_deb_path(path);
2003
2004 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2005 }
2006}
2007
/// Derives the package name from the directory an extracted `.deb` lives in.
///
/// The grandparent directory of `path` must be named
/// `<name>[_<rest>].deb-extract`; the text before the first `_` is returned.
/// Returns `None` when the path is too shallow, the directory name is not
/// valid UTF-8, or it lacks the `.deb-extract` ending.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let deb_extract_dir = path.parent().and_then(Path::parent)?;
    let package_stem = deb_extract_dir
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    package_stem.split('_').next().map(str::to_owned)
}
2018
2019fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2020 let mut file_references = Vec::new();
2021
2022 for line in content.lines() {
2023 let line = line.trim();
2024 if line.is_empty() || line.starts_with('#') {
2025 continue;
2026 }
2027
2028 let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find(" ") {
2029 (
2030 Md5Digest::from_hex(line[..idx].trim()).ok(),
2031 line[idx + 2..].trim(),
2032 )
2033 } else if let Some((hash, path)) = line.split_once(' ') {
2034 (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2035 } else {
2036 (None, line)
2037 };
2038
2039 if IGNORED_ROOT_DIRS.contains(&filepath) {
2040 continue;
2041 }
2042
2043 file_references.push(FileReference {
2044 path: filepath.to_string(),
2045 size: None,
2046 sha1: None,
2047 md5: md5sum,
2048 sha256: None,
2049 sha512: None,
2050 extra_data: None,
2051 });
2052 }
2053
2054 if file_references.is_empty() {
2055 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2056 }
2057
2058 let namespace = Some("debian".to_string());
2059 let mut package = PackageData {
2060 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2061 package_type: Some(PACKAGE_TYPE),
2062 namespace: namespace.clone(),
2063 name: package_name.map(|s| s.to_string()),
2064 file_references,
2065 ..Default::default()
2066 };
2067
2068 if let Some(n) = &package.name {
2069 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2070 }
2071
2072 package
2073}
2074
// Parser metadata registration for `control` files inside an extracted
// `.deb` control tarball: a human-readable description, the path globs the
// parser applies to, and a link to the Debian Policy chapter on control
// fields.
// NOTE(review): the `"deb"` and `""` arguments are presumably the package type
// and an unset primary language; confirm against the `register_parser!`
// definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2085
// Parser metadata registration for `md5sums` files inside an extracted
// `.deb` control tarball: a human-readable description, the path globs the
// parser applies to, and a documentation link.
// NOTE(review): the `"deb"` and `""` arguments are presumably the package type
// and an unset primary language; confirm against the `register_parser!`
// definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2096
2097#[cfg(test)]
2098mod tests {
2099 use super::*;
2100 use crate::models::DatasourceId;
2101 use crate::models::PackageType;
2102 use ar::{Builder as ArBuilder, Header as ArHeader};
2103 use flate2::Compression;
2104 use flate2::write::GzEncoder;
2105 use liblzma::write::XzEncoder;
2106 use std::io::Cursor;
2107 use std::path::PathBuf;
2108 use tar::{Builder as TarBuilder, Header as TarHeader};
2109 use tempfile::NamedTempFile;
2110
2111 fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2112 let mut control_tar = Vec::new();
2113 {
2114 let encoder = XzEncoder::new(&mut control_tar, 6);
2115 let mut tar_builder = TarBuilder::new(encoder);
2116
2117 let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2118 let mut header = TarHeader::new_gnu();
2119 header
2120 .set_path("control")
2121 .expect("control tar path should be valid");
2122 header.set_size(control_content.len() as u64);
2123 header.set_mode(0o644);
2124 header.set_cksum();
2125 tar_builder
2126 .append(&header, Cursor::new(control_content))
2127 .expect("control file should be appended to tar.xz");
2128 tar_builder.finish().expect("control tar.xz should finish");
2129 }
2130
2131 let deb = NamedTempFile::new().expect("temp deb file should be created");
2132 {
2133 let mut builder = ArBuilder::new(
2134 deb.reopen()
2135 .expect("temporary deb file should reopen for writing"),
2136 );
2137
2138 let debian_binary = b"2.0\n";
2139 let mut debian_binary_header =
2140 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2141 debian_binary_header.set_mode(0o100644);
2142 builder
2143 .append(&debian_binary_header, Cursor::new(debian_binary))
2144 .expect("debian-binary entry should be appended");
2145
2146 let mut control_header =
2147 ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2148 control_header.set_mode(0o100644);
2149 builder
2150 .append(&control_header, Cursor::new(control_tar))
2151 .expect("control.tar.xz entry should be appended");
2152 }
2153
2154 deb
2155 }
2156
2157 fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2158 let mut control_tar = Vec::new();
2159 {
2160 let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2161 let mut tar_builder = TarBuilder::new(encoder);
2162
2163 let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2164 let mut header = TarHeader::new_gnu();
2165 header
2166 .set_path("control")
2167 .expect("control tar path should be valid");
2168 header.set_size(control_content.len() as u64);
2169 header.set_mode(0o644);
2170 header.set_cksum();
2171 tar_builder
2172 .append(&header, Cursor::new(control_content))
2173 .expect("control file should be appended to tar.gz");
2174 tar_builder.finish().expect("control tar.gz should finish");
2175 }
2176
2177 let mut data_tar = Vec::new();
2178 {
2179 let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2180 let mut tar_builder = TarBuilder::new(encoder);
2181
2182 let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2183 let mut header = TarHeader::new_gnu();
2184 header
2185 .set_path("./usr/share/doc/synthetic/copyright")
2186 .expect("copyright path should be valid");
2187 header.set_size(copyright.len() as u64);
2188 header.set_mode(0o644);
2189 header.set_cksum();
2190 tar_builder
2191 .append(&header, Cursor::new(copyright))
2192 .expect("copyright file should be appended to data tar");
2193 tar_builder.finish().expect("data tar.gz should finish");
2194 }
2195
2196 let deb = NamedTempFile::new().expect("temp deb file should be created");
2197 {
2198 let mut builder = ArBuilder::new(
2199 deb.reopen()
2200 .expect("temporary deb file should reopen for writing"),
2201 );
2202
2203 let debian_binary = b"2.0\n";
2204 let mut debian_binary_header =
2205 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2206 debian_binary_header.set_mode(0o100644);
2207 builder
2208 .append(&debian_binary_header, Cursor::new(debian_binary))
2209 .expect("debian-binary entry should be appended");
2210
2211 let mut control_header =
2212 ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2213 control_header.set_mode(0o100644);
2214 builder
2215 .append(&control_header, Cursor::new(control_tar))
2216 .expect("control.tar.gz entry should be appended");
2217
2218 let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2219 data_header.set_mode(0o100644);
2220 builder
2221 .append(&data_header, Cursor::new(data_tar))
2222 .expect("data.tar.gz entry should be appended");
2223 }
2224
2225 deb
2226 }
2227
2228 #[test]
2231 fn test_detect_namespace_from_ubuntu_version() {
2232 assert_eq!(
2233 detect_namespace(Some("1.0-1ubuntu1"), None),
2234 Some("ubuntu".to_string())
2235 );
2236 }
2237
2238 #[test]
2239 fn test_detect_namespace_from_debian_version() {
2240 assert_eq!(
2241 detect_namespace(Some("1.0-1+deb11u1"), None),
2242 Some("debian".to_string())
2243 );
2244 }
2245
2246 #[test]
2247 fn test_detect_namespace_from_ubuntu_maintainer() {
2248 assert_eq!(
2249 detect_namespace(
2250 None,
2251 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2252 ),
2253 Some("ubuntu".to_string())
2254 );
2255 }
2256
2257 #[test]
2258 fn test_detect_namespace_from_debian_maintainer() {
2259 assert_eq!(
2260 detect_namespace(None, Some("John Doe <john@debian.org>")),
2261 Some("debian".to_string())
2262 );
2263 }
2264
2265 #[test]
2266 fn test_detect_namespace_default() {
2267 assert_eq!(
2268 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2269 Some("debian".to_string())
2270 );
2271 }
2272
2273 #[test]
2274 fn test_detect_namespace_version_takes_priority() {
2275 assert_eq!(
2277 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2278 Some("ubuntu".to_string())
2279 );
2280 }
2281
2282 #[test]
2285 fn test_build_purl_basic() {
2286 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2287 assert_eq!(
2288 purl,
2289 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2290 );
2291 }
2292
2293 #[test]
2294 fn test_build_purl_no_version() {
2295 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2296 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2297 }
2298
2299 #[test]
2300 fn test_build_purl_no_arch() {
2301 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2302 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2303 }
2304
2305 #[test]
2306 fn test_build_purl_no_namespace() {
2307 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2308 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2309 }
2310
2311 #[test]
2314 fn test_parse_simple_dependency() {
2315 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2316 assert_eq!(deps.len(), 1);
2317 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2318 assert_eq!(deps[0].extracted_requirement, None);
2319 assert_eq!(deps[0].scope, Some("depends".to_string()));
2320 }
2321
2322 #[test]
2323 fn test_parse_dependency_with_version() {
2324 let deps =
2325 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2326 assert_eq!(deps.len(), 1);
2327 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2328 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2329 }
2330
2331 #[test]
2332 fn test_parse_dependency_exact_version() {
2333 let deps = parse_dependency_field(
2334 "libc6 (= 2.31-13+deb11u5)",
2335 "depends",
2336 true,
2337 false,
2338 Some("debian"),
2339 );
2340 assert_eq!(deps.len(), 1);
2341 assert_eq!(deps[0].is_pinned, Some(true));
2342 }
2343
2344 #[test]
2345 fn test_parse_dependency_strict_less() {
2346 let deps =
2347 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2348 assert_eq!(deps.len(), 1);
2349 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2350 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2351 }
2352
2353 #[test]
2354 fn test_parse_multiple_dependencies() {
2355 let deps = parse_dependency_field(
2356 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2357 "depends",
2358 true,
2359 false,
2360 Some("debian"),
2361 );
2362 assert_eq!(deps.len(), 3);
2363 }
2364
2365 #[test]
2366 fn test_parse_dependency_alternatives() {
2367 let deps = parse_dependency_field(
2368 "libssl1.1 | libssl3",
2369 "depends",
2370 true,
2371 false,
2372 Some("debian"),
2373 );
2374 assert_eq!(deps.len(), 2);
2375 assert_eq!(deps[0].is_optional, Some(true));
2377 assert_eq!(deps[1].is_optional, Some(true));
2378 }
2379
2380 #[test]
2381 fn test_parse_dependency_skips_substitutions() {
2382 let deps = parse_dependency_field(
2383 "${shlibs:Depends}, ${misc:Depends}, libc6",
2384 "depends",
2385 true,
2386 false,
2387 Some("debian"),
2388 );
2389 assert_eq!(deps.len(), 1);
2390 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2391 }
2392
2393 #[test]
2394 fn test_parse_dependency_with_arch_qualifier() {
2395 let deps = parse_dependency_field(
2397 "libc6 (>= 2.17) [amd64]",
2398 "depends",
2399 true,
2400 false,
2401 Some("debian"),
2402 );
2403 assert_eq!(deps.len(), 1);
2404 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2405 }
2406
2407 #[test]
2408 fn test_parse_empty_dependency() {
2409 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2410 assert!(deps.is_empty());
2411 }
2412
2413 #[test]
2416 fn test_parse_source_field_name_only() {
2417 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2418 assert_eq!(sources.len(), 1);
2419 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2420 }
2421
2422 #[test]
2423 fn test_parse_source_field_with_version() {
2424 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2425 assert_eq!(sources.len(), 1);
2426 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2427 }
2428
2429 #[test]
2430 fn test_parse_source_field_empty() {
2431 let sources = parse_source_field(None, Some("debian"));
2432 assert!(sources.is_empty());
2433 }
2434
2435 #[test]
2438 fn test_parse_debian_control_source_and_binary() {
2439 let content = "\
2440Source: curl
2441Section: web
2442Priority: optional
2443Maintainer: Alessandro Ghedini <ghedo@debian.org>
2444Homepage: https://curl.se/
2445Vcs-Browser: https://salsa.debian.org/debian/curl
2446Vcs-Git: https://salsa.debian.org/debian/curl.git
2447Build-Depends: debhelper (>= 12), libssl-dev
2448
2449Package: curl
2450Architecture: amd64
2451Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2452Description: command line tool for transferring data with URL syntax";
2453
2454 let packages = parse_debian_control(content);
2455 assert_eq!(packages.len(), 1);
2456
2457 let pkg = &packages[0];
2458 assert_eq!(pkg.name, Some("curl".to_string()));
2459 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2460 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2461 assert_eq!(
2462 pkg.vcs_url,
2463 Some("https://salsa.debian.org/debian/curl.git".to_string())
2464 );
2465 assert_eq!(
2466 pkg.code_view_url,
2467 Some("https://salsa.debian.org/debian/curl".to_string())
2468 );
2469
2470 assert_eq!(pkg.parties.len(), 1);
2472 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2473 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2474 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2475
2476 assert!(!pkg.dependencies.is_empty());
2478 }
2479
2480 #[test]
2481 fn test_parse_debian_control_multiple_binary() {
2482 let content = "\
2483Source: gzip
2484Maintainer: Debian Developer <dev@debian.org>
2485
2486Package: gzip
2487Architecture: any
2488Depends: libc6 (>= 2.17)
2489Description: GNU file compression
2490
2491Package: gzip-win32
2492Architecture: all
2493Description: gzip for Windows";
2494
2495 let packages = parse_debian_control(content);
2496 assert_eq!(packages.len(), 2);
2497 assert_eq!(packages[0].name, Some("gzip".to_string()));
2498 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2499
2500 assert_eq!(packages[0].parties.len(), 1);
2502 assert_eq!(packages[1].parties.len(), 1);
2503 }
2504
2505 #[test]
2506 fn test_parse_debian_control_source_only() {
2507 let content = "\
2508Source: my-package
2509Maintainer: Test User <test@debian.org>
2510Build-Depends: debhelper (>= 13)";
2511
2512 let packages = parse_debian_control(content);
2513 assert_eq!(packages.len(), 1);
2514 assert_eq!(packages[0].name, Some("my-package".to_string()));
2515 assert!(!packages[0].dependencies.is_empty());
2517 assert_eq!(
2518 packages[0].dependencies[0].scope,
2519 Some("build-depends".to_string())
2520 );
2521 }
2522
2523 #[test]
2524 fn test_parse_debian_control_with_uploaders() {
2525 let content = "\
2526Source: example
2527Maintainer: Main Dev <main@debian.org>
2528Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2529
2530Package: example
2531Architecture: any
2532Description: test package";
2533
2534 let packages = parse_debian_control(content);
2535 assert_eq!(packages.len(), 1);
2536 assert_eq!(packages[0].parties.len(), 3);
2538 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2539 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2540 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2541 }
2542
2543 #[test]
2544 fn test_parse_debian_control_vcs_git_with_branch() {
2545 let content = "\
2546Source: example
2547Maintainer: Dev <dev@debian.org>
2548Vcs-Git: https://salsa.debian.org/example.git -b main
2549
2550Package: example
2551Architecture: any
2552Description: test";
2553
2554 let packages = parse_debian_control(content);
2555 assert_eq!(packages.len(), 1);
2556 assert_eq!(
2558 packages[0].vcs_url,
2559 Some("https://salsa.debian.org/example.git".to_string())
2560 );
2561 }
2562
2563 #[test]
2564 fn test_parse_debian_control_multi_arch() {
2565 let content = "\
2566Source: example
2567Maintainer: Dev <dev@debian.org>
2568
2569Package: libexample
2570Architecture: any
2571Multi-Arch: same
2572Description: shared library";
2573
2574 let packages = parse_debian_control(content);
2575 assert_eq!(packages.len(), 1);
2576 let extra = packages[0].extra_data.as_ref().unwrap();
2577 assert_eq!(
2578 extra.get("multi_arch"),
2579 Some(&serde_json::Value::String("same".to_string()))
2580 );
2581 }
2582
2583 #[test]
2586 fn test_parse_dpkg_status_basic() {
2587 let content = "\
2588Package: base-files
2589Status: install ok installed
2590Priority: required
2591Section: admin
2592Installed-Size: 391
2593Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2594Architecture: amd64
2595Version: 11ubuntu5.6
2596Description: Debian base system miscellaneous files
2597Homepage: https://tracker.debian.org/pkg/base-files
2598
2599Package: not-installed
2600Status: deinstall ok config-files
2601Architecture: amd64
2602Version: 1.0
2603Description: This should be skipped";
2604
2605 let packages = parse_dpkg_status(content);
2606 assert_eq!(packages.len(), 1);
2607
2608 let pkg = &packages[0];
2609 assert_eq!(pkg.name, Some("base-files".to_string()));
2610 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2611 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2612 assert_eq!(
2613 pkg.datasource_id,
2614 Some(DatasourceId::DebianInstalledStatusDb)
2615 );
2616
2617 let extra = pkg.extra_data.as_ref().unwrap();
2619 assert_eq!(
2620 extra.get("installed_size"),
2621 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2622 );
2623 }
2624
2625 #[test]
2626 fn test_parse_dpkg_status_multiple_installed() {
2627 let content = "\
2628Package: libc6
2629Status: install ok installed
2630Architecture: amd64
2631Version: 2.31-13+deb11u5
2632Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2633Description: GNU C Library
2634
2635Package: zlib1g
2636Status: install ok installed
2637Architecture: amd64
2638Version: 1:1.2.11.dfsg-2+deb11u2
2639Maintainer: Mark Brown <broonie@debian.org>
2640Description: compression library";
2641
2642 let packages = parse_dpkg_status(content);
2643 assert_eq!(packages.len(), 2);
2644 assert_eq!(packages[0].name, Some("libc6".to_string()));
2645 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2646 }
2647
2648 #[test]
2649 fn test_parse_dpkg_status_with_dependencies() {
2650 let content = "\
2651Package: curl
2652Status: install ok installed
2653Architecture: amd64
2654Version: 7.74.0-1.3+deb11u7
2655Maintainer: Alessandro Ghedini <ghedo@debian.org>
2656Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2657Recommends: ca-certificates
2658Description: command line tool for transferring data with URL syntax";
2659
2660 let packages = parse_dpkg_status(content);
2661 assert_eq!(packages.len(), 1);
2662
2663 let deps = &packages[0].dependencies;
2664 assert_eq!(deps.len(), 3);
2666
2667 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2669 assert_eq!(deps[0].scope, Some("depends".to_string()));
2670 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2671
2672 assert_eq!(
2674 deps[2].purl,
2675 Some("pkg:deb/debian/ca-certificates".to_string())
2676 );
2677 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2678 assert_eq!(deps[2].is_optional, Some(true));
2679 }
2680
2681 #[test]
2682 fn test_parse_dpkg_status_with_source() {
2683 let content = "\
2684Package: libncurses6
2685Status: install ok installed
2686Architecture: amd64
2687Source: ncurses (6.2+20201114-2+deb11u1)
2688Version: 6.2+20201114-2+deb11u1
2689Maintainer: Craig Small <csmall@debian.org>
2690Description: shared libraries for terminal handling";
2691
2692 let packages = parse_dpkg_status(content);
2693 assert_eq!(packages.len(), 1);
2694 assert!(!packages[0].source_packages.is_empty());
2695 assert!(packages[0].source_packages[0].contains("ncurses"));
2697 }
2698
2699 #[test]
2700 fn test_parse_dpkg_status_filters_not_installed() {
2701 let content = "\
2702Package: installed-pkg
2703Status: install ok installed
2704Version: 1.0
2705Architecture: amd64
2706Description: installed
2707
2708Package: half-installed
2709Status: install ok half-installed
2710Version: 2.0
2711Architecture: amd64
2712Description: half installed
2713
2714Package: deinstall-pkg
2715Status: deinstall ok config-files
2716Version: 3.0
2717Architecture: amd64
2718Description: deinstalled
2719
2720Package: purge-pkg
2721Status: purge ok not-installed
2722Version: 4.0
2723Architecture: amd64
2724Description: purged";
2725
2726 let packages = parse_dpkg_status(content);
2727 assert_eq!(packages.len(), 1);
2728 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2729 }
2730
2731 #[test]
2732 fn test_parse_dpkg_status_empty() {
2733 let packages = parse_dpkg_status("");
2734 assert!(packages.is_empty());
2735 }
2736
2737 #[test]
2740 fn test_debian_control_is_match() {
2741 assert!(DebianControlParser::is_match(Path::new(
2742 "/path/to/debian/control"
2743 )));
2744 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2745 assert!(!DebianControlParser::is_match(Path::new(
2746 "/path/to/control"
2747 )));
2748 assert!(!DebianControlParser::is_match(Path::new(
2749 "/path/to/debian/changelog"
2750 )));
2751 }
2752
2753 #[test]
2754 fn test_debian_installed_is_match() {
2755 assert!(DebianInstalledParser::is_match(Path::new(
2756 "/var/lib/dpkg/status"
2757 )));
2758 assert!(DebianInstalledParser::is_match(Path::new(
2759 "some/root/var/lib/dpkg/status"
2760 )));
2761 assert!(!DebianInstalledParser::is_match(Path::new(
2762 "/var/lib/dpkg/status.d/something"
2763 )));
2764 assert!(!DebianInstalledParser::is_match(Path::new(
2765 "/var/lib/dpkg/available"
2766 )));
2767 }
2768
2769 #[test]
2772 fn test_parse_debian_control_empty_input() {
2773 let packages = parse_debian_control("");
2774 assert!(packages.is_empty());
2775 }
2776
2777 #[test]
2778 fn test_parse_debian_control_malformed_input() {
2779 let content = "this is not a valid control file\nwith random text";
2780 let packages = parse_debian_control(content);
2781 assert!(packages.is_empty());
2783 }
2784
2785 #[test]
2786 fn test_dependency_with_epoch_version() {
2787 let deps = parse_dependency_field(
2789 "zlib1g (>= 1:1.2.11)",
2790 "depends",
2791 true,
2792 false,
2793 Some("debian"),
2794 );
2795 assert_eq!(deps.len(), 1);
2796 assert_eq!(
2797 deps[0].extracted_requirement,
2798 Some(">= 1:1.2.11".to_string())
2799 );
2800 }
2801
2802 #[test]
2803 fn test_dependency_with_plus_in_name() {
2804 let deps =
2805 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2806 assert_eq!(deps.len(), 1);
2807 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2808 }
2809
2810 #[test]
2811 fn test_dsc_parser_is_match() {
2812 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2813 assert!(DebianDscParser::is_match(&PathBuf::from(
2814 "adduser_3.118+deb11u1.dsc"
2815 )));
2816 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2817 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2818 }
2819
2820 #[test]
2821 fn test_dsc_parser_adduser() {
2822 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2823 let package = DebianDscParser::extract_first_package(&path);
2824
2825 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2826 assert_eq!(package.namespace, Some("debian".to_string()));
2827 assert_eq!(package.name, Some("adduser".to_string()));
2828 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2829 assert_eq!(
2830 package.purl,
2831 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2832 );
2833 assert_eq!(
2834 package.vcs_url,
2835 Some("https://salsa.debian.org/debian/adduser.git".to_string())
2836 );
2837 assert_eq!(
2838 package.code_view_url,
2839 Some("https://salsa.debian.org/debian/adduser".to_string())
2840 );
2841 assert_eq!(
2842 package.datasource_id,
2843 Some(DatasourceId::DebianSourceControlDsc)
2844 );
2845
2846 assert_eq!(package.parties.len(), 2);
2847 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2848 assert_eq!(
2849 package.parties[0].name,
2850 Some("Debian Adduser Developers".to_string())
2851 );
2852 assert_eq!(
2853 package.parties[0].email,
2854 Some("adduser@packages.debian.org".to_string())
2855 );
2856 assert_eq!(package.parties[0].r#type, None);
2857
2858 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2859 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2860 assert_eq!(
2861 package.parties[1].email,
2862 Some("mh+debian-packages@zugschlus.de".to_string())
2863 );
2864 assert_eq!(package.parties[1].r#type, None);
2865
2866 assert_eq!(package.source_packages.len(), 1);
2867 assert_eq!(
2868 package.source_packages[0],
2869 "pkg:deb/debian/adduser".to_string()
2870 );
2871
2872 assert!(!package.dependencies.is_empty());
2873 let build_dep_names: Vec<String> = package
2874 .dependencies
2875 .iter()
2876 .filter_map(|d| d.purl.as_ref())
2877 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2878 .map(|p| p.to_string())
2879 .collect();
2880 assert!(build_dep_names.len() >= 2);
2881 }
2882
2883 #[test]
2884 fn test_dsc_parser_zsh() {
2885 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2886 let package = DebianDscParser::extract_first_package(&path);
2887
2888 assert_eq!(package.name, Some("zsh".to_string()));
2889 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2890 assert_eq!(package.namespace, Some("debian".to_string()));
2891 assert!(package.purl.is_some());
2892 assert!(package.purl.as_ref().unwrap().contains("zsh"));
2893 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2894 }
2895
2896 #[test]
2897 fn test_parse_dsc_content_basic() {
2898 let content = "Format: 3.0 (native)
2899Source: testpkg
2900Binary: testpkg
2901Architecture: amd64
2902Version: 1.0.0
2903Maintainer: Test User <test@example.com>
2904Standards-Version: 4.5.0
2905Build-Depends: debhelper (>= 12)
2906Files:
2907 abc123 1024 testpkg_1.0.0.tar.xz
2908";
2909
2910 let package = parse_dsc_content(content);
2911 assert_eq!(package.name, Some("testpkg".to_string()));
2912 assert_eq!(package.version, Some("1.0.0".to_string()));
2913 assert_eq!(package.namespace, Some("debian".to_string()));
2914 assert_eq!(package.parties.len(), 1);
2915 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2916 assert_eq!(
2917 package.parties[0].email,
2918 Some("test@example.com".to_string())
2919 );
2920 assert_eq!(package.dependencies.len(), 1);
2921 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2922 }
2923
2924 #[test]
2925 fn test_parse_dsc_content_with_uploaders() {
2926 let content = "Source: mypkg
2927Version: 2.0
2928Architecture: all
2929Maintainer: Main Dev <main@example.com>
2930Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2931";
2932
2933 let package = parse_dsc_content(content);
2934 assert_eq!(package.parties.len(), 3);
2935 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2936 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2937 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2938 }
2939
2940 #[test]
2941 fn test_orig_tar_parser_is_match() {
2942 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2943 "package_1.0.orig.tar.gz"
2944 )));
2945 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2946 "abseil_0~20200923.3.orig.tar.xz"
2947 )));
2948 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2949 "package.debian.tar.gz"
2950 )));
2951 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2952 }
2953
2954 #[test]
2955 fn test_debian_tar_parser_is_match() {
2956 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2957 "package_1.0-1.debian.tar.xz"
2958 )));
2959 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2960 "abseil_20220623.1-1.debian.tar.gz"
2961 )));
2962 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2963 "package.orig.tar.gz"
2964 )));
2965 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2966 }
2967
2968 #[test]
2969 fn test_parse_orig_tar_filename() {
2970 let pkg = parse_source_tarball_filename(
2971 "abseil_0~20200923.3.orig.tar.gz",
2972 DatasourceId::DebianOriginalSourceTarball,
2973 );
2974 assert_eq!(pkg.name, Some("abseil".to_string()));
2975 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2976 assert_eq!(pkg.namespace, Some("debian".to_string()));
2977 assert_eq!(
2978 pkg.purl,
2979 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2980 );
2981 assert_eq!(
2982 pkg.datasource_id,
2983 Some(DatasourceId::DebianOriginalSourceTarball)
2984 );
2985 }
2986
2987 #[test]
2988 fn test_parse_debian_tar_filename() {
2989 let pkg = parse_source_tarball_filename(
2990 "abseil_20220623.1-1.debian.tar.xz",
2991 DatasourceId::DebianSourceMetadataTarball,
2992 );
2993 assert_eq!(pkg.name, Some("abseil".to_string()));
2994 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2995 assert_eq!(pkg.namespace, Some("debian".to_string()));
2996 assert_eq!(
2997 pkg.purl,
2998 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2999 );
3000 }
3001
3002 #[test]
3003 fn test_parse_deb_filename() {
3004 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
3005 assert_eq!(pkg.name, Some("nginx".to_string()));
3006 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3007
3008 let pkg = parse_deb_filename("invalid.deb");
3009 assert!(pkg.name.is_none());
3010 assert!(pkg.version.is_none());
3011 }
3012
3013 #[test]
3014 fn test_parse_source_tarball_various_compressions() {
3015 let pkg_gz = parse_source_tarball_filename(
3016 "test_1.0.orig.tar.gz",
3017 DatasourceId::DebianOriginalSourceTarball,
3018 );
3019 let pkg_xz = parse_source_tarball_filename(
3020 "test_1.0.orig.tar.xz",
3021 DatasourceId::DebianOriginalSourceTarball,
3022 );
3023 let pkg_bz2 = parse_source_tarball_filename(
3024 "test_1.0.orig.tar.bz2",
3025 DatasourceId::DebianOriginalSourceTarball,
3026 );
3027
3028 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3029 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3030 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3031 }
3032
3033 #[test]
3034 fn test_parse_source_tarball_invalid_format() {
3035 let pkg = parse_source_tarball_filename(
3036 "invalid-no-underscore.tar.gz",
3037 DatasourceId::DebianOriginalSourceTarball,
3038 );
3039 assert!(pkg.name.is_none());
3040 assert!(pkg.version.is_none());
3041 }
3042
3043 #[test]
3044 fn test_list_parser_is_match() {
3045 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3046 "/var/lib/dpkg/info/bash.list"
3047 )));
3048 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3049 "/var/lib/dpkg/info/package:amd64.list"
3050 )));
3051 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3052 "bash.list"
3053 )));
3054 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3055 "/var/lib/dpkg/info/bash.md5sums"
3056 )));
3057 }
3058
3059 #[test]
3060 fn test_md5sums_parser_is_match() {
3061 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3062 "/var/lib/dpkg/info/bash.md5sums"
3063 )));
3064 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3065 "/var/lib/dpkg/info/package:amd64.md5sums"
3066 )));
3067 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3068 "bash.md5sums"
3069 )));
3070 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3071 "/var/lib/dpkg/info/bash.list"
3072 )));
3073 }
3074
3075 #[test]
3076 fn test_parse_debian_file_list_plain_list() {
3077 let content = "/.
3078/bin
3079/bin/bash
3080/usr/bin/bashbug
3081/usr/share/doc/bash/README
3082";
3083 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3084 assert_eq!(pkg.name, Some("bash".to_string()));
3085 assert_eq!(pkg.file_references.len(), 3);
3086 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3087 assert_eq!(pkg.file_references[0].md5, None);
3088 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3089 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3090 }
3091
3092 #[test]
3093 fn test_parse_debian_file_list_md5sums() {
3094 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
30951c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3096f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3097";
3098 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3099 assert_eq!(pkg.name, Some("bash".to_string()));
3100 assert_eq!(pkg.file_references.len(), 3);
3101 assert_eq!(pkg.file_references[0].path, "bin/bash");
3102 assert_eq!(
3103 pkg.file_references[0].md5,
3104 Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
3105 );
3106 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3107 assert_eq!(
3108 pkg.file_references[1].md5,
3109 Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
3110 );
3111 }
3112
3113 #[test]
3114 fn test_parse_debian_file_list_with_arch() {
3115 let content = "/usr/bin/foo
3116/usr/lib/x86_64-linux-gnu/libfoo.so
3117";
3118 let pkg = parse_debian_file_list(
3119 content,
3120 "libfoo:amd64",
3121 DatasourceId::DebianInstalledFilesList,
3122 );
3123 assert_eq!(pkg.name, Some("libfoo".to_string()));
3124 assert!(pkg.purl.is_some());
3125 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3126 assert_eq!(pkg.file_references.len(), 2);
3127 }
3128
3129 #[test]
3130 fn test_parse_debian_file_list_skips_comments_and_empty() {
3131 let content = "# This is a comment
3132/bin/bash
3133
3134/usr/bin/bashbug
3135
3136";
3137 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3138 assert_eq!(pkg.file_references.len(), 2);
3139 }
3140
3141 #[test]
3142 fn test_parse_debian_file_list_md5sums_only() {
3143 let content = "abc123 usr/bin/tool
3144";
3145 let pkg =
3146 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3147 assert_eq!(pkg.name, None);
3148 assert_eq!(pkg.file_references.len(), 1);
3149 }
3150
3151 #[test]
3152 fn test_parse_debian_file_list_ignores_root_dirs() {
3153 let content = "/.
3154/bin
3155/bin/bash
3156/etc
3157/usr
3158/var
3159";
3160 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3161 assert_eq!(pkg.file_references.len(), 1);
3162 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3163 }
3164
3165 #[test]
3166 fn test_copyright_parser_is_match() {
3167 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3168 "/usr/share/doc/bash/copyright"
3169 )));
3170 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3171 "debian/copyright"
3172 )));
3173 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3174 "src/third_party/gperftools/dist/packages/deb/copyright"
3175 )));
3176 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3177 "copyright.txt"
3178 )));
3179 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3180 "/etc/copyright"
3181 )));
3182 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3183 "/tmp/sample_copyright"
3184 )));
3185 }
3186
3187 #[test]
3188 fn test_detect_debian_copyright_datasource() {
3189 assert_eq!(
3190 detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3191 DatasourceId::DebianCopyrightInSource
3192 );
3193 assert_eq!(
3194 detect_debian_copyright_datasource(&PathBuf::from(
3195 "src/third_party/gperftools/dist/packages/deb/copyright"
3196 )),
3197 DatasourceId::DebianCopyrightStandalone
3198 );
3199 assert_eq!(
3200 detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3201 DatasourceId::DebianCopyrightInPackage
3202 );
3203 assert_eq!(
3204 detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3205 DatasourceId::DebianCopyrightStandalone
3206 );
3207 }
3208
3209 #[test]
3210 fn test_extract_package_name_from_path() {
3211 assert_eq!(
3212 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3213 Some("bash".to_string())
3214 );
3215 assert_eq!(
3216 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3217 Some("libseccomp2".to_string())
3218 );
3219 assert_eq!(
3220 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3221 None
3222 );
3223 }
3224
3225 #[test]
3226 fn test_parse_copyright_dep5_format() {
3227 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3228Upstream-Name: libseccomp
3229Source: https://sourceforge.net/projects/libseccomp/
3230
3231Files: *
3232Copyright: 2012 Paul Moore <pmoore@redhat.com>
3233 2012 Ashley Lai <adlai@us.ibm.com>
3234License: LGPL-2.1
3235
3236License: LGPL-2.1
3237 This library is free software
3238";
3239 let pkg = parse_copyright_file(content, Some("libseccomp"));
3240 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3241 assert_eq!(pkg.namespace, Some("debian".to_string()));
3242 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3243 assert_eq!(
3244 pkg.extracted_license_statement,
3245 Some("LGPL-2.1".to_string())
3246 );
3247 assert!(pkg.parties.len() >= 2);
3248 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3249 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3250 }
3251
3252 #[test]
3253 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3254 let path = PathBuf::from(
3255 "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3256 );
3257 let pkg = DebianCopyrightParser::extract_first_package(&path);
3258
3259 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3260 let extracted = pkg
3261 .extracted_license_statement
3262 .as_deref()
3263 .expect("license statement should exist");
3264 assert!(extracted.contains("GPL-2+"));
3265 assert!(!pkg.license_detections.is_empty());
3266
3267 let primary = &pkg.license_detections[0];
3268 assert_eq!(
3269 primary.matches[0].matched_text.as_deref(),
3270 Some("License: GPL-2+")
3271 );
3272 assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
3273 assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
3274 }
3275
3276 #[test]
3277 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3278 let path = PathBuf::from("testdata/debian/copyright/copyright");
3279 let pkg = DebianCopyrightParser::extract_first_package(&path);
3280
3281 assert_eq!(pkg.license_detections.len(), 1);
3282 assert_eq!(pkg.other_license_detections.len(), 4);
3283
3284 let primary = &pkg.license_detections[0];
3285 assert_eq!(
3286 primary.matches[0].matched_text.as_deref(),
3287 Some("License: LGPL-2.1")
3288 );
3289 assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());
3290
3291 let ordered_lines: Vec<usize> = pkg
3292 .other_license_detections
3293 .iter()
3294 .map(|detection| detection.matches[0].start_line.get())
3295 .collect();
3296 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3297
3298 let ordered_texts: Vec<&str> = pkg
3299 .other_license_detections
3300 .iter()
3301 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3302 .collect();
3303 assert_eq!(
3304 ordered_texts,
3305 vec![
3306 "License: LGPL-2.1",
3307 "License: LGPL-2.1",
3308 "License: LGPL-2.1",
3309 "License: LGPL-2.1",
3310 ]
3311 );
3312 }
3313
3314 #[test]
3315 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3316 let path = PathBuf::from(
3317 "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3318 );
3319 let pkg = DebianCopyrightParser::extract_first_package(&path);
3320
3321 let zlib = pkg
3322 .other_license_detections
3323 .iter()
3324 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3325 .expect("at least one Zlib license paragraph should be detected");
3326 assert_eq!(
3327 zlib.matches[0].matched_text.as_deref(),
3328 Some("License: Zlib")
3329 );
3330
3331 let last_zlib = pkg
3332 .other_license_detections
3333 .iter()
3334 .rev()
3335 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3336 .expect("bottom standalone Zlib license paragraph should be detected");
3337 assert_eq!(
3338 last_zlib.matches[0].start_line,
3339 LineNumber::new(732).unwrap()
3340 );
3341 assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
3342 }
3343
3344 #[test]
3345 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3346 let path =
3347 PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3348 let pkg = DebianCopyrightParser::extract_first_package(&path);
3349
3350 assert_eq!(pkg.license_detections.len(), 1);
3351 let primary = &pkg.license_detections[0];
3352 assert_eq!(
3353 primary.matches[0].matched_text.as_deref(),
3354 Some("License: LGPL-3+ or GPL-2+")
3355 );
3356 assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
3357 assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());
3358
3359 assert!(pkg.other_license_detections.iter().any(|detection| {
3360 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3361 }));
3362 }
3363
3364 #[test]
3365 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3366 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3367 let pkg = parse_copyright_file(content, Some("foo"));
3368
3369 assert_eq!(pkg.license_detections.len(), 1);
3370 let primary = &pkg.license_detections[0];
3371 assert_eq!(
3372 primary.matches[0].matched_text.as_deref(),
3373 Some("License: GPL-2+")
3374 );
3375 assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
3376 }
3377
3378 #[test]
3379 fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3380 let raw_lines = vec![
3381 "Files: *".to_string(),
3382 "Copyright: 2024 Example Org".to_string(),
3383 "License: Apache-2.0".to_string(),
3384 " Licensed under the Apache License, Version 2.0.".to_string(),
3385 ];
3386
3387 let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3388 let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3389 .into_iter()
3390 .next()
3391 .expect("reference RFC822 paragraph should parse");
3392
3393 assert_eq!(paragraph.metadata.headers, expected.headers);
3394 assert_eq!(paragraph.metadata.body, expected.body);
3395 assert_eq!(
3396 paragraph.license_header_line,
3397 Some(("License: Apache-2.0".to_string(), 12))
3398 );
3399 }
3400
    /// Feeds a free-form (not machine-readable) copyright text to
    /// `parse_copyright_file` and checks that the supplied package name is kept,
    /// that `SSPL` is reported as the extracted license statement, and that at
    /// least one party is produced.
    #[test]
    fn test_parse_copyright_unstructured() {
        // The fixture has "Upstream Authors:", "Copyright:" and "License:" headings,
        // each followed by an indented value line.
        let content = "This package was debianized by John Doe.

Upstream Authors:
 Jane Smith

Copyright:
 2009 10gen

License:
 SSPL
";
        // The second argument supplies the package name asserted just below.
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        // Only non-emptiness is checked; the exact parties are not pinned here.
        assert!(!pkg.parties.is_empty());
    }
3419
3420 #[test]
3421 fn test_parse_copyright_holders() {
3422 let text = "2012 Paul Moore <pmoore@redhat.com>
34232012 Ashley Lai <adlai@us.ibm.com>
3424Copyright (C) 2015-2018 Example Corp";
3425 let holders = parse_copyright_holders(text);
3426 assert!(holders.len() >= 3);
3427 assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3428 assert!(holders.iter().any(|h| h.contains("Example Corp")));
3429 }
3430
3431 #[test]
3432 fn test_parse_copyright_empty() {
3433 let content = "This is just some text without proper copyright info.";
3434 let pkg = parse_copyright_file(content, Some("test"));
3435 assert_eq!(pkg.name, Some("test".to_string()));
3436 assert!(pkg.parties.is_empty());
3437 assert!(pkg.extracted_license_statement.is_none());
3438 }
3439
3440 #[test]
3441 fn test_deb_parser_is_match() {
3442 assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3443 assert!(DebianDebParser::is_match(&PathBuf::from(
3444 "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3445 )));
3446 assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3447 assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3448 }
3449
3450 #[test]
3451 fn test_parse_deb_filename_with_arch() {
3452 let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3453 assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3454 assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3455 assert_eq!(pkg.namespace, Some("debian".to_string()));
3456 assert_eq!(
3457 pkg.purl,
3458 Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3459 );
3460 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3461 }
3462
3463 #[test]
3464 fn test_parse_deb_filename_without_arch() {
3465 let pkg = parse_deb_filename("package_1.0-1_all.deb");
3466 assert_eq!(pkg.name, Some("package".to_string()));
3467 assert_eq!(pkg.version, Some("1.0-1".to_string()));
3468 assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3469 }
3470
3471 #[test]
3472 fn test_extract_deb_archive() {
3473 let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3474 if !test_path.exists() {
3475 return;
3476 }
3477
3478 let pkg = DebianDebParser::extract_first_package(&test_path);
3479
3480 assert_eq!(pkg.name, Some("adduser".to_string()));
3481 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3482 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3483 assert!(pkg.description.is_some());
3484 assert!(!pkg.parties.is_empty());
3485
3486 assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3487 assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3488 }
3489
3490 #[test]
3491 fn test_extract_deb_archive_with_control_tar_xz() {
3492 let deb = create_synthetic_deb_with_control_tar_xz();
3493
3494 let pkg = DebianDebParser::extract_first_package(deb.path());
3495
3496 assert_eq!(pkg.name, Some("synthetic".to_string()));
3497 assert_eq!(pkg.version, Some("1.2.3".to_string()));
3498 assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3499 assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3500 }
3501
3502 #[test]
3503 fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3504 let deb = create_synthetic_deb_with_copyright();
3505
3506 let pkg = DebianDebParser::extract_first_package(deb.path());
3507
3508 assert_eq!(pkg.name, Some("synthetic".to_string()));
3509 assert_eq!(
3510 pkg.extracted_license_statement,
3511 Some("Apache-2.0".to_string())
3512 );
3513 assert!(pkg.parties.iter().any(|party| {
3514 party.role.as_deref() == Some("copyright-holder")
3515 && party.name.as_deref() == Some("Example Org")
3516 }));
3517 }
3518
3519 #[test]
3520 fn test_parse_deb_filename_simple() {
3521 let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3522 assert_eq!(pkg.name, Some("adduser".to_string()));
3523 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3524 assert_eq!(pkg.namespace, Some("debian".to_string()));
3525 }
3526
3527 #[test]
3528 fn test_parse_deb_filename_invalid() {
3529 let pkg = parse_deb_filename("invalid.deb");
3530 assert!(pkg.name.is_none());
3531 assert!(pkg.version.is_none());
3532 }
3533
3534 #[test]
3535 fn test_distroless_parser() {
3536 let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3537
3538 assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3539
3540 if !test_file.exists() {
3541 eprintln!("Warning: Test file not found, skipping test");
3542 return;
3543 }
3544
3545 let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3546
3547 assert_eq!(pkg.package_type, Some(PackageType::Deb));
3548 assert_eq!(
3549 pkg.datasource_id,
3550 Some(DatasourceId::DebianDistrolessInstalledDb)
3551 );
3552 assert_eq!(pkg.name, Some("base-files".to_string()));
3553 assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3554 assert_eq!(pkg.namespace, Some("debian".to_string()));
3555 assert!(pkg.purl.is_some());
3556 assert!(
3557 pkg.purl
3558 .as_ref()
3559 .unwrap()
3560 .contains("pkg:deb/debian/base-files")
3561 );
3562 }
3563}