1use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41 DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
42};
43use crate::parsers::rfc822::{self, Rfc822Metadata};
44use crate::parsers::utils::{read_file_to_string, split_name_email};
45use crate::utils::spdx::combine_license_expressions;
46
47use super::PackageParser;
48use super::license_normalization::{
49 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
50 normalize_declared_license_key,
51};
52
/// Package type reported by every Debian parser defined in this file.
const PACKAGE_TYPE: PackageType = PackageType::Deb;
54
55fn default_package_data(datasource_id: DatasourceId) -> PackageData {
56 PackageData {
57 package_type: Some(PACKAGE_TYPE),
58 datasource_id: Some(datasource_id),
59 ..Default::default()
60 }
61}
62
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `debian` namespace.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `ubuntu` namespace.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that, when found in a lower-cased maintainer field, mark the
/// package as belonging to the `debian` namespace.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that, when found in a lower-cased maintainer field, mark the
/// package as belonging to the `ubuntu` namespace.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
76
/// Describes one relationship header and the flags applied to every
/// dependency parsed from it.
struct DepFieldSpec {
    /// Header name looked up in the paragraph (lower-case).
    field: &'static str,
    /// Scope label stored on each resulting dependency.
    scope: &'static str,
    /// Value copied into `Dependency::is_runtime`.
    is_runtime: bool,
    /// Value copied into `Dependency::is_optional` (alternatives are always
    /// marked optional regardless of this flag).
    is_optional: bool,
}
84
85const DEP_FIELDS: &[DepFieldSpec] = &[
86 DepFieldSpec {
87 field: "depends",
88 scope: "depends",
89 is_runtime: true,
90 is_optional: false,
91 },
92 DepFieldSpec {
93 field: "pre-depends",
94 scope: "pre-depends",
95 is_runtime: true,
96 is_optional: false,
97 },
98 DepFieldSpec {
99 field: "recommends",
100 scope: "recommends",
101 is_runtime: true,
102 is_optional: true,
103 },
104 DepFieldSpec {
105 field: "suggests",
106 scope: "suggests",
107 is_runtime: true,
108 is_optional: true,
109 },
110 DepFieldSpec {
111 field: "breaks",
112 scope: "breaks",
113 is_runtime: false,
114 is_optional: false,
115 },
116 DepFieldSpec {
117 field: "conflicts",
118 scope: "conflicts",
119 is_runtime: false,
120 is_optional: false,
121 },
122 DepFieldSpec {
123 field: "replaces",
124 scope: "replaces",
125 is_runtime: false,
126 is_optional: false,
127 },
128 DepFieldSpec {
129 field: "provides",
130 scope: "provides",
131 is_runtime: false,
132 is_optional: false,
133 },
134 DepFieldSpec {
135 field: "build-depends",
136 scope: "build-depends",
137 is_runtime: false,
138 is_optional: false,
139 },
140 DepFieldSpec {
141 field: "build-depends-indep",
142 scope: "build-depends-indep",
143 is_runtime: false,
144 is_optional: false,
145 },
146 DepFieldSpec {
147 field: "build-conflicts",
148 scope: "build-conflicts",
149 is_runtime: false,
150 is_optional: false,
151 },
152];
153
154pub struct DebianControlParser;
159
160impl PackageParser for DebianControlParser {
161 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
162
163 fn is_match(path: &Path) -> bool {
164 if let Some(name) = path.file_name()
165 && name == "control"
166 && let Some(parent) = path.parent()
167 && let Some(parent_name) = parent.file_name()
168 {
169 return parent_name == "debian";
170 }
171 false
172 }
173
174 fn extract_packages(path: &Path) -> Vec<PackageData> {
175 let content = match read_file_to_string(path) {
176 Ok(c) => c,
177 Err(e) => {
178 warn!("Failed to read debian/control at {:?}: {}", path, e);
179 return vec![default_package_data(DatasourceId::DebianControlInSource)];
180 }
181 };
182
183 let packages = parse_debian_control(&content);
184 if packages.is_empty() {
185 vec![default_package_data(DatasourceId::DebianControlInSource)]
186 } else {
187 packages
188 }
189 }
190}
191
192pub struct DebianInstalledParser;
197
198impl PackageParser for DebianInstalledParser {
199 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
200
201 fn is_match(path: &Path) -> bool {
202 let path_str = path.to_string_lossy();
203 path_str.ends_with("var/lib/dpkg/status")
204 }
205
206 fn extract_packages(path: &Path) -> Vec<PackageData> {
207 let content = match read_file_to_string(path) {
208 Ok(c) => c,
209 Err(e) => {
210 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
211 return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
212 }
213 };
214
215 let packages = parse_dpkg_status(&content);
216 if packages.is_empty() {
217 vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
218 } else {
219 packages
220 }
221 }
222}
223
224pub struct DebianDistrolessInstalledParser;
225
226impl PackageParser for DebianDistrolessInstalledParser {
227 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
228
229 fn is_match(path: &Path) -> bool {
230 let path_str = path.to_string_lossy();
231 path_str.contains("var/lib/dpkg/status.d/")
232 }
233
234 fn extract_packages(path: &Path) -> Vec<PackageData> {
235 let content = match read_file_to_string(path) {
236 Ok(c) => c,
237 Err(e) => {
238 warn!("Failed to read distroless status file at {:?}: {}", path, e);
239 return vec![default_package_data(
240 DatasourceId::DebianDistrolessInstalledDb,
241 )];
242 }
243 };
244
245 vec![parse_distroless_status(&content)]
246 }
247}
248
249fn parse_distroless_status(content: &str) -> PackageData {
250 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
251
252 if paragraphs.is_empty() {
253 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
254 }
255
256 build_package_from_paragraph(
257 ¶graphs[0],
258 None,
259 DatasourceId::DebianDistrolessInstalledDb,
260 )
261 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
262}
263
264fn parse_debian_control(content: &str) -> Vec<PackageData> {
274 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
275 if paragraphs.is_empty() {
276 return Vec::new();
277 }
278
279 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
281
282 let (source_paragraph, binary_start) = if has_source {
283 (Some(¶graphs[0]), 1)
284 } else {
285 (None, 0)
286 };
287
288 let source_meta = source_paragraph.map(extract_source_meta);
290
291 let mut packages = Vec::new();
292
293 for para in ¶graphs[binary_start..] {
294 if let Some(pkg) = build_package_from_paragraph(
295 para,
296 source_meta.as_ref(),
297 DatasourceId::DebianControlInSource,
298 ) {
299 packages.push(pkg);
300 }
301 }
302
303 if packages.is_empty()
304 && let Some(source_para) = source_paragraph
305 && let Some(pkg) = build_package_from_source_paragraph(source_para)
306 {
307 packages.push(pkg);
308 }
309
310 packages
311}
312
313fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
318 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
319 let mut packages = Vec::new();
320
321 for para in ¶graphs {
322 let status = rfc822::get_header_first(¶.headers, "status");
323 if status.as_deref() != Some("install ok installed") {
324 continue;
325 }
326
327 if let Some(pkg) =
328 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
329 {
330 packages.push(pkg);
331 }
332 }
333
334 packages
335}
336
/// Metadata extracted from a source paragraph and shared with the package
/// paragraphs built alongside it.
struct SourceMeta {
    /// Parties built from the `maintainer`, `original-maintainer` and
    /// `uploaders` headers.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
348
349fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
350 let mut parties = Vec::new();
351
352 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
354 let (name, email) = split_name_email(&maintainer);
355 parties.push(Party {
356 r#type: Some("person".to_string()),
357 role: Some("maintainer".to_string()),
358 name,
359 email,
360 url: None,
361 organization: None,
362 organization_url: None,
363 timezone: None,
364 });
365 }
366
367 if let Some(orig_maintainer) =
369 rfc822::get_header_first(¶graph.headers, "original-maintainer")
370 {
371 let (name, email) = split_name_email(&orig_maintainer);
372 parties.push(Party {
373 r#type: Some("person".to_string()),
374 role: Some("maintainer".to_string()),
375 name,
376 email,
377 url: None,
378 organization: None,
379 organization_url: None,
380 timezone: None,
381 });
382 }
383
384 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
386 for uploader in uploaders_str.split(',') {
387 let trimmed = uploader.trim();
388 if !trimmed.is_empty() {
389 let (name, email) = split_name_email(trimmed);
390 parties.push(Party {
391 r#type: Some("person".to_string()),
392 role: Some("uploader".to_string()),
393 name,
394 email,
395 url: None,
396 organization: None,
397 organization_url: None,
398 timezone: None,
399 });
400 }
401 }
402 }
403
404 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage");
405
406 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
408 .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
409
410 let code_view_url = rfc822::get_header_first(¶graph.headers, "vcs-browser");
411
412 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs");
413
414 SourceMeta {
415 parties,
416 homepage_url,
417 vcs_url,
418 code_view_url,
419 bug_tracking_url,
420 }
421}
422
423fn build_package_from_paragraph(
428 paragraph: &Rfc822Metadata,
429 source_meta: Option<&SourceMeta>,
430 datasource_id: DatasourceId,
431) -> Option<PackageData> {
432 let name = rfc822::get_header_first(¶graph.headers, "package")?;
433 let version = rfc822::get_header_first(¶graph.headers, "version");
434 let architecture = rfc822::get_header_first(¶graph.headers, "architecture");
435 let description = rfc822::get_header_first(¶graph.headers, "description");
436 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
437 let homepage = rfc822::get_header_first(¶graph.headers, "homepage");
438 let source_field = rfc822::get_header_first(¶graph.headers, "source");
439 let section = rfc822::get_header_first(¶graph.headers, "section");
440 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
441 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
442
443 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
444
445 let parties = if let Some(meta) = source_meta {
447 meta.parties.clone()
448 } else {
449 let mut p = Vec::new();
450 if let Some(m) = &maintainer_str {
451 let (n, e) = split_name_email(m);
452 p.push(Party {
453 r#type: Some("person".to_string()),
454 role: Some("maintainer".to_string()),
455 name: n,
456 email: e,
457 url: None,
458 organization: None,
459 organization_url: None,
460 timezone: None,
461 });
462 }
463 p
464 };
465
466 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
468 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
469 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
470 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
471
472 let purl = build_debian_purl(
474 &name,
475 version.as_deref(),
476 namespace.as_deref(),
477 architecture.as_deref(),
478 );
479
480 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
482
483 let keywords = section.into_iter().collect();
485
486 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
488
489 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
491 if let Some(ma) = &multi_arch
492 && !ma.is_empty()
493 {
494 extra_data.insert(
495 "multi_arch".to_string(),
496 serde_json::Value::String(ma.clone()),
497 );
498 }
499 if let Some(size_str) = &installed_size
500 && let Ok(size) = size_str.parse::<u64>()
501 {
502 extra_data.insert(
503 "installed_size".to_string(),
504 serde_json::Value::Number(serde_json::Number::from(size)),
505 );
506 }
507
508 let qualifiers = architecture.as_ref().map(|arch| {
510 let mut q = HashMap::new();
511 q.insert("arch".to_string(), arch.clone());
512 q
513 });
514
515 Some(PackageData {
516 package_type: Some(PACKAGE_TYPE),
517 namespace: namespace.clone(),
518 name: Some(name),
519 version,
520 qualifiers,
521 subpath: None,
522 primary_language: None,
523 description,
524 release_date: None,
525 parties,
526 keywords,
527 homepage_url,
528 download_url: None,
529 size: None,
530 sha1: None,
531 md5: None,
532 sha256: None,
533 sha512: None,
534 bug_tracking_url,
535 code_view_url,
536 vcs_url,
537 copyright: None,
538 holder: None,
539 declared_license_expression: None,
540 declared_license_expression_spdx: None,
541 license_detections: Vec::new(),
542 other_license_expression: None,
543 other_license_expression_spdx: None,
544 other_license_detections: Vec::new(),
545 extracted_license_statement: None,
546 notice_text: None,
547 source_packages,
548 file_references: Vec::new(),
549 is_private: false,
550 is_virtual: false,
551 extra_data: if extra_data.is_empty() {
552 None
553 } else {
554 Some(extra_data)
555 },
556 dependencies,
557 repository_homepage_url: None,
558 repository_download_url: None,
559 api_data_url: None,
560 datasource_id: Some(datasource_id),
561 purl,
562 })
563}
564
565fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
566 let name = rfc822::get_header_first(¶graph.headers, "source")?;
567 let version = rfc822::get_header_first(¶graph.headers, "version");
568 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
569
570 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
571 let source_meta = extract_source_meta(paragraph);
572
573 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
574 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
575
576 let section = rfc822::get_header_first(¶graph.headers, "section");
577 let keywords = section.into_iter().collect();
578
579 Some(PackageData {
580 package_type: Some(PACKAGE_TYPE),
581 namespace: namespace.clone(),
582 name: Some(name),
583 version,
584 qualifiers: None,
585 subpath: None,
586 primary_language: None,
587 description: None,
588 release_date: None,
589 parties: source_meta.parties,
590 keywords,
591 homepage_url: source_meta.homepage_url,
592 download_url: None,
593 size: None,
594 sha1: None,
595 md5: None,
596 sha256: None,
597 sha512: None,
598 bug_tracking_url: source_meta.bug_tracking_url,
599 code_view_url: source_meta.code_view_url,
600 vcs_url: source_meta.vcs_url,
601 copyright: None,
602 holder: None,
603 declared_license_expression: None,
604 declared_license_expression_spdx: None,
605 license_detections: Vec::new(),
606 other_license_expression: None,
607 other_license_expression_spdx: None,
608 other_license_detections: Vec::new(),
609 extracted_license_statement: None,
610 notice_text: None,
611 source_packages: Vec::new(),
612 file_references: Vec::new(),
613 is_private: false,
614 is_virtual: false,
615 extra_data: None,
616 dependencies,
617 repository_homepage_url: None,
618 repository_download_url: None,
619 api_data_url: None,
620 datasource_id: Some(DatasourceId::DebianControlInSource),
621 purl,
622 })
623}
624
625fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
630 if let Some(ver) = version {
632 let ver_lower = ver.to_lowercase();
633 for clue in VERSION_CLUES_UBUNTU {
634 if ver_lower.contains(clue) {
635 return Some("ubuntu".to_string());
636 }
637 }
638 for clue in VERSION_CLUES_DEBIAN {
639 if ver_lower.contains(clue) {
640 return Some("debian".to_string());
641 }
642 }
643 }
644
645 if let Some(maint) = maintainer {
647 let maint_lower = maint.to_lowercase();
648 for clue in MAINTAINER_CLUES_UBUNTU {
649 if maint_lower.contains(clue) {
650 return Some("ubuntu".to_string());
651 }
652 }
653 for clue in MAINTAINER_CLUES_DEBIAN {
654 if maint_lower.contains(clue) {
655 return Some("debian".to_string());
656 }
657 }
658 }
659
660 Some("debian".to_string())
662}
663
664fn build_debian_purl(
669 name: &str,
670 version: Option<&str>,
671 namespace: Option<&str>,
672 architecture: Option<&str>,
673) -> Option<String> {
674 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
675
676 if let Some(ns) = namespace {
677 purl.with_namespace(ns).ok()?;
678 }
679
680 if let Some(ver) = version {
681 purl.with_version(ver).ok()?;
682 }
683
684 if let Some(arch) = architecture {
685 purl.add_qualifier("arch", arch).ok()?;
686 }
687
688 Some(purl.to_string())
689}
690
691fn parse_all_dependencies(
696 headers: &HashMap<String, Vec<String>>,
697 namespace: Option<&str>,
698) -> Vec<Dependency> {
699 let mut dependencies = Vec::new();
700
701 for spec in DEP_FIELDS {
702 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
703 dependencies.extend(parse_dependency_field(
704 &dep_str,
705 spec.scope,
706 spec.is_runtime,
707 spec.is_optional,
708 namespace,
709 ));
710 }
711 }
712
713 dependencies
714}
715
716fn parse_dependency_field(
725 dep_str: &str,
726 scope: &str,
727 is_runtime: bool,
728 is_optional: bool,
729 namespace: Option<&str>,
730) -> Vec<Dependency> {
731 let mut deps = Vec::new();
732
733 let dep_re = Regex::new(
736 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
737 )
738 .unwrap();
739
740 for group in dep_str.split(',') {
741 let group = group.trim();
742 if group.is_empty() {
743 continue;
744 }
745
746 let alternatives: Vec<&str> = group.split('|').collect();
748 let has_alternatives = alternatives.len() > 1;
749
750 for alt in alternatives {
751 let alt = alt.trim();
752 if alt.is_empty() {
753 continue;
754 }
755
756 if let Some(caps) = dep_re.captures(alt) {
757 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
758 let operator = caps.get(2).map(|m| m.as_str().trim());
759 let version = caps.get(3).map(|m| m.as_str().trim());
760
761 if pkg_name.is_empty() {
762 continue;
763 }
764
765 if pkg_name.starts_with('$') {
767 continue;
768 }
769
770 let extracted_requirement = match (operator, version) {
771 (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
772 _ => None,
773 };
774
775 let is_pinned = operator.map(|op| op == "=");
776
777 let purl = build_debian_purl(pkg_name, None, namespace, None);
778
779 deps.push(Dependency {
780 purl,
781 extracted_requirement,
782 scope: Some(scope.to_string()),
783 is_runtime: Some(is_runtime),
784 is_optional: Some(is_optional || has_alternatives),
785 is_pinned,
786 is_direct: Some(true),
787 resolved_package: None,
788 extra_data: None,
789 });
790 }
791 }
792 }
793
794 deps
795}
796
797fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
805 let Some(source_str) = source else {
806 return Vec::new();
807 };
808
809 let trimmed = source_str.trim();
810 if trimmed.is_empty() {
811 return Vec::new();
812 }
813
814 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
816 let name = trimmed[..paren_start].trim();
817 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
818 (
819 name,
820 if version.is_empty() {
821 None
822 } else {
823 Some(version)
824 },
825 )
826 } else {
827 (trimmed, None)
828 };
829
830 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
831 vec![purl]
832 } else {
833 Vec::new()
834 }
835}
836
// Registration for the `debian/control` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration for the dpkg `status` database parser.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration for the distroless `status.d` parser.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
864
865pub struct DebianDscParser;
874
875impl PackageParser for DebianDscParser {
876 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
877
878 fn is_match(path: &Path) -> bool {
879 path.extension().and_then(|e| e.to_str()) == Some("dsc")
880 }
881
882 fn extract_packages(path: &Path) -> Vec<PackageData> {
883 let content = match read_file_to_string(path) {
884 Ok(c) => c,
885 Err(e) => {
886 warn!("Failed to read .dsc file {:?}: {}", path, e);
887 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
888 }
889 };
890
891 vec![parse_dsc_content(&content)]
892 }
893}
894
// Registration for the `.dsc` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian source control file (.dsc)",
    &["**/*.dsc"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
902
/// Removes PGP clear-sign armor from `content`, keeping only the signed text.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines and the
/// first blank line that follow it (while nothing has been emitted yet), and
/// everything from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`. Every
/// kept line is terminated with `\n`. Content without armor is returned
/// unchanged apart from line-ending normalisation.
fn strip_pgp_signature(content: &str) -> String {
    let mut cleaned = String::new();
    let mut in_armor_header = false;
    let mut in_signature = false;

    for line in content.lines() {
        match line {
            l if l.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") => in_armor_header = true,
            l if l.starts_with("-----BEGIN PGP SIGNATURE-----") => in_signature = true,
            l if l.starts_with("-----END PGP SIGNATURE-----") => in_signature = false,
            // Armor header line: skip it.
            l if in_armor_header && l.starts_with("Hash:") => {}
            // The blank line separating the armor header from the signed text.
            l if in_armor_header && l.is_empty() && cleaned.is_empty() => in_armor_header = false,
            l if !in_signature => {
                cleaned.push_str(l);
                cleaned.push('\n');
            }
            _ => {}
        }
    }

    cleaned
}
936
937fn parse_dsc_content(content: &str) -> PackageData {
938 let clean_content = strip_pgp_signature(content);
939 let metadata = rfc822::parse_rfc822_content(&clean_content);
940 let headers = &metadata.headers;
941
942 let name = rfc822::get_header_first(headers, "source");
943 let version = rfc822::get_header_first(headers, "version");
944 let architecture = rfc822::get_header_first(headers, "architecture");
945 let namespace = Some("debian".to_string());
946
947 let mut package = PackageData {
948 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
949 package_type: Some(PACKAGE_TYPE),
950 namespace: namespace.clone(),
951 name: name.clone(),
952 version: version.clone(),
953 description: rfc822::get_header_first(headers, "description"),
954 homepage_url: rfc822::get_header_first(headers, "homepage"),
955 vcs_url: rfc822::get_header_first(headers, "vcs-git"),
956 code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
957 ..Default::default()
958 };
959
960 if let (Some(n), Some(v)) = (&name, &version) {
962 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
963 }
964
965 if let Some(n) = &name
967 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
968 {
969 package.source_packages.push(source_purl);
970 }
971
972 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
973 let (name_opt, email_opt) = split_name_email(&maintainer);
974 package.parties.push(Party {
975 r#type: None,
976 role: Some("maintainer".to_string()),
977 name: name_opt,
978 email: email_opt,
979 url: None,
980 organization: None,
981 organization_url: None,
982 timezone: None,
983 });
984 }
985
986 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
987 for uploader in uploaders_str.split(',') {
988 let uploader = uploader.trim();
989 if uploader.is_empty() {
990 continue;
991 }
992 let (name_opt, email_opt) = split_name_email(uploader);
993 package.parties.push(Party {
994 r#type: None,
995 role: Some("uploader".to_string()),
996 name: name_opt,
997 email: email_opt,
998 url: None,
999 organization: None,
1000 organization_url: None,
1001 timezone: None,
1002 });
1003 }
1004 }
1005
1006 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1008 package.dependencies.extend(parse_dependency_field(
1009 &build_deps,
1010 "build",
1011 false,
1012 false,
1013 namespace.as_deref(),
1014 ));
1015 }
1016
1017 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1019 let map = package.extra_data.get_or_insert_with(HashMap::new);
1020 map.insert("standards_version".to_string(), standards.into());
1021 }
1022
1023 package
1024}
1025
1026pub struct DebianOrigTarParser;
1028
1029impl PackageParser for DebianOrigTarParser {
1030 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1031
1032 fn is_match(path: &Path) -> bool {
1033 path.file_name()
1034 .and_then(|n| n.to_str())
1035 .map(|name| name.contains(".orig.tar."))
1036 .unwrap_or(false)
1037 }
1038
1039 fn extract_packages(path: &Path) -> Vec<PackageData> {
1040 let filename = match path.file_name().and_then(|n| n.to_str()) {
1041 Some(f) => f,
1042 None => {
1043 return vec![default_package_data(
1044 DatasourceId::DebianOriginalSourceTarball,
1045 )];
1046 }
1047 };
1048
1049 vec![parse_source_tarball_filename(
1050 filename,
1051 DatasourceId::DebianOriginalSourceTarball,
1052 )]
1053 }
1054}
1055
// Registration for the original source tarball parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian original source tarball",
    &["**/*.orig.tar.*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
);
1063
1064pub struct DebianDebianTarParser;
1066
1067impl PackageParser for DebianDebianTarParser {
1068 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1069
1070 fn is_match(path: &Path) -> bool {
1071 path.file_name()
1072 .and_then(|n| n.to_str())
1073 .map(|name| name.contains(".debian.tar."))
1074 .unwrap_or(false)
1075 }
1076
1077 fn extract_packages(path: &Path) -> Vec<PackageData> {
1078 let filename = match path.file_name().and_then(|n| n.to_str()) {
1079 Some(f) => f,
1080 None => {
1081 return vec![default_package_data(
1082 DatasourceId::DebianSourceMetadataTarball,
1083 )];
1084 }
1085 };
1086
1087 vec![parse_source_tarball_filename(
1088 filename,
1089 DatasourceId::DebianSourceMetadataTarball,
1090 )]
1091 }
1092}
1093
// Registration for the Debian packaging tarball parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian source metadata tarball",
    &["**/*.debian.tar.*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
);
1101
1102fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1103 let without_tar_ext = filename
1104 .trim_end_matches(".gz")
1105 .trim_end_matches(".xz")
1106 .trim_end_matches(".bz2")
1107 .trim_end_matches(".tar");
1108
1109 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1110 if parts.len() < 2 {
1111 return default_package_data(datasource_id);
1112 }
1113
1114 let name = parts[0].to_string();
1115 let version_with_suffix = parts[1];
1116
1117 let version = version_with_suffix
1118 .trim_end_matches(".orig")
1119 .trim_end_matches(".debian")
1120 .to_string();
1121
1122 let namespace = Some("debian".to_string());
1123
1124 PackageData {
1125 datasource_id: Some(datasource_id),
1126 package_type: Some(PACKAGE_TYPE),
1127 namespace: namespace.clone(),
1128 name: Some(name.clone()),
1129 version: Some(version.clone()),
1130 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1131 ..Default::default()
1132 }
1133}
1134
1135pub struct DebianInstalledListParser;
1137
1138impl PackageParser for DebianInstalledListParser {
1139 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1140
1141 fn is_match(path: &Path) -> bool {
1142 path.extension().and_then(|e| e.to_str()) == Some("list")
1143 && path
1144 .to_str()
1145 .map(|p| p.contains("/var/lib/dpkg/info/"))
1146 .unwrap_or(false)
1147 }
1148
1149 fn extract_packages(path: &Path) -> Vec<PackageData> {
1150 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1151 Some(f) => f,
1152 None => {
1153 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1154 }
1155 };
1156
1157 let content = match read_file_to_string(path) {
1158 Ok(c) => c,
1159 Err(e) => {
1160 warn!("Failed to read .list file {:?}: {}", path, e);
1161 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1162 }
1163 };
1164
1165 vec![parse_debian_file_list(
1166 &content,
1167 filename,
1168 DatasourceId::DebianInstalledFilesList,
1169 )]
1170 }
1171}
1172
// Registration for the dpkg `*.list` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian installed files list",
    &["**/var/lib/dpkg/info/*.list"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
1180
1181pub struct DebianInstalledMd5sumsParser;
1183
1184impl PackageParser for DebianInstalledMd5sumsParser {
1185 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1186
1187 fn is_match(path: &Path) -> bool {
1188 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1189 && path
1190 .to_str()
1191 .map(|p| p.contains("/var/lib/dpkg/info/"))
1192 .unwrap_or(false)
1193 }
1194
1195 fn extract_packages(path: &Path) -> Vec<PackageData> {
1196 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1197 Some(f) => f,
1198 None => {
1199 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1200 }
1201 };
1202
1203 let content = match read_file_to_string(path) {
1204 Ok(c) => c,
1205 Err(e) => {
1206 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1207 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1208 }
1209 };
1210
1211 vec![parse_debian_file_list(
1212 &content,
1213 filename,
1214 DatasourceId::DebianInstalledMd5Sums,
1215 )]
1216 }
1217}
1218
// Registration for the dpkg `*.md5sums` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Debian installed package md5sums",
    &["**/var/lib/dpkg/info/*.md5sums"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
1226
/// Paths that are left out of a package's file references when they appear
/// verbatim as an entry in a `.list` / `.md5sums` file.
const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1228
1229fn parse_debian_file_list(
1230 content: &str,
1231 filename: &str,
1232 datasource_id: DatasourceId,
1233) -> PackageData {
1234 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1235 (Some(pkg.to_string()), Some(arch.to_string()))
1236 } else if filename == "md5sums" {
1237 (None, None)
1238 } else {
1239 (Some(filename.to_string()), None)
1240 };
1241
1242 let mut file_references = Vec::new();
1243
1244 for line in content.lines() {
1245 let line = line.trim();
1246 if line.is_empty() || line.starts_with('#') {
1247 continue;
1248 }
1249
1250 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1251 (Some(hash.trim().to_string()), p.trim())
1252 } else {
1253 (None, line)
1254 };
1255
1256 if IGNORED_ROOT_DIRS.contains(&path) {
1257 continue;
1258 }
1259
1260 file_references.push(FileReference {
1261 path: path.to_string(),
1262 size: None,
1263 sha1: None,
1264 md5: md5sum,
1265 sha256: None,
1266 sha512: None,
1267 extra_data: None,
1268 });
1269 }
1270
1271 if file_references.is_empty() {
1272 return default_package_data(datasource_id);
1273 }
1274
1275 let namespace = Some("debian".to_string());
1276 let mut package = PackageData {
1277 datasource_id: Some(datasource_id),
1278 package_type: Some(PACKAGE_TYPE),
1279 namespace: namespace.clone(),
1280 name: name.clone(),
1281 file_references,
1282 ..Default::default()
1283 };
1284
1285 if let Some(n) = &name {
1286 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1287 }
1288
1289 package
1290}
1291
1292pub struct DebianCopyrightParser;
1294
1295impl PackageParser for DebianCopyrightParser {
1296 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1297
1298 fn is_match(path: &Path) -> bool {
1299 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1300 if filename != "copyright" {
1301 return false;
1302 }
1303 let path_str = path.to_string_lossy();
1304 path_str.contains("/debian/")
1305 || path_str.contains("/usr/share/doc/")
1306 || path_str.ends_with("debian/copyright")
1307 } else {
1308 false
1309 }
1310 }
1311
1312 fn extract_packages(path: &Path) -> Vec<PackageData> {
1313 let content = match read_file_to_string(path) {
1314 Ok(c) => c,
1315 Err(e) => {
1316 warn!("Failed to read copyright file {:?}: {}", path, e);
1317 return vec![default_package_data(DatasourceId::DebianCopyright)];
1318 }
1319 };
1320
1321 let package_name = extract_package_name_from_path(path);
1322 vec![parse_copyright_file(&content, package_name.as_deref())]
1323 }
1324}
1325
// Registration entry for Debian copyright files: a description, the glob
// patterns that select the files, the "deb" type tag, an empty string and a
// link to the copyright-format specification.
// NOTE(review): argument meanings are inferred from the values passed here —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &["**/debian/copyright", "**/usr/share/doc/*/copyright"],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
1333
/// Derives a package name from a path by returning the component that directly
/// follows the first `doc` directory (as in `usr/share/doc/<name>/copyright`).
///
/// Returns `None` when no `doc` component is followed by a normal component,
/// or when that following component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<_> = path.components().collect();
    parts
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(next)) if dir.to_str() == Some("doc") => {
                // The first qualifying pair decides the result, even if the
                // name turns out not to be valid UTF-8.
                Some(next.to_str().map(|s| s.to_string()))
            }
            _ => None,
        })
        .flatten()
}
1348
1349fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1350 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1351
1352 let is_dep5 = paragraphs
1353 .first()
1354 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1355 .is_some();
1356
1357 let namespace = Some("debian".to_string());
1358 let mut parties = Vec::new();
1359 let mut license_statements = Vec::new();
1360 let mut primary_license_detection = None;
1361 let mut header_license_detection = None;
1362 let mut other_license_detections = Vec::new();
1363
1364 if is_dep5 {
1365 for para in ¶graphs {
1366 if let Some(copyright_text) =
1367 rfc822::get_header_first(¶.metadata.headers, "copyright")
1368 {
1369 for holder in parse_copyright_holders(©right_text) {
1370 if !holder.is_empty() {
1371 parties.push(Party {
1372 r#type: None,
1373 role: Some("copyright-holder".to_string()),
1374 name: Some(holder),
1375 email: None,
1376 url: None,
1377 organization: None,
1378 organization_url: None,
1379 timezone: None,
1380 });
1381 }
1382 }
1383 }
1384
1385 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1386 let license_name = license.lines().next().unwrap_or(&license).trim();
1387 if !license_name.is_empty()
1388 && !license_statements.contains(&license_name.to_string())
1389 {
1390 license_statements.push(license_name.to_string());
1391 }
1392
1393 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1394 let detection =
1395 build_primary_license_detection(license_name, matched_text, line_no);
1396 let is_header_paragraph =
1397 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1398 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1399 == Some("*")
1400 {
1401 primary_license_detection = Some(detection);
1402 } else if is_header_paragraph {
1403 header_license_detection.get_or_insert(detection);
1404 } else {
1405 other_license_detections.push(detection);
1406 }
1407 }
1408 }
1409 }
1410
1411 if primary_license_detection.is_none() && header_license_detection.is_some() {
1412 primary_license_detection = header_license_detection;
1413 }
1414 } else {
1415 let copyright_block = extract_unstructured_field(content, "Copyright:");
1416 if let Some(text) = copyright_block {
1417 for holder in parse_copyright_holders(&text) {
1418 if !holder.is_empty() {
1419 parties.push(Party {
1420 r#type: None,
1421 role: Some("copyright-holder".to_string()),
1422 name: Some(holder),
1423 email: None,
1424 url: None,
1425 organization: None,
1426 organization_url: None,
1427 timezone: None,
1428 });
1429 }
1430 }
1431 }
1432
1433 let license_block = extract_unstructured_field(content, "License:");
1434 if let Some(text) = license_block {
1435 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1436 }
1437 }
1438
1439 let extracted_license_statement = if license_statements.is_empty() {
1440 None
1441 } else {
1442 Some(license_statements.join(" AND "))
1443 };
1444
1445 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1446 let declared_license_expression = license_detections
1447 .first()
1448 .map(|detection| detection.license_expression.clone());
1449 let declared_license_expression_spdx = license_detections
1450 .first()
1451 .map(|detection| detection.license_expression_spdx.clone());
1452 let other_license_expression = combine_license_expressions(
1453 other_license_detections
1454 .iter()
1455 .map(|detection| detection.license_expression.clone()),
1456 );
1457 let other_license_expression_spdx = combine_license_expressions(
1458 other_license_detections
1459 .iter()
1460 .map(|detection| detection.license_expression_spdx.clone()),
1461 );
1462
1463 PackageData {
1464 datasource_id: Some(DatasourceId::DebianCopyright),
1465 package_type: Some(PACKAGE_TYPE),
1466 namespace: namespace.clone(),
1467 name: package_name.map(|s| s.to_string()),
1468 parties,
1469 declared_license_expression,
1470 declared_license_expression_spdx,
1471 license_detections,
1472 other_license_expression,
1473 other_license_expression_spdx,
1474 other_license_detections,
1475 extracted_license_statement,
1476 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1477 ..Default::default()
1478 }
1479}
1480
/// One blank-line-delimited paragraph of a Debian copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Headers parsed from the paragraph, keyed by lowercased field name.
    metadata: Rfc822Metadata,
    /// Text (trailing whitespace removed) and 1-based line number of the
    /// paragraph's first `License` header line, if it has one.
    license_header_line: Option<(String, usize)>,
}
1486
1487fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1488 let mut paragraphs = Vec::new();
1489 let mut current_lines = Vec::new();
1490 let mut current_start_line = 1usize;
1491
1492 for (idx, line) in content.lines().enumerate() {
1493 let line_no = idx + 1;
1494 if line.is_empty() {
1495 if !current_lines.is_empty() {
1496 paragraphs.push(finalize_copyright_paragraph(
1497 std::mem::take(&mut current_lines),
1498 current_start_line,
1499 ));
1500 }
1501 current_start_line = line_no + 1;
1502 } else {
1503 if current_lines.is_empty() {
1504 current_start_line = line_no;
1505 }
1506 current_lines.push(line.to_string());
1507 }
1508 }
1509
1510 if !current_lines.is_empty() {
1511 paragraphs.push(finalize_copyright_paragraph(
1512 current_lines,
1513 current_start_line,
1514 ));
1515 }
1516
1517 paragraphs
1518}
1519
1520fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1521 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1522 let mut current_name: Option<String> = None;
1523 let mut current_value = String::new();
1524 let mut license_header_line = None;
1525
1526 for (idx, line) in raw_lines.iter().enumerate() {
1527 if line.starts_with(' ') || line.starts_with('\t') {
1528 if current_name.is_some() {
1529 current_value.push('\n');
1530 current_value.push_str(line);
1531 }
1532 continue;
1533 }
1534
1535 if let Some(name) = current_name.take() {
1536 add_copyright_header_value(&mut headers, &name, ¤t_value);
1537 current_value.clear();
1538 }
1539
1540 if let Some((name, value)) = line.split_once(':') {
1541 let normalized_name = name.trim().to_ascii_lowercase();
1542 if normalized_name == "license" && license_header_line.is_none() {
1543 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1544 }
1545 current_name = Some(normalized_name);
1546 current_value = value.trim_start().to_string();
1547 }
1548 }
1549
1550 if let Some(name) = current_name.take() {
1551 add_copyright_header_value(&mut headers, &name, ¤t_value);
1552 }
1553
1554 CopyrightParagraph {
1555 metadata: Rfc822Metadata {
1556 headers,
1557 body: String::new(),
1558 },
1559 license_header_line,
1560 }
1561}
1562
/// Records `value` under `name`, with trailing whitespace removed.
///
/// The entry for `name` is always created; a value that is empty after
/// trimming leaves the entry's list unchanged.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_string()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_string()),
    }
}
1570
1571fn build_primary_license_detection(
1572 license_name: &str,
1573 matched_text: String,
1574 line_no: usize,
1575) -> LicenseDetection {
1576 let normalized = normalize_debian_license_name(license_name);
1577
1578 build_declared_license_detection(
1579 &normalized,
1580 DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no),
1581 )
1582}
1583
1584fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1585 match license_name.trim() {
1586 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1587 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1588 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1589 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1590 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1591 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1592 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1593 "public-domain" => {
1594 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1595 }
1596 other => normalize_declared_license_key(other)
1597 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1598 }
1599}
1600
/// Extracts copyright holder names from the text of a copyright field.
///
/// Each line is handled on its own: leading copyright markers (`Copyright`,
/// `copyright`, `(C)`, `(c)`, `©`) are removed, everything before the first
/// alphabetic character (typically years and punctuation) is skipped, and the
/// trimmed remainder is kept as a holder when it is longer than two bytes.
/// Lines without any alphabetic character after the markers produce nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    /// Removes every leading copyright marker, tolerating whitespace between
    /// consecutive markers so that `Copyright (C) 2024 ...` loses both of them.
    fn strip_copyright_markers(line: &str) -> &str {
        const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

        let mut rest = line.trim();
        loop {
            let current = rest;
            match MARKERS
                .iter()
                .find_map(|marker| current.strip_prefix(*marker))
            {
                // Every marker is non-empty, so each round consumes input and
                // the loop terminates.
                Some(tail) => rest = tail.trim_start(),
                None => return rest,
            }
        }
    }

    text.lines()
        .filter_map(|line| {
            let cleaned = strip_copyright_markers(line);
            // Skip the year/punctuation prefix: the holder starts at the first
            // alphabetic character.
            let holder_start = cleaned.find(char::is_alphabetic)?;
            let holder = cleaned[holder_start..].trim();
            (holder.len() > 2).then(|| holder.to_string())
        })
        .collect()
}
1633
/// Collects the value of `field_name` from free-form text.
///
/// Every line starting with `field_name` contributes the rest of that line.
/// Once the field has been seen, lines beginning with whitespace are appended
/// as continuation lines, empty lines are skipped, and the first other
/// non-empty line ends the scan. Pieces are trimmed and joined with newlines;
/// `None` is returned when nothing but whitespace was collected.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut seen_field = false;

    for raw in content.lines() {
        let piece = if raw.starts_with(field_name) {
            seen_field = true;
            raw.trim_start_matches(field_name).trim()
        } else if !seen_field {
            continue;
        } else if raw.starts_with(char::is_whitespace) {
            raw.trim()
        } else if raw.trim().is_empty() {
            continue;
        } else {
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    Some(collected.trim())
        .filter(|value| !value.is_empty())
        .map(|value| value.to_string())
}
1660
1661pub struct DebianDebParser;
1663
1664impl PackageParser for DebianDebParser {
1665 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1666
1667 fn is_match(path: &Path) -> bool {
1668 path.extension().and_then(|e| e.to_str()) == Some("deb")
1669 }
1670
1671 fn extract_packages(path: &Path) -> Vec<PackageData> {
1672 if let Ok(data) = extract_deb_archive(path) {
1674 return vec![data];
1675 }
1676
1677 let filename = match path.file_name().and_then(|n| n.to_str()) {
1679 Some(f) => f,
1680 None => {
1681 return vec![default_package_data(DatasourceId::DebianDeb)];
1682 }
1683 };
1684
1685 vec![parse_deb_filename(filename)]
1686 }
1687}
1688
// Registration entry for `.deb` archives: a description, the glob pattern that
// selects the files, the "deb" type tag, an empty string and a link to the
// Debian policy chapter on binary packages.
// NOTE(review): argument meanings are inferred from the values passed here —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1696
1697fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1698 use flate2::read::GzDecoder;
1699 use liblzma::read::XzDecoder;
1700 use std::io::{Cursor, Read};
1701
1702 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1703
1704 let mut archive = ar::Archive::new(file);
1705 let mut package: Option<PackageData> = None;
1706
1707 while let Some(entry_result) = archive.next_entry() {
1708 let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1709
1710 let entry_name = std::str::from_utf8(entry.header().identifier())
1711 .map_err(|e| format!("Invalid entry name: {}", e))?;
1712 let entry_name = entry_name.trim().to_string();
1713
1714 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1715 let mut control_data = Vec::new();
1716 entry
1717 .read_to_end(&mut control_data)
1718 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1719
1720 if entry_name.ends_with(".gz") {
1721 let decoder = GzDecoder::new(Cursor::new(control_data));
1722 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1723 package = Some(parsed_package);
1724 }
1725 } else if entry_name.ends_with(".xz") {
1726 let decoder = XzDecoder::new(Cursor::new(control_data));
1727 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1728 package = Some(parsed_package);
1729 }
1730 }
1731 } else if entry_name.starts_with("data.tar") {
1732 let mut data = Vec::new();
1733 entry
1734 .read_to_end(&mut data)
1735 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1736
1737 let Some(current_package) = package.as_mut() else {
1738 continue;
1739 };
1740
1741 if entry_name.ends_with(".gz") {
1742 let decoder = GzDecoder::new(Cursor::new(data));
1743 merge_deb_data_archive(decoder, current_package)?;
1744 } else if entry_name.ends_with(".xz") {
1745 let decoder = XzDecoder::new(Cursor::new(data));
1746 merge_deb_data_archive(decoder, current_package)?;
1747 }
1748 }
1749 }
1750
1751 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1752}
1753
1754fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1755 use std::io::Read;
1756
1757 let mut tar_archive = tar::Archive::new(reader);
1758
1759 for tar_entry_result in tar_archive
1760 .entries()
1761 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1762 {
1763 let mut tar_entry =
1764 tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1765
1766 let tar_path = tar_entry
1767 .path()
1768 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1769
1770 if tar_path.ends_with("control") {
1771 let mut control_content = String::new();
1772 tar_entry
1773 .read_to_string(&mut control_content)
1774 .map_err(|e| format!("Failed to read control file: {}", e))?;
1775
1776 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1777 if paragraphs.is_empty() {
1778 return Err("No paragraphs in control file".to_string());
1779 }
1780
1781 if let Some(package) =
1782 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
1783 {
1784 return Ok(Some(package));
1785 }
1786
1787 return Err("Failed to parse control file".to_string());
1788 }
1789 }
1790
1791 Ok(None)
1792}
1793
1794fn merge_deb_data_archive<R: std::io::Read>(
1795 reader: R,
1796 package: &mut PackageData,
1797) -> Result<(), String> {
1798 use std::io::Read;
1799
1800 let mut tar_archive = tar::Archive::new(reader);
1801
1802 for tar_entry_result in tar_archive
1803 .entries()
1804 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1805 {
1806 let mut tar_entry =
1807 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1808
1809 let tar_path = tar_entry
1810 .path()
1811 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1812 let tar_path_str = tar_path.to_string_lossy();
1813
1814 if tar_path_str.ends_with(&format!(
1815 "/usr/share/doc/{}/copyright",
1816 package.name.as_deref().unwrap_or_default()
1817 )) || tar_path_str.ends_with(&format!(
1818 "usr/share/doc/{}/copyright",
1819 package.name.as_deref().unwrap_or_default()
1820 )) {
1821 let mut copyright_content = String::new();
1822 tar_entry
1823 .read_to_string(&mut copyright_content)
1824 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1825
1826 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
1827 merge_debian_copyright_into_package(package, ©right_pkg);
1828 break;
1829 }
1830 }
1831
1832 Ok(())
1833}
1834
1835fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1836 if target.extracted_license_statement.is_none() {
1837 target.extracted_license_statement = copyright.extracted_license_statement.clone();
1838 }
1839
1840 for party in ©right.parties {
1841 if !target.parties.iter().any(|existing| {
1842 existing.r#type == party.r#type
1843 && existing.role == party.role
1844 && existing.name == party.name
1845 && existing.email == party.email
1846 && existing.url == party.url
1847 && existing.organization == party.organization
1848 && existing.organization_url == party.organization_url
1849 && existing.timezone == party.timezone
1850 }) {
1851 target.parties.push(party.clone());
1852 }
1853 }
1854}
1855
1856fn parse_deb_filename(filename: &str) -> PackageData {
1857 let without_ext = filename.trim_end_matches(".deb");
1858
1859 let parts: Vec<&str> = without_ext.split('_').collect();
1860 if parts.len() < 2 {
1861 return default_package_data(DatasourceId::DebianDeb);
1862 }
1863
1864 let name = parts[0].to_string();
1865 let version = parts[1].to_string();
1866 let architecture = if parts.len() >= 3 {
1867 Some(parts[2].to_string())
1868 } else {
1869 None
1870 };
1871
1872 let namespace = Some("debian".to_string());
1873
1874 PackageData {
1875 datasource_id: Some(DatasourceId::DebianDeb),
1876 package_type: Some(PACKAGE_TYPE),
1877 namespace: namespace.clone(),
1878 name: Some(name.clone()),
1879 version: Some(version.clone()),
1880 purl: build_debian_purl(
1881 &name,
1882 Some(&version),
1883 namespace.as_deref(),
1884 architecture.as_deref(),
1885 ),
1886 ..Default::default()
1887 }
1888}
1889
1890pub struct DebianControlInExtractedDebParser;
1896
1897impl PackageParser for DebianControlInExtractedDebParser {
1898 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1899
1900 fn is_match(path: &Path) -> bool {
1901 path.file_name()
1902 .and_then(|n| n.to_str())
1903 .is_some_and(|name| name == "control")
1904 && path
1905 .to_str()
1906 .map(|p| {
1907 p.ends_with("control.tar.gz-extract/control")
1908 || p.ends_with("control.tar.xz-extract/control")
1909 })
1910 .unwrap_or(false)
1911 }
1912
1913 fn extract_packages(path: &Path) -> Vec<PackageData> {
1914 let content = match read_file_to_string(path) {
1915 Ok(c) => c,
1916 Err(e) => {
1917 warn!(
1918 "Failed to read control file in extracted deb {:?}: {}",
1919 path, e
1920 );
1921 return vec![default_package_data(
1922 DatasourceId::DebianControlExtractedDeb,
1923 )];
1924 }
1925 };
1926
1927 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1930 if paragraphs.is_empty() {
1931 return vec![default_package_data(
1932 DatasourceId::DebianControlExtractedDeb,
1933 )];
1934 }
1935
1936 if let Some(pkg) = build_package_from_paragraph(
1937 ¶graphs[0],
1938 None,
1939 DatasourceId::DebianControlExtractedDeb,
1940 ) {
1941 vec![pkg]
1942 } else {
1943 vec![default_package_data(
1944 DatasourceId::DebianControlExtractedDeb,
1945 )]
1946 }
1947 }
1948}
1949
1950pub struct DebianMd5sumInPackageParser;
1952
1953impl PackageParser for DebianMd5sumInPackageParser {
1954 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1955
1956 fn is_match(path: &Path) -> bool {
1957 path.file_name()
1958 .and_then(|n| n.to_str())
1959 .is_some_and(|name| name == "md5sums")
1960 && path
1961 .to_str()
1962 .map(|p| {
1963 p.ends_with("control.tar.gz-extract/md5sums")
1964 || p.ends_with("control.tar.xz-extract/md5sums")
1965 })
1966 .unwrap_or(false)
1967 }
1968
1969 fn extract_packages(path: &Path) -> Vec<PackageData> {
1970 let content = match read_file_to_string(path) {
1971 Ok(c) => c,
1972 Err(e) => {
1973 warn!("Failed to read md5sums file {:?}: {}", path, e);
1974 return vec![default_package_data(
1975 DatasourceId::DebianMd5SumsInExtractedDeb,
1976 )];
1977 }
1978 };
1979
1980 let package_name = extract_package_name_from_deb_path(path);
1981
1982 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
1983 }
1984}
1985
/// Derives the package name from the directory two levels above `path`.
///
/// That directory must be named `<deb file name>.deb-extract`; the result is
/// the part of the `.deb` file name before its first `_`. Returns `None` when
/// the path is too short, the directory name is not valid UTF-8, or it lacks
/// the `-extract` or `.deb` suffix.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    // ancestors(): the path itself, its parent, then the grandparent.
    let extract_dir = path.ancestors().nth(2)?;

    extract_dir
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?
        .split('_')
        .next()
        .map(|name| name.to_string())
}
1996
1997fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
1998 let mut file_references = Vec::new();
1999
2000 for line in content.lines() {
2001 let line = line.trim();
2002 if line.is_empty() || line.starts_with('#') {
2003 continue;
2004 }
2005
2006 let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find(" ") {
2007 (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
2008 } else if let Some((hash, path)) = line.split_once(' ') {
2009 (Some(hash.trim().to_string()), path.trim())
2010 } else {
2011 (None, line)
2012 };
2013
2014 if IGNORED_ROOT_DIRS.contains(&filepath) {
2015 continue;
2016 }
2017
2018 file_references.push(FileReference {
2019 path: filepath.to_string(),
2020 size: None,
2021 sha1: None,
2022 md5: md5sum,
2023 sha256: None,
2024 sha512: None,
2025 extra_data: None,
2026 });
2027 }
2028
2029 if file_references.is_empty() {
2030 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2031 }
2032
2033 let namespace = Some("debian".to_string());
2034 let mut package = PackageData {
2035 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2036 package_type: Some(PACKAGE_TYPE),
2037 namespace: namespace.clone(),
2038 name: package_name.map(|s| s.to_string()),
2039 file_references,
2040 ..Default::default()
2041 };
2042
2043 if let Some(n) = &package.name {
2044 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2045 }
2046
2047 package
2048}
2049
// Registration entry for `control` files inside an extracted control tarball:
// a description, the glob patterns that select the files, the "deb" type tag,
// an empty string and a link to the Debian policy chapter on control fields.
// NOTE(review): argument meanings are inferred from the values passed here —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2060
// Registration entry for `md5sums` files inside an extracted control tarball:
// a description, the glob patterns that select the files, the "deb" type tag,
// an empty string and a link to the Debian policy chapter on control fields.
// NOTE(review): argument meanings are inferred from the values passed here —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2071
2072#[cfg(test)]
2073mod tests {
2074 use super::*;
2075 use crate::models::DatasourceId;
2076 use crate::models::PackageType;
2077 use ar::{Builder as ArBuilder, Header as ArHeader};
2078 use flate2::Compression;
2079 use flate2::write::GzEncoder;
2080 use liblzma::write::XzEncoder;
2081 use std::io::Cursor;
2082 use std::path::PathBuf;
2083 use tar::{Builder as TarBuilder, Header as TarHeader};
2084 use tempfile::NamedTempFile;
2085
    /// Builds a temporary `.deb`-style `ar` archive containing a
    /// `debian-binary` member and an xz-compressed `control.tar.xz` with a
    /// single `control` file describing the package `synthetic` 1.2.3.
    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
        let mut control_tar = Vec::new();
        // Scoped so the encoder's mutable borrow of `control_tar` ends before
        // the buffer is read again below.
        // NOTE(review): presumably the xz stream is finalized when the builder
        // and encoder are dropped at the end of this scope — confirm.
        {
            let encoder = XzEncoder::new(&mut control_tar, 6);
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.xz");
            tar_builder.finish().expect("control tar.xz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        // The archive is written through a second handle to the temp file; the
        // builder is dropped at the end of this scope, before `deb` is returned.
        {
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            let mut control_header =
                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.xz entry should be appended");
        }

        deb
    }
2131
    /// Builds a temporary `.deb`-style `ar` archive containing `debian-binary`,
    /// a gzip-compressed `control.tar.gz` (package `synthetic` 9.9.9) and a
    /// gzip-compressed `data.tar.gz` holding
    /// `./usr/share/doc/synthetic/copyright`.
    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
        let mut control_tar = Vec::new();
        // Scoped so the encoder's mutable borrow of `control_tar` ends before
        // the buffer is read again below.
        // NOTE(review): presumably the gzip stream is finalized when the
        // builder and encoder are dropped at the end of this scope — confirm.
        {
            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.gz");
            tar_builder.finish().expect("control tar.gz should finish");
        }

        let mut data_tar = Vec::new();
        // Same scoping as above, this time for the data tarball.
        {
            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            // A single paragraph carrying `Format`, `Files: *`, a copyright
            // line and an Apache-2.0 license.
            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("./usr/share/doc/synthetic/copyright")
                .expect("copyright path should be valid");
            header.set_size(copyright.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(copyright))
                .expect("copyright file should be appended to data tar");
            tar_builder.finish().expect("data tar.gz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        // The archive is written through a second handle to the temp file; the
        // builder is dropped at the end of this scope, before `deb` is returned.
        {
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            let mut control_header =
                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.gz entry should be appended");

            // The data member is appended after the control member.
            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
            data_header.set_mode(0o100644);
            builder
                .append(&data_header, Cursor::new(data_tar))
                .expect("data.tar.gz entry should be appended");
        }

        deb
    }
2202
2203 #[test]
2206 fn test_detect_namespace_from_ubuntu_version() {
2207 assert_eq!(
2208 detect_namespace(Some("1.0-1ubuntu1"), None),
2209 Some("ubuntu".to_string())
2210 );
2211 }
2212
2213 #[test]
2214 fn test_detect_namespace_from_debian_version() {
2215 assert_eq!(
2216 detect_namespace(Some("1.0-1+deb11u1"), None),
2217 Some("debian".to_string())
2218 );
2219 }
2220
2221 #[test]
2222 fn test_detect_namespace_from_ubuntu_maintainer() {
2223 assert_eq!(
2224 detect_namespace(
2225 None,
2226 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2227 ),
2228 Some("ubuntu".to_string())
2229 );
2230 }
2231
2232 #[test]
2233 fn test_detect_namespace_from_debian_maintainer() {
2234 assert_eq!(
2235 detect_namespace(None, Some("John Doe <john@debian.org>")),
2236 Some("debian".to_string())
2237 );
2238 }
2239
2240 #[test]
2241 fn test_detect_namespace_default() {
2242 assert_eq!(
2243 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2244 Some("debian".to_string())
2245 );
2246 }
2247
2248 #[test]
2249 fn test_detect_namespace_version_takes_priority() {
2250 assert_eq!(
2252 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2253 Some("ubuntu".to_string())
2254 );
2255 }
2256
2257 #[test]
2260 fn test_build_purl_basic() {
2261 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2262 assert_eq!(
2263 purl,
2264 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2265 );
2266 }
2267
2268 #[test]
2269 fn test_build_purl_no_version() {
2270 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2271 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2272 }
2273
2274 #[test]
2275 fn test_build_purl_no_arch() {
2276 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2277 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2278 }
2279
2280 #[test]
2281 fn test_build_purl_no_namespace() {
2282 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2283 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2284 }
2285
2286 #[test]
2289 fn test_parse_simple_dependency() {
2290 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2291 assert_eq!(deps.len(), 1);
2292 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2293 assert_eq!(deps[0].extracted_requirement, None);
2294 assert_eq!(deps[0].scope, Some("depends".to_string()));
2295 }
2296
2297 #[test]
2298 fn test_parse_dependency_with_version() {
2299 let deps =
2300 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2301 assert_eq!(deps.len(), 1);
2302 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2303 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2304 }
2305
2306 #[test]
2307 fn test_parse_dependency_exact_version() {
2308 let deps = parse_dependency_field(
2309 "libc6 (= 2.31-13+deb11u5)",
2310 "depends",
2311 true,
2312 false,
2313 Some("debian"),
2314 );
2315 assert_eq!(deps.len(), 1);
2316 assert_eq!(deps[0].is_pinned, Some(true));
2317 }
2318
2319 #[test]
2320 fn test_parse_dependency_strict_less() {
2321 let deps =
2322 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2323 assert_eq!(deps.len(), 1);
2324 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2325 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2326 }
2327
2328 #[test]
2329 fn test_parse_multiple_dependencies() {
2330 let deps = parse_dependency_field(
2331 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2332 "depends",
2333 true,
2334 false,
2335 Some("debian"),
2336 );
2337 assert_eq!(deps.len(), 3);
2338 }
2339
2340 #[test]
2341 fn test_parse_dependency_alternatives() {
2342 let deps = parse_dependency_field(
2343 "libssl1.1 | libssl3",
2344 "depends",
2345 true,
2346 false,
2347 Some("debian"),
2348 );
2349 assert_eq!(deps.len(), 2);
2350 assert_eq!(deps[0].is_optional, Some(true));
2352 assert_eq!(deps[1].is_optional, Some(true));
2353 }
2354
2355 #[test]
2356 fn test_parse_dependency_skips_substitutions() {
2357 let deps = parse_dependency_field(
2358 "${shlibs:Depends}, ${misc:Depends}, libc6",
2359 "depends",
2360 true,
2361 false,
2362 Some("debian"),
2363 );
2364 assert_eq!(deps.len(), 1);
2365 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2366 }
2367
2368 #[test]
2369 fn test_parse_dependency_with_arch_qualifier() {
2370 let deps = parse_dependency_field(
2372 "libc6 (>= 2.17) [amd64]",
2373 "depends",
2374 true,
2375 false,
2376 Some("debian"),
2377 );
2378 assert_eq!(deps.len(), 1);
2379 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2380 }
2381
2382 #[test]
2383 fn test_parse_empty_dependency() {
2384 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2385 assert!(deps.is_empty());
2386 }
2387
2388 #[test]
2391 fn test_parse_source_field_name_only() {
2392 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2393 assert_eq!(sources.len(), 1);
2394 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2395 }
2396
2397 #[test]
2398 fn test_parse_source_field_with_version() {
2399 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2400 assert_eq!(sources.len(), 1);
2401 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2402 }
2403
2404 #[test]
2405 fn test_parse_source_field_empty() {
2406 let sources = parse_source_field(None, Some("debian"));
2407 assert!(sources.is_empty());
2408 }
2409
2410 #[test]
2413 fn test_parse_debian_control_source_and_binary() {
2414 let content = "\
2415Source: curl
2416Section: web
2417Priority: optional
2418Maintainer: Alessandro Ghedini <ghedo@debian.org>
2419Homepage: https://curl.se/
2420Vcs-Browser: https://salsa.debian.org/debian/curl
2421Vcs-Git: https://salsa.debian.org/debian/curl.git
2422Build-Depends: debhelper (>= 12), libssl-dev
2423
2424Package: curl
2425Architecture: amd64
2426Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2427Description: command line tool for transferring data with URL syntax";
2428
2429 let packages = parse_debian_control(content);
2430 assert_eq!(packages.len(), 1);
2431
2432 let pkg = &packages[0];
2433 assert_eq!(pkg.name, Some("curl".to_string()));
2434 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2435 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2436 assert_eq!(
2437 pkg.vcs_url,
2438 Some("https://salsa.debian.org/debian/curl.git".to_string())
2439 );
2440 assert_eq!(
2441 pkg.code_view_url,
2442 Some("https://salsa.debian.org/debian/curl".to_string())
2443 );
2444
2445 assert_eq!(pkg.parties.len(), 1);
2447 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2448 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2449 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2450
2451 assert!(!pkg.dependencies.is_empty());
2453 }
2454
2455 #[test]
2456 fn test_parse_debian_control_multiple_binary() {
2457 let content = "\
2458Source: gzip
2459Maintainer: Debian Developer <dev@debian.org>
2460
2461Package: gzip
2462Architecture: any
2463Depends: libc6 (>= 2.17)
2464Description: GNU file compression
2465
2466Package: gzip-win32
2467Architecture: all
2468Description: gzip for Windows";
2469
2470 let packages = parse_debian_control(content);
2471 assert_eq!(packages.len(), 2);
2472 assert_eq!(packages[0].name, Some("gzip".to_string()));
2473 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2474
2475 assert_eq!(packages[0].parties.len(), 1);
2477 assert_eq!(packages[1].parties.len(), 1);
2478 }
2479
2480 #[test]
2481 fn test_parse_debian_control_source_only() {
2482 let content = "\
2483Source: my-package
2484Maintainer: Test User <test@debian.org>
2485Build-Depends: debhelper (>= 13)";
2486
2487 let packages = parse_debian_control(content);
2488 assert_eq!(packages.len(), 1);
2489 assert_eq!(packages[0].name, Some("my-package".to_string()));
2490 assert!(!packages[0].dependencies.is_empty());
2492 assert_eq!(
2493 packages[0].dependencies[0].scope,
2494 Some("build-depends".to_string())
2495 );
2496 }
2497
2498 #[test]
2499 fn test_parse_debian_control_with_uploaders() {
2500 let content = "\
2501Source: example
2502Maintainer: Main Dev <main@debian.org>
2503Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2504
2505Package: example
2506Architecture: any
2507Description: test package";
2508
2509 let packages = parse_debian_control(content);
2510 assert_eq!(packages.len(), 1);
2511 assert_eq!(packages[0].parties.len(), 3);
2513 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2514 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2515 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2516 }
2517
2518 #[test]
2519 fn test_parse_debian_control_vcs_git_with_branch() {
2520 let content = "\
2521Source: example
2522Maintainer: Dev <dev@debian.org>
2523Vcs-Git: https://salsa.debian.org/example.git -b main
2524
2525Package: example
2526Architecture: any
2527Description: test";
2528
2529 let packages = parse_debian_control(content);
2530 assert_eq!(packages.len(), 1);
2531 assert_eq!(
2533 packages[0].vcs_url,
2534 Some("https://salsa.debian.org/example.git".to_string())
2535 );
2536 }
2537
2538 #[test]
2539 fn test_parse_debian_control_multi_arch() {
2540 let content = "\
2541Source: example
2542Maintainer: Dev <dev@debian.org>
2543
2544Package: libexample
2545Architecture: any
2546Multi-Arch: same
2547Description: shared library";
2548
2549 let packages = parse_debian_control(content);
2550 assert_eq!(packages.len(), 1);
2551 let extra = packages[0].extra_data.as_ref().unwrap();
2552 assert_eq!(
2553 extra.get("multi_arch"),
2554 Some(&serde_json::Value::String("same".to_string()))
2555 );
2556 }
2557
2558 #[test]
2561 fn test_parse_dpkg_status_basic() {
2562 let content = "\
2563Package: base-files
2564Status: install ok installed
2565Priority: required
2566Section: admin
2567Installed-Size: 391
2568Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2569Architecture: amd64
2570Version: 11ubuntu5.6
2571Description: Debian base system miscellaneous files
2572Homepage: https://tracker.debian.org/pkg/base-files
2573
2574Package: not-installed
2575Status: deinstall ok config-files
2576Architecture: amd64
2577Version: 1.0
2578Description: This should be skipped";
2579
2580 let packages = parse_dpkg_status(content);
2581 assert_eq!(packages.len(), 1);
2582
2583 let pkg = &packages[0];
2584 assert_eq!(pkg.name, Some("base-files".to_string()));
2585 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2586 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2587 assert_eq!(
2588 pkg.datasource_id,
2589 Some(DatasourceId::DebianInstalledStatusDb)
2590 );
2591
2592 let extra = pkg.extra_data.as_ref().unwrap();
2594 assert_eq!(
2595 extra.get("installed_size"),
2596 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2597 );
2598 }
2599
2600 #[test]
2601 fn test_parse_dpkg_status_multiple_installed() {
2602 let content = "\
2603Package: libc6
2604Status: install ok installed
2605Architecture: amd64
2606Version: 2.31-13+deb11u5
2607Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2608Description: GNU C Library
2609
2610Package: zlib1g
2611Status: install ok installed
2612Architecture: amd64
2613Version: 1:1.2.11.dfsg-2+deb11u2
2614Maintainer: Mark Brown <broonie@debian.org>
2615Description: compression library";
2616
2617 let packages = parse_dpkg_status(content);
2618 assert_eq!(packages.len(), 2);
2619 assert_eq!(packages[0].name, Some("libc6".to_string()));
2620 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2621 }
2622
2623 #[test]
2624 fn test_parse_dpkg_status_with_dependencies() {
2625 let content = "\
2626Package: curl
2627Status: install ok installed
2628Architecture: amd64
2629Version: 7.74.0-1.3+deb11u7
2630Maintainer: Alessandro Ghedini <ghedo@debian.org>
2631Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2632Recommends: ca-certificates
2633Description: command line tool for transferring data with URL syntax";
2634
2635 let packages = parse_dpkg_status(content);
2636 assert_eq!(packages.len(), 1);
2637
2638 let deps = &packages[0].dependencies;
2639 assert_eq!(deps.len(), 3);
2641
2642 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2644 assert_eq!(deps[0].scope, Some("depends".to_string()));
2645 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2646
2647 assert_eq!(
2649 deps[2].purl,
2650 Some("pkg:deb/debian/ca-certificates".to_string())
2651 );
2652 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2653 assert_eq!(deps[2].is_optional, Some(true));
2654 }
2655
2656 #[test]
2657 fn test_parse_dpkg_status_with_source() {
2658 let content = "\
2659Package: libncurses6
2660Status: install ok installed
2661Architecture: amd64
2662Source: ncurses (6.2+20201114-2+deb11u1)
2663Version: 6.2+20201114-2+deb11u1
2664Maintainer: Craig Small <csmall@debian.org>
2665Description: shared libraries for terminal handling";
2666
2667 let packages = parse_dpkg_status(content);
2668 assert_eq!(packages.len(), 1);
2669 assert!(!packages[0].source_packages.is_empty());
2670 assert!(packages[0].source_packages[0].contains("ncurses"));
2672 }
2673
2674 #[test]
2675 fn test_parse_dpkg_status_filters_not_installed() {
2676 let content = "\
2677Package: installed-pkg
2678Status: install ok installed
2679Version: 1.0
2680Architecture: amd64
2681Description: installed
2682
2683Package: half-installed
2684Status: install ok half-installed
2685Version: 2.0
2686Architecture: amd64
2687Description: half installed
2688
2689Package: deinstall-pkg
2690Status: deinstall ok config-files
2691Version: 3.0
2692Architecture: amd64
2693Description: deinstalled
2694
2695Package: purge-pkg
2696Status: purge ok not-installed
2697Version: 4.0
2698Architecture: amd64
2699Description: purged";
2700
2701 let packages = parse_dpkg_status(content);
2702 assert_eq!(packages.len(), 1);
2703 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2704 }
2705
2706 #[test]
2707 fn test_parse_dpkg_status_empty() {
2708 let packages = parse_dpkg_status("");
2709 assert!(packages.is_empty());
2710 }
2711
2712 #[test]
2715 fn test_debian_control_is_match() {
2716 assert!(DebianControlParser::is_match(Path::new(
2717 "/path/to/debian/control"
2718 )));
2719 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2720 assert!(!DebianControlParser::is_match(Path::new(
2721 "/path/to/control"
2722 )));
2723 assert!(!DebianControlParser::is_match(Path::new(
2724 "/path/to/debian/changelog"
2725 )));
2726 }
2727
2728 #[test]
2729 fn test_debian_installed_is_match() {
2730 assert!(DebianInstalledParser::is_match(Path::new(
2731 "/var/lib/dpkg/status"
2732 )));
2733 assert!(DebianInstalledParser::is_match(Path::new(
2734 "some/root/var/lib/dpkg/status"
2735 )));
2736 assert!(!DebianInstalledParser::is_match(Path::new(
2737 "/var/lib/dpkg/status.d/something"
2738 )));
2739 assert!(!DebianInstalledParser::is_match(Path::new(
2740 "/var/lib/dpkg/available"
2741 )));
2742 }
2743
2744 #[test]
2747 fn test_parse_debian_control_empty_input() {
2748 let packages = parse_debian_control("");
2749 assert!(packages.is_empty());
2750 }
2751
2752 #[test]
2753 fn test_parse_debian_control_malformed_input() {
2754 let content = "this is not a valid control file\nwith random text";
2755 let packages = parse_debian_control(content);
2756 assert!(packages.is_empty());
2758 }
2759
2760 #[test]
2761 fn test_dependency_with_epoch_version() {
2762 let deps = parse_dependency_field(
2764 "zlib1g (>= 1:1.2.11)",
2765 "depends",
2766 true,
2767 false,
2768 Some("debian"),
2769 );
2770 assert_eq!(deps.len(), 1);
2771 assert_eq!(
2772 deps[0].extracted_requirement,
2773 Some(">= 1:1.2.11".to_string())
2774 );
2775 }
2776
2777 #[test]
2778 fn test_dependency_with_plus_in_name() {
2779 let deps =
2780 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2781 assert_eq!(deps.len(), 1);
2782 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2783 }
2784
2785 #[test]
2786 fn test_dsc_parser_is_match() {
2787 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2788 assert!(DebianDscParser::is_match(&PathBuf::from(
2789 "adduser_3.118+deb11u1.dsc"
2790 )));
2791 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2792 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2793 }
2794
2795 #[test]
2796 fn test_dsc_parser_adduser() {
2797 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2798 let package = DebianDscParser::extract_first_package(&path);
2799
2800 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2801 assert_eq!(package.namespace, Some("debian".to_string()));
2802 assert_eq!(package.name, Some("adduser".to_string()));
2803 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2804 assert_eq!(
2805 package.purl,
2806 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2807 );
2808 assert_eq!(
2809 package.vcs_url,
2810 Some("https://salsa.debian.org/debian/adduser.git".to_string())
2811 );
2812 assert_eq!(
2813 package.code_view_url,
2814 Some("https://salsa.debian.org/debian/adduser".to_string())
2815 );
2816 assert_eq!(
2817 package.datasource_id,
2818 Some(DatasourceId::DebianSourceControlDsc)
2819 );
2820
2821 assert_eq!(package.parties.len(), 2);
2822 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2823 assert_eq!(
2824 package.parties[0].name,
2825 Some("Debian Adduser Developers".to_string())
2826 );
2827 assert_eq!(
2828 package.parties[0].email,
2829 Some("adduser@packages.debian.org".to_string())
2830 );
2831 assert_eq!(package.parties[0].r#type, None);
2832
2833 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2834 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2835 assert_eq!(
2836 package.parties[1].email,
2837 Some("mh+debian-packages@zugschlus.de".to_string())
2838 );
2839 assert_eq!(package.parties[1].r#type, None);
2840
2841 assert_eq!(package.source_packages.len(), 1);
2842 assert_eq!(
2843 package.source_packages[0],
2844 "pkg:deb/debian/adduser".to_string()
2845 );
2846
2847 assert!(!package.dependencies.is_empty());
2848 let build_dep_names: Vec<String> = package
2849 .dependencies
2850 .iter()
2851 .filter_map(|d| d.purl.as_ref())
2852 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2853 .map(|p| p.to_string())
2854 .collect();
2855 assert!(build_dep_names.len() >= 2);
2856 }
2857
2858 #[test]
2859 fn test_dsc_parser_zsh() {
2860 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2861 let package = DebianDscParser::extract_first_package(&path);
2862
2863 assert_eq!(package.name, Some("zsh".to_string()));
2864 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2865 assert_eq!(package.namespace, Some("debian".to_string()));
2866 assert!(package.purl.is_some());
2867 assert!(package.purl.as_ref().unwrap().contains("zsh"));
2868 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2869 }
2870
2871 #[test]
2872 fn test_parse_dsc_content_basic() {
2873 let content = "Format: 3.0 (native)
2874Source: testpkg
2875Binary: testpkg
2876Architecture: amd64
2877Version: 1.0.0
2878Maintainer: Test User <test@example.com>
2879Standards-Version: 4.5.0
2880Build-Depends: debhelper (>= 12)
2881Files:
2882 abc123 1024 testpkg_1.0.0.tar.xz
2883";
2884
2885 let package = parse_dsc_content(content);
2886 assert_eq!(package.name, Some("testpkg".to_string()));
2887 assert_eq!(package.version, Some("1.0.0".to_string()));
2888 assert_eq!(package.namespace, Some("debian".to_string()));
2889 assert_eq!(package.parties.len(), 1);
2890 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2891 assert_eq!(
2892 package.parties[0].email,
2893 Some("test@example.com".to_string())
2894 );
2895 assert_eq!(package.dependencies.len(), 1);
2896 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2897 }
2898
2899 #[test]
2900 fn test_parse_dsc_content_with_uploaders() {
2901 let content = "Source: mypkg
2902Version: 2.0
2903Architecture: all
2904Maintainer: Main Dev <main@example.com>
2905Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2906";
2907
2908 let package = parse_dsc_content(content);
2909 assert_eq!(package.parties.len(), 3);
2910 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2911 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2912 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2913 }
2914
2915 #[test]
2916 fn test_orig_tar_parser_is_match() {
2917 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2918 "package_1.0.orig.tar.gz"
2919 )));
2920 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2921 "abseil_0~20200923.3.orig.tar.xz"
2922 )));
2923 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2924 "package.debian.tar.gz"
2925 )));
2926 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2927 }
2928
2929 #[test]
2930 fn test_debian_tar_parser_is_match() {
2931 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2932 "package_1.0-1.debian.tar.xz"
2933 )));
2934 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2935 "abseil_20220623.1-1.debian.tar.gz"
2936 )));
2937 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2938 "package.orig.tar.gz"
2939 )));
2940 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2941 }
2942
2943 #[test]
2944 fn test_parse_orig_tar_filename() {
2945 let pkg = parse_source_tarball_filename(
2946 "abseil_0~20200923.3.orig.tar.gz",
2947 DatasourceId::DebianOriginalSourceTarball,
2948 );
2949 assert_eq!(pkg.name, Some("abseil".to_string()));
2950 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2951 assert_eq!(pkg.namespace, Some("debian".to_string()));
2952 assert_eq!(
2953 pkg.purl,
2954 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2955 );
2956 assert_eq!(
2957 pkg.datasource_id,
2958 Some(DatasourceId::DebianOriginalSourceTarball)
2959 );
2960 }
2961
2962 #[test]
2963 fn test_parse_debian_tar_filename() {
2964 let pkg = parse_source_tarball_filename(
2965 "abseil_20220623.1-1.debian.tar.xz",
2966 DatasourceId::DebianSourceMetadataTarball,
2967 );
2968 assert_eq!(pkg.name, Some("abseil".to_string()));
2969 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2970 assert_eq!(pkg.namespace, Some("debian".to_string()));
2971 assert_eq!(
2972 pkg.purl,
2973 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2974 );
2975 }
2976
2977 #[test]
2978 fn test_parse_deb_filename() {
2979 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2980 assert_eq!(pkg.name, Some("nginx".to_string()));
2981 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
2982
2983 let pkg = parse_deb_filename("invalid.deb");
2984 assert!(pkg.name.is_none());
2985 assert!(pkg.version.is_none());
2986 }
2987
2988 #[test]
2989 fn test_parse_source_tarball_various_compressions() {
2990 let pkg_gz = parse_source_tarball_filename(
2991 "test_1.0.orig.tar.gz",
2992 DatasourceId::DebianOriginalSourceTarball,
2993 );
2994 let pkg_xz = parse_source_tarball_filename(
2995 "test_1.0.orig.tar.xz",
2996 DatasourceId::DebianOriginalSourceTarball,
2997 );
2998 let pkg_bz2 = parse_source_tarball_filename(
2999 "test_1.0.orig.tar.bz2",
3000 DatasourceId::DebianOriginalSourceTarball,
3001 );
3002
3003 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3004 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3005 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3006 }
3007
3008 #[test]
3009 fn test_parse_source_tarball_invalid_format() {
3010 let pkg = parse_source_tarball_filename(
3011 "invalid-no-underscore.tar.gz",
3012 DatasourceId::DebianOriginalSourceTarball,
3013 );
3014 assert!(pkg.name.is_none());
3015 assert!(pkg.version.is_none());
3016 }
3017
3018 #[test]
3019 fn test_list_parser_is_match() {
3020 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3021 "/var/lib/dpkg/info/bash.list"
3022 )));
3023 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3024 "/var/lib/dpkg/info/package:amd64.list"
3025 )));
3026 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3027 "bash.list"
3028 )));
3029 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3030 "/var/lib/dpkg/info/bash.md5sums"
3031 )));
3032 }
3033
3034 #[test]
3035 fn test_md5sums_parser_is_match() {
3036 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3037 "/var/lib/dpkg/info/bash.md5sums"
3038 )));
3039 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3040 "/var/lib/dpkg/info/package:amd64.md5sums"
3041 )));
3042 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3043 "bash.md5sums"
3044 )));
3045 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3046 "/var/lib/dpkg/info/bash.list"
3047 )));
3048 }
3049
3050 #[test]
3051 fn test_parse_debian_file_list_plain_list() {
3052 let content = "/.
3053/bin
3054/bin/bash
3055/usr/bin/bashbug
3056/usr/share/doc/bash/README
3057";
3058 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3059 assert_eq!(pkg.name, Some("bash".to_string()));
3060 assert_eq!(pkg.file_references.len(), 3);
3061 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3062 assert_eq!(pkg.file_references[0].md5, None);
3063 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3064 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3065 }
3066
3067 #[test]
3068 fn test_parse_debian_file_list_md5sums() {
3069 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
30701c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3071f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3072";
3073 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3074 assert_eq!(pkg.name, Some("bash".to_string()));
3075 assert_eq!(pkg.file_references.len(), 3);
3076 assert_eq!(pkg.file_references[0].path, "bin/bash");
3077 assert_eq!(
3078 pkg.file_references[0].md5,
3079 Some("77506afebd3b7e19e937a678a185b62e".to_string())
3080 );
3081 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3082 assert_eq!(
3083 pkg.file_references[1].md5,
3084 Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3085 );
3086 }
3087
3088 #[test]
3089 fn test_parse_debian_file_list_with_arch() {
3090 let content = "/usr/bin/foo
3091/usr/lib/x86_64-linux-gnu/libfoo.so
3092";
3093 let pkg = parse_debian_file_list(
3094 content,
3095 "libfoo:amd64",
3096 DatasourceId::DebianInstalledFilesList,
3097 );
3098 assert_eq!(pkg.name, Some("libfoo".to_string()));
3099 assert!(pkg.purl.is_some());
3100 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3101 assert_eq!(pkg.file_references.len(), 2);
3102 }
3103
3104 #[test]
3105 fn test_parse_debian_file_list_skips_comments_and_empty() {
3106 let content = "# This is a comment
3107/bin/bash
3108
3109/usr/bin/bashbug
3110
3111";
3112 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3113 assert_eq!(pkg.file_references.len(), 2);
3114 }
3115
3116 #[test]
3117 fn test_parse_debian_file_list_md5sums_only() {
3118 let content = "abc123 usr/bin/tool
3119";
3120 let pkg =
3121 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3122 assert_eq!(pkg.name, None);
3123 assert_eq!(pkg.file_references.len(), 1);
3124 }
3125
3126 #[test]
3127 fn test_parse_debian_file_list_ignores_root_dirs() {
3128 let content = "/.
3129/bin
3130/bin/bash
3131/etc
3132/usr
3133/var
3134";
3135 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3136 assert_eq!(pkg.file_references.len(), 1);
3137 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3138 }
3139
3140 #[test]
3141 fn test_copyright_parser_is_match() {
3142 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3143 "/usr/share/doc/bash/copyright"
3144 )));
3145 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3146 "debian/copyright"
3147 )));
3148 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3149 "copyright.txt"
3150 )));
3151 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3152 "/etc/copyright"
3153 )));
3154 }
3155
3156 #[test]
3157 fn test_extract_package_name_from_path() {
3158 assert_eq!(
3159 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3160 Some("bash".to_string())
3161 );
3162 assert_eq!(
3163 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3164 Some("libseccomp2".to_string())
3165 );
3166 assert_eq!(
3167 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3168 None
3169 );
3170 }
3171
3172 #[test]
3173 fn test_parse_copyright_dep5_format() {
3174 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3175Upstream-Name: libseccomp
3176Source: https://sourceforge.net/projects/libseccomp/
3177
3178Files: *
3179Copyright: 2012 Paul Moore <pmoore@redhat.com>
3180 2012 Ashley Lai <adlai@us.ibm.com>
3181License: LGPL-2.1
3182
3183License: LGPL-2.1
3184 This library is free software
3185";
3186 let pkg = parse_copyright_file(content, Some("libseccomp"));
3187 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3188 assert_eq!(pkg.namespace, Some("debian".to_string()));
3189 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3190 assert_eq!(
3191 pkg.extracted_license_statement,
3192 Some("LGPL-2.1".to_string())
3193 );
3194 assert!(pkg.parties.len() >= 2);
3195 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3196 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3197 }
3198
3199 #[test]
3200 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3201 let path = PathBuf::from(
3202 "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3203 );
3204 let pkg = DebianCopyrightParser::extract_first_package(&path);
3205
3206 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3207 let extracted = pkg
3208 .extracted_license_statement
3209 .as_deref()
3210 .expect("license statement should exist");
3211 assert!(extracted.contains("GPL-2+"));
3212 assert!(!pkg.license_detections.is_empty());
3213
3214 let primary = &pkg.license_detections[0];
3215 assert_eq!(
3216 primary.matches[0].matched_text.as_deref(),
3217 Some("License: GPL-2+")
3218 );
3219 assert_eq!(primary.matches[0].start_line, 47);
3220 assert_eq!(primary.matches[0].end_line, 47);
3221 }
3222
3223 #[test]
3224 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3225 let path = PathBuf::from("testdata/debian/copyright/copyright");
3226 let pkg = DebianCopyrightParser::extract_first_package(&path);
3227
3228 assert_eq!(pkg.license_detections.len(), 1);
3229 assert_eq!(pkg.other_license_detections.len(), 4);
3230
3231 let primary = &pkg.license_detections[0];
3232 assert_eq!(
3233 primary.matches[0].matched_text.as_deref(),
3234 Some("License: LGPL-2.1")
3235 );
3236 assert_eq!(primary.matches[0].start_line, 11);
3237
3238 let ordered_lines: Vec<usize> = pkg
3239 .other_license_detections
3240 .iter()
3241 .map(|detection| detection.matches[0].start_line)
3242 .collect();
3243 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3244
3245 let ordered_texts: Vec<&str> = pkg
3246 .other_license_detections
3247 .iter()
3248 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3249 .collect();
3250 assert_eq!(
3251 ordered_texts,
3252 vec![
3253 "License: LGPL-2.1",
3254 "License: LGPL-2.1",
3255 "License: LGPL-2.1",
3256 "License: LGPL-2.1",
3257 ]
3258 );
3259 }
3260
3261 #[test]
3262 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3263 let path = PathBuf::from(
3264 "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3265 );
3266 let pkg = DebianCopyrightParser::extract_first_package(&path);
3267
3268 let zlib = pkg
3269 .other_license_detections
3270 .iter()
3271 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3272 .expect("at least one Zlib license paragraph should be detected");
3273 assert_eq!(
3274 zlib.matches[0].matched_text.as_deref(),
3275 Some("License: Zlib")
3276 );
3277
3278 let last_zlib = pkg
3279 .other_license_detections
3280 .iter()
3281 .rev()
3282 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3283 .expect("bottom standalone Zlib license paragraph should be detected");
3284 assert_eq!(last_zlib.matches[0].start_line, 732);
3285 assert_eq!(last_zlib.matches[0].end_line, 732);
3286 }
3287
3288 #[test]
3289 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3290 let path =
3291 PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3292 let pkg = DebianCopyrightParser::extract_first_package(&path);
3293
3294 assert_eq!(pkg.license_detections.len(), 1);
3295 let primary = &pkg.license_detections[0];
3296 assert_eq!(
3297 primary.matches[0].matched_text.as_deref(),
3298 Some("License: LGPL-3+ or GPL-2+")
3299 );
3300 assert_eq!(primary.matches[0].start_line, 8);
3301 assert_eq!(primary.matches[0].end_line, 8);
3302
3303 assert!(pkg.other_license_detections.iter().any(|detection| {
3304 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3305 }));
3306 }
3307
3308 #[test]
3309 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3310 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3311 let pkg = parse_copyright_file(content, Some("foo"));
3312
3313 assert_eq!(pkg.license_detections.len(), 1);
3314 let primary = &pkg.license_detections[0];
3315 assert_eq!(
3316 primary.matches[0].matched_text.as_deref(),
3317 Some("License: GPL-2+")
3318 );
3319 assert_eq!(primary.matches[0].start_line, 7);
3320 }
3321
3322 #[test]
3323 fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3324 let raw_lines = vec![
3325 "Files: *".to_string(),
3326 "Copyright: 2024 Example Org".to_string(),
3327 "License: Apache-2.0".to_string(),
3328 " Licensed under the Apache License, Version 2.0.".to_string(),
3329 ];
3330
3331 let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3332 let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3333 .into_iter()
3334 .next()
3335 .expect("reference RFC822 paragraph should parse");
3336
3337 assert_eq!(paragraph.metadata.headers, expected.headers);
3338 assert_eq!(paragraph.metadata.body, expected.body);
3339 assert_eq!(
3340 paragraph.license_header_line,
3341 Some(("License: Apache-2.0".to_string(), 12))
3342 );
3343 }
3344
3345 #[test]
3346 fn test_parse_copyright_unstructured() {
3347 let content = "This package was debianized by John Doe.
3348
3349Upstream Authors:
3350 Jane Smith
3351
3352Copyright:
3353 2009 10gen
3354
3355License:
3356 SSPL
3357";
3358 let pkg = parse_copyright_file(content, Some("mongodb"));
3359 assert_eq!(pkg.name, Some("mongodb".to_string()));
3360 assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3361 assert!(!pkg.parties.is_empty());
3362 }
3363
3364 #[test]
3365 fn test_parse_copyright_holders() {
3366 let text = "2012 Paul Moore <pmoore@redhat.com>
33672012 Ashley Lai <adlai@us.ibm.com>
3368Copyright (C) 2015-2018 Example Corp";
3369 let holders = parse_copyright_holders(text);
3370 assert!(holders.len() >= 3);
3371 assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3372 assert!(holders.iter().any(|h| h.contains("Example Corp")));
3373 }
3374
/// Text without any copyright or license markers must yield a package that carries only
/// the supplied name: no parties and no extracted license statement.
#[test]
fn test_parse_copyright_empty() {
    let pkg = parse_copyright_file(
        "This is just some text without proper copyright info.",
        Some("test"),
    );

    assert_eq!(pkg.name.as_deref(), Some("test"));
    assert!(pkg.parties.is_empty());
    assert_eq!(pkg.extracted_license_statement, None);
}
3383
/// `DebianDebParser::is_match` must accept `.deb` file names and reject other names.
#[test]
fn test_deb_parser_is_match() {
    let matching = [
        "package.deb",
        "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb",
    ];
    let non_matching = ["package.tar.gz", "control"];

    for &name in &matching {
        assert!(
            DebianDebParser::is_match(&PathBuf::from(name)),
            "{} should match",
            name
        );
    }
    for &name in &non_matching {
        assert!(
            !DebianDebParser::is_match(&PathBuf::from(name)),
            "{} should not match",
            name
        );
    }
}
3393
/// A full `name_version_arch.deb` file name must be split into name and version, and the
/// architecture must appear as the `arch` qualifier of the resulting purl.
#[test]
fn test_parse_deb_filename_with_arch() {
    let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");

    assert_eq!(pkg.name.as_deref(), Some("libapache2-mod-md"));
    assert_eq!(pkg.version.as_deref(), Some("2.4.38-3+deb10u10"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));
    // The `+` of the version is expected percent-encoded (`%2B`) inside the purl.
    assert_eq!(
        pkg.purl.as_deref(),
        Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64")
    );
    assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
}
3406
/// Parses a file name whose architecture component is `all` and checks name, version and
/// the `arch=all` purl qualifier.
// NOTE(review): despite the test name, the fixture does carry an architecture (`all`);
// confirm whether a file name with no architecture part was intended.
#[test]
fn test_parse_deb_filename_without_arch() {
    let pkg = parse_deb_filename("package_1.0-1_all.deb");

    assert_eq!(pkg.name.as_deref(), Some("package"));
    assert_eq!(pkg.version.as_deref(), Some("1.0-1"));

    let purl = pkg.purl.as_deref().unwrap();
    assert!(purl.contains("arch=all"));
}
3414
/// Extracts package data from the `adduser` `.deb` fixture and checks the fields read
/// from the archive: name, version, namespace, description, parties and purl.
///
/// The fixture is optional. When it is missing the test is skipped, and a warning is
/// written to stderr so the skip is not mistaken for a real pass.
#[test]
fn test_extract_deb_archive() {
    let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
    if !test_path.exists() {
        // Same warning the distroless test prints, plus the path that was looked up.
        eprintln!(
            "Warning: Test file not found, skipping test: {}",
            test_path.display()
        );
        return;
    }

    let pkg = DebianDebParser::extract_first_package(&test_path);

    assert_eq!(pkg.name, Some("adduser".to_string()));
    assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
    assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
    assert!(pkg.description.is_some());
    assert!(!pkg.parties.is_empty());

    // Borrow the purl once instead of unwrapping it separately for each check.
    let purl = pkg
        .purl
        .as_deref()
        .expect("purl should be set for an extracted .deb");
    assert!(purl.contains("adduser"));
    assert!(purl.contains("3.112ubuntu1"));
}
3433
/// Extracts a synthetic `.deb` built by `create_synthetic_deb_with_control_tar_xz` and
/// checks that the control metadata is read back.
#[test]
fn test_extract_deb_archive_with_control_tar_xz() {
    let deb = create_synthetic_deb_with_control_tar_xz();
    let pkg = DebianDebParser::extract_first_package(deb.path());

    // (field label, actual value, expected value)
    let checks = [
        ("name", pkg.name.as_deref(), "synthetic"),
        ("version", pkg.version.as_deref(), "1.2.3"),
        ("description", pkg.description.as_deref(), "Synthetic deb"),
        ("homepage_url", pkg.homepage_url.as_deref(), "https://example.com"),
    ];
    for &(field, actual, expected) in &checks {
        assert_eq!(actual, Some(expected), "unexpected {}", field);
    }
}
3445
/// Extracts a synthetic `.deb` built by `create_synthetic_deb_with_copyright` and checks
/// that the license statement and the copyright holder end up on the package.
#[test]
fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
    let deb = create_synthetic_deb_with_copyright();
    let pkg = DebianDebParser::extract_first_package(deb.path());

    assert_eq!(pkg.name.as_deref(), Some("synthetic"));
    assert_eq!(
        pkg.extracted_license_statement.as_deref(),
        Some("Apache-2.0")
    );

    // At least one party must be a copyright holder named "Example Org".
    let has_expected_holder = pkg
        .parties
        .iter()
        .filter(|party| party.role.as_deref() == Some("copyright-holder"))
        .any(|party| party.name.as_deref() == Some("Example Org"));
    assert!(has_expected_holder);
}
3462
/// Parses a plain `name_version_arch.deb` file name and checks name, version and namespace.
// NOTE(review): the version contains "ubuntu", yet the namespace asserted here is
// "debian" — presumably the file-name parser does not infer the distro; confirm.
#[test]
fn test_parse_deb_filename_simple() {
    let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");

    assert_eq!(pkg.name.as_deref(), Some("adduser"));
    assert_eq!(pkg.version.as_deref(), Some("3.112ubuntu1"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));
}
3470
/// A file name with no `_`-separated version part must produce neither a name nor a version.
#[test]
fn test_parse_deb_filename_invalid() {
    let pkg = parse_deb_filename("invalid.deb");

    assert_eq!(pkg.name, None);
    assert_eq!(pkg.version, None);
}
3477
/// Checks that `DebianDistrolessInstalledParser` recognises a `status.d` entry by path
/// and, when the fixture exists on disk, extracts the expected package identity from it.
#[test]
fn test_distroless_parser() {
    let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");

    // The match check runs before the existence check, so it does not need the fixture.
    assert!(DebianDistrolessInstalledParser::is_match(&test_file));

    if !test_file.exists() {
        eprintln!("Warning: Test file not found, skipping test");
        return;
    }

    let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);

    assert_eq!(pkg.package_type, Some(PackageType::Deb));
    assert_eq!(
        pkg.datasource_id,
        Some(DatasourceId::DebianDistrolessInstalledDb)
    );
    assert_eq!(pkg.name.as_deref(), Some("base-files"));
    assert_eq!(pkg.version.as_deref(), Some("11.1+deb11u8"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));

    let purl = pkg.purl.as_deref();
    assert!(purl.is_some());
    assert!(purl.unwrap().contains("pkg:deb/debian/base-files"));
}
3507}