1use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41 DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
42};
43use crate::parsers::rfc822::{self, Rfc822Metadata};
44use crate::parsers::utils::{read_file_to_string, split_name_email};
45use crate::utils::spdx::combine_license_expressions;
46
47use super::PackageParser;
48use super::license_normalization::{
49 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
50 normalize_declared_license_key,
51};
52
/// Package type reported by every Debian parser defined in this file.
const PACKAGE_TYPE: PackageType = PackageType::Deb;
54
55fn default_package_data(datasource_id: DatasourceId) -> PackageData {
56 PackageData {
57 package_type: Some(PACKAGE_TYPE),
58 datasource_id: Some(datasource_id),
59 ..Default::default()
60 }
61}
62
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `debian` namespace.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that, when found in a lower-cased version string, mark the
/// package as belonging to the `ubuntu` namespace.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that, when found in a lower-cased maintainer string, mark the
/// package as belonging to the `debian` namespace.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that, when found in a lower-cased maintainer string, mark the
/// package as belonging to the `ubuntu` namespace.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
76
/// Describes how one relationship field of a control paragraph is turned into
/// `Dependency` records.
struct DepFieldSpec {
    /// Header name used to look the field up in the parsed paragraph.
    field: &'static str,
    /// Value stored as the `scope` of every dependency from this field.
    scope: &'static str,
    /// Value stored as `is_runtime` for every dependency from this field.
    is_runtime: bool,
    /// Baseline `is_optional` value for dependencies from this field.
    is_optional: bool,
}
84
85const DEP_FIELDS: &[DepFieldSpec] = &[
86 DepFieldSpec {
87 field: "depends",
88 scope: "depends",
89 is_runtime: true,
90 is_optional: false,
91 },
92 DepFieldSpec {
93 field: "pre-depends",
94 scope: "pre-depends",
95 is_runtime: true,
96 is_optional: false,
97 },
98 DepFieldSpec {
99 field: "recommends",
100 scope: "recommends",
101 is_runtime: true,
102 is_optional: true,
103 },
104 DepFieldSpec {
105 field: "suggests",
106 scope: "suggests",
107 is_runtime: true,
108 is_optional: true,
109 },
110 DepFieldSpec {
111 field: "breaks",
112 scope: "breaks",
113 is_runtime: false,
114 is_optional: false,
115 },
116 DepFieldSpec {
117 field: "conflicts",
118 scope: "conflicts",
119 is_runtime: false,
120 is_optional: false,
121 },
122 DepFieldSpec {
123 field: "replaces",
124 scope: "replaces",
125 is_runtime: false,
126 is_optional: false,
127 },
128 DepFieldSpec {
129 field: "provides",
130 scope: "provides",
131 is_runtime: false,
132 is_optional: false,
133 },
134 DepFieldSpec {
135 field: "build-depends",
136 scope: "build-depends",
137 is_runtime: false,
138 is_optional: false,
139 },
140 DepFieldSpec {
141 field: "build-depends-indep",
142 scope: "build-depends-indep",
143 is_runtime: false,
144 is_optional: false,
145 },
146 DepFieldSpec {
147 field: "build-conflicts",
148 scope: "build-conflicts",
149 is_runtime: false,
150 is_optional: false,
151 },
152];
153
154pub struct DebianControlParser;
159
160impl PackageParser for DebianControlParser {
161 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
162
163 fn is_match(path: &Path) -> bool {
164 if let Some(name) = path.file_name()
165 && name == "control"
166 && let Some(parent) = path.parent()
167 && let Some(parent_name) = parent.file_name()
168 {
169 return parent_name == "debian";
170 }
171 false
172 }
173
174 fn extract_packages(path: &Path) -> Vec<PackageData> {
175 let content = match read_file_to_string(path) {
176 Ok(c) => c,
177 Err(e) => {
178 warn!("Failed to read debian/control at {:?}: {}", path, e);
179 return vec![default_package_data(DatasourceId::DebianControlInSource)];
180 }
181 };
182
183 let packages = parse_debian_control(&content);
184 if packages.is_empty() {
185 vec![default_package_data(DatasourceId::DebianControlInSource)]
186 } else {
187 packages
188 }
189 }
190}
191
192pub struct DebianInstalledParser;
197
198impl PackageParser for DebianInstalledParser {
199 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
200
201 fn is_match(path: &Path) -> bool {
202 let path_str = path.to_string_lossy();
203 path_str.ends_with("var/lib/dpkg/status")
204 }
205
206 fn extract_packages(path: &Path) -> Vec<PackageData> {
207 let content = match read_file_to_string(path) {
208 Ok(c) => c,
209 Err(e) => {
210 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
211 return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
212 }
213 };
214
215 let packages = parse_dpkg_status(&content);
216 if packages.is_empty() {
217 vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
218 } else {
219 packages
220 }
221 }
222}
223
224pub struct DebianDistrolessInstalledParser;
225
226impl PackageParser for DebianDistrolessInstalledParser {
227 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
228
229 fn is_match(path: &Path) -> bool {
230 let path_str = path.to_string_lossy();
231 path_str.contains("var/lib/dpkg/status.d/")
232 }
233
234 fn extract_packages(path: &Path) -> Vec<PackageData> {
235 let content = match read_file_to_string(path) {
236 Ok(c) => c,
237 Err(e) => {
238 warn!("Failed to read distroless status file at {:?}: {}", path, e);
239 return vec![default_package_data(
240 DatasourceId::DebianDistrolessInstalledDb,
241 )];
242 }
243 };
244
245 vec![parse_distroless_status(&content)]
246 }
247}
248
249fn parse_distroless_status(content: &str) -> PackageData {
250 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
251
252 if paragraphs.is_empty() {
253 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
254 }
255
256 build_package_from_paragraph(
257 ¶graphs[0],
258 None,
259 DatasourceId::DebianDistrolessInstalledDb,
260 )
261 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
262}
263
264fn parse_debian_control(content: &str) -> Vec<PackageData> {
274 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
275 if paragraphs.is_empty() {
276 return Vec::new();
277 }
278
279 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
281
282 let (source_paragraph, binary_start) = if has_source {
283 (Some(¶graphs[0]), 1)
284 } else {
285 (None, 0)
286 };
287
288 let source_meta = source_paragraph.map(extract_source_meta);
290
291 let mut packages = Vec::new();
292
293 for para in ¶graphs[binary_start..] {
294 if let Some(pkg) = build_package_from_paragraph(
295 para,
296 source_meta.as_ref(),
297 DatasourceId::DebianControlInSource,
298 ) {
299 packages.push(pkg);
300 }
301 }
302
303 if packages.is_empty()
304 && let Some(source_para) = source_paragraph
305 && let Some(pkg) = build_package_from_source_paragraph(source_para)
306 {
307 packages.push(pkg);
308 }
309
310 packages
311}
312
313fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
318 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
319 let mut packages = Vec::new();
320
321 for para in ¶graphs {
322 let status = rfc822::get_header_first(¶.headers, "status");
323 if status.as_deref() != Some("install ok installed") {
324 continue;
325 }
326
327 if let Some(pkg) =
328 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
329 {
330 packages.push(pkg);
331 }
332 }
333
334 packages
335}
336
/// Metadata taken from a source paragraph and shared with the packages built
/// from the other paragraphs of the same control file.
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders of the source package.
    parties: Vec<Party>,
    /// Value of the `Homepage` field.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `Vcs-Git` field.
    vcs_url: Option<String>,
    /// Value of the `Vcs-Browser` field.
    code_view_url: Option<String>,
    /// Value of the `Bugs` field.
    bug_tracking_url: Option<String>,
}
348
349fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
350 let mut parties = Vec::new();
351
352 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
354 let (name, email) = split_name_email(&maintainer);
355 parties.push(Party {
356 r#type: Some("person".to_string()),
357 role: Some("maintainer".to_string()),
358 name,
359 email,
360 url: None,
361 organization: None,
362 organization_url: None,
363 timezone: None,
364 });
365 }
366
367 if let Some(orig_maintainer) =
369 rfc822::get_header_first(¶graph.headers, "original-maintainer")
370 {
371 let (name, email) = split_name_email(&orig_maintainer);
372 parties.push(Party {
373 r#type: Some("person".to_string()),
374 role: Some("maintainer".to_string()),
375 name,
376 email,
377 url: None,
378 organization: None,
379 organization_url: None,
380 timezone: None,
381 });
382 }
383
384 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
386 for uploader in uploaders_str.split(',') {
387 let trimmed = uploader.trim();
388 if !trimmed.is_empty() {
389 let (name, email) = split_name_email(trimmed);
390 parties.push(Party {
391 r#type: Some("person".to_string()),
392 role: Some("uploader".to_string()),
393 name,
394 email,
395 url: None,
396 organization: None,
397 organization_url: None,
398 timezone: None,
399 });
400 }
401 }
402 }
403
404 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage");
405
406 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
408 .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
409
410 let code_view_url = rfc822::get_header_first(¶graph.headers, "vcs-browser");
411
412 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs");
413
414 SourceMeta {
415 parties,
416 homepage_url,
417 vcs_url,
418 code_view_url,
419 bug_tracking_url,
420 }
421}
422
423fn build_package_from_paragraph(
428 paragraph: &Rfc822Metadata,
429 source_meta: Option<&SourceMeta>,
430 datasource_id: DatasourceId,
431) -> Option<PackageData> {
432 let name = rfc822::get_header_first(¶graph.headers, "package")?;
433 let version = rfc822::get_header_first(¶graph.headers, "version");
434 let architecture = rfc822::get_header_first(¶graph.headers, "architecture");
435 let description = rfc822::get_header_first(¶graph.headers, "description");
436 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
437 let homepage = rfc822::get_header_first(¶graph.headers, "homepage");
438 let source_field = rfc822::get_header_first(¶graph.headers, "source");
439 let section = rfc822::get_header_first(¶graph.headers, "section");
440 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
441 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
442
443 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
444
445 let parties = if let Some(meta) = source_meta {
447 meta.parties.clone()
448 } else {
449 let mut p = Vec::new();
450 if let Some(m) = &maintainer_str {
451 let (n, e) = split_name_email(m);
452 p.push(Party {
453 r#type: Some("person".to_string()),
454 role: Some("maintainer".to_string()),
455 name: n,
456 email: e,
457 url: None,
458 organization: None,
459 organization_url: None,
460 timezone: None,
461 });
462 }
463 p
464 };
465
466 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
468 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
469 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
470 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
471
472 let purl = build_debian_purl(
474 &name,
475 version.as_deref(),
476 namespace.as_deref(),
477 architecture.as_deref(),
478 );
479
480 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
482
483 let keywords = section.into_iter().collect();
485
486 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
488
489 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
491 if let Some(ma) = &multi_arch
492 && !ma.is_empty()
493 {
494 extra_data.insert(
495 "multi_arch".to_string(),
496 serde_json::Value::String(ma.clone()),
497 );
498 }
499 if let Some(size_str) = &installed_size
500 && let Ok(size) = size_str.parse::<u64>()
501 {
502 extra_data.insert(
503 "installed_size".to_string(),
504 serde_json::Value::Number(serde_json::Number::from(size)),
505 );
506 }
507
508 let qualifiers = architecture.as_ref().map(|arch| {
510 let mut q = HashMap::new();
511 q.insert("arch".to_string(), arch.clone());
512 q
513 });
514
515 Some(PackageData {
516 package_type: Some(PACKAGE_TYPE),
517 namespace: namespace.clone(),
518 name: Some(name),
519 version,
520 qualifiers,
521 subpath: None,
522 primary_language: None,
523 description,
524 release_date: None,
525 parties,
526 keywords,
527 homepage_url,
528 download_url: None,
529 size: None,
530 sha1: None,
531 md5: None,
532 sha256: None,
533 sha512: None,
534 bug_tracking_url,
535 code_view_url,
536 vcs_url,
537 copyright: None,
538 holder: None,
539 declared_license_expression: None,
540 declared_license_expression_spdx: None,
541 license_detections: Vec::new(),
542 other_license_expression: None,
543 other_license_expression_spdx: None,
544 other_license_detections: Vec::new(),
545 extracted_license_statement: None,
546 notice_text: None,
547 source_packages,
548 file_references: Vec::new(),
549 is_private: false,
550 is_virtual: false,
551 extra_data: if extra_data.is_empty() {
552 None
553 } else {
554 Some(extra_data)
555 },
556 dependencies,
557 repository_homepage_url: None,
558 repository_download_url: None,
559 api_data_url: None,
560 datasource_id: Some(datasource_id),
561 purl,
562 })
563}
564
565fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
566 let name = rfc822::get_header_first(¶graph.headers, "source")?;
567 let version = rfc822::get_header_first(¶graph.headers, "version");
568 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
569
570 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
571 let source_meta = extract_source_meta(paragraph);
572
573 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
574 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
575
576 let section = rfc822::get_header_first(¶graph.headers, "section");
577 let keywords = section.into_iter().collect();
578
579 Some(PackageData {
580 package_type: Some(PACKAGE_TYPE),
581 namespace: namespace.clone(),
582 name: Some(name),
583 version,
584 qualifiers: None,
585 subpath: None,
586 primary_language: None,
587 description: None,
588 release_date: None,
589 parties: source_meta.parties,
590 keywords,
591 homepage_url: source_meta.homepage_url,
592 download_url: None,
593 size: None,
594 sha1: None,
595 md5: None,
596 sha256: None,
597 sha512: None,
598 bug_tracking_url: source_meta.bug_tracking_url,
599 code_view_url: source_meta.code_view_url,
600 vcs_url: source_meta.vcs_url,
601 copyright: None,
602 holder: None,
603 declared_license_expression: None,
604 declared_license_expression_spdx: None,
605 license_detections: Vec::new(),
606 other_license_expression: None,
607 other_license_expression_spdx: None,
608 other_license_detections: Vec::new(),
609 extracted_license_statement: None,
610 notice_text: None,
611 source_packages: Vec::new(),
612 file_references: Vec::new(),
613 is_private: false,
614 is_virtual: false,
615 extra_data: None,
616 dependencies,
617 repository_homepage_url: None,
618 repository_download_url: None,
619 api_data_url: None,
620 datasource_id: Some(DatasourceId::DebianControlInSource),
621 purl,
622 })
623}
624
625fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
630 if let Some(ver) = version {
632 let ver_lower = ver.to_lowercase();
633 for clue in VERSION_CLUES_UBUNTU {
634 if ver_lower.contains(clue) {
635 return Some("ubuntu".to_string());
636 }
637 }
638 for clue in VERSION_CLUES_DEBIAN {
639 if ver_lower.contains(clue) {
640 return Some("debian".to_string());
641 }
642 }
643 }
644
645 if let Some(maint) = maintainer {
647 let maint_lower = maint.to_lowercase();
648 for clue in MAINTAINER_CLUES_UBUNTU {
649 if maint_lower.contains(clue) {
650 return Some("ubuntu".to_string());
651 }
652 }
653 for clue in MAINTAINER_CLUES_DEBIAN {
654 if maint_lower.contains(clue) {
655 return Some("debian".to_string());
656 }
657 }
658 }
659
660 Some("debian".to_string())
662}
663
/// Builds the package URL string for a Debian package.
///
/// `namespace`, `version` and `architecture` are each added only when
/// present; the architecture is stored as the `arch` qualifier. Returns
/// `None` as soon as any step of the `PackageUrl` construction reports an
/// error.
fn build_debian_purl(
    name: &str,
    version: Option<&str>,
    namespace: Option<&str>,
    architecture: Option<&str>,
) -> Option<String> {
    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;

    if let Some(ns) = namespace {
        purl.with_namespace(ns).ok()?;
    }

    if let Some(ver) = version {
        purl.with_version(ver).ok()?;
    }

    if let Some(arch) = architecture {
        purl.add_qualifier("arch", arch).ok()?;
    }

    Some(purl.to_string())
}
690
691fn parse_all_dependencies(
696 headers: &HashMap<String, Vec<String>>,
697 namespace: Option<&str>,
698) -> Vec<Dependency> {
699 let mut dependencies = Vec::new();
700
701 for spec in DEP_FIELDS {
702 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
703 dependencies.extend(parse_dependency_field(
704 &dep_str,
705 spec.scope,
706 spec.is_runtime,
707 spec.is_optional,
708 namespace,
709 ));
710 }
711 }
712
713 dependencies
714}
715
716fn parse_dependency_field(
725 dep_str: &str,
726 scope: &str,
727 is_runtime: bool,
728 is_optional: bool,
729 namespace: Option<&str>,
730) -> Vec<Dependency> {
731 let mut deps = Vec::new();
732
733 let dep_re = Regex::new(
736 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
737 )
738 .unwrap();
739
740 for group in dep_str.split(',') {
741 let group = group.trim();
742 if group.is_empty() {
743 continue;
744 }
745
746 let alternatives: Vec<&str> = group.split('|').collect();
748 let has_alternatives = alternatives.len() > 1;
749
750 for alt in alternatives {
751 let alt = alt.trim();
752 if alt.is_empty() {
753 continue;
754 }
755
756 if let Some(caps) = dep_re.captures(alt) {
757 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
758 let operator = caps.get(2).map(|m| m.as_str().trim());
759 let version = caps.get(3).map(|m| m.as_str().trim());
760
761 if pkg_name.is_empty() {
762 continue;
763 }
764
765 if pkg_name.starts_with('$') {
767 continue;
768 }
769
770 let extracted_requirement = match (operator, version) {
771 (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
772 _ => None,
773 };
774
775 let is_pinned = operator.map(|op| op == "=");
776
777 let purl = build_debian_purl(pkg_name, None, namespace, None);
778
779 deps.push(Dependency {
780 purl,
781 extracted_requirement,
782 scope: Some(scope.to_string()),
783 is_runtime: Some(is_runtime),
784 is_optional: Some(is_optional || has_alternatives),
785 is_pinned,
786 is_direct: Some(true),
787 resolved_package: None,
788 extra_data: None,
789 });
790 }
791 }
792 }
793
794 deps
795}
796
797fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
805 let Some(source_str) = source else {
806 return Vec::new();
807 };
808
809 let trimmed = source_str.trim();
810 if trimmed.is_empty() {
811 return Vec::new();
812 }
813
814 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
816 let name = trimmed[..paren_start].trim();
817 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
818 (
819 name,
820 if version.is_empty() {
821 None
822 } else {
823 Some(version)
824 },
825 )
826 } else {
827 (trimmed, None)
828 };
829
830 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
831 vec![purl]
832 } else {
833 Vec::new()
834 }
835}
836
// Registration metadata for the `debian/control` parser: description, path
// patterns, package type and documentation URL.
// NOTE(review): the fourth argument (empty string) is presumably the primary
// language — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration metadata for the dpkg status database parser.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration metadata for the distroless `status.d` parser.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
864
865pub struct DebianDscParser;
874
875impl PackageParser for DebianDscParser {
876 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
877
878 fn is_match(path: &Path) -> bool {
879 path.extension().and_then(|e| e.to_str()) == Some("dsc")
880 }
881
882 fn extract_packages(path: &Path) -> Vec<PackageData> {
883 let content = match read_file_to_string(path) {
884 Ok(c) => c,
885 Err(e) => {
886 warn!("Failed to read .dsc file {:?}: {}", path, e);
887 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
888 }
889 };
890
891 vec![parse_dsc_content(&content)]
892 }
893}
894
895crate::register_parser!(
896 "Debian source control file (.dsc)",
897 &["**/*.dsc"],
898 "deb",
899 "",
900 Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
901);
902
/// Removes the PGP clear-sign armor from `content`, keeping the signed text.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, the `Hash:` headers
/// and the blank line that follow it, and everything from `BEGIN PGP
/// SIGNATURE` through `END PGP SIGNATURE`. Every kept line is terminated with
/// `\n`. Unsigned content passes through line by line.
fn strip_pgp_signature(content: &str) -> String {
    let mut cleaned = String::new();
    // True from the signed-message marker until the blank line that ends the
    // armor headers.
    let mut in_armor_headers = false;
    let mut in_signature = false;

    for line in content.lines() {
        match line {
            l if l.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") => in_armor_headers = true,
            l if l.starts_with("-----BEGIN PGP SIGNATURE-----") => in_signature = true,
            l if l.starts_with("-----END PGP SIGNATURE-----") => in_signature = false,
            l if in_armor_headers && l.starts_with("Hash:") => {}
            // The first blank line before any kept text ends the armor headers.
            l if in_armor_headers && l.is_empty() && cleaned.is_empty() => {
                in_armor_headers = false;
            }
            l if !in_signature => {
                cleaned.push_str(l);
                cleaned.push('\n');
            }
            _ => {}
        }
    }

    cleaned
}
936
937fn parse_dsc_content(content: &str) -> PackageData {
938 let clean_content = strip_pgp_signature(content);
939 let metadata = rfc822::parse_rfc822_content(&clean_content);
940 let headers = &metadata.headers;
941
942 let name = rfc822::get_header_first(headers, "source");
943 let version = rfc822::get_header_first(headers, "version");
944 let architecture = rfc822::get_header_first(headers, "architecture");
945 let namespace = Some("debian".to_string());
946
947 let mut package = PackageData {
948 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
949 package_type: Some(PACKAGE_TYPE),
950 namespace: namespace.clone(),
951 name: name.clone(),
952 version: version.clone(),
953 description: rfc822::get_header_first(headers, "description"),
954 homepage_url: rfc822::get_header_first(headers, "homepage"),
955 vcs_url: rfc822::get_header_first(headers, "vcs-git"),
956 code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
957 ..Default::default()
958 };
959
960 if let (Some(n), Some(v)) = (&name, &version) {
962 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
963 }
964
965 if let Some(n) = &name
967 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
968 {
969 package.source_packages.push(source_purl);
970 }
971
972 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
973 let (name_opt, email_opt) = split_name_email(&maintainer);
974 package.parties.push(Party {
975 r#type: None,
976 role: Some("maintainer".to_string()),
977 name: name_opt,
978 email: email_opt,
979 url: None,
980 organization: None,
981 organization_url: None,
982 timezone: None,
983 });
984 }
985
986 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
987 for uploader in uploaders_str.split(',') {
988 let uploader = uploader.trim();
989 if uploader.is_empty() {
990 continue;
991 }
992 let (name_opt, email_opt) = split_name_email(uploader);
993 package.parties.push(Party {
994 r#type: None,
995 role: Some("uploader".to_string()),
996 name: name_opt,
997 email: email_opt,
998 url: None,
999 organization: None,
1000 organization_url: None,
1001 timezone: None,
1002 });
1003 }
1004 }
1005
1006 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1008 package.dependencies.extend(parse_dependency_field(
1009 &build_deps,
1010 "build",
1011 false,
1012 false,
1013 namespace.as_deref(),
1014 ));
1015 }
1016
1017 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1019 let map = package.extra_data.get_or_insert_with(HashMap::new);
1020 map.insert("standards_version".to_string(), standards.into());
1021 }
1022
1023 package
1024}
1025
1026pub struct DebianOrigTarParser;
1028
1029impl PackageParser for DebianOrigTarParser {
1030 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1031
1032 fn is_match(path: &Path) -> bool {
1033 path.file_name()
1034 .and_then(|n| n.to_str())
1035 .map(|name| name.contains(".orig.tar."))
1036 .unwrap_or(false)
1037 }
1038
1039 fn extract_packages(path: &Path) -> Vec<PackageData> {
1040 let filename = match path.file_name().and_then(|n| n.to_str()) {
1041 Some(f) => f,
1042 None => {
1043 return vec![default_package_data(
1044 DatasourceId::DebianOriginalSourceTarball,
1045 )];
1046 }
1047 };
1048
1049 vec![parse_source_tarball_filename(
1050 filename,
1051 DatasourceId::DebianOriginalSourceTarball,
1052 )]
1053 }
1054}
1055
1056crate::register_parser!(
1057 "Debian original source tarball",
1058 &["**/*.orig.tar.*"],
1059 "deb",
1060 "",
1061 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1062);
1063
1064pub struct DebianDebianTarParser;
1066
1067impl PackageParser for DebianDebianTarParser {
1068 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1069
1070 fn is_match(path: &Path) -> bool {
1071 path.file_name()
1072 .and_then(|n| n.to_str())
1073 .map(|name| name.contains(".debian.tar."))
1074 .unwrap_or(false)
1075 }
1076
1077 fn extract_packages(path: &Path) -> Vec<PackageData> {
1078 let filename = match path.file_name().and_then(|n| n.to_str()) {
1079 Some(f) => f,
1080 None => {
1081 return vec![default_package_data(
1082 DatasourceId::DebianSourceMetadataTarball,
1083 )];
1084 }
1085 };
1086
1087 vec![parse_source_tarball_filename(
1088 filename,
1089 DatasourceId::DebianSourceMetadataTarball,
1090 )]
1091 }
1092}
1093
1094crate::register_parser!(
1095 "Debian source metadata tarball",
1096 &["**/*.debian.tar.*"],
1097 "deb",
1098 "",
1099 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1100);
1101
1102fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1103 let without_tar_ext = filename
1104 .trim_end_matches(".gz")
1105 .trim_end_matches(".xz")
1106 .trim_end_matches(".bz2")
1107 .trim_end_matches(".tar");
1108
1109 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1110 if parts.len() < 2 {
1111 return default_package_data(datasource_id);
1112 }
1113
1114 let name = parts[0].to_string();
1115 let version_with_suffix = parts[1];
1116
1117 let version = version_with_suffix
1118 .trim_end_matches(".orig")
1119 .trim_end_matches(".debian")
1120 .to_string();
1121
1122 let namespace = Some("debian".to_string());
1123
1124 PackageData {
1125 datasource_id: Some(datasource_id),
1126 package_type: Some(PACKAGE_TYPE),
1127 namespace: namespace.clone(),
1128 name: Some(name.clone()),
1129 version: Some(version.clone()),
1130 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1131 ..Default::default()
1132 }
1133}
1134
1135pub struct DebianInstalledListParser;
1137
1138impl PackageParser for DebianInstalledListParser {
1139 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1140
1141 fn is_match(path: &Path) -> bool {
1142 path.extension().and_then(|e| e.to_str()) == Some("list")
1143 && path
1144 .to_str()
1145 .map(|p| p.contains("/var/lib/dpkg/info/"))
1146 .unwrap_or(false)
1147 }
1148
1149 fn extract_packages(path: &Path) -> Vec<PackageData> {
1150 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1151 Some(f) => f,
1152 None => {
1153 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1154 }
1155 };
1156
1157 let content = match read_file_to_string(path) {
1158 Ok(c) => c,
1159 Err(e) => {
1160 warn!("Failed to read .list file {:?}: {}", path, e);
1161 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1162 }
1163 };
1164
1165 vec![parse_debian_file_list(
1166 &content,
1167 filename,
1168 DatasourceId::DebianInstalledFilesList,
1169 )]
1170 }
1171}
1172
1173crate::register_parser!(
1174 "Debian installed files list",
1175 &["**/var/lib/dpkg/info/*.list"],
1176 "deb",
1177 "",
1178 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1179);
1180
1181pub struct DebianInstalledMd5sumsParser;
1183
1184impl PackageParser for DebianInstalledMd5sumsParser {
1185 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1186
1187 fn is_match(path: &Path) -> bool {
1188 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1189 && path
1190 .to_str()
1191 .map(|p| p.contains("/var/lib/dpkg/info/"))
1192 .unwrap_or(false)
1193 }
1194
1195 fn extract_packages(path: &Path) -> Vec<PackageData> {
1196 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1197 Some(f) => f,
1198 None => {
1199 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1200 }
1201 };
1202
1203 let content = match read_file_to_string(path) {
1204 Ok(c) => c,
1205 Err(e) => {
1206 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1207 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1208 }
1209 };
1210
1211 vec![parse_debian_file_list(
1212 &content,
1213 filename,
1214 DatasourceId::DebianInstalledMd5Sums,
1215 )]
1216 }
1217}
1218
1219crate::register_parser!(
1220 "Debian installed package md5sums",
1221 &["**/var/lib/dpkg/info/*.md5sums"],
1222 "deb",
1223 "",
1224 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1225);
1226
/// Paths that are skipped when collecting file references from installed
/// file lists; only exact matches are ignored.
const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1228
1229fn parse_debian_file_list(
1230 content: &str,
1231 filename: &str,
1232 datasource_id: DatasourceId,
1233) -> PackageData {
1234 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1235 (Some(pkg.to_string()), Some(arch.to_string()))
1236 } else if filename == "md5sums" {
1237 (None, None)
1238 } else {
1239 (Some(filename.to_string()), None)
1240 };
1241
1242 let mut file_references = Vec::new();
1243
1244 for line in content.lines() {
1245 let line = line.trim();
1246 if line.is_empty() || line.starts_with('#') {
1247 continue;
1248 }
1249
1250 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1251 (Some(hash.trim().to_string()), p.trim())
1252 } else {
1253 (None, line)
1254 };
1255
1256 if IGNORED_ROOT_DIRS.contains(&path) {
1257 continue;
1258 }
1259
1260 file_references.push(FileReference {
1261 path: path.to_string(),
1262 size: None,
1263 sha1: None,
1264 md5: md5sum,
1265 sha256: None,
1266 sha512: None,
1267 extra_data: None,
1268 });
1269 }
1270
1271 if file_references.is_empty() {
1272 return default_package_data(datasource_id);
1273 }
1274
1275 let namespace = Some("debian".to_string());
1276 let mut package = PackageData {
1277 datasource_id: Some(datasource_id),
1278 package_type: Some(PACKAGE_TYPE),
1279 namespace: namespace.clone(),
1280 name: name.clone(),
1281 file_references,
1282 ..Default::default()
1283 };
1284
1285 if let Some(n) = &name {
1286 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1287 }
1288
1289 package
1290}
1291
1292pub struct DebianCopyrightParser;
1294
1295impl PackageParser for DebianCopyrightParser {
1296 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1297
1298 fn is_match(path: &Path) -> bool {
1299 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1300 if filename != "copyright" {
1301 return filename.ends_with("_copyright");
1302 }
1303 let path_str = path.to_string_lossy();
1304 path_str.contains("/debian/")
1305 || path_str.contains("/usr/share/doc/")
1306 || path_str.ends_with("debian/copyright")
1307 } else {
1308 false
1309 }
1310 }
1311
1312 fn extract_packages(path: &Path) -> Vec<PackageData> {
1313 let datasource_id = detect_debian_copyright_datasource(path);
1314 let content = match read_file_to_string(path) {
1315 Ok(c) => c,
1316 Err(e) => {
1317 warn!("Failed to read copyright file {:?}: {}", path, e);
1318 return vec![default_package_data(datasource_id)];
1319 }
1320 };
1321
1322 let package_name = extract_package_name_from_path(path);
1323 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1324 package_data.datasource_id = Some(datasource_id);
1325 vec![package_data]
1326 }
1327}
1328
// Registration entry for the Debian copyright parser: a human-readable
// description, the glob patterns for `debian/copyright`,
// `usr/share/doc/*/copyright` and `*_copyright` files, the `deb` package type,
// an empty fourth argument, and a documentation URL.
// NOTE(review): argument meanings are inferred from their values; confirm
// against the `register_parser!` definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
1340
1341fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1342 let path_str = path.to_string_lossy();
1343 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1344 DatasourceId::DebianCopyrightInSource
1345 } else if path_str.contains("/usr/share/doc/") {
1346 DatasourceId::DebianCopyrightInPackage
1347 } else {
1348 DatasourceId::DebianCopyrightStandalone
1349 }
1350}
1351
/// Derives a package name from a path such as `/usr/share/doc/<name>/copyright`.
///
/// Returns the path component that directly follows the first `doc` component
/// which is itself followed by a normal component. Returns `None` when no such
/// pair exists or when that component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<_> = path.components().collect();

    parts
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(package))
                if dir.to_str() == Some("doc") =>
            {
                // The first matching pair decides the result, even if the
                // package component cannot be represented as UTF-8.
                Some(package.to_str().map(str::to_owned))
            }
            _ => None,
        })
        .flatten()
}
1366
1367fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1368 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1369
1370 let is_dep5 = paragraphs
1371 .first()
1372 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1373 .is_some();
1374
1375 let namespace = Some("debian".to_string());
1376 let mut parties = Vec::new();
1377 let mut license_statements = Vec::new();
1378 let mut primary_license_detection = None;
1379 let mut header_license_detection = None;
1380 let mut other_license_detections = Vec::new();
1381
1382 if is_dep5 {
1383 for para in ¶graphs {
1384 if let Some(copyright_text) =
1385 rfc822::get_header_first(¶.metadata.headers, "copyright")
1386 {
1387 for holder in parse_copyright_holders(©right_text) {
1388 if !holder.is_empty() {
1389 parties.push(Party {
1390 r#type: None,
1391 role: Some("copyright-holder".to_string()),
1392 name: Some(holder),
1393 email: None,
1394 url: None,
1395 organization: None,
1396 organization_url: None,
1397 timezone: None,
1398 });
1399 }
1400 }
1401 }
1402
1403 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1404 let license_name = license.lines().next().unwrap_or(&license).trim();
1405 if !license_name.is_empty()
1406 && !license_statements.contains(&license_name.to_string())
1407 {
1408 license_statements.push(license_name.to_string());
1409 }
1410
1411 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1412 let detection =
1413 build_primary_license_detection(license_name, matched_text, line_no);
1414 let is_header_paragraph =
1415 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1416 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1417 == Some("*")
1418 {
1419 primary_license_detection = Some(detection);
1420 } else if is_header_paragraph {
1421 header_license_detection.get_or_insert(detection);
1422 } else {
1423 other_license_detections.push(detection);
1424 }
1425 }
1426 }
1427 }
1428
1429 if primary_license_detection.is_none() && header_license_detection.is_some() {
1430 primary_license_detection = header_license_detection;
1431 }
1432 } else {
1433 let copyright_block = extract_unstructured_field(content, "Copyright:");
1434 if let Some(text) = copyright_block {
1435 for holder in parse_copyright_holders(&text) {
1436 if !holder.is_empty() {
1437 parties.push(Party {
1438 r#type: None,
1439 role: Some("copyright-holder".to_string()),
1440 name: Some(holder),
1441 email: None,
1442 url: None,
1443 organization: None,
1444 organization_url: None,
1445 timezone: None,
1446 });
1447 }
1448 }
1449 }
1450
1451 let license_block = extract_unstructured_field(content, "License:");
1452 if let Some(text) = license_block {
1453 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1454 }
1455 }
1456
1457 let extracted_license_statement = if license_statements.is_empty() {
1458 None
1459 } else {
1460 Some(license_statements.join(" AND "))
1461 };
1462
1463 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1464 let declared_license_expression = license_detections
1465 .first()
1466 .map(|detection| detection.license_expression.clone());
1467 let declared_license_expression_spdx = license_detections
1468 .first()
1469 .map(|detection| detection.license_expression_spdx.clone());
1470 let other_license_expression = combine_license_expressions(
1471 other_license_detections
1472 .iter()
1473 .map(|detection| detection.license_expression.clone()),
1474 );
1475 let other_license_expression_spdx = combine_license_expressions(
1476 other_license_detections
1477 .iter()
1478 .map(|detection| detection.license_expression_spdx.clone()),
1479 );
1480
1481 PackageData {
1482 datasource_id: Some(DatasourceId::DebianCopyright),
1483 package_type: Some(PACKAGE_TYPE),
1484 namespace: namespace.clone(),
1485 name: package_name.map(|s| s.to_string()),
1486 parties,
1487 declared_license_expression,
1488 declared_license_expression_spdx,
1489 license_detections,
1490 other_license_expression,
1491 other_license_expression_spdx,
1492 other_license_detections,
1493 extracted_license_statement,
1494 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1495 ..Default::default()
1496 }
1497}
1498
/// One blank-line-delimited paragraph of a Debian copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lower-cased field name.
    metadata: Rfc822Metadata,
    /// The paragraph's first `License:` header line (trailing whitespace
    /// removed) paired with its 1-based line number in the file, if present.
    license_header_line: Option<(String, usize)>,
}
1504
1505fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1506 let mut paragraphs = Vec::new();
1507 let mut current_lines = Vec::new();
1508 let mut current_start_line = 1usize;
1509
1510 for (idx, line) in content.lines().enumerate() {
1511 let line_no = idx + 1;
1512 if line.is_empty() {
1513 if !current_lines.is_empty() {
1514 paragraphs.push(finalize_copyright_paragraph(
1515 std::mem::take(&mut current_lines),
1516 current_start_line,
1517 ));
1518 }
1519 current_start_line = line_no + 1;
1520 } else {
1521 if current_lines.is_empty() {
1522 current_start_line = line_no;
1523 }
1524 current_lines.push(line.to_string());
1525 }
1526 }
1527
1528 if !current_lines.is_empty() {
1529 paragraphs.push(finalize_copyright_paragraph(
1530 current_lines,
1531 current_start_line,
1532 ));
1533 }
1534
1535 paragraphs
1536}
1537
1538fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1539 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1540 let mut current_name: Option<String> = None;
1541 let mut current_value = String::new();
1542 let mut license_header_line = None;
1543
1544 for (idx, line) in raw_lines.iter().enumerate() {
1545 if line.starts_with(' ') || line.starts_with('\t') {
1546 if current_name.is_some() {
1547 current_value.push('\n');
1548 current_value.push_str(line);
1549 }
1550 continue;
1551 }
1552
1553 if let Some(name) = current_name.take() {
1554 add_copyright_header_value(&mut headers, &name, ¤t_value);
1555 current_value.clear();
1556 }
1557
1558 if let Some((name, value)) = line.split_once(':') {
1559 let normalized_name = name.trim().to_ascii_lowercase();
1560 if normalized_name == "license" && license_header_line.is_none() {
1561 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1562 }
1563 current_name = Some(normalized_name);
1564 current_value = value.trim_start().to_string();
1565 }
1566 }
1567
1568 if let Some(name) = current_name.take() {
1569 add_copyright_header_value(&mut headers, &name, ¤t_value);
1570 }
1571
1572 CopyrightParagraph {
1573 metadata: Rfc822Metadata {
1574 headers,
1575 body: String::new(),
1576 },
1577 license_header_line,
1578 }
1579}
1580
/// Records `value` under the header `name`.
///
/// An entry for `name` is always created; the value itself is stored (with
/// trailing whitespace removed) only when something remains after trimming.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_string()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_string()),
    }
}
1588
1589fn build_primary_license_detection(
1590 license_name: &str,
1591 matched_text: String,
1592 line_no: usize,
1593) -> LicenseDetection {
1594 let normalized = normalize_debian_license_name(license_name);
1595
1596 build_declared_license_detection(
1597 &normalized,
1598 DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no),
1599 )
1600}
1601
1602fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1603 match license_name.trim() {
1604 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1605 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1606 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1607 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1608 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1609 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1610 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1611 "public-domain" => {
1612 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1613 }
1614 other => normalize_declared_license_key(other)
1615 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1616 }
1617}
1618
/// Extracts copyright holder names from the text of a `Copyright` field.
///
/// Each non-empty line is handled independently:
///
/// 1. Leading copyright markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`)
///    are removed, in any order and any number of times, together with the
///    whitespace between them.
/// 2. Everything before the first alphabetic character (years, ranges,
///    punctuation) is skipped.
/// 3. The remainder is kept as a holder if it is longer than two bytes.
///
/// Returns the holders in input order; lines without a usable name contribute
/// nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

    let mut holders = Vec::new();

    for line in text.lines() {
        let mut rest = line.trim();

        // Strip markers repeatedly and skip the whitespace after each one, so
        // that combinations such as "Copyright (C) 2020 Foo" are fully removed
        // instead of leaving "(C)" behind to be mistaken for part of the name.
        while let Some(stripped) = MARKERS
            .iter()
            .find_map(|marker| rest.strip_prefix(*marker))
        {
            rest = stripped.trim_start();
        }

        let Some(name_start) = rest.find(char::is_alphabetic) else {
            continue;
        };

        // `rest` is already trimmed at the end and the slice starts at an
        // alphabetic character, so no further trimming is needed.
        let holder = &rest[name_start..];
        if holder.len() > 2 {
            holders.push(holder.to_string());
        }
    }

    holders
}
1651
/// Extracts the text of a `field_name` block from a free-form copyright file.
///
/// Collection starts at the first line that begins with `field_name` (the
/// prefix is removed) and continues over lines starting with whitespace.
/// Empty lines are skipped, further lines beginning with `field_name` are
/// appended as well, and any other non-empty line ends the block. Every
/// collected line is trimmed and the lines are joined with `\n`.
///
/// Returns `None` when the field is absent or has no content.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut seen_field = false;

    for line in content.lines() {
        let piece = if line.starts_with(field_name) {
            seen_field = true;
            line.trim_start_matches(field_name).trim()
        } else if !seen_field {
            continue;
        } else if line.starts_with(char::is_whitespace) {
            line.trim()
        } else if line.trim().is_empty() {
            continue;
        } else {
            // An unindented, non-empty line starts something else.
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    Some(collected.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_owned)
}
1678
1679pub struct DebianDebParser;
1681
1682impl PackageParser for DebianDebParser {
1683 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1684
1685 fn is_match(path: &Path) -> bool {
1686 path.extension().and_then(|e| e.to_str()) == Some("deb")
1687 }
1688
1689 fn extract_packages(path: &Path) -> Vec<PackageData> {
1690 if let Ok(data) = extract_deb_archive(path) {
1692 return vec![data];
1693 }
1694
1695 let filename = match path.file_name().and_then(|n| n.to_str()) {
1697 Some(f) => f,
1698 None => {
1699 return vec![default_package_data(DatasourceId::DebianDeb)];
1700 }
1701 };
1702
1703 vec![parse_deb_filename(filename)]
1704 }
1705}
1706
// Registration entry for the `.deb` archive parser: a human-readable
// description, the `**/*.deb` glob pattern, the `deb` package type, an empty
// fourth argument, and a documentation URL.
// NOTE(review): argument meanings are inferred from their values; confirm
// against the `register_parser!` definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1714
1715fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1716 use flate2::read::GzDecoder;
1717 use liblzma::read::XzDecoder;
1718 use std::io::{Cursor, Read};
1719
1720 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1721
1722 let mut archive = ar::Archive::new(file);
1723 let mut package: Option<PackageData> = None;
1724
1725 while let Some(entry_result) = archive.next_entry() {
1726 let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1727
1728 let entry_name = std::str::from_utf8(entry.header().identifier())
1729 .map_err(|e| format!("Invalid entry name: {}", e))?;
1730 let entry_name = entry_name.trim().to_string();
1731
1732 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1733 let mut control_data = Vec::new();
1734 entry
1735 .read_to_end(&mut control_data)
1736 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1737
1738 if entry_name.ends_with(".gz") {
1739 let decoder = GzDecoder::new(Cursor::new(control_data));
1740 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1741 package = Some(parsed_package);
1742 }
1743 } else if entry_name.ends_with(".xz") {
1744 let decoder = XzDecoder::new(Cursor::new(control_data));
1745 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1746 package = Some(parsed_package);
1747 }
1748 }
1749 } else if entry_name.starts_with("data.tar") {
1750 let mut data = Vec::new();
1751 entry
1752 .read_to_end(&mut data)
1753 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1754
1755 let Some(current_package) = package.as_mut() else {
1756 continue;
1757 };
1758
1759 if entry_name.ends_with(".gz") {
1760 let decoder = GzDecoder::new(Cursor::new(data));
1761 merge_deb_data_archive(decoder, current_package)?;
1762 } else if entry_name.ends_with(".xz") {
1763 let decoder = XzDecoder::new(Cursor::new(data));
1764 merge_deb_data_archive(decoder, current_package)?;
1765 }
1766 }
1767 }
1768
1769 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1770}
1771
1772fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1773 use std::io::Read;
1774
1775 let mut tar_archive = tar::Archive::new(reader);
1776
1777 for tar_entry_result in tar_archive
1778 .entries()
1779 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1780 {
1781 let mut tar_entry =
1782 tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1783
1784 let tar_path = tar_entry
1785 .path()
1786 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1787
1788 if tar_path.ends_with("control") {
1789 let mut control_content = String::new();
1790 tar_entry
1791 .read_to_string(&mut control_content)
1792 .map_err(|e| format!("Failed to read control file: {}", e))?;
1793
1794 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1795 if paragraphs.is_empty() {
1796 return Err("No paragraphs in control file".to_string());
1797 }
1798
1799 if let Some(package) =
1800 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
1801 {
1802 return Ok(Some(package));
1803 }
1804
1805 return Err("Failed to parse control file".to_string());
1806 }
1807 }
1808
1809 Ok(None)
1810}
1811
1812fn merge_deb_data_archive<R: std::io::Read>(
1813 reader: R,
1814 package: &mut PackageData,
1815) -> Result<(), String> {
1816 use std::io::Read;
1817
1818 let mut tar_archive = tar::Archive::new(reader);
1819
1820 for tar_entry_result in tar_archive
1821 .entries()
1822 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1823 {
1824 let mut tar_entry =
1825 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1826
1827 let tar_path = tar_entry
1828 .path()
1829 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1830 let tar_path_str = tar_path.to_string_lossy();
1831
1832 if tar_path_str.ends_with(&format!(
1833 "/usr/share/doc/{}/copyright",
1834 package.name.as_deref().unwrap_or_default()
1835 )) || tar_path_str.ends_with(&format!(
1836 "usr/share/doc/{}/copyright",
1837 package.name.as_deref().unwrap_or_default()
1838 )) {
1839 let mut copyright_content = String::new();
1840 tar_entry
1841 .read_to_string(&mut copyright_content)
1842 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1843
1844 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
1845 merge_debian_copyright_into_package(package, ©right_pkg);
1846 break;
1847 }
1848 }
1849
1850 Ok(())
1851}
1852
1853fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1854 if target.extracted_license_statement.is_none() {
1855 target.extracted_license_statement = copyright.extracted_license_statement.clone();
1856 }
1857
1858 for party in ©right.parties {
1859 if !target.parties.iter().any(|existing| {
1860 existing.r#type == party.r#type
1861 && existing.role == party.role
1862 && existing.name == party.name
1863 && existing.email == party.email
1864 && existing.url == party.url
1865 && existing.organization == party.organization
1866 && existing.organization_url == party.organization_url
1867 && existing.timezone == party.timezone
1868 }) {
1869 target.parties.push(party.clone());
1870 }
1871 }
1872}
1873
1874fn parse_deb_filename(filename: &str) -> PackageData {
1875 let without_ext = filename.trim_end_matches(".deb");
1876
1877 let parts: Vec<&str> = without_ext.split('_').collect();
1878 if parts.len() < 2 {
1879 return default_package_data(DatasourceId::DebianDeb);
1880 }
1881
1882 let name = parts[0].to_string();
1883 let version = parts[1].to_string();
1884 let architecture = if parts.len() >= 3 {
1885 Some(parts[2].to_string())
1886 } else {
1887 None
1888 };
1889
1890 let namespace = Some("debian".to_string());
1891
1892 PackageData {
1893 datasource_id: Some(DatasourceId::DebianDeb),
1894 package_type: Some(PACKAGE_TYPE),
1895 namespace: namespace.clone(),
1896 name: Some(name.clone()),
1897 version: Some(version.clone()),
1898 purl: build_debian_purl(
1899 &name,
1900 Some(&version),
1901 namespace.as_deref(),
1902 architecture.as_deref(),
1903 ),
1904 ..Default::default()
1905 }
1906}
1907
1908pub struct DebianControlInExtractedDebParser;
1914
1915impl PackageParser for DebianControlInExtractedDebParser {
1916 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1917
1918 fn is_match(path: &Path) -> bool {
1919 path.file_name()
1920 .and_then(|n| n.to_str())
1921 .is_some_and(|name| name == "control")
1922 && path
1923 .to_str()
1924 .map(|p| {
1925 p.ends_with("control.tar.gz-extract/control")
1926 || p.ends_with("control.tar.xz-extract/control")
1927 })
1928 .unwrap_or(false)
1929 }
1930
1931 fn extract_packages(path: &Path) -> Vec<PackageData> {
1932 let content = match read_file_to_string(path) {
1933 Ok(c) => c,
1934 Err(e) => {
1935 warn!(
1936 "Failed to read control file in extracted deb {:?}: {}",
1937 path, e
1938 );
1939 return vec![default_package_data(
1940 DatasourceId::DebianControlExtractedDeb,
1941 )];
1942 }
1943 };
1944
1945 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1948 if paragraphs.is_empty() {
1949 return vec![default_package_data(
1950 DatasourceId::DebianControlExtractedDeb,
1951 )];
1952 }
1953
1954 if let Some(pkg) = build_package_from_paragraph(
1955 ¶graphs[0],
1956 None,
1957 DatasourceId::DebianControlExtractedDeb,
1958 ) {
1959 vec![pkg]
1960 } else {
1961 vec![default_package_data(
1962 DatasourceId::DebianControlExtractedDeb,
1963 )]
1964 }
1965 }
1966}
1967
1968pub struct DebianMd5sumInPackageParser;
1970
1971impl PackageParser for DebianMd5sumInPackageParser {
1972 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1973
1974 fn is_match(path: &Path) -> bool {
1975 path.file_name()
1976 .and_then(|n| n.to_str())
1977 .is_some_and(|name| name == "md5sums")
1978 && path
1979 .to_str()
1980 .map(|p| {
1981 p.ends_with("control.tar.gz-extract/md5sums")
1982 || p.ends_with("control.tar.xz-extract/md5sums")
1983 })
1984 .unwrap_or(false)
1985 }
1986
1987 fn extract_packages(path: &Path) -> Vec<PackageData> {
1988 let content = match read_file_to_string(path) {
1989 Ok(c) => c,
1990 Err(e) => {
1991 warn!("Failed to read md5sums file {:?}: {}", path, e);
1992 return vec![default_package_data(
1993 DatasourceId::DebianMd5SumsInExtractedDeb,
1994 )];
1995 }
1996 };
1997
1998 let package_name = extract_package_name_from_deb_path(path);
1999
2000 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2001 }
2002}
2003
/// Derives the package name from the directory a `.deb` was extracted into.
///
/// For a path like `<name>_<version>_<arch>.deb-extract/<tarball>-extract/<file>`
/// the grandparent directory name is stripped of its `-extract` and `.deb`
/// suffixes and the text before the first `_` is returned. Returns `None` when
/// the path is too short, the directory name is not UTF-8, or either suffix is
/// missing.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    // ancestors(): the path itself, its parent, then its grandparent.
    path.ancestors()
        .nth(2)?
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?
        .split('_')
        .next()
        .map(str::to_owned)
}
2014
2015fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2016 let mut file_references = Vec::new();
2017
2018 for line in content.lines() {
2019 let line = line.trim();
2020 if line.is_empty() || line.starts_with('#') {
2021 continue;
2022 }
2023
2024 let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find(" ") {
2025 (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
2026 } else if let Some((hash, path)) = line.split_once(' ') {
2027 (Some(hash.trim().to_string()), path.trim())
2028 } else {
2029 (None, line)
2030 };
2031
2032 if IGNORED_ROOT_DIRS.contains(&filepath) {
2033 continue;
2034 }
2035
2036 file_references.push(FileReference {
2037 path: filepath.to_string(),
2038 size: None,
2039 sha1: None,
2040 md5: md5sum,
2041 sha256: None,
2042 sha512: None,
2043 extra_data: None,
2044 });
2045 }
2046
2047 if file_references.is_empty() {
2048 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2049 }
2050
2051 let namespace = Some("debian".to_string());
2052 let mut package = PackageData {
2053 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2054 package_type: Some(PACKAGE_TYPE),
2055 namespace: namespace.clone(),
2056 name: package_name.map(|s| s.to_string()),
2057 file_references,
2058 ..Default::default()
2059 };
2060
2061 if let Some(n) = &package.name {
2062 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2063 }
2064
2065 package
2066}
2067
// Registration entry for the extracted-control-file parser: a human-readable
// description, the glob patterns for `control` inside `control.tar.gz-extract`
// and `control.tar.xz-extract` directories, the `deb` package type, an empty
// fourth argument, and a documentation URL.
// NOTE(review): argument meanings are inferred from their values; confirm
// against the `register_parser!` definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2078
// Registration entry for the extracted-md5sums parser: a human-readable
// description, the glob patterns for `md5sums` inside `control.tar.gz-extract`
// and `control.tar.xz-extract` directories, the `deb` package type, an empty
// fourth argument, and a documentation URL.
// NOTE(review): argument meanings are inferred from their values; confirm
// against the `register_parser!` definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2089
2090#[cfg(test)]
2091mod tests {
2092 use super::*;
2093 use crate::models::DatasourceId;
2094 use crate::models::PackageType;
2095 use ar::{Builder as ArBuilder, Header as ArHeader};
2096 use flate2::Compression;
2097 use flate2::write::GzEncoder;
2098 use liblzma::write::XzEncoder;
2099 use std::io::Cursor;
2100 use std::path::PathBuf;
2101 use tar::{Builder as TarBuilder, Header as TarHeader};
2102 use tempfile::NamedTempFile;
2103
2104 fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2105 let mut control_tar = Vec::new();
2106 {
2107 let encoder = XzEncoder::new(&mut control_tar, 6);
2108 let mut tar_builder = TarBuilder::new(encoder);
2109
2110 let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2111 let mut header = TarHeader::new_gnu();
2112 header
2113 .set_path("control")
2114 .expect("control tar path should be valid");
2115 header.set_size(control_content.len() as u64);
2116 header.set_mode(0o644);
2117 header.set_cksum();
2118 tar_builder
2119 .append(&header, Cursor::new(control_content))
2120 .expect("control file should be appended to tar.xz");
2121 tar_builder.finish().expect("control tar.xz should finish");
2122 }
2123
2124 let deb = NamedTempFile::new().expect("temp deb file should be created");
2125 {
2126 let mut builder = ArBuilder::new(
2127 deb.reopen()
2128 .expect("temporary deb file should reopen for writing"),
2129 );
2130
2131 let debian_binary = b"2.0\n";
2132 let mut debian_binary_header =
2133 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2134 debian_binary_header.set_mode(0o100644);
2135 builder
2136 .append(&debian_binary_header, Cursor::new(debian_binary))
2137 .expect("debian-binary entry should be appended");
2138
2139 let mut control_header =
2140 ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2141 control_header.set_mode(0o100644);
2142 builder
2143 .append(&control_header, Cursor::new(control_tar))
2144 .expect("control.tar.xz entry should be appended");
2145 }
2146
2147 deb
2148 }
2149
2150 fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2151 let mut control_tar = Vec::new();
2152 {
2153 let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2154 let mut tar_builder = TarBuilder::new(encoder);
2155
2156 let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2157 let mut header = TarHeader::new_gnu();
2158 header
2159 .set_path("control")
2160 .expect("control tar path should be valid");
2161 header.set_size(control_content.len() as u64);
2162 header.set_mode(0o644);
2163 header.set_cksum();
2164 tar_builder
2165 .append(&header, Cursor::new(control_content))
2166 .expect("control file should be appended to tar.gz");
2167 tar_builder.finish().expect("control tar.gz should finish");
2168 }
2169
2170 let mut data_tar = Vec::new();
2171 {
2172 let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2173 let mut tar_builder = TarBuilder::new(encoder);
2174
2175 let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2176 let mut header = TarHeader::new_gnu();
2177 header
2178 .set_path("./usr/share/doc/synthetic/copyright")
2179 .expect("copyright path should be valid");
2180 header.set_size(copyright.len() as u64);
2181 header.set_mode(0o644);
2182 header.set_cksum();
2183 tar_builder
2184 .append(&header, Cursor::new(copyright))
2185 .expect("copyright file should be appended to data tar");
2186 tar_builder.finish().expect("data tar.gz should finish");
2187 }
2188
2189 let deb = NamedTempFile::new().expect("temp deb file should be created");
2190 {
2191 let mut builder = ArBuilder::new(
2192 deb.reopen()
2193 .expect("temporary deb file should reopen for writing"),
2194 );
2195
2196 let debian_binary = b"2.0\n";
2197 let mut debian_binary_header =
2198 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2199 debian_binary_header.set_mode(0o100644);
2200 builder
2201 .append(&debian_binary_header, Cursor::new(debian_binary))
2202 .expect("debian-binary entry should be appended");
2203
2204 let mut control_header =
2205 ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2206 control_header.set_mode(0o100644);
2207 builder
2208 .append(&control_header, Cursor::new(control_tar))
2209 .expect("control.tar.gz entry should be appended");
2210
2211 let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2212 data_header.set_mode(0o100644);
2213 builder
2214 .append(&data_header, Cursor::new(data_tar))
2215 .expect("data.tar.gz entry should be appended");
2216 }
2217
2218 deb
2219 }
2220
2221 #[test]
2224 fn test_detect_namespace_from_ubuntu_version() {
2225 assert_eq!(
2226 detect_namespace(Some("1.0-1ubuntu1"), None),
2227 Some("ubuntu".to_string())
2228 );
2229 }
2230
2231 #[test]
2232 fn test_detect_namespace_from_debian_version() {
2233 assert_eq!(
2234 detect_namespace(Some("1.0-1+deb11u1"), None),
2235 Some("debian".to_string())
2236 );
2237 }
2238
2239 #[test]
2240 fn test_detect_namespace_from_ubuntu_maintainer() {
2241 assert_eq!(
2242 detect_namespace(
2243 None,
2244 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2245 ),
2246 Some("ubuntu".to_string())
2247 );
2248 }
2249
2250 #[test]
2251 fn test_detect_namespace_from_debian_maintainer() {
2252 assert_eq!(
2253 detect_namespace(None, Some("John Doe <john@debian.org>")),
2254 Some("debian".to_string())
2255 );
2256 }
2257
2258 #[test]
2259 fn test_detect_namespace_default() {
2260 assert_eq!(
2261 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2262 Some("debian".to_string())
2263 );
2264 }
2265
2266 #[test]
2267 fn test_detect_namespace_version_takes_priority() {
2268 assert_eq!(
2270 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2271 Some("ubuntu".to_string())
2272 );
2273 }
2274
2275 #[test]
2278 fn test_build_purl_basic() {
2279 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2280 assert_eq!(
2281 purl,
2282 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2283 );
2284 }
2285
2286 #[test]
2287 fn test_build_purl_no_version() {
2288 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2289 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2290 }
2291
2292 #[test]
2293 fn test_build_purl_no_arch() {
2294 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2295 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2296 }
2297
2298 #[test]
2299 fn test_build_purl_no_namespace() {
2300 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2301 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2302 }
2303
2304 #[test]
2307 fn test_parse_simple_dependency() {
2308 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2309 assert_eq!(deps.len(), 1);
2310 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2311 assert_eq!(deps[0].extracted_requirement, None);
2312 assert_eq!(deps[0].scope, Some("depends".to_string()));
2313 }
2314
2315 #[test]
2316 fn test_parse_dependency_with_version() {
2317 let deps =
2318 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2319 assert_eq!(deps.len(), 1);
2320 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2321 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2322 }
2323
2324 #[test]
2325 fn test_parse_dependency_exact_version() {
2326 let deps = parse_dependency_field(
2327 "libc6 (= 2.31-13+deb11u5)",
2328 "depends",
2329 true,
2330 false,
2331 Some("debian"),
2332 );
2333 assert_eq!(deps.len(), 1);
2334 assert_eq!(deps[0].is_pinned, Some(true));
2335 }
2336
2337 #[test]
2338 fn test_parse_dependency_strict_less() {
2339 let deps =
2340 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2341 assert_eq!(deps.len(), 1);
2342 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2343 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2344 }
2345
2346 #[test]
2347 fn test_parse_multiple_dependencies() {
2348 let deps = parse_dependency_field(
2349 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2350 "depends",
2351 true,
2352 false,
2353 Some("debian"),
2354 );
2355 assert_eq!(deps.len(), 3);
2356 }
2357
2358 #[test]
2359 fn test_parse_dependency_alternatives() {
2360 let deps = parse_dependency_field(
2361 "libssl1.1 | libssl3",
2362 "depends",
2363 true,
2364 false,
2365 Some("debian"),
2366 );
2367 assert_eq!(deps.len(), 2);
2368 assert_eq!(deps[0].is_optional, Some(true));
2370 assert_eq!(deps[1].is_optional, Some(true));
2371 }
2372
2373 #[test]
2374 fn test_parse_dependency_skips_substitutions() {
2375 let deps = parse_dependency_field(
2376 "${shlibs:Depends}, ${misc:Depends}, libc6",
2377 "depends",
2378 true,
2379 false,
2380 Some("debian"),
2381 );
2382 assert_eq!(deps.len(), 1);
2383 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2384 }
2385
2386 #[test]
2387 fn test_parse_dependency_with_arch_qualifier() {
2388 let deps = parse_dependency_field(
2390 "libc6 (>= 2.17) [amd64]",
2391 "depends",
2392 true,
2393 false,
2394 Some("debian"),
2395 );
2396 assert_eq!(deps.len(), 1);
2397 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2398 }
2399
2400 #[test]
2401 fn test_parse_empty_dependency() {
2402 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2403 assert!(deps.is_empty());
2404 }
2405
2406 #[test]
2409 fn test_parse_source_field_name_only() {
2410 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2411 assert_eq!(sources.len(), 1);
2412 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2413 }
2414
2415 #[test]
2416 fn test_parse_source_field_with_version() {
2417 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2418 assert_eq!(sources.len(), 1);
2419 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2420 }
2421
2422 #[test]
2423 fn test_parse_source_field_empty() {
2424 let sources = parse_source_field(None, Some("debian"));
2425 assert!(sources.is_empty());
2426 }
2427
2428 #[test]
2431 fn test_parse_debian_control_source_and_binary() {
2432 let content = "\
2433Source: curl
2434Section: web
2435Priority: optional
2436Maintainer: Alessandro Ghedini <ghedo@debian.org>
2437Homepage: https://curl.se/
2438Vcs-Browser: https://salsa.debian.org/debian/curl
2439Vcs-Git: https://salsa.debian.org/debian/curl.git
2440Build-Depends: debhelper (>= 12), libssl-dev
2441
2442Package: curl
2443Architecture: amd64
2444Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2445Description: command line tool for transferring data with URL syntax";
2446
2447 let packages = parse_debian_control(content);
2448 assert_eq!(packages.len(), 1);
2449
2450 let pkg = &packages[0];
2451 assert_eq!(pkg.name, Some("curl".to_string()));
2452 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2453 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2454 assert_eq!(
2455 pkg.vcs_url,
2456 Some("https://salsa.debian.org/debian/curl.git".to_string())
2457 );
2458 assert_eq!(
2459 pkg.code_view_url,
2460 Some("https://salsa.debian.org/debian/curl".to_string())
2461 );
2462
2463 assert_eq!(pkg.parties.len(), 1);
2465 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2466 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2467 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2468
2469 assert!(!pkg.dependencies.is_empty());
2471 }
2472
2473 #[test]
2474 fn test_parse_debian_control_multiple_binary() {
2475 let content = "\
2476Source: gzip
2477Maintainer: Debian Developer <dev@debian.org>
2478
2479Package: gzip
2480Architecture: any
2481Depends: libc6 (>= 2.17)
2482Description: GNU file compression
2483
2484Package: gzip-win32
2485Architecture: all
2486Description: gzip for Windows";
2487
2488 let packages = parse_debian_control(content);
2489 assert_eq!(packages.len(), 2);
2490 assert_eq!(packages[0].name, Some("gzip".to_string()));
2491 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2492
2493 assert_eq!(packages[0].parties.len(), 1);
2495 assert_eq!(packages[1].parties.len(), 1);
2496 }
2497
2498 #[test]
2499 fn test_parse_debian_control_source_only() {
2500 let content = "\
2501Source: my-package
2502Maintainer: Test User <test@debian.org>
2503Build-Depends: debhelper (>= 13)";
2504
2505 let packages = parse_debian_control(content);
2506 assert_eq!(packages.len(), 1);
2507 assert_eq!(packages[0].name, Some("my-package".to_string()));
2508 assert!(!packages[0].dependencies.is_empty());
2510 assert_eq!(
2511 packages[0].dependencies[0].scope,
2512 Some("build-depends".to_string())
2513 );
2514 }
2515
2516 #[test]
2517 fn test_parse_debian_control_with_uploaders() {
2518 let content = "\
2519Source: example
2520Maintainer: Main Dev <main@debian.org>
2521Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2522
2523Package: example
2524Architecture: any
2525Description: test package";
2526
2527 let packages = parse_debian_control(content);
2528 assert_eq!(packages.len(), 1);
2529 assert_eq!(packages[0].parties.len(), 3);
2531 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2532 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2533 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2534 }
2535
2536 #[test]
2537 fn test_parse_debian_control_vcs_git_with_branch() {
2538 let content = "\
2539Source: example
2540Maintainer: Dev <dev@debian.org>
2541Vcs-Git: https://salsa.debian.org/example.git -b main
2542
2543Package: example
2544Architecture: any
2545Description: test";
2546
2547 let packages = parse_debian_control(content);
2548 assert_eq!(packages.len(), 1);
2549 assert_eq!(
2551 packages[0].vcs_url,
2552 Some("https://salsa.debian.org/example.git".to_string())
2553 );
2554 }
2555
2556 #[test]
2557 fn test_parse_debian_control_multi_arch() {
2558 let content = "\
2559Source: example
2560Maintainer: Dev <dev@debian.org>
2561
2562Package: libexample
2563Architecture: any
2564Multi-Arch: same
2565Description: shared library";
2566
2567 let packages = parse_debian_control(content);
2568 assert_eq!(packages.len(), 1);
2569 let extra = packages[0].extra_data.as_ref().unwrap();
2570 assert_eq!(
2571 extra.get("multi_arch"),
2572 Some(&serde_json::Value::String("same".to_string()))
2573 );
2574 }
2575
2576 #[test]
2579 fn test_parse_dpkg_status_basic() {
2580 let content = "\
2581Package: base-files
2582Status: install ok installed
2583Priority: required
2584Section: admin
2585Installed-Size: 391
2586Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2587Architecture: amd64
2588Version: 11ubuntu5.6
2589Description: Debian base system miscellaneous files
2590Homepage: https://tracker.debian.org/pkg/base-files
2591
2592Package: not-installed
2593Status: deinstall ok config-files
2594Architecture: amd64
2595Version: 1.0
2596Description: This should be skipped";
2597
2598 let packages = parse_dpkg_status(content);
2599 assert_eq!(packages.len(), 1);
2600
2601 let pkg = &packages[0];
2602 assert_eq!(pkg.name, Some("base-files".to_string()));
2603 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2604 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2605 assert_eq!(
2606 pkg.datasource_id,
2607 Some(DatasourceId::DebianInstalledStatusDb)
2608 );
2609
2610 let extra = pkg.extra_data.as_ref().unwrap();
2612 assert_eq!(
2613 extra.get("installed_size"),
2614 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2615 );
2616 }
2617
2618 #[test]
2619 fn test_parse_dpkg_status_multiple_installed() {
2620 let content = "\
2621Package: libc6
2622Status: install ok installed
2623Architecture: amd64
2624Version: 2.31-13+deb11u5
2625Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2626Description: GNU C Library
2627
2628Package: zlib1g
2629Status: install ok installed
2630Architecture: amd64
2631Version: 1:1.2.11.dfsg-2+deb11u2
2632Maintainer: Mark Brown <broonie@debian.org>
2633Description: compression library";
2634
2635 let packages = parse_dpkg_status(content);
2636 assert_eq!(packages.len(), 2);
2637 assert_eq!(packages[0].name, Some("libc6".to_string()));
2638 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2639 }
2640
2641 #[test]
2642 fn test_parse_dpkg_status_with_dependencies() {
2643 let content = "\
2644Package: curl
2645Status: install ok installed
2646Architecture: amd64
2647Version: 7.74.0-1.3+deb11u7
2648Maintainer: Alessandro Ghedini <ghedo@debian.org>
2649Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2650Recommends: ca-certificates
2651Description: command line tool for transferring data with URL syntax";
2652
2653 let packages = parse_dpkg_status(content);
2654 assert_eq!(packages.len(), 1);
2655
2656 let deps = &packages[0].dependencies;
2657 assert_eq!(deps.len(), 3);
2659
2660 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2662 assert_eq!(deps[0].scope, Some("depends".to_string()));
2663 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2664
2665 assert_eq!(
2667 deps[2].purl,
2668 Some("pkg:deb/debian/ca-certificates".to_string())
2669 );
2670 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2671 assert_eq!(deps[2].is_optional, Some(true));
2672 }
2673
2674 #[test]
2675 fn test_parse_dpkg_status_with_source() {
2676 let content = "\
2677Package: libncurses6
2678Status: install ok installed
2679Architecture: amd64
2680Source: ncurses (6.2+20201114-2+deb11u1)
2681Version: 6.2+20201114-2+deb11u1
2682Maintainer: Craig Small <csmall@debian.org>
2683Description: shared libraries for terminal handling";
2684
2685 let packages = parse_dpkg_status(content);
2686 assert_eq!(packages.len(), 1);
2687 assert!(!packages[0].source_packages.is_empty());
2688 assert!(packages[0].source_packages[0].contains("ncurses"));
2690 }
2691
2692 #[test]
2693 fn test_parse_dpkg_status_filters_not_installed() {
2694 let content = "\
2695Package: installed-pkg
2696Status: install ok installed
2697Version: 1.0
2698Architecture: amd64
2699Description: installed
2700
2701Package: half-installed
2702Status: install ok half-installed
2703Version: 2.0
2704Architecture: amd64
2705Description: half installed
2706
2707Package: deinstall-pkg
2708Status: deinstall ok config-files
2709Version: 3.0
2710Architecture: amd64
2711Description: deinstalled
2712
2713Package: purge-pkg
2714Status: purge ok not-installed
2715Version: 4.0
2716Architecture: amd64
2717Description: purged";
2718
2719 let packages = parse_dpkg_status(content);
2720 assert_eq!(packages.len(), 1);
2721 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2722 }
2723
2724 #[test]
2725 fn test_parse_dpkg_status_empty() {
2726 let packages = parse_dpkg_status("");
2727 assert!(packages.is_empty());
2728 }
2729
2730 #[test]
2733 fn test_debian_control_is_match() {
2734 assert!(DebianControlParser::is_match(Path::new(
2735 "/path/to/debian/control"
2736 )));
2737 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2738 assert!(!DebianControlParser::is_match(Path::new(
2739 "/path/to/control"
2740 )));
2741 assert!(!DebianControlParser::is_match(Path::new(
2742 "/path/to/debian/changelog"
2743 )));
2744 }
2745
2746 #[test]
2747 fn test_debian_installed_is_match() {
2748 assert!(DebianInstalledParser::is_match(Path::new(
2749 "/var/lib/dpkg/status"
2750 )));
2751 assert!(DebianInstalledParser::is_match(Path::new(
2752 "some/root/var/lib/dpkg/status"
2753 )));
2754 assert!(!DebianInstalledParser::is_match(Path::new(
2755 "/var/lib/dpkg/status.d/something"
2756 )));
2757 assert!(!DebianInstalledParser::is_match(Path::new(
2758 "/var/lib/dpkg/available"
2759 )));
2760 }
2761
2762 #[test]
2765 fn test_parse_debian_control_empty_input() {
2766 let packages = parse_debian_control("");
2767 assert!(packages.is_empty());
2768 }
2769
2770 #[test]
2771 fn test_parse_debian_control_malformed_input() {
2772 let content = "this is not a valid control file\nwith random text";
2773 let packages = parse_debian_control(content);
2774 assert!(packages.is_empty());
2776 }
2777
2778 #[test]
2779 fn test_dependency_with_epoch_version() {
2780 let deps = parse_dependency_field(
2782 "zlib1g (>= 1:1.2.11)",
2783 "depends",
2784 true,
2785 false,
2786 Some("debian"),
2787 );
2788 assert_eq!(deps.len(), 1);
2789 assert_eq!(
2790 deps[0].extracted_requirement,
2791 Some(">= 1:1.2.11".to_string())
2792 );
2793 }
2794
2795 #[test]
2796 fn test_dependency_with_plus_in_name() {
2797 let deps =
2798 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2799 assert_eq!(deps.len(), 1);
2800 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2801 }
2802
2803 #[test]
2804 fn test_dsc_parser_is_match() {
2805 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2806 assert!(DebianDscParser::is_match(&PathBuf::from(
2807 "adduser_3.118+deb11u1.dsc"
2808 )));
2809 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2810 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2811 }
2812
2813 #[test]
2814 fn test_dsc_parser_adduser() {
2815 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2816 let package = DebianDscParser::extract_first_package(&path);
2817
2818 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2819 assert_eq!(package.namespace, Some("debian".to_string()));
2820 assert_eq!(package.name, Some("adduser".to_string()));
2821 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2822 assert_eq!(
2823 package.purl,
2824 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2825 );
2826 assert_eq!(
2827 package.vcs_url,
2828 Some("https://salsa.debian.org/debian/adduser.git".to_string())
2829 );
2830 assert_eq!(
2831 package.code_view_url,
2832 Some("https://salsa.debian.org/debian/adduser".to_string())
2833 );
2834 assert_eq!(
2835 package.datasource_id,
2836 Some(DatasourceId::DebianSourceControlDsc)
2837 );
2838
2839 assert_eq!(package.parties.len(), 2);
2840 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2841 assert_eq!(
2842 package.parties[0].name,
2843 Some("Debian Adduser Developers".to_string())
2844 );
2845 assert_eq!(
2846 package.parties[0].email,
2847 Some("adduser@packages.debian.org".to_string())
2848 );
2849 assert_eq!(package.parties[0].r#type, None);
2850
2851 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2852 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2853 assert_eq!(
2854 package.parties[1].email,
2855 Some("mh+debian-packages@zugschlus.de".to_string())
2856 );
2857 assert_eq!(package.parties[1].r#type, None);
2858
2859 assert_eq!(package.source_packages.len(), 1);
2860 assert_eq!(
2861 package.source_packages[0],
2862 "pkg:deb/debian/adduser".to_string()
2863 );
2864
2865 assert!(!package.dependencies.is_empty());
2866 let build_dep_names: Vec<String> = package
2867 .dependencies
2868 .iter()
2869 .filter_map(|d| d.purl.as_ref())
2870 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2871 .map(|p| p.to_string())
2872 .collect();
2873 assert!(build_dep_names.len() >= 2);
2874 }
2875
2876 #[test]
2877 fn test_dsc_parser_zsh() {
2878 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2879 let package = DebianDscParser::extract_first_package(&path);
2880
2881 assert_eq!(package.name, Some("zsh".to_string()));
2882 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2883 assert_eq!(package.namespace, Some("debian".to_string()));
2884 assert!(package.purl.is_some());
2885 assert!(package.purl.as_ref().unwrap().contains("zsh"));
2886 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2887 }
2888
2889 #[test]
2890 fn test_parse_dsc_content_basic() {
2891 let content = "Format: 3.0 (native)
2892Source: testpkg
2893Binary: testpkg
2894Architecture: amd64
2895Version: 1.0.0
2896Maintainer: Test User <test@example.com>
2897Standards-Version: 4.5.0
2898Build-Depends: debhelper (>= 12)
2899Files:
2900 abc123 1024 testpkg_1.0.0.tar.xz
2901";
2902
2903 let package = parse_dsc_content(content);
2904 assert_eq!(package.name, Some("testpkg".to_string()));
2905 assert_eq!(package.version, Some("1.0.0".to_string()));
2906 assert_eq!(package.namespace, Some("debian".to_string()));
2907 assert_eq!(package.parties.len(), 1);
2908 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2909 assert_eq!(
2910 package.parties[0].email,
2911 Some("test@example.com".to_string())
2912 );
2913 assert_eq!(package.dependencies.len(), 1);
2914 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2915 }
2916
2917 #[test]
2918 fn test_parse_dsc_content_with_uploaders() {
2919 let content = "Source: mypkg
2920Version: 2.0
2921Architecture: all
2922Maintainer: Main Dev <main@example.com>
2923Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2924";
2925
2926 let package = parse_dsc_content(content);
2927 assert_eq!(package.parties.len(), 3);
2928 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2929 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2930 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2931 }
2932
2933 #[test]
2934 fn test_orig_tar_parser_is_match() {
2935 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2936 "package_1.0.orig.tar.gz"
2937 )));
2938 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2939 "abseil_0~20200923.3.orig.tar.xz"
2940 )));
2941 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2942 "package.debian.tar.gz"
2943 )));
2944 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2945 }
2946
2947 #[test]
2948 fn test_debian_tar_parser_is_match() {
2949 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2950 "package_1.0-1.debian.tar.xz"
2951 )));
2952 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2953 "abseil_20220623.1-1.debian.tar.gz"
2954 )));
2955 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2956 "package.orig.tar.gz"
2957 )));
2958 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2959 }
2960
2961 #[test]
2962 fn test_parse_orig_tar_filename() {
2963 let pkg = parse_source_tarball_filename(
2964 "abseil_0~20200923.3.orig.tar.gz",
2965 DatasourceId::DebianOriginalSourceTarball,
2966 );
2967 assert_eq!(pkg.name, Some("abseil".to_string()));
2968 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2969 assert_eq!(pkg.namespace, Some("debian".to_string()));
2970 assert_eq!(
2971 pkg.purl,
2972 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2973 );
2974 assert_eq!(
2975 pkg.datasource_id,
2976 Some(DatasourceId::DebianOriginalSourceTarball)
2977 );
2978 }
2979
2980 #[test]
2981 fn test_parse_debian_tar_filename() {
2982 let pkg = parse_source_tarball_filename(
2983 "abseil_20220623.1-1.debian.tar.xz",
2984 DatasourceId::DebianSourceMetadataTarball,
2985 );
2986 assert_eq!(pkg.name, Some("abseil".to_string()));
2987 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2988 assert_eq!(pkg.namespace, Some("debian".to_string()));
2989 assert_eq!(
2990 pkg.purl,
2991 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2992 );
2993 }
2994
2995 #[test]
2996 fn test_parse_deb_filename() {
2997 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2998 assert_eq!(pkg.name, Some("nginx".to_string()));
2999 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3000
3001 let pkg = parse_deb_filename("invalid.deb");
3002 assert!(pkg.name.is_none());
3003 assert!(pkg.version.is_none());
3004 }
3005
3006 #[test]
3007 fn test_parse_source_tarball_various_compressions() {
3008 let pkg_gz = parse_source_tarball_filename(
3009 "test_1.0.orig.tar.gz",
3010 DatasourceId::DebianOriginalSourceTarball,
3011 );
3012 let pkg_xz = parse_source_tarball_filename(
3013 "test_1.0.orig.tar.xz",
3014 DatasourceId::DebianOriginalSourceTarball,
3015 );
3016 let pkg_bz2 = parse_source_tarball_filename(
3017 "test_1.0.orig.tar.bz2",
3018 DatasourceId::DebianOriginalSourceTarball,
3019 );
3020
3021 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3022 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3023 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3024 }
3025
3026 #[test]
3027 fn test_parse_source_tarball_invalid_format() {
3028 let pkg = parse_source_tarball_filename(
3029 "invalid-no-underscore.tar.gz",
3030 DatasourceId::DebianOriginalSourceTarball,
3031 );
3032 assert!(pkg.name.is_none());
3033 assert!(pkg.version.is_none());
3034 }
3035
3036 #[test]
3037 fn test_list_parser_is_match() {
3038 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3039 "/var/lib/dpkg/info/bash.list"
3040 )));
3041 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3042 "/var/lib/dpkg/info/package:amd64.list"
3043 )));
3044 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3045 "bash.list"
3046 )));
3047 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3048 "/var/lib/dpkg/info/bash.md5sums"
3049 )));
3050 }
3051
3052 #[test]
3053 fn test_md5sums_parser_is_match() {
3054 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3055 "/var/lib/dpkg/info/bash.md5sums"
3056 )));
3057 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3058 "/var/lib/dpkg/info/package:amd64.md5sums"
3059 )));
3060 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3061 "bash.md5sums"
3062 )));
3063 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3064 "/var/lib/dpkg/info/bash.list"
3065 )));
3066 }
3067
3068 #[test]
3069 fn test_parse_debian_file_list_plain_list() {
3070 let content = "/.
3071/bin
3072/bin/bash
3073/usr/bin/bashbug
3074/usr/share/doc/bash/README
3075";
3076 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3077 assert_eq!(pkg.name, Some("bash".to_string()));
3078 assert_eq!(pkg.file_references.len(), 3);
3079 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3080 assert_eq!(pkg.file_references[0].md5, None);
3081 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3082 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3083 }
3084
3085 #[test]
3086 fn test_parse_debian_file_list_md5sums() {
3087 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
30881c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3089f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3090";
3091 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3092 assert_eq!(pkg.name, Some("bash".to_string()));
3093 assert_eq!(pkg.file_references.len(), 3);
3094 assert_eq!(pkg.file_references[0].path, "bin/bash");
3095 assert_eq!(
3096 pkg.file_references[0].md5,
3097 Some("77506afebd3b7e19e937a678a185b62e".to_string())
3098 );
3099 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3100 assert_eq!(
3101 pkg.file_references[1].md5,
3102 Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3103 );
3104 }
3105
3106 #[test]
3107 fn test_parse_debian_file_list_with_arch() {
3108 let content = "/usr/bin/foo
3109/usr/lib/x86_64-linux-gnu/libfoo.so
3110";
3111 let pkg = parse_debian_file_list(
3112 content,
3113 "libfoo:amd64",
3114 DatasourceId::DebianInstalledFilesList,
3115 );
3116 assert_eq!(pkg.name, Some("libfoo".to_string()));
3117 assert!(pkg.purl.is_some());
3118 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3119 assert_eq!(pkg.file_references.len(), 2);
3120 }
3121
3122 #[test]
3123 fn test_parse_debian_file_list_skips_comments_and_empty() {
3124 let content = "# This is a comment
3125/bin/bash
3126
3127/usr/bin/bashbug
3128
3129";
3130 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3131 assert_eq!(pkg.file_references.len(), 2);
3132 }
3133
3134 #[test]
3135 fn test_parse_debian_file_list_md5sums_only() {
3136 let content = "abc123 usr/bin/tool
3137";
3138 let pkg =
3139 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3140 assert_eq!(pkg.name, None);
3141 assert_eq!(pkg.file_references.len(), 1);
3142 }
3143
3144 #[test]
3145 fn test_parse_debian_file_list_ignores_root_dirs() {
3146 let content = "/.
3147/bin
3148/bin/bash
3149/etc
3150/usr
3151/var
3152";
3153 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3154 assert_eq!(pkg.file_references.len(), 1);
3155 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3156 }
3157
3158 #[test]
3159 fn test_copyright_parser_is_match() {
3160 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3161 "/usr/share/doc/bash/copyright"
3162 )));
3163 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3164 "debian/copyright"
3165 )));
3166 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3167 "copyright.txt"
3168 )));
3169 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3170 "/etc/copyright"
3171 )));
3172 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3173 "/tmp/sample_copyright"
3174 )));
3175 }
3176
3177 #[test]
3178 fn test_detect_debian_copyright_datasource() {
3179 assert_eq!(
3180 detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3181 DatasourceId::DebianCopyrightInSource
3182 );
3183 assert_eq!(
3184 detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3185 DatasourceId::DebianCopyrightInPackage
3186 );
3187 assert_eq!(
3188 detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3189 DatasourceId::DebianCopyrightStandalone
3190 );
3191 }
3192
3193 #[test]
3194 fn test_extract_package_name_from_path() {
3195 assert_eq!(
3196 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3197 Some("bash".to_string())
3198 );
3199 assert_eq!(
3200 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3201 Some("libseccomp2".to_string())
3202 );
3203 assert_eq!(
3204 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3205 None
3206 );
3207 }
3208
3209 #[test]
3210 fn test_parse_copyright_dep5_format() {
3211 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3212Upstream-Name: libseccomp
3213Source: https://sourceforge.net/projects/libseccomp/
3214
3215Files: *
3216Copyright: 2012 Paul Moore <pmoore@redhat.com>
3217 2012 Ashley Lai <adlai@us.ibm.com>
3218License: LGPL-2.1
3219
3220License: LGPL-2.1
3221 This library is free software
3222";
3223 let pkg = parse_copyright_file(content, Some("libseccomp"));
3224 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3225 assert_eq!(pkg.namespace, Some("debian".to_string()));
3226 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3227 assert_eq!(
3228 pkg.extracted_license_statement,
3229 Some("LGPL-2.1".to_string())
3230 );
3231 assert!(pkg.parties.len() >= 2);
3232 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3233 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3234 }
3235
3236 #[test]
3237 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3238 let path = PathBuf::from(
3239 "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3240 );
3241 let pkg = DebianCopyrightParser::extract_first_package(&path);
3242
3243 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3244 let extracted = pkg
3245 .extracted_license_statement
3246 .as_deref()
3247 .expect("license statement should exist");
3248 assert!(extracted.contains("GPL-2+"));
3249 assert!(!pkg.license_detections.is_empty());
3250
3251 let primary = &pkg.license_detections[0];
3252 assert_eq!(
3253 primary.matches[0].matched_text.as_deref(),
3254 Some("License: GPL-2+")
3255 );
3256 assert_eq!(primary.matches[0].start_line, 47);
3257 assert_eq!(primary.matches[0].end_line, 47);
3258 }
3259
3260 #[test]
3261 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3262 let path = PathBuf::from("testdata/debian/copyright/copyright");
3263 let pkg = DebianCopyrightParser::extract_first_package(&path);
3264
3265 assert_eq!(pkg.license_detections.len(), 1);
3266 assert_eq!(pkg.other_license_detections.len(), 4);
3267
3268 let primary = &pkg.license_detections[0];
3269 assert_eq!(
3270 primary.matches[0].matched_text.as_deref(),
3271 Some("License: LGPL-2.1")
3272 );
3273 assert_eq!(primary.matches[0].start_line, 11);
3274
3275 let ordered_lines: Vec<usize> = pkg
3276 .other_license_detections
3277 .iter()
3278 .map(|detection| detection.matches[0].start_line)
3279 .collect();
3280 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3281
3282 let ordered_texts: Vec<&str> = pkg
3283 .other_license_detections
3284 .iter()
3285 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3286 .collect();
3287 assert_eq!(
3288 ordered_texts,
3289 vec![
3290 "License: LGPL-2.1",
3291 "License: LGPL-2.1",
3292 "License: LGPL-2.1",
3293 "License: LGPL-2.1",
3294 ]
3295 );
3296 }
3297
3298 #[test]
3299 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3300 let path = PathBuf::from(
3301 "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3302 );
3303 let pkg = DebianCopyrightParser::extract_first_package(&path);
3304
3305 let zlib = pkg
3306 .other_license_detections
3307 .iter()
3308 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3309 .expect("at least one Zlib license paragraph should be detected");
3310 assert_eq!(
3311 zlib.matches[0].matched_text.as_deref(),
3312 Some("License: Zlib")
3313 );
3314
3315 let last_zlib = pkg
3316 .other_license_detections
3317 .iter()
3318 .rev()
3319 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3320 .expect("bottom standalone Zlib license paragraph should be detected");
3321 assert_eq!(last_zlib.matches[0].start_line, 732);
3322 assert_eq!(last_zlib.matches[0].end_line, 732);
3323 }
3324
3325 #[test]
3326 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3327 let path =
3328 PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3329 let pkg = DebianCopyrightParser::extract_first_package(&path);
3330
3331 assert_eq!(pkg.license_detections.len(), 1);
3332 let primary = &pkg.license_detections[0];
3333 assert_eq!(
3334 primary.matches[0].matched_text.as_deref(),
3335 Some("License: LGPL-3+ or GPL-2+")
3336 );
3337 assert_eq!(primary.matches[0].start_line, 8);
3338 assert_eq!(primary.matches[0].end_line, 8);
3339
3340 assert!(pkg.other_license_detections.iter().any(|detection| {
3341 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3342 }));
3343 }
3344
3345 #[test]
3346 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3347 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3348 let pkg = parse_copyright_file(content, Some("foo"));
3349
3350 assert_eq!(pkg.license_detections.len(), 1);
3351 let primary = &pkg.license_detections[0];
3352 assert_eq!(
3353 primary.matches[0].matched_text.as_deref(),
3354 Some("License: GPL-2+")
3355 );
3356 assert_eq!(primary.matches[0].start_line, 7);
3357 }
3358
/// Checks that `finalize_copyright_paragraph` yields the same headers and
/// body as the generic RFC822 paragraph parser for the same lines, and that
/// it records the `License:` header line together with line number 12 when
/// called with the argument 10 for a paragraph whose third line is the
/// license header.
#[test]
fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
    let raw_lines: Vec<String> = [
        "Files: *",
        "Copyright: 2024 Example Org",
        "License: Apache-2.0",
        " Licensed under the Apache License, Version 2.0.",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    // Build the reference result first so the lines can then be handed over
    // by value without cloning.
    let reference = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
        .into_iter()
        .next()
        .expect("reference RFC822 paragraph should parse");
    let paragraph = finalize_copyright_paragraph(raw_lines, 10);

    assert_eq!(paragraph.metadata.headers, reference.headers);
    assert_eq!(paragraph.metadata.body, reference.body);
    assert_eq!(
        paragraph.license_header_line,
        Some(("License: Apache-2.0".to_string(), 12))
    );
}
3381
/// Parses a free-form (non machine-readable) copyright text and checks that
/// the supplied package name is kept, that `SSPL` is extracted as the license
/// statement, and that at least one party is reported.
#[test]
fn test_parse_copyright_unstructured() {
    let copyright_text = "This package was debianized by John Doe.

Upstream Authors:
 Jane Smith

Copyright:
 2009 10gen

License:
 SSPL
";
    let package = parse_copyright_file(copyright_text, Some("mongodb"));

    assert_eq!(package.name.as_deref(), Some("mongodb"));
    assert_eq!(package.extracted_license_statement.as_deref(), Some("SSPL"));
    assert!(!package.parties.is_empty());
}
3400
/// Checks that `parse_copyright_holders` returns at least three entries for a
/// three-line copyright text and that entries mentioning `Paul Moore` and
/// `Example Corp` are among them.
#[test]
fn test_parse_copyright_holders() {
    let copyright_text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
    let holders = parse_copyright_holders(copyright_text);

    // True when any returned holder string contains the given fragment.
    let mentions = |needle: &str| holders.iter().any(|holder| holder.contains(needle));

    assert!(holders.len() >= 3);
    assert!(mentions("Paul Moore"));
    assert!(mentions("Example Corp"));
}
3411
/// Text without any copyright or license information must still produce a
/// package carrying the supplied name, but with no parties and no extracted
/// license statement.
#[test]
fn test_parse_copyright_empty() {
    let package = parse_copyright_file(
        "This is just some text without proper copyright info.",
        Some("test"),
    );

    assert_eq!(package.name.as_deref(), Some("test"));
    assert!(package.parties.is_empty());
    assert!(package.extracted_license_statement.is_none());
}
3420
/// `DebianDebParser::is_match` must accept file names ending in `.deb` and
/// reject a tarball name and a bare `control` file name.
#[test]
fn test_deb_parser_is_match() {
    let is_deb = |file_name: &str| DebianDebParser::is_match(&PathBuf::from(file_name));

    assert!(is_deb("package.deb"));
    assert!(is_deb("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"));
    assert!(!is_deb("package.tar.gz"));
    assert!(!is_deb("control"));
}
3430
/// A `name_version_arch.deb` file name is split into name and version, gets
/// the `debian` namespace, and produces a purl with the `+` in the version
/// percent-encoded and the architecture as the `arch` qualifier.
#[test]
fn test_parse_deb_filename_with_arch() {
    let package = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");

    assert_eq!(package.name.as_deref(), Some("libapache2-mod-md"));
    assert_eq!(package.version.as_deref(), Some("2.4.38-3+deb10u10"));
    assert_eq!(package.namespace.as_deref(), Some("debian"));
    assert_eq!(
        package.purl.as_deref(),
        Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64")
    );
    assert_eq!(package.datasource_id, Some(DatasourceId::DebianDeb));
}
3443
/// Checks name, version and the `arch=all` purl qualifier for
/// `package_1.0-1_all.deb`.
///
/// NOTE(review): despite the test name, the input does carry an architecture
/// segment (`all`); confirm whether a two-part file name was intended.
#[test]
fn test_parse_deb_filename_without_arch() {
    let package = parse_deb_filename("package_1.0-1_all.deb");

    assert_eq!(package.name.as_deref(), Some("package"));
    assert_eq!(package.version.as_deref(), Some("1.0-1"));

    let purl = package.purl.as_deref().unwrap();
    assert!(purl.contains("arch=all"));
}
3451
/// Extracts package metadata from the real `adduser` `.deb` fixture and
/// checks name, version, namespace, description, parties and purl.
///
/// The fixture is optional: when it is missing the test is skipped, and a
/// warning is printed so the skip is visible in the test output instead of
/// looking like a silent pass.
#[test]
fn test_extract_deb_archive() {
    let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
    if !test_path.exists() {
        eprintln!("Warning: Test file not found, skipping test");
        return;
    }

    let pkg = DebianDebParser::extract_first_package(&test_path);

    assert_eq!(pkg.name, Some("adduser".to_string()));
    assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
    assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
    assert!(pkg.description.is_some());
    assert!(!pkg.parties.is_empty());

    // Unwrap the purl once; both fragments must appear in the same string.
    let purl = pkg.purl.as_deref().unwrap();
    assert!(purl.contains("adduser"));
    assert!(purl.contains("3.112ubuntu1"));
}
3470
/// Extracts metadata from the archive built by
/// `create_synthetic_deb_with_control_tar_xz` and checks the name, version,
/// description and homepage read from it.
#[test]
fn test_extract_deb_archive_with_control_tar_xz() {
    let archive = create_synthetic_deb_with_control_tar_xz();
    let package = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(package.name.as_deref(), Some("synthetic"));
    assert_eq!(package.version.as_deref(), Some("1.2.3"));
    assert_eq!(package.description.as_deref(), Some("Synthetic deb"));
    assert_eq!(package.homepage_url.as_deref(), Some("https://example.com"));
}
3482
/// Extracts metadata from the archive built by
/// `create_synthetic_deb_with_copyright` and checks that the license
/// statement `Apache-2.0` and a `copyright-holder` party named `Example Org`
/// are present on the resulting package.
#[test]
fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
    let archive = create_synthetic_deb_with_copyright();
    let package = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(package.name.as_deref(), Some("synthetic"));
    assert_eq!(
        package.extracted_license_statement.as_deref(),
        Some("Apache-2.0")
    );

    let has_expected_holder = package.parties.iter().any(|party| {
        matches!(
            (party.role.as_deref(), party.name.as_deref()),
            (Some("copyright-holder"), Some("Example Org"))
        )
    });
    assert!(has_expected_holder);
}
3499
/// Parsing `adduser_3.112ubuntu1_all.deb` by file name alone yields the name
/// and version, and the namespace asserted here is `debian` even though the
/// version string contains `ubuntu`.
#[test]
fn test_parse_deb_filename_simple() {
    let package = parse_deb_filename("adduser_3.112ubuntu1_all.deb");

    assert_eq!(package.name.as_deref(), Some("adduser"));
    assert_eq!(package.version.as_deref(), Some("3.112ubuntu1"));
    assert_eq!(package.namespace.as_deref(), Some("debian"));
}
3507
/// A file name with no `_`-separated version segment (`invalid.deb`) must
/// leave both name and version unset.
#[test]
fn test_parse_deb_filename_invalid() {
    let package = parse_deb_filename("invalid.deb");

    assert_eq!(package.name, None);
    assert_eq!(package.version, None);
}
3514
/// Checks that a `status.d` entry path is recognised by
/// `DebianDistrolessInstalledParser` and, when the fixture exists on disk,
/// that the extracted package has the expected type, datasource id, name,
/// version, namespace and purl prefix. The path match is asserted before the
/// existence check, so it runs even when the fixture is absent.
#[test]
fn test_distroless_parser() {
    let status_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");

    assert!(DebianDistrolessInstalledParser::is_match(&status_file));

    if !status_file.exists() {
        eprintln!("Warning: Test file not found, skipping test");
        return;
    }

    let package = DebianDistrolessInstalledParser::extract_first_package(&status_file);

    assert_eq!(package.package_type, Some(PackageType::Deb));
    assert_eq!(
        package.datasource_id,
        Some(DatasourceId::DebianDistrolessInstalledDb)
    );
    assert_eq!(package.name.as_deref(), Some("base-files"));
    assert_eq!(package.version.as_deref(), Some("11.1+deb11u8"));
    assert_eq!(package.namespace.as_deref(), Some("debian"));

    assert!(package.purl.is_some());
    let purl = package.purl.as_deref().unwrap();
    assert!(purl.contains("pkg:deb/debian/base-files"));
}
3544}