1use std::collections::HashMap;
34use std::path::Path;
35
36use log::warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41 DatasourceId, Dependency, FileReference, LicenseDetection, Match, PackageData, PackageType,
42 Party,
43};
44use crate::parsers::rfc822::{self, Rfc822Metadata};
45use crate::parsers::utils::{read_file_to_string, split_name_email};
46use crate::utils::spdx::combine_license_expressions;
47
48use super::PackageParser;
49
50const PACKAGE_TYPE: PackageType = PackageType::Deb;
51
52fn default_package_data(datasource_id: DatasourceId) -> PackageData {
53 PackageData {
54 package_type: Some(PACKAGE_TYPE),
55 datasource_id: Some(datasource_id),
56 ..Default::default()
57 }
58}
59
/// Substrings that `detect_namespace` looks for in a lowercased version
/// string to classify the package as Debian.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];

/// Substrings that `detect_namespace` looks for in a lowercased version
/// string to classify the package as Ubuntu.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that `detect_namespace` looks for in a lowercased maintainer
/// field to classify the package as Debian.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];

/// Substrings that `detect_namespace` looks for in a lowercased maintainer
/// field to classify the package as Ubuntu.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
73
/// One row of the dependency-field table: which control-file header to read
/// and which attributes to stamp on every dependency parsed from it.
struct DepFieldSpec {
    /// Header name passed to `rfc822::get_header_first` (lowercase in the table).
    field: &'static str,
    /// Value stored in `Dependency::scope`.
    scope: &'static str,
    /// Value stored in `Dependency::is_runtime`.
    is_runtime: bool,
    /// Baseline for `Dependency::is_optional`; alternatives (`a | b`) are
    /// additionally marked optional by the dependency parser.
    is_optional: bool,
}
81
82const DEP_FIELDS: &[DepFieldSpec] = &[
83 DepFieldSpec {
84 field: "depends",
85 scope: "depends",
86 is_runtime: true,
87 is_optional: false,
88 },
89 DepFieldSpec {
90 field: "pre-depends",
91 scope: "pre-depends",
92 is_runtime: true,
93 is_optional: false,
94 },
95 DepFieldSpec {
96 field: "recommends",
97 scope: "recommends",
98 is_runtime: true,
99 is_optional: true,
100 },
101 DepFieldSpec {
102 field: "suggests",
103 scope: "suggests",
104 is_runtime: true,
105 is_optional: true,
106 },
107 DepFieldSpec {
108 field: "breaks",
109 scope: "breaks",
110 is_runtime: false,
111 is_optional: false,
112 },
113 DepFieldSpec {
114 field: "conflicts",
115 scope: "conflicts",
116 is_runtime: false,
117 is_optional: false,
118 },
119 DepFieldSpec {
120 field: "replaces",
121 scope: "replaces",
122 is_runtime: false,
123 is_optional: false,
124 },
125 DepFieldSpec {
126 field: "provides",
127 scope: "provides",
128 is_runtime: false,
129 is_optional: false,
130 },
131 DepFieldSpec {
132 field: "build-depends",
133 scope: "build-depends",
134 is_runtime: false,
135 is_optional: false,
136 },
137 DepFieldSpec {
138 field: "build-depends-indep",
139 scope: "build-depends-indep",
140 is_runtime: false,
141 is_optional: false,
142 },
143 DepFieldSpec {
144 field: "build-conflicts",
145 scope: "build-conflicts",
146 is_runtime: false,
147 is_optional: false,
148 },
149];
150
151pub struct DebianControlParser;
156
157impl PackageParser for DebianControlParser {
158 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
159
160 fn is_match(path: &Path) -> bool {
161 if let Some(name) = path.file_name()
162 && name == "control"
163 && let Some(parent) = path.parent()
164 && let Some(parent_name) = parent.file_name()
165 {
166 return parent_name == "debian";
167 }
168 false
169 }
170
171 fn extract_packages(path: &Path) -> Vec<PackageData> {
172 let content = match read_file_to_string(path) {
173 Ok(c) => c,
174 Err(e) => {
175 warn!("Failed to read debian/control at {:?}: {}", path, e);
176 return Vec::new();
177 }
178 };
179
180 parse_debian_control(&content)
181 }
182}
183
184pub struct DebianInstalledParser;
189
190impl PackageParser for DebianInstalledParser {
191 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
192
193 fn is_match(path: &Path) -> bool {
194 let path_str = path.to_string_lossy();
195 path_str.ends_with("var/lib/dpkg/status")
196 }
197
198 fn extract_packages(path: &Path) -> Vec<PackageData> {
199 let content = match read_file_to_string(path) {
200 Ok(c) => c,
201 Err(e) => {
202 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
203 return Vec::new();
204 }
205 };
206
207 parse_dpkg_status(&content)
208 }
209}
210
211pub struct DebianDistrolessInstalledParser;
212
213impl PackageParser for DebianDistrolessInstalledParser {
214 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216 fn is_match(path: &Path) -> bool {
217 let path_str = path.to_string_lossy();
218 path_str.contains("var/lib/dpkg/status.d/")
219 }
220
221 fn extract_packages(path: &Path) -> Vec<PackageData> {
222 let content = match read_file_to_string(path) {
223 Ok(c) => c,
224 Err(e) => {
225 warn!("Failed to read distroless status file at {:?}: {}", path, e);
226 return vec![default_package_data(
227 DatasourceId::DebianDistrolessInstalledDb,
228 )];
229 }
230 };
231
232 vec![parse_distroless_status(&content)]
233 }
234}
235
236fn parse_distroless_status(content: &str) -> PackageData {
237 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
238
239 if paragraphs.is_empty() {
240 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
241 }
242
243 build_package_from_paragraph(
244 ¶graphs[0],
245 None,
246 DatasourceId::DebianDistrolessInstalledDb,
247 )
248 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
249}
250
251fn parse_debian_control(content: &str) -> Vec<PackageData> {
261 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
262 if paragraphs.is_empty() {
263 return Vec::new();
264 }
265
266 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
268
269 let (source_paragraph, binary_start) = if has_source {
270 (Some(¶graphs[0]), 1)
271 } else {
272 (None, 0)
273 };
274
275 let source_meta = source_paragraph.map(extract_source_meta);
277
278 let mut packages = Vec::new();
279
280 for para in ¶graphs[binary_start..] {
281 if let Some(pkg) = build_package_from_paragraph(
282 para,
283 source_meta.as_ref(),
284 DatasourceId::DebianControlInSource,
285 ) {
286 packages.push(pkg);
287 }
288 }
289
290 if packages.is_empty()
291 && let Some(source_para) = source_paragraph
292 && let Some(pkg) = build_package_from_source_paragraph(source_para)
293 {
294 packages.push(pkg);
295 }
296
297 packages
298}
299
300fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
305 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
306 let mut packages = Vec::new();
307
308 for para in ¶graphs {
309 let status = rfc822::get_header_first(¶.headers, "status");
310 if status.as_deref() != Some("install ok installed") {
311 continue;
312 }
313
314 if let Some(pkg) =
315 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
316 {
317 packages.push(pkg);
318 }
319 }
320
321 packages
322}
323
324struct SourceMeta {
329 parties: Vec<Party>,
330 homepage_url: Option<String>,
331 vcs_url: Option<String>,
332 code_view_url: Option<String>,
333 bug_tracking_url: Option<String>,
334}
335
336fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
337 let mut parties = Vec::new();
338
339 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
341 let (name, email) = split_name_email(&maintainer);
342 parties.push(Party {
343 r#type: Some("person".to_string()),
344 role: Some("maintainer".to_string()),
345 name,
346 email,
347 url: None,
348 organization: None,
349 organization_url: None,
350 timezone: None,
351 });
352 }
353
354 if let Some(orig_maintainer) =
356 rfc822::get_header_first(¶graph.headers, "original-maintainer")
357 {
358 let (name, email) = split_name_email(&orig_maintainer);
359 parties.push(Party {
360 r#type: Some("person".to_string()),
361 role: Some("maintainer".to_string()),
362 name,
363 email,
364 url: None,
365 organization: None,
366 organization_url: None,
367 timezone: None,
368 });
369 }
370
371 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
373 for uploader in uploaders_str.split(',') {
374 let trimmed = uploader.trim();
375 if !trimmed.is_empty() {
376 let (name, email) = split_name_email(trimmed);
377 parties.push(Party {
378 r#type: Some("person".to_string()),
379 role: Some("uploader".to_string()),
380 name,
381 email,
382 url: None,
383 organization: None,
384 organization_url: None,
385 timezone: None,
386 });
387 }
388 }
389 }
390
391 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage");
392
393 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
395 .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
396
397 let code_view_url = rfc822::get_header_first(¶graph.headers, "vcs-browser");
398
399 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs");
400
401 SourceMeta {
402 parties,
403 homepage_url,
404 vcs_url,
405 code_view_url,
406 bug_tracking_url,
407 }
408}
409
410fn build_package_from_paragraph(
415 paragraph: &Rfc822Metadata,
416 source_meta: Option<&SourceMeta>,
417 datasource_id: DatasourceId,
418) -> Option<PackageData> {
419 let name = rfc822::get_header_first(¶graph.headers, "package")?;
420 let version = rfc822::get_header_first(¶graph.headers, "version");
421 let architecture = rfc822::get_header_first(¶graph.headers, "architecture");
422 let description = rfc822::get_header_first(¶graph.headers, "description");
423 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
424 let homepage = rfc822::get_header_first(¶graph.headers, "homepage");
425 let source_field = rfc822::get_header_first(¶graph.headers, "source");
426 let section = rfc822::get_header_first(¶graph.headers, "section");
427 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
428 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
429
430 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
431
432 let parties = if let Some(meta) = source_meta {
434 meta.parties.clone()
435 } else {
436 let mut p = Vec::new();
437 if let Some(m) = &maintainer_str {
438 let (n, e) = split_name_email(m);
439 p.push(Party {
440 r#type: Some("person".to_string()),
441 role: Some("maintainer".to_string()),
442 name: n,
443 email: e,
444 url: None,
445 organization: None,
446 organization_url: None,
447 timezone: None,
448 });
449 }
450 p
451 };
452
453 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
455 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
456 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
457 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
458
459 let purl = build_debian_purl(
461 &name,
462 version.as_deref(),
463 namespace.as_deref(),
464 architecture.as_deref(),
465 );
466
467 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
469
470 let keywords = section.into_iter().collect();
472
473 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
475
476 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
478 if let Some(ma) = &multi_arch
479 && !ma.is_empty()
480 {
481 extra_data.insert(
482 "multi_arch".to_string(),
483 serde_json::Value::String(ma.clone()),
484 );
485 }
486 if let Some(size_str) = &installed_size
487 && let Ok(size) = size_str.parse::<u64>()
488 {
489 extra_data.insert(
490 "installed_size".to_string(),
491 serde_json::Value::Number(serde_json::Number::from(size)),
492 );
493 }
494
495 let qualifiers = architecture.as_ref().map(|arch| {
497 let mut q = HashMap::new();
498 q.insert("arch".to_string(), arch.clone());
499 q
500 });
501
502 Some(PackageData {
503 package_type: Some(PACKAGE_TYPE),
504 namespace: namespace.clone(),
505 name: Some(name),
506 version,
507 qualifiers,
508 subpath: None,
509 primary_language: None,
510 description,
511 release_date: None,
512 parties,
513 keywords,
514 homepage_url,
515 download_url: None,
516 size: None,
517 sha1: None,
518 md5: None,
519 sha256: None,
520 sha512: None,
521 bug_tracking_url,
522 code_view_url,
523 vcs_url,
524 copyright: None,
525 holder: None,
526 declared_license_expression: None,
527 declared_license_expression_spdx: None,
528 license_detections: Vec::new(),
529 other_license_expression: None,
530 other_license_expression_spdx: None,
531 other_license_detections: Vec::new(),
532 extracted_license_statement: None,
533 notice_text: None,
534 source_packages,
535 file_references: Vec::new(),
536 is_private: false,
537 is_virtual: false,
538 extra_data: if extra_data.is_empty() {
539 None
540 } else {
541 Some(extra_data)
542 },
543 dependencies,
544 repository_homepage_url: None,
545 repository_download_url: None,
546 api_data_url: None,
547 datasource_id: Some(datasource_id),
548 purl,
549 })
550}
551
552fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
553 let name = rfc822::get_header_first(¶graph.headers, "source")?;
554 let version = rfc822::get_header_first(¶graph.headers, "version");
555 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
556
557 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
558 let source_meta = extract_source_meta(paragraph);
559
560 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
561 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
562
563 let section = rfc822::get_header_first(¶graph.headers, "section");
564 let keywords = section.into_iter().collect();
565
566 Some(PackageData {
567 package_type: Some(PACKAGE_TYPE),
568 namespace: namespace.clone(),
569 name: Some(name),
570 version,
571 qualifiers: None,
572 subpath: None,
573 primary_language: None,
574 description: None,
575 release_date: None,
576 parties: source_meta.parties,
577 keywords,
578 homepage_url: source_meta.homepage_url,
579 download_url: None,
580 size: None,
581 sha1: None,
582 md5: None,
583 sha256: None,
584 sha512: None,
585 bug_tracking_url: source_meta.bug_tracking_url,
586 code_view_url: source_meta.code_view_url,
587 vcs_url: source_meta.vcs_url,
588 copyright: None,
589 holder: None,
590 declared_license_expression: None,
591 declared_license_expression_spdx: None,
592 license_detections: Vec::new(),
593 other_license_expression: None,
594 other_license_expression_spdx: None,
595 other_license_detections: Vec::new(),
596 extracted_license_statement: None,
597 notice_text: None,
598 source_packages: Vec::new(),
599 file_references: Vec::new(),
600 is_private: false,
601 is_virtual: false,
602 extra_data: None,
603 dependencies,
604 repository_homepage_url: None,
605 repository_download_url: None,
606 api_data_url: None,
607 datasource_id: Some(DatasourceId::DebianControlInSource),
608 purl,
609 })
610}
611
612fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
617 if let Some(ver) = version {
619 let ver_lower = ver.to_lowercase();
620 for clue in VERSION_CLUES_UBUNTU {
621 if ver_lower.contains(clue) {
622 return Some("ubuntu".to_string());
623 }
624 }
625 for clue in VERSION_CLUES_DEBIAN {
626 if ver_lower.contains(clue) {
627 return Some("debian".to_string());
628 }
629 }
630 }
631
632 if let Some(maint) = maintainer {
634 let maint_lower = maint.to_lowercase();
635 for clue in MAINTAINER_CLUES_UBUNTU {
636 if maint_lower.contains(clue) {
637 return Some("ubuntu".to_string());
638 }
639 }
640 for clue in MAINTAINER_CLUES_DEBIAN {
641 if maint_lower.contains(clue) {
642 return Some("debian".to_string());
643 }
644 }
645 }
646
647 Some("debian".to_string())
649}
650
651fn build_debian_purl(
656 name: &str,
657 version: Option<&str>,
658 namespace: Option<&str>,
659 architecture: Option<&str>,
660) -> Option<String> {
661 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
662
663 if let Some(ns) = namespace {
664 purl.with_namespace(ns).ok()?;
665 }
666
667 if let Some(ver) = version {
668 purl.with_version(ver).ok()?;
669 }
670
671 if let Some(arch) = architecture {
672 purl.add_qualifier("arch", arch).ok()?;
673 }
674
675 Some(purl.to_string())
676}
677
678fn parse_all_dependencies(
683 headers: &HashMap<String, Vec<String>>,
684 namespace: Option<&str>,
685) -> Vec<Dependency> {
686 let mut dependencies = Vec::new();
687
688 for spec in DEP_FIELDS {
689 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
690 dependencies.extend(parse_dependency_field(
691 &dep_str,
692 spec.scope,
693 spec.is_runtime,
694 spec.is_optional,
695 namespace,
696 ));
697 }
698 }
699
700 dependencies
701}
702
703fn parse_dependency_field(
712 dep_str: &str,
713 scope: &str,
714 is_runtime: bool,
715 is_optional: bool,
716 namespace: Option<&str>,
717) -> Vec<Dependency> {
718 let mut deps = Vec::new();
719
720 let dep_re = Regex::new(
723 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
724 )
725 .unwrap();
726
727 for group in dep_str.split(',') {
728 let group = group.trim();
729 if group.is_empty() {
730 continue;
731 }
732
733 let alternatives: Vec<&str> = group.split('|').collect();
735 let has_alternatives = alternatives.len() > 1;
736
737 for alt in alternatives {
738 let alt = alt.trim();
739 if alt.is_empty() {
740 continue;
741 }
742
743 if let Some(caps) = dep_re.captures(alt) {
744 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
745 let operator = caps.get(2).map(|m| m.as_str().trim());
746 let version = caps.get(3).map(|m| m.as_str().trim());
747
748 if pkg_name.is_empty() {
749 continue;
750 }
751
752 if pkg_name.starts_with('$') {
754 continue;
755 }
756
757 let extracted_requirement = match (operator, version) {
758 (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
759 _ => None,
760 };
761
762 let is_pinned = operator.map(|op| op == "=");
763
764 let purl = build_debian_purl(pkg_name, None, namespace, None);
765
766 deps.push(Dependency {
767 purl,
768 extracted_requirement,
769 scope: Some(scope.to_string()),
770 is_runtime: Some(is_runtime),
771 is_optional: Some(is_optional || has_alternatives),
772 is_pinned,
773 is_direct: Some(true),
774 resolved_package: None,
775 extra_data: None,
776 });
777 }
778 }
779 }
780
781 deps
782}
783
784fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
792 let Some(source_str) = source else {
793 return Vec::new();
794 };
795
796 let trimmed = source_str.trim();
797 if trimmed.is_empty() {
798 return Vec::new();
799 }
800
801 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
803 let name = trimmed[..paren_start].trim();
804 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
805 (
806 name,
807 if version.is_empty() {
808 None
809 } else {
810 Some(version)
811 },
812 )
813 } else {
814 (trimmed, None)
815 };
816
817 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
818 vec![purl]
819 } else {
820 Vec::new()
821 }
822}
823
824crate::register_parser!(
829 "Debian source package control file (debian/control)",
830 &["**/debian/control"],
831 "deb",
832 "",
833 Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
834);
835
836pub struct DebianDscParser;
845
846impl PackageParser for DebianDscParser {
847 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
848
849 fn is_match(path: &Path) -> bool {
850 path.extension().and_then(|e| e.to_str()) == Some("dsc")
851 }
852
853 fn extract_packages(path: &Path) -> Vec<PackageData> {
854 let content = match read_file_to_string(path) {
855 Ok(c) => c,
856 Err(e) => {
857 warn!("Failed to read .dsc file {:?}: {}", path, e);
858 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
859 }
860 };
861
862 vec![parse_dsc_content(&content)]
863 }
864}
865
/// Removes OpenPGP clear-sign armor from `content`, keeping the signed text.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` armor
/// headers, the first blank line that precedes any kept text, and everything
/// from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`. Every kept line is
/// re-emitted followed by `\n`.
fn strip_pgp_signature(content: &str) -> String {
    const SIGNED_MESSAGE_START: &str = "-----BEGIN PGP SIGNED MESSAGE-----";
    const SIGNATURE_START: &str = "-----BEGIN PGP SIGNATURE-----";
    const SIGNATURE_END: &str = "-----END PGP SIGNATURE-----";

    let mut cleartext = String::new();
    let mut in_armor_header = false;
    let mut inside_signature = false;

    for line in content.lines() {
        if line.starts_with(SIGNED_MESSAGE_START) {
            in_armor_header = true;
        } else if line.starts_with(SIGNATURE_START) {
            inside_signature = true;
        } else if line.starts_with(SIGNATURE_END) {
            inside_signature = false;
        } else if in_armor_header && line.starts_with("Hash:") {
            // Armor header line: not part of the signed text.
        } else if in_armor_header && line.is_empty() && cleartext.is_empty() {
            // Blank separator between the armor headers and the signed text.
            in_armor_header = false;
        } else if !inside_signature {
            cleartext.push_str(line);
            cleartext.push('\n');
        }
    }

    cleartext
}
899
900fn parse_dsc_content(content: &str) -> PackageData {
901 let clean_content = strip_pgp_signature(content);
902 let metadata = rfc822::parse_rfc822_content(&clean_content);
903 let headers = &metadata.headers;
904
905 let name = rfc822::get_header_first(headers, "source");
906 let version = rfc822::get_header_first(headers, "version");
907 let architecture = rfc822::get_header_first(headers, "architecture");
908 let namespace = Some("debian".to_string());
909
910 let mut package = PackageData {
911 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
912 package_type: Some(PACKAGE_TYPE),
913 namespace: namespace.clone(),
914 name: name.clone(),
915 version: version.clone(),
916 description: rfc822::get_header_first(headers, "description"),
917 homepage_url: rfc822::get_header_first(headers, "homepage"),
918 vcs_url: rfc822::get_header_first(headers, "vcs-git"),
919 code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
920 ..Default::default()
921 };
922
923 if let (Some(n), Some(v)) = (&name, &version) {
925 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
926 }
927
928 if let Some(n) = &name
930 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
931 {
932 package.source_packages.push(source_purl);
933 }
934
935 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
936 let (name_opt, email_opt) = split_name_email(&maintainer);
937 package.parties.push(Party {
938 r#type: None,
939 role: Some("maintainer".to_string()),
940 name: name_opt,
941 email: email_opt,
942 url: None,
943 organization: None,
944 organization_url: None,
945 timezone: None,
946 });
947 }
948
949 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
950 for uploader in uploaders_str.split(',') {
951 let uploader = uploader.trim();
952 if uploader.is_empty() {
953 continue;
954 }
955 let (name_opt, email_opt) = split_name_email(uploader);
956 package.parties.push(Party {
957 r#type: None,
958 role: Some("uploader".to_string()),
959 name: name_opt,
960 email: email_opt,
961 url: None,
962 organization: None,
963 organization_url: None,
964 timezone: None,
965 });
966 }
967 }
968
969 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
971 package.dependencies.extend(parse_dependency_field(
972 &build_deps,
973 "build",
974 false,
975 false,
976 namespace.as_deref(),
977 ));
978 }
979
980 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
982 let map = package.extra_data.get_or_insert_with(HashMap::new);
983 map.insert("standards_version".to_string(), standards.into());
984 }
985
986 package
987}
988
989pub struct DebianOrigTarParser;
991
992impl PackageParser for DebianOrigTarParser {
993 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
994
995 fn is_match(path: &Path) -> bool {
996 path.file_name()
997 .and_then(|n| n.to_str())
998 .map(|name| name.contains(".orig.tar."))
999 .unwrap_or(false)
1000 }
1001
1002 fn extract_packages(path: &Path) -> Vec<PackageData> {
1003 let filename = match path.file_name().and_then(|n| n.to_str()) {
1004 Some(f) => f,
1005 None => {
1006 return vec![default_package_data(
1007 DatasourceId::DebianOriginalSourceTarball,
1008 )];
1009 }
1010 };
1011
1012 vec![parse_source_tarball_filename(
1013 filename,
1014 DatasourceId::DebianOriginalSourceTarball,
1015 )]
1016 }
1017}
1018
1019pub struct DebianDebianTarParser;
1021
1022impl PackageParser for DebianDebianTarParser {
1023 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1024
1025 fn is_match(path: &Path) -> bool {
1026 path.file_name()
1027 .and_then(|n| n.to_str())
1028 .map(|name| name.contains(".debian.tar."))
1029 .unwrap_or(false)
1030 }
1031
1032 fn extract_packages(path: &Path) -> Vec<PackageData> {
1033 let filename = match path.file_name().and_then(|n| n.to_str()) {
1034 Some(f) => f,
1035 None => {
1036 return vec![default_package_data(
1037 DatasourceId::DebianSourceMetadataTarball,
1038 )];
1039 }
1040 };
1041
1042 vec![parse_source_tarball_filename(
1043 filename,
1044 DatasourceId::DebianSourceMetadataTarball,
1045 )]
1046 }
1047}
1048
1049fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1050 let without_tar_ext = filename
1051 .trim_end_matches(".gz")
1052 .trim_end_matches(".xz")
1053 .trim_end_matches(".bz2")
1054 .trim_end_matches(".tar");
1055
1056 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1057 if parts.len() < 2 {
1058 return default_package_data(datasource_id);
1059 }
1060
1061 let name = parts[0].to_string();
1062 let version_with_suffix = parts[1];
1063
1064 let version = version_with_suffix
1065 .trim_end_matches(".orig")
1066 .trim_end_matches(".debian")
1067 .to_string();
1068
1069 let namespace = Some("debian".to_string());
1070
1071 PackageData {
1072 datasource_id: Some(datasource_id),
1073 package_type: Some(PACKAGE_TYPE),
1074 namespace: namespace.clone(),
1075 name: Some(name.clone()),
1076 version: Some(version.clone()),
1077 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1078 ..Default::default()
1079 }
1080}
1081
1082pub struct DebianInstalledListParser;
1084
1085impl PackageParser for DebianInstalledListParser {
1086 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1087
1088 fn is_match(path: &Path) -> bool {
1089 path.extension().and_then(|e| e.to_str()) == Some("list")
1090 && path
1091 .to_str()
1092 .map(|p| p.contains("/var/lib/dpkg/info/"))
1093 .unwrap_or(false)
1094 }
1095
1096 fn extract_packages(path: &Path) -> Vec<PackageData> {
1097 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1098 Some(f) => f,
1099 None => {
1100 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1101 }
1102 };
1103
1104 let content = match read_file_to_string(path) {
1105 Ok(c) => c,
1106 Err(e) => {
1107 warn!("Failed to read .list file {:?}: {}", path, e);
1108 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1109 }
1110 };
1111
1112 vec![parse_debian_file_list(
1113 &content,
1114 filename,
1115 DatasourceId::DebianInstalledFilesList,
1116 )]
1117 }
1118}
1119
1120pub struct DebianInstalledMd5sumsParser;
1122
1123impl PackageParser for DebianInstalledMd5sumsParser {
1124 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1125
1126 fn is_match(path: &Path) -> bool {
1127 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1128 && path
1129 .to_str()
1130 .map(|p| p.contains("/var/lib/dpkg/info/"))
1131 .unwrap_or(false)
1132 }
1133
1134 fn extract_packages(path: &Path) -> Vec<PackageData> {
1135 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1136 Some(f) => f,
1137 None => {
1138 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1139 }
1140 };
1141
1142 let content = match read_file_to_string(path) {
1143 Ok(c) => c,
1144 Err(e) => {
1145 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1146 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1147 }
1148 };
1149
1150 vec![parse_debian_file_list(
1151 &content,
1152 filename,
1153 DatasourceId::DebianInstalledMd5Sums,
1154 )]
1155 }
1156}
1157
/// Paths that `parse_debian_file_list` drops from a file listing when a
/// line's path is exactly one of these values.
const IGNORED_ROOT_DIRS: &[&str] = &[
    "/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var",
];
1159
1160fn parse_debian_file_list(
1161 content: &str,
1162 filename: &str,
1163 datasource_id: DatasourceId,
1164) -> PackageData {
1165 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1166 (Some(pkg.to_string()), Some(arch.to_string()))
1167 } else if filename == "md5sums" {
1168 (None, None)
1169 } else {
1170 (Some(filename.to_string()), None)
1171 };
1172
1173 let mut file_references = Vec::new();
1174
1175 for line in content.lines() {
1176 let line = line.trim();
1177 if line.is_empty() || line.starts_with('#') {
1178 continue;
1179 }
1180
1181 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1182 (Some(hash.trim().to_string()), p.trim())
1183 } else {
1184 (None, line)
1185 };
1186
1187 if IGNORED_ROOT_DIRS.contains(&path) {
1188 continue;
1189 }
1190
1191 file_references.push(FileReference {
1192 path: path.to_string(),
1193 size: None,
1194 sha1: None,
1195 md5: md5sum,
1196 sha256: None,
1197 sha512: None,
1198 extra_data: None,
1199 });
1200 }
1201
1202 if file_references.is_empty() {
1203 return default_package_data(datasource_id);
1204 }
1205
1206 let namespace = Some("debian".to_string());
1207 let mut package = PackageData {
1208 datasource_id: Some(datasource_id),
1209 package_type: Some(PACKAGE_TYPE),
1210 namespace: namespace.clone(),
1211 name: name.clone(),
1212 file_references,
1213 ..Default::default()
1214 };
1215
1216 if let Some(n) = &name {
1217 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1218 }
1219
1220 package
1221}
1222
1223pub struct DebianCopyrightParser;
1225
1226impl PackageParser for DebianCopyrightParser {
1227 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1228
1229 fn is_match(path: &Path) -> bool {
1230 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1231 if filename != "copyright" {
1232 return false;
1233 }
1234 let path_str = path.to_string_lossy();
1235 path_str.contains("/debian/")
1236 || path_str.contains("/usr/share/doc/")
1237 || path_str.ends_with("debian/copyright")
1238 } else {
1239 false
1240 }
1241 }
1242
1243 fn extract_packages(path: &Path) -> Vec<PackageData> {
1244 let content = match read_file_to_string(path) {
1245 Ok(c) => c,
1246 Err(e) => {
1247 warn!("Failed to read copyright file {:?}: {}", path, e);
1248 return vec![default_package_data(DatasourceId::DebianCopyright)];
1249 }
1250 };
1251
1252 let package_name = extract_package_name_from_path(path);
1253 vec![parse_copyright_file(&content, package_name.as_deref())]
1254 }
1255}
1256
/// Returns the path component that directly follows the first `doc`
/// component that is itself followed by a normal component, e.g. `libfoo`
/// for `/usr/share/doc/libfoo/copyright`.
///
/// Returns `None` when no such pair exists or when the following component
/// is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    let components: Vec<_> = path.components().collect();

    components
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (std::path::Component::Normal(dir), std::path::Component::Normal(next))
                if dir.to_str() == Some("doc") =>
            {
                Some(next)
            }
            _ => None,
        })
        // The search stops at the first match even if it is not UTF-8.
        .and_then(|next| next.to_str())
        .map(str::to_string)
}
1271
1272fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1273 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1274
1275 let is_dep5 = paragraphs
1276 .first()
1277 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1278 .is_some();
1279
1280 let namespace = Some("debian".to_string());
1281 let mut parties = Vec::new();
1282 let mut license_statements = Vec::new();
1283 let mut primary_license_detection = None;
1284 let mut header_license_detection = None;
1285 let mut other_license_detections = Vec::new();
1286
1287 if is_dep5 {
1288 for para in ¶graphs {
1289 if let Some(copyright_text) =
1290 rfc822::get_header_first(¶.metadata.headers, "copyright")
1291 {
1292 for holder in parse_copyright_holders(©right_text) {
1293 if !holder.is_empty() {
1294 parties.push(Party {
1295 r#type: None,
1296 role: Some("copyright-holder".to_string()),
1297 name: Some(holder),
1298 email: None,
1299 url: None,
1300 organization: None,
1301 organization_url: None,
1302 timezone: None,
1303 });
1304 }
1305 }
1306 }
1307
1308 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1309 let license_name = license.lines().next().unwrap_or(&license).trim();
1310 if !license_name.is_empty()
1311 && !license_statements.contains(&license_name.to_string())
1312 {
1313 license_statements.push(license_name.to_string());
1314 }
1315
1316 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1317 let detection =
1318 build_primary_license_detection(license_name, matched_text, line_no);
1319 let is_header_paragraph =
1320 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1321 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1322 == Some("*")
1323 {
1324 primary_license_detection = Some(detection);
1325 } else if is_header_paragraph {
1326 header_license_detection.get_or_insert(detection);
1327 } else {
1328 other_license_detections.push(detection);
1329 }
1330 }
1331 }
1332 }
1333
1334 if primary_license_detection.is_none() && header_license_detection.is_some() {
1335 primary_license_detection = header_license_detection;
1336 }
1337 } else {
1338 let copyright_block = extract_unstructured_field(content, "Copyright:");
1339 if let Some(text) = copyright_block {
1340 for holder in parse_copyright_holders(&text) {
1341 if !holder.is_empty() {
1342 parties.push(Party {
1343 r#type: None,
1344 role: Some("copyright-holder".to_string()),
1345 name: Some(holder),
1346 email: None,
1347 url: None,
1348 organization: None,
1349 organization_url: None,
1350 timezone: None,
1351 });
1352 }
1353 }
1354 }
1355
1356 let license_block = extract_unstructured_field(content, "License:");
1357 if let Some(text) = license_block {
1358 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1359 }
1360 }
1361
1362 let extracted_license_statement = if license_statements.is_empty() {
1363 None
1364 } else {
1365 Some(license_statements.join(" AND "))
1366 };
1367
1368 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1369 let declared_license_expression = license_detections
1370 .first()
1371 .map(|detection| detection.license_expression.clone());
1372 let declared_license_expression_spdx = license_detections
1373 .first()
1374 .map(|detection| detection.license_expression_spdx.clone());
1375 let other_license_expression = combine_license_expressions(
1376 other_license_detections
1377 .iter()
1378 .map(|detection| detection.license_expression.clone()),
1379 );
1380 let other_license_expression_spdx = combine_license_expressions(
1381 other_license_detections
1382 .iter()
1383 .map(|detection| detection.license_expression_spdx.clone()),
1384 );
1385
1386 PackageData {
1387 datasource_id: Some(DatasourceId::DebianCopyright),
1388 package_type: Some(PACKAGE_TYPE),
1389 namespace: namespace.clone(),
1390 name: package_name.map(|s| s.to_string()),
1391 parties,
1392 declared_license_expression,
1393 declared_license_expression_spdx,
1394 license_detections,
1395 other_license_expression,
1396 other_license_expression_spdx,
1397 other_license_detections,
1398 extracted_license_statement,
1399 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1400 ..Default::default()
1401 }
1402}
1403
/// One blank-line-delimited paragraph of a copyright file, as produced by
/// `finalize_copyright_paragraph`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Parsed headers of the paragraph, keyed by lower-cased field name.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (trailing whitespace
    /// removed) together with its 1-based line number in the whole file, if any.
    license_header_line: Option<(String, usize)>,
}
1409
1410fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1411 let mut paragraphs = Vec::new();
1412 let mut current_lines = Vec::new();
1413 let mut current_start_line = 1usize;
1414
1415 for (idx, line) in content.lines().enumerate() {
1416 let line_no = idx + 1;
1417 if line.is_empty() {
1418 if !current_lines.is_empty() {
1419 paragraphs.push(finalize_copyright_paragraph(
1420 std::mem::take(&mut current_lines),
1421 current_start_line,
1422 ));
1423 }
1424 current_start_line = line_no + 1;
1425 } else {
1426 if current_lines.is_empty() {
1427 current_start_line = line_no;
1428 }
1429 current_lines.push(line.to_string());
1430 }
1431 }
1432
1433 if !current_lines.is_empty() {
1434 paragraphs.push(finalize_copyright_paragraph(
1435 current_lines,
1436 current_start_line,
1437 ));
1438 }
1439
1440 paragraphs
1441}
1442
1443fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1444 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1445 let mut current_name: Option<String> = None;
1446 let mut current_value = String::new();
1447 let mut license_header_line = None;
1448
1449 for (idx, line) in raw_lines.iter().enumerate() {
1450 if line.starts_with(' ') || line.starts_with('\t') {
1451 if current_name.is_some() {
1452 current_value.push('\n');
1453 current_value.push_str(line);
1454 }
1455 continue;
1456 }
1457
1458 if let Some(name) = current_name.take() {
1459 add_copyright_header_value(&mut headers, &name, ¤t_value);
1460 current_value.clear();
1461 }
1462
1463 if let Some((name, value)) = line.split_once(':') {
1464 let normalized_name = name.trim().to_ascii_lowercase();
1465 if normalized_name == "license" && license_header_line.is_none() {
1466 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1467 }
1468 current_name = Some(normalized_name);
1469 current_value = value.trim_start().to_string();
1470 }
1471 }
1472
1473 if let Some(name) = current_name.take() {
1474 add_copyright_header_value(&mut headers, &name, ¤t_value);
1475 }
1476
1477 CopyrightParagraph {
1478 metadata: Rfc822Metadata {
1479 headers,
1480 body: String::new(),
1481 },
1482 license_header_line,
1483 }
1484}
1485
/// Records `value` under `name` in `headers`.
///
/// The entry for `name` is always created; the value is appended with its
/// trailing whitespace removed, and skipped entirely when nothing remains.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1493
1494fn build_primary_license_detection(
1495 license_name: &str,
1496 matched_text: String,
1497 line_no: usize,
1498) -> LicenseDetection {
1499 let (license_expression, license_expression_spdx) = normalize_debian_license_name(license_name);
1500
1501 LicenseDetection {
1502 license_expression: license_expression.clone(),
1503 license_expression_spdx: license_expression_spdx.clone(),
1504 matches: vec![Match {
1505 license_expression,
1506 license_expression_spdx,
1507 from_file: None,
1508 start_line: line_no,
1509 end_line: line_no,
1510 matcher: Some("1-spdx-id".to_string()),
1511 score: 100.0,
1512 matched_length: Some(license_name.split_whitespace().count()),
1513 match_coverage: Some(100.0),
1514 rule_relevance: Some(100),
1515 rule_identifier: None,
1516 rule_url: None,
1517 matched_text: Some(matched_text),
1518 }],
1519 identifier: None,
1520 }
1521}
1522
/// Maps a Debian short license name to a `(license key, SPDX expression)` pair.
///
/// Names missing from the table fall back to `(lower-cased name, name)`.
/// Surrounding whitespace is ignored; the lookup is case-sensitive.
fn normalize_debian_license_name(license_name: &str) -> (String, String) {
    // (Debian short name, license key, SPDX expression)
    const KNOWN_LICENSES: &[(&str, &str, &str)] = &[
        ("GPL-2+", "gpl-2.0-plus", "GPL-2.0-or-later"),
        ("GPL-2", "gpl-2.0", "GPL-2.0-only"),
        ("LGPL-2+", "lgpl-2.0-plus", "LGPL-2.0-or-later"),
        ("LGPL-2.1", "lgpl-2.1", "LGPL-2.1-only"),
        ("LGPL-2.1+", "lgpl-2.1-plus", "LGPL-2.1-or-later"),
        ("LGPL-3+", "lgpl-3.0-plus", "LGPL-3.0-or-later"),
        ("MIT", "mit", "MIT"),
        ("BSD-4-clause", "bsd-original-uc", "BSD-4-Clause-UC"),
        (
            "public-domain",
            "public-domain",
            "LicenseRef-provenant-public-domain",
        ),
    ];

    let wanted = license_name.trim();
    KNOWN_LICENSES
        .iter()
        .find(|(debian_name, _, _)| *debian_name == wanted)
        .map(|&(_, key, spdx)| (key.to_owned(), spdx.to_owned()))
        .unwrap_or_else(|| (wanted.to_ascii_lowercase(), wanted.to_owned()))
}
1540
/// Extracts copyright holder names from the value of a copyright statement.
///
/// Each non-empty line is handled on its own:
/// 1. leading copyright markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`)
///    are removed, in any order and with any whitespace between them;
/// 2. everything before the first alphabetic character (years, ranges,
///    punctuation) is dropped;
/// 3. the remainder is kept as a holder when it is longer than two bytes.
///
/// Lines that hold no alphabetic character after the markers yield nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: &[&str] = &["Copyright", "copyright", "(C)", "(c)", "©"];

    /// Removes every leading marker, skipping whitespace between them, so that
    /// `Copyright (C) 2020 Foo` is reduced to `2020 Foo`.
    fn strip_markers(line: &str) -> &str {
        let mut rest = line.trim();
        while let Some(after) = MARKERS.iter().find_map(|marker| rest.strip_prefix(marker)) {
            rest = after.trim_start();
        }
        rest
    }

    text.lines()
        .filter_map(|line| {
            let statement = strip_markers(line);
            // Years and separators precede the name; the name starts at the
            // first alphabetic character.
            let name_start = statement.find(char::is_alphabetic)?;
            let holder = &statement[name_start..];
            (holder.len() > 2).then(|| holder.to_string())
        })
        .collect()
}
1573
/// Collects the text of a `field_name` block from a free-form copyright file.
///
/// A line starting with `field_name` opens the block (the remainder of that
/// line is the first piece). Following lines that start with whitespace are
/// appended, blank lines are skipped, and the first other non-blank line that
/// does not itself start with `field_name` ends the scan. Pieces are trimmed
/// and joined with `\n`; `None` is returned when nothing but whitespace was
/// collected.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut inside_field = false;

    for line in content.lines() {
        if line.starts_with(field_name) {
            inside_field = true;
            pieces.push(line.trim_start_matches(field_name).trim());
            continue;
        }
        if !inside_field {
            continue;
        }

        if line.starts_with(char::is_whitespace) {
            pieces.push(line.trim());
        } else if !line.trim().is_empty() {
            // An unindented, non-blank line belongs to something else.
            break;
        }
    }

    let joined = pieces.join("\n");
    match joined.trim() {
        "" => None,
        body => Some(body.to_owned()),
    }
}
1600
1601pub struct DebianDebParser;
1603
1604impl PackageParser for DebianDebParser {
1605 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1606
1607 fn is_match(path: &Path) -> bool {
1608 path.extension().and_then(|e| e.to_str()) == Some("deb")
1609 }
1610
1611 fn extract_packages(path: &Path) -> Vec<PackageData> {
1612 if let Ok(data) = extract_deb_archive(path) {
1614 return vec![data];
1615 }
1616
1617 let filename = match path.file_name().and_then(|n| n.to_str()) {
1619 Some(f) => f,
1620 None => {
1621 return vec![default_package_data(DatasourceId::DebianDeb)];
1622 }
1623 };
1624
1625 vec![parse_deb_filename(filename)]
1626 }
1627}
1628
1629fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1630 use flate2::read::GzDecoder;
1631 use liblzma::read::XzDecoder;
1632 use std::io::{Cursor, Read};
1633
1634 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1635
1636 let mut archive = ar::Archive::new(file);
1637 let mut package: Option<PackageData> = None;
1638
1639 while let Some(entry_result) = archive.next_entry() {
1640 let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1641
1642 let entry_name = std::str::from_utf8(entry.header().identifier())
1643 .map_err(|e| format!("Invalid entry name: {}", e))?;
1644 let entry_name = entry_name.trim().to_string();
1645
1646 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1647 let mut control_data = Vec::new();
1648 entry
1649 .read_to_end(&mut control_data)
1650 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1651
1652 if entry_name.ends_with(".gz") {
1653 let decoder = GzDecoder::new(Cursor::new(control_data));
1654 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1655 package = Some(parsed_package);
1656 }
1657 } else if entry_name.ends_with(".xz") {
1658 let decoder = XzDecoder::new(Cursor::new(control_data));
1659 if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1660 package = Some(parsed_package);
1661 }
1662 }
1663 } else if entry_name.starts_with("data.tar") {
1664 let mut data = Vec::new();
1665 entry
1666 .read_to_end(&mut data)
1667 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1668
1669 let Some(current_package) = package.as_mut() else {
1670 continue;
1671 };
1672
1673 if entry_name.ends_with(".gz") {
1674 let decoder = GzDecoder::new(Cursor::new(data));
1675 merge_deb_data_archive(decoder, current_package)?;
1676 } else if entry_name.ends_with(".xz") {
1677 let decoder = XzDecoder::new(Cursor::new(data));
1678 merge_deb_data_archive(decoder, current_package)?;
1679 }
1680 }
1681 }
1682
1683 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1684}
1685
1686fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1687 use std::io::Read;
1688
1689 let mut tar_archive = tar::Archive::new(reader);
1690
1691 for tar_entry_result in tar_archive
1692 .entries()
1693 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1694 {
1695 let mut tar_entry =
1696 tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1697
1698 let tar_path = tar_entry
1699 .path()
1700 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1701
1702 if tar_path.ends_with("control") {
1703 let mut control_content = String::new();
1704 tar_entry
1705 .read_to_string(&mut control_content)
1706 .map_err(|e| format!("Failed to read control file: {}", e))?;
1707
1708 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1709 if paragraphs.is_empty() {
1710 return Err("No paragraphs in control file".to_string());
1711 }
1712
1713 if let Some(package) =
1714 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
1715 {
1716 return Ok(Some(package));
1717 }
1718
1719 return Err("Failed to parse control file".to_string());
1720 }
1721 }
1722
1723 Ok(None)
1724}
1725
1726fn merge_deb_data_archive<R: std::io::Read>(
1727 reader: R,
1728 package: &mut PackageData,
1729) -> Result<(), String> {
1730 use std::io::Read;
1731
1732 let mut tar_archive = tar::Archive::new(reader);
1733
1734 for tar_entry_result in tar_archive
1735 .entries()
1736 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1737 {
1738 let mut tar_entry =
1739 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1740
1741 let tar_path = tar_entry
1742 .path()
1743 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1744 let tar_path_str = tar_path.to_string_lossy();
1745
1746 if tar_path_str.ends_with(&format!(
1747 "/usr/share/doc/{}/copyright",
1748 package.name.as_deref().unwrap_or_default()
1749 )) || tar_path_str.ends_with(&format!(
1750 "usr/share/doc/{}/copyright",
1751 package.name.as_deref().unwrap_or_default()
1752 )) {
1753 let mut copyright_content = String::new();
1754 tar_entry
1755 .read_to_string(&mut copyright_content)
1756 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1757
1758 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
1759 merge_debian_copyright_into_package(package, ©right_pkg);
1760 break;
1761 }
1762 }
1763
1764 Ok(())
1765}
1766
1767fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1768 if target.extracted_license_statement.is_none() {
1769 target.extracted_license_statement = copyright.extracted_license_statement.clone();
1770 }
1771
1772 for party in ©right.parties {
1773 if !target.parties.iter().any(|existing| {
1774 existing.r#type == party.r#type
1775 && existing.role == party.role
1776 && existing.name == party.name
1777 && existing.email == party.email
1778 && existing.url == party.url
1779 && existing.organization == party.organization
1780 && existing.organization_url == party.organization_url
1781 && existing.timezone == party.timezone
1782 }) {
1783 target.parties.push(party.clone());
1784 }
1785 }
1786}
1787
1788fn parse_deb_filename(filename: &str) -> PackageData {
1789 let without_ext = filename.trim_end_matches(".deb");
1790
1791 let parts: Vec<&str> = without_ext.split('_').collect();
1792 if parts.len() < 2 {
1793 return default_package_data(DatasourceId::DebianDeb);
1794 }
1795
1796 let name = parts[0].to_string();
1797 let version = parts[1].to_string();
1798 let architecture = if parts.len() >= 3 {
1799 Some(parts[2].to_string())
1800 } else {
1801 None
1802 };
1803
1804 let namespace = Some("debian".to_string());
1805
1806 PackageData {
1807 datasource_id: Some(DatasourceId::DebianDeb),
1808 package_type: Some(PACKAGE_TYPE),
1809 namespace: namespace.clone(),
1810 name: Some(name.clone()),
1811 version: Some(version.clone()),
1812 purl: build_debian_purl(
1813 &name,
1814 Some(&version),
1815 namespace.as_deref(),
1816 architecture.as_deref(),
1817 ),
1818 ..Default::default()
1819 }
1820}
1821
1822pub struct DebianControlInExtractedDebParser;
1828
1829impl PackageParser for DebianControlInExtractedDebParser {
1830 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1831
1832 fn is_match(path: &Path) -> bool {
1833 path.file_name()
1834 .and_then(|n| n.to_str())
1835 .is_some_and(|name| name == "control")
1836 && path
1837 .to_str()
1838 .map(|p| {
1839 p.ends_with("control.tar.gz-extract/control")
1840 || p.ends_with("control.tar.xz-extract/control")
1841 })
1842 .unwrap_or(false)
1843 }
1844
1845 fn extract_packages(path: &Path) -> Vec<PackageData> {
1846 let content = match read_file_to_string(path) {
1847 Ok(c) => c,
1848 Err(e) => {
1849 warn!(
1850 "Failed to read control file in extracted deb {:?}: {}",
1851 path, e
1852 );
1853 return vec![default_package_data(
1854 DatasourceId::DebianControlExtractedDeb,
1855 )];
1856 }
1857 };
1858
1859 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1862 if paragraphs.is_empty() {
1863 return vec![default_package_data(
1864 DatasourceId::DebianControlExtractedDeb,
1865 )];
1866 }
1867
1868 if let Some(pkg) = build_package_from_paragraph(
1869 ¶graphs[0],
1870 None,
1871 DatasourceId::DebianControlExtractedDeb,
1872 ) {
1873 vec![pkg]
1874 } else {
1875 vec![default_package_data(
1876 DatasourceId::DebianControlExtractedDeb,
1877 )]
1878 }
1879 }
1880}
1881
1882pub struct DebianMd5sumInPackageParser;
1884
1885impl PackageParser for DebianMd5sumInPackageParser {
1886 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1887
1888 fn is_match(path: &Path) -> bool {
1889 path.file_name()
1890 .and_then(|n| n.to_str())
1891 .is_some_and(|name| name == "md5sums")
1892 && path
1893 .to_str()
1894 .map(|p| {
1895 p.ends_with("control.tar.gz-extract/md5sums")
1896 || p.ends_with("control.tar.xz-extract/md5sums")
1897 })
1898 .unwrap_or(false)
1899 }
1900
1901 fn extract_packages(path: &Path) -> Vec<PackageData> {
1902 let content = match read_file_to_string(path) {
1903 Ok(c) => c,
1904 Err(e) => {
1905 warn!("Failed to read md5sums file {:?}: {}", path, e);
1906 return vec![default_package_data(
1907 DatasourceId::DebianMd5SumsInExtractedDeb,
1908 )];
1909 }
1910 };
1911
1912 let package_name = extract_package_name_from_deb_path(path);
1913
1914 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
1915 }
1916}
1917
/// Derives the package name from a file inside an extracted `.deb`.
///
/// Expects a layout like
/// `<name>_<version>_<arch>.deb-extract/<control tarball dir>/<file>` and
/// returns the part of the grandparent directory name before the first `_`.
/// Returns `None` when the grandparent directory is missing, is not valid
/// UTF-8, or does not end in `.deb-extract`.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let extract_dir = path
        .parent()
        .and_then(Path::parent)?
        .file_name()?
        .to_str()?;

    let deb_stem = extract_dir
        .strip_suffix("-extract")
        .and_then(|deb_file| deb_file.strip_suffix(".deb"))?;

    deb_stem.split('_').next().map(str::to_owned)
}
1928
1929fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
1930 let mut file_references = Vec::new();
1931
1932 for line in content.lines() {
1933 let line = line.trim();
1934 if line.is_empty() || line.starts_with('#') {
1935 continue;
1936 }
1937
1938 let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find(" ") {
1939 (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
1940 } else if let Some((hash, path)) = line.split_once(' ') {
1941 (Some(hash.trim().to_string()), path.trim())
1942 } else {
1943 (None, line)
1944 };
1945
1946 if IGNORED_ROOT_DIRS.contains(&filepath) {
1947 continue;
1948 }
1949
1950 file_references.push(FileReference {
1951 path: filepath.to_string(),
1952 size: None,
1953 sha1: None,
1954 md5: md5sum,
1955 sha256: None,
1956 sha512: None,
1957 extra_data: None,
1958 });
1959 }
1960
1961 if file_references.is_empty() {
1962 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
1963 }
1964
1965 let namespace = Some("debian".to_string());
1966 let mut package = PackageData {
1967 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
1968 package_type: Some(PACKAGE_TYPE),
1969 namespace: namespace.clone(),
1970 name: package_name.map(|s| s.to_string()),
1971 file_references,
1972 ..Default::default()
1973 };
1974
1975 if let Some(n) = &package.name {
1976 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
1977 }
1978
1979 package
1980}
1981
// Registration entry for the extracted-control-file parser. The glob patterns
// correspond to the path suffixes accepted by
// `DebianControlInExtractedDebParser::is_match`.
// NOTE(review): the meaning of the `"deb"` and `""` arguments is defined by
// `register_parser!`, which is not visible here — confirm against the macro.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for the extracted-md5sums parser. The glob patterns
// correspond to the path suffixes accepted by
// `DebianMd5sumInPackageParser::is_match`.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2003
2004#[cfg(test)]
2005mod tests {
2006 use super::*;
2007 use crate::models::DatasourceId;
2008 use crate::models::PackageType;
2009 use ar::{Builder as ArBuilder, Header as ArHeader};
2010 use flate2::Compression;
2011 use flate2::write::GzEncoder;
2012 use liblzma::write::XzEncoder;
2013 use std::io::Cursor;
2014 use std::path::PathBuf;
2015 use tar::{Builder as TarBuilder, Header as TarHeader};
2016 use tempfile::NamedTempFile;
2017
2018 fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2019 let mut control_tar = Vec::new();
2020 {
2021 let encoder = XzEncoder::new(&mut control_tar, 6);
2022 let mut tar_builder = TarBuilder::new(encoder);
2023
2024 let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2025 let mut header = TarHeader::new_gnu();
2026 header
2027 .set_path("control")
2028 .expect("control tar path should be valid");
2029 header.set_size(control_content.len() as u64);
2030 header.set_mode(0o644);
2031 header.set_cksum();
2032 tar_builder
2033 .append(&header, Cursor::new(control_content))
2034 .expect("control file should be appended to tar.xz");
2035 tar_builder.finish().expect("control tar.xz should finish");
2036 }
2037
2038 let deb = NamedTempFile::new().expect("temp deb file should be created");
2039 {
2040 let mut builder = ArBuilder::new(
2041 deb.reopen()
2042 .expect("temporary deb file should reopen for writing"),
2043 );
2044
2045 let debian_binary = b"2.0\n";
2046 let mut debian_binary_header =
2047 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2048 debian_binary_header.set_mode(0o100644);
2049 builder
2050 .append(&debian_binary_header, Cursor::new(debian_binary))
2051 .expect("debian-binary entry should be appended");
2052
2053 let mut control_header =
2054 ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2055 control_header.set_mode(0o100644);
2056 builder
2057 .append(&control_header, Cursor::new(control_tar))
2058 .expect("control.tar.xz entry should be appended");
2059 }
2060
2061 deb
2062 }
2063
2064 fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2065 let mut control_tar = Vec::new();
2066 {
2067 let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2068 let mut tar_builder = TarBuilder::new(encoder);
2069
2070 let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2071 let mut header = TarHeader::new_gnu();
2072 header
2073 .set_path("control")
2074 .expect("control tar path should be valid");
2075 header.set_size(control_content.len() as u64);
2076 header.set_mode(0o644);
2077 header.set_cksum();
2078 tar_builder
2079 .append(&header, Cursor::new(control_content))
2080 .expect("control file should be appended to tar.gz");
2081 tar_builder.finish().expect("control tar.gz should finish");
2082 }
2083
2084 let mut data_tar = Vec::new();
2085 {
2086 let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2087 let mut tar_builder = TarBuilder::new(encoder);
2088
2089 let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2090 let mut header = TarHeader::new_gnu();
2091 header
2092 .set_path("./usr/share/doc/synthetic/copyright")
2093 .expect("copyright path should be valid");
2094 header.set_size(copyright.len() as u64);
2095 header.set_mode(0o644);
2096 header.set_cksum();
2097 tar_builder
2098 .append(&header, Cursor::new(copyright))
2099 .expect("copyright file should be appended to data tar");
2100 tar_builder.finish().expect("data tar.gz should finish");
2101 }
2102
2103 let deb = NamedTempFile::new().expect("temp deb file should be created");
2104 {
2105 let mut builder = ArBuilder::new(
2106 deb.reopen()
2107 .expect("temporary deb file should reopen for writing"),
2108 );
2109
2110 let debian_binary = b"2.0\n";
2111 let mut debian_binary_header =
2112 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2113 debian_binary_header.set_mode(0o100644);
2114 builder
2115 .append(&debian_binary_header, Cursor::new(debian_binary))
2116 .expect("debian-binary entry should be appended");
2117
2118 let mut control_header =
2119 ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2120 control_header.set_mode(0o100644);
2121 builder
2122 .append(&control_header, Cursor::new(control_tar))
2123 .expect("control.tar.gz entry should be appended");
2124
2125 let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2126 data_header.set_mode(0o100644);
2127 builder
2128 .append(&data_header, Cursor::new(data_tar))
2129 .expect("data.tar.gz entry should be appended");
2130 }
2131
2132 deb
2133 }
2134
2135 #[test]
2138 fn test_detect_namespace_from_ubuntu_version() {
2139 assert_eq!(
2140 detect_namespace(Some("1.0-1ubuntu1"), None),
2141 Some("ubuntu".to_string())
2142 );
2143 }
2144
2145 #[test]
2146 fn test_detect_namespace_from_debian_version() {
2147 assert_eq!(
2148 detect_namespace(Some("1.0-1+deb11u1"), None),
2149 Some("debian".to_string())
2150 );
2151 }
2152
2153 #[test]
2154 fn test_detect_namespace_from_ubuntu_maintainer() {
2155 assert_eq!(
2156 detect_namespace(
2157 None,
2158 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2159 ),
2160 Some("ubuntu".to_string())
2161 );
2162 }
2163
2164 #[test]
2165 fn test_detect_namespace_from_debian_maintainer() {
2166 assert_eq!(
2167 detect_namespace(None, Some("John Doe <john@debian.org>")),
2168 Some("debian".to_string())
2169 );
2170 }
2171
2172 #[test]
2173 fn test_detect_namespace_default() {
2174 assert_eq!(
2175 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2176 Some("debian".to_string())
2177 );
2178 }
2179
2180 #[test]
2181 fn test_detect_namespace_version_takes_priority() {
2182 assert_eq!(
2184 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2185 Some("ubuntu".to_string())
2186 );
2187 }
2188
2189 #[test]
2192 fn test_build_purl_basic() {
2193 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2194 assert_eq!(
2195 purl,
2196 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2197 );
2198 }
2199
2200 #[test]
2201 fn test_build_purl_no_version() {
2202 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2203 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2204 }
2205
2206 #[test]
2207 fn test_build_purl_no_arch() {
2208 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2209 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2210 }
2211
2212 #[test]
2213 fn test_build_purl_no_namespace() {
2214 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2215 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2216 }
2217
2218 #[test]
2221 fn test_parse_simple_dependency() {
2222 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2223 assert_eq!(deps.len(), 1);
2224 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2225 assert_eq!(deps[0].extracted_requirement, None);
2226 assert_eq!(deps[0].scope, Some("depends".to_string()));
2227 }
2228
2229 #[test]
2230 fn test_parse_dependency_with_version() {
2231 let deps =
2232 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2233 assert_eq!(deps.len(), 1);
2234 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2235 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2236 }
2237
2238 #[test]
2239 fn test_parse_dependency_exact_version() {
2240 let deps = parse_dependency_field(
2241 "libc6 (= 2.31-13+deb11u5)",
2242 "depends",
2243 true,
2244 false,
2245 Some("debian"),
2246 );
2247 assert_eq!(deps.len(), 1);
2248 assert_eq!(deps[0].is_pinned, Some(true));
2249 }
2250
2251 #[test]
2252 fn test_parse_dependency_strict_less() {
2253 let deps =
2254 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2255 assert_eq!(deps.len(), 1);
2256 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2257 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2258 }
2259
2260 #[test]
2261 fn test_parse_multiple_dependencies() {
2262 let deps = parse_dependency_field(
2263 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2264 "depends",
2265 true,
2266 false,
2267 Some("debian"),
2268 );
2269 assert_eq!(deps.len(), 3);
2270 }
2271
2272 #[test]
2273 fn test_parse_dependency_alternatives() {
2274 let deps = parse_dependency_field(
2275 "libssl1.1 | libssl3",
2276 "depends",
2277 true,
2278 false,
2279 Some("debian"),
2280 );
2281 assert_eq!(deps.len(), 2);
2282 assert_eq!(deps[0].is_optional, Some(true));
2284 assert_eq!(deps[1].is_optional, Some(true));
2285 }
2286
2287 #[test]
2288 fn test_parse_dependency_skips_substitutions() {
2289 let deps = parse_dependency_field(
2290 "${shlibs:Depends}, ${misc:Depends}, libc6",
2291 "depends",
2292 true,
2293 false,
2294 Some("debian"),
2295 );
2296 assert_eq!(deps.len(), 1);
2297 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2298 }
2299
2300 #[test]
2301 fn test_parse_dependency_with_arch_qualifier() {
2302 let deps = parse_dependency_field(
2304 "libc6 (>= 2.17) [amd64]",
2305 "depends",
2306 true,
2307 false,
2308 Some("debian"),
2309 );
2310 assert_eq!(deps.len(), 1);
2311 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2312 }
2313
2314 #[test]
2315 fn test_parse_empty_dependency() {
2316 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2317 assert!(deps.is_empty());
2318 }
2319
2320 #[test]
2323 fn test_parse_source_field_name_only() {
2324 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2325 assert_eq!(sources.len(), 1);
2326 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2327 }
2328
2329 #[test]
2330 fn test_parse_source_field_with_version() {
2331 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2332 assert_eq!(sources.len(), 1);
2333 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2334 }
2335
2336 #[test]
2337 fn test_parse_source_field_empty() {
2338 let sources = parse_source_field(None, Some("debian"));
2339 assert!(sources.is_empty());
2340 }
2341
2342 #[test]
2345 fn test_parse_debian_control_source_and_binary() {
2346 let content = "\
2347Source: curl
2348Section: web
2349Priority: optional
2350Maintainer: Alessandro Ghedini <ghedo@debian.org>
2351Homepage: https://curl.se/
2352Vcs-Browser: https://salsa.debian.org/debian/curl
2353Vcs-Git: https://salsa.debian.org/debian/curl.git
2354Build-Depends: debhelper (>= 12), libssl-dev
2355
2356Package: curl
2357Architecture: amd64
2358Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2359Description: command line tool for transferring data with URL syntax";
2360
2361 let packages = parse_debian_control(content);
2362 assert_eq!(packages.len(), 1);
2363
2364 let pkg = &packages[0];
2365 assert_eq!(pkg.name, Some("curl".to_string()));
2366 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2367 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2368 assert_eq!(
2369 pkg.vcs_url,
2370 Some("https://salsa.debian.org/debian/curl.git".to_string())
2371 );
2372 assert_eq!(
2373 pkg.code_view_url,
2374 Some("https://salsa.debian.org/debian/curl".to_string())
2375 );
2376
2377 assert_eq!(pkg.parties.len(), 1);
2379 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2380 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2381 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2382
2383 assert!(!pkg.dependencies.is_empty());
2385 }
2386
2387 #[test]
2388 fn test_parse_debian_control_multiple_binary() {
2389 let content = "\
2390Source: gzip
2391Maintainer: Debian Developer <dev@debian.org>
2392
2393Package: gzip
2394Architecture: any
2395Depends: libc6 (>= 2.17)
2396Description: GNU file compression
2397
2398Package: gzip-win32
2399Architecture: all
2400Description: gzip for Windows";
2401
2402 let packages = parse_debian_control(content);
2403 assert_eq!(packages.len(), 2);
2404 assert_eq!(packages[0].name, Some("gzip".to_string()));
2405 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2406
2407 assert_eq!(packages[0].parties.len(), 1);
2409 assert_eq!(packages[1].parties.len(), 1);
2410 }
2411
2412 #[test]
2413 fn test_parse_debian_control_source_only() {
2414 let content = "\
2415Source: my-package
2416Maintainer: Test User <test@debian.org>
2417Build-Depends: debhelper (>= 13)";
2418
2419 let packages = parse_debian_control(content);
2420 assert_eq!(packages.len(), 1);
2421 assert_eq!(packages[0].name, Some("my-package".to_string()));
2422 assert!(!packages[0].dependencies.is_empty());
2424 assert_eq!(
2425 packages[0].dependencies[0].scope,
2426 Some("build-depends".to_string())
2427 );
2428 }
2429
2430 #[test]
2431 fn test_parse_debian_control_with_uploaders() {
2432 let content = "\
2433Source: example
2434Maintainer: Main Dev <main@debian.org>
2435Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2436
2437Package: example
2438Architecture: any
2439Description: test package";
2440
2441 let packages = parse_debian_control(content);
2442 assert_eq!(packages.len(), 1);
2443 assert_eq!(packages[0].parties.len(), 3);
2445 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2446 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2447 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2448 }
2449
2450 #[test]
2451 fn test_parse_debian_control_vcs_git_with_branch() {
2452 let content = "\
2453Source: example
2454Maintainer: Dev <dev@debian.org>
2455Vcs-Git: https://salsa.debian.org/example.git -b main
2456
2457Package: example
2458Architecture: any
2459Description: test";
2460
2461 let packages = parse_debian_control(content);
2462 assert_eq!(packages.len(), 1);
2463 assert_eq!(
2465 packages[0].vcs_url,
2466 Some("https://salsa.debian.org/example.git".to_string())
2467 );
2468 }
2469
2470 #[test]
2471 fn test_parse_debian_control_multi_arch() {
2472 let content = "\
2473Source: example
2474Maintainer: Dev <dev@debian.org>
2475
2476Package: libexample
2477Architecture: any
2478Multi-Arch: same
2479Description: shared library";
2480
2481 let packages = parse_debian_control(content);
2482 assert_eq!(packages.len(), 1);
2483 let extra = packages[0].extra_data.as_ref().unwrap();
2484 assert_eq!(
2485 extra.get("multi_arch"),
2486 Some(&serde_json::Value::String("same".to_string()))
2487 );
2488 }
2489
2490 #[test]
2493 fn test_parse_dpkg_status_basic() {
2494 let content = "\
2495Package: base-files
2496Status: install ok installed
2497Priority: required
2498Section: admin
2499Installed-Size: 391
2500Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2501Architecture: amd64
2502Version: 11ubuntu5.6
2503Description: Debian base system miscellaneous files
2504Homepage: https://tracker.debian.org/pkg/base-files
2505
2506Package: not-installed
2507Status: deinstall ok config-files
2508Architecture: amd64
2509Version: 1.0
2510Description: This should be skipped";
2511
2512 let packages = parse_dpkg_status(content);
2513 assert_eq!(packages.len(), 1);
2514
2515 let pkg = &packages[0];
2516 assert_eq!(pkg.name, Some("base-files".to_string()));
2517 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2518 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2519 assert_eq!(
2520 pkg.datasource_id,
2521 Some(DatasourceId::DebianInstalledStatusDb)
2522 );
2523
2524 let extra = pkg.extra_data.as_ref().unwrap();
2526 assert_eq!(
2527 extra.get("installed_size"),
2528 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2529 );
2530 }
2531
2532 #[test]
2533 fn test_parse_dpkg_status_multiple_installed() {
2534 let content = "\
2535Package: libc6
2536Status: install ok installed
2537Architecture: amd64
2538Version: 2.31-13+deb11u5
2539Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2540Description: GNU C Library
2541
2542Package: zlib1g
2543Status: install ok installed
2544Architecture: amd64
2545Version: 1:1.2.11.dfsg-2+deb11u2
2546Maintainer: Mark Brown <broonie@debian.org>
2547Description: compression library";
2548
2549 let packages = parse_dpkg_status(content);
2550 assert_eq!(packages.len(), 2);
2551 assert_eq!(packages[0].name, Some("libc6".to_string()));
2552 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2553 }
2554
2555 #[test]
2556 fn test_parse_dpkg_status_with_dependencies() {
2557 let content = "\
2558Package: curl
2559Status: install ok installed
2560Architecture: amd64
2561Version: 7.74.0-1.3+deb11u7
2562Maintainer: Alessandro Ghedini <ghedo@debian.org>
2563Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2564Recommends: ca-certificates
2565Description: command line tool for transferring data with URL syntax";
2566
2567 let packages = parse_dpkg_status(content);
2568 assert_eq!(packages.len(), 1);
2569
2570 let deps = &packages[0].dependencies;
2571 assert_eq!(deps.len(), 3);
2573
2574 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2576 assert_eq!(deps[0].scope, Some("depends".to_string()));
2577 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2578
2579 assert_eq!(
2581 deps[2].purl,
2582 Some("pkg:deb/debian/ca-certificates".to_string())
2583 );
2584 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2585 assert_eq!(deps[2].is_optional, Some(true));
2586 }
2587
2588 #[test]
2589 fn test_parse_dpkg_status_with_source() {
2590 let content = "\
2591Package: libncurses6
2592Status: install ok installed
2593Architecture: amd64
2594Source: ncurses (6.2+20201114-2+deb11u1)
2595Version: 6.2+20201114-2+deb11u1
2596Maintainer: Craig Small <csmall@debian.org>
2597Description: shared libraries for terminal handling";
2598
2599 let packages = parse_dpkg_status(content);
2600 assert_eq!(packages.len(), 1);
2601 assert!(!packages[0].source_packages.is_empty());
2602 assert!(packages[0].source_packages[0].contains("ncurses"));
2604 }
2605
2606 #[test]
2607 fn test_parse_dpkg_status_filters_not_installed() {
2608 let content = "\
2609Package: installed-pkg
2610Status: install ok installed
2611Version: 1.0
2612Architecture: amd64
2613Description: installed
2614
2615Package: half-installed
2616Status: install ok half-installed
2617Version: 2.0
2618Architecture: amd64
2619Description: half installed
2620
2621Package: deinstall-pkg
2622Status: deinstall ok config-files
2623Version: 3.0
2624Architecture: amd64
2625Description: deinstalled
2626
2627Package: purge-pkg
2628Status: purge ok not-installed
2629Version: 4.0
2630Architecture: amd64
2631Description: purged";
2632
2633 let packages = parse_dpkg_status(content);
2634 assert_eq!(packages.len(), 1);
2635 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2636 }
2637
2638 #[test]
2639 fn test_parse_dpkg_status_empty() {
2640 let packages = parse_dpkg_status("");
2641 assert!(packages.is_empty());
2642 }
2643
2644 #[test]
2647 fn test_debian_control_is_match() {
2648 assert!(DebianControlParser::is_match(Path::new(
2649 "/path/to/debian/control"
2650 )));
2651 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2652 assert!(!DebianControlParser::is_match(Path::new(
2653 "/path/to/control"
2654 )));
2655 assert!(!DebianControlParser::is_match(Path::new(
2656 "/path/to/debian/changelog"
2657 )));
2658 }
2659
2660 #[test]
2661 fn test_debian_installed_is_match() {
2662 assert!(DebianInstalledParser::is_match(Path::new(
2663 "/var/lib/dpkg/status"
2664 )));
2665 assert!(DebianInstalledParser::is_match(Path::new(
2666 "some/root/var/lib/dpkg/status"
2667 )));
2668 assert!(!DebianInstalledParser::is_match(Path::new(
2669 "/var/lib/dpkg/status.d/something"
2670 )));
2671 assert!(!DebianInstalledParser::is_match(Path::new(
2672 "/var/lib/dpkg/available"
2673 )));
2674 }
2675
2676 #[test]
2679 fn test_parse_debian_control_empty_input() {
2680 let packages = parse_debian_control("");
2681 assert!(packages.is_empty());
2682 }
2683
2684 #[test]
2685 fn test_parse_debian_control_malformed_input() {
2686 let content = "this is not a valid control file\nwith random text";
2687 let packages = parse_debian_control(content);
2688 assert!(packages.is_empty());
2690 }
2691
2692 #[test]
2693 fn test_dependency_with_epoch_version() {
2694 let deps = parse_dependency_field(
2696 "zlib1g (>= 1:1.2.11)",
2697 "depends",
2698 true,
2699 false,
2700 Some("debian"),
2701 );
2702 assert_eq!(deps.len(), 1);
2703 assert_eq!(
2704 deps[0].extracted_requirement,
2705 Some(">= 1:1.2.11".to_string())
2706 );
2707 }
2708
2709 #[test]
2710 fn test_dependency_with_plus_in_name() {
2711 let deps =
2712 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2713 assert_eq!(deps.len(), 1);
2714 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2715 }
2716
2717 #[test]
2718 fn test_dsc_parser_is_match() {
2719 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2720 assert!(DebianDscParser::is_match(&PathBuf::from(
2721 "adduser_3.118+deb11u1.dsc"
2722 )));
2723 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2724 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2725 }
2726
2727 #[test]
2728 fn test_dsc_parser_adduser() {
2729 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2730 let package = DebianDscParser::extract_first_package(&path);
2731
2732 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2733 assert_eq!(package.namespace, Some("debian".to_string()));
2734 assert_eq!(package.name, Some("adduser".to_string()));
2735 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2736 assert_eq!(
2737 package.purl,
2738 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2739 );
2740 assert_eq!(
2741 package.vcs_url,
2742 Some("https://salsa.debian.org/debian/adduser.git".to_string())
2743 );
2744 assert_eq!(
2745 package.code_view_url,
2746 Some("https://salsa.debian.org/debian/adduser".to_string())
2747 );
2748 assert_eq!(
2749 package.datasource_id,
2750 Some(DatasourceId::DebianSourceControlDsc)
2751 );
2752
2753 assert_eq!(package.parties.len(), 2);
2754 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2755 assert_eq!(
2756 package.parties[0].name,
2757 Some("Debian Adduser Developers".to_string())
2758 );
2759 assert_eq!(
2760 package.parties[0].email,
2761 Some("adduser@packages.debian.org".to_string())
2762 );
2763 assert_eq!(package.parties[0].r#type, None);
2764
2765 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2766 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2767 assert_eq!(
2768 package.parties[1].email,
2769 Some("mh+debian-packages@zugschlus.de".to_string())
2770 );
2771 assert_eq!(package.parties[1].r#type, None);
2772
2773 assert_eq!(package.source_packages.len(), 1);
2774 assert_eq!(
2775 package.source_packages[0],
2776 "pkg:deb/debian/adduser".to_string()
2777 );
2778
2779 assert!(!package.dependencies.is_empty());
2780 let build_dep_names: Vec<String> = package
2781 .dependencies
2782 .iter()
2783 .filter_map(|d| d.purl.as_ref())
2784 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2785 .map(|p| p.to_string())
2786 .collect();
2787 assert!(build_dep_names.len() >= 2);
2788 }
2789
2790 #[test]
2791 fn test_dsc_parser_zsh() {
2792 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2793 let package = DebianDscParser::extract_first_package(&path);
2794
2795 assert_eq!(package.name, Some("zsh".to_string()));
2796 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2797 assert_eq!(package.namespace, Some("debian".to_string()));
2798 assert!(package.purl.is_some());
2799 assert!(package.purl.as_ref().unwrap().contains("zsh"));
2800 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2801 }
2802
2803 #[test]
2804 fn test_parse_dsc_content_basic() {
2805 let content = "Format: 3.0 (native)
2806Source: testpkg
2807Binary: testpkg
2808Architecture: amd64
2809Version: 1.0.0
2810Maintainer: Test User <test@example.com>
2811Standards-Version: 4.5.0
2812Build-Depends: debhelper (>= 12)
2813Files:
2814 abc123 1024 testpkg_1.0.0.tar.xz
2815";
2816
2817 let package = parse_dsc_content(content);
2818 assert_eq!(package.name, Some("testpkg".to_string()));
2819 assert_eq!(package.version, Some("1.0.0".to_string()));
2820 assert_eq!(package.namespace, Some("debian".to_string()));
2821 assert_eq!(package.parties.len(), 1);
2822 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2823 assert_eq!(
2824 package.parties[0].email,
2825 Some("test@example.com".to_string())
2826 );
2827 assert_eq!(package.dependencies.len(), 1);
2828 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2829 }
2830
2831 #[test]
2832 fn test_parse_dsc_content_with_uploaders() {
2833 let content = "Source: mypkg
2834Version: 2.0
2835Architecture: all
2836Maintainer: Main Dev <main@example.com>
2837Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2838";
2839
2840 let package = parse_dsc_content(content);
2841 assert_eq!(package.parties.len(), 3);
2842 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2843 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2844 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2845 }
2846
2847 #[test]
2848 fn test_orig_tar_parser_is_match() {
2849 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2850 "package_1.0.orig.tar.gz"
2851 )));
2852 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2853 "abseil_0~20200923.3.orig.tar.xz"
2854 )));
2855 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2856 "package.debian.tar.gz"
2857 )));
2858 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2859 }
2860
2861 #[test]
2862 fn test_debian_tar_parser_is_match() {
2863 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2864 "package_1.0-1.debian.tar.xz"
2865 )));
2866 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2867 "abseil_20220623.1-1.debian.tar.gz"
2868 )));
2869 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2870 "package.orig.tar.gz"
2871 )));
2872 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2873 }
2874
2875 #[test]
2876 fn test_parse_orig_tar_filename() {
2877 let pkg = parse_source_tarball_filename(
2878 "abseil_0~20200923.3.orig.tar.gz",
2879 DatasourceId::DebianOriginalSourceTarball,
2880 );
2881 assert_eq!(pkg.name, Some("abseil".to_string()));
2882 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2883 assert_eq!(pkg.namespace, Some("debian".to_string()));
2884 assert_eq!(
2885 pkg.purl,
2886 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2887 );
2888 assert_eq!(
2889 pkg.datasource_id,
2890 Some(DatasourceId::DebianOriginalSourceTarball)
2891 );
2892 }
2893
2894 #[test]
2895 fn test_parse_debian_tar_filename() {
2896 let pkg = parse_source_tarball_filename(
2897 "abseil_20220623.1-1.debian.tar.xz",
2898 DatasourceId::DebianSourceMetadataTarball,
2899 );
2900 assert_eq!(pkg.name, Some("abseil".to_string()));
2901 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2902 assert_eq!(pkg.namespace, Some("debian".to_string()));
2903 assert_eq!(
2904 pkg.purl,
2905 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2906 );
2907 }
2908
2909 #[test]
2910 fn test_parse_deb_filename() {
2911 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2912 assert_eq!(pkg.name, Some("nginx".to_string()));
2913 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
2914
2915 let pkg = parse_deb_filename("invalid.deb");
2916 assert!(pkg.name.is_none());
2917 assert!(pkg.version.is_none());
2918 }
2919
2920 #[test]
2921 fn test_parse_source_tarball_various_compressions() {
2922 let pkg_gz = parse_source_tarball_filename(
2923 "test_1.0.orig.tar.gz",
2924 DatasourceId::DebianOriginalSourceTarball,
2925 );
2926 let pkg_xz = parse_source_tarball_filename(
2927 "test_1.0.orig.tar.xz",
2928 DatasourceId::DebianOriginalSourceTarball,
2929 );
2930 let pkg_bz2 = parse_source_tarball_filename(
2931 "test_1.0.orig.tar.bz2",
2932 DatasourceId::DebianOriginalSourceTarball,
2933 );
2934
2935 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
2936 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
2937 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
2938 }
2939
2940 #[test]
2941 fn test_parse_source_tarball_invalid_format() {
2942 let pkg = parse_source_tarball_filename(
2943 "invalid-no-underscore.tar.gz",
2944 DatasourceId::DebianOriginalSourceTarball,
2945 );
2946 assert!(pkg.name.is_none());
2947 assert!(pkg.version.is_none());
2948 }
2949
2950 #[test]
2951 fn test_list_parser_is_match() {
2952 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2953 "/var/lib/dpkg/info/bash.list"
2954 )));
2955 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2956 "/var/lib/dpkg/info/package:amd64.list"
2957 )));
2958 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2959 "bash.list"
2960 )));
2961 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2962 "/var/lib/dpkg/info/bash.md5sums"
2963 )));
2964 }
2965
2966 #[test]
2967 fn test_md5sums_parser_is_match() {
2968 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2969 "/var/lib/dpkg/info/bash.md5sums"
2970 )));
2971 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2972 "/var/lib/dpkg/info/package:amd64.md5sums"
2973 )));
2974 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2975 "bash.md5sums"
2976 )));
2977 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2978 "/var/lib/dpkg/info/bash.list"
2979 )));
2980 }
2981
2982 #[test]
2983 fn test_parse_debian_file_list_plain_list() {
2984 let content = "/.
2985/bin
2986/bin/bash
2987/usr/bin/bashbug
2988/usr/share/doc/bash/README
2989";
2990 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
2991 assert_eq!(pkg.name, Some("bash".to_string()));
2992 assert_eq!(pkg.file_references.len(), 3);
2993 assert_eq!(pkg.file_references[0].path, "/bin/bash");
2994 assert_eq!(pkg.file_references[0].md5, None);
2995 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
2996 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
2997 }
2998
2999 #[test]
3000 fn test_parse_debian_file_list_md5sums() {
3001 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
30021c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3003f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3004";
3005 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3006 assert_eq!(pkg.name, Some("bash".to_string()));
3007 assert_eq!(pkg.file_references.len(), 3);
3008 assert_eq!(pkg.file_references[0].path, "bin/bash");
3009 assert_eq!(
3010 pkg.file_references[0].md5,
3011 Some("77506afebd3b7e19e937a678a185b62e".to_string())
3012 );
3013 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3014 assert_eq!(
3015 pkg.file_references[1].md5,
3016 Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3017 );
3018 }
3019
3020 #[test]
3021 fn test_parse_debian_file_list_with_arch() {
3022 let content = "/usr/bin/foo
3023/usr/lib/x86_64-linux-gnu/libfoo.so
3024";
3025 let pkg = parse_debian_file_list(
3026 content,
3027 "libfoo:amd64",
3028 DatasourceId::DebianInstalledFilesList,
3029 );
3030 assert_eq!(pkg.name, Some("libfoo".to_string()));
3031 assert!(pkg.purl.is_some());
3032 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3033 assert_eq!(pkg.file_references.len(), 2);
3034 }
3035
3036 #[test]
3037 fn test_parse_debian_file_list_skips_comments_and_empty() {
3038 let content = "# This is a comment
3039/bin/bash
3040
3041/usr/bin/bashbug
3042
3043";
3044 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3045 assert_eq!(pkg.file_references.len(), 2);
3046 }
3047
3048 #[test]
3049 fn test_parse_debian_file_list_md5sums_only() {
3050 let content = "abc123 usr/bin/tool
3051";
3052 let pkg =
3053 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3054 assert_eq!(pkg.name, None);
3055 assert_eq!(pkg.file_references.len(), 1);
3056 }
3057
3058 #[test]
3059 fn test_parse_debian_file_list_ignores_root_dirs() {
3060 let content = "/.
3061/bin
3062/bin/bash
3063/etc
3064/usr
3065/var
3066";
3067 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3068 assert_eq!(pkg.file_references.len(), 1);
3069 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3070 }
3071
3072 #[test]
3073 fn test_copyright_parser_is_match() {
3074 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3075 "/usr/share/doc/bash/copyright"
3076 )));
3077 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3078 "debian/copyright"
3079 )));
3080 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3081 "copyright.txt"
3082 )));
3083 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3084 "/etc/copyright"
3085 )));
3086 }
3087
3088 #[test]
3089 fn test_extract_package_name_from_path() {
3090 assert_eq!(
3091 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3092 Some("bash".to_string())
3093 );
3094 assert_eq!(
3095 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3096 Some("libseccomp2".to_string())
3097 );
3098 assert_eq!(
3099 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3100 None
3101 );
3102 }
3103
3104 #[test]
3105 fn test_parse_copyright_dep5_format() {
3106 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3107Upstream-Name: libseccomp
3108Source: https://sourceforge.net/projects/libseccomp/
3109
3110Files: *
3111Copyright: 2012 Paul Moore <pmoore@redhat.com>
3112 2012 Ashley Lai <adlai@us.ibm.com>
3113License: LGPL-2.1
3114
3115License: LGPL-2.1
3116 This library is free software
3117";
3118 let pkg = parse_copyright_file(content, Some("libseccomp"));
3119 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3120 assert_eq!(pkg.namespace, Some("debian".to_string()));
3121 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3122 assert_eq!(
3123 pkg.extracted_license_statement,
3124 Some("LGPL-2.1".to_string())
3125 );
3126 assert!(pkg.parties.len() >= 2);
3127 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3128 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3129 }
3130
3131 #[test]
3132 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3133 let path = PathBuf::from(
3134 "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3135 );
3136 let pkg = DebianCopyrightParser::extract_first_package(&path);
3137
3138 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3139 let extracted = pkg
3140 .extracted_license_statement
3141 .as_deref()
3142 .expect("license statement should exist");
3143 assert!(extracted.contains("GPL-2+"));
3144 assert!(!pkg.license_detections.is_empty());
3145
3146 let primary = &pkg.license_detections[0];
3147 assert_eq!(
3148 primary.matches[0].matched_text.as_deref(),
3149 Some("License: GPL-2+")
3150 );
3151 assert_eq!(primary.matches[0].start_line, 47);
3152 assert_eq!(primary.matches[0].end_line, 47);
3153 }
3154
3155 #[test]
3156 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3157 let path = PathBuf::from("testdata/debian/copyright/copyright");
3158 let pkg = DebianCopyrightParser::extract_first_package(&path);
3159
3160 assert_eq!(pkg.license_detections.len(), 1);
3161 assert_eq!(pkg.other_license_detections.len(), 4);
3162
3163 let primary = &pkg.license_detections[0];
3164 assert_eq!(
3165 primary.matches[0].matched_text.as_deref(),
3166 Some("License: LGPL-2.1")
3167 );
3168 assert_eq!(primary.matches[0].start_line, 11);
3169
3170 let ordered_lines: Vec<usize> = pkg
3171 .other_license_detections
3172 .iter()
3173 .map(|detection| detection.matches[0].start_line)
3174 .collect();
3175 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3176
3177 let ordered_texts: Vec<&str> = pkg
3178 .other_license_detections
3179 .iter()
3180 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3181 .collect();
3182 assert_eq!(
3183 ordered_texts,
3184 vec![
3185 "License: LGPL-2.1",
3186 "License: LGPL-2.1",
3187 "License: LGPL-2.1",
3188 "License: LGPL-2.1",
3189 ]
3190 );
3191 }
3192
3193 #[test]
3194 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3195 let path = PathBuf::from(
3196 "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-2019-11-15/main/c/clamav/stable_copyright",
3197 );
3198 let pkg = DebianCopyrightParser::extract_first_package(&path);
3199
3200 let zlib = pkg
3201 .other_license_detections
3202 .iter()
3203 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3204 .expect("at least one Zlib license paragraph should be detected");
3205 assert_eq!(
3206 zlib.matches[0].matched_text.as_deref(),
3207 Some("License: Zlib")
3208 );
3209
3210 let last_zlib = pkg
3211 .other_license_detections
3212 .iter()
3213 .rev()
3214 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3215 .expect("bottom standalone Zlib license paragraph should be detected");
3216 assert_eq!(last_zlib.matches[0].start_line, 732);
3217 assert_eq!(last_zlib.matches[0].end_line, 732);
3218 }
3219
3220 #[test]
3221 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3222 let path = PathBuf::from(
3223 "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/crafted_for_tests/test_license_nameless",
3224 );
3225 let pkg = DebianCopyrightParser::extract_first_package(&path);
3226
3227 assert_eq!(pkg.license_detections.len(), 1);
3228 let primary = &pkg.license_detections[0];
3229 assert_eq!(
3230 primary.matches[0].matched_text.as_deref(),
3231 Some("License: LGPL-3+ or GPL-2+")
3232 );
3233 assert_eq!(primary.matches[0].start_line, 8);
3234 assert_eq!(primary.matches[0].end_line, 8);
3235
3236 assert!(pkg.other_license_detections.iter().any(|detection| {
3237 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3238 }));
3239 }
3240
3241 #[test]
3242 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3243 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3244 let pkg = parse_copyright_file(content, Some("foo"));
3245
3246 assert_eq!(pkg.license_detections.len(), 1);
3247 let primary = &pkg.license_detections[0];
3248 assert_eq!(
3249 primary.matches[0].matched_text.as_deref(),
3250 Some("License: GPL-2+")
3251 );
3252 assert_eq!(primary.matches[0].start_line, 7);
3253 }
3254
3255 #[test]
3256 #[ignore = "performance probe for Debian copyright parsing"]
3257 fn test_debian_copyright_perf_guardrail_large_dep5_fixtures() {
3258 use std::hint::black_box;
3259 use std::time::Instant;
3260
3261 let fixtures = [
3262 (
3263 "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3264 Some("bsdutils"),
3265 47usize,
3266 ),
3267 (
3268 "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-2019-11-15/main/c/clamav/stable_copyright",
3269 Some("clamav"),
3270 47usize,
3271 ),
3272 ];
3273
3274 let iterations = 100usize;
3275 let start = Instant::now();
3276
3277 for _ in 0..iterations {
3278 for (path, package_name, expected_line) in fixtures {
3279 let content =
3280 read_file_to_string(Path::new(path)).expect("fixture should be readable");
3281 let pkg = black_box(parse_copyright_file(&content, package_name));
3282 assert!(!pkg.license_detections.is_empty());
3283 assert_eq!(
3284 pkg.license_detections[0].matches[0].start_line,
3285 expected_line
3286 );
3287 }
3288 }
3289
3290 eprintln!(
3291 "Debian copyright perf probe: parsed {} fixtures x {} iterations in {:?}",
3292 fixtures.len(),
3293 iterations,
3294 start.elapsed()
3295 );
3296 }
3297
3298 #[test]
3299 fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3300 let raw_lines = vec![
3301 "Files: *".to_string(),
3302 "Copyright: 2024 Example Org".to_string(),
3303 "License: Apache-2.0".to_string(),
3304 " Licensed under the Apache License, Version 2.0.".to_string(),
3305 ];
3306
3307 let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3308 let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3309 .into_iter()
3310 .next()
3311 .expect("reference RFC822 paragraph should parse");
3312
3313 assert_eq!(paragraph.metadata.headers, expected.headers);
3314 assert_eq!(paragraph.metadata.body, expected.body);
3315 assert_eq!(
3316 paragraph.license_header_line,
3317 Some(("License: Apache-2.0".to_string(), 12))
3318 );
3319 }
3320
3321 #[test]
3322 fn test_parse_copyright_unstructured() {
3323 let content = "This package was debianized by John Doe.
3324
3325Upstream Authors:
3326 Jane Smith
3327
3328Copyright:
3329 2009 10gen
3330
3331License:
3332 SSPL
3333";
3334 let pkg = parse_copyright_file(content, Some("mongodb"));
3335 assert_eq!(pkg.name, Some("mongodb".to_string()));
3336 assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3337 assert!(!pkg.parties.is_empty());
3338 }
3339
3340 #[test]
3341 fn test_parse_copyright_holders() {
3342 let text = "2012 Paul Moore <pmoore@redhat.com>
33432012 Ashley Lai <adlai@us.ibm.com>
3344Copyright (C) 2015-2018 Example Corp";
3345 let holders = parse_copyright_holders(text);
3346 assert!(holders.len() >= 3);
3347 assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3348 assert!(holders.iter().any(|h| h.contains("Example Corp")));
3349 }
3350
3351 #[test]
3352 fn test_parse_copyright_empty() {
3353 let content = "This is just some text without proper copyright info.";
3354 let pkg = parse_copyright_file(content, Some("test"));
3355 assert_eq!(pkg.name, Some("test".to_string()));
3356 assert!(pkg.parties.is_empty());
3357 assert!(pkg.extracted_license_statement.is_none());
3358 }
3359
3360 #[test]
3361 fn test_deb_parser_is_match() {
3362 assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3363 assert!(DebianDebParser::is_match(&PathBuf::from(
3364 "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3365 )));
3366 assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3367 assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3368 }
3369
3370 #[test]
3371 fn test_parse_deb_filename_with_arch() {
3372 let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3373 assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3374 assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3375 assert_eq!(pkg.namespace, Some("debian".to_string()));
3376 assert_eq!(
3377 pkg.purl,
3378 Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3379 );
3380 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3381 }
3382
3383 #[test]
3384 fn test_parse_deb_filename_without_arch() {
3385 let pkg = parse_deb_filename("package_1.0-1_all.deb");
3386 assert_eq!(pkg.name, Some("package".to_string()));
3387 assert_eq!(pkg.version, Some("1.0-1".to_string()));
3388 assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3389 }
3390
3391 #[test]
3392 fn test_extract_deb_archive() {
3393 let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3394 if !test_path.exists() {
3395 return;
3396 }
3397
3398 let pkg = DebianDebParser::extract_first_package(&test_path);
3399
3400 assert_eq!(pkg.name, Some("adduser".to_string()));
3401 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3402 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3403 assert!(pkg.description.is_some());
3404 assert!(!pkg.parties.is_empty());
3405
3406 assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3407 assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3408 }
3409
3410 #[test]
3411 fn test_extract_deb_archive_with_control_tar_xz() {
3412 let deb = create_synthetic_deb_with_control_tar_xz();
3413
3414 let pkg = DebianDebParser::extract_first_package(deb.path());
3415
3416 assert_eq!(pkg.name, Some("synthetic".to_string()));
3417 assert_eq!(pkg.version, Some("1.2.3".to_string()));
3418 assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3419 assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3420 }
3421
3422 #[test]
3423 fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3424 let deb = create_synthetic_deb_with_copyright();
3425
3426 let pkg = DebianDebParser::extract_first_package(deb.path());
3427
3428 assert_eq!(pkg.name, Some("synthetic".to_string()));
3429 assert_eq!(
3430 pkg.extracted_license_statement,
3431 Some("Apache-2.0".to_string())
3432 );
3433 assert!(pkg.parties.iter().any(|party| {
3434 party.role.as_deref() == Some("copyright-holder")
3435 && party.name.as_deref() == Some("Example Org")
3436 }));
3437 }
3438
3439 #[test]
3440 fn test_parse_deb_filename_simple() {
3441 let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3442 assert_eq!(pkg.name, Some("adduser".to_string()));
3443 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3444 assert_eq!(pkg.namespace, Some("debian".to_string()));
3445 }
3446
3447 #[test]
3448 fn test_parse_deb_filename_invalid() {
3449 let pkg = parse_deb_filename("invalid.deb");
3450 assert!(pkg.name.is_none());
3451 assert!(pkg.version.is_none());
3452 }
3453
3454 #[test]
3455 fn test_distroless_parser() {
3456 let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3457
3458 assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3459
3460 if !test_file.exists() {
3461 eprintln!("Warning: Test file not found, skipping test");
3462 return;
3463 }
3464
3465 let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3466
3467 assert_eq!(pkg.package_type, Some(PackageType::Deb));
3468 assert_eq!(
3469 pkg.datasource_id,
3470 Some(DatasourceId::DebianDistrolessInstalledDb)
3471 );
3472 assert_eq!(pkg.name, Some("base-files".to_string()));
3473 assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3474 assert_eq!(pkg.namespace, Some("debian".to_string()));
3475 assert!(pkg.purl.is_some());
3476 assert!(
3477 pkg.purl
3478 .as_ref()
3479 .unwrap()
3480 .contains("pkg:deb/debian/base-files")
3481 );
3482 }
3483}