1use std::collections::HashMap;
34use std::path::Path;
35use std::sync::LazyLock;
36
37use crate::parser_warn as warn;
38use packageurl::PackageUrl;
39use regex::Regex;
40
41use crate::models::{
42 DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
43 PackageType, Party,
44};
45use crate::parsers::rfc822::{self, Rfc822Metadata};
46use crate::parsers::utils::{
47 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
48};
49use crate::utils::spdx::combine_license_expressions;
50
51use super::PackageParser;
52use super::license_normalization::{
53 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
54 normalize_declared_license_key,
55};
56
/// Package type stamped on every `PackageData` and purl produced in this file.
const PACKAGE_TYPE: PackageType = PackageType::Deb;

/// Size ceiling of 1 GiB, expressed in bytes.
// NOTE(review): not referenced in the visible part of this file; presumably an archive guard — confirm.
const MAX_ARCHIVE_SIZE: u64 = 1024 * 1024 * 1024;
/// Size ceiling of 50 MiB, expressed in bytes.
// NOTE(review): not referenced in the visible part of this file; presumably a per-file guard — confirm.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Ratio ceiling of 100.
// NOTE(review): not referenced in the visible part of this file; presumably a decompression guard — confirm.
const MAX_COMPRESSION_RATIO: usize = 100;
62
63static DEP_RE: LazyLock<Regex> = LazyLock::new(|| {
64 Regex::new(
65 r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
66 )
67 .expect("compile-time constant dependency regex")
68});
69
70fn default_package_data(datasource_id: DatasourceId) -> PackageData {
71 PackageData {
72 package_type: Some(PACKAGE_TYPE),
73 datasource_id: Some(datasource_id),
74 ..Default::default()
75 }
76}
77
/// Substrings that, when found in a lowercased version string, attribute the
/// package to the `debian` namespace (see `detect_namespace`).
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that, when found in a lowercased version string, attribute the
/// package to the `ubuntu` namespace. Checked before the Debian clues.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that, when found in a lowercased maintainer string, attribute
/// the package to the `debian` namespace.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that, when found in a lowercased maintainer string, attribute
/// the package to the `ubuntu` namespace. Checked before the Debian clues.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
91
/// Describes how one relationship header of a control paragraph is turned
/// into `Dependency` records by `parse_all_dependencies`.
struct DepFieldSpec {
    /// Header name passed to `rfc822::get_header_first` (written lowercase in `DEP_FIELDS`).
    field: &'static str,
    /// Value stored in `Dependency::scope` for entries found in this header.
    scope: &'static str,
    /// Value stored in `Dependency::is_runtime`.
    is_runtime: bool,
    /// Baseline for `Dependency::is_optional`; alternatives (`a | b`) are
    /// marked optional regardless of this flag.
    is_optional: bool,
}
99
100const DEP_FIELDS: &[DepFieldSpec] = &[
101 DepFieldSpec {
102 field: "depends",
103 scope: "depends",
104 is_runtime: true,
105 is_optional: false,
106 },
107 DepFieldSpec {
108 field: "pre-depends",
109 scope: "pre-depends",
110 is_runtime: true,
111 is_optional: false,
112 },
113 DepFieldSpec {
114 field: "recommends",
115 scope: "recommends",
116 is_runtime: true,
117 is_optional: true,
118 },
119 DepFieldSpec {
120 field: "suggests",
121 scope: "suggests",
122 is_runtime: true,
123 is_optional: true,
124 },
125 DepFieldSpec {
126 field: "breaks",
127 scope: "breaks",
128 is_runtime: false,
129 is_optional: false,
130 },
131 DepFieldSpec {
132 field: "conflicts",
133 scope: "conflicts",
134 is_runtime: false,
135 is_optional: false,
136 },
137 DepFieldSpec {
138 field: "replaces",
139 scope: "replaces",
140 is_runtime: false,
141 is_optional: false,
142 },
143 DepFieldSpec {
144 field: "provides",
145 scope: "provides",
146 is_runtime: false,
147 is_optional: false,
148 },
149 DepFieldSpec {
150 field: "build-depends",
151 scope: "build-depends",
152 is_runtime: false,
153 is_optional: false,
154 },
155 DepFieldSpec {
156 field: "build-depends-indep",
157 scope: "build-depends-indep",
158 is_runtime: false,
159 is_optional: false,
160 },
161 DepFieldSpec {
162 field: "build-conflicts",
163 scope: "build-conflicts",
164 is_runtime: false,
165 is_optional: false,
166 },
167];
168
169pub struct DebianControlParser;
174
175impl PackageParser for DebianControlParser {
176 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
177
178 fn is_match(path: &Path) -> bool {
179 if let Some(name) = path.file_name()
180 && name == "control"
181 && let Some(parent) = path.parent()
182 && let Some(parent_name) = parent.file_name()
183 {
184 return parent_name == "debian";
185 }
186 false
187 }
188
189 fn extract_packages(path: &Path) -> Vec<PackageData> {
190 let content = match read_file_to_string(path, None) {
191 Ok(c) => c,
192 Err(e) => {
193 warn!("Failed to read debian/control at {:?}: {}", path, e);
194 return vec![default_package_data(DatasourceId::DebianControlInSource)];
195 }
196 };
197
198 let packages = parse_debian_control(&content);
199 if packages.is_empty() {
200 vec![default_package_data(DatasourceId::DebianControlInSource)]
201 } else {
202 packages
203 }
204 }
205}
206
207pub struct DebianInstalledParser;
212
213impl PackageParser for DebianInstalledParser {
214 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216 fn is_match(path: &Path) -> bool {
217 let path_str = path.to_string_lossy();
218 path_str.ends_with("var/lib/dpkg/status")
219 }
220
221 fn extract_packages(path: &Path) -> Vec<PackageData> {
222 let content = match read_file_to_string(path, None) {
223 Ok(c) => c,
224 Err(e) => {
225 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
226 return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
227 }
228 };
229
230 let packages = parse_dpkg_status(&content);
231 if packages.is_empty() {
232 vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
233 } else {
234 packages
235 }
236 }
237}
238
239pub struct DebianDistrolessInstalledParser;
240
241impl PackageParser for DebianDistrolessInstalledParser {
242 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
243
244 fn is_match(path: &Path) -> bool {
245 let path_str = path.to_string_lossy();
246 path_str.contains("var/lib/dpkg/status.d/")
247 }
248
249 fn extract_packages(path: &Path) -> Vec<PackageData> {
250 let content = match read_file_to_string(path, None) {
251 Ok(c) => c,
252 Err(e) => {
253 warn!("Failed to read distroless status file at {:?}: {}", path, e);
254 return vec![default_package_data(
255 DatasourceId::DebianDistrolessInstalledDb,
256 )];
257 }
258 };
259
260 vec![parse_distroless_status(&content)]
261 }
262}
263
264fn parse_distroless_status(content: &str) -> PackageData {
265 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
266
267 if paragraphs.is_empty() {
268 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
269 }
270
271 build_package_from_paragraph(
272 ¶graphs[0],
273 None,
274 DatasourceId::DebianDistrolessInstalledDb,
275 )
276 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
277}
278
279fn parse_debian_control(content: &str) -> Vec<PackageData> {
289 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
290 if paragraphs.is_empty() {
291 return Vec::new();
292 }
293
294 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
295
296 let (source_paragraph, binary_start) = if has_source {
297 (Some(¶graphs[0]), 1)
298 } else {
299 (None, 0)
300 };
301
302 let source_meta = source_paragraph.map(extract_source_meta);
303
304 let mut packages = Vec::new();
305 let mut count = 0usize;
306
307 for para in ¶graphs[binary_start..] {
308 count += 1;
309 if count > MAX_ITERATION_COUNT {
310 warn!("parse_debian_control: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
311 break;
312 }
313 if let Some(pkg) = build_package_from_paragraph(
314 para,
315 source_meta.as_ref(),
316 DatasourceId::DebianControlInSource,
317 ) {
318 packages.push(pkg);
319 }
320 }
321
322 if packages.is_empty()
323 && let Some(source_para) = source_paragraph
324 && let Some(pkg) = build_package_from_source_paragraph(source_para)
325 {
326 packages.push(pkg);
327 }
328
329 packages
330}
331
332fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
337 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
338 let mut packages = Vec::new();
339 let mut count = 0usize;
340
341 for para in ¶graphs {
342 count += 1;
343 if count > MAX_ITERATION_COUNT {
344 warn!("parse_dpkg_status: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
345 break;
346 }
347 let status = rfc822::get_header_first(¶.headers, "status");
348 if status.as_deref() != Some("install ok installed") {
349 continue;
350 }
351
352 if let Some(pkg) =
353 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
354 {
355 packages.push(pkg);
356 }
357 }
358
359 packages
360}
361
/// Metadata taken from one control paragraph by `extract_source_meta` and
/// shared with the packages built from the following binary paragraphs.
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders, in that order.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
373
374fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
375 let mut parties = Vec::new();
376
377 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
379 let (name, email) = split_name_email(&maintainer);
380 parties.push(Party {
381 r#type: Some("person".to_string()),
382 role: Some("maintainer".to_string()),
383 name,
384 email,
385 url: None,
386 organization: None,
387 organization_url: None,
388 timezone: None,
389 });
390 }
391
392 if let Some(orig_maintainer) =
394 rfc822::get_header_first(¶graph.headers, "original-maintainer")
395 {
396 let (name, email) = split_name_email(&orig_maintainer);
397 parties.push(Party {
398 r#type: Some("person".to_string()),
399 role: Some("maintainer".to_string()),
400 name,
401 email,
402 url: None,
403 organization: None,
404 organization_url: None,
405 timezone: None,
406 });
407 }
408
409 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
411 for uploader in uploaders_str.split(',') {
412 let trimmed = uploader.trim();
413 if !trimmed.is_empty() {
414 let (name, email) = split_name_email(trimmed);
415 parties.push(Party {
416 r#type: Some("person".to_string()),
417 role: Some("uploader".to_string()),
418 name,
419 email,
420 url: None,
421 organization: None,
422 organization_url: None,
423 timezone: None,
424 });
425 }
426 }
427 }
428
429 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage").map(truncate_field);
430
431 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
432 .map(|url| truncate_field(url.split_whitespace().next().unwrap_or(&url).to_string()));
433
434 let code_view_url =
435 rfc822::get_header_first(¶graph.headers, "vcs-browser").map(truncate_field);
436
437 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs").map(truncate_field);
438
439 SourceMeta {
440 parties,
441 homepage_url,
442 vcs_url,
443 code_view_url,
444 bug_tracking_url,
445 }
446}
447
448fn build_package_from_paragraph(
453 paragraph: &Rfc822Metadata,
454 source_meta: Option<&SourceMeta>,
455 datasource_id: DatasourceId,
456) -> Option<PackageData> {
457 let name = rfc822::get_header_first(¶graph.headers, "package").map(truncate_field)?;
458 let version = rfc822::get_header_first(¶graph.headers, "version").map(truncate_field);
459 let architecture =
460 rfc822::get_header_first(¶graph.headers, "architecture").map(truncate_field);
461 let description =
462 rfc822::get_header_first(¶graph.headers, "description").map(truncate_field);
463 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
464 let homepage = rfc822::get_header_first(¶graph.headers, "homepage").map(truncate_field);
465 let source_field = rfc822::get_header_first(¶graph.headers, "source");
466 let section = rfc822::get_header_first(¶graph.headers, "section");
467 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
468 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
469
470 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
471
472 let parties = if let Some(meta) = source_meta {
474 meta.parties.clone()
475 } else {
476 let mut p = Vec::new();
477 if let Some(m) = &maintainer_str {
478 let (n, e) = split_name_email(m);
479 p.push(Party {
480 r#type: Some("person".to_string()),
481 role: Some("maintainer".to_string()),
482 name: n,
483 email: e,
484 url: None,
485 organization: None,
486 organization_url: None,
487 timezone: None,
488 });
489 }
490 p
491 };
492
493 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
495 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
496 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
497 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
498
499 let purl = build_debian_purl(
501 &name,
502 version.as_deref(),
503 namespace.as_deref(),
504 architecture.as_deref(),
505 );
506
507 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
509
510 let keywords = section.into_iter().collect();
512
513 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
515
516 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
518 if let Some(ma) = &multi_arch
519 && !ma.is_empty()
520 {
521 extra_data.insert(
522 "multi_arch".to_string(),
523 serde_json::Value::String(ma.clone()),
524 );
525 }
526 if let Some(size_str) = &installed_size
527 && let Ok(size) = size_str.parse::<u64>()
528 {
529 extra_data.insert(
530 "installed_size".to_string(),
531 serde_json::Value::Number(serde_json::Number::from(size)),
532 );
533 }
534
535 let qualifiers = architecture.as_ref().map(|arch| {
537 let mut q = HashMap::new();
538 q.insert("arch".to_string(), arch.clone());
539 q
540 });
541
542 Some(PackageData {
543 package_type: Some(PACKAGE_TYPE),
544 namespace: namespace.clone(),
545 name: Some(name),
546 version,
547 qualifiers,
548 subpath: None,
549 primary_language: None,
550 description,
551 release_date: None,
552 parties,
553 keywords,
554 homepage_url,
555 download_url: None,
556 size: None,
557 sha1: None,
558 md5: None,
559 sha256: None,
560 sha512: None,
561 bug_tracking_url,
562 code_view_url,
563 vcs_url,
564 copyright: None,
565 holder: None,
566 declared_license_expression: None,
567 declared_license_expression_spdx: None,
568 license_detections: Vec::new(),
569 other_license_expression: None,
570 other_license_expression_spdx: None,
571 other_license_detections: Vec::new(),
572 extracted_license_statement: None,
573 notice_text: None,
574 source_packages,
575 file_references: Vec::new(),
576 is_private: false,
577 is_virtual: false,
578 extra_data: if extra_data.is_empty() {
579 None
580 } else {
581 Some(extra_data)
582 },
583 dependencies,
584 repository_homepage_url: None,
585 repository_download_url: None,
586 api_data_url: None,
587 datasource_id: Some(datasource_id),
588 purl,
589 })
590}
591
592fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
593 let name = rfc822::get_header_first(¶graph.headers, "source").map(truncate_field)?;
594 let version = rfc822::get_header_first(¶graph.headers, "version").map(truncate_field);
595 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
596
597 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
598 let source_meta = extract_source_meta(paragraph);
599
600 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
601 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
602
603 let section = rfc822::get_header_first(¶graph.headers, "section");
604 let keywords = section.into_iter().collect();
605
606 Some(PackageData {
607 package_type: Some(PACKAGE_TYPE),
608 namespace: namespace.clone(),
609 name: Some(name),
610 version,
611 qualifiers: None,
612 subpath: None,
613 primary_language: None,
614 description: None,
615 release_date: None,
616 parties: source_meta.parties,
617 keywords,
618 homepage_url: source_meta.homepage_url,
619 download_url: None,
620 size: None,
621 sha1: None,
622 md5: None,
623 sha256: None,
624 sha512: None,
625 bug_tracking_url: source_meta.bug_tracking_url,
626 code_view_url: source_meta.code_view_url,
627 vcs_url: source_meta.vcs_url,
628 copyright: None,
629 holder: None,
630 declared_license_expression: None,
631 declared_license_expression_spdx: None,
632 license_detections: Vec::new(),
633 other_license_expression: None,
634 other_license_expression_spdx: None,
635 other_license_detections: Vec::new(),
636 extracted_license_statement: None,
637 notice_text: None,
638 source_packages: Vec::new(),
639 file_references: Vec::new(),
640 is_private: false,
641 is_virtual: false,
642 extra_data: None,
643 dependencies,
644 repository_homepage_url: None,
645 repository_download_url: None,
646 api_data_url: None,
647 datasource_id: Some(DatasourceId::DebianControlInSource),
648 purl,
649 })
650}
651
652fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
657 if let Some(ver) = version {
659 let ver_lower = ver.to_lowercase();
660 for clue in VERSION_CLUES_UBUNTU {
661 if ver_lower.contains(clue) {
662 return Some("ubuntu".to_string());
663 }
664 }
665 for clue in VERSION_CLUES_DEBIAN {
666 if ver_lower.contains(clue) {
667 return Some("debian".to_string());
668 }
669 }
670 }
671
672 if let Some(maint) = maintainer {
674 let maint_lower = maint.to_lowercase();
675 for clue in MAINTAINER_CLUES_UBUNTU {
676 if maint_lower.contains(clue) {
677 return Some("ubuntu".to_string());
678 }
679 }
680 for clue in MAINTAINER_CLUES_DEBIAN {
681 if maint_lower.contains(clue) {
682 return Some("debian".to_string());
683 }
684 }
685 }
686
687 Some("debian".to_string())
689}
690
691fn build_debian_purl(
696 name: &str,
697 version: Option<&str>,
698 namespace: Option<&str>,
699 architecture: Option<&str>,
700) -> Option<String> {
701 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
702
703 if let Some(ns) = namespace {
704 purl.with_namespace(ns).ok()?;
705 }
706
707 if let Some(ver) = version {
708 purl.with_version(ver).ok()?;
709 }
710
711 if let Some(arch) = architecture {
712 purl.add_qualifier("arch", arch).ok()?;
713 }
714
715 Some(purl.to_string())
716}
717
718fn parse_all_dependencies(
723 headers: &HashMap<String, Vec<String>>,
724 namespace: Option<&str>,
725) -> Vec<Dependency> {
726 let mut dependencies = Vec::new();
727
728 for spec in DEP_FIELDS {
729 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
730 dependencies.extend(parse_dependency_field(
731 &dep_str,
732 spec.scope,
733 spec.is_runtime,
734 spec.is_optional,
735 namespace,
736 ));
737 }
738 }
739
740 dependencies
741}
742
743fn parse_dependency_field(
752 dep_str: &str,
753 scope: &str,
754 is_runtime: bool,
755 is_optional: bool,
756 namespace: Option<&str>,
757) -> Vec<Dependency> {
758 let mut deps = Vec::new();
759
760 for group in dep_str.split(',').take(MAX_ITERATION_COUNT) {
761 let group = group.trim();
762 if group.is_empty() {
763 continue;
764 }
765
766 let alternatives: Vec<&str> = group.split('|').collect();
767 let has_alternatives = alternatives.len() > 1;
768
769 for alt in alternatives {
770 let alt = alt.trim();
771 if alt.is_empty() {
772 continue;
773 }
774
775 if let Some(caps) = DEP_RE.captures(alt) {
776 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
777 let operator = caps.get(2).map(|m| m.as_str().trim());
778 let version = caps.get(3).map(|m| m.as_str().trim());
779
780 if pkg_name.is_empty() {
781 continue;
782 }
783
784 if pkg_name.starts_with('$') {
785 continue;
786 }
787
788 let extracted_requirement = match (operator, version) {
789 (Some(op), Some(ver)) => Some(truncate_field(format!("{} {}", op, ver))),
790 _ => None,
791 };
792
793 let is_pinned = operator.map(|op| op == "=");
794
795 let purl = build_debian_purl(pkg_name, None, namespace, None);
796
797 deps.push(Dependency {
798 purl,
799 extracted_requirement,
800 scope: Some(scope.to_string()),
801 is_runtime: Some(is_runtime),
802 is_optional: Some(is_optional || has_alternatives),
803 is_pinned,
804 is_direct: Some(true),
805 resolved_package: None,
806 extra_data: None,
807 });
808 }
809 }
810 }
811
812 deps
813}
814
815fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
823 let Some(source_str) = source else {
824 return Vec::new();
825 };
826
827 let trimmed = source_str.trim();
828 if trimmed.is_empty() {
829 return Vec::new();
830 }
831
832 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
834 let name = trimmed[..paren_start].trim();
835 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
836 (
837 name,
838 if version.is_empty() {
839 None
840 } else {
841 Some(version)
842 },
843 )
844 } else {
845 (trimmed, None)
846 };
847
848 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
849 vec![purl]
850 } else {
851 Vec::new()
852 }
853}
854
// Registration entry for `debian/control` files.
// NOTE(review): the arguments appear to be description, path globs, package
// type, an empty string and a documentation URL; confirm against the
// definition of `register_parser!`.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for the dpkg `status` database.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for per-package files under `status.d/`.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
882
883pub struct DebianDscParser;
892
893impl PackageParser for DebianDscParser {
894 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
895
896 fn is_match(path: &Path) -> bool {
897 path.extension().and_then(|e| e.to_str()) == Some("dsc")
898 }
899
900 fn extract_packages(path: &Path) -> Vec<PackageData> {
901 let content = match read_file_to_string(path, None) {
902 Ok(c) => c,
903 Err(e) => {
904 warn!("Failed to read .dsc file {:?}: {}", path, e);
905 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
906 }
907 };
908
909 vec![parse_dsc_content(&content)]
910 }
911}
912
913crate::register_parser!(
914 "Debian source control file (.dsc)",
915 &["**/*.dsc"],
916 "deb",
917 "",
918 Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
919);
920
921fn strip_pgp_signature(content: &str) -> String {
922 let mut result = String::new();
923 let mut in_pgp_block = false;
924 let mut in_signature = false;
925 let mut count = 0usize;
926
927 for line in content.lines() {
928 count += 1;
929 if count > MAX_ITERATION_COUNT {
930 warn!("strip_pgp_signature: exceeded MAX_ITERATION_COUNT lines, stopping");
931 break;
932 }
933 if line.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") {
934 in_pgp_block = true;
935 continue;
936 }
937 if line.starts_with("-----BEGIN PGP SIGNATURE-----") {
938 in_signature = true;
939 continue;
940 }
941 if line.starts_with("-----END PGP SIGNATURE-----") {
942 in_signature = false;
943 continue;
944 }
945 if in_pgp_block && line.starts_with("Hash:") {
946 continue;
947 }
948 if in_pgp_block && line.is_empty() && result.is_empty() {
949 in_pgp_block = false;
950 continue;
951 }
952 if !in_signature {
953 result.push_str(line);
954 result.push('\n');
955 }
956 }
957
958 result
959}
960
961fn parse_dsc_content(content: &str) -> PackageData {
962 let clean_content = strip_pgp_signature(content);
963 let metadata = rfc822::parse_rfc822_content(&clean_content);
964 let headers = &metadata.headers;
965
966 let name = rfc822::get_header_first(headers, "source").map(truncate_field);
967 let version = rfc822::get_header_first(headers, "version").map(truncate_field);
968 let architecture = rfc822::get_header_first(headers, "architecture").map(truncate_field);
969 let namespace = Some("debian".to_string());
970
971 let mut package = PackageData {
972 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
973 package_type: Some(PACKAGE_TYPE),
974 namespace: namespace.clone(),
975 name: name.clone(),
976 version: version.clone(),
977 description: rfc822::get_header_first(headers, "description").map(truncate_field),
978 homepage_url: rfc822::get_header_first(headers, "homepage").map(truncate_field),
979 vcs_url: rfc822::get_header_first(headers, "vcs-git").map(truncate_field),
980 code_view_url: rfc822::get_header_first(headers, "vcs-browser").map(truncate_field),
981 ..Default::default()
982 };
983
984 if let (Some(n), Some(v)) = (&name, &version) {
986 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
987 }
988
989 if let Some(n) = &name
991 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
992 {
993 package.source_packages.push(source_purl);
994 }
995
996 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
997 let (name_opt, email_opt) = split_name_email(&maintainer);
998 package.parties.push(Party {
999 r#type: None,
1000 role: Some("maintainer".to_string()),
1001 name: name_opt,
1002 email: email_opt,
1003 url: None,
1004 organization: None,
1005 organization_url: None,
1006 timezone: None,
1007 });
1008 }
1009
1010 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
1011 for uploader in uploaders_str.split(',') {
1012 let uploader = uploader.trim();
1013 if uploader.is_empty() {
1014 continue;
1015 }
1016 let (name_opt, email_opt) = split_name_email(uploader);
1017 package.parties.push(Party {
1018 r#type: None,
1019 role: Some("uploader".to_string()),
1020 name: name_opt,
1021 email: email_opt,
1022 url: None,
1023 organization: None,
1024 organization_url: None,
1025 timezone: None,
1026 });
1027 }
1028 }
1029
1030 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1032 package.dependencies.extend(parse_dependency_field(
1033 &build_deps,
1034 "build",
1035 false,
1036 false,
1037 namespace.as_deref(),
1038 ));
1039 }
1040
1041 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1043 let map = package.extra_data.get_or_insert_with(HashMap::new);
1044 map.insert("standards_version".to_string(), standards.into());
1045 }
1046
1047 package
1048}
1049
1050pub struct DebianOrigTarParser;
1052
1053impl PackageParser for DebianOrigTarParser {
1054 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1055
1056 fn is_match(path: &Path) -> bool {
1057 path.file_name()
1058 .and_then(|n| n.to_str())
1059 .map(|name| name.contains(".orig.tar."))
1060 .unwrap_or(false)
1061 }
1062
1063 fn extract_packages(path: &Path) -> Vec<PackageData> {
1064 let filename = match path.file_name().and_then(|n| n.to_str()) {
1065 Some(f) => f,
1066 None => {
1067 return vec![default_package_data(
1068 DatasourceId::DebianOriginalSourceTarball,
1069 )];
1070 }
1071 };
1072
1073 vec![parse_source_tarball_filename(
1074 filename,
1075 DatasourceId::DebianOriginalSourceTarball,
1076 )]
1077 }
1078}
1079
1080crate::register_parser!(
1081 "Debian original source tarball",
1082 &["**/*.orig.tar.*"],
1083 "deb",
1084 "",
1085 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1086);
1087
1088pub struct DebianDebianTarParser;
1090
1091impl PackageParser for DebianDebianTarParser {
1092 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1093
1094 fn is_match(path: &Path) -> bool {
1095 path.file_name()
1096 .and_then(|n| n.to_str())
1097 .map(|name| name.contains(".debian.tar."))
1098 .unwrap_or(false)
1099 }
1100
1101 fn extract_packages(path: &Path) -> Vec<PackageData> {
1102 let filename = match path.file_name().and_then(|n| n.to_str()) {
1103 Some(f) => f,
1104 None => {
1105 return vec![default_package_data(
1106 DatasourceId::DebianSourceMetadataTarball,
1107 )];
1108 }
1109 };
1110
1111 vec![parse_source_tarball_filename(
1112 filename,
1113 DatasourceId::DebianSourceMetadataTarball,
1114 )]
1115 }
1116}
1117
1118crate::register_parser!(
1119 "Debian source metadata tarball",
1120 &["**/*.debian.tar.*"],
1121 "deb",
1122 "",
1123 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1124);
1125
1126fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1127 let without_tar_ext = filename
1128 .trim_end_matches(".gz")
1129 .trim_end_matches(".xz")
1130 .trim_end_matches(".bz2")
1131 .trim_end_matches(".tar");
1132
1133 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1134 if parts.len() < 2 {
1135 return default_package_data(datasource_id);
1136 }
1137
1138 let name = truncate_field(parts[0].to_string());
1139 let version_with_suffix = parts[1];
1140
1141 let version = version_with_suffix
1142 .trim_end_matches(".orig")
1143 .trim_end_matches(".debian")
1144 .to_string();
1145 let version = truncate_field(version);
1146
1147 let namespace = Some("debian".to_string());
1148
1149 PackageData {
1150 datasource_id: Some(datasource_id),
1151 package_type: Some(PACKAGE_TYPE),
1152 namespace: namespace.clone(),
1153 name: Some(name.clone()),
1154 version: Some(version.clone()),
1155 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1156 ..Default::default()
1157 }
1158}
1159
1160pub struct DebianInstalledListParser;
1162
1163impl PackageParser for DebianInstalledListParser {
1164 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1165
1166 fn is_match(path: &Path) -> bool {
1167 path.extension().and_then(|e| e.to_str()) == Some("list")
1168 && path
1169 .to_str()
1170 .map(|p| p.contains("/var/lib/dpkg/info/"))
1171 .unwrap_or(false)
1172 }
1173
1174 fn extract_packages(path: &Path) -> Vec<PackageData> {
1175 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1176 Some(f) => f,
1177 None => {
1178 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1179 }
1180 };
1181
1182 let content = match read_file_to_string(path, None) {
1183 Ok(c) => c,
1184 Err(e) => {
1185 warn!("Failed to read .list file {:?}: {}", path, e);
1186 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1187 }
1188 };
1189
1190 vec![parse_debian_file_list(
1191 &content,
1192 filename,
1193 DatasourceId::DebianInstalledFilesList,
1194 )]
1195 }
1196}
1197
1198crate::register_parser!(
1199 "Debian installed files list",
1200 &["**/var/lib/dpkg/info/*.list"],
1201 "deb",
1202 "",
1203 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1204);
1205
1206pub struct DebianInstalledMd5sumsParser;
1208
1209impl PackageParser for DebianInstalledMd5sumsParser {
1210 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1211
1212 fn is_match(path: &Path) -> bool {
1213 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1214 && path
1215 .to_str()
1216 .map(|p| p.contains("/var/lib/dpkg/info/"))
1217 .unwrap_or(false)
1218 }
1219
1220 fn extract_packages(path: &Path) -> Vec<PackageData> {
1221 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1222 Some(f) => f,
1223 None => {
1224 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1225 }
1226 };
1227
1228 let content = match read_file_to_string(path, None) {
1229 Ok(c) => c,
1230 Err(e) => {
1231 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1232 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1233 }
1234 };
1235
1236 vec![parse_debian_file_list(
1237 &content,
1238 filename,
1239 DatasourceId::DebianInstalledMd5Sums,
1240 )]
1241 }
1242}
1243
1244crate::register_parser!(
1245 "Debian installed package md5sums",
1246 &["**/var/lib/dpkg/info/*.md5sums"],
1247 "deb",
1248 "",
1249 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1250);
1251
1252const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1253
1254fn parse_debian_file_list(
1255 content: &str,
1256 filename: &str,
1257 datasource_id: DatasourceId,
1258) -> PackageData {
1259 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1260 (
1261 Some(truncate_field(pkg.to_string())),
1262 Some(arch.to_string()),
1263 )
1264 } else if filename == "md5sums" {
1265 (None, None)
1266 } else {
1267 (Some(truncate_field(filename.to_string())), None)
1268 };
1269
1270 let mut file_references = Vec::new();
1271 let mut count = 0usize;
1272
1273 for line in content.lines() {
1274 count += 1;
1275 if count > MAX_ITERATION_COUNT {
1276 warn!("parse_debian_file_list: exceeded MAX_ITERATION_COUNT lines, stopping");
1277 break;
1278 }
1279 let line = line.trim();
1280 if line.is_empty() || line.starts_with('#') {
1281 continue;
1282 }
1283
1284 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1285 (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1286 } else {
1287 (None, line)
1288 };
1289
1290 if IGNORED_ROOT_DIRS.contains(&path) {
1291 continue;
1292 }
1293
1294 file_references.push(FileReference {
1295 path: path.to_string(),
1296 size: None,
1297 sha1: None,
1298 md5: md5sum,
1299 sha256: None,
1300 sha512: None,
1301 extra_data: None,
1302 });
1303 }
1304
1305 if file_references.is_empty() {
1306 return default_package_data(datasource_id);
1307 }
1308
1309 let namespace = Some("debian".to_string());
1310 let mut package = PackageData {
1311 datasource_id: Some(datasource_id),
1312 package_type: Some(PACKAGE_TYPE),
1313 namespace: namespace.clone(),
1314 name: name.clone(),
1315 file_references,
1316 ..Default::default()
1317 };
1318
1319 if let Some(n) = &name {
1320 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1321 }
1322
1323 package
1324}
1325
1326pub struct DebianCopyrightParser;
1328
1329impl PackageParser for DebianCopyrightParser {
1330 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1331
1332 fn is_match(path: &Path) -> bool {
1333 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1334 if filename != "copyright" {
1335 return filename.ends_with("_copyright");
1336 }
1337 let path_str = path.to_string_lossy();
1338 path_str.contains("/debian/")
1339 || path_str.contains("/ports/")
1340 || path_str.starts_with("ports/")
1341 || path_str.contains("/packages/deb/")
1342 || path_str.contains("/usr/share/doc/")
1343 || path_str.ends_with("debian/copyright")
1344 } else {
1345 false
1346 }
1347 }
1348
1349 fn extract_packages(path: &Path) -> Vec<PackageData> {
1350 let datasource_id = detect_debian_copyright_datasource(path);
1351 let content = match read_file_to_string(path, None) {
1352 Ok(c) => c,
1353 Err(e) => {
1354 warn!("Failed to read copyright file {:?}: {}", path, e);
1355 return vec![default_package_data(datasource_id)];
1356 }
1357 };
1358
1359 let package_name = extract_package_name_from_path(path)
1360 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
1361 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1362 package_data.datasource_id = Some(datasource_id);
1363 vec![package_data]
1364 }
1365}
1366
// Registry entry for the Debian copyright parser.
// NOTE(review): arguments appear to be description, path globs, package type,
// an (empty) fourth field and a documentation URL — confirm against the
// `register_parser!` definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
1380
1381fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1382 let path_str = path.to_string_lossy();
1383 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1384 DatasourceId::DebianCopyrightInSource
1385 } else if path_str.contains("/usr/share/doc/") {
1386 DatasourceId::DebianCopyrightInPackage
1387 } else {
1388 DatasourceId::DebianCopyrightStandalone
1389 }
1390}
1391
/// Returns the path component that directly follows the first `doc`
/// component which has a normal component after it.
///
/// Yields `None` when there is no such pair, or when the following component
/// is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let mut components = path.components().peekable();
    while let Some(component) = components.next() {
        let Component::Normal(dir_name) = component else {
            continue;
        };
        if dir_name.to_str() != Some("doc") {
            continue;
        }
        // Peek rather than consume so the follower is still examined as a
        // potential `doc` directory on the next iteration.
        if let Some(Component::Normal(package_dir)) = components.peek() {
            return package_dir.to_str().map(str::to_string);
        }
    }

    None
}
1406
1407fn extract_standalone_package_name_from_path(
1408 path: &Path,
1409 datasource_id: DatasourceId,
1410) -> Option<String> {
1411 if datasource_id != DatasourceId::DebianCopyrightStandalone {
1412 return None;
1413 }
1414
1415 path.file_name()
1416 .and_then(|name| name.to_str())
1417 .filter(|name| *name == "copyright")?;
1418
1419 path.parent()
1420 .and_then(|parent| parent.file_name())
1421 .and_then(|name| name.to_str())
1422 .map(str::to_string)
1423}
1424
1425fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1426 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1427
1428 let is_dep5 = paragraphs
1429 .first()
1430 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1431 .is_some();
1432
1433 let namespace = Some("debian".to_string());
1434 let mut parties = Vec::new();
1435 let mut license_statements = Vec::new();
1436 let mut primary_license_detection = None;
1437 let mut header_license_detection = None;
1438 let mut other_license_detections = Vec::new();
1439
1440 if is_dep5 {
1441 let mut para_count = 0usize;
1442 for para in ¶graphs {
1443 para_count += 1;
1444 if para_count > MAX_ITERATION_COUNT {
1445 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
1446 break;
1447 }
1448 if let Some(copyright_text) =
1449 rfc822::get_header_first(¶.metadata.headers, "copyright")
1450 {
1451 for holder in parse_copyright_holders(©right_text) {
1452 if !holder.is_empty() {
1453 parties.push(Party {
1454 r#type: None,
1455 role: Some("copyright-holder".to_string()),
1456 name: Some(holder),
1457 email: None,
1458 url: None,
1459 organization: None,
1460 organization_url: None,
1461 timezone: None,
1462 });
1463 }
1464 }
1465 }
1466
1467 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1468 let license_name = license.lines().next().unwrap_or(&license).trim();
1469 if !license_name.is_empty()
1470 && !license_statements.contains(&license_name.to_string())
1471 {
1472 license_statements.push(license_name.to_string());
1473 }
1474
1475 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1476 let detection =
1477 build_primary_license_detection(license_name, matched_text, line_no);
1478 let is_header_paragraph =
1479 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1480 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1481 == Some("*")
1482 {
1483 primary_license_detection = Some(detection);
1484 } else if is_header_paragraph {
1485 header_license_detection.get_or_insert(detection);
1486 } else {
1487 other_license_detections.push(detection);
1488 }
1489 }
1490 }
1491 }
1492
1493 if primary_license_detection.is_none() && header_license_detection.is_some() {
1494 primary_license_detection = header_license_detection;
1495 }
1496 } else {
1497 let copyright_block = extract_unstructured_field(content, "Copyright:");
1498 if let Some(text) = copyright_block {
1499 for holder in parse_copyright_holders(&text) {
1500 if !holder.is_empty() {
1501 parties.push(Party {
1502 r#type: None,
1503 role: Some("copyright-holder".to_string()),
1504 name: Some(holder),
1505 email: None,
1506 url: None,
1507 organization: None,
1508 organization_url: None,
1509 timezone: None,
1510 });
1511 }
1512 }
1513 }
1514
1515 let license_block = extract_unstructured_field(content, "License:");
1516 if let Some(text) = license_block {
1517 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1518 }
1519 }
1520
1521 let extracted_license_statement = if license_statements.is_empty() {
1522 None
1523 } else {
1524 Some(truncate_field(license_statements.join(" AND ")))
1525 };
1526
1527 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1528 let declared_license_expression = license_detections
1529 .first()
1530 .map(|detection| detection.license_expression.clone());
1531 let declared_license_expression_spdx = license_detections
1532 .first()
1533 .map(|detection| detection.license_expression_spdx.clone());
1534 let other_license_expression = combine_license_expressions(
1535 other_license_detections
1536 .iter()
1537 .map(|detection| detection.license_expression.clone()),
1538 );
1539 let other_license_expression_spdx = combine_license_expressions(
1540 other_license_detections
1541 .iter()
1542 .map(|detection| detection.license_expression_spdx.clone()),
1543 );
1544
1545 PackageData {
1546 datasource_id: Some(DatasourceId::DebianCopyright),
1547 package_type: Some(PACKAGE_TYPE),
1548 namespace: namespace.clone(),
1549 name: package_name.map(|s| truncate_field(s.to_string())),
1550 parties,
1551 declared_license_expression,
1552 declared_license_expression_spdx,
1553 license_detections,
1554 other_license_expression,
1555 other_license_expression_spdx,
1556 other_license_detections,
1557 extracted_license_statement,
1558 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1559 ..Default::default()
1560 }
1561}
1562
/// One blank-line-separated paragraph of a copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Parsed header fields of the paragraph, keyed by lower-cased field name.
    metadata: Rfc822Metadata,
    /// Text (trailing whitespace removed) and line number of the first
    /// `License` header line in the paragraph, if there is one.
    license_header_line: Option<(String, usize)>,
}
1568
1569fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1570 let mut paragraphs = Vec::new();
1571 let mut current_lines = Vec::new();
1572 let mut current_start_line = 1usize;
1573 let mut count = 0usize;
1574
1575 for (idx, line) in content.lines().enumerate() {
1576 count += 1;
1577 if count > MAX_ITERATION_COUNT {
1578 warn!(
1579 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
1580 );
1581 break;
1582 }
1583 let line_no = idx + 1;
1584 if line.is_empty() {
1585 if !current_lines.is_empty() {
1586 paragraphs.push(finalize_copyright_paragraph(
1587 std::mem::take(&mut current_lines),
1588 current_start_line,
1589 ));
1590 }
1591 current_start_line = line_no + 1;
1592 } else {
1593 if current_lines.is_empty() {
1594 current_start_line = line_no;
1595 }
1596 current_lines.push(line.to_string());
1597 }
1598 }
1599
1600 if !current_lines.is_empty() {
1601 paragraphs.push(finalize_copyright_paragraph(
1602 current_lines,
1603 current_start_line,
1604 ));
1605 }
1606
1607 paragraphs
1608}
1609
1610fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1611 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1612 let mut current_name: Option<String> = None;
1613 let mut current_value = String::new();
1614 let mut license_header_line = None;
1615
1616 for (idx, line) in raw_lines.iter().enumerate() {
1617 if line.starts_with(' ') || line.starts_with('\t') {
1618 if current_name.is_some() {
1619 current_value.push('\n');
1620 current_value.push_str(line);
1621 }
1622 continue;
1623 }
1624
1625 if let Some(name) = current_name.take() {
1626 add_copyright_header_value(&mut headers, &name, ¤t_value);
1627 current_value.clear();
1628 }
1629
1630 if let Some((name, value)) = line.split_once(':') {
1631 let normalized_name = name.trim().to_ascii_lowercase();
1632 if normalized_name == "license" && license_header_line.is_none() {
1633 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1634 }
1635 current_name = Some(normalized_name);
1636 current_value = value.trim_start().to_string();
1637 }
1638 }
1639
1640 if let Some(name) = current_name.take() {
1641 add_copyright_header_value(&mut headers, &name, ¤t_value);
1642 }
1643
1644 CopyrightParagraph {
1645 metadata: Rfc822Metadata {
1646 headers,
1647 body: String::new(),
1648 },
1649 license_header_line,
1650 }
1651}
1652
/// Records `value` (trailing whitespace removed) under `name`.
///
/// The entry for `name` is always created, but a value that is empty after
/// trimming is not stored in it.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_string()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_string()),
    }
}
1660
1661fn build_primary_license_detection(
1662 license_name: &str,
1663 matched_text: String,
1664 line_no: usize,
1665) -> LicenseDetection {
1666 let normalized = normalize_debian_license_name(license_name);
1667 let line = match LineNumber::new(line_no) {
1668 Some(l) => l,
1669 None => {
1670 warn!(
1671 "build_primary_license_detection: line number {} out of range, clamping to 1",
1672 line_no
1673 );
1674 LineNumber::new(1).expect("1 is a valid line number")
1675 }
1676 };
1677
1678 build_declared_license_detection(
1679 &normalized,
1680 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1681 )
1682}
1683
1684fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1685 match license_name.trim() {
1686 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1687 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1688 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1689 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1690 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1691 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1692 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1693 "public-domain" => {
1694 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1695 }
1696 other => normalize_declared_license_key(other)
1697 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1698 }
1699}
1700
1701fn parse_copyright_holders(text: &str) -> Vec<String> {
1702 let mut holders = Vec::new();
1703 let mut count = 0usize;
1704
1705 for line in text.lines() {
1706 count += 1;
1707 if count > MAX_ITERATION_COUNT {
1708 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
1709 break;
1710 }
1711 let line = line.trim();
1712 if line.is_empty() {
1713 continue;
1714 }
1715
1716 let cleaned = line
1717 .trim_start_matches("Copyright")
1718 .trim_start_matches("copyright")
1719 .trim_start_matches("(C)")
1720 .trim_start_matches("(c)")
1721 .trim_start_matches("©")
1722 .trim();
1723
1724 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
1725 let without_years = &cleaned[year_end..];
1726 let holder = without_years
1727 .trim_start_matches(',')
1728 .trim_start_matches('-')
1729 .trim();
1730
1731 if !holder.is_empty() && holder.len() > 2 {
1732 holders.push(holder.to_string());
1733 }
1734 }
1735 }
1736
1737 holders
1738}
1739
1740fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
1741 let mut in_field = false;
1742 let mut field_content = String::new();
1743 let mut count = 0usize;
1744
1745 for line in content.lines() {
1746 count += 1;
1747 if count > MAX_ITERATION_COUNT {
1748 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
1749 break;
1750 }
1751 if line.starts_with(field_name) {
1752 in_field = true;
1753 field_content.push_str(line.trim_start_matches(field_name).trim());
1754 field_content.push('\n');
1755 } else if in_field {
1756 if line.starts_with(char::is_whitespace) {
1757 field_content.push_str(line.trim());
1758 field_content.push('\n');
1759 } else if !line.trim().is_empty() {
1760 break;
1761 }
1762 }
1763 }
1764
1765 let trimmed = field_content.trim();
1766 if trimmed.is_empty() {
1767 None
1768 } else {
1769 Some(truncate_field(trimmed.to_string()))
1770 }
1771}
1772
1773pub struct DebianDebParser;
1775
1776impl PackageParser for DebianDebParser {
1777 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1778
1779 fn is_match(path: &Path) -> bool {
1780 path.extension().and_then(|e| e.to_str()) == Some("deb")
1781 }
1782
1783 fn extract_packages(path: &Path) -> Vec<PackageData> {
1784 if let Ok(data) = extract_deb_archive(path) {
1786 return vec![data];
1787 }
1788
1789 let filename = match path.file_name().and_then(|n| n.to_str()) {
1791 Some(f) => f,
1792 None => {
1793 return vec![default_package_data(DatasourceId::DebianDeb)];
1794 }
1795 };
1796
1797 vec![parse_deb_filename(filename)]
1798 }
1799}
1800
// Registry entry for the `.deb` archive parser.
// NOTE(review): arguments appear to be description, path globs, package type,
// an (empty) fourth field and a documentation URL — confirm against the
// `register_parser!` definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1808
1809fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1810 use flate2::read::GzDecoder;
1811 use liblzma::read::XzDecoder;
1812 use std::io::{Cursor, Read};
1813
1814 let file_metadata =
1815 std::fs::metadata(path).map_err(|e| format!("Failed to stat .deb file: {}", e))?;
1816 if file_metadata.len() > MAX_ARCHIVE_SIZE {
1817 return Err(format!(
1818 ".deb file exceeds MAX_ARCHIVE_SIZE ({} bytes)",
1819 file_metadata.len()
1820 ));
1821 }
1822 let compressed_size = file_metadata.len() as usize;
1823
1824 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1825
1826 let mut archive = ar::Archive::new(file);
1827 let mut package: Option<PackageData> = None;
1828 let mut total_extracted: usize = 0;
1829
1830 while let Some(entry_result) = archive.next_entry() {
1831 let entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1832
1833 let entry_name_raw = entry.header().identifier();
1834 let entry_name = String::from_utf8_lossy(entry_name_raw);
1835 let had_replacement = entry_name_raw.iter().any(|&b| b > 127);
1836 if had_replacement {
1837 warn!(
1838 "extract_deb_archive: non-UTF-8 bytes in entry name replaced with lossy conversion"
1839 );
1840 }
1841 let entry_name = entry_name.trim().to_string();
1842
1843 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1844 let entry_size = entry.header().size();
1845 if entry_size > MAX_FILE_SIZE {
1846 warn!(
1847 "extract_deb_archive: control tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1848 entry_size
1849 );
1850 continue;
1851 }
1852 let mut control_data = Vec::new();
1853 entry
1854 .take(MAX_FILE_SIZE)
1855 .read_to_end(&mut control_data)
1856 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1857
1858 total_extracted += control_data.len();
1859 if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1860 warn!(
1861 "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1862 );
1863 break;
1864 }
1865 if total_extracted > MAX_ARCHIVE_SIZE as usize {
1866 warn!(
1867 "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1868 );
1869 break;
1870 }
1871
1872 if entry_name.ends_with(".gz") {
1873 let decoder = GzDecoder::new(Cursor::new(control_data));
1874 if let Some(parsed_package) =
1875 parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1876 {
1877 package = Some(parsed_package);
1878 }
1879 } else if entry_name.ends_with(".xz") {
1880 let decoder = XzDecoder::new(Cursor::new(control_data));
1881 if let Some(parsed_package) =
1882 parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1883 {
1884 package = Some(parsed_package);
1885 }
1886 }
1887 } else if entry_name.starts_with("data.tar") {
1888 let entry_size = entry.header().size();
1889 if entry_size > MAX_FILE_SIZE {
1890 warn!(
1891 "extract_deb_archive: data tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1892 entry_size
1893 );
1894 continue;
1895 }
1896 let mut data = Vec::new();
1897 entry
1898 .take(MAX_FILE_SIZE)
1899 .read_to_end(&mut data)
1900 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1901
1902 total_extracted += data.len();
1903 if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1904 warn!(
1905 "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1906 );
1907 break;
1908 }
1909 if total_extracted > MAX_ARCHIVE_SIZE as usize {
1910 warn!(
1911 "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1912 );
1913 break;
1914 }
1915
1916 let Some(current_package) = package.as_mut() else {
1917 continue;
1918 };
1919
1920 if entry_name.ends_with(".gz") {
1921 let decoder = GzDecoder::new(Cursor::new(data));
1922 merge_deb_data_archive(
1923 decoder,
1924 current_package,
1925 &mut total_extracted,
1926 compressed_size,
1927 )?;
1928 } else if entry_name.ends_with(".xz") {
1929 let decoder = XzDecoder::new(Cursor::new(data));
1930 merge_deb_data_archive(
1931 decoder,
1932 current_package,
1933 &mut total_extracted,
1934 compressed_size,
1935 )?;
1936 }
1937 }
1938 }
1939
1940 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1941}
1942
1943fn parse_control_tar_archive<R: std::io::Read>(
1944 reader: R,
1945 total_extracted: &mut usize,
1946 compressed_size: usize,
1947) -> Result<Option<PackageData>, String> {
1948 use std::io::Read;
1949
1950 let mut tar_archive = tar::Archive::new(reader);
1951
1952 for tar_entry_result in tar_archive
1953 .entries()
1954 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1955 {
1956 let tar_entry = tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1957
1958 let tar_path = tar_entry
1959 .path()
1960 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1961
1962 if tar_path
1963 .components()
1964 .any(|c| matches!(c, std::path::Component::ParentDir))
1965 {
1966 warn!(
1967 "parse_control_tar_archive: skipping tar entry with path traversal: {:?}",
1968 tar_path
1969 );
1970 continue;
1971 }
1972
1973 if tar_entry.size() > MAX_FILE_SIZE {
1974 warn!(
1975 "parse_control_tar_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1976 tar_entry.size()
1977 );
1978 continue;
1979 }
1980
1981 if tar_path.ends_with("control") {
1982 let mut control_content = String::new();
1983 tar_entry
1984 .take(MAX_FILE_SIZE)
1985 .read_to_string(&mut control_content)
1986 .map_err(|e| format!("Failed to read control file: {}", e))?;
1987
1988 *total_extracted += control_content.len();
1989 if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1990 warn!(
1991 "parse_control_tar_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1992 );
1993 return Ok(None);
1994 }
1995 if *total_extracted > MAX_ARCHIVE_SIZE as usize {
1996 warn!(
1997 "parse_control_tar_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1998 );
1999 return Ok(None);
2000 }
2001
2002 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
2003 if paragraphs.is_empty() {
2004 return Err("No paragraphs in control file".to_string());
2005 }
2006
2007 if let Some(package) =
2008 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
2009 {
2010 return Ok(Some(package));
2011 }
2012
2013 return Err("Failed to parse control file".to_string());
2014 }
2015 }
2016
2017 Ok(None)
2018}
2019
2020fn merge_deb_data_archive<R: std::io::Read>(
2021 reader: R,
2022 package: &mut PackageData,
2023 total_extracted: &mut usize,
2024 compressed_size: usize,
2025) -> Result<(), String> {
2026 use std::io::Read;
2027
2028 let mut tar_archive = tar::Archive::new(reader);
2029
2030 for tar_entry_result in tar_archive
2031 .entries()
2032 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
2033 {
2034 let tar_entry =
2035 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
2036
2037 let tar_path = tar_entry
2038 .path()
2039 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
2040
2041 if tar_path
2042 .components()
2043 .any(|c| matches!(c, std::path::Component::ParentDir))
2044 {
2045 warn!(
2046 "merge_deb_data_archive: skipping tar entry with path traversal: {:?}",
2047 tar_path
2048 );
2049 continue;
2050 }
2051
2052 if tar_entry.size() > MAX_FILE_SIZE {
2053 warn!(
2054 "merge_deb_data_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
2055 tar_entry.size()
2056 );
2057 continue;
2058 }
2059
2060 let tar_path_str = tar_path.to_string_lossy();
2061
2062 if tar_path_str.ends_with(&format!(
2063 "/usr/share/doc/{}/copyright",
2064 package.name.as_deref().unwrap_or_default()
2065 )) || tar_path_str.ends_with(&format!(
2066 "usr/share/doc/{}/copyright",
2067 package.name.as_deref().unwrap_or_default()
2068 )) {
2069 let mut copyright_content = String::new();
2070 tar_entry
2071 .take(MAX_FILE_SIZE)
2072 .read_to_string(&mut copyright_content)
2073 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
2074
2075 *total_extracted += copyright_content.len();
2076 if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
2077 warn!(
2078 "merge_deb_data_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
2079 );
2080 return Ok(());
2081 }
2082 if *total_extracted > MAX_ARCHIVE_SIZE as usize {
2083 warn!(
2084 "merge_deb_data_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
2085 );
2086 return Ok(());
2087 }
2088
2089 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
2090 merge_debian_copyright_into_package(package, ©right_pkg);
2091 break;
2092 }
2093 }
2094
2095 Ok(())
2096}
2097
2098fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
2099 if target.extracted_license_statement.is_none() {
2100 target.extracted_license_statement = copyright.extracted_license_statement.clone();
2101 }
2102
2103 for party in ©right.parties {
2104 if !target.parties.iter().any(|existing| {
2105 existing.r#type == party.r#type
2106 && existing.role == party.role
2107 && existing.name == party.name
2108 && existing.email == party.email
2109 && existing.url == party.url
2110 && existing.organization == party.organization
2111 && existing.organization_url == party.organization_url
2112 && existing.timezone == party.timezone
2113 }) {
2114 target.parties.push(party.clone());
2115 }
2116 }
2117}
2118
2119fn parse_deb_filename(filename: &str) -> PackageData {
2120 let without_ext = filename.trim_end_matches(".deb");
2121
2122 let parts: Vec<&str> = without_ext.split('_').collect();
2123 if parts.len() < 2 {
2124 return default_package_data(DatasourceId::DebianDeb);
2125 }
2126
2127 let name = truncate_field(parts[0].to_string());
2128 let version = truncate_field(parts[1].to_string());
2129 let architecture = if parts.len() >= 3 {
2130 Some(truncate_field(parts[2].to_string()))
2131 } else {
2132 None
2133 };
2134
2135 let namespace = Some("debian".to_string());
2136
2137 PackageData {
2138 datasource_id: Some(DatasourceId::DebianDeb),
2139 package_type: Some(PACKAGE_TYPE),
2140 namespace: namespace.clone(),
2141 name: Some(name.clone()),
2142 version: Some(version.clone()),
2143 purl: build_debian_purl(
2144 &name,
2145 Some(&version),
2146 namespace.as_deref(),
2147 architecture.as_deref(),
2148 ),
2149 ..Default::default()
2150 }
2151}
2152
2153pub struct DebianControlInExtractedDebParser;
2159
2160impl PackageParser for DebianControlInExtractedDebParser {
2161 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2162
2163 fn is_match(path: &Path) -> bool {
2164 path.file_name()
2165 .and_then(|n| n.to_str())
2166 .is_some_and(|name| name == "control")
2167 && path
2168 .to_str()
2169 .map(|p| {
2170 p.ends_with("control.tar.gz-extract/control")
2171 || p.ends_with("control.tar.xz-extract/control")
2172 })
2173 .unwrap_or(false)
2174 }
2175
2176 fn extract_packages(path: &Path) -> Vec<PackageData> {
2177 let content = match read_file_to_string(path, None) {
2178 Ok(c) => c,
2179 Err(e) => {
2180 warn!(
2181 "Failed to read control file in extracted deb {:?}: {}",
2182 path, e
2183 );
2184 return vec![default_package_data(
2185 DatasourceId::DebianControlExtractedDeb,
2186 )];
2187 }
2188 };
2189
2190 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
2193 if paragraphs.is_empty() {
2194 return vec![default_package_data(
2195 DatasourceId::DebianControlExtractedDeb,
2196 )];
2197 }
2198
2199 if let Some(pkg) = build_package_from_paragraph(
2200 ¶graphs[0],
2201 None,
2202 DatasourceId::DebianControlExtractedDeb,
2203 ) {
2204 vec![pkg]
2205 } else {
2206 vec![default_package_data(
2207 DatasourceId::DebianControlExtractedDeb,
2208 )]
2209 }
2210 }
2211}
2212
2213pub struct DebianMd5sumInPackageParser;
2215
2216impl PackageParser for DebianMd5sumInPackageParser {
2217 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2218
2219 fn is_match(path: &Path) -> bool {
2220 path.file_name()
2221 .and_then(|n| n.to_str())
2222 .is_some_and(|name| name == "md5sums")
2223 && path
2224 .to_str()
2225 .map(|p| {
2226 p.ends_with("control.tar.gz-extract/md5sums")
2227 || p.ends_with("control.tar.xz-extract/md5sums")
2228 })
2229 .unwrap_or(false)
2230 }
2231
2232 fn extract_packages(path: &Path) -> Vec<PackageData> {
2233 let content = match read_file_to_string(path, None) {
2234 Ok(c) => c,
2235 Err(e) => {
2236 warn!("Failed to read md5sums file {:?}: {}", path, e);
2237 return vec![default_package_data(
2238 DatasourceId::DebianMd5SumsInExtractedDeb,
2239 )];
2240 }
2241 };
2242
2243 let package_name = extract_package_name_from_deb_path(path);
2244
2245 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2246 }
2247}
2248
/// Derives the package name from the `<file>.deb-extract` directory two
/// levels above `path`.
///
/// The directory name must end in `.deb-extract`; the package name is the
/// part of it before the first `_`. Returns `None` when the path is too
/// shallow or the directory name does not have that shape.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    // ancestors(): the path itself, its parent, then the grandparent.
    let extract_dir = path.ancestors().nth(2)?;
    let deb_stem = extract_dir
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    deb_stem.split('_').next().map(str::to_string)
}
2259
2260fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2261 let mut file_references = Vec::new();
2262 let mut count = 0usize;
2263
2264 for line in content.lines() {
2265 count += 1;
2266 if count > MAX_ITERATION_COUNT {
2267 warn!("parse_md5sums_in_package: exceeded MAX_ITERATION_COUNT lines, stopping");
2268 break;
2269 }
2270 let line = line.trim();
2271 if line.is_empty() || line.starts_with('#') {
2272 continue;
2273 }
2274
2275 let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find(" ") {
2276 (
2277 Md5Digest::from_hex(line[..idx].trim()).ok(),
2278 line[idx + 2..].trim(),
2279 )
2280 } else if let Some((hash, path)) = line.split_once(' ') {
2281 (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2282 } else {
2283 (None, line)
2284 };
2285
2286 if IGNORED_ROOT_DIRS.contains(&filepath) {
2287 continue;
2288 }
2289
2290 file_references.push(FileReference {
2291 path: filepath.to_string(),
2292 size: None,
2293 sha1: None,
2294 md5: md5sum,
2295 sha256: None,
2296 sha512: None,
2297 extra_data: None,
2298 });
2299 }
2300
2301 if file_references.is_empty() {
2302 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2303 }
2304
2305 let namespace = Some("debian".to_string());
2306 let mut package = PackageData {
2307 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2308 package_type: Some(PACKAGE_TYPE),
2309 namespace: namespace.clone(),
2310 name: package_name.map(|s| truncate_field(s.to_string())),
2311 file_references,
2312 ..Default::default()
2313 };
2314
2315 if let Some(n) = &package.name {
2316 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2317 }
2318
2319 package
2320}
2321
// Registry entries for the two parsers that read files from an extracted
// `.deb` control tarball.
// NOTE(review): arguments appear to be description, path globs, package type,
// an (empty) fourth field and a documentation URL — confirm against the
// `register_parser!` definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2343
2344#[cfg(test)]
2345mod tests {
2346 use super::*;
2347 use crate::models::DatasourceId;
2348 use crate::models::PackageType;
2349 use ar::{Builder as ArBuilder, Header as ArHeader};
2350 use flate2::Compression;
2351 use flate2::write::GzEncoder;
2352 use liblzma::write::XzEncoder;
2353 use std::io::Cursor;
2354 use std::path::PathBuf;
2355 use tar::{Builder as TarBuilder, Header as TarHeader};
2356 use tempfile::NamedTempFile;
2357
2358 fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2359 let mut control_tar = Vec::new();
2360 {
2361 let encoder = XzEncoder::new(&mut control_tar, 6);
2362 let mut tar_builder = TarBuilder::new(encoder);
2363
2364 let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2365 let mut header = TarHeader::new_gnu();
2366 header
2367 .set_path("control")
2368 .expect("control tar path should be valid");
2369 header.set_size(control_content.len() as u64);
2370 header.set_mode(0o644);
2371 header.set_cksum();
2372 tar_builder
2373 .append(&header, Cursor::new(control_content))
2374 .expect("control file should be appended to tar.xz");
2375 tar_builder.finish().expect("control tar.xz should finish");
2376 }
2377
2378 let deb = NamedTempFile::new().expect("temp deb file should be created");
2379 {
2380 let mut builder = ArBuilder::new(
2381 deb.reopen()
2382 .expect("temporary deb file should reopen for writing"),
2383 );
2384
2385 let debian_binary = b"2.0\n";
2386 let mut debian_binary_header =
2387 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2388 debian_binary_header.set_mode(0o100644);
2389 builder
2390 .append(&debian_binary_header, Cursor::new(debian_binary))
2391 .expect("debian-binary entry should be appended");
2392
2393 let mut control_header =
2394 ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2395 control_header.set_mode(0o100644);
2396 builder
2397 .append(&control_header, Cursor::new(control_tar))
2398 .expect("control.tar.xz entry should be appended");
2399 }
2400
2401 deb
2402 }
2403
2404 fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2405 let mut control_tar = Vec::new();
2406 {
2407 let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2408 let mut tar_builder = TarBuilder::new(encoder);
2409
2410 let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2411 let mut header = TarHeader::new_gnu();
2412 header
2413 .set_path("control")
2414 .expect("control tar path should be valid");
2415 header.set_size(control_content.len() as u64);
2416 header.set_mode(0o644);
2417 header.set_cksum();
2418 tar_builder
2419 .append(&header, Cursor::new(control_content))
2420 .expect("control file should be appended to tar.gz");
2421 tar_builder.finish().expect("control tar.gz should finish");
2422 }
2423
2424 let mut data_tar = Vec::new();
2425 {
2426 let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2427 let mut tar_builder = TarBuilder::new(encoder);
2428
2429 let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2430 let mut header = TarHeader::new_gnu();
2431 header
2432 .set_path("./usr/share/doc/synthetic/copyright")
2433 .expect("copyright path should be valid");
2434 header.set_size(copyright.len() as u64);
2435 header.set_mode(0o644);
2436 header.set_cksum();
2437 tar_builder
2438 .append(&header, Cursor::new(copyright))
2439 .expect("copyright file should be appended to data tar");
2440 tar_builder.finish().expect("data tar.gz should finish");
2441 }
2442
2443 let deb = NamedTempFile::new().expect("temp deb file should be created");
2444 {
2445 let mut builder = ArBuilder::new(
2446 deb.reopen()
2447 .expect("temporary deb file should reopen for writing"),
2448 );
2449
2450 let debian_binary = b"2.0\n";
2451 let mut debian_binary_header =
2452 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2453 debian_binary_header.set_mode(0o100644);
2454 builder
2455 .append(&debian_binary_header, Cursor::new(debian_binary))
2456 .expect("debian-binary entry should be appended");
2457
2458 let mut control_header =
2459 ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2460 control_header.set_mode(0o100644);
2461 builder
2462 .append(&control_header, Cursor::new(control_tar))
2463 .expect("control.tar.gz entry should be appended");
2464
2465 let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2466 data_header.set_mode(0o100644);
2467 builder
2468 .append(&data_header, Cursor::new(data_tar))
2469 .expect("data.tar.gz entry should be appended");
2470 }
2471
2472 deb
2473 }
2474
2475 #[test]
2478 fn test_detect_namespace_from_ubuntu_version() {
2479 assert_eq!(
2480 detect_namespace(Some("1.0-1ubuntu1"), None),
2481 Some("ubuntu".to_string())
2482 );
2483 }
2484
2485 #[test]
2486 fn test_detect_namespace_from_debian_version() {
2487 assert_eq!(
2488 detect_namespace(Some("1.0-1+deb11u1"), None),
2489 Some("debian".to_string())
2490 );
2491 }
2492
2493 #[test]
2494 fn test_detect_namespace_from_ubuntu_maintainer() {
2495 assert_eq!(
2496 detect_namespace(
2497 None,
2498 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2499 ),
2500 Some("ubuntu".to_string())
2501 );
2502 }
2503
2504 #[test]
2505 fn test_detect_namespace_from_debian_maintainer() {
2506 assert_eq!(
2507 detect_namespace(None, Some("John Doe <john@debian.org>")),
2508 Some("debian".to_string())
2509 );
2510 }
2511
2512 #[test]
2513 fn test_detect_namespace_default() {
2514 assert_eq!(
2515 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2516 Some("debian".to_string())
2517 );
2518 }
2519
2520 #[test]
2521 fn test_detect_namespace_version_takes_priority() {
2522 assert_eq!(
2524 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2525 Some("ubuntu".to_string())
2526 );
2527 }
2528
2529 #[test]
2532 fn test_build_purl_basic() {
2533 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2534 assert_eq!(
2535 purl,
2536 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2537 );
2538 }
2539
2540 #[test]
2541 fn test_build_purl_no_version() {
2542 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2543 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2544 }
2545
2546 #[test]
2547 fn test_build_purl_no_arch() {
2548 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2549 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2550 }
2551
2552 #[test]
2553 fn test_build_purl_no_namespace() {
2554 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2555 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2556 }
2557
2558 #[test]
2561 fn test_parse_simple_dependency() {
2562 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2563 assert_eq!(deps.len(), 1);
2564 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2565 assert_eq!(deps[0].extracted_requirement, None);
2566 assert_eq!(deps[0].scope, Some("depends".to_string()));
2567 }
2568
2569 #[test]
2570 fn test_parse_dependency_with_version() {
2571 let deps =
2572 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2573 assert_eq!(deps.len(), 1);
2574 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2575 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2576 }
2577
2578 #[test]
2579 fn test_parse_dependency_exact_version() {
2580 let deps = parse_dependency_field(
2581 "libc6 (= 2.31-13+deb11u5)",
2582 "depends",
2583 true,
2584 false,
2585 Some("debian"),
2586 );
2587 assert_eq!(deps.len(), 1);
2588 assert_eq!(deps[0].is_pinned, Some(true));
2589 }
2590
2591 #[test]
2592 fn test_parse_dependency_strict_less() {
2593 let deps =
2594 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2595 assert_eq!(deps.len(), 1);
2596 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2597 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2598 }
2599
2600 #[test]
2601 fn test_parse_multiple_dependencies() {
2602 let deps = parse_dependency_field(
2603 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2604 "depends",
2605 true,
2606 false,
2607 Some("debian"),
2608 );
2609 assert_eq!(deps.len(), 3);
2610 }
2611
2612 #[test]
2613 fn test_parse_dependency_alternatives() {
2614 let deps = parse_dependency_field(
2615 "libssl1.1 | libssl3",
2616 "depends",
2617 true,
2618 false,
2619 Some("debian"),
2620 );
2621 assert_eq!(deps.len(), 2);
2622 assert_eq!(deps[0].is_optional, Some(true));
2624 assert_eq!(deps[1].is_optional, Some(true));
2625 }
2626
2627 #[test]
2628 fn test_parse_dependency_skips_substitutions() {
2629 let deps = parse_dependency_field(
2630 "${shlibs:Depends}, ${misc:Depends}, libc6",
2631 "depends",
2632 true,
2633 false,
2634 Some("debian"),
2635 );
2636 assert_eq!(deps.len(), 1);
2637 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2638 }
2639
2640 #[test]
2641 fn test_parse_dependency_with_arch_qualifier() {
2642 let deps = parse_dependency_field(
2644 "libc6 (>= 2.17) [amd64]",
2645 "depends",
2646 true,
2647 false,
2648 Some("debian"),
2649 );
2650 assert_eq!(deps.len(), 1);
2651 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2652 }
2653
2654 #[test]
2655 fn test_parse_empty_dependency() {
2656 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2657 assert!(deps.is_empty());
2658 }
2659
2660 #[test]
2663 fn test_parse_source_field_name_only() {
2664 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2665 assert_eq!(sources.len(), 1);
2666 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2667 }
2668
2669 #[test]
2670 fn test_parse_source_field_with_version() {
2671 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2672 assert_eq!(sources.len(), 1);
2673 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2674 }
2675
2676 #[test]
2677 fn test_parse_source_field_empty() {
2678 let sources = parse_source_field(None, Some("debian"));
2679 assert!(sources.is_empty());
2680 }
2681
2682 #[test]
2685 fn test_parse_debian_control_source_and_binary() {
2686 let content = "\
2687Source: curl
2688Section: web
2689Priority: optional
2690Maintainer: Alessandro Ghedini <ghedo@debian.org>
2691Homepage: https://curl.se/
2692Vcs-Browser: https://salsa.debian.org/debian/curl
2693Vcs-Git: https://salsa.debian.org/debian/curl.git
2694Build-Depends: debhelper (>= 12), libssl-dev
2695
2696Package: curl
2697Architecture: amd64
2698Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2699Description: command line tool for transferring data with URL syntax";
2700
2701 let packages = parse_debian_control(content);
2702 assert_eq!(packages.len(), 1);
2703
2704 let pkg = &packages[0];
2705 assert_eq!(pkg.name, Some("curl".to_string()));
2706 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2707 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2708 assert_eq!(
2709 pkg.vcs_url,
2710 Some("https://salsa.debian.org/debian/curl.git".to_string())
2711 );
2712 assert_eq!(
2713 pkg.code_view_url,
2714 Some("https://salsa.debian.org/debian/curl".to_string())
2715 );
2716
2717 assert_eq!(pkg.parties.len(), 1);
2719 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2720 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2721 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2722
2723 assert!(!pkg.dependencies.is_empty());
2725 }
2726
2727 #[test]
2728 fn test_parse_debian_control_multiple_binary() {
2729 let content = "\
2730Source: gzip
2731Maintainer: Debian Developer <dev@debian.org>
2732
2733Package: gzip
2734Architecture: any
2735Depends: libc6 (>= 2.17)
2736Description: GNU file compression
2737
2738Package: gzip-win32
2739Architecture: all
2740Description: gzip for Windows";
2741
2742 let packages = parse_debian_control(content);
2743 assert_eq!(packages.len(), 2);
2744 assert_eq!(packages[0].name, Some("gzip".to_string()));
2745 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2746
2747 assert_eq!(packages[0].parties.len(), 1);
2749 assert_eq!(packages[1].parties.len(), 1);
2750 }
2751
2752 #[test]
2753 fn test_parse_debian_control_source_only() {
2754 let content = "\
2755Source: my-package
2756Maintainer: Test User <test@debian.org>
2757Build-Depends: debhelper (>= 13)";
2758
2759 let packages = parse_debian_control(content);
2760 assert_eq!(packages.len(), 1);
2761 assert_eq!(packages[0].name, Some("my-package".to_string()));
2762 assert!(!packages[0].dependencies.is_empty());
2764 assert_eq!(
2765 packages[0].dependencies[0].scope,
2766 Some("build-depends".to_string())
2767 );
2768 }
2769
2770 #[test]
2771 fn test_parse_debian_control_with_uploaders() {
2772 let content = "\
2773Source: example
2774Maintainer: Main Dev <main@debian.org>
2775Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2776
2777Package: example
2778Architecture: any
2779Description: test package";
2780
2781 let packages = parse_debian_control(content);
2782 assert_eq!(packages.len(), 1);
2783 assert_eq!(packages[0].parties.len(), 3);
2785 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2786 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2787 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2788 }
2789
2790 #[test]
2791 fn test_parse_debian_control_vcs_git_with_branch() {
2792 let content = "\
2793Source: example
2794Maintainer: Dev <dev@debian.org>
2795Vcs-Git: https://salsa.debian.org/example.git -b main
2796
2797Package: example
2798Architecture: any
2799Description: test";
2800
2801 let packages = parse_debian_control(content);
2802 assert_eq!(packages.len(), 1);
2803 assert_eq!(
2805 packages[0].vcs_url,
2806 Some("https://salsa.debian.org/example.git".to_string())
2807 );
2808 }
2809
2810 #[test]
2811 fn test_parse_debian_control_multi_arch() {
2812 let content = "\
2813Source: example
2814Maintainer: Dev <dev@debian.org>
2815
2816Package: libexample
2817Architecture: any
2818Multi-Arch: same
2819Description: shared library";
2820
2821 let packages = parse_debian_control(content);
2822 assert_eq!(packages.len(), 1);
2823 let extra = packages[0].extra_data.as_ref().unwrap();
2824 assert_eq!(
2825 extra.get("multi_arch"),
2826 Some(&serde_json::Value::String("same".to_string()))
2827 );
2828 }
2829
2830 #[test]
2833 fn test_parse_dpkg_status_basic() {
2834 let content = "\
2835Package: base-files
2836Status: install ok installed
2837Priority: required
2838Section: admin
2839Installed-Size: 391
2840Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2841Architecture: amd64
2842Version: 11ubuntu5.6
2843Description: Debian base system miscellaneous files
2844Homepage: https://tracker.debian.org/pkg/base-files
2845
2846Package: not-installed
2847Status: deinstall ok config-files
2848Architecture: amd64
2849Version: 1.0
2850Description: This should be skipped";
2851
2852 let packages = parse_dpkg_status(content);
2853 assert_eq!(packages.len(), 1);
2854
2855 let pkg = &packages[0];
2856 assert_eq!(pkg.name, Some("base-files".to_string()));
2857 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2858 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2859 assert_eq!(
2860 pkg.datasource_id,
2861 Some(DatasourceId::DebianInstalledStatusDb)
2862 );
2863
2864 let extra = pkg.extra_data.as_ref().unwrap();
2866 assert_eq!(
2867 extra.get("installed_size"),
2868 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2869 );
2870 }
2871
2872 #[test]
2873 fn test_parse_dpkg_status_multiple_installed() {
2874 let content = "\
2875Package: libc6
2876Status: install ok installed
2877Architecture: amd64
2878Version: 2.31-13+deb11u5
2879Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2880Description: GNU C Library
2881
2882Package: zlib1g
2883Status: install ok installed
2884Architecture: amd64
2885Version: 1:1.2.11.dfsg-2+deb11u2
2886Maintainer: Mark Brown <broonie@debian.org>
2887Description: compression library";
2888
2889 let packages = parse_dpkg_status(content);
2890 assert_eq!(packages.len(), 2);
2891 assert_eq!(packages[0].name, Some("libc6".to_string()));
2892 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2893 }
2894
2895 #[test]
2896 fn test_parse_dpkg_status_with_dependencies() {
2897 let content = "\
2898Package: curl
2899Status: install ok installed
2900Architecture: amd64
2901Version: 7.74.0-1.3+deb11u7
2902Maintainer: Alessandro Ghedini <ghedo@debian.org>
2903Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2904Recommends: ca-certificates
2905Description: command line tool for transferring data with URL syntax";
2906
2907 let packages = parse_dpkg_status(content);
2908 assert_eq!(packages.len(), 1);
2909
2910 let deps = &packages[0].dependencies;
2911 assert_eq!(deps.len(), 3);
2913
2914 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2916 assert_eq!(deps[0].scope, Some("depends".to_string()));
2917 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2918
2919 assert_eq!(
2921 deps[2].purl,
2922 Some("pkg:deb/debian/ca-certificates".to_string())
2923 );
2924 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2925 assert_eq!(deps[2].is_optional, Some(true));
2926 }
2927
2928 #[test]
2929 fn test_parse_dpkg_status_with_source() {
2930 let content = "\
2931Package: libncurses6
2932Status: install ok installed
2933Architecture: amd64
2934Source: ncurses (6.2+20201114-2+deb11u1)
2935Version: 6.2+20201114-2+deb11u1
2936Maintainer: Craig Small <csmall@debian.org>
2937Description: shared libraries for terminal handling";
2938
2939 let packages = parse_dpkg_status(content);
2940 assert_eq!(packages.len(), 1);
2941 assert!(!packages[0].source_packages.is_empty());
2942 assert!(packages[0].source_packages[0].contains("ncurses"));
2944 }
2945
2946 #[test]
2947 fn test_parse_dpkg_status_filters_not_installed() {
2948 let content = "\
2949Package: installed-pkg
2950Status: install ok installed
2951Version: 1.0
2952Architecture: amd64
2953Description: installed
2954
2955Package: half-installed
2956Status: install ok half-installed
2957Version: 2.0
2958Architecture: amd64
2959Description: half installed
2960
2961Package: deinstall-pkg
2962Status: deinstall ok config-files
2963Version: 3.0
2964Architecture: amd64
2965Description: deinstalled
2966
2967Package: purge-pkg
2968Status: purge ok not-installed
2969Version: 4.0
2970Architecture: amd64
2971Description: purged";
2972
2973 let packages = parse_dpkg_status(content);
2974 assert_eq!(packages.len(), 1);
2975 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2976 }
2977
2978 #[test]
2979 fn test_parse_dpkg_status_empty() {
2980 let packages = parse_dpkg_status("");
2981 assert!(packages.is_empty());
2982 }
2983
2984 #[test]
2987 fn test_debian_control_is_match() {
2988 assert!(DebianControlParser::is_match(Path::new(
2989 "/path/to/debian/control"
2990 )));
2991 assert!(DebianControlParser::is_match(Path::new("debian/control")));
2992 assert!(!DebianControlParser::is_match(Path::new(
2993 "/path/to/control"
2994 )));
2995 assert!(!DebianControlParser::is_match(Path::new(
2996 "/path/to/debian/changelog"
2997 )));
2998 }
2999
3000 #[test]
3001 fn test_debian_installed_is_match() {
3002 assert!(DebianInstalledParser::is_match(Path::new(
3003 "/var/lib/dpkg/status"
3004 )));
3005 assert!(DebianInstalledParser::is_match(Path::new(
3006 "some/root/var/lib/dpkg/status"
3007 )));
3008 assert!(!DebianInstalledParser::is_match(Path::new(
3009 "/var/lib/dpkg/status.d/something"
3010 )));
3011 assert!(!DebianInstalledParser::is_match(Path::new(
3012 "/var/lib/dpkg/available"
3013 )));
3014 }
3015
3016 #[test]
3019 fn test_parse_debian_control_empty_input() {
3020 let packages = parse_debian_control("");
3021 assert!(packages.is_empty());
3022 }
3023
3024 #[test]
3025 fn test_parse_debian_control_malformed_input() {
3026 let content = "this is not a valid control file\nwith random text";
3027 let packages = parse_debian_control(content);
3028 assert!(packages.is_empty());
3030 }
3031
3032 #[test]
3033 fn test_dependency_with_epoch_version() {
3034 let deps = parse_dependency_field(
3036 "zlib1g (>= 1:1.2.11)",
3037 "depends",
3038 true,
3039 false,
3040 Some("debian"),
3041 );
3042 assert_eq!(deps.len(), 1);
3043 assert_eq!(
3044 deps[0].extracted_requirement,
3045 Some(">= 1:1.2.11".to_string())
3046 );
3047 }
3048
3049 #[test]
3050 fn test_dependency_with_plus_in_name() {
3051 let deps =
3052 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
3053 assert_eq!(deps.len(), 1);
3054 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
3055 }
3056
3057 #[test]
3058 fn test_dsc_parser_is_match() {
3059 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
3060 assert!(DebianDscParser::is_match(&PathBuf::from(
3061 "adduser_3.118+deb11u1.dsc"
3062 )));
3063 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
3064 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
3065 }
3066
3067 #[test]
3068 fn test_dsc_parser_adduser() {
3069 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
3070 let package = DebianDscParser::extract_first_package(&path);
3071
3072 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
3073 assert_eq!(package.namespace, Some("debian".to_string()));
3074 assert_eq!(package.name, Some("adduser".to_string()));
3075 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
3076 assert_eq!(
3077 package.purl,
3078 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
3079 );
3080 assert_eq!(
3081 package.vcs_url,
3082 Some("https://salsa.debian.org/debian/adduser.git".to_string())
3083 );
3084 assert_eq!(
3085 package.code_view_url,
3086 Some("https://salsa.debian.org/debian/adduser".to_string())
3087 );
3088 assert_eq!(
3089 package.datasource_id,
3090 Some(DatasourceId::DebianSourceControlDsc)
3091 );
3092
3093 assert_eq!(package.parties.len(), 2);
3094 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3095 assert_eq!(
3096 package.parties[0].name,
3097 Some("Debian Adduser Developers".to_string())
3098 );
3099 assert_eq!(
3100 package.parties[0].email,
3101 Some("adduser@packages.debian.org".to_string())
3102 );
3103 assert_eq!(package.parties[0].r#type, None);
3104
3105 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3106 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
3107 assert_eq!(
3108 package.parties[1].email,
3109 Some("mh+debian-packages@zugschlus.de".to_string())
3110 );
3111 assert_eq!(package.parties[1].r#type, None);
3112
3113 assert_eq!(package.source_packages.len(), 1);
3114 assert_eq!(
3115 package.source_packages[0],
3116 "pkg:deb/debian/adduser".to_string()
3117 );
3118
3119 assert!(!package.dependencies.is_empty());
3120 let build_dep_names: Vec<String> = package
3121 .dependencies
3122 .iter()
3123 .filter_map(|d| d.purl.as_ref())
3124 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
3125 .map(|p| p.to_string())
3126 .collect();
3127 assert!(build_dep_names.len() >= 2);
3128 }
3129
3130 #[test]
3131 fn test_dsc_parser_zsh() {
3132 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
3133 let package = DebianDscParser::extract_first_package(&path);
3134
3135 assert_eq!(package.name, Some("zsh".to_string()));
3136 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
3137 assert_eq!(package.namespace, Some("debian".to_string()));
3138 assert!(package.purl.is_some());
3139 assert!(package.purl.as_ref().unwrap().contains("zsh"));
3140 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
3141 }
3142
3143 #[test]
3144 fn test_parse_dsc_content_basic() {
3145 let content = "Format: 3.0 (native)
3146Source: testpkg
3147Binary: testpkg
3148Architecture: amd64
3149Version: 1.0.0
3150Maintainer: Test User <test@example.com>
3151Standards-Version: 4.5.0
3152Build-Depends: debhelper (>= 12)
3153Files:
3154 abc123 1024 testpkg_1.0.0.tar.xz
3155";
3156
3157 let package = parse_dsc_content(content);
3158 assert_eq!(package.name, Some("testpkg".to_string()));
3159 assert_eq!(package.version, Some("1.0.0".to_string()));
3160 assert_eq!(package.namespace, Some("debian".to_string()));
3161 assert_eq!(package.parties.len(), 1);
3162 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
3163 assert_eq!(
3164 package.parties[0].email,
3165 Some("test@example.com".to_string())
3166 );
3167 assert_eq!(package.dependencies.len(), 1);
3168 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
3169 }
3170
3171 #[test]
3172 fn test_parse_dsc_content_with_uploaders() {
3173 let content = "Source: mypkg
3174Version: 2.0
3175Architecture: all
3176Maintainer: Main Dev <main@example.com>
3177Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
3178";
3179
3180 let package = parse_dsc_content(content);
3181 assert_eq!(package.parties.len(), 3);
3182 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3183 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3184 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
3185 }
3186
3187 #[test]
3188 fn test_orig_tar_parser_is_match() {
3189 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3190 "package_1.0.orig.tar.gz"
3191 )));
3192 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3193 "abseil_0~20200923.3.orig.tar.xz"
3194 )));
3195 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
3196 "package.debian.tar.gz"
3197 )));
3198 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
3199 }
3200
3201 #[test]
3202 fn test_debian_tar_parser_is_match() {
3203 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3204 "package_1.0-1.debian.tar.xz"
3205 )));
3206 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3207 "abseil_20220623.1-1.debian.tar.gz"
3208 )));
3209 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
3210 "package.orig.tar.gz"
3211 )));
3212 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
3213 }
3214
3215 #[test]
3216 fn test_parse_orig_tar_filename() {
3217 let pkg = parse_source_tarball_filename(
3218 "abseil_0~20200923.3.orig.tar.gz",
3219 DatasourceId::DebianOriginalSourceTarball,
3220 );
3221 assert_eq!(pkg.name, Some("abseil".to_string()));
3222 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
3223 assert_eq!(pkg.namespace, Some("debian".to_string()));
3224 assert_eq!(
3225 pkg.purl,
3226 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
3227 );
3228 assert_eq!(
3229 pkg.datasource_id,
3230 Some(DatasourceId::DebianOriginalSourceTarball)
3231 );
3232 }
3233
3234 #[test]
3235 fn test_parse_debian_tar_filename() {
3236 let pkg = parse_source_tarball_filename(
3237 "abseil_20220623.1-1.debian.tar.xz",
3238 DatasourceId::DebianSourceMetadataTarball,
3239 );
3240 assert_eq!(pkg.name, Some("abseil".to_string()));
3241 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
3242 assert_eq!(pkg.namespace, Some("debian".to_string()));
3243 assert_eq!(
3244 pkg.purl,
3245 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
3246 );
3247 }
3248
3249 #[test]
3250 fn test_parse_deb_filename() {
3251 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
3252 assert_eq!(pkg.name, Some("nginx".to_string()));
3253 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3254
3255 let pkg = parse_deb_filename("invalid.deb");
3256 assert!(pkg.name.is_none());
3257 assert!(pkg.version.is_none());
3258 }
3259
3260 #[test]
3261 fn test_parse_source_tarball_various_compressions() {
3262 let pkg_gz = parse_source_tarball_filename(
3263 "test_1.0.orig.tar.gz",
3264 DatasourceId::DebianOriginalSourceTarball,
3265 );
3266 let pkg_xz = parse_source_tarball_filename(
3267 "test_1.0.orig.tar.xz",
3268 DatasourceId::DebianOriginalSourceTarball,
3269 );
3270 let pkg_bz2 = parse_source_tarball_filename(
3271 "test_1.0.orig.tar.bz2",
3272 DatasourceId::DebianOriginalSourceTarball,
3273 );
3274
3275 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3276 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3277 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3278 }
3279
3280 #[test]
3281 fn test_parse_source_tarball_invalid_format() {
3282 let pkg = parse_source_tarball_filename(
3283 "invalid-no-underscore.tar.gz",
3284 DatasourceId::DebianOriginalSourceTarball,
3285 );
3286 assert!(pkg.name.is_none());
3287 assert!(pkg.version.is_none());
3288 }
3289
3290 #[test]
3291 fn test_list_parser_is_match() {
3292 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3293 "/var/lib/dpkg/info/bash.list"
3294 )));
3295 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3296 "/var/lib/dpkg/info/package:amd64.list"
3297 )));
3298 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3299 "bash.list"
3300 )));
3301 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3302 "/var/lib/dpkg/info/bash.md5sums"
3303 )));
3304 }
3305
3306 #[test]
3307 fn test_md5sums_parser_is_match() {
3308 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3309 "/var/lib/dpkg/info/bash.md5sums"
3310 )));
3311 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3312 "/var/lib/dpkg/info/package:amd64.md5sums"
3313 )));
3314 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3315 "bash.md5sums"
3316 )));
3317 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3318 "/var/lib/dpkg/info/bash.list"
3319 )));
3320 }
3321
3322 #[test]
3323 fn test_parse_debian_file_list_plain_list() {
3324 let content = "/.
3325/bin
3326/bin/bash
3327/usr/bin/bashbug
3328/usr/share/doc/bash/README
3329";
3330 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3331 assert_eq!(pkg.name, Some("bash".to_string()));
3332 assert_eq!(pkg.file_references.len(), 3);
3333 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3334 assert_eq!(pkg.file_references[0].md5, None);
3335 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3336 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3337 }
3338
3339 #[test]
3340 fn test_parse_debian_file_list_md5sums() {
3341 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
33421c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3343f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3344";
3345 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3346 assert_eq!(pkg.name, Some("bash".to_string()));
3347 assert_eq!(pkg.file_references.len(), 3);
3348 assert_eq!(pkg.file_references[0].path, "bin/bash");
3349 assert_eq!(
3350 pkg.file_references[0].md5,
3351 Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
3352 );
3353 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3354 assert_eq!(
3355 pkg.file_references[1].md5,
3356 Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
3357 );
3358 }
3359
3360 #[test]
3361 fn test_parse_debian_file_list_with_arch() {
3362 let content = "/usr/bin/foo
3363/usr/lib/x86_64-linux-gnu/libfoo.so
3364";
3365 let pkg = parse_debian_file_list(
3366 content,
3367 "libfoo:amd64",
3368 DatasourceId::DebianInstalledFilesList,
3369 );
3370 assert_eq!(pkg.name, Some("libfoo".to_string()));
3371 assert!(pkg.purl.is_some());
3372 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3373 assert_eq!(pkg.file_references.len(), 2);
3374 }
3375
3376 #[test]
3377 fn test_parse_debian_file_list_skips_comments_and_empty() {
3378 let content = "# This is a comment
3379/bin/bash
3380
3381/usr/bin/bashbug
3382
3383";
3384 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3385 assert_eq!(pkg.file_references.len(), 2);
3386 }
3387
/// When the identifier passed in is the literal `md5sums`, no package name is derived,
/// but the listed file is still recorded.
#[test]
fn test_parse_debian_file_list_md5sums_only() {
    let listing = "abc123 usr/bin/tool\n";

    let pkg = parse_debian_file_list(listing, "md5sums", DatasourceId::DebianInstalledFilesList);

    assert!(pkg.name.is_none());
    assert_eq!(pkg.file_references.len(), 1);
}
3397
/// Top-level directory entries (`/.`, `/bin`, `/etc`, `/usr`, `/var`) are dropped from the
/// file list, leaving only the actual file.
#[test]
fn test_parse_debian_file_list_ignores_root_dirs() {
    let listing = concat!(
        "/.\n",
        "/bin\n",
        "/bin/bash\n",
        "/etc\n",
        "/usr\n",
        "/var\n",
    );

    let pkg = parse_debian_file_list(listing, "bash", DatasourceId::DebianInstalledFilesList);
    let refs = &pkg.file_references;

    assert_eq!(refs.len(), 1);
    assert_eq!(refs[0].path, "/bin/bash");
}
3411
/// Exercises `DebianCopyrightParser::is_match` against paths that must be accepted and
/// paths that must be rejected.
#[test]
fn test_copyright_parser_is_match() {
    let accepted = [
        "/usr/share/doc/bash/copyright",
        "debian/copyright",
        "src/third_party/gperftools/dist/packages/deb/copyright",
        "ports/zlib/copyright",
        "/tmp/sample_copyright",
    ];
    for candidate in accepted {
        assert!(
            DebianCopyrightParser::is_match(&PathBuf::from(candidate)),
            "expected a match for {}",
            candidate
        );
    }

    let rejected = ["copyright.txt", "/etc/copyright"];
    for candidate in rejected {
        assert!(
            !DebianCopyrightParser::is_match(&PathBuf::from(candidate)),
            "expected no match for {}",
            candidate
        );
    }
}
3436
/// Table-driven check that each copyright path layout is classified under the expected
/// datasource id.
#[test]
fn test_detect_debian_copyright_datasource() {
    let cases = [
        ("debian/copyright", DatasourceId::DebianCopyrightInSource),
        (
            "src/third_party/gperftools/dist/packages/deb/copyright",
            DatasourceId::DebianCopyrightStandalone,
        ),
        (
            "ports/zlib/copyright",
            DatasourceId::DebianCopyrightStandalone,
        ),
        (
            "/usr/share/doc/bash/copyright",
            DatasourceId::DebianCopyrightInPackage,
        ),
        ("stable_copyright", DatasourceId::DebianCopyrightStandalone),
    ];

    for (path, expected) in cases {
        let detected = detect_debian_copyright_datasource(&PathBuf::from(path));
        assert_eq!(detected, expected, "unexpected datasource for {}", path);
    }
}
3462
/// Package names are taken from the directory that holds the copyright file for
/// `/usr/share/doc/<name>/copyright` paths, are absent for `debian/copyright`, and are
/// recovered for a standalone `ports/zlib/copyright` path via the standalone helper.
#[test]
fn test_extract_package_name_from_path() {
    let installed_docs = [
        ("/usr/share/doc/bash/copyright", "bash"),
        ("/usr/share/doc/libseccomp2/copyright", "libseccomp2"),
    ];
    for (path, package) in installed_docs {
        let extracted = extract_package_name_from_path(&PathBuf::from(path));
        assert_eq!(extracted.as_deref(), Some(package));
    }

    // An in-source copyright file gives no package name.
    let in_source = extract_package_name_from_path(&PathBuf::from("debian/copyright"));
    assert!(in_source.is_none());

    let standalone = extract_standalone_package_name_from_path(
        &PathBuf::from("ports/zlib/copyright"),
        DatasourceId::DebianCopyrightStandalone,
    );
    assert_eq!(standalone.as_deref(), Some("zlib"));
}
3485
/// Parses a machine-readable (DEP-5 style) copyright file and checks the package identity,
/// the extracted license statement, and the first copyright-holder party.
#[test]
fn test_parse_copyright_dep5_format() {
    let content = concat!(
        "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
        "Upstream-Name: libseccomp\n",
        "Source: https://sourceforge.net/projects/libseccomp/\n",
        "\n",
        "Files: *\n",
        "Copyright: 2012 Paul Moore <pmoore@redhat.com>\n",
        " 2012 Ashley Lai <adlai@us.ibm.com>\n",
        "License: LGPL-2.1\n",
        "\n",
        "License: LGPL-2.1\n",
        " This library is free software\n",
    );

    let pkg = parse_copyright_file(content, Some("libseccomp"));

    assert_eq!(pkg.name.as_deref(), Some("libseccomp"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));
    assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
    assert_eq!(pkg.extracted_license_statement.as_deref(), Some("LGPL-2.1"));

    // Both listed holders must be present before the first one is inspected.
    assert!(pkg.parties.len() >= 2);
    let first_party = &pkg.parties[0];
    assert_eq!(first_party.role.as_deref(), Some("copyright-holder"));
    assert!(first_party.name.as_deref().unwrap().contains("Paul Moore"));
}
3512
/// Against the bsdutils fixture, the primary license detection must point at the
/// `License: GPL-2+` line, reported at line 47 for both start and end.
#[test]
fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
    let fixture = PathBuf::from(
        "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
    );
    let pkg = DebianCopyrightParser::extract_first_package(&fixture);

    assert_eq!(pkg.name.as_deref(), Some("bsdutils"));

    let statement = pkg
        .extracted_license_statement
        .as_deref()
        .expect("license statement should exist");
    assert!(statement.contains("GPL-2+"));
    assert!(!pkg.license_detections.is_empty());

    let primary_match = &pkg.license_detections[0].matches[0];
    assert_eq!(primary_match.matched_text.as_deref(), Some("License: GPL-2+"));

    let expected_line = LineNumber::new(47).unwrap();
    assert_eq!(primary_match.start_line, expected_line);
    assert_eq!(primary_match.end_line, expected_line);
}
3536
/// The fixture must yield one primary detection (line 11) and four secondary detections
/// whose start lines appear in file order and whose matched text keeps its original case.
#[test]
fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
    let fixture = PathBuf::from("testdata/debian/copyright/copyright");
    let pkg = DebianCopyrightParser::extract_first_package(&fixture);

    assert_eq!(pkg.license_detections.len(), 1);
    assert_eq!(pkg.other_license_detections.len(), 4);

    let primary_match = &pkg.license_detections[0].matches[0];
    assert_eq!(
        primary_match.matched_text.as_deref(),
        Some("License: LGPL-2.1")
    );
    assert_eq!(primary_match.start_line, LineNumber::new(11).unwrap());

    // Start lines of the secondary detections, in the order they were emitted.
    let mut ordered_lines: Vec<usize> = Vec::new();
    for detection in &pkg.other_license_detections {
        ordered_lines.push(detection.matches[0].start_line.get());
    }
    assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

    // Matched text of the secondary detections, in the same order.
    let mut ordered_texts: Vec<&str> = Vec::new();
    for detection in &pkg.other_license_detections {
        ordered_texts.push(detection.matches[0].matched_text.as_deref().unwrap());
    }
    assert_eq!(ordered_texts, vec!["License: LGPL-2.1"; 4]);
}
3574
/// The clamav fixture must produce a `License: Zlib` detection, and the last such
/// detection must be reported at line 732 (start and end).
#[test]
fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
    const ZLIB_HEADER: &str = "License: Zlib";

    let fixture = PathBuf::from(
        "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
    );
    let pkg = DebianCopyrightParser::extract_first_package(&fixture);
    let detections = &pkg.other_license_detections;

    // Scan from the front for the first Zlib paragraph.
    let first_zlib = detections
        .iter()
        .find(|detection| detection.matches[0].matched_text.as_deref() == Some(ZLIB_HEADER))
        .expect("at least one Zlib license paragraph should be detected");
    assert_eq!(
        first_zlib.matches[0].matched_text.as_deref(),
        Some(ZLIB_HEADER)
    );

    // Scan from the back for the last Zlib paragraph.
    let last_zlib = detections
        .iter()
        .rfind(|detection| detection.matches[0].matched_text.as_deref() == Some(ZLIB_HEADER))
        .expect("bottom standalone Zlib license paragraph should be detected");

    let bottom_line = LineNumber::new(732).unwrap();
    assert_eq!(last_zlib.matches[0].start_line, bottom_line);
    assert_eq!(last_zlib.matches[0].end_line, bottom_line);
}
3604
/// For the `test_license_nameless` fixture the primary detection must be the
/// `License: LGPL-3+ or GPL-2+` line at line 8, with `License: GPL-2+` still appearing
/// among the secondary detections.
#[test]
fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
    let fixture =
        PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
    let pkg = DebianCopyrightParser::extract_first_package(&fixture);

    assert_eq!(pkg.license_detections.len(), 1);

    let primary_match = &pkg.license_detections[0].matches[0];
    assert_eq!(
        primary_match.matched_text.as_deref(),
        Some("License: LGPL-3+ or GPL-2+")
    );
    let header_line = LineNumber::new(8).unwrap();
    assert_eq!(primary_match.start_line, header_line);
    assert_eq!(primary_match.end_line, header_line);

    let has_gpl_secondary = pkg
        .other_license_detections
        .iter()
        .map(|detection| detection.matches[0].matched_text.as_deref())
        .any(|text| text == Some("License: GPL-2+"));
    assert!(has_gpl_secondary);
}
3624
/// When both the header paragraph and a `Files: *` paragraph carry a license, the
/// `Files: *` license (line 7 here) is the one reported as primary.
#[test]
fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
    let content = concat!(
        "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
        "Upstream-Name: foo\n",
        "License: MIT\n",
        "\n",
        "Files: *\n",
        "Copyright: 2024 Example\n",
        "License: GPL-2+\n",
    );

    let pkg = parse_copyright_file(content, Some("foo"));

    assert_eq!(pkg.license_detections.len(), 1);
    let primary_match = &pkg.license_detections[0].matches[0];
    assert_eq!(primary_match.matched_text.as_deref(), Some("License: GPL-2+"));
    assert_eq!(primary_match.start_line, LineNumber::new(7).unwrap());
}
3638
/// `finalize_copyright_paragraph` must produce the same headers and body as the generic
/// RFC822 paragraph parser, and must record the `License:` header line together with its
/// line number (12 here: the third input line, with 10 passed as the second argument —
/// presumably the paragraph's starting line; confirm against the helper).
#[test]
fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
    let raw_lines: Vec<String> = [
        "Files: *",
        "Copyright: 2024 Example Org",
        "License: Apache-2.0",
        " Licensed under the Apache License, Version 2.0.",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();
    // Join up front so the lines themselves can be handed over without cloning.
    let reference_text = raw_lines.join("\n");

    let paragraph = finalize_copyright_paragraph(raw_lines, 10);
    let expected = rfc822::parse_rfc822_paragraphs(&reference_text)
        .into_iter()
        .next()
        .expect("reference RFC822 paragraph should parse");

    assert_eq!(paragraph.metadata.headers, expected.headers);
    assert_eq!(paragraph.metadata.body, expected.body);
    assert_eq!(
        paragraph.license_header_line,
        Some((String::from("License: Apache-2.0"), 12))
    );
}
3661
/// A free-form (non machine-readable) copyright file still yields the package name, the
/// license statement, and at least one party.
#[test]
fn test_parse_copyright_unstructured() {
    let content = concat!(
        "This package was debianized by John Doe.\n",
        "\n",
        "Upstream Authors:\n",
        " Jane Smith\n",
        "\n",
        "Copyright:\n",
        " 2009 10gen\n",
        "\n",
        "License:\n",
        " SSPL\n",
    );

    let pkg = parse_copyright_file(content, Some("mongodb"));

    assert_eq!(pkg.name.as_deref(), Some("mongodb"));
    assert_eq!(pkg.extracted_license_statement.as_deref(), Some("SSPL"));
    assert!(!pkg.parties.is_empty());
}
3680
/// Three copyright lines must produce at least three holders, including the ones named
/// `Paul Moore` and `Example Corp`.
#[test]
fn test_parse_copyright_holders() {
    let text = concat!(
        "2012 Paul Moore <pmoore@redhat.com>\n",
        "2012 Ashley Lai <adlai@us.ibm.com>\n",
        "Copyright (C) 2015-2018 Example Corp",
    );

    let holders = parse_copyright_holders(text);

    assert!(holders.len() >= 3);
    for expected_name in ["Paul Moore", "Example Corp"] {
        assert!(
            holders.iter().any(|holder| holder.contains(expected_name)),
            "missing holder {}",
            expected_name
        );
    }
}
3691
/// Text with no recognizable copyright or license information keeps the supplied package
/// name but produces no parties and no license statement.
#[test]
fn test_parse_copyright_empty() {
    let pkg = parse_copyright_file(
        "This is just some text without proper copyright info.",
        Some("test"),
    );

    assert_eq!(pkg.name.as_deref(), Some("test"));
    assert_eq!(pkg.parties.len(), 0);
    assert_eq!(pkg.extracted_license_statement, None);
}
3700
/// `DebianDebParser::is_match` accepts `.deb` file names and rejects other names.
#[test]
fn test_deb_parser_is_match() {
    let accepted = [
        "package.deb",
        "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb",
    ];
    for candidate in accepted {
        assert!(
            DebianDebParser::is_match(&PathBuf::from(candidate)),
            "expected a match for {}",
            candidate
        );
    }

    let rejected = ["package.tar.gz", "control"];
    for candidate in rejected {
        assert!(
            !DebianDebParser::is_match(&PathBuf::from(candidate)),
            "expected no match for {}",
            candidate
        );
    }
}
3710
/// A `<name>_<version>_<arch>.deb` file name is split into name, version and an `arch`
/// purl qualifier; the `+` in the version is percent-encoded in the purl.
#[test]
fn test_parse_deb_filename_with_arch() {
    let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");

    assert_eq!(pkg.name.as_deref(), Some("libapache2-mod-md"));
    assert_eq!(pkg.version.as_deref(), Some("2.4.38-3+deb10u10"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));
    assert_eq!(
        pkg.purl.as_deref(),
        Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64")
    );
    assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
}
3723
/// Despite the test name, the file name here does carry an architecture token (`all`),
/// and the test asserts that it is surfaced as `arch=all` in the purl.
#[test]
fn test_parse_deb_filename_without_arch() {
    let pkg = parse_deb_filename("package_1.0-1_all.deb");

    assert_eq!(pkg.name.as_deref(), Some("package"));
    assert_eq!(pkg.version.as_deref(), Some("1.0-1"));

    let purl = pkg.purl.as_deref().unwrap();
    assert!(purl.contains("arch=all"));
}
3731
/// Extracts package metadata from a real `.deb` fixture and checks name, version,
/// namespace, description, parties and purl.
///
/// The fixture is optional. When it is missing, the test writes a message to stderr before
/// returning, instead of returning silently.
#[test]
fn test_extract_deb_archive() {
    let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
    if !test_path.exists() {
        // Report the skip explicitly rather than passing without a trace.
        eprintln!(
            "Warning: Test file not found, skipping test: {}",
            test_path.display()
        );
        return;
    }

    let pkg = DebianDebParser::extract_first_package(&test_path);

    assert_eq!(pkg.name, Some("adduser".to_string()));
    assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
    assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
    assert!(pkg.description.is_some());
    assert!(!pkg.parties.is_empty());

    // Unwrap the purl once; both the name and the version must appear in it.
    let purl = pkg
        .purl
        .as_deref()
        .expect("purl should be generated for the extracted package");
    assert!(purl.contains("adduser"));
    assert!(purl.contains("3.112ubuntu1"));
}
3750
/// A synthetic `.deb` built by `create_synthetic_deb_with_control_tar_xz` must have its
/// control metadata (name, version, description, homepage) extracted.
#[test]
fn test_extract_deb_archive_with_control_tar_xz() {
    let archive = create_synthetic_deb_with_control_tar_xz();
    let pkg = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(pkg.name.as_deref(), Some("synthetic"));
    assert_eq!(pkg.version.as_deref(), Some("1.2.3"));
    assert_eq!(pkg.description.as_deref(), Some("Synthetic deb"));
    assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
}
3762
/// A synthetic `.deb` built by `create_synthetic_deb_with_copyright` must contribute its
/// license statement and a `copyright-holder` party named `Example Org` to the package.
#[test]
fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
    let archive = create_synthetic_deb_with_copyright();
    let pkg = DebianDebParser::extract_first_package(archive.path());

    assert_eq!(pkg.name.as_deref(), Some("synthetic"));
    assert_eq!(pkg.extracted_license_statement.as_deref(), Some("Apache-2.0"));

    let has_expected_holder = pkg.parties.iter().any(|party| {
        matches!(party.role.as_deref(), Some("copyright-holder"))
            && matches!(party.name.as_deref(), Some("Example Org"))
    });
    assert!(has_expected_holder);
}
3779
/// File-name parsing alone reports the `debian` namespace, even though the version string
/// in this name contains `ubuntu`.
#[test]
fn test_parse_deb_filename_simple() {
    let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");

    assert_eq!(pkg.name.as_deref(), Some("adduser"));
    assert_eq!(pkg.version.as_deref(), Some("3.112ubuntu1"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));
}
3787
/// A `.deb` file name without the `_`-separated fields yields neither a name nor a version.
#[test]
fn test_parse_deb_filename_invalid() {
    let pkg = parse_deb_filename("invalid.deb");

    assert_eq!(pkg.name, None);
    assert_eq!(pkg.version, None);
}
3794
/// Checks that a `status.d` entry is recognized by the distroless parser and, when the
/// fixture is present on disk, that its metadata is extracted as expected.
#[test]
fn test_distroless_parser() {
    let status_entry = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");

    // Path matching is checked before the existence guard, so it runs even without the fixture.
    assert!(DebianDistrolessInstalledParser::is_match(&status_entry));

    if !status_entry.exists() {
        eprintln!("Warning: Test file not found, skipping test");
        return;
    }

    let pkg = DebianDistrolessInstalledParser::extract_first_package(&status_entry);

    assert_eq!(pkg.package_type, Some(PackageType::Deb));
    assert_eq!(
        pkg.datasource_id,
        Some(DatasourceId::DebianDistrolessInstalledDb)
    );
    assert_eq!(pkg.name.as_deref(), Some("base-files"));
    assert_eq!(pkg.version.as_deref(), Some("11.1+deb11u8"));
    assert_eq!(pkg.namespace.as_deref(), Some("debian"));

    let purl = pkg.purl.as_deref();
    assert!(purl.is_some());
    assert!(purl.unwrap().contains("pkg:deb/debian/base-files"));
}
3824}