1use std::collections::HashMap;
34use std::path::Path;
35use std::sync::LazyLock;
36
37use crate::parser_warn as warn;
38use packageurl::PackageUrl;
39use regex::Regex;
40
41use crate::models::{
42 DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
43 PackageType, Party,
44};
45use crate::parsers::rfc822::{self, Rfc822Metadata};
46use crate::parsers::utils::{
47 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
48};
49use crate::utils::spdx::combine_license_expressions;
50
51use super::PackageParser;
52use super::license_normalization::{
53 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
54 normalize_declared_license_key,
55};
56
/// Package type reported by the parsers and package records built in this file.
const PACKAGE_TYPE: PackageType = PackageType::Deb;

// NOTE(review): the three limits below are not referenced in the visible part of
// this file; presumably they guard archive extraction further down — confirm.
/// 1 GiB, expressed in bytes.
const MAX_ARCHIVE_SIZE: u64 = 1024 * 1024 * 1024;
/// 50 MiB, expressed in bytes.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Upper bound of 100; presumably uncompressed/compressed size ratio — confirm at use site.
const MAX_COMPRESSION_RATIO: usize = 100;
62
/// Matches a single dependency alternative such as `libc6 (>= 2.17) [amd64]`.
///
/// Capture groups:
/// 1. the package name: one alphanumeric character followed by at least one
///    character out of `[a-zA-Z0-9.+-]`;
/// 2. (optional) the version operator, built from `<`, `>`, `=` and `!`;
/// 3. (optional) the version text up to the closing parenthesis.
///
/// A trailing `[...]` group is tolerated but not captured. The pattern is
/// anchored at both ends, so any other trailing text makes the match fail.
static DEP_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
    )
    .expect("compile-time constant dependency regex")
});
69
70fn default_package_data(datasource_id: DatasourceId) -> PackageData {
71 PackageData {
72 package_type: Some(PACKAGE_TYPE),
73 datasource_id: Some(datasource_id),
74 ..Default::default()
75 }
76}
77
/// Substrings of a lower-cased version string that make `detect_namespace` answer "debian".
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings of a lower-cased version string that make `detect_namespace` answer "ubuntu".
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings of a lower-cased maintainer field that make `detect_namespace` answer "debian".
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings of a lower-cased maintainer field that make `detect_namespace` answer "ubuntu".
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
91
/// Describes how one relationship field of a control paragraph is converted
/// into [`Dependency`] records.
struct DepFieldSpec {
    /// Lower-case header name looked up in the paragraph.
    field: &'static str,
    /// Scope label stored on every dependency parsed from the field.
    scope: &'static str,
    /// Value stored in `Dependency::is_runtime`.
    is_runtime: bool,
    /// Baseline for `Dependency::is_optional` (alternatives are always optional).
    is_optional: bool,
}

impl DepFieldSpec {
    /// Creates a spec whose scope label equals the field name, which holds for
    /// every entry of [`DEP_FIELDS`].
    const fn new(field: &'static str, is_runtime: bool, is_optional: bool) -> Self {
        Self {
            field,
            scope: field,
            is_runtime,
            is_optional,
        }
    }
}

/// Relationship fields handled by `parse_all_dependencies`, in processing order.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships.
    DepFieldSpec::new("depends", true, false),
    DepFieldSpec::new("pre-depends", true, false),
    DepFieldSpec::new("recommends", true, true),
    DepFieldSpec::new("suggests", true, true),
    // Relationships that are recorded but not flagged as runtime.
    DepFieldSpec::new("breaks", false, false),
    DepFieldSpec::new("conflicts", false, false),
    DepFieldSpec::new("replaces", false, false),
    DepFieldSpec::new("provides", false, false),
    // Build-time relationships.
    DepFieldSpec::new("build-depends", false, false),
    DepFieldSpec::new("build-depends-indep", false, false),
    DepFieldSpec::new("build-conflicts", false, false),
];
168
169pub struct DebianControlParser;
174
175impl PackageParser for DebianControlParser {
176 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
177
178 fn is_match(path: &Path) -> bool {
179 if let Some(name) = path.file_name()
180 && name == "control"
181 && let Some(parent) = path.parent()
182 && let Some(parent_name) = parent.file_name()
183 {
184 return parent_name == "debian";
185 }
186 false
187 }
188
189 fn extract_packages(path: &Path) -> Vec<PackageData> {
190 let content = match read_file_to_string(path, None) {
191 Ok(c) => c,
192 Err(e) => {
193 warn!("Failed to read debian/control at {:?}: {}", path, e);
194 return vec![default_package_data(DatasourceId::DebianControlInSource)];
195 }
196 };
197
198 let packages = parse_debian_control(&content);
199 if packages.is_empty() {
200 vec![default_package_data(DatasourceId::DebianControlInSource)]
201 } else {
202 packages
203 }
204 }
205}
206
207pub struct DebianInstalledParser;
212
213impl PackageParser for DebianInstalledParser {
214 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216 fn is_match(path: &Path) -> bool {
217 let path_str = path.to_string_lossy();
218 path_str.ends_with("var/lib/dpkg/status")
219 }
220
221 fn extract_packages(path: &Path) -> Vec<PackageData> {
222 let content = match read_file_to_string(path, None) {
223 Ok(c) => c,
224 Err(e) => {
225 warn!("Failed to read dpkg/status at {:?}: {}", path, e);
226 return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
227 }
228 };
229
230 let packages = parse_dpkg_status(&content);
231 if packages.is_empty() {
232 vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
233 } else {
234 packages
235 }
236 }
237}
238
239pub struct DebianDistrolessInstalledParser;
240
241impl PackageParser for DebianDistrolessInstalledParser {
242 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
243
244 fn is_match(path: &Path) -> bool {
245 let path_str = path.to_string_lossy();
246 path_str.contains("var/lib/dpkg/status.d/")
247 }
248
249 fn extract_packages(path: &Path) -> Vec<PackageData> {
250 let content = match read_file_to_string(path, None) {
251 Ok(c) => c,
252 Err(e) => {
253 warn!("Failed to read distroless status file at {:?}: {}", path, e);
254 return vec![default_package_data(
255 DatasourceId::DebianDistrolessInstalledDb,
256 )];
257 }
258 };
259
260 vec![parse_distroless_status(&content)]
261 }
262}
263
264fn parse_distroless_status(content: &str) -> PackageData {
265 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
266
267 if paragraphs.is_empty() {
268 return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
269 }
270
271 build_package_from_paragraph(
272 ¶graphs[0],
273 None,
274 DatasourceId::DebianDistrolessInstalledDb,
275 )
276 .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
277}
278
279fn parse_debian_control(content: &str) -> Vec<PackageData> {
289 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
290 if paragraphs.is_empty() {
291 return Vec::new();
292 }
293
294 let has_source = rfc822::get_header_first(¶graphs[0].headers, "source").is_some();
295
296 let (source_paragraph, binary_start) = if has_source {
297 (Some(¶graphs[0]), 1)
298 } else {
299 (None, 0)
300 };
301
302 let source_meta = source_paragraph.map(extract_source_meta);
303
304 let mut packages = Vec::new();
305 let mut count = 0usize;
306
307 for para in ¶graphs[binary_start..] {
308 count += 1;
309 if count > MAX_ITERATION_COUNT {
310 warn!("parse_debian_control: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
311 break;
312 }
313 if let Some(pkg) = build_package_from_paragraph(
314 para,
315 source_meta.as_ref(),
316 DatasourceId::DebianControlInSource,
317 ) {
318 packages.push(pkg);
319 }
320 }
321
322 if packages.is_empty()
323 && let Some(source_para) = source_paragraph
324 && let Some(pkg) = build_package_from_source_paragraph(source_para)
325 {
326 packages.push(pkg);
327 }
328
329 packages
330}
331
332fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
337 let paragraphs = rfc822::parse_rfc822_paragraphs(content);
338 let mut packages = Vec::new();
339 let mut count = 0usize;
340
341 for para in ¶graphs {
342 count += 1;
343 if count > MAX_ITERATION_COUNT {
344 warn!("parse_dpkg_status: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
345 break;
346 }
347 let status = rfc822::get_header_first(¶.headers, "status");
348 if status.as_deref() != Some("install ok installed") {
349 continue;
350 }
351
352 if let Some(pkg) =
353 build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
354 {
355 packages.push(pkg);
356 }
357 }
358
359 packages
360}
361
/// Metadata taken from the source paragraph of a control file and shared with
/// the binary packages declared after it (see `extract_source_meta`).
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders, in that order.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
373
374fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
375 let mut parties = Vec::new();
376
377 if let Some(maintainer) = rfc822::get_header_first(¶graph.headers, "maintainer") {
379 let (name, email) = split_name_email(&maintainer);
380 parties.push(Party {
381 r#type: Some("person".to_string()),
382 role: Some("maintainer".to_string()),
383 name,
384 email,
385 url: None,
386 organization: None,
387 organization_url: None,
388 timezone: None,
389 });
390 }
391
392 if let Some(orig_maintainer) =
394 rfc822::get_header_first(¶graph.headers, "original-maintainer")
395 {
396 let (name, email) = split_name_email(&orig_maintainer);
397 parties.push(Party {
398 r#type: Some("person".to_string()),
399 role: Some("maintainer".to_string()),
400 name,
401 email,
402 url: None,
403 organization: None,
404 organization_url: None,
405 timezone: None,
406 });
407 }
408
409 if let Some(uploaders_str) = rfc822::get_header_first(¶graph.headers, "uploaders") {
411 for uploader in uploaders_str.split(',') {
412 let trimmed = uploader.trim();
413 if !trimmed.is_empty() {
414 let (name, email) = split_name_email(trimmed);
415 parties.push(Party {
416 r#type: Some("person".to_string()),
417 role: Some("uploader".to_string()),
418 name,
419 email,
420 url: None,
421 organization: None,
422 organization_url: None,
423 timezone: None,
424 });
425 }
426 }
427 }
428
429 let homepage_url = rfc822::get_header_first(¶graph.headers, "homepage").map(truncate_field);
430
431 let vcs_url = rfc822::get_header_first(¶graph.headers, "vcs-git")
432 .map(|url| truncate_field(url.split_whitespace().next().unwrap_or(&url).to_string()));
433
434 let code_view_url =
435 rfc822::get_header_first(¶graph.headers, "vcs-browser").map(truncate_field);
436
437 let bug_tracking_url = rfc822::get_header_first(¶graph.headers, "bugs").map(truncate_field);
438
439 SourceMeta {
440 parties,
441 homepage_url,
442 vcs_url,
443 code_view_url,
444 bug_tracking_url,
445 }
446}
447
448fn build_package_from_paragraph(
453 paragraph: &Rfc822Metadata,
454 source_meta: Option<&SourceMeta>,
455 datasource_id: DatasourceId,
456) -> Option<PackageData> {
457 let name = rfc822::get_header_first(¶graph.headers, "package").map(truncate_field)?;
458 let version = rfc822::get_header_first(¶graph.headers, "version").map(truncate_field);
459 let architecture =
460 rfc822::get_header_first(¶graph.headers, "architecture").map(truncate_field);
461 let description =
462 rfc822::get_header_first(¶graph.headers, "description").map(truncate_field);
463 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
464 let homepage = rfc822::get_header_first(¶graph.headers, "homepage").map(truncate_field);
465 let source_field = rfc822::get_header_first(¶graph.headers, "source");
466 let section = rfc822::get_header_first(¶graph.headers, "section");
467 let installed_size = rfc822::get_header_first(¶graph.headers, "installed-size");
468 let multi_arch = rfc822::get_header_first(¶graph.headers, "multi-arch");
469
470 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
471
472 let parties = if let Some(meta) = source_meta {
474 meta.parties.clone()
475 } else {
476 let mut p = Vec::new();
477 if let Some(m) = &maintainer_str {
478 let (n, e) = split_name_email(m);
479 p.push(Party {
480 r#type: Some("person".to_string()),
481 role: Some("maintainer".to_string()),
482 name: n,
483 email: e,
484 url: None,
485 organization: None,
486 organization_url: None,
487 timezone: None,
488 });
489 }
490 p
491 };
492
493 let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
495 let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
496 let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
497 let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
498
499 let purl = build_debian_purl(
501 &name,
502 version.as_deref(),
503 namespace.as_deref(),
504 architecture.as_deref(),
505 );
506
507 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
509
510 let keywords = section.into_iter().collect();
512
513 let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
515
516 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
518 if let Some(ma) = &multi_arch
519 && !ma.is_empty()
520 {
521 extra_data.insert(
522 "multi_arch".to_string(),
523 serde_json::Value::String(ma.clone()),
524 );
525 }
526 if let Some(size_str) = &installed_size
527 && let Ok(size) = size_str.parse::<u64>()
528 {
529 extra_data.insert(
530 "installed_size".to_string(),
531 serde_json::Value::Number(serde_json::Number::from(size)),
532 );
533 }
534
535 let qualifiers = architecture.as_ref().map(|arch| {
537 let mut q = HashMap::new();
538 q.insert("arch".to_string(), arch.clone());
539 q
540 });
541
542 Some(PackageData {
543 package_type: Some(PACKAGE_TYPE),
544 namespace: namespace.clone(),
545 name: Some(name),
546 version,
547 qualifiers,
548 subpath: None,
549 primary_language: None,
550 description,
551 release_date: None,
552 parties,
553 keywords,
554 homepage_url,
555 download_url: None,
556 size: None,
557 sha1: None,
558 md5: None,
559 sha256: None,
560 sha512: None,
561 bug_tracking_url,
562 code_view_url,
563 vcs_url,
564 copyright: None,
565 holder: None,
566 declared_license_expression: None,
567 declared_license_expression_spdx: None,
568 license_detections: Vec::new(),
569 other_license_expression: None,
570 other_license_expression_spdx: None,
571 other_license_detections: Vec::new(),
572 extracted_license_statement: None,
573 notice_text: None,
574 source_packages,
575 file_references: Vec::new(),
576 is_private: false,
577 is_virtual: false,
578 extra_data: if extra_data.is_empty() {
579 None
580 } else {
581 Some(extra_data)
582 },
583 dependencies,
584 repository_homepage_url: None,
585 repository_download_url: None,
586 api_data_url: None,
587 datasource_id: Some(datasource_id),
588 purl,
589 })
590}
591
592fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
593 let name = rfc822::get_header_first(¶graph.headers, "source").map(truncate_field)?;
594 let version = rfc822::get_header_first(¶graph.headers, "version").map(truncate_field);
595 let maintainer_str = rfc822::get_header_first(¶graph.headers, "maintainer");
596
597 let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
598 let source_meta = extract_source_meta(paragraph);
599
600 let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
601 let dependencies = parse_all_dependencies(¶graph.headers, namespace.as_deref());
602
603 let section = rfc822::get_header_first(¶graph.headers, "section");
604 let keywords = section.into_iter().collect();
605
606 Some(PackageData {
607 package_type: Some(PACKAGE_TYPE),
608 namespace: namespace.clone(),
609 name: Some(name),
610 version,
611 qualifiers: None,
612 subpath: None,
613 primary_language: None,
614 description: None,
615 release_date: None,
616 parties: source_meta.parties,
617 keywords,
618 homepage_url: source_meta.homepage_url,
619 download_url: None,
620 size: None,
621 sha1: None,
622 md5: None,
623 sha256: None,
624 sha512: None,
625 bug_tracking_url: source_meta.bug_tracking_url,
626 code_view_url: source_meta.code_view_url,
627 vcs_url: source_meta.vcs_url,
628 copyright: None,
629 holder: None,
630 declared_license_expression: None,
631 declared_license_expression_spdx: None,
632 license_detections: Vec::new(),
633 other_license_expression: None,
634 other_license_expression_spdx: None,
635 other_license_detections: Vec::new(),
636 extracted_license_statement: None,
637 notice_text: None,
638 source_packages: Vec::new(),
639 file_references: Vec::new(),
640 is_private: false,
641 is_virtual: false,
642 extra_data: None,
643 dependencies,
644 repository_homepage_url: None,
645 repository_download_url: None,
646 api_data_url: None,
647 datasource_id: Some(DatasourceId::DebianControlInSource),
648 purl,
649 })
650}
651
652fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
657 if let Some(ver) = version {
659 let ver_lower = ver.to_lowercase();
660 for clue in VERSION_CLUES_UBUNTU {
661 if ver_lower.contains(clue) {
662 return Some("ubuntu".to_string());
663 }
664 }
665 for clue in VERSION_CLUES_DEBIAN {
666 if ver_lower.contains(clue) {
667 return Some("debian".to_string());
668 }
669 }
670 }
671
672 if let Some(maint) = maintainer {
674 let maint_lower = maint.to_lowercase();
675 for clue in MAINTAINER_CLUES_UBUNTU {
676 if maint_lower.contains(clue) {
677 return Some("ubuntu".to_string());
678 }
679 }
680 for clue in MAINTAINER_CLUES_DEBIAN {
681 if maint_lower.contains(clue) {
682 return Some("debian".to_string());
683 }
684 }
685 }
686
687 Some("debian".to_string())
689}
690
691fn build_debian_purl(
696 name: &str,
697 version: Option<&str>,
698 namespace: Option<&str>,
699 architecture: Option<&str>,
700) -> Option<String> {
701 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
702
703 if let Some(ns) = namespace {
704 purl.with_namespace(ns).ok()?;
705 }
706
707 if let Some(ver) = version {
708 purl.with_version(ver).ok()?;
709 }
710
711 if let Some(arch) = architecture {
712 purl.add_qualifier("arch", arch).ok()?;
713 }
714
715 Some(purl.to_string())
716}
717
718fn parse_all_dependencies(
723 headers: &HashMap<String, Vec<String>>,
724 namespace: Option<&str>,
725) -> Vec<Dependency> {
726 let mut dependencies = Vec::new();
727
728 for spec in DEP_FIELDS {
729 if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
730 dependencies.extend(parse_dependency_field(
731 &dep_str,
732 spec.scope,
733 spec.is_runtime,
734 spec.is_optional,
735 namespace,
736 ));
737 }
738 }
739
740 dependencies
741}
742
743fn parse_dependency_field(
752 dep_str: &str,
753 scope: &str,
754 is_runtime: bool,
755 is_optional: bool,
756 namespace: Option<&str>,
757) -> Vec<Dependency> {
758 let mut deps = Vec::new();
759
760 for group in dep_str.split(',').take(MAX_ITERATION_COUNT) {
761 let group = group.trim();
762 if group.is_empty() {
763 continue;
764 }
765
766 let alternatives: Vec<&str> = group.split('|').collect();
767 let has_alternatives = alternatives.len() > 1;
768
769 for alt in alternatives {
770 let alt = alt.trim();
771 if alt.is_empty() {
772 continue;
773 }
774
775 if let Some(caps) = DEP_RE.captures(alt) {
776 let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
777 let operator = caps.get(2).map(|m| m.as_str().trim());
778 let version = caps.get(3).map(|m| m.as_str().trim());
779
780 if pkg_name.is_empty() {
781 continue;
782 }
783
784 if pkg_name.starts_with('$') {
785 continue;
786 }
787
788 let extracted_requirement = match (operator, version) {
789 (Some(op), Some(ver)) => Some(truncate_field(format!("{} {}", op, ver))),
790 _ => None,
791 };
792
793 let is_pinned = operator.map(|op| op == "=");
794
795 let purl = build_debian_purl(pkg_name, None, namespace, None);
796
797 deps.push(Dependency {
798 purl,
799 extracted_requirement,
800 scope: Some(scope.to_string()),
801 is_runtime: Some(is_runtime),
802 is_optional: Some(is_optional || has_alternatives),
803 is_pinned,
804 is_direct: Some(true),
805 resolved_package: None,
806 extra_data: None,
807 });
808 }
809 }
810 }
811
812 deps
813}
814
815fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
823 let Some(source_str) = source else {
824 return Vec::new();
825 };
826
827 let trimmed = source_str.trim();
828 if trimmed.is_empty() {
829 return Vec::new();
830 }
831
832 let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
834 let name = trimmed[..paren_start].trim();
835 let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
836 (
837 name,
838 if version.is_empty() {
839 None
840 } else {
841 Some(version)
842 },
843 )
844 } else {
845 (trimmed, None)
846 };
847
848 if let Some(purl) = build_debian_purl(name, version, namespace, None) {
849 vec![purl]
850 } else {
851 Vec::new()
852 }
853}
854
// Parser registrations for the control-file based parsers above.
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path glob patterns, package type, an empty string — presumably the
// primary language —, documentation URL). Confirm against the macro definition.

// `debian/control` inside a source tree.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// The dpkg status database of an installed system.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Per-package status files below `status.d`.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
882
883pub struct DebianDscParser;
892
893impl PackageParser for DebianDscParser {
894 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
895
896 fn is_match(path: &Path) -> bool {
897 path.extension().and_then(|e| e.to_str()) == Some("dsc")
898 }
899
900 fn extract_packages(path: &Path) -> Vec<PackageData> {
901 let content = match read_file_to_string(path, None) {
902 Ok(c) => c,
903 Err(e) => {
904 warn!("Failed to read .dsc file {:?}: {}", path, e);
905 return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
906 }
907 };
908
909 vec![parse_dsc_content(&content)]
910 }
911}
912
913crate::register_parser!(
914 "Debian source control file (.dsc)",
915 &["**/*.dsc"],
916 "deb",
917 "",
918 Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
919);
920
921fn strip_pgp_signature(content: &str) -> String {
922 let mut result = String::new();
923 let mut in_pgp_block = false;
924 let mut in_signature = false;
925 let mut count = 0usize;
926
927 for line in content.lines() {
928 count += 1;
929 if count > MAX_ITERATION_COUNT {
930 warn!("strip_pgp_signature: exceeded MAX_ITERATION_COUNT lines, stopping");
931 break;
932 }
933 if line.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") {
934 in_pgp_block = true;
935 continue;
936 }
937 if line.starts_with("-----BEGIN PGP SIGNATURE-----") {
938 in_signature = true;
939 continue;
940 }
941 if line.starts_with("-----END PGP SIGNATURE-----") {
942 in_signature = false;
943 continue;
944 }
945 if in_pgp_block && line.starts_with("Hash:") {
946 continue;
947 }
948 if in_pgp_block && line.is_empty() && result.is_empty() {
949 in_pgp_block = false;
950 continue;
951 }
952 if !in_signature {
953 result.push_str(line);
954 result.push('\n');
955 }
956 }
957
958 result
959}
960
961fn parse_dsc_content(content: &str) -> PackageData {
962 let clean_content = strip_pgp_signature(content);
963 let metadata = rfc822::parse_rfc822_content(&clean_content);
964 let headers = &metadata.headers;
965
966 let name = rfc822::get_header_first(headers, "source").map(truncate_field);
967 let version = rfc822::get_header_first(headers, "version").map(truncate_field);
968 let architecture = rfc822::get_header_first(headers, "architecture").map(truncate_field);
969 let namespace = Some("debian".to_string());
970
971 let mut package = PackageData {
972 datasource_id: Some(DatasourceId::DebianSourceControlDsc),
973 package_type: Some(PACKAGE_TYPE),
974 namespace: namespace.clone(),
975 name: name.clone(),
976 version: version.clone(),
977 description: rfc822::get_header_first(headers, "description").map(truncate_field),
978 homepage_url: rfc822::get_header_first(headers, "homepage").map(truncate_field),
979 vcs_url: rfc822::get_header_first(headers, "vcs-git").map(truncate_field),
980 code_view_url: rfc822::get_header_first(headers, "vcs-browser").map(truncate_field),
981 ..Default::default()
982 };
983
984 if let (Some(n), Some(v)) = (&name, &version) {
986 package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
987 }
988
989 if let Some(n) = &name
991 && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
992 {
993 package.source_packages.push(source_purl);
994 }
995
996 if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
997 let (name_opt, email_opt) = split_name_email(&maintainer);
998 package.parties.push(Party {
999 r#type: None,
1000 role: Some("maintainer".to_string()),
1001 name: name_opt,
1002 email: email_opt,
1003 url: None,
1004 organization: None,
1005 organization_url: None,
1006 timezone: None,
1007 });
1008 }
1009
1010 if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
1011 for uploader in uploaders_str.split(',') {
1012 let uploader = uploader.trim();
1013 if uploader.is_empty() {
1014 continue;
1015 }
1016 let (name_opt, email_opt) = split_name_email(uploader);
1017 package.parties.push(Party {
1018 r#type: None,
1019 role: Some("uploader".to_string()),
1020 name: name_opt,
1021 email: email_opt,
1022 url: None,
1023 organization: None,
1024 organization_url: None,
1025 timezone: None,
1026 });
1027 }
1028 }
1029
1030 if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1032 package.dependencies.extend(parse_dependency_field(
1033 &build_deps,
1034 "build",
1035 false,
1036 false,
1037 namespace.as_deref(),
1038 ));
1039 }
1040
1041 if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1043 let map = package.extra_data.get_or_insert_with(HashMap::new);
1044 map.insert("standards_version".to_string(), standards.into());
1045 }
1046
1047 package
1048}
1049
1050pub struct DebianOrigTarParser;
1052
1053impl PackageParser for DebianOrigTarParser {
1054 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1055
1056 fn is_match(path: &Path) -> bool {
1057 path.file_name()
1058 .and_then(|n| n.to_str())
1059 .map(|name| name.contains(".orig.tar."))
1060 .unwrap_or(false)
1061 }
1062
1063 fn extract_packages(path: &Path) -> Vec<PackageData> {
1064 let filename = match path.file_name().and_then(|n| n.to_str()) {
1065 Some(f) => f,
1066 None => {
1067 return vec![default_package_data(
1068 DatasourceId::DebianOriginalSourceTarball,
1069 )];
1070 }
1071 };
1072
1073 vec![parse_source_tarball_filename(
1074 filename,
1075 DatasourceId::DebianOriginalSourceTarball,
1076 )]
1077 }
1078}
1079
1080crate::register_parser!(
1081 "Debian original source tarball",
1082 &["**/*.orig.tar.*"],
1083 "deb",
1084 "",
1085 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1086);
1087
1088pub struct DebianDebianTarParser;
1090
1091impl PackageParser for DebianDebianTarParser {
1092 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1093
1094 fn is_match(path: &Path) -> bool {
1095 path.file_name()
1096 .and_then(|n| n.to_str())
1097 .map(|name| name.contains(".debian.tar."))
1098 .unwrap_or(false)
1099 }
1100
1101 fn extract_packages(path: &Path) -> Vec<PackageData> {
1102 let filename = match path.file_name().and_then(|n| n.to_str()) {
1103 Some(f) => f,
1104 None => {
1105 return vec![default_package_data(
1106 DatasourceId::DebianSourceMetadataTarball,
1107 )];
1108 }
1109 };
1110
1111 vec![parse_source_tarball_filename(
1112 filename,
1113 DatasourceId::DebianSourceMetadataTarball,
1114 )]
1115 }
1116}
1117
1118crate::register_parser!(
1119 "Debian source metadata tarball",
1120 &["**/*.debian.tar.*"],
1121 "deb",
1122 "",
1123 Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1124);
1125
1126fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1127 let without_tar_ext = filename
1128 .trim_end_matches(".gz")
1129 .trim_end_matches(".xz")
1130 .trim_end_matches(".bz2")
1131 .trim_end_matches(".tar");
1132
1133 let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1134 if parts.len() < 2 {
1135 return default_package_data(datasource_id);
1136 }
1137
1138 let name = truncate_field(parts[0].to_string());
1139 let version_with_suffix = parts[1];
1140
1141 let version = version_with_suffix
1142 .trim_end_matches(".orig")
1143 .trim_end_matches(".debian")
1144 .to_string();
1145 let version = truncate_field(version);
1146
1147 let namespace = Some("debian".to_string());
1148
1149 PackageData {
1150 datasource_id: Some(datasource_id),
1151 package_type: Some(PACKAGE_TYPE),
1152 namespace: namespace.clone(),
1153 name: Some(name.clone()),
1154 version: Some(version.clone()),
1155 purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1156 ..Default::default()
1157 }
1158}
1159
1160pub struct DebianInstalledListParser;
1162
1163impl PackageParser for DebianInstalledListParser {
1164 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1165
1166 fn is_match(path: &Path) -> bool {
1167 path.extension().and_then(|e| e.to_str()) == Some("list")
1168 && path
1169 .to_str()
1170 .map(|p| p.contains("/var/lib/dpkg/info/"))
1171 .unwrap_or(false)
1172 }
1173
1174 fn extract_packages(path: &Path) -> Vec<PackageData> {
1175 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1176 Some(f) => f,
1177 None => {
1178 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1179 }
1180 };
1181
1182 let content = match read_file_to_string(path, None) {
1183 Ok(c) => c,
1184 Err(e) => {
1185 warn!("Failed to read .list file {:?}: {}", path, e);
1186 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1187 }
1188 };
1189
1190 vec![parse_debian_file_list(
1191 &content,
1192 filename,
1193 DatasourceId::DebianInstalledFilesList,
1194 )]
1195 }
1196}
1197
1198crate::register_parser!(
1199 "Debian installed files list",
1200 &["**/var/lib/dpkg/info/*.list"],
1201 "deb",
1202 "",
1203 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1204);
1205
1206pub struct DebianInstalledMd5sumsParser;
1208
1209impl PackageParser for DebianInstalledMd5sumsParser {
1210 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1211
1212 fn is_match(path: &Path) -> bool {
1213 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1214 && path
1215 .to_str()
1216 .map(|p| p.contains("/var/lib/dpkg/info/"))
1217 .unwrap_or(false)
1218 }
1219
1220 fn extract_packages(path: &Path) -> Vec<PackageData> {
1221 let filename = match path.file_stem().and_then(|s| s.to_str()) {
1222 Some(f) => f,
1223 None => {
1224 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1225 }
1226 };
1227
1228 let content = match read_file_to_string(path, None) {
1229 Ok(c) => c,
1230 Err(e) => {
1231 warn!("Failed to read .md5sums file {:?}: {}", path, e);
1232 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1233 }
1234 };
1235
1236 vec![parse_debian_file_list(
1237 &content,
1238 filename,
1239 DatasourceId::DebianInstalledMd5Sums,
1240 )]
1241 }
1242}
1243
1244crate::register_parser!(
1245 "Debian installed package md5sums",
1246 &["**/var/lib/dpkg/info/*.md5sums"],
1247 "deb",
1248 "",
1249 Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1250);
1251
1252const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1253
1254fn parse_debian_file_list(
1255 content: &str,
1256 filename: &str,
1257 datasource_id: DatasourceId,
1258) -> PackageData {
1259 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1260 (
1261 Some(truncate_field(pkg.to_string())),
1262 Some(arch.to_string()),
1263 )
1264 } else if filename == "md5sums" {
1265 (None, None)
1266 } else {
1267 (Some(truncate_field(filename.to_string())), None)
1268 };
1269
1270 let mut file_references = Vec::new();
1271 let mut count = 0usize;
1272
1273 for line in content.lines() {
1274 count += 1;
1275 if count > MAX_ITERATION_COUNT {
1276 warn!("parse_debian_file_list: exceeded MAX_ITERATION_COUNT lines, stopping");
1277 break;
1278 }
1279 let line = line.trim();
1280 if line.is_empty() || line.starts_with('#') {
1281 continue;
1282 }
1283
1284 let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1285 (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1286 } else {
1287 (None, line)
1288 };
1289
1290 if IGNORED_ROOT_DIRS.contains(&path) {
1291 continue;
1292 }
1293
1294 file_references.push(FileReference {
1295 path: path.to_string(),
1296 size: None,
1297 sha1: None,
1298 md5: md5sum,
1299 sha256: None,
1300 sha512: None,
1301 extra_data: None,
1302 });
1303 }
1304
1305 if file_references.is_empty() {
1306 return default_package_data(datasource_id);
1307 }
1308
1309 let namespace = Some("debian".to_string());
1310 let mut package = PackageData {
1311 datasource_id: Some(datasource_id),
1312 package_type: Some(PACKAGE_TYPE),
1313 namespace: namespace.clone(),
1314 name: name.clone(),
1315 file_references,
1316 ..Default::default()
1317 };
1318
1319 if let Some(n) = &name {
1320 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1321 }
1322
1323 package
1324}
1325
1326pub struct DebianCopyrightParser;
1328
1329impl PackageParser for DebianCopyrightParser {
1330 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1331
1332 fn is_match(path: &Path) -> bool {
1333 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1334 if filename != "copyright" {
1335 return filename.ends_with("_copyright");
1336 }
1337 let path_str = path.to_string_lossy();
1338 path_str.contains("/debian/")
1339 || path_str.contains("/ports/")
1340 || path_str.starts_with("ports/")
1341 || path_str.contains("/packages/deb/")
1342 || path_str.contains("/usr/share/doc/")
1343 || path_str.ends_with("debian/copyright")
1344 } else {
1345 false
1346 }
1347 }
1348
1349 fn extract_packages(path: &Path) -> Vec<PackageData> {
1350 let datasource_id = detect_debian_copyright_datasource(path);
1351 let content = match read_file_to_string(path, None) {
1352 Ok(c) => c,
1353 Err(e) => {
1354 warn!("Failed to read copyright file {:?}: {}", path, e);
1355 return vec![default_package_data(datasource_id)];
1356 }
1357 };
1358
1359 let package_name = extract_package_name_from_path(path)
1360 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
1361 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1362 package_data.datasource_id = Some(datasource_id);
1363 vec![package_data]
1364 }
1365}
1366
// Registry metadata for the Debian copyright-file parser.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here — confirm before editing.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
1380
1381fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1382 let path_str = path.to_string_lossy();
1383 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1384 DatasourceId::DebianCopyrightInSource
1385 } else if path_str.contains("/usr/share/doc/") {
1386 DatasourceId::DebianCopyrightInPackage
1387 } else {
1388 DatasourceId::DebianCopyrightStandalone
1389 }
1390}
1391
/// Returns the path component that directly follows the first `doc` directory.
///
/// Only pairs of ordinary (non-root, non-`..`) components are considered. If
/// the component after `doc` is not valid UTF-8, `None` is returned without
/// looking at later `doc` components.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let components: Vec<Component<'_>> = path.components().collect();

    components
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(next)) if dir.to_str() == Some("doc") => {
                // Outer `Some` stops the search; the inner option carries the
                // UTF-8 conversion result.
                Some(next.to_str().map(str::to_string))
            }
            _ => None,
        })
        .flatten()
}
1406
1407fn extract_standalone_package_name_from_path(
1408 path: &Path,
1409 datasource_id: DatasourceId,
1410) -> Option<String> {
1411 if datasource_id != DatasourceId::DebianCopyrightStandalone {
1412 return None;
1413 }
1414
1415 path.file_name()
1416 .and_then(|name| name.to_str())
1417 .filter(|name| *name == "copyright")?;
1418
1419 path.parent()
1420 .and_then(|parent| parent.file_name())
1421 .and_then(|name| name.to_str())
1422 .map(str::to_string)
1423}
1424
1425fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1426 let paragraphs = parse_copyright_paragraphs_with_lines(content);
1427
1428 let is_dep5 = paragraphs
1429 .first()
1430 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1431 .is_some();
1432
1433 let namespace = Some("debian".to_string());
1434 let mut parties = Vec::new();
1435 let mut license_statements = Vec::new();
1436 let mut primary_license_detection = None;
1437 let mut header_license_detection = None;
1438 let mut other_license_detections = Vec::new();
1439
1440 if is_dep5 {
1441 let mut para_count = 0usize;
1442 for para in ¶graphs {
1443 para_count += 1;
1444 if para_count > MAX_ITERATION_COUNT {
1445 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
1446 break;
1447 }
1448 if let Some(copyright_text) =
1449 rfc822::get_header_first(¶.metadata.headers, "copyright")
1450 {
1451 for holder in parse_copyright_holders(©right_text) {
1452 if !holder.is_empty() {
1453 parties.push(Party {
1454 r#type: None,
1455 role: Some("copyright-holder".to_string()),
1456 name: Some(holder),
1457 email: None,
1458 url: None,
1459 organization: None,
1460 organization_url: None,
1461 timezone: None,
1462 });
1463 }
1464 }
1465 }
1466
1467 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
1468 let license_name = license.lines().next().unwrap_or(&license).trim();
1469 if !license_name.is_empty()
1470 && !license_statements.contains(&license_name.to_string())
1471 {
1472 license_statements.push(license_name.to_string());
1473 }
1474
1475 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1476 let detection =
1477 build_primary_license_detection(license_name, matched_text, line_no);
1478 let is_header_paragraph =
1479 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
1480 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
1481 == Some("*")
1482 {
1483 primary_license_detection = Some(detection);
1484 } else if is_header_paragraph {
1485 header_license_detection.get_or_insert(detection);
1486 } else {
1487 other_license_detections.push(detection);
1488 }
1489 }
1490 }
1491 }
1492
1493 if primary_license_detection.is_none() && header_license_detection.is_some() {
1494 primary_license_detection = header_license_detection;
1495 }
1496 } else {
1497 let copyright_block = extract_unstructured_field(content, "Copyright:");
1498 if let Some(text) = copyright_block {
1499 for holder in parse_copyright_holders(&text) {
1500 if !holder.is_empty() {
1501 parties.push(Party {
1502 r#type: None,
1503 role: Some("copyright-holder".to_string()),
1504 name: Some(holder),
1505 email: None,
1506 url: None,
1507 organization: None,
1508 organization_url: None,
1509 timezone: None,
1510 });
1511 }
1512 }
1513 }
1514
1515 let license_block = extract_unstructured_field(content, "License:");
1516 if let Some(text) = license_block {
1517 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1518 }
1519 }
1520
1521 let extracted_license_statement = if license_statements.is_empty() {
1522 None
1523 } else {
1524 Some(truncate_field(license_statements.join(" AND ")))
1525 };
1526
1527 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1528 let declared_license_expression = license_detections
1529 .first()
1530 .map(|detection| detection.license_expression.clone());
1531 let declared_license_expression_spdx = license_detections
1532 .first()
1533 .map(|detection| detection.license_expression_spdx.clone());
1534 let other_license_expression = combine_license_expressions(
1535 other_license_detections
1536 .iter()
1537 .map(|detection| detection.license_expression.clone()),
1538 );
1539 let other_license_expression_spdx = combine_license_expressions(
1540 other_license_detections
1541 .iter()
1542 .map(|detection| detection.license_expression_spdx.clone()),
1543 );
1544
1545 PackageData {
1546 datasource_id: Some(DatasourceId::DebianCopyright),
1547 package_type: Some(PACKAGE_TYPE),
1548 namespace: namespace.clone(),
1549 name: package_name.map(|s| truncate_field(s.to_string())),
1550 parties,
1551 declared_license_expression,
1552 declared_license_expression_spdx,
1553 license_detections,
1554 other_license_expression,
1555 other_license_expression_spdx,
1556 other_license_detections,
1557 extracted_license_statement,
1558 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1559 ..Default::default()
1560 }
1561}
1562
/// One blank-line-separated paragraph of a copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lowercased field name.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (trailing whitespace
    /// removed) together with its 1-based line number in the file, if any.
    license_header_line: Option<(String, usize)>,
}
1568
1569fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1570 let mut paragraphs = Vec::new();
1571 let mut current_lines = Vec::new();
1572 let mut current_start_line = 1usize;
1573 let mut count = 0usize;
1574
1575 for (idx, line) in content.lines().enumerate() {
1576 count += 1;
1577 if count > MAX_ITERATION_COUNT {
1578 warn!(
1579 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
1580 );
1581 break;
1582 }
1583 let line_no = idx + 1;
1584 if line.is_empty() {
1585 if !current_lines.is_empty() {
1586 paragraphs.push(finalize_copyright_paragraph(
1587 std::mem::take(&mut current_lines),
1588 current_start_line,
1589 ));
1590 }
1591 current_start_line = line_no + 1;
1592 } else {
1593 if current_lines.is_empty() {
1594 current_start_line = line_no;
1595 }
1596 current_lines.push(line.to_string());
1597 }
1598 }
1599
1600 if !current_lines.is_empty() {
1601 paragraphs.push(finalize_copyright_paragraph(
1602 current_lines,
1603 current_start_line,
1604 ));
1605 }
1606
1607 paragraphs
1608}
1609
1610fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1611 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1612 let mut current_name: Option<String> = None;
1613 let mut current_value = String::new();
1614 let mut license_header_line = None;
1615
1616 for (idx, line) in raw_lines.iter().enumerate() {
1617 if line.starts_with(' ') || line.starts_with('\t') {
1618 if current_name.is_some() {
1619 current_value.push('\n');
1620 current_value.push_str(line);
1621 }
1622 continue;
1623 }
1624
1625 if let Some(name) = current_name.take() {
1626 add_copyright_header_value(&mut headers, &name, ¤t_value);
1627 current_value.clear();
1628 }
1629
1630 if let Some((name, value)) = line.split_once(':') {
1631 let normalized_name = name.trim().to_ascii_lowercase();
1632 if normalized_name == "license" && license_header_line.is_none() {
1633 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1634 }
1635 current_name = Some(normalized_name);
1636 current_value = value.trim_start().to_string();
1637 }
1638 }
1639
1640 if let Some(name) = current_name.take() {
1641 add_copyright_header_value(&mut headers, &name, ¤t_value);
1642 }
1643
1644 CopyrightParagraph {
1645 metadata: Rfc822Metadata {
1646 headers,
1647 body: String::new(),
1648 },
1649 license_header_line,
1650 }
1651}
1652
/// Records `value` (with trailing whitespace removed) under `name`.
///
/// The key is always registered, even when the value is blank; blank values
/// themselves are not stored.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1660
1661fn build_primary_license_detection(
1662 license_name: &str,
1663 matched_text: String,
1664 line_no: usize,
1665) -> LicenseDetection {
1666 let normalized = normalize_debian_license_name(license_name);
1667 let line = match LineNumber::new(line_no) {
1668 Some(l) => l,
1669 None => {
1670 warn!(
1671 "build_primary_license_detection: line number {} out of range, clamping to 1",
1672 line_no
1673 );
1674 LineNumber::new(1).expect("1 is a valid line number")
1675 }
1676 };
1677
1678 build_declared_license_detection(
1679 &normalized,
1680 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1681 )
1682}
1683
1684fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1685 match license_name.trim() {
1686 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1687 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1688 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1689 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1690 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1691 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1692 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1693 "public-domain" => {
1694 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1695 }
1696 other => normalize_declared_license_key(other)
1697 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1698 }
1699}
1700
1701fn parse_copyright_holders(text: &str) -> Vec<String> {
1702 let mut holders = Vec::new();
1703 let mut count = 0usize;
1704
1705 for line in text.lines() {
1706 count += 1;
1707 if count > MAX_ITERATION_COUNT {
1708 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
1709 break;
1710 }
1711 let line = line.trim();
1712 if line.is_empty() {
1713 continue;
1714 }
1715
1716 let cleaned = line
1717 .trim_start_matches("Copyright")
1718 .trim_start_matches("copyright")
1719 .trim_start_matches("(C)")
1720 .trim_start_matches("(c)")
1721 .trim_start_matches("©")
1722 .trim();
1723
1724 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
1725 let without_years = &cleaned[year_end..];
1726 let holder = without_years
1727 .trim_start_matches(',')
1728 .trim_start_matches('-')
1729 .trim();
1730
1731 if !holder.is_empty() && holder.len() > 2 {
1732 holders.push(holder.to_string());
1733 }
1734 }
1735 }
1736
1737 holders
1738}
1739
1740fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
1741 let mut in_field = false;
1742 let mut field_content = String::new();
1743 let mut count = 0usize;
1744
1745 for line in content.lines() {
1746 count += 1;
1747 if count > MAX_ITERATION_COUNT {
1748 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
1749 break;
1750 }
1751 if line.starts_with(field_name) {
1752 in_field = true;
1753 field_content.push_str(line.trim_start_matches(field_name).trim());
1754 field_content.push('\n');
1755 } else if in_field {
1756 if line.starts_with(char::is_whitespace) {
1757 field_content.push_str(line.trim());
1758 field_content.push('\n');
1759 } else if !line.trim().is_empty() {
1760 break;
1761 }
1762 }
1763 }
1764
1765 let trimmed = field_content.trim();
1766 if trimmed.is_empty() {
1767 None
1768 } else {
1769 Some(truncate_field(trimmed.to_string()))
1770 }
1771}
1772
1773pub struct DebianDebParser;
1775
1776impl PackageParser for DebianDebParser {
1777 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1778
1779 fn is_match(path: &Path) -> bool {
1780 path.extension().and_then(|e| e.to_str()) == Some("deb")
1781 }
1782
1783 fn extract_packages(path: &Path) -> Vec<PackageData> {
1784 if let Ok(data) = extract_deb_archive(path) {
1786 return vec![data];
1787 }
1788
1789 let filename = match path.file_name().and_then(|n| n.to_str()) {
1791 Some(f) => f,
1792 None => {
1793 return vec![default_package_data(DatasourceId::DebianDeb)];
1794 }
1795 };
1796
1797 vec![parse_deb_filename(filename)]
1798 }
1799}
1800
// Registry metadata for the `.deb` archive parser.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here — confirm before editing.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1808
1809fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1810 use flate2::read::GzDecoder;
1811 use liblzma::read::XzDecoder;
1812 use std::io::{Cursor, Read};
1813
1814 let file_metadata =
1815 std::fs::metadata(path).map_err(|e| format!("Failed to stat .deb file: {}", e))?;
1816 if file_metadata.len() > MAX_ARCHIVE_SIZE {
1817 return Err(format!(
1818 ".deb file exceeds MAX_ARCHIVE_SIZE ({} bytes)",
1819 file_metadata.len()
1820 ));
1821 }
1822 let compressed_size = file_metadata.len() as usize;
1823
1824 let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1825
1826 let mut archive = ar::Archive::new(file);
1827 let mut package: Option<PackageData> = None;
1828 let mut total_extracted: usize = 0;
1829
1830 while let Some(entry_result) = archive.next_entry() {
1831 let entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1832
1833 let entry_name_raw = entry.header().identifier();
1834 let entry_name = String::from_utf8_lossy(entry_name_raw);
1835 let had_replacement = entry_name_raw.iter().any(|&b| b > 127);
1836 if had_replacement {
1837 warn!(
1838 "extract_deb_archive: non-UTF-8 bytes in entry name replaced with lossy conversion"
1839 );
1840 }
1841 let entry_name = entry_name.trim().to_string();
1842
1843 if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1844 let entry_size = entry.header().size();
1845 if entry_size > MAX_FILE_SIZE {
1846 warn!(
1847 "extract_deb_archive: control tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1848 entry_size
1849 );
1850 continue;
1851 }
1852 let mut control_data = Vec::new();
1853 entry
1854 .take(MAX_FILE_SIZE)
1855 .read_to_end(&mut control_data)
1856 .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1857
1858 total_extracted += control_data.len();
1859 if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1860 warn!(
1861 "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1862 );
1863 break;
1864 }
1865 if total_extracted > MAX_ARCHIVE_SIZE as usize {
1866 warn!(
1867 "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1868 );
1869 break;
1870 }
1871
1872 if entry_name.ends_with(".gz") {
1873 let decoder = GzDecoder::new(Cursor::new(control_data));
1874 if let Some(parsed_package) =
1875 parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1876 {
1877 package = Some(parsed_package);
1878 }
1879 } else if entry_name.ends_with(".xz") {
1880 let decoder = XzDecoder::new(Cursor::new(control_data));
1881 if let Some(parsed_package) =
1882 parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1883 {
1884 package = Some(parsed_package);
1885 }
1886 }
1887 } else if entry_name.starts_with("data.tar") {
1888 let entry_size = entry.header().size();
1889 if entry_size > MAX_FILE_SIZE {
1890 warn!(
1891 "extract_deb_archive: data tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1892 entry_size
1893 );
1894 continue;
1895 }
1896 let mut data = Vec::new();
1897 entry
1898 .take(MAX_FILE_SIZE)
1899 .read_to_end(&mut data)
1900 .map_err(|e| format!("Failed to read data archive: {}", e))?;
1901
1902 total_extracted += data.len();
1903 if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1904 warn!(
1905 "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1906 );
1907 break;
1908 }
1909 if total_extracted > MAX_ARCHIVE_SIZE as usize {
1910 warn!(
1911 "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1912 );
1913 break;
1914 }
1915
1916 let Some(current_package) = package.as_mut() else {
1917 continue;
1918 };
1919
1920 if entry_name.ends_with(".gz") {
1921 let decoder = GzDecoder::new(Cursor::new(data));
1922 merge_deb_data_archive(
1923 decoder,
1924 current_package,
1925 &mut total_extracted,
1926 compressed_size,
1927 )?;
1928 } else if entry_name.ends_with(".xz") {
1929 let decoder = XzDecoder::new(Cursor::new(data));
1930 merge_deb_data_archive(
1931 decoder,
1932 current_package,
1933 &mut total_extracted,
1934 compressed_size,
1935 )?;
1936 }
1937 }
1938 }
1939
1940 package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1941}
1942
1943fn parse_control_tar_archive<R: std::io::Read>(
1944 reader: R,
1945 total_extracted: &mut usize,
1946 compressed_size: usize,
1947) -> Result<Option<PackageData>, String> {
1948 use std::io::Read;
1949
1950 let mut tar_archive = tar::Archive::new(reader);
1951
1952 for tar_entry_result in tar_archive
1953 .entries()
1954 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1955 {
1956 let tar_entry = tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1957
1958 let tar_path = tar_entry
1959 .path()
1960 .map_err(|e| format!("Failed to get tar path: {}", e))?;
1961
1962 if tar_path
1963 .components()
1964 .any(|c| matches!(c, std::path::Component::ParentDir))
1965 {
1966 warn!(
1967 "parse_control_tar_archive: skipping tar entry with path traversal: {:?}",
1968 tar_path
1969 );
1970 continue;
1971 }
1972
1973 if tar_entry.size() > MAX_FILE_SIZE {
1974 warn!(
1975 "parse_control_tar_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1976 tar_entry.size()
1977 );
1978 continue;
1979 }
1980
1981 if tar_path.ends_with("control") {
1982 let mut control_content = String::new();
1983 tar_entry
1984 .take(MAX_FILE_SIZE)
1985 .read_to_string(&mut control_content)
1986 .map_err(|e| format!("Failed to read control file: {}", e))?;
1987
1988 *total_extracted += control_content.len();
1989 if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1990 warn!(
1991 "parse_control_tar_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1992 );
1993 return Ok(None);
1994 }
1995 if *total_extracted > MAX_ARCHIVE_SIZE as usize {
1996 warn!(
1997 "parse_control_tar_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1998 );
1999 return Ok(None);
2000 }
2001
2002 let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
2003 if paragraphs.is_empty() {
2004 return Err("No paragraphs in control file".to_string());
2005 }
2006
2007 if let Some(package) =
2008 build_package_from_paragraph(¶graphs[0], None, DatasourceId::DebianDeb)
2009 {
2010 return Ok(Some(package));
2011 }
2012
2013 return Err("Failed to parse control file".to_string());
2014 }
2015 }
2016
2017 Ok(None)
2018}
2019
2020fn merge_deb_data_archive<R: std::io::Read>(
2021 reader: R,
2022 package: &mut PackageData,
2023 total_extracted: &mut usize,
2024 compressed_size: usize,
2025) -> Result<(), String> {
2026 use std::io::Read;
2027
2028 let mut tar_archive = tar::Archive::new(reader);
2029
2030 for tar_entry_result in tar_archive
2031 .entries()
2032 .map_err(|e| format!("Failed to read data tar entries: {}", e))?
2033 {
2034 let tar_entry =
2035 tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
2036
2037 let tar_path = tar_entry
2038 .path()
2039 .map_err(|e| format!("Failed to get data tar path: {}", e))?;
2040
2041 if tar_path
2042 .components()
2043 .any(|c| matches!(c, std::path::Component::ParentDir))
2044 {
2045 warn!(
2046 "merge_deb_data_archive: skipping tar entry with path traversal: {:?}",
2047 tar_path
2048 );
2049 continue;
2050 }
2051
2052 if tar_entry.size() > MAX_FILE_SIZE {
2053 warn!(
2054 "merge_deb_data_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
2055 tar_entry.size()
2056 );
2057 continue;
2058 }
2059
2060 let tar_path_str = tar_path.to_string_lossy();
2061
2062 if tar_path_str.ends_with(&format!(
2063 "/usr/share/doc/{}/copyright",
2064 package.name.as_deref().unwrap_or_default()
2065 )) || tar_path_str.ends_with(&format!(
2066 "usr/share/doc/{}/copyright",
2067 package.name.as_deref().unwrap_or_default()
2068 )) {
2069 let mut copyright_content = String::new();
2070 tar_entry
2071 .take(MAX_FILE_SIZE)
2072 .read_to_string(&mut copyright_content)
2073 .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
2074
2075 *total_extracted += copyright_content.len();
2076 if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
2077 warn!(
2078 "merge_deb_data_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
2079 );
2080 return Ok(());
2081 }
2082 if *total_extracted > MAX_ARCHIVE_SIZE as usize {
2083 warn!(
2084 "merge_deb_data_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
2085 );
2086 return Ok(());
2087 }
2088
2089 let copyright_pkg = parse_copyright_file(©right_content, package.name.as_deref());
2090 merge_debian_copyright_into_package(package, ©right_pkg);
2091 break;
2092 }
2093 }
2094
2095 Ok(())
2096}
2097
2098fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
2099 if target.extracted_license_statement.is_none() {
2100 target.extracted_license_statement = copyright.extracted_license_statement.clone();
2101 }
2102
2103 if target.declared_license_expression.is_none() {
2104 target.declared_license_expression = copyright.declared_license_expression.clone();
2105 }
2106 if target.declared_license_expression_spdx.is_none() {
2107 target.declared_license_expression_spdx =
2108 copyright.declared_license_expression_spdx.clone();
2109 }
2110 if target.license_detections.is_empty() {
2111 target.license_detections = copyright.license_detections.clone();
2112 }
2113 if target.other_license_expression.is_none() {
2114 target.other_license_expression = copyright.other_license_expression.clone();
2115 }
2116 if target.other_license_expression_spdx.is_none() {
2117 target.other_license_expression_spdx = copyright.other_license_expression_spdx.clone();
2118 }
2119 if target.other_license_detections.is_empty() {
2120 target.other_license_detections = copyright.other_license_detections.clone();
2121 }
2122
2123 for party in ©right.parties {
2124 if !target.parties.iter().any(|existing| {
2125 existing.r#type == party.r#type
2126 && existing.role == party.role
2127 && existing.name == party.name
2128 && existing.email == party.email
2129 && existing.url == party.url
2130 && existing.organization == party.organization
2131 && existing.organization_url == party.organization_url
2132 && existing.timezone == party.timezone
2133 }) {
2134 target.parties.push(party.clone());
2135 }
2136 }
2137}
2138
2139fn parse_deb_filename(filename: &str) -> PackageData {
2140 let without_ext = filename.trim_end_matches(".deb");
2141
2142 let parts: Vec<&str> = without_ext.split('_').collect();
2143 if parts.len() < 2 {
2144 return default_package_data(DatasourceId::DebianDeb);
2145 }
2146
2147 let name = truncate_field(parts[0].to_string());
2148 let version = truncate_field(parts[1].to_string());
2149 let architecture = if parts.len() >= 3 {
2150 Some(truncate_field(parts[2].to_string()))
2151 } else {
2152 None
2153 };
2154
2155 let namespace = Some("debian".to_string());
2156
2157 PackageData {
2158 datasource_id: Some(DatasourceId::DebianDeb),
2159 package_type: Some(PACKAGE_TYPE),
2160 namespace: namespace.clone(),
2161 name: Some(name.clone()),
2162 version: Some(version.clone()),
2163 purl: build_debian_purl(
2164 &name,
2165 Some(&version),
2166 namespace.as_deref(),
2167 architecture.as_deref(),
2168 ),
2169 ..Default::default()
2170 }
2171}
2172
2173pub struct DebianControlInExtractedDebParser;
2179
2180impl PackageParser for DebianControlInExtractedDebParser {
2181 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2182
2183 fn is_match(path: &Path) -> bool {
2184 path.file_name()
2185 .and_then(|n| n.to_str())
2186 .is_some_and(|name| name == "control")
2187 && path
2188 .to_str()
2189 .map(|p| {
2190 p.ends_with("control.tar.gz-extract/control")
2191 || p.ends_with("control.tar.xz-extract/control")
2192 })
2193 .unwrap_or(false)
2194 }
2195
2196 fn extract_packages(path: &Path) -> Vec<PackageData> {
2197 let content = match read_file_to_string(path, None) {
2198 Ok(c) => c,
2199 Err(e) => {
2200 warn!(
2201 "Failed to read control file in extracted deb {:?}: {}",
2202 path, e
2203 );
2204 return vec![default_package_data(
2205 DatasourceId::DebianControlExtractedDeb,
2206 )];
2207 }
2208 };
2209
2210 let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
2213 if paragraphs.is_empty() {
2214 return vec![default_package_data(
2215 DatasourceId::DebianControlExtractedDeb,
2216 )];
2217 }
2218
2219 if let Some(pkg) = build_package_from_paragraph(
2220 ¶graphs[0],
2221 None,
2222 DatasourceId::DebianControlExtractedDeb,
2223 ) {
2224 vec![pkg]
2225 } else {
2226 vec![default_package_data(
2227 DatasourceId::DebianControlExtractedDeb,
2228 )]
2229 }
2230 }
2231}
2232
2233pub struct DebianMd5sumInPackageParser;
2235
2236impl PackageParser for DebianMd5sumInPackageParser {
2237 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2238
2239 fn is_match(path: &Path) -> bool {
2240 path.file_name()
2241 .and_then(|n| n.to_str())
2242 .is_some_and(|name| name == "md5sums")
2243 && path
2244 .to_str()
2245 .map(|p| {
2246 p.ends_with("control.tar.gz-extract/md5sums")
2247 || p.ends_with("control.tar.xz-extract/md5sums")
2248 })
2249 .unwrap_or(false)
2250 }
2251
2252 fn extract_packages(path: &Path) -> Vec<PackageData> {
2253 let content = match read_file_to_string(path, None) {
2254 Ok(c) => c,
2255 Err(e) => {
2256 warn!("Failed to read md5sums file {:?}: {}", path, e);
2257 return vec![default_package_data(
2258 DatasourceId::DebianMd5SumsInExtractedDeb,
2259 )];
2260 }
2261 };
2262
2263 let package_name = extract_package_name_from_deb_path(path);
2264
2265 vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2266 }
2267}
2268
/// Derives the package name from the `<name>_<...>.deb-extract` directory two
/// levels above `path`.
///
/// Returns `None` when the path has no grandparent, when the grandparent's
/// name is not valid UTF-8, or when it does not end in `.deb-extract`. The
/// name is everything before the first `_`.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    path.parent()
        .and_then(|dir| dir.parent())
        .and_then(|dir| dir.file_name())
        .and_then(|dir_name| dir_name.to_str())
        .and_then(|dir_name| dir_name.strip_suffix("-extract"))
        .and_then(|archive_name| archive_name.strip_suffix(".deb"))
        .and_then(|stem| stem.split('_').next())
        .map(str::to_owned)
}
2279
2280fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2281 let mut file_references = Vec::new();
2282 let mut count = 0usize;
2283
2284 for line in content.lines() {
2285 count += 1;
2286 if count > MAX_ITERATION_COUNT {
2287 warn!("parse_md5sums_in_package: exceeded MAX_ITERATION_COUNT lines, stopping");
2288 break;
2289 }
2290 let line = line.trim();
2291 if line.is_empty() || line.starts_with('#') {
2292 continue;
2293 }
2294
2295 let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find(" ") {
2296 (
2297 Md5Digest::from_hex(line[..idx].trim()).ok(),
2298 line[idx + 2..].trim(),
2299 )
2300 } else if let Some((hash, path)) = line.split_once(' ') {
2301 (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2302 } else {
2303 (None, line)
2304 };
2305
2306 if IGNORED_ROOT_DIRS.contains(&filepath) {
2307 continue;
2308 }
2309
2310 file_references.push(FileReference {
2311 path: filepath.to_string(),
2312 size: None,
2313 sha1: None,
2314 md5: md5sum,
2315 sha256: None,
2316 sha512: None,
2317 extra_data: None,
2318 });
2319 }
2320
2321 if file_references.is_empty() {
2322 return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2323 }
2324
2325 let namespace = Some("debian".to_string());
2326 let mut package = PackageData {
2327 datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2328 package_type: Some(PACKAGE_TYPE),
2329 namespace: namespace.clone(),
2330 name: package_name.map(|s| truncate_field(s.to_string())),
2331 file_references,
2332 ..Default::default()
2333 };
2334
2335 if let Some(n) = &package.name {
2336 package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2337 }
2338
2339 package
2340}
2341
// Registry metadata for the parser of `control` files inside extracted
// control tarballs.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here — confirm before editing.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2352
// Registers metadata for the `md5sums` file found inside an extracted `.deb`
// control tarball (`control.tar.gz-extract` / `control.tar.xz-extract`):
// a description, the matching path patterns, the `deb` package type and a
// link to the Debian policy chapter on control fields.
// NOTE(review): the meaning of the empty fourth argument is not visible here —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2363
2364#[cfg(test)]
2365mod tests {
2366 use super::*;
2367 use crate::models::DatasourceId;
2368 use crate::models::PackageType;
2369 use ar::{Builder as ArBuilder, Header as ArHeader};
2370 use flate2::Compression;
2371 use flate2::write::GzEncoder;
2372 use liblzma::write::XzEncoder;
2373 use std::io::Cursor;
2374 use std::path::PathBuf;
2375 use tar::{Builder as TarBuilder, Header as TarHeader};
2376 use tempfile::NamedTempFile;
2377
2378 fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2379 let mut control_tar = Vec::new();
2380 {
2381 let encoder = XzEncoder::new(&mut control_tar, 6);
2382 let mut tar_builder = TarBuilder::new(encoder);
2383
2384 let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2385 let mut header = TarHeader::new_gnu();
2386 header
2387 .set_path("control")
2388 .expect("control tar path should be valid");
2389 header.set_size(control_content.len() as u64);
2390 header.set_mode(0o644);
2391 header.set_cksum();
2392 tar_builder
2393 .append(&header, Cursor::new(control_content))
2394 .expect("control file should be appended to tar.xz");
2395 tar_builder.finish().expect("control tar.xz should finish");
2396 }
2397
2398 let deb = NamedTempFile::new().expect("temp deb file should be created");
2399 {
2400 let mut builder = ArBuilder::new(
2401 deb.reopen()
2402 .expect("temporary deb file should reopen for writing"),
2403 );
2404
2405 let debian_binary = b"2.0\n";
2406 let mut debian_binary_header =
2407 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2408 debian_binary_header.set_mode(0o100644);
2409 builder
2410 .append(&debian_binary_header, Cursor::new(debian_binary))
2411 .expect("debian-binary entry should be appended");
2412
2413 let mut control_header =
2414 ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2415 control_header.set_mode(0o100644);
2416 builder
2417 .append(&control_header, Cursor::new(control_tar))
2418 .expect("control.tar.xz entry should be appended");
2419 }
2420
2421 deb
2422 }
2423
2424 fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2425 let mut control_tar = Vec::new();
2426 {
2427 let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2428 let mut tar_builder = TarBuilder::new(encoder);
2429
2430 let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2431 let mut header = TarHeader::new_gnu();
2432 header
2433 .set_path("control")
2434 .expect("control tar path should be valid");
2435 header.set_size(control_content.len() as u64);
2436 header.set_mode(0o644);
2437 header.set_cksum();
2438 tar_builder
2439 .append(&header, Cursor::new(control_content))
2440 .expect("control file should be appended to tar.gz");
2441 tar_builder.finish().expect("control tar.gz should finish");
2442 }
2443
2444 let mut data_tar = Vec::new();
2445 {
2446 let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2447 let mut tar_builder = TarBuilder::new(encoder);
2448
2449 let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2450 let mut header = TarHeader::new_gnu();
2451 header
2452 .set_path("./usr/share/doc/synthetic/copyright")
2453 .expect("copyright path should be valid");
2454 header.set_size(copyright.len() as u64);
2455 header.set_mode(0o644);
2456 header.set_cksum();
2457 tar_builder
2458 .append(&header, Cursor::new(copyright))
2459 .expect("copyright file should be appended to data tar");
2460 tar_builder.finish().expect("data tar.gz should finish");
2461 }
2462
2463 let deb = NamedTempFile::new().expect("temp deb file should be created");
2464 {
2465 let mut builder = ArBuilder::new(
2466 deb.reopen()
2467 .expect("temporary deb file should reopen for writing"),
2468 );
2469
2470 let debian_binary = b"2.0\n";
2471 let mut debian_binary_header =
2472 ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2473 debian_binary_header.set_mode(0o100644);
2474 builder
2475 .append(&debian_binary_header, Cursor::new(debian_binary))
2476 .expect("debian-binary entry should be appended");
2477
2478 let mut control_header =
2479 ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2480 control_header.set_mode(0o100644);
2481 builder
2482 .append(&control_header, Cursor::new(control_tar))
2483 .expect("control.tar.gz entry should be appended");
2484
2485 let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2486 data_header.set_mode(0o100644);
2487 builder
2488 .append(&data_header, Cursor::new(data_tar))
2489 .expect("data.tar.gz entry should be appended");
2490 }
2491
2492 deb
2493 }
2494
2495 #[test]
2498 fn test_detect_namespace_from_ubuntu_version() {
2499 assert_eq!(
2500 detect_namespace(Some("1.0-1ubuntu1"), None),
2501 Some("ubuntu".to_string())
2502 );
2503 }
2504
2505 #[test]
2506 fn test_detect_namespace_from_debian_version() {
2507 assert_eq!(
2508 detect_namespace(Some("1.0-1+deb11u1"), None),
2509 Some("debian".to_string())
2510 );
2511 }
2512
2513 #[test]
2514 fn test_detect_namespace_from_ubuntu_maintainer() {
2515 assert_eq!(
2516 detect_namespace(
2517 None,
2518 Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2519 ),
2520 Some("ubuntu".to_string())
2521 );
2522 }
2523
2524 #[test]
2525 fn test_detect_namespace_from_debian_maintainer() {
2526 assert_eq!(
2527 detect_namespace(None, Some("John Doe <john@debian.org>")),
2528 Some("debian".to_string())
2529 );
2530 }
2531
2532 #[test]
2533 fn test_detect_namespace_default() {
2534 assert_eq!(
2535 detect_namespace(None, Some("Unknown <unknown@example.com>")),
2536 Some("debian".to_string())
2537 );
2538 }
2539
2540 #[test]
2541 fn test_detect_namespace_version_takes_priority() {
2542 assert_eq!(
2544 detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2545 Some("ubuntu".to_string())
2546 );
2547 }
2548
2549 #[test]
2552 fn test_build_purl_basic() {
2553 let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2554 assert_eq!(
2555 purl,
2556 Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2557 );
2558 }
2559
2560 #[test]
2561 fn test_build_purl_no_version() {
2562 let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2563 assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2564 }
2565
2566 #[test]
2567 fn test_build_purl_no_arch() {
2568 let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2569 assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2570 }
2571
2572 #[test]
2573 fn test_build_purl_no_namespace() {
2574 let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2575 assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2576 }
2577
2578 #[test]
2581 fn test_parse_simple_dependency() {
2582 let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2583 assert_eq!(deps.len(), 1);
2584 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2585 assert_eq!(deps[0].extracted_requirement, None);
2586 assert_eq!(deps[0].scope, Some("depends".to_string()));
2587 }
2588
2589 #[test]
2590 fn test_parse_dependency_with_version() {
2591 let deps =
2592 parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2593 assert_eq!(deps.len(), 1);
2594 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2595 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2596 }
2597
2598 #[test]
2599 fn test_parse_dependency_exact_version() {
2600 let deps = parse_dependency_field(
2601 "libc6 (= 2.31-13+deb11u5)",
2602 "depends",
2603 true,
2604 false,
2605 Some("debian"),
2606 );
2607 assert_eq!(deps.len(), 1);
2608 assert_eq!(deps[0].is_pinned, Some(true));
2609 }
2610
2611 #[test]
2612 fn test_parse_dependency_strict_less() {
2613 let deps =
2614 parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2615 assert_eq!(deps.len(), 1);
2616 assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2617 assert_eq!(deps[0].scope, Some("breaks".to_string()));
2618 }
2619
2620 #[test]
2621 fn test_parse_multiple_dependencies() {
2622 let deps = parse_dependency_field(
2623 "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2624 "depends",
2625 true,
2626 false,
2627 Some("debian"),
2628 );
2629 assert_eq!(deps.len(), 3);
2630 }
2631
2632 #[test]
2633 fn test_parse_dependency_alternatives() {
2634 let deps = parse_dependency_field(
2635 "libssl1.1 | libssl3",
2636 "depends",
2637 true,
2638 false,
2639 Some("debian"),
2640 );
2641 assert_eq!(deps.len(), 2);
2642 assert_eq!(deps[0].is_optional, Some(true));
2644 assert_eq!(deps[1].is_optional, Some(true));
2645 }
2646
2647 #[test]
2648 fn test_parse_dependency_skips_substitutions() {
2649 let deps = parse_dependency_field(
2650 "${shlibs:Depends}, ${misc:Depends}, libc6",
2651 "depends",
2652 true,
2653 false,
2654 Some("debian"),
2655 );
2656 assert_eq!(deps.len(), 1);
2657 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2658 }
2659
2660 #[test]
2661 fn test_parse_dependency_with_arch_qualifier() {
2662 let deps = parse_dependency_field(
2664 "libc6 (>= 2.17) [amd64]",
2665 "depends",
2666 true,
2667 false,
2668 Some("debian"),
2669 );
2670 assert_eq!(deps.len(), 1);
2671 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2672 }
2673
2674 #[test]
2675 fn test_parse_empty_dependency() {
2676 let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2677 assert!(deps.is_empty());
2678 }
2679
2680 #[test]
2683 fn test_parse_source_field_name_only() {
2684 let sources = parse_source_field(Some("util-linux"), Some("debian"));
2685 assert_eq!(sources.len(), 1);
2686 assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2687 }
2688
2689 #[test]
2690 fn test_parse_source_field_with_version() {
2691 let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2692 assert_eq!(sources.len(), 1);
2693 assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2694 }
2695
2696 #[test]
2697 fn test_parse_source_field_empty() {
2698 let sources = parse_source_field(None, Some("debian"));
2699 assert!(sources.is_empty());
2700 }
2701
2702 #[test]
2705 fn test_parse_debian_control_source_and_binary() {
2706 let content = "\
2707Source: curl
2708Section: web
2709Priority: optional
2710Maintainer: Alessandro Ghedini <ghedo@debian.org>
2711Homepage: https://curl.se/
2712Vcs-Browser: https://salsa.debian.org/debian/curl
2713Vcs-Git: https://salsa.debian.org/debian/curl.git
2714Build-Depends: debhelper (>= 12), libssl-dev
2715
2716Package: curl
2717Architecture: amd64
2718Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2719Description: command line tool for transferring data with URL syntax";
2720
2721 let packages = parse_debian_control(content);
2722 assert_eq!(packages.len(), 1);
2723
2724 let pkg = &packages[0];
2725 assert_eq!(pkg.name, Some("curl".to_string()));
2726 assert_eq!(pkg.package_type, Some(PackageType::Deb));
2727 assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2728 assert_eq!(
2729 pkg.vcs_url,
2730 Some("https://salsa.debian.org/debian/curl.git".to_string())
2731 );
2732 assert_eq!(
2733 pkg.code_view_url,
2734 Some("https://salsa.debian.org/debian/curl".to_string())
2735 );
2736
2737 assert_eq!(pkg.parties.len(), 1);
2739 assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2740 assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2741 assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2742
2743 assert!(!pkg.dependencies.is_empty());
2745 }
2746
2747 #[test]
2748 fn test_parse_debian_control_multiple_binary() {
2749 let content = "\
2750Source: gzip
2751Maintainer: Debian Developer <dev@debian.org>
2752
2753Package: gzip
2754Architecture: any
2755Depends: libc6 (>= 2.17)
2756Description: GNU file compression
2757
2758Package: gzip-win32
2759Architecture: all
2760Description: gzip for Windows";
2761
2762 let packages = parse_debian_control(content);
2763 assert_eq!(packages.len(), 2);
2764 assert_eq!(packages[0].name, Some("gzip".to_string()));
2765 assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2766
2767 assert_eq!(packages[0].parties.len(), 1);
2769 assert_eq!(packages[1].parties.len(), 1);
2770 }
2771
2772 #[test]
2773 fn test_parse_debian_control_source_only() {
2774 let content = "\
2775Source: my-package
2776Maintainer: Test User <test@debian.org>
2777Build-Depends: debhelper (>= 13)";
2778
2779 let packages = parse_debian_control(content);
2780 assert_eq!(packages.len(), 1);
2781 assert_eq!(packages[0].name, Some("my-package".to_string()));
2782 assert!(!packages[0].dependencies.is_empty());
2784 assert_eq!(
2785 packages[0].dependencies[0].scope,
2786 Some("build-depends".to_string())
2787 );
2788 }
2789
2790 #[test]
2791 fn test_parse_debian_control_with_uploaders() {
2792 let content = "\
2793Source: example
2794Maintainer: Main Dev <main@debian.org>
2795Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2796
2797Package: example
2798Architecture: any
2799Description: test package";
2800
2801 let packages = parse_debian_control(content);
2802 assert_eq!(packages.len(), 1);
2803 assert_eq!(packages[0].parties.len(), 3);
2805 assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2806 assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2807 assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2808 }
2809
2810 #[test]
2811 fn test_parse_debian_control_vcs_git_with_branch() {
2812 let content = "\
2813Source: example
2814Maintainer: Dev <dev@debian.org>
2815Vcs-Git: https://salsa.debian.org/example.git -b main
2816
2817Package: example
2818Architecture: any
2819Description: test";
2820
2821 let packages = parse_debian_control(content);
2822 assert_eq!(packages.len(), 1);
2823 assert_eq!(
2825 packages[0].vcs_url,
2826 Some("https://salsa.debian.org/example.git".to_string())
2827 );
2828 }
2829
2830 #[test]
2831 fn test_parse_debian_control_multi_arch() {
2832 let content = "\
2833Source: example
2834Maintainer: Dev <dev@debian.org>
2835
2836Package: libexample
2837Architecture: any
2838Multi-Arch: same
2839Description: shared library";
2840
2841 let packages = parse_debian_control(content);
2842 assert_eq!(packages.len(), 1);
2843 let extra = packages[0].extra_data.as_ref().unwrap();
2844 assert_eq!(
2845 extra.get("multi_arch"),
2846 Some(&serde_json::Value::String("same".to_string()))
2847 );
2848 }
2849
2850 #[test]
2853 fn test_parse_dpkg_status_basic() {
2854 let content = "\
2855Package: base-files
2856Status: install ok installed
2857Priority: required
2858Section: admin
2859Installed-Size: 391
2860Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2861Architecture: amd64
2862Version: 11ubuntu5.6
2863Description: Debian base system miscellaneous files
2864Homepage: https://tracker.debian.org/pkg/base-files
2865
2866Package: not-installed
2867Status: deinstall ok config-files
2868Architecture: amd64
2869Version: 1.0
2870Description: This should be skipped";
2871
2872 let packages = parse_dpkg_status(content);
2873 assert_eq!(packages.len(), 1);
2874
2875 let pkg = &packages[0];
2876 assert_eq!(pkg.name, Some("base-files".to_string()));
2877 assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2878 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2879 assert_eq!(
2880 pkg.datasource_id,
2881 Some(DatasourceId::DebianInstalledStatusDb)
2882 );
2883
2884 let extra = pkg.extra_data.as_ref().unwrap();
2886 assert_eq!(
2887 extra.get("installed_size"),
2888 Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2889 );
2890 }
2891
2892 #[test]
2893 fn test_parse_dpkg_status_multiple_installed() {
2894 let content = "\
2895Package: libc6
2896Status: install ok installed
2897Architecture: amd64
2898Version: 2.31-13+deb11u5
2899Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2900Description: GNU C Library
2901
2902Package: zlib1g
2903Status: install ok installed
2904Architecture: amd64
2905Version: 1:1.2.11.dfsg-2+deb11u2
2906Maintainer: Mark Brown <broonie@debian.org>
2907Description: compression library";
2908
2909 let packages = parse_dpkg_status(content);
2910 assert_eq!(packages.len(), 2);
2911 assert_eq!(packages[0].name, Some("libc6".to_string()));
2912 assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2913 }
2914
2915 #[test]
2916 fn test_parse_dpkg_status_with_dependencies() {
2917 let content = "\
2918Package: curl
2919Status: install ok installed
2920Architecture: amd64
2921Version: 7.74.0-1.3+deb11u7
2922Maintainer: Alessandro Ghedini <ghedo@debian.org>
2923Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2924Recommends: ca-certificates
2925Description: command line tool for transferring data with URL syntax";
2926
2927 let packages = parse_dpkg_status(content);
2928 assert_eq!(packages.len(), 1);
2929
2930 let deps = &packages[0].dependencies;
2931 assert_eq!(deps.len(), 3);
2933
2934 assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2936 assert_eq!(deps[0].scope, Some("depends".to_string()));
2937 assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2938
2939 assert_eq!(
2941 deps[2].purl,
2942 Some("pkg:deb/debian/ca-certificates".to_string())
2943 );
2944 assert_eq!(deps[2].scope, Some("recommends".to_string()));
2945 assert_eq!(deps[2].is_optional, Some(true));
2946 }
2947
2948 #[test]
2949 fn test_parse_dpkg_status_with_source() {
2950 let content = "\
2951Package: libncurses6
2952Status: install ok installed
2953Architecture: amd64
2954Source: ncurses (6.2+20201114-2+deb11u1)
2955Version: 6.2+20201114-2+deb11u1
2956Maintainer: Craig Small <csmall@debian.org>
2957Description: shared libraries for terminal handling";
2958
2959 let packages = parse_dpkg_status(content);
2960 assert_eq!(packages.len(), 1);
2961 assert!(!packages[0].source_packages.is_empty());
2962 assert!(packages[0].source_packages[0].contains("ncurses"));
2964 }
2965
2966 #[test]
2967 fn test_parse_dpkg_status_filters_not_installed() {
2968 let content = "\
2969Package: installed-pkg
2970Status: install ok installed
2971Version: 1.0
2972Architecture: amd64
2973Description: installed
2974
2975Package: half-installed
2976Status: install ok half-installed
2977Version: 2.0
2978Architecture: amd64
2979Description: half installed
2980
2981Package: deinstall-pkg
2982Status: deinstall ok config-files
2983Version: 3.0
2984Architecture: amd64
2985Description: deinstalled
2986
2987Package: purge-pkg
2988Status: purge ok not-installed
2989Version: 4.0
2990Architecture: amd64
2991Description: purged";
2992
2993 let packages = parse_dpkg_status(content);
2994 assert_eq!(packages.len(), 1);
2995 assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2996 }
2997
2998 #[test]
2999 fn test_parse_dpkg_status_empty() {
3000 let packages = parse_dpkg_status("");
3001 assert!(packages.is_empty());
3002 }
3003
3004 #[test]
3007 fn test_debian_control_is_match() {
3008 assert!(DebianControlParser::is_match(Path::new(
3009 "/path/to/debian/control"
3010 )));
3011 assert!(DebianControlParser::is_match(Path::new("debian/control")));
3012 assert!(!DebianControlParser::is_match(Path::new(
3013 "/path/to/control"
3014 )));
3015 assert!(!DebianControlParser::is_match(Path::new(
3016 "/path/to/debian/changelog"
3017 )));
3018 }
3019
3020 #[test]
3021 fn test_debian_installed_is_match() {
3022 assert!(DebianInstalledParser::is_match(Path::new(
3023 "/var/lib/dpkg/status"
3024 )));
3025 assert!(DebianInstalledParser::is_match(Path::new(
3026 "some/root/var/lib/dpkg/status"
3027 )));
3028 assert!(!DebianInstalledParser::is_match(Path::new(
3029 "/var/lib/dpkg/status.d/something"
3030 )));
3031 assert!(!DebianInstalledParser::is_match(Path::new(
3032 "/var/lib/dpkg/available"
3033 )));
3034 }
3035
3036 #[test]
3039 fn test_parse_debian_control_empty_input() {
3040 let packages = parse_debian_control("");
3041 assert!(packages.is_empty());
3042 }
3043
3044 #[test]
3045 fn test_parse_debian_control_malformed_input() {
3046 let content = "this is not a valid control file\nwith random text";
3047 let packages = parse_debian_control(content);
3048 assert!(packages.is_empty());
3050 }
3051
3052 #[test]
3053 fn test_dependency_with_epoch_version() {
3054 let deps = parse_dependency_field(
3056 "zlib1g (>= 1:1.2.11)",
3057 "depends",
3058 true,
3059 false,
3060 Some("debian"),
3061 );
3062 assert_eq!(deps.len(), 1);
3063 assert_eq!(
3064 deps[0].extracted_requirement,
3065 Some(">= 1:1.2.11".to_string())
3066 );
3067 }
3068
3069 #[test]
3070 fn test_dependency_with_plus_in_name() {
3071 let deps =
3072 parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
3073 assert_eq!(deps.len(), 1);
3074 assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
3075 }
3076
3077 #[test]
3078 fn test_dsc_parser_is_match() {
3079 assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
3080 assert!(DebianDscParser::is_match(&PathBuf::from(
3081 "adduser_3.118+deb11u1.dsc"
3082 )));
3083 assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
3084 assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
3085 }
3086
3087 #[test]
3088 fn test_dsc_parser_adduser() {
3089 let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
3090 let package = DebianDscParser::extract_first_package(&path);
3091
3092 assert_eq!(package.package_type, Some(PACKAGE_TYPE));
3093 assert_eq!(package.namespace, Some("debian".to_string()));
3094 assert_eq!(package.name, Some("adduser".to_string()));
3095 assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
3096 assert_eq!(
3097 package.purl,
3098 Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
3099 );
3100 assert_eq!(
3101 package.vcs_url,
3102 Some("https://salsa.debian.org/debian/adduser.git".to_string())
3103 );
3104 assert_eq!(
3105 package.code_view_url,
3106 Some("https://salsa.debian.org/debian/adduser".to_string())
3107 );
3108 assert_eq!(
3109 package.datasource_id,
3110 Some(DatasourceId::DebianSourceControlDsc)
3111 );
3112
3113 assert_eq!(package.parties.len(), 2);
3114 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3115 assert_eq!(
3116 package.parties[0].name,
3117 Some("Debian Adduser Developers".to_string())
3118 );
3119 assert_eq!(
3120 package.parties[0].email,
3121 Some("adduser@packages.debian.org".to_string())
3122 );
3123 assert_eq!(package.parties[0].r#type, None);
3124
3125 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3126 assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
3127 assert_eq!(
3128 package.parties[1].email,
3129 Some("mh+debian-packages@zugschlus.de".to_string())
3130 );
3131 assert_eq!(package.parties[1].r#type, None);
3132
3133 assert_eq!(package.source_packages.len(), 1);
3134 assert_eq!(
3135 package.source_packages[0],
3136 "pkg:deb/debian/adduser".to_string()
3137 );
3138
3139 assert!(!package.dependencies.is_empty());
3140 let build_dep_names: Vec<String> = package
3141 .dependencies
3142 .iter()
3143 .filter_map(|d| d.purl.as_ref())
3144 .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
3145 .map(|p| p.to_string())
3146 .collect();
3147 assert!(build_dep_names.len() >= 2);
3148 }
3149
3150 #[test]
3151 fn test_dsc_parser_zsh() {
3152 let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
3153 let package = DebianDscParser::extract_first_package(&path);
3154
3155 assert_eq!(package.name, Some("zsh".to_string()));
3156 assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
3157 assert_eq!(package.namespace, Some("debian".to_string()));
3158 assert!(package.purl.is_some());
3159 assert!(package.purl.as_ref().unwrap().contains("zsh"));
3160 assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
3161 }
3162
3163 #[test]
3164 fn test_parse_dsc_content_basic() {
3165 let content = "Format: 3.0 (native)
3166Source: testpkg
3167Binary: testpkg
3168Architecture: amd64
3169Version: 1.0.0
3170Maintainer: Test User <test@example.com>
3171Standards-Version: 4.5.0
3172Build-Depends: debhelper (>= 12)
3173Files:
3174 abc123 1024 testpkg_1.0.0.tar.xz
3175";
3176
3177 let package = parse_dsc_content(content);
3178 assert_eq!(package.name, Some("testpkg".to_string()));
3179 assert_eq!(package.version, Some("1.0.0".to_string()));
3180 assert_eq!(package.namespace, Some("debian".to_string()));
3181 assert_eq!(package.parties.len(), 1);
3182 assert_eq!(package.parties[0].name, Some("Test User".to_string()));
3183 assert_eq!(
3184 package.parties[0].email,
3185 Some("test@example.com".to_string())
3186 );
3187 assert_eq!(package.dependencies.len(), 1);
3188 assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
3189 }
3190
3191 #[test]
3192 fn test_parse_dsc_content_with_uploaders() {
3193 let content = "Source: mypkg
3194Version: 2.0
3195Architecture: all
3196Maintainer: Main Dev <main@example.com>
3197Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
3198";
3199
3200 let package = parse_dsc_content(content);
3201 assert_eq!(package.parties.len(), 3);
3202 assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3203 assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3204 assert_eq!(package.parties[2].role, Some("uploader".to_string()));
3205 }
3206
3207 #[test]
3208 fn test_orig_tar_parser_is_match() {
3209 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3210 "package_1.0.orig.tar.gz"
3211 )));
3212 assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3213 "abseil_0~20200923.3.orig.tar.xz"
3214 )));
3215 assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
3216 "package.debian.tar.gz"
3217 )));
3218 assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
3219 }
3220
3221 #[test]
3222 fn test_debian_tar_parser_is_match() {
3223 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3224 "package_1.0-1.debian.tar.xz"
3225 )));
3226 assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3227 "abseil_20220623.1-1.debian.tar.gz"
3228 )));
3229 assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
3230 "package.orig.tar.gz"
3231 )));
3232 assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
3233 }
3234
3235 #[test]
3236 fn test_parse_orig_tar_filename() {
3237 let pkg = parse_source_tarball_filename(
3238 "abseil_0~20200923.3.orig.tar.gz",
3239 DatasourceId::DebianOriginalSourceTarball,
3240 );
3241 assert_eq!(pkg.name, Some("abseil".to_string()));
3242 assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
3243 assert_eq!(pkg.namespace, Some("debian".to_string()));
3244 assert_eq!(
3245 pkg.purl,
3246 Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
3247 );
3248 assert_eq!(
3249 pkg.datasource_id,
3250 Some(DatasourceId::DebianOriginalSourceTarball)
3251 );
3252 }
3253
3254 #[test]
3255 fn test_parse_debian_tar_filename() {
3256 let pkg = parse_source_tarball_filename(
3257 "abseil_20220623.1-1.debian.tar.xz",
3258 DatasourceId::DebianSourceMetadataTarball,
3259 );
3260 assert_eq!(pkg.name, Some("abseil".to_string()));
3261 assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
3262 assert_eq!(pkg.namespace, Some("debian".to_string()));
3263 assert_eq!(
3264 pkg.purl,
3265 Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
3266 );
3267 }
3268
3269 #[test]
3270 fn test_parse_deb_filename() {
3271 let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
3272 assert_eq!(pkg.name, Some("nginx".to_string()));
3273 assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3274
3275 let pkg = parse_deb_filename("invalid.deb");
3276 assert!(pkg.name.is_none());
3277 assert!(pkg.version.is_none());
3278 }
3279
3280 #[test]
3281 fn test_parse_source_tarball_various_compressions() {
3282 let pkg_gz = parse_source_tarball_filename(
3283 "test_1.0.orig.tar.gz",
3284 DatasourceId::DebianOriginalSourceTarball,
3285 );
3286 let pkg_xz = parse_source_tarball_filename(
3287 "test_1.0.orig.tar.xz",
3288 DatasourceId::DebianOriginalSourceTarball,
3289 );
3290 let pkg_bz2 = parse_source_tarball_filename(
3291 "test_1.0.orig.tar.bz2",
3292 DatasourceId::DebianOriginalSourceTarball,
3293 );
3294
3295 assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3296 assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3297 assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3298 }
3299
3300 #[test]
3301 fn test_parse_source_tarball_invalid_format() {
3302 let pkg = parse_source_tarball_filename(
3303 "invalid-no-underscore.tar.gz",
3304 DatasourceId::DebianOriginalSourceTarball,
3305 );
3306 assert!(pkg.name.is_none());
3307 assert!(pkg.version.is_none());
3308 }
3309
3310 #[test]
3311 fn test_list_parser_is_match() {
3312 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3313 "/var/lib/dpkg/info/bash.list"
3314 )));
3315 assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3316 "/var/lib/dpkg/info/package:amd64.list"
3317 )));
3318 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3319 "bash.list"
3320 )));
3321 assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3322 "/var/lib/dpkg/info/bash.md5sums"
3323 )));
3324 }
3325
3326 #[test]
3327 fn test_md5sums_parser_is_match() {
3328 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3329 "/var/lib/dpkg/info/bash.md5sums"
3330 )));
3331 assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3332 "/var/lib/dpkg/info/package:amd64.md5sums"
3333 )));
3334 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3335 "bash.md5sums"
3336 )));
3337 assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3338 "/var/lib/dpkg/info/bash.list"
3339 )));
3340 }
3341
3342 #[test]
3343 fn test_parse_debian_file_list_plain_list() {
3344 let content = "/.
3345/bin
3346/bin/bash
3347/usr/bin/bashbug
3348/usr/share/doc/bash/README
3349";
3350 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3351 assert_eq!(pkg.name, Some("bash".to_string()));
3352 assert_eq!(pkg.file_references.len(), 3);
3353 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3354 assert_eq!(pkg.file_references[0].md5, None);
3355 assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3356 assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3357 }
3358
3359 #[test]
3360 fn test_parse_debian_file_list_md5sums() {
3361 let content = "77506afebd3b7e19e937a678a185b62e bin/bash
33621c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug
3363f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz
3364";
3365 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3366 assert_eq!(pkg.name, Some("bash".to_string()));
3367 assert_eq!(pkg.file_references.len(), 3);
3368 assert_eq!(pkg.file_references[0].path, "bin/bash");
3369 assert_eq!(
3370 pkg.file_references[0].md5,
3371 Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
3372 );
3373 assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3374 assert_eq!(
3375 pkg.file_references[1].md5,
3376 Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
3377 );
3378 }
3379
3380 #[test]
3381 fn test_parse_debian_file_list_with_arch() {
3382 let content = "/usr/bin/foo
3383/usr/lib/x86_64-linux-gnu/libfoo.so
3384";
3385 let pkg = parse_debian_file_list(
3386 content,
3387 "libfoo:amd64",
3388 DatasourceId::DebianInstalledFilesList,
3389 );
3390 assert_eq!(pkg.name, Some("libfoo".to_string()));
3391 assert!(pkg.purl.is_some());
3392 assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3393 assert_eq!(pkg.file_references.len(), 2);
3394 }
3395
3396 #[test]
3397 fn test_parse_debian_file_list_skips_comments_and_empty() {
3398 let content = "# This is a comment
3399/bin/bash
3400
3401/usr/bin/bashbug
3402
3403";
3404 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3405 assert_eq!(pkg.file_references.len(), 2);
3406 }
3407
3408 #[test]
3409 fn test_parse_debian_file_list_md5sums_only() {
3410 let content = "abc123 usr/bin/tool
3411";
3412 let pkg =
3413 parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3414 assert_eq!(pkg.name, None);
3415 assert_eq!(pkg.file_references.len(), 1);
3416 }
3417
3418 #[test]
3419 fn test_parse_debian_file_list_ignores_root_dirs() {
3420 let content = "/.
3421/bin
3422/bin/bash
3423/etc
3424/usr
3425/var
3426";
3427 let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3428 assert_eq!(pkg.file_references.len(), 1);
3429 assert_eq!(pkg.file_references[0].path, "/bin/bash");
3430 }
3431
3432 #[test]
3433 fn test_copyright_parser_is_match() {
3434 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3435 "/usr/share/doc/bash/copyright"
3436 )));
3437 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3438 "debian/copyright"
3439 )));
3440 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3441 "src/third_party/gperftools/dist/packages/deb/copyright"
3442 )));
3443 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3444 "ports/zlib/copyright"
3445 )));
3446 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3447 "copyright.txt"
3448 )));
3449 assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3450 "/etc/copyright"
3451 )));
3452 assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3453 "/tmp/sample_copyright"
3454 )));
3455 }
3456
3457 #[test]
3458 fn test_detect_debian_copyright_datasource() {
3459 assert_eq!(
3460 detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3461 DatasourceId::DebianCopyrightInSource
3462 );
3463 assert_eq!(
3464 detect_debian_copyright_datasource(&PathBuf::from(
3465 "src/third_party/gperftools/dist/packages/deb/copyright"
3466 )),
3467 DatasourceId::DebianCopyrightStandalone
3468 );
3469 assert_eq!(
3470 detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
3471 DatasourceId::DebianCopyrightStandalone
3472 );
3473 assert_eq!(
3474 detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3475 DatasourceId::DebianCopyrightInPackage
3476 );
3477 assert_eq!(
3478 detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3479 DatasourceId::DebianCopyrightStandalone
3480 );
3481 }
3482
3483 #[test]
3484 fn test_extract_package_name_from_path() {
3485 assert_eq!(
3486 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3487 Some("bash".to_string())
3488 );
3489 assert_eq!(
3490 extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3491 Some("libseccomp2".to_string())
3492 );
3493 assert_eq!(
3494 extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3495 None
3496 );
3497 assert_eq!(
3498 extract_standalone_package_name_from_path(
3499 &PathBuf::from("ports/zlib/copyright"),
3500 DatasourceId::DebianCopyrightStandalone,
3501 ),
3502 Some("zlib".to_string())
3503 );
3504 }
3505
3506 #[test]
3507 fn test_parse_copyright_dep5_format() {
3508 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3509Upstream-Name: libseccomp
3510Source: https://sourceforge.net/projects/libseccomp/
3511
3512Files: *
3513Copyright: 2012 Paul Moore <pmoore@redhat.com>
3514 2012 Ashley Lai <adlai@us.ibm.com>
3515License: LGPL-2.1
3516
3517License: LGPL-2.1
3518 This library is free software
3519";
3520 let pkg = parse_copyright_file(content, Some("libseccomp"));
3521 assert_eq!(pkg.name, Some("libseccomp".to_string()));
3522 assert_eq!(pkg.namespace, Some("debian".to_string()));
3523 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3524 assert_eq!(
3525 pkg.extracted_license_statement,
3526 Some("LGPL-2.1".to_string())
3527 );
3528 assert!(pkg.parties.len() >= 2);
3529 assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3530 assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3531 }
3532
3533 #[test]
3534 fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3535 let path = PathBuf::from(
3536 "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3537 );
3538 let pkg = DebianCopyrightParser::extract_first_package(&path);
3539
3540 assert_eq!(pkg.name, Some("bsdutils".to_string()));
3541 let extracted = pkg
3542 .extracted_license_statement
3543 .as_deref()
3544 .expect("license statement should exist");
3545 assert!(extracted.contains("GPL-2+"));
3546 assert!(!pkg.license_detections.is_empty());
3547
3548 let primary = &pkg.license_detections[0];
3549 assert_eq!(
3550 primary.matches[0].matched_text.as_deref(),
3551 Some("License: GPL-2+")
3552 );
3553 assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
3554 assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
3555 }
3556
3557 #[test]
3558 fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3559 let path = PathBuf::from("testdata/debian/copyright/copyright");
3560 let pkg = DebianCopyrightParser::extract_first_package(&path);
3561
3562 assert_eq!(pkg.license_detections.len(), 1);
3563 assert_eq!(pkg.other_license_detections.len(), 4);
3564
3565 let primary = &pkg.license_detections[0];
3566 assert_eq!(
3567 primary.matches[0].matched_text.as_deref(),
3568 Some("License: LGPL-2.1")
3569 );
3570 assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());
3571
3572 let ordered_lines: Vec<usize> = pkg
3573 .other_license_detections
3574 .iter()
3575 .map(|detection| detection.matches[0].start_line.get())
3576 .collect();
3577 assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3578
3579 let ordered_texts: Vec<&str> = pkg
3580 .other_license_detections
3581 .iter()
3582 .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3583 .collect();
3584 assert_eq!(
3585 ordered_texts,
3586 vec![
3587 "License: LGPL-2.1",
3588 "License: LGPL-2.1",
3589 "License: LGPL-2.1",
3590 "License: LGPL-2.1",
3591 ]
3592 );
3593 }
3594
3595 #[test]
3596 fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3597 let path = PathBuf::from(
3598 "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3599 );
3600 let pkg = DebianCopyrightParser::extract_first_package(&path);
3601
3602 let zlib = pkg
3603 .other_license_detections
3604 .iter()
3605 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3606 .expect("at least one Zlib license paragraph should be detected");
3607 assert_eq!(
3608 zlib.matches[0].matched_text.as_deref(),
3609 Some("License: Zlib")
3610 );
3611
3612 let last_zlib = pkg
3613 .other_license_detections
3614 .iter()
3615 .rev()
3616 .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3617 .expect("bottom standalone Zlib license paragraph should be detected");
3618 assert_eq!(
3619 last_zlib.matches[0].start_line,
3620 LineNumber::new(732).unwrap()
3621 );
3622 assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
3623 }
3624
3625 #[test]
3626 fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3627 let path =
3628 PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3629 let pkg = DebianCopyrightParser::extract_first_package(&path);
3630
3631 assert_eq!(pkg.license_detections.len(), 1);
3632 let primary = &pkg.license_detections[0];
3633 assert_eq!(
3634 primary.matches[0].matched_text.as_deref(),
3635 Some("License: LGPL-3+ or GPL-2+")
3636 );
3637 assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
3638 assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());
3639
3640 assert!(pkg.other_license_detections.iter().any(|detection| {
3641 detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3642 }));
3643 }
3644
3645 #[test]
3646 fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3647 let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3648 let pkg = parse_copyright_file(content, Some("foo"));
3649
3650 assert_eq!(pkg.license_detections.len(), 1);
3651 let primary = &pkg.license_detections[0];
3652 assert_eq!(
3653 primary.matches[0].matched_text.as_deref(),
3654 Some("License: GPL-2+")
3655 );
3656 assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
3657 }
3658
3659 #[test]
3660 fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3661 let raw_lines = vec![
3662 "Files: *".to_string(),
3663 "Copyright: 2024 Example Org".to_string(),
3664 "License: Apache-2.0".to_string(),
3665 " Licensed under the Apache License, Version 2.0.".to_string(),
3666 ];
3667
3668 let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3669 let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3670 .into_iter()
3671 .next()
3672 .expect("reference RFC822 paragraph should parse");
3673
3674 assert_eq!(paragraph.metadata.headers, expected.headers);
3675 assert_eq!(paragraph.metadata.body, expected.body);
3676 assert_eq!(
3677 paragraph.license_header_line,
3678 Some(("License: Apache-2.0".to_string(), 12))
3679 );
3680 }
3681
3682 #[test]
3683 fn test_parse_copyright_unstructured() {
3684 let content = "This package was debianized by John Doe.
3685
3686Upstream Authors:
3687 Jane Smith
3688
3689Copyright:
3690 2009 10gen
3691
3692License:
3693 SSPL
3694";
3695 let pkg = parse_copyright_file(content, Some("mongodb"));
3696 assert_eq!(pkg.name, Some("mongodb".to_string()));
3697 assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3698 assert!(!pkg.parties.is_empty());
3699 }
3700
3701 #[test]
3702 fn test_parse_copyright_holders() {
3703 let text = "2012 Paul Moore <pmoore@redhat.com>
37042012 Ashley Lai <adlai@us.ibm.com>
3705Copyright (C) 2015-2018 Example Corp";
3706 let holders = parse_copyright_holders(text);
3707 assert!(holders.len() >= 3);
3708 assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3709 assert!(holders.iter().any(|h| h.contains("Example Corp")));
3710 }
3711
3712 #[test]
3713 fn test_parse_copyright_empty() {
3714 let content = "This is just some text without proper copyright info.";
3715 let pkg = parse_copyright_file(content, Some("test"));
3716 assert_eq!(pkg.name, Some("test".to_string()));
3717 assert!(pkg.parties.is_empty());
3718 assert!(pkg.extracted_license_statement.is_none());
3719 }
3720
3721 #[test]
3722 fn test_merge_debian_copyright_into_package_preserves_license_fields() {
3723 let copyright = parse_copyright_file(
3724 "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
3725 Upstream-Name: demo\n\n\
3726 Files: *\n\
3727 Copyright: 2024 Example\n\
3728 License: MIT\n\n\
3729 Files: debian/*\n\
3730 Copyright: 2024 Debian Example\n\
3731 License: Apache-2.0\n",
3732 Some("demo"),
3733 );
3734 let mut target = default_package_data(DatasourceId::DebianDeb);
3735
3736 merge_debian_copyright_into_package(&mut target, ©right);
3737
3738 assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
3739 assert_eq!(
3740 target.declared_license_expression_spdx.as_deref(),
3741 Some("MIT")
3742 );
3743 assert_eq!(
3744 target.other_license_expression.as_deref(),
3745 Some("apache-2.0")
3746 );
3747 assert_eq!(
3748 target.other_license_expression_spdx.as_deref(),
3749 Some("Apache-2.0")
3750 );
3751 assert_eq!(target.license_detections.len(), 1);
3752 assert_eq!(target.other_license_detections.len(), 1);
3753 }
3754
3755 #[test]
3756 fn test_deb_parser_is_match() {
3757 assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3758 assert!(DebianDebParser::is_match(&PathBuf::from(
3759 "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3760 )));
3761 assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3762 assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3763 }
3764
3765 #[test]
3766 fn test_parse_deb_filename_with_arch() {
3767 let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3768 assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3769 assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3770 assert_eq!(pkg.namespace, Some("debian".to_string()));
3771 assert_eq!(
3772 pkg.purl,
3773 Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3774 );
3775 assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3776 }
3777
3778 #[test]
3779 fn test_parse_deb_filename_without_arch() {
3780 let pkg = parse_deb_filename("package_1.0-1_all.deb");
3781 assert_eq!(pkg.name, Some("package".to_string()));
3782 assert_eq!(pkg.version, Some("1.0-1".to_string()));
3783 assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3784 }
3785
3786 #[test]
3787 fn test_extract_deb_archive() {
3788 let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3789 if !test_path.exists() {
3790 return;
3791 }
3792
3793 let pkg = DebianDebParser::extract_first_package(&test_path);
3794
3795 assert_eq!(pkg.name, Some("adduser".to_string()));
3796 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3797 assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3798 assert!(pkg.description.is_some());
3799 assert!(!pkg.parties.is_empty());
3800
3801 assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3802 assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3803 }
3804
3805 #[test]
3806 fn test_extract_deb_archive_with_control_tar_xz() {
3807 let deb = create_synthetic_deb_with_control_tar_xz();
3808
3809 let pkg = DebianDebParser::extract_first_package(deb.path());
3810
3811 assert_eq!(pkg.name, Some("synthetic".to_string()));
3812 assert_eq!(pkg.version, Some("1.2.3".to_string()));
3813 assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3814 assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3815 }
3816
3817 #[test]
3818 fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3819 let deb = create_synthetic_deb_with_copyright();
3820
3821 let pkg = DebianDebParser::extract_first_package(deb.path());
3822
3823 assert_eq!(pkg.name, Some("synthetic".to_string()));
3824 assert_eq!(
3825 pkg.extracted_license_statement,
3826 Some("Apache-2.0".to_string())
3827 );
3828 assert!(pkg.parties.iter().any(|party| {
3829 party.role.as_deref() == Some("copyright-holder")
3830 && party.name.as_deref() == Some("Example Org")
3831 }));
3832 }
3833
3834 #[test]
3835 fn test_parse_deb_filename_simple() {
3836 let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3837 assert_eq!(pkg.name, Some("adduser".to_string()));
3838 assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3839 assert_eq!(pkg.namespace, Some("debian".to_string()));
3840 }
3841
3842 #[test]
3843 fn test_parse_deb_filename_invalid() {
3844 let pkg = parse_deb_filename("invalid.deb");
3845 assert!(pkg.name.is_none());
3846 assert!(pkg.version.is_none());
3847 }
3848
3849 #[test]
3850 fn test_distroless_parser() {
3851 let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3852
3853 assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3854
3855 if !test_file.exists() {
3856 eprintln!("Warning: Test file not found, skipping test");
3857 return;
3858 }
3859
3860 let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3861
3862 assert_eq!(pkg.package_type, Some(PackageType::Deb));
3863 assert_eq!(
3864 pkg.datasource_id,
3865 Some(DatasourceId::DebianDistrolessInstalledDb)
3866 );
3867 assert_eq!(pkg.name, Some("base-files".to_string()));
3868 assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3869 assert_eq!(pkg.namespace, Some("debian".to_string()));
3870 assert!(pkg.purl.is_some());
3871 assert!(
3872 pkg.purl
3873 .as_ref()
3874 .unwrap()
3875 .contains("pkg:deb/debian/base-files")
3876 );
3877 }
3878}