1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::rfc822::{self, Rfc822Metadata};
10use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
11use crate::utils::spdx::combine_license_expressions;
12
13use super::utils::{build_debian_purl, make_party};
14use super::{PACKAGE_TYPE, default_package_data, read_or_default};
15use crate::parsers::PackageParser;
16use crate::parsers::license_normalization::{
17 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
18 normalize_declared_license_key,
19};
20
21pub struct DebianCopyrightParser;
23
24impl PackageParser for DebianCopyrightParser {
25 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
26
27 fn is_match(path: &Path) -> bool {
28 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
29 if filename != "copyright" {
30 return filename.ends_with("_copyright");
31 }
32 let path_str = path.to_string_lossy();
33 path_str.contains("/debian/")
34 || path_str.contains("/ports/")
35 || path_str.starts_with("ports/")
36 || path_str.contains("/packages/deb/")
37 || path_str.contains("/usr/share/doc/")
38 || path_str.ends_with("debian/copyright")
39 } else {
40 false
41 }
42 }
43
44 fn extract_packages(path: &Path) -> Vec<PackageData> {
45 let datasource_id = detect_debian_copyright_datasource(path);
46 let content = read_or_default!(path, "copyright file", datasource_id);
47
48 let package_name = extract_package_name_from_path(path)
49 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
50 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
51 package_data.datasource_id = Some(datasource_id);
52 vec![package_data]
53 }
54}
55
// Registration metadata for this module's parser: a human-readable
// description, the path globs it applies to, and a link to the format spec.
// The globs appear to mirror the checks in `DebianCopyrightParser::is_match`;
// keep the two in sync when either changes.
// NOTE(review): the exact meaning of the `"deb"` and `""` positional arguments
// is defined by `register_parser!` — confirm against the macro definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
69
70fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
71 let path_str = path.to_string_lossy();
72 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
73 DatasourceId::DebianCopyrightInSource
74 } else if path_str.contains("/usr/share/doc/") {
75 DatasourceId::DebianCopyrightInPackage
76 } else {
77 DatasourceId::DebianCopyrightStandalone
78 }
79}
80
/// Derives the package name from a `.../doc/<package>/<file>` path.
///
/// Returns the name of the directory that directly follows the first
/// directory named `doc` (e.g. `bash` for `/usr/share/doc/bash/copyright`).
///
/// Only directory components are considered: the final component is the
/// copyright file itself, so a file sitting directly inside `doc/`
/// (`doc/sample_copyright`) yields `None` instead of reporting the file name
/// as a package name.
///
/// Returns `None` when no `doc/<package>/` pair exists or when the package
/// directory name is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    let components: Vec<_> = path.components().collect();

    // Drop the last component: it names the file, never the package directory.
    let (_file_name, directories) = components.split_last()?;

    directories
        .windows(2)
        .find_map(|pair| match pair {
            [std::path::Component::Normal(parent), std::path::Component::Normal(package)]
                if parent.to_str() == Some("doc") =>
            {
                // Stop at the first `doc/<dir>` pair even if the name is not UTF-8.
                Some(package.to_str().map(str::to_string))
            }
            _ => None,
        })
        .flatten()
}
95
96fn extract_standalone_package_name_from_path(
97 path: &Path,
98 datasource_id: DatasourceId,
99) -> Option<String> {
100 if datasource_id != DatasourceId::DebianCopyrightStandalone {
101 return None;
102 }
103
104 path.file_name()
105 .and_then(|name| name.to_str())
106 .filter(|name| *name == "copyright")?;
107
108 path.parent()
109 .and_then(|parent| parent.file_name())
110 .and_then(|name| name.to_str())
111 .map(str::to_string)
112}
113
114pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
115 let paragraphs = parse_copyright_paragraphs_with_lines(content);
116
117 let is_dep5 = paragraphs
118 .first()
119 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
120 .is_some();
121
122 let namespace = Some("debian".to_string());
123 let mut parties = Vec::new();
124 let mut license_statements = Vec::new();
125 let mut primary_license_detection = None;
126 let mut header_license_detection = None;
127 let mut other_license_detections = Vec::new();
128
129 if is_dep5 {
130 let mut para_count = 0usize;
131 for para in ¶graphs {
132 para_count += 1;
133 if para_count > MAX_ITERATION_COUNT {
134 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
135 break;
136 }
137 if let Some(copyright_text) =
138 rfc822::get_header_first(¶.metadata.headers, "copyright")
139 {
140 for holder in parse_copyright_holders(©right_text) {
141 if !holder.is_empty() {
142 parties.push(make_party(None, "copyright-holder", Some(holder), None));
143 }
144 }
145 }
146
147 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
148 let license_name = license.lines().next().unwrap_or(&license).trim();
149 if !license_name.is_empty()
150 && !license_statements.contains(&license_name.to_string())
151 {
152 license_statements.push(license_name.to_string());
153 }
154
155 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
156 let detection =
157 build_primary_license_detection(license_name, matched_text, line_no);
158 let is_header_paragraph =
159 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
160 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
161 == Some("*")
162 {
163 primary_license_detection = Some(detection);
164 } else if is_header_paragraph {
165 header_license_detection.get_or_insert(detection);
166 } else {
167 other_license_detections.push(detection);
168 }
169 }
170 }
171 }
172
173 if primary_license_detection.is_none() && header_license_detection.is_some() {
174 primary_license_detection = header_license_detection;
175 }
176 } else {
177 let copyright_block = extract_unstructured_field(content, "Copyright:");
178 if let Some(text) = copyright_block {
179 for holder in parse_copyright_holders(&text) {
180 if !holder.is_empty() {
181 parties.push(make_party(None, "copyright-holder", Some(holder), None));
182 }
183 }
184 }
185
186 let license_block = extract_unstructured_field(content, "License:");
187 if let Some(text) = license_block {
188 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
189 }
190 }
191
192 let extracted_license_statement = if license_statements.is_empty() {
193 None
194 } else {
195 Some(truncate_field(license_statements.join(" AND ")))
196 };
197
198 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
199 let declared_license_expression = license_detections
200 .first()
201 .map(|detection| detection.license_expression.clone());
202 let declared_license_expression_spdx = license_detections
203 .first()
204 .map(|detection| detection.license_expression_spdx.clone());
205 let other_license_expression = combine_license_expressions(
206 other_license_detections
207 .iter()
208 .map(|detection| detection.license_expression.clone()),
209 );
210 let other_license_expression_spdx = combine_license_expressions(
211 other_license_detections
212 .iter()
213 .map(|detection| detection.license_expression_spdx.clone()),
214 );
215
216 PackageData {
217 datasource_id: Some(DatasourceId::DebianCopyright),
218 package_type: Some(PACKAGE_TYPE),
219 namespace: namespace.clone(),
220 name: package_name.map(|s| truncate_field(s.to_string())),
221 parties,
222 declared_license_expression,
223 declared_license_expression_spdx,
224 license_detections,
225 other_license_expression,
226 other_license_expression_spdx,
227 other_license_detections,
228 extracted_license_statement,
229 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
230 ..Default::default()
231 }
232}
233
/// One blank-line-delimited paragraph of a copyright file, as produced by
/// `finalize_copyright_paragraph`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lower-cased field name.
    /// `finalize_copyright_paragraph` always leaves `body` empty.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (right-trimmed)
    /// paired with its line number (paragraph start line + offset within the
    /// paragraph), or `None` when the paragraph has no `License` header.
    license_header_line: Option<(String, usize)>,
}
239
240fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
241 let mut paragraphs = Vec::new();
242 let mut current_lines = Vec::new();
243 let mut current_start_line = 1usize;
244 let mut count = 0usize;
245
246 for (idx, line) in content.lines().enumerate() {
247 count += 1;
248 if count > MAX_ITERATION_COUNT {
249 warn!(
250 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
251 );
252 break;
253 }
254 let line_no = idx + 1;
255 if line.is_empty() {
256 if !current_lines.is_empty() {
257 paragraphs.push(finalize_copyright_paragraph(
258 std::mem::take(&mut current_lines),
259 current_start_line,
260 ));
261 }
262 current_start_line = line_no + 1;
263 } else {
264 if current_lines.is_empty() {
265 current_start_line = line_no;
266 }
267 current_lines.push(line.to_string());
268 }
269 }
270
271 if !current_lines.is_empty() {
272 paragraphs.push(finalize_copyright_paragraph(
273 current_lines,
274 current_start_line,
275 ));
276 }
277
278 paragraphs
279}
280
281fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
282 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
283 let mut current_name: Option<String> = None;
284 let mut current_value = String::new();
285 let mut license_header_line = None;
286
287 for (idx, line) in raw_lines.iter().enumerate() {
288 if line.starts_with(' ') || line.starts_with('\t') {
289 if current_name.is_some() {
290 current_value.push('\n');
291 current_value.push_str(line);
292 }
293 continue;
294 }
295
296 if let Some(name) = current_name.take() {
297 add_copyright_header_value(&mut headers, &name, ¤t_value);
298 current_value.clear();
299 }
300
301 if let Some((name, value)) = line.split_once(':') {
302 let normalized_name = name.trim().to_ascii_lowercase();
303 if normalized_name == "license" && license_header_line.is_none() {
304 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
305 }
306 current_name = Some(normalized_name);
307 current_value = value.trim_start().to_string();
308 }
309 }
310
311 if let Some(name) = current_name.take() {
312 add_copyright_header_value(&mut headers, &name, ¤t_value);
313 }
314
315 CopyrightParagraph {
316 metadata: Rfc822Metadata {
317 headers,
318 body: String::new(),
319 },
320 license_header_line,
321 }
322}
323
/// Records one occurrence of header `name`.
///
/// The key is always created, so a header that appears with an empty value is
/// still present in the map with an empty list. `value` is stored with
/// trailing whitespace removed, and only when something remains after
/// trimming.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
331
332fn build_primary_license_detection(
333 license_name: &str,
334 matched_text: String,
335 line_no: usize,
336) -> LicenseDetection {
337 let normalized = normalize_debian_license_name(license_name);
338 let line = match LineNumber::new(line_no) {
339 Some(l) => l,
340 None => {
341 warn!(
342 "build_primary_license_detection: line number {} out of range, clamping to 1",
343 line_no
344 );
345 LineNumber::new(1).expect("1 is a valid line number")
346 }
347 };
348
349 build_declared_license_detection(
350 &normalized,
351 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
352 )
353}
354
355fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
356 match license_name.trim() {
357 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
358 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
359 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
360 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
361 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
362 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
363 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
364 "public-domain" => {
365 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-scancode-public-domain")
366 }
367 other => normalize_declared_license_key(other)
368 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
369 }
370}
371
372fn parse_copyright_holders(text: &str) -> Vec<String> {
373 let mut holders = Vec::new();
374 let mut count = 0usize;
375
376 for line in text.lines() {
377 count += 1;
378 if count > MAX_ITERATION_COUNT {
379 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
380 break;
381 }
382 let line = line.trim();
383 if line.is_empty() {
384 continue;
385 }
386
387 let cleaned = line
388 .trim_start_matches("Copyright")
389 .trim_start_matches("copyright")
390 .trim_start_matches("(C)")
391 .trim_start_matches("(c)")
392 .trim_start_matches("©")
393 .trim();
394
395 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
396 let without_years = &cleaned[year_end..];
397 let holder = without_years
398 .trim_start_matches(',')
399 .trim_start_matches('-')
400 .trim();
401
402 if !holder.is_empty() && holder.len() > 2 {
403 holders.push(holder.to_string());
404 }
405 }
406 }
407
408 holders
409}
410
411fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
412 let mut in_field = false;
413 let mut field_content = String::new();
414 let mut count = 0usize;
415
416 for line in content.lines() {
417 count += 1;
418 if count > MAX_ITERATION_COUNT {
419 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
420 break;
421 }
422 if line.starts_with(field_name) {
423 in_field = true;
424 field_content.push_str(line.trim_start_matches(field_name).trim());
425 field_content.push('\n');
426 } else if in_field {
427 if line.starts_with(char::is_whitespace) {
428 field_content.push_str(line.trim());
429 field_content.push('\n');
430 } else if !line.trim().is_empty() {
431 break;
432 }
433 }
434 }
435
436 let trimmed = field_content.trim();
437 if trimmed.is_empty() {
438 None
439 } else {
440 Some(truncate_field(trimmed.to_string()))
441 }
442}
443
// Unit tests for the Debian copyright parser. Several tests read fixture
// files through relative `testdata/...` paths.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    // Path-based matching: known copyright locations and `*_copyright` files
    // are accepted, unrelated `copyright` files are rejected.
    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    // Datasource classification for source-tree, installed-package and
    // standalone locations.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    // Package name derivation from `doc/<name>/` and from the standalone
    // parent-directory fallback.
    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    // A minimal DEP-5 file: name, namespace, de-duplicated license statement
    // and copyright-holder parties.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    // Fixture-based: the primary detection carries the exact header line
    // text and its absolute line number in the file.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    // Fixture-based: secondary detections keep file order, absolute line
    // numbers and the original casing of the matched header line.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    // Fixture-based: a standalone `License:` paragraph at the end of the file
    // is detected with its own line number.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    // Fixture-based: when no `Files: *` paragraph supplies a detection, the
    // header paragraph's license becomes the primary one.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    // A `Files: *` license wins over a license given in the header paragraph.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    // The local paragraph parser must produce the same headers/body as the
    // shared RFC822 parser, plus the license header line and its line number.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    // Free-form (non-DEP-5) file: license and holders come from the
    // `License:` / `Copyright:` sections.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
 Jane Smith

Copyright:
 2009 10gen

License:
 SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    // Holder extraction from lines with leading years and copyright markers.
    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    // Text without copyright or license sections yields no parties and no
    // license statement.
    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    // Merging a parsed copyright into a package keeps both the declared and
    // the "other" license expressions and detections.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
             Upstream-Name: demo\n\n\
             Files: *\n\
             Copyright: 2024 Example\n\
             License: MIT\n\n\
             Files: debian/*\n\
             Copyright: 2024 Debian Example\n\
             License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }

    // `public-domain` maps to the ScanCode LicenseRef rather than an SPDX id.
    #[test]
    fn test_normalize_debian_public_domain_uses_scancode_license_ref() {
        let normalized = normalize_debian_license_name("public-domain");

        assert_eq!(normalized.declared_license_expression, "public-domain");
        assert_eq!(
            normalized.declared_license_expression_spdx,
            "LicenseRef-scancode-public-domain"
        );
    }
}