1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::rfc822::{self, Rfc822Metadata};
10use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
11use crate::utils::spdx::combine_license_expressions;
12
13use super::super::metadata::ParserMetadata;
14use super::utils::{build_debian_purl, make_party};
15use super::{PACKAGE_TYPE, default_package_data, read_or_default};
16use crate::parsers::PackageParser;
17use crate::parsers::license_normalization::{
18 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
19 normalize_declared_license_key,
20};
21
/// Parser for Debian `copyright` files.
///
/// Implements [`PackageParser`]; it recognises `copyright` files in Debian
/// source trees, installed package documentation directories and a few
/// standalone layouts, and handles both the machine-readable (DEP-5) format
/// and free-form text.
pub struct DebianCopyrightParser;
24
25impl PackageParser for DebianCopyrightParser {
26 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
27
28 fn metadata() -> Vec<ParserMetadata> {
29 vec![ParserMetadata {
30 description: "Debian machine-readable copyright file",
31 file_patterns: &[
32 "**/debian/copyright",
33 "**/ports/*/copyright",
34 "**/packages/deb/copyright",
35 "**/usr/share/doc/*/copyright",
36 "**/*_copyright",
37 ],
38 package_type: "deb",
39 primary_language: "",
40 documentation_url: Some(
41 "https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/",
42 ),
43 }]
44 }
45
46 fn is_match(path: &Path) -> bool {
47 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
48 if filename != "copyright" {
49 return filename.ends_with("_copyright");
50 }
51 let path_str = path.to_string_lossy();
52 path_str.contains("/debian/")
53 || path_str.contains("/ports/")
54 || path_str.starts_with("ports/")
55 || path_str.contains("/packages/deb/")
56 || path_str.contains("/usr/share/doc/")
57 || path_str.ends_with("debian/copyright")
58 } else {
59 false
60 }
61 }
62
63 fn extract_packages(path: &Path) -> Vec<PackageData> {
64 let datasource_id = detect_debian_copyright_datasource(path);
65 let content = read_or_default!(path, "copyright file", datasource_id);
66
67 let package_name = extract_package_name_from_path(path)
68 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
69 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
70 package_data.datasource_id = Some(datasource_id);
71 vec![package_data]
72 }
73}
74
75fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
76 let path_str = path.to_string_lossy();
77 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
78 DatasourceId::DebianCopyrightInSource
79 } else if path_str.contains("/usr/share/doc/") {
80 DatasourceId::DebianCopyrightInPackage
81 } else {
82 DatasourceId::DebianCopyrightStandalone
83 }
84}
85
/// Returns the name of the path component that directly follows the first
/// `doc` directory (e.g. `bash` for `/usr/share/doc/bash/copyright`).
///
/// Yields `None` when no `doc` component is followed by a normal component,
/// or when that following component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<Component<'_>> = path.components().collect();

    parts
        .windows(2)
        .find_map(|pair| match pair {
            [Component::Normal(dir), Component::Normal(following)]
                if dir.to_str() == Some("doc") =>
            {
                // The first qualifying pair decides the outcome, even when the
                // name turns out not to be UTF-8.
                Some(following.to_str().map(str::to_owned))
            }
            _ => None,
        })
        .flatten()
}
100
101fn extract_standalone_package_name_from_path(
102 path: &Path,
103 datasource_id: DatasourceId,
104) -> Option<String> {
105 if datasource_id != DatasourceId::DebianCopyrightStandalone {
106 return None;
107 }
108
109 path.file_name()
110 .and_then(|name| name.to_str())
111 .filter(|name| *name == "copyright")?;
112
113 path.parent()
114 .and_then(|parent| parent.file_name())
115 .and_then(|name| name.to_str())
116 .map(str::to_string)
117}
118
119pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
120 let paragraphs = parse_copyright_paragraphs_with_lines(content);
121
122 let is_dep5 = paragraphs
123 .first()
124 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
125 .is_some();
126
127 let namespace = Some("debian".to_string());
128 let mut parties = Vec::new();
129 let mut license_statements = Vec::new();
130 let mut primary_license_detection = None;
131 let mut header_license_detection = None;
132 let mut other_license_detections = Vec::new();
133
134 if is_dep5 {
135 let mut para_count = 0usize;
136 for para in ¶graphs {
137 para_count += 1;
138 if para_count > MAX_ITERATION_COUNT {
139 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
140 break;
141 }
142 if let Some(copyright_text) =
143 rfc822::get_header_first(¶.metadata.headers, "copyright")
144 {
145 for holder in parse_copyright_holders(©right_text) {
146 if !holder.is_empty() {
147 parties.push(make_party(None, "copyright-holder", Some(holder), None));
148 }
149 }
150 }
151
152 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
153 let license_name = license.lines().next().unwrap_or(&license).trim();
154 if !license_name.is_empty()
155 && !license_statements.contains(&license_name.to_string())
156 {
157 license_statements.push(license_name.to_string());
158 }
159
160 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
161 let detection =
162 build_primary_license_detection(license_name, matched_text, line_no);
163 let is_header_paragraph =
164 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
165 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
166 == Some("*")
167 {
168 primary_license_detection = Some(detection);
169 } else if is_header_paragraph {
170 header_license_detection.get_or_insert(detection);
171 } else {
172 other_license_detections.push(detection);
173 }
174 }
175 }
176 }
177
178 if primary_license_detection.is_none() && header_license_detection.is_some() {
179 primary_license_detection = header_license_detection;
180 }
181 } else {
182 let copyright_block = extract_unstructured_field(content, "Copyright:");
183 if let Some(text) = copyright_block {
184 for holder in parse_copyright_holders(&text) {
185 if !holder.is_empty() {
186 parties.push(make_party(None, "copyright-holder", Some(holder), None));
187 }
188 }
189 }
190
191 let license_block = extract_unstructured_field(content, "License:");
192 if let Some(text) = license_block {
193 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
194 }
195 }
196
197 let extracted_license_statement = if license_statements.is_empty() {
198 None
199 } else {
200 Some(truncate_field(license_statements.join(" AND ")))
201 };
202
203 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
204 let declared_license_expression = license_detections
205 .first()
206 .map(|detection| detection.license_expression.clone());
207 let declared_license_expression_spdx = license_detections
208 .first()
209 .map(|detection| detection.license_expression_spdx.clone());
210 let other_license_expression = combine_license_expressions(
211 other_license_detections
212 .iter()
213 .map(|detection| detection.license_expression.clone()),
214 );
215 let other_license_expression_spdx = combine_license_expressions(
216 other_license_detections
217 .iter()
218 .map(|detection| detection.license_expression_spdx.clone()),
219 );
220
221 PackageData {
222 datasource_id: Some(DatasourceId::DebianCopyright),
223 package_type: Some(PACKAGE_TYPE),
224 namespace: namespace.clone(),
225 name: package_name.map(|s| truncate_field(s.to_string())),
226 parties,
227 declared_license_expression,
228 declared_license_expression_spdx,
229 license_detections,
230 other_license_expression,
231 other_license_expression_spdx,
232 other_license_detections,
233 extracted_license_statement,
234 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
235 ..Default::default()
236 }
237}
238
/// One blank-line-delimited paragraph of a copyright file, together with the
/// location of its `License` header.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Parsed headers of the paragraph (names are stored lower-cased by
    /// `finalize_copyright_paragraph`, which leaves `body` empty).
    metadata: Rfc822Metadata,
    /// Text of the first `License:` header line (trailing whitespace removed)
    /// and its 1-based line number in the file, if the paragraph has one.
    license_header_line: Option<(String, usize)>,
}
244
245fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
246 let mut paragraphs = Vec::new();
247 let mut current_lines = Vec::new();
248 let mut current_start_line = 1usize;
249 let mut count = 0usize;
250
251 for (idx, line) in content.lines().enumerate() {
252 count += 1;
253 if count > MAX_ITERATION_COUNT {
254 warn!(
255 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
256 );
257 break;
258 }
259 let line_no = idx + 1;
260 if line.is_empty() {
261 if !current_lines.is_empty() {
262 paragraphs.push(finalize_copyright_paragraph(
263 std::mem::take(&mut current_lines),
264 current_start_line,
265 ));
266 }
267 current_start_line = line_no + 1;
268 } else {
269 if current_lines.is_empty() {
270 current_start_line = line_no;
271 }
272 current_lines.push(line.to_string());
273 }
274 }
275
276 if !current_lines.is_empty() {
277 paragraphs.push(finalize_copyright_paragraph(
278 current_lines,
279 current_start_line,
280 ));
281 }
282
283 paragraphs
284}
285
286fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
287 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
288 let mut current_name: Option<String> = None;
289 let mut current_value = String::new();
290 let mut license_header_line = None;
291
292 for (idx, line) in raw_lines.iter().enumerate() {
293 if line.starts_with(' ') || line.starts_with('\t') {
294 if current_name.is_some() {
295 current_value.push('\n');
296 current_value.push_str(line);
297 }
298 continue;
299 }
300
301 if let Some(name) = current_name.take() {
302 add_copyright_header_value(&mut headers, &name, ¤t_value);
303 current_value.clear();
304 }
305
306 if let Some((name, value)) = line.split_once(':') {
307 let normalized_name = name.trim().to_ascii_lowercase();
308 if normalized_name == "license" && license_header_line.is_none() {
309 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
310 }
311 current_name = Some(normalized_name);
312 current_value = value.trim_start().to_string();
313 }
314 }
315
316 if let Some(name) = current_name.take() {
317 add_copyright_header_value(&mut headers, &name, ¤t_value);
318 }
319
320 CopyrightParagraph {
321 metadata: Rfc822Metadata {
322 headers,
323 body: String::new(),
324 },
325 license_header_line,
326 }
327}
328
/// Registers `name` in `headers` and appends `value` (with trailing
/// whitespace removed) to its list of values.
///
/// The key is created even when the value is blank, in which case the list
/// is left untouched.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
336
337fn build_primary_license_detection(
338 license_name: &str,
339 matched_text: String,
340 line_no: usize,
341) -> LicenseDetection {
342 let normalized = normalize_debian_license_name(license_name);
343 let line = match LineNumber::new(line_no) {
344 Some(l) => l,
345 None => {
346 warn!(
347 "build_primary_license_detection: line number {} out of range, clamping to 1",
348 line_no
349 );
350 LineNumber::new(1).expect("1 is a valid line number")
351 }
352 };
353
354 build_declared_license_detection(
355 &normalized,
356 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
357 )
358}
359
360fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
361 match license_name.trim() {
362 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
363 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
364 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
365 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
366 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
367 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
368 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
369 "public-domain" => {
370 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-scancode-public-domain")
371 }
372 other => normalize_declared_license_key(other)
373 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
374 }
375}
376
377fn parse_copyright_holders(text: &str) -> Vec<String> {
378 let mut holders = Vec::new();
379 let mut count = 0usize;
380
381 for line in text.lines() {
382 count += 1;
383 if count > MAX_ITERATION_COUNT {
384 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
385 break;
386 }
387 let line = line.trim();
388 if line.is_empty() {
389 continue;
390 }
391
392 let cleaned = line
393 .trim_start_matches("Copyright")
394 .trim_start_matches("copyright")
395 .trim_start_matches("(C)")
396 .trim_start_matches("(c)")
397 .trim_start_matches("©")
398 .trim();
399
400 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
401 let without_years = &cleaned[year_end..];
402 let holder = without_years
403 .trim_start_matches(',')
404 .trim_start_matches('-')
405 .trim();
406
407 if !holder.is_empty() && holder.len() > 2 {
408 holders.push(holder.to_string());
409 }
410 }
411 }
412
413 holders
414}
415
416fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
417 let mut in_field = false;
418 let mut field_content = String::new();
419 let mut count = 0usize;
420
421 for line in content.lines() {
422 count += 1;
423 if count > MAX_ITERATION_COUNT {
424 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
425 break;
426 }
427 if line.starts_with(field_name) {
428 in_field = true;
429 field_content.push_str(line.trim_start_matches(field_name).trim());
430 field_content.push('\n');
431 } else if in_field {
432 if line.starts_with(char::is_whitespace) {
433 field_content.push_str(line.trim());
434 field_content.push('\n');
435 } else if !line.trim().is_empty() {
436 break;
437 }
438 }
439 }
440
441 let trimmed = field_content.trim();
442 if trimmed.is_empty() {
443 None
444 } else {
445 Some(truncate_field(trimmed.to_string()))
446 }
447}
448
/// Unit tests for the Debian copyright parser: path matching, datasource
/// classification, DEP-5 and unstructured parsing, and licence detections.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
 Jane Smith

Copyright:
 2009 10gen

License:
 SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
             Upstream-Name: demo\n\n\
             Files: *\n\
             Copyright: 2024 Example\n\
             License: MIT\n\n\
             Files: debian/*\n\
             Copyright: 2024 Debian Example\n\
             License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }

    #[test]
    fn test_normalize_debian_public_domain_uses_scancode_license_ref() {
        let normalized = normalize_debian_license_name("public-domain");

        assert_eq!(normalized.declared_license_expression, "public-domain");
        assert_eq!(
            normalized.declared_license_expression_spdx,
            "LicenseRef-scancode-public-domain"
        );
    }
}