1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType, Party};
5use crate::parser_warn as warn;
6use crate::parsers::rfc822::{self, Rfc822Metadata};
7use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
8use crate::utils::spdx::combine_license_expressions;
9
10use super::utils::build_debian_purl;
11use super::{PACKAGE_TYPE, default_package_data};
12use crate::parsers::PackageParser;
13use crate::parsers::license_normalization::{
14 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
15 normalize_declared_license_key,
16};
17
/// Parser for Debian `copyright` files.
///
/// The type carries no state; all behavior lives in its `PackageParser` implementation.
pub struct DebianCopyrightParser;
20
21impl PackageParser for DebianCopyrightParser {
22 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
23
24 fn is_match(path: &Path) -> bool {
25 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
26 if filename != "copyright" {
27 return filename.ends_with("_copyright");
28 }
29 let path_str = path.to_string_lossy();
30 path_str.contains("/debian/")
31 || path_str.contains("/ports/")
32 || path_str.starts_with("ports/")
33 || path_str.contains("/packages/deb/")
34 || path_str.contains("/usr/share/doc/")
35 || path_str.ends_with("debian/copyright")
36 } else {
37 false
38 }
39 }
40
41 fn extract_packages(path: &Path) -> Vec<PackageData> {
42 let datasource_id = detect_debian_copyright_datasource(path);
43 let content = match read_file_to_string(path, None) {
44 Ok(c) => c,
45 Err(e) => {
46 warn!("Failed to read copyright file {:?}: {}", path, e);
47 return vec![default_package_data(datasource_id)];
48 }
49 };
50
51 let package_name = extract_package_name_from_path(path)
52 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
53 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
54 package_data.datasource_id = Some(datasource_id);
55 vec![package_data]
56 }
57}
58
// Registers the Debian copyright parser: a description string, the path globs, the "deb"
// label, an empty string, and a link to the copyright-format 1.0 specification.
// NOTE(review): the meaning of each positional argument is inferred from its value;
// confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
72
73fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
74 let path_str = path.to_string_lossy();
75 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
76 DatasourceId::DebianCopyrightInSource
77 } else if path_str.contains("/usr/share/doc/") {
78 DatasourceId::DebianCopyrightInPackage
79 } else {
80 DatasourceId::DebianCopyrightStandalone
81 }
82}
83
/// Derives the package name from a `.../doc/<package>/<file>` style path.
///
/// Scans the path components for the first directory named `doc` and returns the name of
/// the component that follows it. That component is only accepted when at least one more
/// component comes after it, so the copyright file itself is never mistaken for the
/// package directory (e.g. `ports/doc/copyright` yields `None`, not `"copyright"`).
///
/// Returns `None` when no such `doc/<package>/...` sequence exists, or when the package
/// directory name of the first match is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    let components: Vec<_> = path.components().collect();

    // Each window is (candidate `doc` dir, package dir, something below the package dir).
    components
        .windows(3)
        .find_map(|window| match (window[0], window[1]) {
            (std::path::Component::Normal(dir), std::path::Component::Normal(package))
                if dir.to_str() == Some("doc") =>
            {
                // Keep the inner `Option` so a non-UTF-8 name on the first match ends the
                // search instead of falling through to a later `doc` directory.
                Some(package.to_str().map(|name| name.to_string()))
            }
            _ => None,
        })
        .flatten()
}
98
99fn extract_standalone_package_name_from_path(
100 path: &Path,
101 datasource_id: DatasourceId,
102) -> Option<String> {
103 if datasource_id != DatasourceId::DebianCopyrightStandalone {
104 return None;
105 }
106
107 path.file_name()
108 .and_then(|name| name.to_str())
109 .filter(|name| *name == "copyright")?;
110
111 path.parent()
112 .and_then(|parent| parent.file_name())
113 .and_then(|name| name.to_str())
114 .map(str::to_string)
115}
116
117pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
118 let paragraphs = parse_copyright_paragraphs_with_lines(content);
119
120 let is_dep5 = paragraphs
121 .first()
122 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
123 .is_some();
124
125 let namespace = Some("debian".to_string());
126 let mut parties = Vec::new();
127 let mut license_statements = Vec::new();
128 let mut primary_license_detection = None;
129 let mut header_license_detection = None;
130 let mut other_license_detections = Vec::new();
131
132 if is_dep5 {
133 let mut para_count = 0usize;
134 for para in ¶graphs {
135 para_count += 1;
136 if para_count > MAX_ITERATION_COUNT {
137 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
138 break;
139 }
140 if let Some(copyright_text) =
141 rfc822::get_header_first(¶.metadata.headers, "copyright")
142 {
143 for holder in parse_copyright_holders(©right_text) {
144 if !holder.is_empty() {
145 parties.push(Party {
146 r#type: None,
147 role: Some("copyright-holder".to_string()),
148 name: Some(holder),
149 email: None,
150 url: None,
151 organization: None,
152 organization_url: None,
153 timezone: None,
154 });
155 }
156 }
157 }
158
159 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
160 let license_name = license.lines().next().unwrap_or(&license).trim();
161 if !license_name.is_empty()
162 && !license_statements.contains(&license_name.to_string())
163 {
164 license_statements.push(license_name.to_string());
165 }
166
167 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
168 let detection =
169 build_primary_license_detection(license_name, matched_text, line_no);
170 let is_header_paragraph =
171 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
172 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
173 == Some("*")
174 {
175 primary_license_detection = Some(detection);
176 } else if is_header_paragraph {
177 header_license_detection.get_or_insert(detection);
178 } else {
179 other_license_detections.push(detection);
180 }
181 }
182 }
183 }
184
185 if primary_license_detection.is_none() && header_license_detection.is_some() {
186 primary_license_detection = header_license_detection;
187 }
188 } else {
189 let copyright_block = extract_unstructured_field(content, "Copyright:");
190 if let Some(text) = copyright_block {
191 for holder in parse_copyright_holders(&text) {
192 if !holder.is_empty() {
193 parties.push(Party {
194 r#type: None,
195 role: Some("copyright-holder".to_string()),
196 name: Some(holder),
197 email: None,
198 url: None,
199 organization: None,
200 organization_url: None,
201 timezone: None,
202 });
203 }
204 }
205 }
206
207 let license_block = extract_unstructured_field(content, "License:");
208 if let Some(text) = license_block {
209 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
210 }
211 }
212
213 let extracted_license_statement = if license_statements.is_empty() {
214 None
215 } else {
216 Some(truncate_field(license_statements.join(" AND ")))
217 };
218
219 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
220 let declared_license_expression = license_detections
221 .first()
222 .map(|detection| detection.license_expression.clone());
223 let declared_license_expression_spdx = license_detections
224 .first()
225 .map(|detection| detection.license_expression_spdx.clone());
226 let other_license_expression = combine_license_expressions(
227 other_license_detections
228 .iter()
229 .map(|detection| detection.license_expression.clone()),
230 );
231 let other_license_expression_spdx = combine_license_expressions(
232 other_license_detections
233 .iter()
234 .map(|detection| detection.license_expression_spdx.clone()),
235 );
236
237 PackageData {
238 datasource_id: Some(DatasourceId::DebianCopyright),
239 package_type: Some(PACKAGE_TYPE),
240 namespace: namespace.clone(),
241 name: package_name.map(|s| truncate_field(s.to_string())),
242 parties,
243 declared_license_expression,
244 declared_license_expression_spdx,
245 license_detections,
246 other_license_expression,
247 other_license_expression_spdx,
248 other_license_detections,
249 extracted_license_statement,
250 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
251 ..Default::default()
252 }
253}
254
/// One paragraph of a copyright file: a run of consecutive non-empty lines, parsed into
/// header fields.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields keyed by lower-cased field name. `finalize_copyright_paragraph`
    /// always leaves `body` empty.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (trailing whitespace removed)
    /// paired with its 1-based line number in the whole file, if the paragraph has one.
    license_header_line: Option<(String, usize)>,
}
260
261fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
262 let mut paragraphs = Vec::new();
263 let mut current_lines = Vec::new();
264 let mut current_start_line = 1usize;
265 let mut count = 0usize;
266
267 for (idx, line) in content.lines().enumerate() {
268 count += 1;
269 if count > MAX_ITERATION_COUNT {
270 warn!(
271 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
272 );
273 break;
274 }
275 let line_no = idx + 1;
276 if line.is_empty() {
277 if !current_lines.is_empty() {
278 paragraphs.push(finalize_copyright_paragraph(
279 std::mem::take(&mut current_lines),
280 current_start_line,
281 ));
282 }
283 current_start_line = line_no + 1;
284 } else {
285 if current_lines.is_empty() {
286 current_start_line = line_no;
287 }
288 current_lines.push(line.to_string());
289 }
290 }
291
292 if !current_lines.is_empty() {
293 paragraphs.push(finalize_copyright_paragraph(
294 current_lines,
295 current_start_line,
296 ));
297 }
298
299 paragraphs
300}
301
302fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
303 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
304 let mut current_name: Option<String> = None;
305 let mut current_value = String::new();
306 let mut license_header_line = None;
307
308 for (idx, line) in raw_lines.iter().enumerate() {
309 if line.starts_with(' ') || line.starts_with('\t') {
310 if current_name.is_some() {
311 current_value.push('\n');
312 current_value.push_str(line);
313 }
314 continue;
315 }
316
317 if let Some(name) = current_name.take() {
318 add_copyright_header_value(&mut headers, &name, ¤t_value);
319 current_value.clear();
320 }
321
322 if let Some((name, value)) = line.split_once(':') {
323 let normalized_name = name.trim().to_ascii_lowercase();
324 if normalized_name == "license" && license_header_line.is_none() {
325 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
326 }
327 current_name = Some(normalized_name);
328 current_value = value.trim_start().to_string();
329 }
330 }
331
332 if let Some(name) = current_name.take() {
333 add_copyright_header_value(&mut headers, &name, ¤t_value);
334 }
335
336 CopyrightParagraph {
337 metadata: Rfc822Metadata {
338 headers,
339 body: String::new(),
340 },
341 license_header_line,
342 }
343}
344
/// Records `value` under `name` in `headers`.
///
/// The entry for `name` is always created, even when nothing is stored in it. The value
/// has trailing whitespace removed and is appended only if something remains.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
352
353fn build_primary_license_detection(
354 license_name: &str,
355 matched_text: String,
356 line_no: usize,
357) -> LicenseDetection {
358 let normalized = normalize_debian_license_name(license_name);
359 let line = match LineNumber::new(line_no) {
360 Some(l) => l,
361 None => {
362 warn!(
363 "build_primary_license_detection: line number {} out of range, clamping to 1",
364 line_no
365 );
366 LineNumber::new(1).expect("1 is a valid line number")
367 }
368 };
369
370 build_declared_license_detection(
371 &normalized,
372 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
373 )
374}
375
376fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
377 match license_name.trim() {
378 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
379 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
380 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
381 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
382 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
383 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
384 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
385 "public-domain" => {
386 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
387 }
388 other => normalize_declared_license_key(other)
389 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
390 }
391}
392
393fn parse_copyright_holders(text: &str) -> Vec<String> {
394 let mut holders = Vec::new();
395 let mut count = 0usize;
396
397 for line in text.lines() {
398 count += 1;
399 if count > MAX_ITERATION_COUNT {
400 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
401 break;
402 }
403 let line = line.trim();
404 if line.is_empty() {
405 continue;
406 }
407
408 let cleaned = line
409 .trim_start_matches("Copyright")
410 .trim_start_matches("copyright")
411 .trim_start_matches("(C)")
412 .trim_start_matches("(c)")
413 .trim_start_matches("©")
414 .trim();
415
416 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
417 let without_years = &cleaned[year_end..];
418 let holder = without_years
419 .trim_start_matches(',')
420 .trim_start_matches('-')
421 .trim();
422
423 if !holder.is_empty() && holder.len() > 2 {
424 holders.push(holder.to_string());
425 }
426 }
427 }
428
429 holders
430}
431
432fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
433 let mut in_field = false;
434 let mut field_content = String::new();
435 let mut count = 0usize;
436
437 for line in content.lines() {
438 count += 1;
439 if count > MAX_ITERATION_COUNT {
440 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
441 break;
442 }
443 if line.starts_with(field_name) {
444 in_field = true;
445 field_content.push_str(line.trim_start_matches(field_name).trim());
446 field_content.push('\n');
447 } else if in_field {
448 if line.starts_with(char::is_whitespace) {
449 field_content.push_str(line.trim());
450 field_content.push('\n');
451 } else if !line.trim().is_empty() {
452 break;
453 }
454 }
455 }
456
457 let trimmed = field_content.trim();
458 if trimmed.is_empty() {
459 None
460 } else {
461 Some(truncate_field(trimmed.to_string()))
462 }
463}
464
// Unit tests for the Debian copyright parser: path matching, datasource detection,
// package-name extraction, and parsing of both structured and unstructured files.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    // `is_match` accepts `copyright` files in known locations and `*_copyright` files,
    // and rejects other names and locations.
    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    // Each supported path layout maps to the expected datasource id.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    // Package names come from the directory after `doc`, or from the parent directory
    // for standalone files.
    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    // A file with a `Format` header yields name, namespace, a de-duplicated license
    // statement, and copyright-holder parties.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    // Fixture-based: the primary detection carries the matched header text and its
    // line number in the file.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    // Fixture-based: "other" detections keep file order, absolute line numbers, and the
    // original letter case of the matched text.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    // Fixture-based: a stand-alone `License:` paragraph at the end of the file is
    // detected with the right line number.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    // Fixture-based: when no `Files: *` paragraph supplies a license, the header
    // paragraph's license becomes the primary detection.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    // A `Files: *` license wins over a license declared in the header paragraph.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    // The local paragraph parser must agree with the shared RFC822 parser on headers and
    // body, and additionally report the license header line.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    // A file without a `Format` header falls back to free-form section scanning.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
 Jane Smith

Copyright:
 2009 10gen

License:
 SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    // Holder extraction handles year-prefixed lines and `Copyright (C)` prefixed lines.
    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    // Text with no copyright or license sections yields no parties and no license.
    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    // Merging parsed copyright data into another package keeps both the declared and the
    // "other" license fields.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
 Upstream-Name: demo\n\n\
 Files: *\n\
 Copyright: 2024 Example\n\
 License: MIT\n\n\
 Files: debian/*\n\
 Copyright: 2024 Debian Example\n\
 License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }
}