1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
5use crate::parser_warn as warn;
6use crate::parsers::rfc822::{self, Rfc822Metadata};
7use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
8use crate::utils::spdx::combine_license_expressions;
9
10use super::utils::{build_debian_purl, make_party};
11use super::{PACKAGE_TYPE, default_package_data, read_or_default};
12use crate::parsers::PackageParser;
13use crate::parsers::license_normalization::{
14 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
15 normalize_declared_license_key,
16};
17
/// Parser for Debian `copyright` files.
///
/// It is a stateless unit type: all behaviour lives in its `PackageParser`
/// implementation, which is registered under the description
/// "Debian machine-readable copyright file".
pub struct DebianCopyrightParser;
20
21impl PackageParser for DebianCopyrightParser {
22 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
23
24 fn is_match(path: &Path) -> bool {
25 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
26 if filename != "copyright" {
27 return filename.ends_with("_copyright");
28 }
29 let path_str = path.to_string_lossy();
30 path_str.contains("/debian/")
31 || path_str.contains("/ports/")
32 || path_str.starts_with("ports/")
33 || path_str.contains("/packages/deb/")
34 || path_str.contains("/usr/share/doc/")
35 || path_str.ends_with("debian/copyright")
36 } else {
37 false
38 }
39 }
40
41 fn extract_packages(path: &Path) -> Vec<PackageData> {
42 let datasource_id = detect_debian_copyright_datasource(path);
43 let content = read_or_default!(path, "copyright file", datasource_id);
44
45 let package_name = extract_package_name_from_path(path)
46 .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
47 let mut package_data = parse_copyright_file(&content, package_name.as_deref());
48 package_data.datasource_id = Some(datasource_id);
49 vec![package_data]
50 }
51}
52
// Registers this parser's metadata: a human-readable description, the path
// globs it is associated with, and a link to the copyright-format 1.0
// specification.
// NOTE(review): the meaning of the "deb" and "" arguments is defined by
// `register_parser!`, which is not visible in this file — confirm against the
// macro definition before changing them.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
66
67fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
68 let path_str = path.to_string_lossy();
69 if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
70 DatasourceId::DebianCopyrightInSource
71 } else if path_str.contains("/usr/share/doc/") {
72 DatasourceId::DebianCopyrightInPackage
73 } else {
74 DatasourceId::DebianCopyrightStandalone
75 }
76}
77
/// Derives the package name from an installed-package documentation path.
///
/// Looks for the directory sequence `usr/share/doc/<package>` anywhere in
/// `path` and returns `<package>` for the first occurrence. Returns `None`
/// when the path has no such sequence or the package component is not valid
/// UTF-8.
///
/// Only the full `usr/share/doc` root is recognised. Matching a bare `doc`
/// directory would misread source-tree paths such as
/// `project/doc/debian/copyright` as belonging to a package named `debian`.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    const DOC_ROOT: [&str; 3] = ["usr", "share", "doc"];

    let components: Vec<_> = path.components().collect();

    // Slide over every run of `DOC_ROOT.len() + 1` components: the first three
    // must spell the doc root, the fourth is the candidate package directory.
    components.windows(DOC_ROOT.len() + 1).find_map(|window| {
        let (root, package) = window.split_at(DOC_ROOT.len());

        let under_doc_root = root.iter().zip(DOC_ROOT.iter()).all(|(component, expected)| {
            matches!(component, Component::Normal(name) if name.to_str() == Some(*expected))
        });

        match package[0] {
            Component::Normal(name) if under_doc_root => name.to_str().map(str::to_owned),
            _ => None,
        }
    })
}
92
93fn extract_standalone_package_name_from_path(
94 path: &Path,
95 datasource_id: DatasourceId,
96) -> Option<String> {
97 if datasource_id != DatasourceId::DebianCopyrightStandalone {
98 return None;
99 }
100
101 path.file_name()
102 .and_then(|name| name.to_str())
103 .filter(|name| *name == "copyright")?;
104
105 path.parent()
106 .and_then(|parent| parent.file_name())
107 .and_then(|name| name.to_str())
108 .map(str::to_string)
109}
110
111pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
112 let paragraphs = parse_copyright_paragraphs_with_lines(content);
113
114 let is_dep5 = paragraphs
115 .first()
116 .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
117 .is_some();
118
119 let namespace = Some("debian".to_string());
120 let mut parties = Vec::new();
121 let mut license_statements = Vec::new();
122 let mut primary_license_detection = None;
123 let mut header_license_detection = None;
124 let mut other_license_detections = Vec::new();
125
126 if is_dep5 {
127 let mut para_count = 0usize;
128 for para in ¶graphs {
129 para_count += 1;
130 if para_count > MAX_ITERATION_COUNT {
131 warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
132 break;
133 }
134 if let Some(copyright_text) =
135 rfc822::get_header_first(¶.metadata.headers, "copyright")
136 {
137 for holder in parse_copyright_holders(©right_text) {
138 if !holder.is_empty() {
139 parties.push(make_party(None, "copyright-holder", Some(holder), None));
140 }
141 }
142 }
143
144 if let Some(license) = rfc822::get_header_first(¶.metadata.headers, "license") {
145 let license_name = license.lines().next().unwrap_or(&license).trim();
146 if !license_name.is_empty()
147 && !license_statements.contains(&license_name.to_string())
148 {
149 license_statements.push(license_name.to_string());
150 }
151
152 if let Some((matched_text, line_no)) = para.license_header_line.clone() {
153 let detection =
154 build_primary_license_detection(license_name, matched_text, line_no);
155 let is_header_paragraph =
156 rfc822::get_header_first(¶.metadata.headers, "format").is_some();
157 if rfc822::get_header_first(¶.metadata.headers, "files").as_deref()
158 == Some("*")
159 {
160 primary_license_detection = Some(detection);
161 } else if is_header_paragraph {
162 header_license_detection.get_or_insert(detection);
163 } else {
164 other_license_detections.push(detection);
165 }
166 }
167 }
168 }
169
170 if primary_license_detection.is_none() && header_license_detection.is_some() {
171 primary_license_detection = header_license_detection;
172 }
173 } else {
174 let copyright_block = extract_unstructured_field(content, "Copyright:");
175 if let Some(text) = copyright_block {
176 for holder in parse_copyright_holders(&text) {
177 if !holder.is_empty() {
178 parties.push(make_party(None, "copyright-holder", Some(holder), None));
179 }
180 }
181 }
182
183 let license_block = extract_unstructured_field(content, "License:");
184 if let Some(text) = license_block {
185 license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
186 }
187 }
188
189 let extracted_license_statement = if license_statements.is_empty() {
190 None
191 } else {
192 Some(truncate_field(license_statements.join(" AND ")))
193 };
194
195 let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
196 let declared_license_expression = license_detections
197 .first()
198 .map(|detection| detection.license_expression.clone());
199 let declared_license_expression_spdx = license_detections
200 .first()
201 .map(|detection| detection.license_expression_spdx.clone());
202 let other_license_expression = combine_license_expressions(
203 other_license_detections
204 .iter()
205 .map(|detection| detection.license_expression.clone()),
206 );
207 let other_license_expression_spdx = combine_license_expressions(
208 other_license_detections
209 .iter()
210 .map(|detection| detection.license_expression_spdx.clone()),
211 );
212
213 PackageData {
214 datasource_id: Some(DatasourceId::DebianCopyright),
215 package_type: Some(PACKAGE_TYPE),
216 namespace: namespace.clone(),
217 name: package_name.map(|s| truncate_field(s.to_string())),
218 parties,
219 declared_license_expression,
220 declared_license_expression_spdx,
221 license_detections,
222 other_license_expression,
223 other_license_expression_spdx,
224 other_license_detections,
225 extracted_license_statement,
226 purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
227 ..Default::default()
228 }
229}
230
/// One blank-line-delimited paragraph of a copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Parsed headers of the paragraph, keyed by lowercased field name.
    metadata: Rfc822Metadata,
    /// The first `License` header line of the paragraph (trailing whitespace
    /// removed) together with its 1-based line number in the whole file, or
    /// `None` when the paragraph has no `License` header.
    license_header_line: Option<(String, usize)>,
}
236
237fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
238 let mut paragraphs = Vec::new();
239 let mut current_lines = Vec::new();
240 let mut current_start_line = 1usize;
241 let mut count = 0usize;
242
243 for (idx, line) in content.lines().enumerate() {
244 count += 1;
245 if count > MAX_ITERATION_COUNT {
246 warn!(
247 "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
248 );
249 break;
250 }
251 let line_no = idx + 1;
252 if line.is_empty() {
253 if !current_lines.is_empty() {
254 paragraphs.push(finalize_copyright_paragraph(
255 std::mem::take(&mut current_lines),
256 current_start_line,
257 ));
258 }
259 current_start_line = line_no + 1;
260 } else {
261 if current_lines.is_empty() {
262 current_start_line = line_no;
263 }
264 current_lines.push(line.to_string());
265 }
266 }
267
268 if !current_lines.is_empty() {
269 paragraphs.push(finalize_copyright_paragraph(
270 current_lines,
271 current_start_line,
272 ));
273 }
274
275 paragraphs
276}
277
278fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
279 let mut headers: HashMap<String, Vec<String>> = HashMap::new();
280 let mut current_name: Option<String> = None;
281 let mut current_value = String::new();
282 let mut license_header_line = None;
283
284 for (idx, line) in raw_lines.iter().enumerate() {
285 if line.starts_with(' ') || line.starts_with('\t') {
286 if current_name.is_some() {
287 current_value.push('\n');
288 current_value.push_str(line);
289 }
290 continue;
291 }
292
293 if let Some(name) = current_name.take() {
294 add_copyright_header_value(&mut headers, &name, ¤t_value);
295 current_value.clear();
296 }
297
298 if let Some((name, value)) = line.split_once(':') {
299 let normalized_name = name.trim().to_ascii_lowercase();
300 if normalized_name == "license" && license_header_line.is_none() {
301 license_header_line = Some((line.trim_end().to_string(), start_line + idx));
302 }
303 current_name = Some(normalized_name);
304 current_value = value.trim_start().to_string();
305 }
306 }
307
308 if let Some(name) = current_name.take() {
309 add_copyright_header_value(&mut headers, &name, ¤t_value);
310 }
311
312 CopyrightParagraph {
313 metadata: Rfc822Metadata {
314 headers,
315 body: String::new(),
316 },
317 license_header_line,
318 }
319}
320
/// Records `value` under the header `name`.
///
/// The header key is always created, even when nothing is stored. The value
/// is appended with trailing whitespace removed, and skipped entirely when
/// nothing remains after that trimming.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();

    let stripped = value.trim_end();
    if stripped.is_empty() {
        return;
    }
    values.push(stripped.to_owned());
}
328
329fn build_primary_license_detection(
330 license_name: &str,
331 matched_text: String,
332 line_no: usize,
333) -> LicenseDetection {
334 let normalized = normalize_debian_license_name(license_name);
335 let line = match LineNumber::new(line_no) {
336 Some(l) => l,
337 None => {
338 warn!(
339 "build_primary_license_detection: line number {} out of range, clamping to 1",
340 line_no
341 );
342 LineNumber::new(1).expect("1 is a valid line number")
343 }
344 };
345
346 build_declared_license_detection(
347 &normalized,
348 DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
349 )
350}
351
352fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
353 match license_name.trim() {
354 "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
355 "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
356 "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
357 "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
358 "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
359 "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
360 "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
361 "public-domain" => {
362 NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
363 }
364 other => normalize_declared_license_key(other)
365 .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
366 }
367}
368
369fn parse_copyright_holders(text: &str) -> Vec<String> {
370 let mut holders = Vec::new();
371 let mut count = 0usize;
372
373 for line in text.lines() {
374 count += 1;
375 if count > MAX_ITERATION_COUNT {
376 warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
377 break;
378 }
379 let line = line.trim();
380 if line.is_empty() {
381 continue;
382 }
383
384 let cleaned = line
385 .trim_start_matches("Copyright")
386 .trim_start_matches("copyright")
387 .trim_start_matches("(C)")
388 .trim_start_matches("(c)")
389 .trim_start_matches("©")
390 .trim();
391
392 if let Some(year_end) = cleaned.find(char::is_alphabetic) {
393 let without_years = &cleaned[year_end..];
394 let holder = without_years
395 .trim_start_matches(',')
396 .trim_start_matches('-')
397 .trim();
398
399 if !holder.is_empty() && holder.len() > 2 {
400 holders.push(holder.to_string());
401 }
402 }
403 }
404
405 holders
406}
407
408fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
409 let mut in_field = false;
410 let mut field_content = String::new();
411 let mut count = 0usize;
412
413 for line in content.lines() {
414 count += 1;
415 if count > MAX_ITERATION_COUNT {
416 warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
417 break;
418 }
419 if line.starts_with(field_name) {
420 in_field = true;
421 field_content.push_str(line.trim_start_matches(field_name).trim());
422 field_content.push('\n');
423 } else if in_field {
424 if line.starts_with(char::is_whitespace) {
425 field_content.push_str(line.trim());
426 field_content.push('\n');
427 } else if !line.trim().is_empty() {
428 break;
429 }
430 }
431 }
432
433 let trimmed = field_content.trim();
434 if trimmed.is_empty() {
435 None
436 } else {
437 Some(truncate_field(trimmed.to_string()))
438 }
439}
440
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    /// Path matching accepts the known Debian locations and `*_copyright`
    /// names, and rejects unrelated files.
    #[test]
    fn test_copyright_parser_is_match() {
        let accepted = [
            "/usr/share/doc/bash/copyright",
            "debian/copyright",
            "src/third_party/gperftools/dist/packages/deb/copyright",
            "ports/zlib/copyright",
            "/tmp/sample_copyright",
        ];
        for candidate in accepted {
            assert!(
                DebianCopyrightParser::is_match(&PathBuf::from(candidate)),
                "{candidate} should match"
            );
        }

        let rejected = ["copyright.txt", "/etc/copyright"];
        for candidate in rejected {
            assert!(
                !DebianCopyrightParser::is_match(&PathBuf::from(candidate)),
                "{candidate} should not match"
            );
        }
    }

    /// Each location maps to the expected datasource id.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        let cases = [
            ("debian/copyright", DatasourceId::DebianCopyrightInSource),
            (
                "src/third_party/gperftools/dist/packages/deb/copyright",
                DatasourceId::DebianCopyrightStandalone,
            ),
            (
                "ports/zlib/copyright",
                DatasourceId::DebianCopyrightStandalone,
            ),
            (
                "/usr/share/doc/bash/copyright",
                DatasourceId::DebianCopyrightInPackage,
            ),
            ("stable_copyright", DatasourceId::DebianCopyrightStandalone),
        ];

        for (location, expected) in cases {
            assert_eq!(
                detect_debian_copyright_datasource(&PathBuf::from(location)),
                expected
            );
        }
    }

    /// Package names come from the doc directory or, for standalone files,
    /// from the parent directory.
    #[test]
    fn test_extract_package_name_from_path() {
        let bash = PathBuf::from("/usr/share/doc/bash/copyright");
        assert_eq!(
            extract_package_name_from_path(&bash),
            Some("bash".to_string())
        );

        let libseccomp = PathBuf::from("/usr/share/doc/libseccomp2/copyright");
        assert_eq!(
            extract_package_name_from_path(&libseccomp),
            Some("libseccomp2".to_string())
        );

        let in_source = PathBuf::from("debian/copyright");
        assert_eq!(extract_package_name_from_path(&in_source), None);

        let port = PathBuf::from("ports/zlib/copyright");
        assert_eq!(
            extract_standalone_package_name_from_path(
                &port,
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    /// A machine-readable file yields name, namespace, license statement and
    /// copyright-holder parties.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = concat!(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
            "Upstream-Name: libseccomp\n",
            "Source: https://sourceforge.net/projects/libseccomp/\n",
            "\n",
            "Files: *\n",
            "Copyright: 2012 Paul Moore <pmoore@redhat.com>\n",
            " 2012 Ashley Lai <adlai@us.ibm.com>\n",
            "License: LGPL-2.1\n",
            "\n",
            "License: LGPL-2.1\n",
            " This library is free software\n",
        );

        let package = parse_copyright_file(content, Some("libseccomp"));

        assert_eq!(package.name, Some("libseccomp".to_string()));
        assert_eq!(package.namespace, Some("debian".to_string()));
        assert_eq!(package.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            package.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(package.parties.len() >= 2);

        let first_party = &package.parties[0];
        assert_eq!(first_party.role, Some("copyright-holder".to_string()));
        assert!(first_party.name.as_ref().unwrap().contains("Paul Moore"));
    }

    /// The bsdutils fixture reports its `Files: *` license as primary, with
    /// the absolute line number of the header.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let fixture = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let package = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(package.name, Some("bsdutils".to_string()));
        let statement = package
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(statement.contains("GPL-2+"));
        assert!(!package.license_detections.is_empty());

        let primary_match = &package.license_detections[0].matches[0];
        let expected_line = LineNumber::new(47).unwrap();
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary_match.start_line, expected_line);
        assert_eq!(primary_match.end_line, expected_line);
    }

    /// Secondary detections keep file order, absolute line numbers and the
    /// original casing of the header line.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let fixture = PathBuf::from("testdata/debian/copyright/copyright");
        let package = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(package.license_detections.len(), 1);
        assert_eq!(package.other_license_detections.len(), 4);

        let primary_match = &package.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary_match.start_line, LineNumber::new(11).unwrap());

        let mut ordered_lines: Vec<usize> = Vec::new();
        let mut ordered_texts: Vec<&str> = Vec::new();
        for detection in &package.other_license_detections {
            let first_match = &detection.matches[0];
            ordered_lines.push(first_match.start_line.get());
            ordered_texts.push(first_match.matched_text.as_deref().unwrap());
        }

        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
        assert_eq!(ordered_texts, vec!["License: LGPL-2.1"; 4]);
    }

    /// A stand-alone `License:` paragraph at the end of the file is detected.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let fixture = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let package = DebianCopyrightParser::extract_first_package(&fixture);
        let others = &package.other_license_detections;

        let first_zlib = others
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            first_zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = others
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        let expected_line = LineNumber::new(732).unwrap();
        assert_eq!(last_zlib.matches[0].start_line, expected_line);
        assert_eq!(last_zlib.matches[0].end_line, expected_line);
    }

    /// Without a usable `Files: *` license, the header paragraph's license
    /// becomes the primary detection.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let fixture =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let package = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(package.license_detections.len(), 1);
        let primary_match = &package.license_detections[0].matches[0];
        let expected_line = LineNumber::new(8).unwrap();
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary_match.start_line, expected_line);
        assert_eq!(primary_match.end_line, expected_line);

        let has_gpl2_plus = package
            .other_license_detections
            .iter()
            .any(|detection| detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+"));
        assert!(has_gpl2_plus);
    }

    /// A `Files: *` license wins over a license in the header paragraph.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = concat!(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
            "Upstream-Name: foo\n",
            "License: MIT\n",
            "\n",
            "Files: *\n",
            "Copyright: 2024 Example\n",
            "License: GPL-2+\n",
        );
        let package = parse_copyright_file(content, Some("foo"));

        assert_eq!(package.license_detections.len(), 1);
        let primary_match = &package.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary_match.start_line, LineNumber::new(7).unwrap());
    }

    /// The local paragraph parser agrees with the shared RFC 822 parser and
    /// additionally records the license header line.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines: Vec<String> = [
            "Files: *",
            "Copyright: 2024 Example Org",
            "License: Apache-2.0",
            " Licensed under the Apache License, Version 2.0.",
        ]
        .iter()
        .map(|line| line.to_string())
        .collect();

        let reference = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");
        let paragraph = finalize_copyright_paragraph(raw_lines, 10);

        assert_eq!(paragraph.metadata.headers, reference.headers);
        assert_eq!(paragraph.metadata.body, reference.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    /// Free-form files are scanned for `Copyright:` and `License:` blocks.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = concat!(
            "This package was debianized by John Doe.\n",
            "\n",
            "Upstream Authors:\n",
            " Jane Smith\n",
            "\n",
            "Copyright:\n",
            " 2009 10gen\n",
            "\n",
            "License:\n",
            " SSPL\n",
        );
        let package = parse_copyright_file(content, Some("mongodb"));

        assert_eq!(package.name, Some("mongodb".to_string()));
        assert_eq!(
            package.extracted_license_statement,
            Some("SSPL".to_string())
        );
        assert!(!package.parties.is_empty());
    }

    /// Holder extraction drops years and copyright markers.
    #[test]
    fn test_parse_copyright_holders() {
        let text = concat!(
            "2012 Paul Moore <pmoore@redhat.com>\n",
            "2012 Ashley Lai <adlai@us.ibm.com>\n",
            "Copyright (C) 2015-2018 Example Corp",
        );
        let holders = parse_copyright_holders(text);

        assert!(holders.len() >= 3);
        for expected in ["Paul Moore", "Example Corp"] {
            assert!(holders.iter().any(|holder| holder.contains(expected)));
        }
    }

    /// Text with no copyright or license information yields an empty package.
    #[test]
    fn test_parse_copyright_empty() {
        let package = parse_copyright_file(
            "This is just some text without proper copyright info.",
            Some("test"),
        );

        assert_eq!(package.name, Some("test".to_string()));
        assert!(package.parties.is_empty());
        assert!(package.extracted_license_statement.is_none());
    }

    /// Merging a parsed copyright file into a package carries over declared
    /// and other license fields.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let content = concat!(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
            "Upstream-Name: demo\n",
            "\n",
            "Files: *\n",
            "Copyright: 2024 Example\n",
            "License: MIT\n",
            "\n",
            "Files: debian/*\n",
            "Copyright: 2024 Debian Example\n",
            "License: Apache-2.0\n",
        );
        let copyright = parse_copyright_file(content, Some("demo"));
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }
}