Skip to main content

provenant/parsers/debian/
copyright.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
5use crate::parser_warn as warn;
6use crate::parsers::rfc822::{self, Rfc822Metadata};
7use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
8use crate::utils::spdx::combine_license_expressions;
9
10use super::utils::{build_debian_purl, make_party};
11use super::{PACKAGE_TYPE, default_package_data, read_or_default};
12use crate::parsers::PackageParser;
13use crate::parsers::license_normalization::{
14    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
15    normalize_declared_license_key,
16};
17
18/// Parser for Debian machine-readable copyright files (DEP-5 format)
19pub struct DebianCopyrightParser;
20
21impl PackageParser for DebianCopyrightParser {
22    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
23
24    fn is_match(path: &Path) -> bool {
25        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
26            if filename != "copyright" {
27                return filename.ends_with("_copyright");
28            }
29            let path_str = path.to_string_lossy();
30            path_str.contains("/debian/")
31                || path_str.contains("/ports/")
32                || path_str.starts_with("ports/")
33                || path_str.contains("/packages/deb/")
34                || path_str.contains("/usr/share/doc/")
35                || path_str.ends_with("debian/copyright")
36        } else {
37            false
38        }
39    }
40
41    fn extract_packages(path: &Path) -> Vec<PackageData> {
42        let datasource_id = detect_debian_copyright_datasource(path);
43        let content = read_or_default!(path, "copyright file", datasource_id);
44
45        let package_name = extract_package_name_from_path(path)
46            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
47        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
48        package_data.datasource_id = Some(datasource_id);
49        vec![package_data]
50    }
51}
52
// Registration entry for `DebianCopyrightParser`. The glob patterns listed here
// correspond to the path shapes accepted by `DebianCopyrightParser::is_match`.
// NOTE(review): the meaning of each positional argument (description, path
// patterns, package type, an empty string, documentation URL) is inferred from
// the values — confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
66
67fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
68    let path_str = path.to_string_lossy();
69    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
70        DatasourceId::DebianCopyrightInSource
71    } else if path_str.contains("/usr/share/doc/") {
72        DatasourceId::DebianCopyrightInPackage
73    } else {
74        DatasourceId::DebianCopyrightStandalone
75    }
76}
77
/// Returns the name of the path component that directly follows the first
/// `doc` directory, e.g. `bash` for `/usr/share/doc/bash/copyright`.
///
/// Only a `doc` component followed by another normal component counts; a
/// trailing `doc`, or one followed by `..`, is skipped. Returns `None` when no
/// such pair exists or when the following component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<Component<'_>> = path.components().collect();

    parts
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(child))
                if dir.to_str() == Some("doc") =>
            {
                // The first matching pair is final, even if the name cannot be decoded.
                Some(child.to_str().map(str::to_string))
            }
            _ => None,
        })
        .flatten()
}
92
93fn extract_standalone_package_name_from_path(
94    path: &Path,
95    datasource_id: DatasourceId,
96) -> Option<String> {
97    if datasource_id != DatasourceId::DebianCopyrightStandalone {
98        return None;
99    }
100
101    path.file_name()
102        .and_then(|name| name.to_str())
103        .filter(|name| *name == "copyright")?;
104
105    path.parent()
106        .and_then(|parent| parent.file_name())
107        .and_then(|name| name.to_str())
108        .map(str::to_string)
109}
110
111pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
112    let paragraphs = parse_copyright_paragraphs_with_lines(content);
113
114    let is_dep5 = paragraphs
115        .first()
116        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
117        .is_some();
118
119    let namespace = Some("debian".to_string());
120    let mut parties = Vec::new();
121    let mut license_statements = Vec::new();
122    let mut primary_license_detection = None;
123    let mut header_license_detection = None;
124    let mut other_license_detections = Vec::new();
125
126    if is_dep5 {
127        let mut para_count = 0usize;
128        for para in &paragraphs {
129            para_count += 1;
130            if para_count > MAX_ITERATION_COUNT {
131                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
132                break;
133            }
134            if let Some(copyright_text) =
135                rfc822::get_header_first(&para.metadata.headers, "copyright")
136            {
137                for holder in parse_copyright_holders(&copyright_text) {
138                    if !holder.is_empty() {
139                        parties.push(make_party(None, "copyright-holder", Some(holder), None));
140                    }
141                }
142            }
143
144            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
145                let license_name = license.lines().next().unwrap_or(&license).trim();
146                if !license_name.is_empty()
147                    && !license_statements.contains(&license_name.to_string())
148                {
149                    license_statements.push(license_name.to_string());
150                }
151
152                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
153                    let detection =
154                        build_primary_license_detection(license_name, matched_text, line_no);
155                    let is_header_paragraph =
156                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
157                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
158                        == Some("*")
159                    {
160                        primary_license_detection = Some(detection);
161                    } else if is_header_paragraph {
162                        header_license_detection.get_or_insert(detection);
163                    } else {
164                        other_license_detections.push(detection);
165                    }
166                }
167            }
168        }
169
170        if primary_license_detection.is_none() && header_license_detection.is_some() {
171            primary_license_detection = header_license_detection;
172        }
173    } else {
174        let copyright_block = extract_unstructured_field(content, "Copyright:");
175        if let Some(text) = copyright_block {
176            for holder in parse_copyright_holders(&text) {
177                if !holder.is_empty() {
178                    parties.push(make_party(None, "copyright-holder", Some(holder), None));
179                }
180            }
181        }
182
183        let license_block = extract_unstructured_field(content, "License:");
184        if let Some(text) = license_block {
185            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
186        }
187    }
188
189    let extracted_license_statement = if license_statements.is_empty() {
190        None
191    } else {
192        Some(truncate_field(license_statements.join(" AND ")))
193    };
194
195    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
196    let declared_license_expression = license_detections
197        .first()
198        .map(|detection| detection.license_expression.clone());
199    let declared_license_expression_spdx = license_detections
200        .first()
201        .map(|detection| detection.license_expression_spdx.clone());
202    let other_license_expression = combine_license_expressions(
203        other_license_detections
204            .iter()
205            .map(|detection| detection.license_expression.clone()),
206    );
207    let other_license_expression_spdx = combine_license_expressions(
208        other_license_detections
209            .iter()
210            .map(|detection| detection.license_expression_spdx.clone()),
211    );
212
213    PackageData {
214        datasource_id: Some(DatasourceId::DebianCopyright),
215        package_type: Some(PACKAGE_TYPE),
216        namespace: namespace.clone(),
217        name: package_name.map(|s| truncate_field(s.to_string())),
218        parties,
219        declared_license_expression,
220        declared_license_expression_spdx,
221        license_detections,
222        other_license_expression,
223        other_license_expression_spdx,
224        other_license_detections,
225        extracted_license_statement,
226        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
227        ..Default::default()
228    }
229}
230
/// One blank-line-delimited paragraph of a copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Headers parsed from the paragraph, keyed by lower-cased field name.
    metadata: Rfc822Metadata,
    /// Text (trailing whitespace removed) and line number of the paragraph's
    /// first `License` header line, if it has one.
    license_header_line: Option<(String, usize)>,
}
236
237fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
238    let mut paragraphs = Vec::new();
239    let mut current_lines = Vec::new();
240    let mut current_start_line = 1usize;
241    let mut count = 0usize;
242
243    for (idx, line) in content.lines().enumerate() {
244        count += 1;
245        if count > MAX_ITERATION_COUNT {
246            warn!(
247                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
248            );
249            break;
250        }
251        let line_no = idx + 1;
252        if line.is_empty() {
253            if !current_lines.is_empty() {
254                paragraphs.push(finalize_copyright_paragraph(
255                    std::mem::take(&mut current_lines),
256                    current_start_line,
257                ));
258            }
259            current_start_line = line_no + 1;
260        } else {
261            if current_lines.is_empty() {
262                current_start_line = line_no;
263            }
264            current_lines.push(line.to_string());
265        }
266    }
267
268    if !current_lines.is_empty() {
269        paragraphs.push(finalize_copyright_paragraph(
270            current_lines,
271            current_start_line,
272        ));
273    }
274
275    paragraphs
276}
277
278fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
279    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
280    let mut current_name: Option<String> = None;
281    let mut current_value = String::new();
282    let mut license_header_line = None;
283
284    for (idx, line) in raw_lines.iter().enumerate() {
285        if line.starts_with(' ') || line.starts_with('\t') {
286            if current_name.is_some() {
287                current_value.push('\n');
288                current_value.push_str(line);
289            }
290            continue;
291        }
292
293        if let Some(name) = current_name.take() {
294            add_copyright_header_value(&mut headers, &name, &current_value);
295            current_value.clear();
296        }
297
298        if let Some((name, value)) = line.split_once(':') {
299            let normalized_name = name.trim().to_ascii_lowercase();
300            if normalized_name == "license" && license_header_line.is_none() {
301                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
302            }
303            current_name = Some(normalized_name);
304            current_value = value.trim_start().to_string();
305        }
306    }
307
308    if let Some(name) = current_name.take() {
309        add_copyright_header_value(&mut headers, &name, &current_value);
310    }
311
312    CopyrightParagraph {
313        metadata: Rfc822Metadata {
314            headers,
315            body: String::new(),
316        },
317        license_header_line,
318    }
319}
320
/// Records `value` under the header `name`, with trailing whitespace removed.
///
/// The header key is always created, so a field with an empty value is still
/// visible in `headers` (with an empty list); only non-empty values are stored.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
328
329fn build_primary_license_detection(
330    license_name: &str,
331    matched_text: String,
332    line_no: usize,
333) -> LicenseDetection {
334    let normalized = normalize_debian_license_name(license_name);
335    let line = match LineNumber::new(line_no) {
336        Some(l) => l,
337        None => {
338            warn!(
339                "build_primary_license_detection: line number {} out of range, clamping to 1",
340                line_no
341            );
342            LineNumber::new(1).expect("1 is a valid line number")
343        }
344    };
345
346    build_declared_license_detection(
347        &normalized,
348        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
349    )
350}
351
352fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
353    match license_name.trim() {
354        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
355        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
356        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
357        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
358        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
359        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
360        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
361        "public-domain" => {
362            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
363        }
364        other => normalize_declared_license_key(other)
365            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
366    }
367}
368
369fn parse_copyright_holders(text: &str) -> Vec<String> {
370    let mut holders = Vec::new();
371    let mut count = 0usize;
372
373    for line in text.lines() {
374        count += 1;
375        if count > MAX_ITERATION_COUNT {
376            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
377            break;
378        }
379        let line = line.trim();
380        if line.is_empty() {
381            continue;
382        }
383
384        let cleaned = line
385            .trim_start_matches("Copyright")
386            .trim_start_matches("copyright")
387            .trim_start_matches("(C)")
388            .trim_start_matches("(c)")
389            .trim_start_matches("©")
390            .trim();
391
392        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
393            let without_years = &cleaned[year_end..];
394            let holder = without_years
395                .trim_start_matches(',')
396                .trim_start_matches('-')
397                .trim();
398
399            if !holder.is_empty() && holder.len() > 2 {
400                holders.push(holder.to_string());
401            }
402        }
403    }
404
405    holders
406}
407
408fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
409    let mut in_field = false;
410    let mut field_content = String::new();
411    let mut count = 0usize;
412
413    for line in content.lines() {
414        count += 1;
415        if count > MAX_ITERATION_COUNT {
416            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
417            break;
418        }
419        if line.starts_with(field_name) {
420            in_field = true;
421            field_content.push_str(line.trim_start_matches(field_name).trim());
422            field_content.push('\n');
423        } else if in_field {
424            if line.starts_with(char::is_whitespace) {
425                field_content.push_str(line.trim());
426                field_content.push('\n');
427            } else if !line.trim().is_empty() {
428                break;
429            }
430        }
431    }
432
433    let trimmed = field_content.trim();
434    if trimmed.is_empty() {
435        None
436    } else {
437        Some(truncate_field(trimmed.to_string()))
438    }
439}
440
// Unit tests for the Debian copyright parser. Several tests read fixture files
// under `testdata/` and assert exact line numbers from those fixtures.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    // Path matching: known directory layouts and `*_copyright` names match;
    // `copyright.txt` and a bare `/etc/copyright` do not.
    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    // Datasource classification for source-tree, installed-package and
    // standalone locations.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    // Package name comes from the component after `doc`, or from the parent
    // directory of a standalone `copyright` file.
    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    // DEP-5 input: holders come from the `Files: *` paragraph and the repeated
    // license name is reported once.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    // Fixture-based: the primary detection is the `License: GPL-2+` header at
    // line 47 of the bsdutils copyright file.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    // Fixture-based: other detections keep file order, absolute line numbers
    // and the original casing of the matched header line.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    // Fixture-based: a license-only paragraph at the end of the file (line 732)
    // is still reported among the other detections.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    // Fixture-based: when no `Files: *` paragraph supplies a license, the
    // header paragraph's license becomes the primary detection.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    // A `Files: *` license overrides a license given in the header paragraph.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    // The local paragraph parser must produce the same headers and body as the
    // shared RFC822 parser, plus the license line position.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    // Non-DEP-5 input: free-form `Copyright:` and `License:` blocks are used.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
    Jane Smith

Copyright:
    2009 10gen

License:
    SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    // Holder extraction from lines with and without a `Copyright (C)` prefix.
    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    // Text with no recognisable fields yields no parties and no license.
    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    // Merging parsed copyright data into a package keeps both the declared and
    // the "other" license expressions and detections.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
             Upstream-Name: demo\n\n\
             Files: *\n\
             Copyright: 2024 Example\n\
             License: MIT\n\n\
             Files: debian/*\n\
             Copyright: 2024 Debian Example\n\
             License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }
}