Skip to main content

provenant/parsers/debian/
copyright.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::rfc822::{self, Rfc822Metadata};
10use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
11use crate::utils::spdx::combine_license_expressions;
12
13use super::utils::{build_debian_purl, make_party};
14use super::{PACKAGE_TYPE, default_package_data, read_or_default};
15use crate::parsers::PackageParser;
16use crate::parsers::license_normalization::{
17    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
18    normalize_declared_license_key,
19};
20
/// Parser for Debian machine-readable copyright files (DEP-5 format).
///
/// Files whose first paragraph carries a `Format` header are parsed as DEP-5;
/// anything else falls back to a plain scan for `Copyright:` and `License:`
/// blocks (see `parse_copyright_file`).
pub struct DebianCopyrightParser;
23
24impl PackageParser for DebianCopyrightParser {
25    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
26
27    fn is_match(path: &Path) -> bool {
28        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
29            if filename != "copyright" {
30                return filename.ends_with("_copyright");
31            }
32            let path_str = path.to_string_lossy();
33            path_str.contains("/debian/")
34                || path_str.contains("/ports/")
35                || path_str.starts_with("ports/")
36                || path_str.contains("/packages/deb/")
37                || path_str.contains("/usr/share/doc/")
38                || path_str.ends_with("debian/copyright")
39        } else {
40            false
41        }
42    }
43
44    fn extract_packages(path: &Path) -> Vec<PackageData> {
45        let datasource_id = detect_debian_copyright_datasource(path);
46        let content = read_or_default!(path, "copyright file", datasource_id);
47
48        let package_name = extract_package_name_from_path(path)
49            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
50        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
51        package_data.datasource_id = Some(datasource_id);
52        vec![package_data]
53    }
54}
55
// Registers the parser's human-readable description, the path globs it
// covers, and the DEP-5 specification URL.
// NOTE(review): the meaning of the positional "deb" and "" arguments is defined
// by `register_parser!` elsewhere (presumably package type and primary
// language); confirm against the macro definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
69
70fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
71    let path_str = path.to_string_lossy();
72    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
73        DatasourceId::DebianCopyrightInSource
74    } else if path_str.contains("/usr/share/doc/") {
75        DatasourceId::DebianCopyrightInPackage
76    } else {
77        DatasourceId::DebianCopyrightStandalone
78    }
79}
80
/// Returns the name of the directory that directly follows the first `doc`
/// directory in `path` (e.g. `bash` for `/usr/share/doc/bash/copyright`).
///
/// Only a `doc` component followed by another normal component counts.
/// Returns `None` when there is no such pair, or when the following component
/// is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<Component<'_>> = path.components().collect();

    // Look at each adjacent pair and stop at the first `doc/<name>` pair.
    let package_dir = parts.windows(2).find_map(|pair| match (pair[0], pair[1]) {
        (Component::Normal(dir), Component::Normal(next)) if dir.to_str() == Some("doc") => {
            Some(next)
        }
        _ => None,
    })?;

    package_dir.to_str().map(str::to_owned)
}
95
96fn extract_standalone_package_name_from_path(
97    path: &Path,
98    datasource_id: DatasourceId,
99) -> Option<String> {
100    if datasource_id != DatasourceId::DebianCopyrightStandalone {
101        return None;
102    }
103
104    path.file_name()
105        .and_then(|name| name.to_str())
106        .filter(|name| *name == "copyright")?;
107
108    path.parent()
109        .and_then(|parent| parent.file_name())
110        .and_then(|name| name.to_str())
111        .map(str::to_string)
112}
113
114pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
115    let paragraphs = parse_copyright_paragraphs_with_lines(content);
116
117    let is_dep5 = paragraphs
118        .first()
119        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
120        .is_some();
121
122    let namespace = Some("debian".to_string());
123    let mut parties = Vec::new();
124    let mut license_statements = Vec::new();
125    let mut primary_license_detection = None;
126    let mut header_license_detection = None;
127    let mut other_license_detections = Vec::new();
128
129    if is_dep5 {
130        let mut para_count = 0usize;
131        for para in &paragraphs {
132            para_count += 1;
133            if para_count > MAX_ITERATION_COUNT {
134                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
135                break;
136            }
137            if let Some(copyright_text) =
138                rfc822::get_header_first(&para.metadata.headers, "copyright")
139            {
140                for holder in parse_copyright_holders(&copyright_text) {
141                    if !holder.is_empty() {
142                        parties.push(make_party(None, "copyright-holder", Some(holder), None));
143                    }
144                }
145            }
146
147            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
148                let license_name = license.lines().next().unwrap_or(&license).trim();
149                if !license_name.is_empty()
150                    && !license_statements.contains(&license_name.to_string())
151                {
152                    license_statements.push(license_name.to_string());
153                }
154
155                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
156                    let detection =
157                        build_primary_license_detection(license_name, matched_text, line_no);
158                    let is_header_paragraph =
159                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
160                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
161                        == Some("*")
162                    {
163                        primary_license_detection = Some(detection);
164                    } else if is_header_paragraph {
165                        header_license_detection.get_or_insert(detection);
166                    } else {
167                        other_license_detections.push(detection);
168                    }
169                }
170            }
171        }
172
173        if primary_license_detection.is_none() && header_license_detection.is_some() {
174            primary_license_detection = header_license_detection;
175        }
176    } else {
177        let copyright_block = extract_unstructured_field(content, "Copyright:");
178        if let Some(text) = copyright_block {
179            for holder in parse_copyright_holders(&text) {
180                if !holder.is_empty() {
181                    parties.push(make_party(None, "copyright-holder", Some(holder), None));
182                }
183            }
184        }
185
186        let license_block = extract_unstructured_field(content, "License:");
187        if let Some(text) = license_block {
188            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
189        }
190    }
191
192    let extracted_license_statement = if license_statements.is_empty() {
193        None
194    } else {
195        Some(truncate_field(license_statements.join(" AND ")))
196    };
197
198    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
199    let declared_license_expression = license_detections
200        .first()
201        .map(|detection| detection.license_expression.clone());
202    let declared_license_expression_spdx = license_detections
203        .first()
204        .map(|detection| detection.license_expression_spdx.clone());
205    let other_license_expression = combine_license_expressions(
206        other_license_detections
207            .iter()
208            .map(|detection| detection.license_expression.clone()),
209    );
210    let other_license_expression_spdx = combine_license_expressions(
211        other_license_detections
212            .iter()
213            .map(|detection| detection.license_expression_spdx.clone()),
214    );
215
216    PackageData {
217        datasource_id: Some(DatasourceId::DebianCopyright),
218        package_type: Some(PACKAGE_TYPE),
219        namespace: namespace.clone(),
220        name: package_name.map(|s| truncate_field(s.to_string())),
221        parties,
222        declared_license_expression,
223        declared_license_expression_spdx,
224        license_detections,
225        other_license_expression,
226        other_license_expression_spdx,
227        other_license_detections,
228        extracted_license_statement,
229        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
230        ..Default::default()
231    }
232}
233
/// One blank-line-delimited paragraph of a copyright file, as produced by
/// `finalize_copyright_paragraph`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields keyed by lowercased name; `body` is always left empty.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (right-trimmed)
    /// together with its line number, computed as the paragraph's start line
    /// plus the line's offset within the paragraph. `None` when the paragraph
    /// has no `License` header.
    license_header_line: Option<(String, usize)>,
}
239
240fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
241    let mut paragraphs = Vec::new();
242    let mut current_lines = Vec::new();
243    let mut current_start_line = 1usize;
244    let mut count = 0usize;
245
246    for (idx, line) in content.lines().enumerate() {
247        count += 1;
248        if count > MAX_ITERATION_COUNT {
249            warn!(
250                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
251            );
252            break;
253        }
254        let line_no = idx + 1;
255        if line.is_empty() {
256            if !current_lines.is_empty() {
257                paragraphs.push(finalize_copyright_paragraph(
258                    std::mem::take(&mut current_lines),
259                    current_start_line,
260                ));
261            }
262            current_start_line = line_no + 1;
263        } else {
264            if current_lines.is_empty() {
265                current_start_line = line_no;
266            }
267            current_lines.push(line.to_string());
268        }
269    }
270
271    if !current_lines.is_empty() {
272        paragraphs.push(finalize_copyright_paragraph(
273            current_lines,
274            current_start_line,
275        ));
276    }
277
278    paragraphs
279}
280
281fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
282    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
283    let mut current_name: Option<String> = None;
284    let mut current_value = String::new();
285    let mut license_header_line = None;
286
287    for (idx, line) in raw_lines.iter().enumerate() {
288        if line.starts_with(' ') || line.starts_with('\t') {
289            if current_name.is_some() {
290                current_value.push('\n');
291                current_value.push_str(line);
292            }
293            continue;
294        }
295
296        if let Some(name) = current_name.take() {
297            add_copyright_header_value(&mut headers, &name, &current_value);
298            current_value.clear();
299        }
300
301        if let Some((name, value)) = line.split_once(':') {
302            let normalized_name = name.trim().to_ascii_lowercase();
303            if normalized_name == "license" && license_header_line.is_none() {
304                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
305            }
306            current_name = Some(normalized_name);
307            current_value = value.trim_start().to_string();
308        }
309    }
310
311    if let Some(name) = current_name.take() {
312        add_copyright_header_value(&mut headers, &name, &current_value);
313    }
314
315    CopyrightParagraph {
316        metadata: Rfc822Metadata {
317            headers,
318            body: String::new(),
319        },
320        license_header_line,
321    }
322}
323
/// Records `value` under header `name`.
///
/// The entry for `name` is always created, even when nothing is stored. The
/// value is right-trimmed and only pushed when something remains, so a header
/// with an empty value ends up with an empty list.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
331
332fn build_primary_license_detection(
333    license_name: &str,
334    matched_text: String,
335    line_no: usize,
336) -> LicenseDetection {
337    let normalized = normalize_debian_license_name(license_name);
338    let line = match LineNumber::new(line_no) {
339        Some(l) => l,
340        None => {
341            warn!(
342                "build_primary_license_detection: line number {} out of range, clamping to 1",
343                line_no
344            );
345            LineNumber::new(1).expect("1 is a valid line number")
346        }
347    };
348
349    build_declared_license_detection(
350        &normalized,
351        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
352    )
353}
354
355fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
356    match license_name.trim() {
357        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
358        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
359        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
360        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
361        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
362        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
363        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
364        "public-domain" => {
365            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-scancode-public-domain")
366        }
367        other => normalize_declared_license_key(other)
368            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
369    }
370}
371
372fn parse_copyright_holders(text: &str) -> Vec<String> {
373    let mut holders = Vec::new();
374    let mut count = 0usize;
375
376    for line in text.lines() {
377        count += 1;
378        if count > MAX_ITERATION_COUNT {
379            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
380            break;
381        }
382        let line = line.trim();
383        if line.is_empty() {
384            continue;
385        }
386
387        let cleaned = line
388            .trim_start_matches("Copyright")
389            .trim_start_matches("copyright")
390            .trim_start_matches("(C)")
391            .trim_start_matches("(c)")
392            .trim_start_matches("©")
393            .trim();
394
395        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
396            let without_years = &cleaned[year_end..];
397            let holder = without_years
398                .trim_start_matches(',')
399                .trim_start_matches('-')
400                .trim();
401
402            if !holder.is_empty() && holder.len() > 2 {
403                holders.push(holder.to_string());
404            }
405        }
406    }
407
408    holders
409}
410
411fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
412    let mut in_field = false;
413    let mut field_content = String::new();
414    let mut count = 0usize;
415
416    for line in content.lines() {
417        count += 1;
418        if count > MAX_ITERATION_COUNT {
419            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
420            break;
421        }
422        if line.starts_with(field_name) {
423            in_field = true;
424            field_content.push_str(line.trim_start_matches(field_name).trim());
425            field_content.push('\n');
426        } else if in_field {
427            if line.starts_with(char::is_whitespace) {
428                field_content.push_str(line.trim());
429                field_content.push('\n');
430            } else if !line.trim().is_empty() {
431                break;
432            }
433        }
434    }
435
436    let trimmed = field_content.trim();
437    if trimmed.is_empty() {
438        None
439    } else {
440        Some(truncate_field(trimmed.to_string()))
441    }
442}
443
// Unit tests for the Debian copyright parser. Several tests read fixture
// files from `testdata/` through `DebianCopyrightParser::extract_first_package`;
// the rest work on inline content.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    // Paths that must and must not be recognised as Debian copyright files.
    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    // Path location decides between in-source, in-package and standalone.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    // Package names come from the directory after `doc`, or from the parent
    // directory for standalone files.
    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    // Inline DEP-5 content: duplicate license names collapse to one statement
    // and both copyright lines produce parties.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    // Fixture-driven: the primary detection keeps the header line text and its
    // line number within the file.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    // Fixture-driven: other detections are emitted in file order with their
    // line numbers and original-case header text.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    // Fixture-driven: a license-only paragraph at the end of the file is still
    // reported, at its own line.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    // Fixture-driven: without a usable `Files: *` license, the header
    // paragraph's license becomes the primary detection.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    // A `Files: *` license wins over a license in the header paragraph.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    // The local paragraph parser must agree with the shared RFC822 parser on
    // headers and body, and additionally report the license header line.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    // Content without a `Format` header goes through the free-form field scan.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
    Jane Smith

Copyright:
    2009 10gen

License:
    SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    // One holder is expected per input line.
    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    // Text with neither structured nor free-form fields yields no parties and
    // no license statement.
    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    // Merging into a `.deb` package must carry over both the declared and the
    // other license expressions and detections.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
             Upstream-Name: demo\n\n\
             Files: *\n\
             Copyright: 2024 Example\n\
             License: MIT\n\n\
             Files: debian/*\n\
             Copyright: 2024 Debian Example\n\
             License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }

    // `public-domain` has no plain SPDX id and maps to a LicenseRef.
    #[test]
    fn test_normalize_debian_public_domain_uses_scancode_license_ref() {
        let normalized = normalize_debian_license_name("public-domain");

        assert_eq!(normalized.declared_license_expression, "public-domain");
        assert_eq!(
            normalized.declared_license_expression_spdx,
            "LicenseRef-scancode-public-domain"
        );
    }
}