Skip to main content

provenant/parsers/debian/
copyright.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::rfc822::{self, Rfc822Metadata};
10use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
11use crate::utils::spdx::combine_license_expressions;
12
13use super::super::metadata::ParserMetadata;
14use super::utils::{build_debian_purl, make_party};
15use super::{PACKAGE_TYPE, default_package_data, read_or_default};
16use crate::parsers::PackageParser;
17use crate::parsers::license_normalization::{
18    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
19    normalize_declared_license_key,
20};
21
/// Parser for Debian machine-readable copyright files (DEP-5 format).
///
/// This is a stateless unit type; its behaviour is provided by the
/// [`PackageParser`] implementation in this module. Files that lack a DEP-5
/// `Format:` header are still handled through a looser, unstructured fallback
/// (see `parse_copyright_file`).
pub struct DebianCopyrightParser;
24
25impl PackageParser for DebianCopyrightParser {
26    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
27
28    fn metadata() -> Vec<ParserMetadata> {
29        vec![ParserMetadata {
30            description: "Debian machine-readable copyright file",
31            file_patterns: &[
32                "**/debian/copyright",
33                "**/ports/*/copyright",
34                "**/packages/deb/copyright",
35                "**/usr/share/doc/*/copyright",
36                "**/*_copyright",
37            ],
38            package_type: "deb",
39            primary_language: "",
40            documentation_url: Some(
41                "https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/",
42            ),
43        }]
44    }
45
46    fn is_match(path: &Path) -> bool {
47        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
48            if filename != "copyright" {
49                return filename.ends_with("_copyright");
50            }
51            let path_str = path.to_string_lossy();
52            path_str.contains("/debian/")
53                || path_str.contains("/ports/")
54                || path_str.starts_with("ports/")
55                || path_str.contains("/packages/deb/")
56                || path_str.contains("/usr/share/doc/")
57                || path_str.ends_with("debian/copyright")
58        } else {
59            false
60        }
61    }
62
63    fn extract_packages(path: &Path) -> Vec<PackageData> {
64        let datasource_id = detect_debian_copyright_datasource(path);
65        let content = read_or_default!(path, "copyright file", datasource_id);
66
67        let package_name = extract_package_name_from_path(path)
68            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
69        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
70        package_data.datasource_id = Some(datasource_id);
71        vec![package_data]
72    }
73}
74
75fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
76    let path_str = path.to_string_lossy();
77    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
78        DatasourceId::DebianCopyrightInSource
79    } else if path_str.contains("/usr/share/doc/") {
80        DatasourceId::DebianCopyrightInPackage
81    } else {
82        DatasourceId::DebianCopyrightStandalone
83    }
84}
85
/// Extracts the package name from an installed-package layout
/// `…/usr/share/doc/<package>/…/copyright`.
///
/// Returns `None` when the path does not contain the consecutive components
/// `usr`, `share`, `doc`, when nothing follows the package directory (so the
/// file name itself is never mistaken for a package), or when the package
/// component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    const DOC_ROOT: [Option<&str>; 3] = [Some("usr"), Some("share"), Some("doc")];

    // Keep one slot per component so adjacency is preserved; root, prefix,
    // `.`/`..` and non-UTF-8 components become `None` and can never match.
    let names: Vec<Option<&str>> = path
        .components()
        .map(|component| match component {
            Component::Normal(os_str) => os_str.to_str(),
            _ => None,
        })
        .collect();

    let package_idx = names.windows(DOC_ROOT.len()).position(|w| *w == DOC_ROOT)? + DOC_ROOT.len();

    // The package directory must be followed by at least the file name.
    if names.len() <= package_idx + 1 {
        return None;
    }
    names[package_idx].map(str::to_string)
}
100
101fn extract_standalone_package_name_from_path(
102    path: &Path,
103    datasource_id: DatasourceId,
104) -> Option<String> {
105    if datasource_id != DatasourceId::DebianCopyrightStandalone {
106        return None;
107    }
108
109    path.file_name()
110        .and_then(|name| name.to_str())
111        .filter(|name| *name == "copyright")?;
112
113    path.parent()
114        .and_then(|parent| parent.file_name())
115        .and_then(|name| name.to_str())
116        .map(str::to_string)
117}
118
119pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
120    let paragraphs = parse_copyright_paragraphs_with_lines(content);
121
122    let is_dep5 = paragraphs
123        .first()
124        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
125        .is_some();
126
127    let namespace = Some("debian".to_string());
128    let mut parties = Vec::new();
129    let mut license_statements = Vec::new();
130    let mut primary_license_detection = None;
131    let mut header_license_detection = None;
132    let mut other_license_detections = Vec::new();
133
134    if is_dep5 {
135        let mut para_count = 0usize;
136        for para in &paragraphs {
137            para_count += 1;
138            if para_count > MAX_ITERATION_COUNT {
139                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
140                break;
141            }
142            if let Some(copyright_text) =
143                rfc822::get_header_first(&para.metadata.headers, "copyright")
144            {
145                for holder in parse_copyright_holders(&copyright_text) {
146                    if !holder.is_empty() {
147                        parties.push(make_party(None, "copyright-holder", Some(holder), None));
148                    }
149                }
150            }
151
152            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
153                let license_name = license.lines().next().unwrap_or(&license).trim();
154                if !license_name.is_empty()
155                    && !license_statements.contains(&license_name.to_string())
156                {
157                    license_statements.push(license_name.to_string());
158                }
159
160                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
161                    let detection =
162                        build_primary_license_detection(license_name, matched_text, line_no);
163                    let is_header_paragraph =
164                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
165                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
166                        == Some("*")
167                    {
168                        primary_license_detection = Some(detection);
169                    } else if is_header_paragraph {
170                        header_license_detection.get_or_insert(detection);
171                    } else {
172                        other_license_detections.push(detection);
173                    }
174                }
175            }
176        }
177
178        if primary_license_detection.is_none() && header_license_detection.is_some() {
179            primary_license_detection = header_license_detection;
180        }
181    } else {
182        let copyright_block = extract_unstructured_field(content, "Copyright:");
183        if let Some(text) = copyright_block {
184            for holder in parse_copyright_holders(&text) {
185                if !holder.is_empty() {
186                    parties.push(make_party(None, "copyright-holder", Some(holder), None));
187                }
188            }
189        }
190
191        let license_block = extract_unstructured_field(content, "License:");
192        if let Some(text) = license_block {
193            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
194        }
195    }
196
197    let extracted_license_statement = if license_statements.is_empty() {
198        None
199    } else {
200        Some(truncate_field(license_statements.join(" AND ")))
201    };
202
203    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
204    let declared_license_expression = license_detections
205        .first()
206        .map(|detection| detection.license_expression.clone());
207    let declared_license_expression_spdx = license_detections
208        .first()
209        .map(|detection| detection.license_expression_spdx.clone());
210    let other_license_expression = combine_license_expressions(
211        other_license_detections
212            .iter()
213            .map(|detection| detection.license_expression.clone()),
214    );
215    let other_license_expression_spdx = combine_license_expressions(
216        other_license_detections
217            .iter()
218            .map(|detection| detection.license_expression_spdx.clone()),
219    );
220
221    PackageData {
222        datasource_id: Some(DatasourceId::DebianCopyright),
223        package_type: Some(PACKAGE_TYPE),
224        namespace: namespace.clone(),
225        name: package_name.map(|s| truncate_field(s.to_string())),
226        parties,
227        declared_license_expression,
228        declared_license_expression_spdx,
229        license_detections,
230        other_license_expression,
231        other_license_expression_spdx,
232        other_license_detections,
233        extracted_license_statement,
234        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
235        ..Default::default()
236    }
237}
238
/// One blank-line-delimited paragraph of a copyright file, together with the
/// location of its `License` header.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Headers parsed from the paragraph, keyed by lower-cased header name.
    /// The `body` of the metadata is left empty by the parser in this module.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (trailing whitespace
    /// removed) and its 1-based line number within the whole file, if the
    /// paragraph has such a header.
    license_header_line: Option<(String, usize)>,
}
244
245fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
246    let mut paragraphs = Vec::new();
247    let mut current_lines = Vec::new();
248    let mut current_start_line = 1usize;
249    let mut count = 0usize;
250
251    for (idx, line) in content.lines().enumerate() {
252        count += 1;
253        if count > MAX_ITERATION_COUNT {
254            warn!(
255                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
256            );
257            break;
258        }
259        let line_no = idx + 1;
260        if line.is_empty() {
261            if !current_lines.is_empty() {
262                paragraphs.push(finalize_copyright_paragraph(
263                    std::mem::take(&mut current_lines),
264                    current_start_line,
265                ));
266            }
267            current_start_line = line_no + 1;
268        } else {
269            if current_lines.is_empty() {
270                current_start_line = line_no;
271            }
272            current_lines.push(line.to_string());
273        }
274    }
275
276    if !current_lines.is_empty() {
277        paragraphs.push(finalize_copyright_paragraph(
278            current_lines,
279            current_start_line,
280        ));
281    }
282
283    paragraphs
284}
285
286fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
287    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
288    let mut current_name: Option<String> = None;
289    let mut current_value = String::new();
290    let mut license_header_line = None;
291
292    for (idx, line) in raw_lines.iter().enumerate() {
293        if line.starts_with(' ') || line.starts_with('\t') {
294            if current_name.is_some() {
295                current_value.push('\n');
296                current_value.push_str(line);
297            }
298            continue;
299        }
300
301        if let Some(name) = current_name.take() {
302            add_copyright_header_value(&mut headers, &name, &current_value);
303            current_value.clear();
304        }
305
306        if let Some((name, value)) = line.split_once(':') {
307            let normalized_name = name.trim().to_ascii_lowercase();
308            if normalized_name == "license" && license_header_line.is_none() {
309                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
310            }
311            current_name = Some(normalized_name);
312            current_value = value.trim_start().to_string();
313        }
314    }
315
316    if let Some(name) = current_name.take() {
317        add_copyright_header_value(&mut headers, &name, &current_value);
318    }
319
320    CopyrightParagraph {
321        metadata: Rfc822Metadata {
322            headers,
323            body: String::new(),
324        },
325        license_header_line,
326    }
327}
328
/// Records `value` under the header `name`.
///
/// The header name is always registered in `headers`, even when the value is
/// blank; the value itself is stored with trailing whitespace removed and only
/// if something remains.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    let value = value.trim_end();
    if value.is_empty() {
        return;
    }
    values.push(value.to_owned());
}
336
337fn build_primary_license_detection(
338    license_name: &str,
339    matched_text: String,
340    line_no: usize,
341) -> LicenseDetection {
342    let normalized = normalize_debian_license_name(license_name);
343    let line = match LineNumber::new(line_no) {
344        Some(l) => l,
345        None => {
346            warn!(
347                "build_primary_license_detection: line number {} out of range, clamping to 1",
348                line_no
349            );
350            LineNumber::new(1).expect("1 is a valid line number")
351        }
352    };
353
354    build_declared_license_detection(
355        &normalized,
356        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
357    )
358}
359
360fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
361    match license_name.trim() {
362        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
363        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
364        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
365        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
366        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
367        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
368        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
369        "public-domain" => {
370            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-scancode-public-domain")
371        }
372        other => normalize_declared_license_key(other)
373            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
374    }
375}
376
377fn parse_copyright_holders(text: &str) -> Vec<String> {
378    let mut holders = Vec::new();
379    let mut count = 0usize;
380
381    for line in text.lines() {
382        count += 1;
383        if count > MAX_ITERATION_COUNT {
384            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
385            break;
386        }
387        let line = line.trim();
388        if line.is_empty() {
389            continue;
390        }
391
392        let cleaned = line
393            .trim_start_matches("Copyright")
394            .trim_start_matches("copyright")
395            .trim_start_matches("(C)")
396            .trim_start_matches("(c)")
397            .trim_start_matches("©")
398            .trim();
399
400        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
401            let without_years = &cleaned[year_end..];
402            let holder = without_years
403                .trim_start_matches(',')
404                .trim_start_matches('-')
405                .trim();
406
407            if !holder.is_empty() && holder.len() > 2 {
408                holders.push(holder.to_string());
409            }
410        }
411    }
412
413    holders
414}
415
416fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
417    let mut in_field = false;
418    let mut field_content = String::new();
419    let mut count = 0usize;
420
421    for line in content.lines() {
422        count += 1;
423        if count > MAX_ITERATION_COUNT {
424            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
425            break;
426        }
427        if line.starts_with(field_name) {
428            in_field = true;
429            field_content.push_str(line.trim_start_matches(field_name).trim());
430            field_content.push('\n');
431        } else if in_field {
432            if line.starts_with(char::is_whitespace) {
433                field_content.push_str(line.trim());
434                field_content.push('\n');
435            } else if !line.trim().is_empty() {
436                break;
437            }
438        }
439    }
440
441    let trimmed = field_content.trim();
442    if trimmed.is_empty() {
443        None
444    } else {
445        Some(truncate_field(trimmed.to_string()))
446    }
447}
448
// Unit tests for the Debian copyright parser. Several tests read fixture
// files below `testdata/`; the others use inline copyright text.
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    // Path matching: `copyright` below known directories and `*_copyright` files.
    #[test]
    fn test_copyright_parser_is_match() {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/usr/share/doc/bash/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "debian/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "src/third_party/gperftools/dist/packages/deb/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "ports/zlib/copyright"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "copyright.txt"
        )));
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
            "/etc/copyright"
        )));
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
            "/tmp/sample_copyright"
        )));
    }

    // Datasource classification depends only on the path.
    #[test]
    fn test_detect_debian_copyright_datasource() {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
            DatasourceId::DebianCopyrightInSource
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(
                "src/third_party/gperftools/dist/packages/deb/copyright"
            )),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
            DatasourceId::DebianCopyrightInPackage
        );
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
            DatasourceId::DebianCopyrightStandalone
        );
    }

    // Package names come from `/usr/share/doc/<name>/` or, for standalone
    // files, from the parent directory.
    #[test]
    fn test_extract_package_name_from_path() {
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
            Some("bash".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
            Some("libseccomp2".to_string())
        );
        assert_eq!(
            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
            None
        );
        assert_eq!(
            extract_standalone_package_name_from_path(
                &PathBuf::from("ports/zlib/copyright"),
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    // Inline DEP-5 file: a `Files: *` paragraph plus a standalone license paragraph.
    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: libseccomp
Source: https://sourceforge.net/projects/libseccomp/

Files: *
Copyright: 2012 Paul Moore <pmoore@redhat.com>
 2012 Ashley Lai <adlai@us.ibm.com>
License: LGPL-2.1

License: LGPL-2.1
 This library is free software
";
        let pkg = parse_copyright_file(content, Some("libseccomp"));
        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        // The repeated license name is reported only once.
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);
        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
    }

    // Fixture-backed: the primary detection points at the `License:` header
    // line with its absolute line number in the file.
    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let extracted = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(extracted.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
    }

    // Fixture-backed: other detections keep file order, absolute line numbers
    // and the original casing of the matched header line.
    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let path = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());

        let ordered_lines: Vec<usize> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].start_line.get())
            .collect();
        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);

        let ordered_texts: Vec<&str> = pkg
            .other_license_detections
            .iter()
            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
            .collect();
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    // Fixture-backed: a license-only paragraph at the end of the file is detected.
    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let path = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        let zlib = pkg
            .other_license_detections
            .iter()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(
            last_zlib.matches[0].start_line,
            LineNumber::new(732).unwrap()
        );
        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
    }

    // Fixture-backed: without a usable `Files: *` license the header
    // paragraph's license becomes the primary detection.
    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let path =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&path);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());

        assert!(pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        }));
    }

    // When both exist, the `Files: *` license wins over the header paragraph's.
    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary = &pkg.license_detections[0];
        assert_eq!(
            primary.matches[0].matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
    }

    // The local paragraph parser must produce the same headers and body as the
    // shared RFC822 parser, and additionally report the license line position.
    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines = vec![
            "Files: *".to_string(),
            "Copyright: 2024 Example Org".to_string(),
            "License: Apache-2.0".to_string(),
            " Licensed under the Apache License, Version 2.0.".to_string(),
        ];

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, expected.headers);
        assert_eq!(paragraph.metadata.body, expected.body);
        // Start line 10 plus the header's index 2 within the paragraph.
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    // Free-form file without a `Format:` header: the fallback extraction is used.
    #[test]
    fn test_parse_copyright_unstructured() {
        let content = "This package was debianized by John Doe.

Upstream Authors:
    Jane Smith

Copyright:
    2009 10gen

License:
    SSPL
";
        let pkg = parse_copyright_file(content, Some("mongodb"));
        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    // Holder extraction for year-prefixed and marker-prefixed lines.
    #[test]
    fn test_parse_copyright_holders() {
        let text = "2012 Paul Moore <pmoore@redhat.com>
2012 Ashley Lai <adlai@us.ibm.com>
Copyright (C) 2015-2018 Example Corp";
        let holders = parse_copyright_holders(text);
        assert!(holders.len() >= 3);
        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
        assert!(holders.iter().any(|h| h.contains("Example Corp")));
    }

    // Text with neither copyright nor license information yields no parties
    // and no license statement.
    #[test]
    fn test_parse_copyright_empty() {
        let content = "This is just some text without proper copyright info.";
        let pkg = parse_copyright_file(content, Some("test"));
        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    // Merging parsed copyright data into a package keeps declared and other
    // license fields.
    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let copyright = parse_copyright_file(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
             Upstream-Name: demo\n\n\
             Files: *\n\
             Copyright: 2024 Example\n\
             License: MIT\n\n\
             Files: debian/*\n\
             Copyright: 2024 Debian Example\n\
             License: Apache-2.0\n",
            Some("demo"),
        );
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }

    // `public-domain` maps to the ScanCode LicenseRef rather than an SPDX id.
    #[test]
    fn test_normalize_debian_public_domain_uses_scancode_license_ref() {
        let normalized = normalize_debian_license_name("public-domain");

        assert_eq!(normalized.declared_license_expression, "public-domain");
        assert_eq!(
            normalized.declared_license_expression_spdx,
            "LicenseRef-scancode-public-domain"
        );
    }
}