Skip to main content

provenant/parsers/debian/
copyright.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::models::{DatasourceId, LicenseDetection, LineNumber, PackageData, PackageType, Party};
5use crate::parser_warn as warn;
6use crate::parsers::rfc822::{self, Rfc822Metadata};
7use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
8use crate::utils::spdx::combine_license_expressions;
9
10use super::utils::build_debian_purl;
11use super::{PACKAGE_TYPE, default_package_data};
12use crate::parsers::PackageParser;
13use crate::parsers::license_normalization::{
14    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
15    normalize_declared_license_key,
16};
17
18/// Parser for Debian machine-readable copyright files (DEP-5 format)
19pub struct DebianCopyrightParser;
20
21impl PackageParser for DebianCopyrightParser {
22    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
23
24    fn is_match(path: &Path) -> bool {
25        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
26            if filename != "copyright" {
27                return filename.ends_with("_copyright");
28            }
29            let path_str = path.to_string_lossy();
30            path_str.contains("/debian/")
31                || path_str.contains("/ports/")
32                || path_str.starts_with("ports/")
33                || path_str.contains("/packages/deb/")
34                || path_str.contains("/usr/share/doc/")
35                || path_str.ends_with("debian/copyright")
36        } else {
37            false
38        }
39    }
40
41    fn extract_packages(path: &Path) -> Vec<PackageData> {
42        let datasource_id = detect_debian_copyright_datasource(path);
43        let content = match read_file_to_string(path, None) {
44            Ok(c) => c,
45            Err(e) => {
46                warn!("Failed to read copyright file {:?}: {}", path, e);
47                return vec![default_package_data(datasource_id)];
48            }
49        };
50
51        let package_name = extract_package_name_from_path(path)
52            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
53        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
54        package_data.datasource_id = Some(datasource_id);
55        vec![package_data]
56    }
57}
58
// Registers `DebianCopyrightParser` metadata with the crate-wide parser registry.
// NOTE(review): the positional arguments appear to be (description, path globs,
// package type, primary language — empty here, documentation URL); confirm against
// the `register_parser!` definition.
crate::register_parser!(
    "Debian machine-readable copyright file",
    &[
        "**/debian/copyright",
        "**/ports/*/copyright",
        "**/packages/deb/copyright",
        "**/usr/share/doc/*/copyright",
        "**/*_copyright"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
);
72
73fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
74    let path_str = path.to_string_lossy();
75    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
76        DatasourceId::DebianCopyrightInSource
77    } else if path_str.contains("/usr/share/doc/") {
78        DatasourceId::DebianCopyrightInPackage
79    } else {
80        DatasourceId::DebianCopyrightStandalone
81    }
82}
83
/// Returns the path component that directly follows the first `doc` directory
/// which is itself followed by a normal component (as in `/usr/share/doc/<package>/copyright`).
///
/// Yields `None` when no such pair exists, or when the component after `doc` is not
/// valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    // Walk adjacent component pairs: (candidate `doc` directory, its successor).
    let adjacent = path.components().zip(path.components().skip(1));
    for (dir, successor) in adjacent {
        match (dir, successor) {
            (Component::Normal(dir_name), Component::Normal(package))
                if dir_name.to_str() == Some("doc") =>
            {
                return package.to_str().map(str::to_owned);
            }
            _ => {}
        }
    }
    None
}
98
99fn extract_standalone_package_name_from_path(
100    path: &Path,
101    datasource_id: DatasourceId,
102) -> Option<String> {
103    if datasource_id != DatasourceId::DebianCopyrightStandalone {
104        return None;
105    }
106
107    path.file_name()
108        .and_then(|name| name.to_str())
109        .filter(|name| *name == "copyright")?;
110
111    path.parent()
112        .and_then(|parent| parent.file_name())
113        .and_then(|name| name.to_str())
114        .map(str::to_string)
115}
116
117pub(super) fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
118    let paragraphs = parse_copyright_paragraphs_with_lines(content);
119
120    let is_dep5 = paragraphs
121        .first()
122        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
123        .is_some();
124
125    let namespace = Some("debian".to_string());
126    let mut parties = Vec::new();
127    let mut license_statements = Vec::new();
128    let mut primary_license_detection = None;
129    let mut header_license_detection = None;
130    let mut other_license_detections = Vec::new();
131
132    if is_dep5 {
133        let mut para_count = 0usize;
134        for para in &paragraphs {
135            para_count += 1;
136            if para_count > MAX_ITERATION_COUNT {
137                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
138                break;
139            }
140            if let Some(copyright_text) =
141                rfc822::get_header_first(&para.metadata.headers, "copyright")
142            {
143                for holder in parse_copyright_holders(&copyright_text) {
144                    if !holder.is_empty() {
145                        parties.push(Party {
146                            r#type: None,
147                            role: Some("copyright-holder".to_string()),
148                            name: Some(holder),
149                            email: None,
150                            url: None,
151                            organization: None,
152                            organization_url: None,
153                            timezone: None,
154                        });
155                    }
156                }
157            }
158
159            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
160                let license_name = license.lines().next().unwrap_or(&license).trim();
161                if !license_name.is_empty()
162                    && !license_statements.contains(&license_name.to_string())
163                {
164                    license_statements.push(license_name.to_string());
165                }
166
167                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
168                    let detection =
169                        build_primary_license_detection(license_name, matched_text, line_no);
170                    let is_header_paragraph =
171                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
172                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
173                        == Some("*")
174                    {
175                        primary_license_detection = Some(detection);
176                    } else if is_header_paragraph {
177                        header_license_detection.get_or_insert(detection);
178                    } else {
179                        other_license_detections.push(detection);
180                    }
181                }
182            }
183        }
184
185        if primary_license_detection.is_none() && header_license_detection.is_some() {
186            primary_license_detection = header_license_detection;
187        }
188    } else {
189        let copyright_block = extract_unstructured_field(content, "Copyright:");
190        if let Some(text) = copyright_block {
191            for holder in parse_copyright_holders(&text) {
192                if !holder.is_empty() {
193                    parties.push(Party {
194                        r#type: None,
195                        role: Some("copyright-holder".to_string()),
196                        name: Some(holder),
197                        email: None,
198                        url: None,
199                        organization: None,
200                        organization_url: None,
201                        timezone: None,
202                    });
203                }
204            }
205        }
206
207        let license_block = extract_unstructured_field(content, "License:");
208        if let Some(text) = license_block {
209            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
210        }
211    }
212
213    let extracted_license_statement = if license_statements.is_empty() {
214        None
215    } else {
216        Some(truncate_field(license_statements.join(" AND ")))
217    };
218
219    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
220    let declared_license_expression = license_detections
221        .first()
222        .map(|detection| detection.license_expression.clone());
223    let declared_license_expression_spdx = license_detections
224        .first()
225        .map(|detection| detection.license_expression_spdx.clone());
226    let other_license_expression = combine_license_expressions(
227        other_license_detections
228            .iter()
229            .map(|detection| detection.license_expression.clone()),
230    );
231    let other_license_expression_spdx = combine_license_expressions(
232        other_license_detections
233            .iter()
234            .map(|detection| detection.license_expression_spdx.clone()),
235    );
236
237    PackageData {
238        datasource_id: Some(DatasourceId::DebianCopyright),
239        package_type: Some(PACKAGE_TYPE),
240        namespace: namespace.clone(),
241        name: package_name.map(|s| truncate_field(s.to_string())),
242        parties,
243        declared_license_expression,
244        declared_license_expression_spdx,
245        license_detections,
246        other_license_expression,
247        other_license_expression_spdx,
248        other_license_detections,
249        extracted_license_statement,
250        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
251        ..Default::default()
252    }
253}
254
/// One blank-line-delimited paragraph of a copyright file, as produced by
/// `finalize_copyright_paragraph`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Parsed header fields keyed by lower-cased field name; `body` is left empty.
    metadata: Rfc822Metadata,
    /// Text (trailing whitespace trimmed) and line number of the paragraph's first
    /// `License:` header line, if the paragraph has one. The line number is the
    /// paragraph's start line plus the line's offset within the paragraph.
    license_header_line: Option<(String, usize)>,
}
260
261fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
262    let mut paragraphs = Vec::new();
263    let mut current_lines = Vec::new();
264    let mut current_start_line = 1usize;
265    let mut count = 0usize;
266
267    for (idx, line) in content.lines().enumerate() {
268        count += 1;
269        if count > MAX_ITERATION_COUNT {
270            warn!(
271                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
272            );
273            break;
274        }
275        let line_no = idx + 1;
276        if line.is_empty() {
277            if !current_lines.is_empty() {
278                paragraphs.push(finalize_copyright_paragraph(
279                    std::mem::take(&mut current_lines),
280                    current_start_line,
281                ));
282            }
283            current_start_line = line_no + 1;
284        } else {
285            if current_lines.is_empty() {
286                current_start_line = line_no;
287            }
288            current_lines.push(line.to_string());
289        }
290    }
291
292    if !current_lines.is_empty() {
293        paragraphs.push(finalize_copyright_paragraph(
294            current_lines,
295            current_start_line,
296        ));
297    }
298
299    paragraphs
300}
301
302fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
303    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
304    let mut current_name: Option<String> = None;
305    let mut current_value = String::new();
306    let mut license_header_line = None;
307
308    for (idx, line) in raw_lines.iter().enumerate() {
309        if line.starts_with(' ') || line.starts_with('\t') {
310            if current_name.is_some() {
311                current_value.push('\n');
312                current_value.push_str(line);
313            }
314            continue;
315        }
316
317        if let Some(name) = current_name.take() {
318            add_copyright_header_value(&mut headers, &name, &current_value);
319            current_value.clear();
320        }
321
322        if let Some((name, value)) = line.split_once(':') {
323            let normalized_name = name.trim().to_ascii_lowercase();
324            if normalized_name == "license" && license_header_line.is_none() {
325                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
326            }
327            current_name = Some(normalized_name);
328            current_value = value.trim_start().to_string();
329        }
330    }
331
332    if let Some(name) = current_name.take() {
333        add_copyright_header_value(&mut headers, &name, &current_value);
334    }
335
336    CopyrightParagraph {
337        metadata: Rfc822Metadata {
338            headers,
339            body: String::new(),
340        },
341        license_header_line,
342    }
343}
344
/// Records `value` under `name`, trimming trailing whitespace first.
///
/// The key is always created, even when the trimmed value is empty; in that case
/// nothing is appended to its value list.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
352
353fn build_primary_license_detection(
354    license_name: &str,
355    matched_text: String,
356    line_no: usize,
357) -> LicenseDetection {
358    let normalized = normalize_debian_license_name(license_name);
359    let line = match LineNumber::new(line_no) {
360        Some(l) => l,
361        None => {
362            warn!(
363                "build_primary_license_detection: line number {} out of range, clamping to 1",
364                line_no
365            );
366            LineNumber::new(1).expect("1 is a valid line number")
367        }
368    };
369
370    build_declared_license_detection(
371        &normalized,
372        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
373    )
374}
375
376fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
377    match license_name.trim() {
378        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
379        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
380        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
381        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
382        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
383        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
384        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
385        "public-domain" => {
386            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
387        }
388        other => normalize_declared_license_key(other)
389            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
390    }
391}
392
393fn parse_copyright_holders(text: &str) -> Vec<String> {
394    let mut holders = Vec::new();
395    let mut count = 0usize;
396
397    for line in text.lines() {
398        count += 1;
399        if count > MAX_ITERATION_COUNT {
400            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
401            break;
402        }
403        let line = line.trim();
404        if line.is_empty() {
405            continue;
406        }
407
408        let cleaned = line
409            .trim_start_matches("Copyright")
410            .trim_start_matches("copyright")
411            .trim_start_matches("(C)")
412            .trim_start_matches("(c)")
413            .trim_start_matches("©")
414            .trim();
415
416        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
417            let without_years = &cleaned[year_end..];
418            let holder = without_years
419                .trim_start_matches(',')
420                .trim_start_matches('-')
421                .trim();
422
423            if !holder.is_empty() && holder.len() > 2 {
424                holders.push(holder.to_string());
425            }
426        }
427    }
428
429    holders
430}
431
432fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
433    let mut in_field = false;
434    let mut field_content = String::new();
435    let mut count = 0usize;
436
437    for line in content.lines() {
438        count += 1;
439        if count > MAX_ITERATION_COUNT {
440            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
441            break;
442        }
443        if line.starts_with(field_name) {
444            in_field = true;
445            field_content.push_str(line.trim_start_matches(field_name).trim());
446            field_content.push('\n');
447        } else if in_field {
448            if line.starts_with(char::is_whitespace) {
449                field_content.push_str(line.trim());
450                field_content.push('\n');
451            } else if !line.trim().is_empty() {
452                break;
453            }
454        }
455    }
456
457    let trimmed = field_content.trim();
458    if trimmed.is_empty() {
459        None
460    } else {
461        Some(truncate_field(trimmed.to_string()))
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::super::deb::merge_debian_copyright_into_package;
    use super::super::default_package_data;
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::LineNumber;
    use std::path::PathBuf;

    /// Shorthand for the `LineNumber` expected in an assertion.
    fn line(n: usize) -> LineNumber {
        LineNumber::new(n).unwrap()
    }

    #[test]
    fn test_copyright_parser_is_match() {
        let accepted = [
            "/usr/share/doc/bash/copyright",
            "debian/copyright",
            "src/third_party/gperftools/dist/packages/deb/copyright",
            "ports/zlib/copyright",
            "/tmp/sample_copyright",
        ];
        for candidate in accepted {
            assert!(DebianCopyrightParser::is_match(&PathBuf::from(candidate)));
        }

        let rejected = ["copyright.txt", "/etc/copyright"];
        for candidate in rejected {
            assert!(!DebianCopyrightParser::is_match(&PathBuf::from(candidate)));
        }
    }

    #[test]
    fn test_detect_debian_copyright_datasource() {
        let cases = [
            ("debian/copyright", DatasourceId::DebianCopyrightInSource),
            (
                "src/third_party/gperftools/dist/packages/deb/copyright",
                DatasourceId::DebianCopyrightStandalone,
            ),
            (
                "ports/zlib/copyright",
                DatasourceId::DebianCopyrightStandalone,
            ),
            (
                "/usr/share/doc/bash/copyright",
                DatasourceId::DebianCopyrightInPackage,
            ),
            ("stable_copyright", DatasourceId::DebianCopyrightStandalone),
        ];

        for (location, expected) in cases {
            assert_eq!(
                detect_debian_copyright_datasource(&PathBuf::from(location)),
                expected
            );
        }
    }

    #[test]
    fn test_extract_package_name_from_path() {
        let bash = PathBuf::from("/usr/share/doc/bash/copyright");
        assert_eq!(
            extract_package_name_from_path(&bash),
            Some("bash".to_string())
        );

        let libseccomp = PathBuf::from("/usr/share/doc/libseccomp2/copyright");
        assert_eq!(
            extract_package_name_from_path(&libseccomp),
            Some("libseccomp2".to_string())
        );

        let in_source = PathBuf::from("debian/copyright");
        assert_eq!(extract_package_name_from_path(&in_source), None);

        let port = PathBuf::from("ports/zlib/copyright");
        assert_eq!(
            extract_standalone_package_name_from_path(
                &port,
                DatasourceId::DebianCopyrightStandalone,
            ),
            Some("zlib".to_string())
        );
    }

    #[test]
    fn test_parse_copyright_dep5_format() {
        let content = concat!(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
            "Upstream-Name: libseccomp\n",
            "Source: https://sourceforge.net/projects/libseccomp/\n",
            "\n",
            "Files: *\n",
            "Copyright: 2012 Paul Moore <pmoore@redhat.com>\n",
            " 2012 Ashley Lai <adlai@us.ibm.com>\n",
            "License: LGPL-2.1\n",
            "\n",
            "License: LGPL-2.1\n",
            " This library is free software\n",
        );
        let pkg = parse_copyright_file(content, Some("libseccomp"));

        assert_eq!(pkg.name, Some("libseccomp".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
        assert_eq!(
            pkg.extracted_license_statement,
            Some("LGPL-2.1".to_string())
        );
        assert!(pkg.parties.len() >= 2);

        let first_party = &pkg.parties[0];
        assert_eq!(first_party.role, Some("copyright-holder".to_string()));
        assert!(first_party.name.as_ref().unwrap().contains("Paul Moore"));
    }

    #[test]
    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
        let fixture = PathBuf::from(
            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(pkg.name, Some("bsdutils".to_string()));
        let statement = pkg
            .extracted_license_statement
            .as_deref()
            .expect("license statement should exist");
        assert!(statement.contains("GPL-2+"));
        assert!(!pkg.license_detections.is_empty());

        let primary_match = &pkg.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary_match.start_line, line(47));
        assert_eq!(primary_match.end_line, line(47));
    }

    #[test]
    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
        let fixture = PathBuf::from("testdata/debian/copyright/copyright");
        let pkg = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(pkg.license_detections.len(), 1);
        assert_eq!(pkg.other_license_detections.len(), 4);

        let primary_match = &pkg.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: LGPL-2.1")
        );
        assert_eq!(primary_match.start_line, line(11));

        let mut ordered_lines: Vec<usize> = Vec::new();
        let mut ordered_texts: Vec<&str> = Vec::new();
        for detection in &pkg.other_license_detections {
            let first_match = &detection.matches[0];
            ordered_lines.push(first_match.start_line.get());
            ordered_texts.push(first_match.matched_text.as_deref().unwrap());
        }

        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
        assert_eq!(
            ordered_texts,
            vec![
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
                "License: LGPL-2.1",
            ]
        );
    }

    #[test]
    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
        let fixture = PathBuf::from(
            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
        );
        let pkg = DebianCopyrightParser::extract_first_package(&fixture);

        let is_zlib = |detection: &&LicenseDetection| {
            detection.matches[0].matched_text.as_deref() == Some("License: Zlib")
        };

        let first_zlib = pkg
            .other_license_detections
            .iter()
            .find(is_zlib)
            .expect("at least one Zlib license paragraph should be detected");
        assert_eq!(
            first_zlib.matches[0].matched_text.as_deref(),
            Some("License: Zlib")
        );

        let last_zlib = pkg
            .other_license_detections
            .iter()
            .rev()
            .find(is_zlib)
            .expect("bottom standalone Zlib license paragraph should be detected");
        assert_eq!(last_zlib.matches[0].start_line, line(732));
        assert_eq!(last_zlib.matches[0].end_line, line(732));
    }

    #[test]
    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
        let fixture =
            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
        let pkg = DebianCopyrightParser::extract_first_package(&fixture);

        assert_eq!(pkg.license_detections.len(), 1);
        let primary_match = &pkg.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: LGPL-3+ or GPL-2+")
        );
        assert_eq!(primary_match.start_line, line(8));
        assert_eq!(primary_match.end_line, line(8));

        let has_gpl2_plus = pkg.other_license_detections.iter().any(|detection| {
            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
        });
        assert!(has_gpl2_plus);
    }

    #[test]
    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
        let pkg = parse_copyright_file(content, Some("foo"));

        assert_eq!(pkg.license_detections.len(), 1);
        let primary_match = &pkg.license_detections[0].matches[0];
        assert_eq!(
            primary_match.matched_text.as_deref(),
            Some("License: GPL-2+")
        );
        assert_eq!(primary_match.start_line, line(7));
    }

    #[test]
    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
        let raw_lines: Vec<String> = [
            "Files: *",
            "Copyright: 2024 Example Org",
            "License: Apache-2.0",
            " Licensed under the Apache License, Version 2.0.",
        ]
        .iter()
        .map(|text| text.to_string())
        .collect();

        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
        let reference = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
            .into_iter()
            .next()
            .expect("reference RFC822 paragraph should parse");

        assert_eq!(paragraph.metadata.headers, reference.headers);
        assert_eq!(paragraph.metadata.body, reference.body);
        assert_eq!(
            paragraph.license_header_line,
            Some(("License: Apache-2.0".to_string(), 12))
        );
    }

    #[test]
    fn test_parse_copyright_unstructured() {
        let content = concat!(
            "This package was debianized by John Doe.\n",
            "\n",
            "Upstream Authors:\n",
            "    Jane Smith\n",
            "\n",
            "Copyright:\n",
            "    2009 10gen\n",
            "\n",
            "License:\n",
            "    SSPL\n",
        );
        let pkg = parse_copyright_file(content, Some("mongodb"));

        assert_eq!(pkg.name, Some("mongodb".to_string()));
        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
        assert!(!pkg.parties.is_empty());
    }

    #[test]
    fn test_parse_copyright_holders() {
        let text = concat!(
            "2012 Paul Moore <pmoore@redhat.com>\n",
            "2012 Ashley Lai <adlai@us.ibm.com>\n",
            "Copyright (C) 2015-2018 Example Corp",
        );
        let holders = parse_copyright_holders(text);

        assert!(holders.len() >= 3);
        for expected in ["Paul Moore", "Example Corp"] {
            assert!(holders.iter().any(|holder| holder.contains(expected)));
        }
    }

    #[test]
    fn test_parse_copyright_empty() {
        let pkg = parse_copyright_file(
            "This is just some text without proper copyright info.",
            Some("test"),
        );

        assert_eq!(pkg.name, Some("test".to_string()));
        assert!(pkg.parties.is_empty());
        assert!(pkg.extracted_license_statement.is_none());
    }

    #[test]
    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
        let content = concat!(
            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n",
            "Upstream-Name: demo\n",
            "\n",
            "Files: *\n",
            "Copyright: 2024 Example\n",
            "License: MIT\n",
            "\n",
            "Files: debian/*\n",
            "Copyright: 2024 Debian Example\n",
            "License: Apache-2.0\n",
        );
        let copyright = parse_copyright_file(content, Some("demo"));
        let mut target = default_package_data(DatasourceId::DebianDeb);

        merge_debian_copyright_into_package(&mut target, &copyright);

        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            target.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(
            target.other_license_expression.as_deref(),
            Some("apache-2.0")
        );
        assert_eq!(
            target.other_license_expression_spdx.as_deref(),
            Some("Apache-2.0")
        );
        assert_eq!(target.license_detections.len(), 1);
        assert_eq!(target.other_license_detections.len(), 1);
    }
}