Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41    DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
42    PackageType, Party,
43};
44use crate::parsers::rfc822::{self, Rfc822Metadata};
45use crate::parsers::utils::{read_file_to_string, split_name_email};
46use crate::utils::spdx::combine_license_expressions;
47
48use super::PackageParser;
49use super::license_normalization::{
50    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
51    normalize_declared_license_key,
52};
53
54const PACKAGE_TYPE: PackageType = PackageType::Deb;
55
56fn default_package_data(datasource_id: DatasourceId) -> PackageData {
57    PackageData {
58        package_type: Some(PACKAGE_TYPE),
59        datasource_id: Some(datasource_id),
60        ..Default::default()
61    }
62}
63
// Namespace detection clues from version strings
/// Substrings whose presence in a lowercased version string selects the
/// `debian` namespace (see `detect_namespace`).
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings whose presence in a lowercased version string selects the
/// `ubuntu` namespace; `detect_namespace` checks these before the Debian ones.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

// Namespace detection clues from maintainer fields
/// Substrings whose presence in a lowercased maintainer value selects the
/// `debian` namespace. Only consulted when the version gave no answer.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings whose presence in a lowercased maintainer value selects the
/// `ubuntu` namespace. Only consulted when the version gave no answer.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
77
78// Dependency field names and their scope/flags
/// Describes how one control-file relationship field maps onto a dependency.
struct DepFieldSpec {
    /// Lowercase header name looked up in the paragraph.
    field: &'static str,
    /// Scope label recorded on every dependency produced from the field.
    scope: &'static str,
    /// Value stored in the dependency's `is_runtime` flag.
    is_runtime: bool,
    /// Value stored in the dependency's `is_optional` flag.
    is_optional: bool,
}

impl DepFieldSpec {
    /// Spec for a field flagged as a runtime relationship; the scope label is
    /// the field name itself.
    const fn runtime(field: &'static str, is_optional: bool) -> Self {
        Self {
            field,
            scope: field,
            is_runtime: true,
            is_optional,
        }
    }

    /// Spec for a field that is neither runtime nor optional; the scope label
    /// is the field name itself.
    const fn non_runtime(field: &'static str) -> Self {
        Self {
            field,
            scope: field,
            is_runtime: false,
            is_optional: false,
        }
    }
}

/// Every relationship field extracted as dependencies, in extraction order.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships.
    DepFieldSpec::runtime("depends", false),
    DepFieldSpec::runtime("pre-depends", false),
    DepFieldSpec::runtime("recommends", true),
    DepFieldSpec::runtime("suggests", true),
    // Package-interaction relationships.
    DepFieldSpec::non_runtime("breaks"),
    DepFieldSpec::non_runtime("conflicts"),
    DepFieldSpec::non_runtime("replaces"),
    DepFieldSpec::non_runtime("provides"),
    // Build-time relationships.
    DepFieldSpec::non_runtime("build-depends"),
    DepFieldSpec::non_runtime("build-depends-indep"),
    DepFieldSpec::non_runtime("build-conflicts"),
];
154
155// ---------------------------------------------------------------------------
156// DebianControlParser: debian/control files (source + binary paragraphs)
157// ---------------------------------------------------------------------------
158
159pub struct DebianControlParser;
160
161impl PackageParser for DebianControlParser {
162    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
163
164    fn is_match(path: &Path) -> bool {
165        if let Some(name) = path.file_name()
166            && name == "control"
167            && let Some(parent) = path.parent()
168            && let Some(parent_name) = parent.file_name()
169        {
170            return parent_name == "debian";
171        }
172        false
173    }
174
175    fn extract_packages(path: &Path) -> Vec<PackageData> {
176        let content = match read_file_to_string(path) {
177            Ok(c) => c,
178            Err(e) => {
179                warn!("Failed to read debian/control at {:?}: {}", path, e);
180                return vec![default_package_data(DatasourceId::DebianControlInSource)];
181            }
182        };
183
184        let packages = parse_debian_control(&content);
185        if packages.is_empty() {
186            vec![default_package_data(DatasourceId::DebianControlInSource)]
187        } else {
188            packages
189        }
190    }
191}
192
193// ---------------------------------------------------------------------------
194// DebianInstalledParser: /var/lib/dpkg/status
195// ---------------------------------------------------------------------------
196
197pub struct DebianInstalledParser;
198
199impl PackageParser for DebianInstalledParser {
200    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
201
202    fn is_match(path: &Path) -> bool {
203        let path_str = path.to_string_lossy();
204        path_str.ends_with("var/lib/dpkg/status")
205    }
206
207    fn extract_packages(path: &Path) -> Vec<PackageData> {
208        let content = match read_file_to_string(path) {
209            Ok(c) => c,
210            Err(e) => {
211                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
212                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
213            }
214        };
215
216        let packages = parse_dpkg_status(&content);
217        if packages.is_empty() {
218            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
219        } else {
220            packages
221        }
222    }
223}
224
225pub struct DebianDistrolessInstalledParser;
226
227impl PackageParser for DebianDistrolessInstalledParser {
228    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
229
230    fn is_match(path: &Path) -> bool {
231        let path_str = path.to_string_lossy();
232        path_str.contains("var/lib/dpkg/status.d/")
233    }
234
235    fn extract_packages(path: &Path) -> Vec<PackageData> {
236        let content = match read_file_to_string(path) {
237            Ok(c) => c,
238            Err(e) => {
239                warn!("Failed to read distroless status file at {:?}: {}", path, e);
240                return vec![default_package_data(
241                    DatasourceId::DebianDistrolessInstalledDb,
242                )];
243            }
244        };
245
246        vec![parse_distroless_status(&content)]
247    }
248}
249
250fn parse_distroless_status(content: &str) -> PackageData {
251    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
252
253    if paragraphs.is_empty() {
254        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
255    }
256
257    build_package_from_paragraph(
258        &paragraphs[0],
259        None,
260        DatasourceId::DebianDistrolessInstalledDb,
261    )
262    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
263}
264
265// ---------------------------------------------------------------------------
266// Parsing logic
267// ---------------------------------------------------------------------------
268
269/// Parses a debian/control file into PackageData entries.
270///
271/// A debian/control file has a Source paragraph followed by one or more Binary
272/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
273/// into each binary package.
274fn parse_debian_control(content: &str) -> Vec<PackageData> {
275    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
276    if paragraphs.is_empty() {
277        return Vec::new();
278    }
279
280    // Determine if first paragraph is a Source paragraph
281    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
282
283    let (source_paragraph, binary_start) = if has_source {
284        (Some(&paragraphs[0]), 1)
285    } else {
286        (None, 0)
287    };
288
289    // Extract source-level shared metadata
290    let source_meta = source_paragraph.map(extract_source_meta);
291
292    let mut packages = Vec::new();
293
294    for para in &paragraphs[binary_start..] {
295        if let Some(pkg) = build_package_from_paragraph(
296            para,
297            source_meta.as_ref(),
298            DatasourceId::DebianControlInSource,
299        ) {
300            packages.push(pkg);
301        }
302    }
303
304    if packages.is_empty()
305        && let Some(source_para) = source_paragraph
306        && let Some(pkg) = build_package_from_source_paragraph(source_para)
307    {
308        packages.push(pkg);
309    }
310
311    packages
312}
313
314/// Parses a dpkg/status file into PackageData entries.
315///
316/// Each paragraph represents an installed package. Only packages with
317/// `Status: install ok installed` are included.
318fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
319    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
320    let mut packages = Vec::new();
321
322    for para in &paragraphs {
323        let status = rfc822::get_header_first(&para.headers, "status");
324        if status.as_deref() != Some("install ok installed") {
325            continue;
326        }
327
328        if let Some(pkg) =
329            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
330        {
331            packages.push(pkg);
332        }
333    }
334
335    packages
336}
337
338// ---------------------------------------------------------------------------
339// Source paragraph metadata (shared across binary packages)
340// ---------------------------------------------------------------------------
341
/// Metadata taken from a source paragraph and shared with the packages built
/// from the same control file.
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders, in that order.
    parties: Vec<Party>,
    /// Value of the `Homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `Vcs-Git` header.
    vcs_url: Option<String>,
    /// Value of the `Vcs-Browser` header.
    code_view_url: Option<String>,
    /// Value of the `Bugs` header.
    bug_tracking_url: Option<String>,
}
349
350fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
351    let mut parties = Vec::new();
352
353    // Maintainer
354    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
355        let (name, email) = split_name_email(&maintainer);
356        parties.push(Party {
357            r#type: Some("person".to_string()),
358            role: Some("maintainer".to_string()),
359            name,
360            email,
361            url: None,
362            organization: None,
363            organization_url: None,
364            timezone: None,
365        });
366    }
367
368    // Original-Maintainer
369    if let Some(orig_maintainer) =
370        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
371    {
372        let (name, email) = split_name_email(&orig_maintainer);
373        parties.push(Party {
374            r#type: Some("person".to_string()),
375            role: Some("maintainer".to_string()),
376            name,
377            email,
378            url: None,
379            organization: None,
380            organization_url: None,
381            timezone: None,
382        });
383    }
384
385    // Uploaders (comma-separated)
386    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
387        for uploader in uploaders_str.split(',') {
388            let trimmed = uploader.trim();
389            if !trimmed.is_empty() {
390                let (name, email) = split_name_email(trimmed);
391                parties.push(Party {
392                    r#type: Some("person".to_string()),
393                    role: Some("uploader".to_string()),
394                    name,
395                    email,
396                    url: None,
397                    organization: None,
398                    organization_url: None,
399                    timezone: None,
400                });
401            }
402        }
403    }
404
405    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage");
406
407    // VCS-Git: may contain branch info after space
408    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
409        .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
410
411    let code_view_url = rfc822::get_header_first(&paragraph.headers, "vcs-browser");
412
413    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs");
414
415    SourceMeta {
416        parties,
417        homepage_url,
418        vcs_url,
419        code_view_url,
420        bug_tracking_url,
421    }
422}
423
424// ---------------------------------------------------------------------------
425// Package building
426// ---------------------------------------------------------------------------
427
428fn build_package_from_paragraph(
429    paragraph: &Rfc822Metadata,
430    source_meta: Option<&SourceMeta>,
431    datasource_id: DatasourceId,
432) -> Option<PackageData> {
433    let name = rfc822::get_header_first(&paragraph.headers, "package")?;
434    let version = rfc822::get_header_first(&paragraph.headers, "version");
435    let architecture = rfc822::get_header_first(&paragraph.headers, "architecture");
436    let description = rfc822::get_header_first(&paragraph.headers, "description");
437    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
438    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage");
439    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
440    let section = rfc822::get_header_first(&paragraph.headers, "section");
441    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
442    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
443
444    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
445
446    // Build parties: use source_meta parties if available, otherwise parse from paragraph
447    let parties = if let Some(meta) = source_meta {
448        meta.parties.clone()
449    } else {
450        let mut p = Vec::new();
451        if let Some(m) = &maintainer_str {
452            let (n, e) = split_name_email(m);
453            p.push(Party {
454                r#type: Some("person".to_string()),
455                role: Some("maintainer".to_string()),
456                name: n,
457                email: e,
458                url: None,
459                organization: None,
460                organization_url: None,
461                timezone: None,
462            });
463        }
464        p
465    };
466
467    // Resolve homepage: paragraph's own, or from source metadata
468    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
469    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
470    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
471    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
472
473    // Build PURL
474    let purl = build_debian_purl(
475        &name,
476        version.as_deref(),
477        namespace.as_deref(),
478        architecture.as_deref(),
479    );
480
481    // Parse dependencies from all dependency fields
482    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
483
484    // Keywords from section
485    let keywords = section.into_iter().collect();
486
487    // Source packages
488    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
489
490    // Extra data
491    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
492    if let Some(ma) = &multi_arch
493        && !ma.is_empty()
494    {
495        extra_data.insert(
496            "multi_arch".to_string(),
497            serde_json::Value::String(ma.clone()),
498        );
499    }
500    if let Some(size_str) = &installed_size
501        && let Ok(size) = size_str.parse::<u64>()
502    {
503        extra_data.insert(
504            "installed_size".to_string(),
505            serde_json::Value::Number(serde_json::Number::from(size)),
506        );
507    }
508
509    // Qualifiers for architecture
510    let qualifiers = architecture.as_ref().map(|arch| {
511        let mut q = HashMap::new();
512        q.insert("arch".to_string(), arch.clone());
513        q
514    });
515
516    Some(PackageData {
517        package_type: Some(PACKAGE_TYPE),
518        namespace: namespace.clone(),
519        name: Some(name),
520        version,
521        qualifiers,
522        subpath: None,
523        primary_language: None,
524        description,
525        release_date: None,
526        parties,
527        keywords,
528        homepage_url,
529        download_url: None,
530        size: None,
531        sha1: None,
532        md5: None,
533        sha256: None,
534        sha512: None,
535        bug_tracking_url,
536        code_view_url,
537        vcs_url,
538        copyright: None,
539        holder: None,
540        declared_license_expression: None,
541        declared_license_expression_spdx: None,
542        license_detections: Vec::new(),
543        other_license_expression: None,
544        other_license_expression_spdx: None,
545        other_license_detections: Vec::new(),
546        extracted_license_statement: None,
547        notice_text: None,
548        source_packages,
549        file_references: Vec::new(),
550        is_private: false,
551        is_virtual: false,
552        extra_data: if extra_data.is_empty() {
553            None
554        } else {
555            Some(extra_data)
556        },
557        dependencies,
558        repository_homepage_url: None,
559        repository_download_url: None,
560        api_data_url: None,
561        datasource_id: Some(datasource_id),
562        purl,
563    })
564}
565
566fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
567    let name = rfc822::get_header_first(&paragraph.headers, "source")?;
568    let version = rfc822::get_header_first(&paragraph.headers, "version");
569    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
570
571    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
572    let source_meta = extract_source_meta(paragraph);
573
574    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
575    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
576
577    let section = rfc822::get_header_first(&paragraph.headers, "section");
578    let keywords = section.into_iter().collect();
579
580    Some(PackageData {
581        package_type: Some(PACKAGE_TYPE),
582        namespace: namespace.clone(),
583        name: Some(name),
584        version,
585        qualifiers: None,
586        subpath: None,
587        primary_language: None,
588        description: None,
589        release_date: None,
590        parties: source_meta.parties,
591        keywords,
592        homepage_url: source_meta.homepage_url,
593        download_url: None,
594        size: None,
595        sha1: None,
596        md5: None,
597        sha256: None,
598        sha512: None,
599        bug_tracking_url: source_meta.bug_tracking_url,
600        code_view_url: source_meta.code_view_url,
601        vcs_url: source_meta.vcs_url,
602        copyright: None,
603        holder: None,
604        declared_license_expression: None,
605        declared_license_expression_spdx: None,
606        license_detections: Vec::new(),
607        other_license_expression: None,
608        other_license_expression_spdx: None,
609        other_license_detections: Vec::new(),
610        extracted_license_statement: None,
611        notice_text: None,
612        source_packages: Vec::new(),
613        file_references: Vec::new(),
614        is_private: false,
615        is_virtual: false,
616        extra_data: None,
617        dependencies,
618        repository_homepage_url: None,
619        repository_download_url: None,
620        api_data_url: None,
621        datasource_id: Some(DatasourceId::DebianControlInSource),
622        purl,
623    })
624}
625
626// ---------------------------------------------------------------------------
627// Namespace detection
628// ---------------------------------------------------------------------------
629
630fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
631    // Check version clues first
632    if let Some(ver) = version {
633        let ver_lower = ver.to_lowercase();
634        for clue in VERSION_CLUES_UBUNTU {
635            if ver_lower.contains(clue) {
636                return Some("ubuntu".to_string());
637            }
638        }
639        for clue in VERSION_CLUES_DEBIAN {
640            if ver_lower.contains(clue) {
641                return Some("debian".to_string());
642            }
643        }
644    }
645
646    // Check maintainer clues
647    if let Some(maint) = maintainer {
648        let maint_lower = maint.to_lowercase();
649        for clue in MAINTAINER_CLUES_UBUNTU {
650            if maint_lower.contains(clue) {
651                return Some("ubuntu".to_string());
652            }
653        }
654        for clue in MAINTAINER_CLUES_DEBIAN {
655            if maint_lower.contains(clue) {
656                return Some("debian".to_string());
657            }
658        }
659    }
660
661    // Default to debian
662    Some("debian".to_string())
663}
664
665// ---------------------------------------------------------------------------
666// PURL generation
667// ---------------------------------------------------------------------------
668
669fn build_debian_purl(
670    name: &str,
671    version: Option<&str>,
672    namespace: Option<&str>,
673    architecture: Option<&str>,
674) -> Option<String> {
675    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
676
677    if let Some(ns) = namespace {
678        purl.with_namespace(ns).ok()?;
679    }
680
681    if let Some(ver) = version {
682        purl.with_version(ver).ok()?;
683    }
684
685    if let Some(arch) = architecture {
686        purl.add_qualifier("arch", arch).ok()?;
687    }
688
689    Some(purl.to_string())
690}
691
692// ---------------------------------------------------------------------------
693// Dependency parsing
694// ---------------------------------------------------------------------------
695
696fn parse_all_dependencies(
697    headers: &HashMap<String, Vec<String>>,
698    namespace: Option<&str>,
699) -> Vec<Dependency> {
700    let mut dependencies = Vec::new();
701
702    for spec in DEP_FIELDS {
703        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
704            dependencies.extend(parse_dependency_field(
705                &dep_str,
706                spec.scope,
707                spec.is_runtime,
708                spec.is_optional,
709                namespace,
710            ));
711        }
712    }
713
714    dependencies
715}
716
717/// Parses a Debian dependency field value.
718///
719/// Debian dependencies are comma-separated, with optional version constraints
720/// in parentheses and alternative packages separated by `|`.
721///
722/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
723///
724/// Alternatives (|) are treated as separate optional dependencies.
725fn parse_dependency_field(
726    dep_str: &str,
727    scope: &str,
728    is_runtime: bool,
729    is_optional: bool,
730    namespace: Option<&str>,
731) -> Vec<Dependency> {
732    let mut deps = Vec::new();
733
734    // Regex for parsing individual dependency: name (operator version)
735    // Debian operators: <<, <=, =, >=, >>
736    let dep_re = Regex::new(
737        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
738    )
739    .unwrap();
740
741    for group in dep_str.split(',') {
742        let group = group.trim();
743        if group.is_empty() {
744            continue;
745        }
746
747        // Handle alternatives (|)
748        let alternatives: Vec<&str> = group.split('|').collect();
749        let has_alternatives = alternatives.len() > 1;
750
751        for alt in alternatives {
752            let alt = alt.trim();
753            if alt.is_empty() {
754                continue;
755            }
756
757            if let Some(caps) = dep_re.captures(alt) {
758                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
759                let operator = caps.get(2).map(|m| m.as_str().trim());
760                let version = caps.get(3).map(|m| m.as_str().trim());
761
762                if pkg_name.is_empty() {
763                    continue;
764                }
765
766                // Skip substitution variables like ${shlibs:Depends}
767                if pkg_name.starts_with('$') {
768                    continue;
769                }
770
771                let extracted_requirement = match (operator, version) {
772                    (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
773                    _ => None,
774                };
775
776                let is_pinned = operator.map(|op| op == "=");
777
778                let purl = build_debian_purl(pkg_name, None, namespace, None);
779
780                deps.push(Dependency {
781                    purl,
782                    extracted_requirement,
783                    scope: Some(scope.to_string()),
784                    is_runtime: Some(is_runtime),
785                    is_optional: Some(is_optional || has_alternatives),
786                    is_pinned,
787                    is_direct: Some(true),
788                    resolved_package: None,
789                    extra_data: None,
790                });
791            }
792        }
793    }
794
795    deps
796}
797
798// ---------------------------------------------------------------------------
799// Source field parsing
800// ---------------------------------------------------------------------------
801
802/// Parses the Source field which may contain a version in parentheses.
803///
804/// Format: `source-name` or `source-name (version)`
805fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
806    let Some(source_str) = source else {
807        return Vec::new();
808    };
809
810    let trimmed = source_str.trim();
811    if trimmed.is_empty() {
812        return Vec::new();
813    }
814
815    // Extract name and optional version from "name (version)" format
816    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
817        let name = trimmed[..paren_start].trim();
818        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
819        (
820            name,
821            if version.is_empty() {
822                None
823            } else {
824                Some(version)
825            },
826        )
827    } else {
828        (trimmed, None)
829    };
830
831    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
832        vec![purl]
833    } else {
834        Vec::new()
835    }
836}
837
838// ---------------------------------------------------------------------------
839// Parser registration macros
840// ---------------------------------------------------------------------------
841
// Registration metadata for the three control/status parsers above.
// NOTE(review): the positional arguments appear to be a description, the
// path glob patterns, the package type, a primary language (left empty here)
// and a documentation URL — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
865
866// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
867// but we register it for the single-package interface too.
868
869// ============================================================================
870// WAVE 2 PARSERS: Additional Debian Format Support
871// ============================================================================
872
873/// Parser for Debian Source Control (.dsc) files
874pub struct DebianDscParser;
875
876impl PackageParser for DebianDscParser {
877    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
878
879    fn is_match(path: &Path) -> bool {
880        path.extension().and_then(|e| e.to_str()) == Some("dsc")
881    }
882
883    fn extract_packages(path: &Path) -> Vec<PackageData> {
884        let content = match read_file_to_string(path) {
885            Ok(c) => c,
886            Err(e) => {
887                warn!("Failed to read .dsc file {:?}: {}", path, e);
888                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
889            }
890        };
891
892        vec![parse_dsc_content(&content)]
893    }
894}
895
// Registration metadata for the `.dsc` parser.
// NOTE(review): the positional arguments appear to be a description, the
// path glob patterns, the package type, a primary language (left empty here)
// and a documentation URL — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian source control file (.dsc)",
    &["**/*.dsc"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
903
/// Removes PGP clear-sign armor from `content`, keeping only the signed text.
///
/// Dropped lines:
/// * the `BEGIN PGP SIGNED MESSAGE` marker, the `Hash:` lines that follow it,
///   and the blank line that ends that header (only while nothing has been
///   emitted yet);
/// * everything from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`.
///
/// Every kept line is written back followed by `\n`, so unsigned input passes
/// through unchanged apart from line-ending normalisation.
fn strip_pgp_signature(content: &str) -> String {
    const SIGNED_MESSAGE_BEGIN: &str = "-----BEGIN PGP SIGNED MESSAGE-----";
    const SIGNATURE_BEGIN: &str = "-----BEGIN PGP SIGNATURE-----";
    const SIGNATURE_END: &str = "-----END PGP SIGNATURE-----";

    let mut cleaned = String::new();
    // True between the signed-message marker and the blank line ending its header.
    let mut in_armor_header = false;
    // True while inside the trailing signature block.
    let mut in_signature = false;

    for line in content.lines() {
        if line.starts_with(SIGNED_MESSAGE_BEGIN) {
            in_armor_header = true;
        } else if line.starts_with(SIGNATURE_BEGIN) {
            in_signature = true;
        } else if line.starts_with(SIGNATURE_END) {
            in_signature = false;
        } else if in_armor_header && line.starts_with("Hash:") {
            // Armor header line: not part of the signed text.
        } else if in_armor_header && line.is_empty() && cleaned.is_empty() {
            // Blank separator between the armor header and the signed text.
            in_armor_header = false;
        } else if !in_signature {
            cleaned.push_str(line);
            cleaned.push('\n');
        }
    }

    cleaned
}
937
938fn parse_dsc_content(content: &str) -> PackageData {
939    let clean_content = strip_pgp_signature(content);
940    let metadata = rfc822::parse_rfc822_content(&clean_content);
941    let headers = &metadata.headers;
942
943    let name = rfc822::get_header_first(headers, "source");
944    let version = rfc822::get_header_first(headers, "version");
945    let architecture = rfc822::get_header_first(headers, "architecture");
946    let namespace = Some("debian".to_string());
947
948    let mut package = PackageData {
949        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
950        package_type: Some(PACKAGE_TYPE),
951        namespace: namespace.clone(),
952        name: name.clone(),
953        version: version.clone(),
954        description: rfc822::get_header_first(headers, "description"),
955        homepage_url: rfc822::get_header_first(headers, "homepage"),
956        vcs_url: rfc822::get_header_first(headers, "vcs-git"),
957        code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
958        ..Default::default()
959    };
960
961    // Build PURL with architecture qualifier
962    if let (Some(n), Some(v)) = (&name, &version) {
963        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
964    }
965
966    // Set source_packages to point to the source itself (without version)
967    if let Some(n) = &name
968        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
969    {
970        package.source_packages.push(source_purl);
971    }
972
973    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
974        let (name_opt, email_opt) = split_name_email(&maintainer);
975        package.parties.push(Party {
976            r#type: None,
977            role: Some("maintainer".to_string()),
978            name: name_opt,
979            email: email_opt,
980            url: None,
981            organization: None,
982            organization_url: None,
983            timezone: None,
984        });
985    }
986
987    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
988        for uploader in uploaders_str.split(',') {
989            let uploader = uploader.trim();
990            if uploader.is_empty() {
991                continue;
992            }
993            let (name_opt, email_opt) = split_name_email(uploader);
994            package.parties.push(Party {
995                r#type: None,
996                role: Some("uploader".to_string()),
997                name: name_opt,
998                email: email_opt,
999                url: None,
1000                organization: None,
1001                organization_url: None,
1002                timezone: None,
1003            });
1004        }
1005    }
1006
1007    // Parse Build-Depends
1008    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1009        package.dependencies.extend(parse_dependency_field(
1010            &build_deps,
1011            "build",
1012            false,
1013            false,
1014            namespace.as_deref(),
1015        ));
1016    }
1017
1018    // Store Standards-Version in extra_data
1019    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1020        let map = package.extra_data.get_or_insert_with(HashMap::new);
1021        map.insert("standards_version".to_string(), standards.into());
1022    }
1023
1024    package
1025}
1026
1027/// Parser for Debian original source tarballs (*.orig.tar.*)
1028pub struct DebianOrigTarParser;
1029
1030impl PackageParser for DebianOrigTarParser {
1031    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1032
1033    fn is_match(path: &Path) -> bool {
1034        path.file_name()
1035            .and_then(|n| n.to_str())
1036            .map(|name| name.contains(".orig.tar."))
1037            .unwrap_or(false)
1038    }
1039
1040    fn extract_packages(path: &Path) -> Vec<PackageData> {
1041        let filename = match path.file_name().and_then(|n| n.to_str()) {
1042            Some(f) => f,
1043            None => {
1044                return vec![default_package_data(
1045                    DatasourceId::DebianOriginalSourceTarball,
1046                )];
1047            }
1048        };
1049
1050        vec![parse_source_tarball_filename(
1051            filename,
1052            DatasourceId::DebianOriginalSourceTarball,
1053        )]
1054    }
1055}
1056
1057crate::register_parser!(
1058    "Debian original source tarball",
1059    &["**/*.orig.tar.*"],
1060    "deb",
1061    "",
1062    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1063);
1064
1065/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1066pub struct DebianDebianTarParser;
1067
1068impl PackageParser for DebianDebianTarParser {
1069    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1070
1071    fn is_match(path: &Path) -> bool {
1072        path.file_name()
1073            .and_then(|n| n.to_str())
1074            .map(|name| name.contains(".debian.tar."))
1075            .unwrap_or(false)
1076    }
1077
1078    fn extract_packages(path: &Path) -> Vec<PackageData> {
1079        let filename = match path.file_name().and_then(|n| n.to_str()) {
1080            Some(f) => f,
1081            None => {
1082                return vec![default_package_data(
1083                    DatasourceId::DebianSourceMetadataTarball,
1084                )];
1085            }
1086        };
1087
1088        vec![parse_source_tarball_filename(
1089            filename,
1090            DatasourceId::DebianSourceMetadataTarball,
1091        )]
1092    }
1093}
1094
1095crate::register_parser!(
1096    "Debian source metadata tarball",
1097    &["**/*.debian.tar.*"],
1098    "deb",
1099    "",
1100    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1101);
1102
1103fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1104    let without_tar_ext = filename
1105        .trim_end_matches(".gz")
1106        .trim_end_matches(".xz")
1107        .trim_end_matches(".bz2")
1108        .trim_end_matches(".tar");
1109
1110    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1111    if parts.len() < 2 {
1112        return default_package_data(datasource_id);
1113    }
1114
1115    let name = parts[0].to_string();
1116    let version_with_suffix = parts[1];
1117
1118    let version = version_with_suffix
1119        .trim_end_matches(".orig")
1120        .trim_end_matches(".debian")
1121        .to_string();
1122
1123    let namespace = Some("debian".to_string());
1124
1125    PackageData {
1126        datasource_id: Some(datasource_id),
1127        package_type: Some(PACKAGE_TYPE),
1128        namespace: namespace.clone(),
1129        name: Some(name.clone()),
1130        version: Some(version.clone()),
1131        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1132        ..Default::default()
1133    }
1134}
1135
1136/// Parser for Debian installed file lists (*.list)
1137pub struct DebianInstalledListParser;
1138
1139impl PackageParser for DebianInstalledListParser {
1140    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1141
1142    fn is_match(path: &Path) -> bool {
1143        path.extension().and_then(|e| e.to_str()) == Some("list")
1144            && path
1145                .to_str()
1146                .map(|p| p.contains("/var/lib/dpkg/info/"))
1147                .unwrap_or(false)
1148    }
1149
1150    fn extract_packages(path: &Path) -> Vec<PackageData> {
1151        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1152            Some(f) => f,
1153            None => {
1154                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1155            }
1156        };
1157
1158        let content = match read_file_to_string(path) {
1159            Ok(c) => c,
1160            Err(e) => {
1161                warn!("Failed to read .list file {:?}: {}", path, e);
1162                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1163            }
1164        };
1165
1166        vec![parse_debian_file_list(
1167            &content,
1168            filename,
1169            DatasourceId::DebianInstalledFilesList,
1170        )]
1171    }
1172}
1173
1174crate::register_parser!(
1175    "Debian installed files list",
1176    &["**/var/lib/dpkg/info/*.list"],
1177    "deb",
1178    "",
1179    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1180);
1181
1182/// Parser for Debian installed MD5 checksum files (*.md5sums)
1183pub struct DebianInstalledMd5sumsParser;
1184
1185impl PackageParser for DebianInstalledMd5sumsParser {
1186    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1187
1188    fn is_match(path: &Path) -> bool {
1189        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1190            && path
1191                .to_str()
1192                .map(|p| p.contains("/var/lib/dpkg/info/"))
1193                .unwrap_or(false)
1194    }
1195
1196    fn extract_packages(path: &Path) -> Vec<PackageData> {
1197        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1198            Some(f) => f,
1199            None => {
1200                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1201            }
1202        };
1203
1204        let content = match read_file_to_string(path) {
1205            Ok(c) => c,
1206            Err(e) => {
1207                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1208                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1209            }
1210        };
1211
1212        vec![parse_debian_file_list(
1213            &content,
1214            filename,
1215            DatasourceId::DebianInstalledMd5Sums,
1216        )]
1217    }
1218}
1219
1220crate::register_parser!(
1221    "Debian installed package md5sums",
1222    &["**/var/lib/dpkg/info/*.md5sums"],
1223    "deb",
1224    "",
1225    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1226);
1227
1228const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1229
1230fn parse_debian_file_list(
1231    content: &str,
1232    filename: &str,
1233    datasource_id: DatasourceId,
1234) -> PackageData {
1235    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1236        (Some(pkg.to_string()), Some(arch.to_string()))
1237    } else if filename == "md5sums" {
1238        (None, None)
1239    } else {
1240        (Some(filename.to_string()), None)
1241    };
1242
1243    let mut file_references = Vec::new();
1244
1245    for line in content.lines() {
1246        let line = line.trim();
1247        if line.is_empty() || line.starts_with('#') {
1248            continue;
1249        }
1250
1251        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1252            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1253        } else {
1254            (None, line)
1255        };
1256
1257        if IGNORED_ROOT_DIRS.contains(&path) {
1258            continue;
1259        }
1260
1261        file_references.push(FileReference {
1262            path: path.to_string(),
1263            size: None,
1264            sha1: None,
1265            md5: md5sum,
1266            sha256: None,
1267            sha512: None,
1268            extra_data: None,
1269        });
1270    }
1271
1272    if file_references.is_empty() {
1273        return default_package_data(datasource_id);
1274    }
1275
1276    let namespace = Some("debian".to_string());
1277    let mut package = PackageData {
1278        datasource_id: Some(datasource_id),
1279        package_type: Some(PACKAGE_TYPE),
1280        namespace: namespace.clone(),
1281        name: name.clone(),
1282        file_references,
1283        ..Default::default()
1284    };
1285
1286    if let Some(n) = &name {
1287        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1288    }
1289
1290    package
1291}
1292
1293/// Parser for Debian machine-readable copyright files (DEP-5 format)
1294pub struct DebianCopyrightParser;
1295
1296impl PackageParser for DebianCopyrightParser {
1297    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1298
1299    fn is_match(path: &Path) -> bool {
1300        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1301            if filename != "copyright" {
1302                return filename.ends_with("_copyright");
1303            }
1304            let path_str = path.to_string_lossy();
1305            path_str.contains("/debian/")
1306                || path_str.contains("/packages/deb/")
1307                || path_str.contains("/usr/share/doc/")
1308                || path_str.ends_with("debian/copyright")
1309        } else {
1310            false
1311        }
1312    }
1313
1314    fn extract_packages(path: &Path) -> Vec<PackageData> {
1315        let datasource_id = detect_debian_copyright_datasource(path);
1316        let content = match read_file_to_string(path) {
1317            Ok(c) => c,
1318            Err(e) => {
1319                warn!("Failed to read copyright file {:?}: {}", path, e);
1320                return vec![default_package_data(datasource_id)];
1321            }
1322        };
1323
1324        let package_name = extract_package_name_from_path(path);
1325        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1326        package_data.datasource_id = Some(datasource_id);
1327        vec![package_data]
1328    }
1329}
1330
1331crate::register_parser!(
1332    "Debian machine-readable copyright file",
1333    &[
1334        "**/debian/copyright",
1335        "**/packages/deb/copyright",
1336        "**/usr/share/doc/*/copyright",
1337        "**/*_copyright"
1338    ],
1339    "deb",
1340    "",
1341    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
1342);
1343
1344fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1345    let path_str = path.to_string_lossy();
1346    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1347        DatasourceId::DebianCopyrightInSource
1348    } else if path_str.contains("/usr/share/doc/") {
1349        DatasourceId::DebianCopyrightInPackage
1350    } else {
1351        DatasourceId::DebianCopyrightStandalone
1352    }
1353}
1354
/// Returns the path component that directly follows the first `doc`
/// directory that has a following normal component (e.g. `libc6` for
/// `/usr/share/doc/libc6/copyright`).
///
/// Yields `None` when no such pair exists or when that component is not
/// valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let components: Vec<_> = path.components().collect();

    components
        .windows(2)
        .find_map(|pair| match pair {
            [Component::Normal(dir), Component::Normal(next)] if dir.to_str() == Some("doc") => {
                // Outer `Some` stops the search at the first match even if the
                // name itself cannot be decoded.
                Some(next.to_str().map(String::from))
            }
            _ => None,
        })
        .flatten()
}
1369
1370fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1371    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1372
1373    let is_dep5 = paragraphs
1374        .first()
1375        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1376        .is_some();
1377
1378    let namespace = Some("debian".to_string());
1379    let mut parties = Vec::new();
1380    let mut license_statements = Vec::new();
1381    let mut primary_license_detection = None;
1382    let mut header_license_detection = None;
1383    let mut other_license_detections = Vec::new();
1384
1385    if is_dep5 {
1386        for para in &paragraphs {
1387            if let Some(copyright_text) =
1388                rfc822::get_header_first(&para.metadata.headers, "copyright")
1389            {
1390                for holder in parse_copyright_holders(&copyright_text) {
1391                    if !holder.is_empty() {
1392                        parties.push(Party {
1393                            r#type: None,
1394                            role: Some("copyright-holder".to_string()),
1395                            name: Some(holder),
1396                            email: None,
1397                            url: None,
1398                            organization: None,
1399                            organization_url: None,
1400                            timezone: None,
1401                        });
1402                    }
1403                }
1404            }
1405
1406            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1407                let license_name = license.lines().next().unwrap_or(&license).trim();
1408                if !license_name.is_empty()
1409                    && !license_statements.contains(&license_name.to_string())
1410                {
1411                    license_statements.push(license_name.to_string());
1412                }
1413
1414                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1415                    let detection =
1416                        build_primary_license_detection(license_name, matched_text, line_no);
1417                    let is_header_paragraph =
1418                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1419                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1420                        == Some("*")
1421                    {
1422                        primary_license_detection = Some(detection);
1423                    } else if is_header_paragraph {
1424                        header_license_detection.get_or_insert(detection);
1425                    } else {
1426                        other_license_detections.push(detection);
1427                    }
1428                }
1429            }
1430        }
1431
1432        if primary_license_detection.is_none() && header_license_detection.is_some() {
1433            primary_license_detection = header_license_detection;
1434        }
1435    } else {
1436        let copyright_block = extract_unstructured_field(content, "Copyright:");
1437        if let Some(text) = copyright_block {
1438            for holder in parse_copyright_holders(&text) {
1439                if !holder.is_empty() {
1440                    parties.push(Party {
1441                        r#type: None,
1442                        role: Some("copyright-holder".to_string()),
1443                        name: Some(holder),
1444                        email: None,
1445                        url: None,
1446                        organization: None,
1447                        organization_url: None,
1448                        timezone: None,
1449                    });
1450                }
1451            }
1452        }
1453
1454        let license_block = extract_unstructured_field(content, "License:");
1455        if let Some(text) = license_block {
1456            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1457        }
1458    }
1459
1460    let extracted_license_statement = if license_statements.is_empty() {
1461        None
1462    } else {
1463        Some(license_statements.join(" AND "))
1464    };
1465
1466    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1467    let declared_license_expression = license_detections
1468        .first()
1469        .map(|detection| detection.license_expression.clone());
1470    let declared_license_expression_spdx = license_detections
1471        .first()
1472        .map(|detection| detection.license_expression_spdx.clone());
1473    let other_license_expression = combine_license_expressions(
1474        other_license_detections
1475            .iter()
1476            .map(|detection| detection.license_expression.clone()),
1477    );
1478    let other_license_expression_spdx = combine_license_expressions(
1479        other_license_detections
1480            .iter()
1481            .map(|detection| detection.license_expression_spdx.clone()),
1482    );
1483
1484    PackageData {
1485        datasource_id: Some(DatasourceId::DebianCopyright),
1486        package_type: Some(PACKAGE_TYPE),
1487        namespace: namespace.clone(),
1488        name: package_name.map(|s| s.to_string()),
1489        parties,
1490        declared_license_expression,
1491        declared_license_expression_spdx,
1492        license_detections,
1493        other_license_expression,
1494        other_license_expression_spdx,
1495        other_license_detections,
1496        extracted_license_statement,
1497        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1498        ..Default::default()
1499    }
1500}
1501
/// One blank-line-delimited paragraph of a Debian copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Fields of the paragraph, keyed by lowercased field name.
    metadata: Rfc822Metadata,
    /// Text of the paragraph's first `License:` line (trailing whitespace
    /// removed) together with its 1-based line number in the file, if any.
    license_header_line: Option<(String, usize)>,
}
1507
1508fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1509    let mut paragraphs = Vec::new();
1510    let mut current_lines = Vec::new();
1511    let mut current_start_line = 1usize;
1512
1513    for (idx, line) in content.lines().enumerate() {
1514        let line_no = idx + 1;
1515        if line.is_empty() {
1516            if !current_lines.is_empty() {
1517                paragraphs.push(finalize_copyright_paragraph(
1518                    std::mem::take(&mut current_lines),
1519                    current_start_line,
1520                ));
1521            }
1522            current_start_line = line_no + 1;
1523        } else {
1524            if current_lines.is_empty() {
1525                current_start_line = line_no;
1526            }
1527            current_lines.push(line.to_string());
1528        }
1529    }
1530
1531    if !current_lines.is_empty() {
1532        paragraphs.push(finalize_copyright_paragraph(
1533            current_lines,
1534            current_start_line,
1535        ));
1536    }
1537
1538    paragraphs
1539}
1540
1541fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1542    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1543    let mut current_name: Option<String> = None;
1544    let mut current_value = String::new();
1545    let mut license_header_line = None;
1546
1547    for (idx, line) in raw_lines.iter().enumerate() {
1548        if line.starts_with(' ') || line.starts_with('\t') {
1549            if current_name.is_some() {
1550                current_value.push('\n');
1551                current_value.push_str(line);
1552            }
1553            continue;
1554        }
1555
1556        if let Some(name) = current_name.take() {
1557            add_copyright_header_value(&mut headers, &name, &current_value);
1558            current_value.clear();
1559        }
1560
1561        if let Some((name, value)) = line.split_once(':') {
1562            let normalized_name = name.trim().to_ascii_lowercase();
1563            if normalized_name == "license" && license_header_line.is_none() {
1564                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1565            }
1566            current_name = Some(normalized_name);
1567            current_value = value.trim_start().to_string();
1568        }
1569    }
1570
1571    if let Some(name) = current_name.take() {
1572        add_copyright_header_value(&mut headers, &name, &current_value);
1573    }
1574
1575    CopyrightParagraph {
1576        metadata: Rfc822Metadata {
1577            headers,
1578            body: String::new(),
1579        },
1580        license_header_line,
1581    }
1582}
1583
/// Records `value` (trailing whitespace removed) under `name`.
///
/// The key is always registered, even when the trimmed value is empty; in
/// that case its value list is simply left unchanged.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1591
1592fn build_primary_license_detection(
1593    license_name: &str,
1594    matched_text: String,
1595    line_no: usize,
1596) -> LicenseDetection {
1597    let normalized = normalize_debian_license_name(license_name);
1598    let line = LineNumber::new(line_no).unwrap();
1599
1600    build_declared_license_detection(
1601        &normalized,
1602        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1603    )
1604}
1605
1606fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1607    match license_name.trim() {
1608        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1609        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1610        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1611        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1612        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1613        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1614        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1615        "public-domain" => {
1616            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1617        }
1618        other => normalize_declared_license_key(other)
1619            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1620    }
1621}
1622
/// Extracts copyright holder names from the value of a copyright field, one
/// candidate per line.
///
/// For every non-blank line the leading copyright markers (`Copyright` in any
/// ASCII case, `(C)`, `(c)`, `©`) are removed, including when they are
/// separated by whitespace or `:` as in `Copyright (C) 2020 ...`. Everything
/// before the first alphabetic character (years, ranges, punctuation) is then
/// skipped and the remainder is taken as the holder. Holders of two bytes or
/// fewer are discarded as noise.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    /// Strips every leading copyright marker, plus the whitespace and colons
    /// between them, from `line`.
    fn strip_copyright_markers(line: &str) -> &str {
        const WORD: &str = "copyright";
        const SYMBOLS: [&str; 3] = ["(C)", "(c)", "©"];

        let mut rest = line.trim_start();
        loop {
            // `get` returns None on a short string or a non-boundary index, so
            // the slice below cannot panic.
            let after_word = rest
                .get(..WORD.len())
                .filter(|prefix| prefix.eq_ignore_ascii_case(WORD))
                .map(|_| &rest[WORD.len()..]);
            let after_marker = after_word
                .or_else(|| SYMBOLS.iter().find_map(|symbol| rest.strip_prefix(*symbol)));

            match after_marker {
                // Markers are usually separated by spaces; consume those too so
                // the next marker is found at the start of `rest`.
                Some(tail) => {
                    rest = tail.trim_start_matches(|c: char| c.is_whitespace() || c == ':');
                }
                None => return rest,
            }
        }
    }

    text.lines()
        .filter_map(|line| {
            let cleaned = strip_copyright_markers(line).trim_end();
            // Skip years and punctuation that precede the holder's name.
            let name_start = cleaned.find(char::is_alphabetic)?;
            let holder = &cleaned[name_start..];
            (holder.len() > 2).then(|| holder.to_string())
        })
        .collect()
}
1655
/// Collects the value of a free-form field such as `Copyright:` from
/// unstructured text.
///
/// Every line starting with `field_name` contributes the text after the
/// prefix. Once the field has been seen, indented lines are appended
/// (trimmed), empty lines are skipped, and the first other non-indented line
/// ends the scan. Returns `None` when nothing but whitespace was collected.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut seen_field = false;

    for raw in content.lines() {
        let piece = if raw.starts_with(field_name) {
            seen_field = true;
            raw.trim_start_matches(field_name).trim()
        } else if !seen_field {
            continue;
        } else if raw.starts_with(char::is_whitespace) {
            raw.trim()
        } else if raw.trim().is_empty() {
            continue;
        } else {
            // A new non-indented line: the field is over.
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    let value = collected.trim();
    (!value.is_empty()).then(|| value.to_string())
}
1682
1683/// Parser for Debian binary package archives (.deb files)
1684pub struct DebianDebParser;
1685
1686impl PackageParser for DebianDebParser {
1687    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1688
1689    fn is_match(path: &Path) -> bool {
1690        path.extension().and_then(|e| e.to_str()) == Some("deb")
1691    }
1692
1693    fn extract_packages(path: &Path) -> Vec<PackageData> {
1694        // Try to extract metadata from archive contents first
1695        if let Ok(data) = extract_deb_archive(path) {
1696            return vec![data];
1697        }
1698
1699        // Fallback to filename parsing
1700        let filename = match path.file_name().and_then(|n| n.to_str()) {
1701            Some(f) => f,
1702            None => {
1703                return vec![default_package_data(DatasourceId::DebianDeb)];
1704            }
1705        };
1706
1707        vec![parse_deb_filename(filename)]
1708    }
1709}
1710
1711crate::register_parser!(
1712    "Debian binary package archive (.deb)",
1713    &["**/*.deb"],
1714    "deb",
1715    "",
1716    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
1717);
1718
1719fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1720    use flate2::read::GzDecoder;
1721    use liblzma::read::XzDecoder;
1722    use std::io::{Cursor, Read};
1723
1724    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1725
1726    let mut archive = ar::Archive::new(file);
1727    let mut package: Option<PackageData> = None;
1728
1729    while let Some(entry_result) = archive.next_entry() {
1730        let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1731
1732        let entry_name = std::str::from_utf8(entry.header().identifier())
1733            .map_err(|e| format!("Invalid entry name: {}", e))?;
1734        let entry_name = entry_name.trim().to_string();
1735
1736        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1737            let mut control_data = Vec::new();
1738            entry
1739                .read_to_end(&mut control_data)
1740                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1741
1742            if entry_name.ends_with(".gz") {
1743                let decoder = GzDecoder::new(Cursor::new(control_data));
1744                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1745                    package = Some(parsed_package);
1746                }
1747            } else if entry_name.ends_with(".xz") {
1748                let decoder = XzDecoder::new(Cursor::new(control_data));
1749                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1750                    package = Some(parsed_package);
1751                }
1752            }
1753        } else if entry_name.starts_with("data.tar") {
1754            let mut data = Vec::new();
1755            entry
1756                .read_to_end(&mut data)
1757                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1758
1759            let Some(current_package) = package.as_mut() else {
1760                continue;
1761            };
1762
1763            if entry_name.ends_with(".gz") {
1764                let decoder = GzDecoder::new(Cursor::new(data));
1765                merge_deb_data_archive(decoder, current_package)?;
1766            } else if entry_name.ends_with(".xz") {
1767                let decoder = XzDecoder::new(Cursor::new(data));
1768                merge_deb_data_archive(decoder, current_package)?;
1769            }
1770        }
1771    }
1772
1773    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1774}
1775
1776fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1777    use std::io::Read;
1778
1779    let mut tar_archive = tar::Archive::new(reader);
1780
1781    for tar_entry_result in tar_archive
1782        .entries()
1783        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1784    {
1785        let mut tar_entry =
1786            tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1787
1788        let tar_path = tar_entry
1789            .path()
1790            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1791
1792        if tar_path.ends_with("control") {
1793            let mut control_content = String::new();
1794            tar_entry
1795                .read_to_string(&mut control_content)
1796                .map_err(|e| format!("Failed to read control file: {}", e))?;
1797
1798            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1799            if paragraphs.is_empty() {
1800                return Err("No paragraphs in control file".to_string());
1801            }
1802
1803            if let Some(package) =
1804                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
1805            {
1806                return Ok(Some(package));
1807            }
1808
1809            return Err("Failed to parse control file".to_string());
1810        }
1811    }
1812
1813    Ok(None)
1814}
1815
1816fn merge_deb_data_archive<R: std::io::Read>(
1817    reader: R,
1818    package: &mut PackageData,
1819) -> Result<(), String> {
1820    use std::io::Read;
1821
1822    let mut tar_archive = tar::Archive::new(reader);
1823
1824    for tar_entry_result in tar_archive
1825        .entries()
1826        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1827    {
1828        let mut tar_entry =
1829            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1830
1831        let tar_path = tar_entry
1832            .path()
1833            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1834        let tar_path_str = tar_path.to_string_lossy();
1835
1836        if tar_path_str.ends_with(&format!(
1837            "/usr/share/doc/{}/copyright",
1838            package.name.as_deref().unwrap_or_default()
1839        )) || tar_path_str.ends_with(&format!(
1840            "usr/share/doc/{}/copyright",
1841            package.name.as_deref().unwrap_or_default()
1842        )) {
1843            let mut copyright_content = String::new();
1844            tar_entry
1845                .read_to_string(&mut copyright_content)
1846                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1847
1848            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
1849            merge_debian_copyright_into_package(package, &copyright_pkg);
1850            break;
1851        }
1852    }
1853
1854    Ok(())
1855}
1856
1857fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1858    if target.extracted_license_statement.is_none() {
1859        target.extracted_license_statement = copyright.extracted_license_statement.clone();
1860    }
1861
1862    for party in &copyright.parties {
1863        if !target.parties.iter().any(|existing| {
1864            existing.r#type == party.r#type
1865                && existing.role == party.role
1866                && existing.name == party.name
1867                && existing.email == party.email
1868                && existing.url == party.url
1869                && existing.organization == party.organization
1870                && existing.organization_url == party.organization_url
1871                && existing.timezone == party.timezone
1872        }) {
1873            target.parties.push(party.clone());
1874        }
1875    }
1876}
1877
1878fn parse_deb_filename(filename: &str) -> PackageData {
1879    let without_ext = filename.trim_end_matches(".deb");
1880
1881    let parts: Vec<&str> = without_ext.split('_').collect();
1882    if parts.len() < 2 {
1883        return default_package_data(DatasourceId::DebianDeb);
1884    }
1885
1886    let name = parts[0].to_string();
1887    let version = parts[1].to_string();
1888    let architecture = if parts.len() >= 3 {
1889        Some(parts[2].to_string())
1890    } else {
1891        None
1892    };
1893
1894    let namespace = Some("debian".to_string());
1895
1896    PackageData {
1897        datasource_id: Some(DatasourceId::DebianDeb),
1898        package_type: Some(PACKAGE_TYPE),
1899        namespace: namespace.clone(),
1900        name: Some(name.clone()),
1901        version: Some(version.clone()),
1902        purl: build_debian_purl(
1903            &name,
1904            Some(&version),
1905            namespace.as_deref(),
1906            architecture.as_deref(),
1907        ),
1908        ..Default::default()
1909    }
1910}
1911
1912/// Parser for control files inside extracted .deb control tarballs.
1913///
1914/// Matches paths like `*/control.tar.gz-extract/control` and
1915/// `*/control.tar.xz-extract/control` which are created by ExtractCode
1916/// when extracting .deb archives.
1917pub struct DebianControlInExtractedDebParser;
1918
1919impl PackageParser for DebianControlInExtractedDebParser {
1920    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1921
1922    fn is_match(path: &Path) -> bool {
1923        path.file_name()
1924            .and_then(|n| n.to_str())
1925            .is_some_and(|name| name == "control")
1926            && path
1927                .to_str()
1928                .map(|p| {
1929                    p.ends_with("control.tar.gz-extract/control")
1930                        || p.ends_with("control.tar.xz-extract/control")
1931                })
1932                .unwrap_or(false)
1933    }
1934
1935    fn extract_packages(path: &Path) -> Vec<PackageData> {
1936        let content = match read_file_to_string(path) {
1937            Ok(c) => c,
1938            Err(e) => {
1939                warn!(
1940                    "Failed to read control file in extracted deb {:?}: {}",
1941                    path, e
1942                );
1943                return vec![default_package_data(
1944                    DatasourceId::DebianControlExtractedDeb,
1945                )];
1946            }
1947        };
1948
1949        // A control file inside an extracted .deb has a single paragraph
1950        // (unlike debian/control which has source + binary paragraphs)
1951        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1952        if paragraphs.is_empty() {
1953            return vec![default_package_data(
1954                DatasourceId::DebianControlExtractedDeb,
1955            )];
1956        }
1957
1958        if let Some(pkg) = build_package_from_paragraph(
1959            &paragraphs[0],
1960            None,
1961            DatasourceId::DebianControlExtractedDeb,
1962        ) {
1963            vec![pkg]
1964        } else {
1965            vec![default_package_data(
1966                DatasourceId::DebianControlExtractedDeb,
1967            )]
1968        }
1969    }
1970}
1971
1972/// Parser for MD5 checksum files inside extracted .deb control tarballs
1973pub struct DebianMd5sumInPackageParser;
1974
1975impl PackageParser for DebianMd5sumInPackageParser {
1976    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1977
1978    fn is_match(path: &Path) -> bool {
1979        path.file_name()
1980            .and_then(|n| n.to_str())
1981            .is_some_and(|name| name == "md5sums")
1982            && path
1983                .to_str()
1984                .map(|p| {
1985                    p.ends_with("control.tar.gz-extract/md5sums")
1986                        || p.ends_with("control.tar.xz-extract/md5sums")
1987                })
1988                .unwrap_or(false)
1989    }
1990
1991    fn extract_packages(path: &Path) -> Vec<PackageData> {
1992        let content = match read_file_to_string(path) {
1993            Ok(c) => c,
1994            Err(e) => {
1995                warn!("Failed to read md5sums file {:?}: {}", path, e);
1996                return vec![default_package_data(
1997                    DatasourceId::DebianMd5SumsInExtractedDeb,
1998                )];
1999            }
2000        };
2001
2002        let package_name = extract_package_name_from_deb_path(path);
2003
2004        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2005    }
2006}
2007
/// Recovers the package name from the grandparent directory of `path`.
///
/// The grandparent is expected to be named `<name>_<...>.deb-extract`; the
/// text before the first `_` is returned. Yields `None` when there is no
/// grandparent, its name is not valid UTF-8, or it lacks the
/// `.deb-extract` ending.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    // ancestors(): 0 = the path itself, 1 = parent, 2 = grandparent.
    path.ancestors()
        .nth(2)
        .and_then(|extract_dir| extract_dir.file_name())
        .and_then(|dir_name| dir_name.to_str())
        .and_then(|dir_name| dir_name.strip_suffix("-extract"))
        .and_then(|deb_file| deb_file.strip_suffix(".deb"))
        .and_then(|stem| stem.split('_').next())
        .map(str::to_owned)
}
2018
2019fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2020    let mut file_references = Vec::new();
2021
2022    for line in content.lines() {
2023        let line = line.trim();
2024        if line.is_empty() || line.starts_with('#') {
2025            continue;
2026        }
2027
2028        let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
2029            (
2030                Md5Digest::from_hex(line[..idx].trim()).ok(),
2031                line[idx + 2..].trim(),
2032            )
2033        } else if let Some((hash, path)) = line.split_once(' ') {
2034            (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2035        } else {
2036            (None, line)
2037        };
2038
2039        if IGNORED_ROOT_DIRS.contains(&filepath) {
2040            continue;
2041        }
2042
2043        file_references.push(FileReference {
2044            path: filepath.to_string(),
2045            size: None,
2046            sha1: None,
2047            md5: md5sum,
2048            sha256: None,
2049            sha512: None,
2050            extra_data: None,
2051        });
2052    }
2053
2054    if file_references.is_empty() {
2055        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2056    }
2057
2058    let namespace = Some("debian".to_string());
2059    let mut package = PackageData {
2060        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2061        package_type: Some(PACKAGE_TYPE),
2062        namespace: namespace.clone(),
2063        name: package_name.map(|s| s.to_string()),
2064        file_references,
2065        ..Default::default()
2066    };
2067
2068    if let Some(n) = &package.name {
2069        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2070    }
2071
2072    package
2073}
2074
// Registers the metadata entry for `control` files found in extracted .deb
// control tarballs (gz and xz variants).
// NOTE(review): the macro is defined elsewhere; the arguments are presumably
// description, path patterns, package type, primary language (empty here),
// and documentation URL — confirm against `register_parser!`.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2085
// Registers the metadata entry for `md5sums` files found in extracted .deb
// control tarballs (gz and xz variants).
// NOTE(review): the macro is defined elsewhere; the arguments are presumably
// description, path patterns, package type, primary language (empty here),
// and documentation URL — confirm against `register_parser!`.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2096
2097#[cfg(test)]
2098mod tests {
2099    use super::*;
2100    use crate::models::DatasourceId;
2101    use crate::models::PackageType;
2102    use ar::{Builder as ArBuilder, Header as ArHeader};
2103    use flate2::Compression;
2104    use flate2::write::GzEncoder;
2105    use liblzma::write::XzEncoder;
2106    use std::io::Cursor;
2107    use std::path::PathBuf;
2108    use tar::{Builder as TarBuilder, Header as TarHeader};
2109    use tempfile::NamedTempFile;
2110
2111    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2112        let mut control_tar = Vec::new();
2113        {
2114            let encoder = XzEncoder::new(&mut control_tar, 6);
2115            let mut tar_builder = TarBuilder::new(encoder);
2116
2117            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2118            let mut header = TarHeader::new_gnu();
2119            header
2120                .set_path("control")
2121                .expect("control tar path should be valid");
2122            header.set_size(control_content.len() as u64);
2123            header.set_mode(0o644);
2124            header.set_cksum();
2125            tar_builder
2126                .append(&header, Cursor::new(control_content))
2127                .expect("control file should be appended to tar.xz");
2128            tar_builder.finish().expect("control tar.xz should finish");
2129        }
2130
2131        let deb = NamedTempFile::new().expect("temp deb file should be created");
2132        {
2133            let mut builder = ArBuilder::new(
2134                deb.reopen()
2135                    .expect("temporary deb file should reopen for writing"),
2136            );
2137
2138            let debian_binary = b"2.0\n";
2139            let mut debian_binary_header =
2140                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2141            debian_binary_header.set_mode(0o100644);
2142            builder
2143                .append(&debian_binary_header, Cursor::new(debian_binary))
2144                .expect("debian-binary entry should be appended");
2145
2146            let mut control_header =
2147                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2148            control_header.set_mode(0o100644);
2149            builder
2150                .append(&control_header, Cursor::new(control_tar))
2151                .expect("control.tar.xz entry should be appended");
2152        }
2153
2154        deb
2155    }
2156
2157    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2158        let mut control_tar = Vec::new();
2159        {
2160            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2161            let mut tar_builder = TarBuilder::new(encoder);
2162
2163            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2164            let mut header = TarHeader::new_gnu();
2165            header
2166                .set_path("control")
2167                .expect("control tar path should be valid");
2168            header.set_size(control_content.len() as u64);
2169            header.set_mode(0o644);
2170            header.set_cksum();
2171            tar_builder
2172                .append(&header, Cursor::new(control_content))
2173                .expect("control file should be appended to tar.gz");
2174            tar_builder.finish().expect("control tar.gz should finish");
2175        }
2176
2177        let mut data_tar = Vec::new();
2178        {
2179            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2180            let mut tar_builder = TarBuilder::new(encoder);
2181
2182            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2183            let mut header = TarHeader::new_gnu();
2184            header
2185                .set_path("./usr/share/doc/synthetic/copyright")
2186                .expect("copyright path should be valid");
2187            header.set_size(copyright.len() as u64);
2188            header.set_mode(0o644);
2189            header.set_cksum();
2190            tar_builder
2191                .append(&header, Cursor::new(copyright))
2192                .expect("copyright file should be appended to data tar");
2193            tar_builder.finish().expect("data tar.gz should finish");
2194        }
2195
2196        let deb = NamedTempFile::new().expect("temp deb file should be created");
2197        {
2198            let mut builder = ArBuilder::new(
2199                deb.reopen()
2200                    .expect("temporary deb file should reopen for writing"),
2201            );
2202
2203            let debian_binary = b"2.0\n";
2204            let mut debian_binary_header =
2205                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2206            debian_binary_header.set_mode(0o100644);
2207            builder
2208                .append(&debian_binary_header, Cursor::new(debian_binary))
2209                .expect("debian-binary entry should be appended");
2210
2211            let mut control_header =
2212                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2213            control_header.set_mode(0o100644);
2214            builder
2215                .append(&control_header, Cursor::new(control_tar))
2216                .expect("control.tar.gz entry should be appended");
2217
2218            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2219            data_header.set_mode(0o100644);
2220            builder
2221                .append(&data_header, Cursor::new(data_tar))
2222                .expect("data.tar.gz entry should be appended");
2223        }
2224
2225        deb
2226    }
2227
2228    // ====== Namespace detection ======
2229
2230    #[test]
2231    fn test_detect_namespace_from_ubuntu_version() {
2232        assert_eq!(
2233            detect_namespace(Some("1.0-1ubuntu1"), None),
2234            Some("ubuntu".to_string())
2235        );
2236    }
2237
2238    #[test]
2239    fn test_detect_namespace_from_debian_version() {
2240        assert_eq!(
2241            detect_namespace(Some("1.0-1+deb11u1"), None),
2242            Some("debian".to_string())
2243        );
2244    }
2245
2246    #[test]
2247    fn test_detect_namespace_from_ubuntu_maintainer() {
2248        assert_eq!(
2249            detect_namespace(
2250                None,
2251                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2252            ),
2253            Some("ubuntu".to_string())
2254        );
2255    }
2256
2257    #[test]
2258    fn test_detect_namespace_from_debian_maintainer() {
2259        assert_eq!(
2260            detect_namespace(None, Some("John Doe <john@debian.org>")),
2261            Some("debian".to_string())
2262        );
2263    }
2264
2265    #[test]
2266    fn test_detect_namespace_default() {
2267        assert_eq!(
2268            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2269            Some("debian".to_string())
2270        );
2271    }
2272
2273    #[test]
2274    fn test_detect_namespace_version_takes_priority() {
2275        // Version clue should be checked before maintainer
2276        assert_eq!(
2277            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2278            Some("ubuntu".to_string())
2279        );
2280    }
2281
2282    // ====== PURL generation ======
2283
2284    #[test]
2285    fn test_build_purl_basic() {
2286        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2287        assert_eq!(
2288            purl,
2289            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2290        );
2291    }
2292
2293    #[test]
2294    fn test_build_purl_no_version() {
2295        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2296        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2297    }
2298
2299    #[test]
2300    fn test_build_purl_no_arch() {
2301        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2302        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2303    }
2304
2305    #[test]
2306    fn test_build_purl_no_namespace() {
2307        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2308        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2309    }
2310
2311    // ====== Dependency parsing ======
2312
2313    #[test]
2314    fn test_parse_simple_dependency() {
2315        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2316        assert_eq!(deps.len(), 1);
2317        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2318        assert_eq!(deps[0].extracted_requirement, None);
2319        assert_eq!(deps[0].scope, Some("depends".to_string()));
2320    }
2321
2322    #[test]
2323    fn test_parse_dependency_with_version() {
2324        let deps =
2325            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2326        assert_eq!(deps.len(), 1);
2327        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2328        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2329    }
2330
2331    #[test]
2332    fn test_parse_dependency_exact_version() {
2333        let deps = parse_dependency_field(
2334            "libc6 (= 2.31-13+deb11u5)",
2335            "depends",
2336            true,
2337            false,
2338            Some("debian"),
2339        );
2340        assert_eq!(deps.len(), 1);
2341        assert_eq!(deps[0].is_pinned, Some(true));
2342    }
2343
2344    #[test]
2345    fn test_parse_dependency_strict_less() {
2346        let deps =
2347            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2348        assert_eq!(deps.len(), 1);
2349        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2350        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2351    }
2352
2353    #[test]
2354    fn test_parse_multiple_dependencies() {
2355        let deps = parse_dependency_field(
2356            "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2357            "depends",
2358            true,
2359            false,
2360            Some("debian"),
2361        );
2362        assert_eq!(deps.len(), 3);
2363    }
2364
2365    #[test]
2366    fn test_parse_dependency_alternatives() {
2367        let deps = parse_dependency_field(
2368            "libssl1.1 | libssl3",
2369            "depends",
2370            true,
2371            false,
2372            Some("debian"),
2373        );
2374        assert_eq!(deps.len(), 2);
2375        // Alternatives are marked as optional
2376        assert_eq!(deps[0].is_optional, Some(true));
2377        assert_eq!(deps[1].is_optional, Some(true));
2378    }
2379
2380    #[test]
2381    fn test_parse_dependency_skips_substitutions() {
2382        let deps = parse_dependency_field(
2383            "${shlibs:Depends}, ${misc:Depends}, libc6",
2384            "depends",
2385            true,
2386            false,
2387            Some("debian"),
2388        );
2389        assert_eq!(deps.len(), 1);
2390        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2391    }
2392
2393    #[test]
2394    fn test_parse_dependency_with_arch_qualifier() {
2395        // Dependencies can have [arch] qualifiers which we ignore
2396        let deps = parse_dependency_field(
2397            "libc6 (>= 2.17) [amd64]",
2398            "depends",
2399            true,
2400            false,
2401            Some("debian"),
2402        );
2403        assert_eq!(deps.len(), 1);
2404        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2405    }
2406
2407    #[test]
2408    fn test_parse_empty_dependency() {
2409        let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2410        assert!(deps.is_empty());
2411    }
2412
2413    // ====== Source field parsing ======
2414
2415    #[test]
2416    fn test_parse_source_field_name_only() {
2417        let sources = parse_source_field(Some("util-linux"), Some("debian"));
2418        assert_eq!(sources.len(), 1);
2419        assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2420    }
2421
2422    #[test]
2423    fn test_parse_source_field_with_version() {
2424        let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2425        assert_eq!(sources.len(), 1);
2426        assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2427    }
2428
2429    #[test]
2430    fn test_parse_source_field_empty() {
2431        let sources = parse_source_field(None, Some("debian"));
2432        assert!(sources.is_empty());
2433    }
2434
2435    // ====== Control file parsing ======
2436
2437    #[test]
2438    fn test_parse_debian_control_source_and_binary() {
2439        let content = "\
2440Source: curl
2441Section: web
2442Priority: optional
2443Maintainer: Alessandro Ghedini <ghedo@debian.org>
2444Homepage: https://curl.se/
2445Vcs-Browser: https://salsa.debian.org/debian/curl
2446Vcs-Git: https://salsa.debian.org/debian/curl.git
2447Build-Depends: debhelper (>= 12), libssl-dev
2448
2449Package: curl
2450Architecture: amd64
2451Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2452Description: command line tool for transferring data with URL syntax";
2453
2454        let packages = parse_debian_control(content);
2455        assert_eq!(packages.len(), 1);
2456
2457        let pkg = &packages[0];
2458        assert_eq!(pkg.name, Some("curl".to_string()));
2459        assert_eq!(pkg.package_type, Some(PackageType::Deb));
2460        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2461        assert_eq!(
2462            pkg.vcs_url,
2463            Some("https://salsa.debian.org/debian/curl.git".to_string())
2464        );
2465        assert_eq!(
2466            pkg.code_view_url,
2467            Some("https://salsa.debian.org/debian/curl".to_string())
2468        );
2469
2470        // Maintainer from source paragraph
2471        assert_eq!(pkg.parties.len(), 1);
2472        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2473        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2474        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2475
2476        // Dependencies parsed
2477        assert!(!pkg.dependencies.is_empty());
2478    }
2479
2480    #[test]
2481    fn test_parse_debian_control_multiple_binary() {
2482        let content = "\
2483Source: gzip
2484Maintainer: Debian Developer <dev@debian.org>
2485
2486Package: gzip
2487Architecture: any
2488Depends: libc6 (>= 2.17)
2489Description: GNU file compression
2490
2491Package: gzip-win32
2492Architecture: all
2493Description: gzip for Windows";
2494
2495        let packages = parse_debian_control(content);
2496        assert_eq!(packages.len(), 2);
2497        assert_eq!(packages[0].name, Some("gzip".to_string()));
2498        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2499
2500        // Both inherit source maintainer
2501        assert_eq!(packages[0].parties.len(), 1);
2502        assert_eq!(packages[1].parties.len(), 1);
2503    }
2504
2505    #[test]
2506    fn test_parse_debian_control_source_only() {
2507        let content = "\
2508Source: my-package
2509Maintainer: Test User <test@debian.org>
2510Build-Depends: debhelper (>= 13)";
2511
2512        let packages = parse_debian_control(content);
2513        assert_eq!(packages.len(), 1);
2514        assert_eq!(packages[0].name, Some("my-package".to_string()));
2515        // Build-Depends parsed
2516        assert!(!packages[0].dependencies.is_empty());
2517        assert_eq!(
2518            packages[0].dependencies[0].scope,
2519            Some("build-depends".to_string())
2520        );
2521    }
2522
2523    #[test]
2524    fn test_parse_debian_control_with_uploaders() {
2525        let content = "\
2526Source: example
2527Maintainer: Main Dev <main@debian.org>
2528Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2529
2530Package: example
2531Architecture: any
2532Description: test package";
2533
2534        let packages = parse_debian_control(content);
2535        assert_eq!(packages.len(), 1);
2536        // 1 maintainer + 2 uploaders
2537        assert_eq!(packages[0].parties.len(), 3);
2538        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2539        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2540        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2541    }
2542
2543    #[test]
2544    fn test_parse_debian_control_vcs_git_with_branch() {
2545        let content = "\
2546Source: example
2547Maintainer: Dev <dev@debian.org>
2548Vcs-Git: https://salsa.debian.org/example.git -b main
2549
2550Package: example
2551Architecture: any
2552Description: test";
2553
2554        let packages = parse_debian_control(content);
2555        assert_eq!(packages.len(), 1);
2556        // Should only take the URL, not the branch
2557        assert_eq!(
2558            packages[0].vcs_url,
2559            Some("https://salsa.debian.org/example.git".to_string())
2560        );
2561    }
2562
2563    #[test]
2564    fn test_parse_debian_control_multi_arch() {
2565        let content = "\
2566Source: example
2567Maintainer: Dev <dev@debian.org>
2568
2569Package: libexample
2570Architecture: any
2571Multi-Arch: same
2572Description: shared library";
2573
2574        let packages = parse_debian_control(content);
2575        assert_eq!(packages.len(), 1);
2576        let extra = packages[0].extra_data.as_ref().unwrap();
2577        assert_eq!(
2578            extra.get("multi_arch"),
2579            Some(&serde_json::Value::String("same".to_string()))
2580        );
2581    }
2582
2583    // ====== dpkg/status parsing ======
2584
2585    #[test]
2586    fn test_parse_dpkg_status_basic() {
2587        let content = "\
2588Package: base-files
2589Status: install ok installed
2590Priority: required
2591Section: admin
2592Installed-Size: 391
2593Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2594Architecture: amd64
2595Version: 11ubuntu5.6
2596Description: Debian base system miscellaneous files
2597Homepage: https://tracker.debian.org/pkg/base-files
2598
2599Package: not-installed
2600Status: deinstall ok config-files
2601Architecture: amd64
2602Version: 1.0
2603Description: This should be skipped";
2604
2605        let packages = parse_dpkg_status(content);
2606        assert_eq!(packages.len(), 1);
2607
2608        let pkg = &packages[0];
2609        assert_eq!(pkg.name, Some("base-files".to_string()));
2610        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2611        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2612        assert_eq!(
2613            pkg.datasource_id,
2614            Some(DatasourceId::DebianInstalledStatusDb)
2615        );
2616
2617        // Installed-Size in extra_data
2618        let extra = pkg.extra_data.as_ref().unwrap();
2619        assert_eq!(
2620            extra.get("installed_size"),
2621            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2622        );
2623    }
2624
2625    #[test]
2626    fn test_parse_dpkg_status_multiple_installed() {
2627        let content = "\
2628Package: libc6
2629Status: install ok installed
2630Architecture: amd64
2631Version: 2.31-13+deb11u5
2632Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2633Description: GNU C Library
2634
2635Package: zlib1g
2636Status: install ok installed
2637Architecture: amd64
2638Version: 1:1.2.11.dfsg-2+deb11u2
2639Maintainer: Mark Brown <broonie@debian.org>
2640Description: compression library";
2641
2642        let packages = parse_dpkg_status(content);
2643        assert_eq!(packages.len(), 2);
2644        assert_eq!(packages[0].name, Some("libc6".to_string()));
2645        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2646    }
2647
2648    #[test]
2649    fn test_parse_dpkg_status_with_dependencies() {
2650        let content = "\
2651Package: curl
2652Status: install ok installed
2653Architecture: amd64
2654Version: 7.74.0-1.3+deb11u7
2655Maintainer: Alessandro Ghedini <ghedo@debian.org>
2656Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2657Recommends: ca-certificates
2658Description: command line tool for transferring data with URL syntax";
2659
2660        let packages = parse_dpkg_status(content);
2661        assert_eq!(packages.len(), 1);
2662
2663        let deps = &packages[0].dependencies;
2664        // 2 from Depends + 1 from Recommends
2665        assert_eq!(deps.len(), 3);
2666
2667        // Check first dependency
2668        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2669        assert_eq!(deps[0].scope, Some("depends".to_string()));
2670        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2671
2672        // Check recommends
2673        assert_eq!(
2674            deps[2].purl,
2675            Some("pkg:deb/debian/ca-certificates".to_string())
2676        );
2677        assert_eq!(deps[2].scope, Some("recommends".to_string()));
2678        assert_eq!(deps[2].is_optional, Some(true));
2679    }
2680
2681    #[test]
2682    fn test_parse_dpkg_status_with_source() {
2683        let content = "\
2684Package: libncurses6
2685Status: install ok installed
2686Architecture: amd64
2687Source: ncurses (6.2+20201114-2+deb11u1)
2688Version: 6.2+20201114-2+deb11u1
2689Maintainer: Craig Small <csmall@debian.org>
2690Description: shared libraries for terminal handling";
2691
2692        let packages = parse_dpkg_status(content);
2693        assert_eq!(packages.len(), 1);
2694        assert!(!packages[0].source_packages.is_empty());
2695        // Source PURL should include version from parentheses
2696        assert!(packages[0].source_packages[0].contains("ncurses"));
2697    }
2698
2699    #[test]
2700    fn test_parse_dpkg_status_filters_not_installed() {
2701        let content = "\
2702Package: installed-pkg
2703Status: install ok installed
2704Version: 1.0
2705Architecture: amd64
2706Description: installed
2707
2708Package: half-installed
2709Status: install ok half-installed
2710Version: 2.0
2711Architecture: amd64
2712Description: half installed
2713
2714Package: deinstall-pkg
2715Status: deinstall ok config-files
2716Version: 3.0
2717Architecture: amd64
2718Description: deinstalled
2719
2720Package: purge-pkg
2721Status: purge ok not-installed
2722Version: 4.0
2723Architecture: amd64
2724Description: purged";
2725
2726        let packages = parse_dpkg_status(content);
2727        assert_eq!(packages.len(), 1);
2728        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2729    }
2730
2731    #[test]
2732    fn test_parse_dpkg_status_empty() {
2733        let packages = parse_dpkg_status("");
2734        assert!(packages.is_empty());
2735    }
2736
2737    // ====== is_match tests ======
2738
2739    #[test]
2740    fn test_debian_control_is_match() {
2741        assert!(DebianControlParser::is_match(Path::new(
2742            "/path/to/debian/control"
2743        )));
2744        assert!(DebianControlParser::is_match(Path::new("debian/control")));
2745        assert!(!DebianControlParser::is_match(Path::new(
2746            "/path/to/control"
2747        )));
2748        assert!(!DebianControlParser::is_match(Path::new(
2749            "/path/to/debian/changelog"
2750        )));
2751    }
2752
2753    #[test]
2754    fn test_debian_installed_is_match() {
2755        assert!(DebianInstalledParser::is_match(Path::new(
2756            "/var/lib/dpkg/status"
2757        )));
2758        assert!(DebianInstalledParser::is_match(Path::new(
2759            "some/root/var/lib/dpkg/status"
2760        )));
2761        assert!(!DebianInstalledParser::is_match(Path::new(
2762            "/var/lib/dpkg/status.d/something"
2763        )));
2764        assert!(!DebianInstalledParser::is_match(Path::new(
2765            "/var/lib/dpkg/available"
2766        )));
2767    }
2768
2769    // ====== Edge cases ======
2770
2771    #[test]
2772    fn test_parse_debian_control_empty_input() {
2773        let packages = parse_debian_control("");
2774        assert!(packages.is_empty());
2775    }
2776
2777    #[test]
2778    fn test_parse_debian_control_malformed_input() {
2779        let content = "this is not a valid control file\nwith random text";
2780        let packages = parse_debian_control(content);
2781        // Should not panic, may return empty or partial results
2782        assert!(packages.is_empty());
2783    }
2784
2785    #[test]
2786    fn test_dependency_with_epoch_version() {
2787        // Debian versions can have epochs like 1:2.3.4
2788        let deps = parse_dependency_field(
2789            "zlib1g (>= 1:1.2.11)",
2790            "depends",
2791            true,
2792            false,
2793            Some("debian"),
2794        );
2795        assert_eq!(deps.len(), 1);
2796        assert_eq!(
2797            deps[0].extracted_requirement,
2798            Some(">= 1:1.2.11".to_string())
2799        );
2800    }
2801
2802    #[test]
2803    fn test_dependency_with_plus_in_name() {
2804        let deps =
2805            parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2806        assert_eq!(deps.len(), 1);
2807        assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2808    }
2809
2810    #[test]
2811    fn test_dsc_parser_is_match() {
2812        assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2813        assert!(DebianDscParser::is_match(&PathBuf::from(
2814            "adduser_3.118+deb11u1.dsc"
2815        )));
2816        assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2817        assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2818    }
2819
2820    #[test]
2821    fn test_dsc_parser_adduser() {
2822        let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2823        let package = DebianDscParser::extract_first_package(&path);
2824
2825        assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2826        assert_eq!(package.namespace, Some("debian".to_string()));
2827        assert_eq!(package.name, Some("adduser".to_string()));
2828        assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2829        assert_eq!(
2830            package.purl,
2831            Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2832        );
2833        assert_eq!(
2834            package.vcs_url,
2835            Some("https://salsa.debian.org/debian/adduser.git".to_string())
2836        );
2837        assert_eq!(
2838            package.code_view_url,
2839            Some("https://salsa.debian.org/debian/adduser".to_string())
2840        );
2841        assert_eq!(
2842            package.datasource_id,
2843            Some(DatasourceId::DebianSourceControlDsc)
2844        );
2845
2846        assert_eq!(package.parties.len(), 2);
2847        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2848        assert_eq!(
2849            package.parties[0].name,
2850            Some("Debian Adduser Developers".to_string())
2851        );
2852        assert_eq!(
2853            package.parties[0].email,
2854            Some("adduser@packages.debian.org".to_string())
2855        );
2856        assert_eq!(package.parties[0].r#type, None);
2857
2858        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2859        assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2860        assert_eq!(
2861            package.parties[1].email,
2862            Some("mh+debian-packages@zugschlus.de".to_string())
2863        );
2864        assert_eq!(package.parties[1].r#type, None);
2865
2866        assert_eq!(package.source_packages.len(), 1);
2867        assert_eq!(
2868            package.source_packages[0],
2869            "pkg:deb/debian/adduser".to_string()
2870        );
2871
2872        assert!(!package.dependencies.is_empty());
2873        let build_dep_names: Vec<String> = package
2874            .dependencies
2875            .iter()
2876            .filter_map(|d| d.purl.as_ref())
2877            .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2878            .map(|p| p.to_string())
2879            .collect();
2880        assert!(build_dep_names.len() >= 2);
2881    }
2882
2883    #[test]
2884    fn test_dsc_parser_zsh() {
2885        let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2886        let package = DebianDscParser::extract_first_package(&path);
2887
2888        assert_eq!(package.name, Some("zsh".to_string()));
2889        assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2890        assert_eq!(package.namespace, Some("debian".to_string()));
2891        assert!(package.purl.is_some());
2892        assert!(package.purl.as_ref().unwrap().contains("zsh"));
2893        assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2894    }
2895
2896    #[test]
2897    fn test_parse_dsc_content_basic() {
2898        let content = "Format: 3.0 (native)
2899Source: testpkg
2900Binary: testpkg
2901Architecture: amd64
2902Version: 1.0.0
2903Maintainer: Test User <test@example.com>
2904Standards-Version: 4.5.0
2905Build-Depends: debhelper (>= 12)
2906Files:
2907 abc123 1024 testpkg_1.0.0.tar.xz
2908";
2909
2910        let package = parse_dsc_content(content);
2911        assert_eq!(package.name, Some("testpkg".to_string()));
2912        assert_eq!(package.version, Some("1.0.0".to_string()));
2913        assert_eq!(package.namespace, Some("debian".to_string()));
2914        assert_eq!(package.parties.len(), 1);
2915        assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2916        assert_eq!(
2917            package.parties[0].email,
2918            Some("test@example.com".to_string())
2919        );
2920        assert_eq!(package.dependencies.len(), 1);
2921        assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2922    }
2923
2924    #[test]
2925    fn test_parse_dsc_content_with_uploaders() {
2926        let content = "Source: mypkg
2927Version: 2.0
2928Architecture: all
2929Maintainer: Main Dev <main@example.com>
2930Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2931";
2932
2933        let package = parse_dsc_content(content);
2934        assert_eq!(package.parties.len(), 3);
2935        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2936        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2937        assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2938    }
2939
2940    #[test]
2941    fn test_orig_tar_parser_is_match() {
2942        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2943            "package_1.0.orig.tar.gz"
2944        )));
2945        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2946            "abseil_0~20200923.3.orig.tar.xz"
2947        )));
2948        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2949            "package.debian.tar.gz"
2950        )));
2951        assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2952    }
2953
2954    #[test]
2955    fn test_debian_tar_parser_is_match() {
2956        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2957            "package_1.0-1.debian.tar.xz"
2958        )));
2959        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2960            "abseil_20220623.1-1.debian.tar.gz"
2961        )));
2962        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2963            "package.orig.tar.gz"
2964        )));
2965        assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2966    }
2967
2968    #[test]
2969    fn test_parse_orig_tar_filename() {
2970        let pkg = parse_source_tarball_filename(
2971            "abseil_0~20200923.3.orig.tar.gz",
2972            DatasourceId::DebianOriginalSourceTarball,
2973        );
2974        assert_eq!(pkg.name, Some("abseil".to_string()));
2975        assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2976        assert_eq!(pkg.namespace, Some("debian".to_string()));
2977        assert_eq!(
2978            pkg.purl,
2979            Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2980        );
2981        assert_eq!(
2982            pkg.datasource_id,
2983            Some(DatasourceId::DebianOriginalSourceTarball)
2984        );
2985    }
2986
2987    #[test]
2988    fn test_parse_debian_tar_filename() {
2989        let pkg = parse_source_tarball_filename(
2990            "abseil_20220623.1-1.debian.tar.xz",
2991            DatasourceId::DebianSourceMetadataTarball,
2992        );
2993        assert_eq!(pkg.name, Some("abseil".to_string()));
2994        assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2995        assert_eq!(pkg.namespace, Some("debian".to_string()));
2996        assert_eq!(
2997            pkg.purl,
2998            Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2999        );
3000    }
3001
3002    #[test]
3003    fn test_parse_deb_filename() {
3004        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
3005        assert_eq!(pkg.name, Some("nginx".to_string()));
3006        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3007
3008        let pkg = parse_deb_filename("invalid.deb");
3009        assert!(pkg.name.is_none());
3010        assert!(pkg.version.is_none());
3011    }
3012
3013    #[test]
3014    fn test_parse_source_tarball_various_compressions() {
3015        let pkg_gz = parse_source_tarball_filename(
3016            "test_1.0.orig.tar.gz",
3017            DatasourceId::DebianOriginalSourceTarball,
3018        );
3019        let pkg_xz = parse_source_tarball_filename(
3020            "test_1.0.orig.tar.xz",
3021            DatasourceId::DebianOriginalSourceTarball,
3022        );
3023        let pkg_bz2 = parse_source_tarball_filename(
3024            "test_1.0.orig.tar.bz2",
3025            DatasourceId::DebianOriginalSourceTarball,
3026        );
3027
3028        assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3029        assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3030        assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3031    }
3032
3033    #[test]
3034    fn test_parse_source_tarball_invalid_format() {
3035        let pkg = parse_source_tarball_filename(
3036            "invalid-no-underscore.tar.gz",
3037            DatasourceId::DebianOriginalSourceTarball,
3038        );
3039        assert!(pkg.name.is_none());
3040        assert!(pkg.version.is_none());
3041    }
3042
3043    #[test]
3044    fn test_list_parser_is_match() {
3045        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3046            "/var/lib/dpkg/info/bash.list"
3047        )));
3048        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3049            "/var/lib/dpkg/info/package:amd64.list"
3050        )));
3051        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3052            "bash.list"
3053        )));
3054        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3055            "/var/lib/dpkg/info/bash.md5sums"
3056        )));
3057    }
3058
3059    #[test]
3060    fn test_md5sums_parser_is_match() {
3061        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3062            "/var/lib/dpkg/info/bash.md5sums"
3063        )));
3064        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3065            "/var/lib/dpkg/info/package:amd64.md5sums"
3066        )));
3067        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3068            "bash.md5sums"
3069        )));
3070        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3071            "/var/lib/dpkg/info/bash.list"
3072        )));
3073    }
3074
3075    #[test]
3076    fn test_parse_debian_file_list_plain_list() {
3077        let content = "/.
3078/bin
3079/bin/bash
3080/usr/bin/bashbug
3081/usr/share/doc/bash/README
3082";
3083        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3084        assert_eq!(pkg.name, Some("bash".to_string()));
3085        assert_eq!(pkg.file_references.len(), 3);
3086        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3087        assert_eq!(pkg.file_references[0].md5, None);
3088        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3089        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3090    }
3091
3092    #[test]
3093    fn test_parse_debian_file_list_md5sums() {
3094        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash
30951c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug
3096f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz
3097";
3098        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3099        assert_eq!(pkg.name, Some("bash".to_string()));
3100        assert_eq!(pkg.file_references.len(), 3);
3101        assert_eq!(pkg.file_references[0].path, "bin/bash");
3102        assert_eq!(
3103            pkg.file_references[0].md5,
3104            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
3105        );
3106        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3107        assert_eq!(
3108            pkg.file_references[1].md5,
3109            Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
3110        );
3111    }
3112
3113    #[test]
3114    fn test_parse_debian_file_list_with_arch() {
3115        let content = "/usr/bin/foo
3116/usr/lib/x86_64-linux-gnu/libfoo.so
3117";
3118        let pkg = parse_debian_file_list(
3119            content,
3120            "libfoo:amd64",
3121            DatasourceId::DebianInstalledFilesList,
3122        );
3123        assert_eq!(pkg.name, Some("libfoo".to_string()));
3124        assert!(pkg.purl.is_some());
3125        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3126        assert_eq!(pkg.file_references.len(), 2);
3127    }
3128
3129    #[test]
3130    fn test_parse_debian_file_list_skips_comments_and_empty() {
3131        let content = "# This is a comment
3132/bin/bash
3133
3134/usr/bin/bashbug
3135  
3136";
3137        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3138        assert_eq!(pkg.file_references.len(), 2);
3139    }
3140
3141    #[test]
3142    fn test_parse_debian_file_list_md5sums_only() {
3143        let content = "abc123  usr/bin/tool
3144";
3145        let pkg =
3146            parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3147        assert_eq!(pkg.name, None);
3148        assert_eq!(pkg.file_references.len(), 1);
3149    }
3150
3151    #[test]
3152    fn test_parse_debian_file_list_ignores_root_dirs() {
3153        let content = "/.
3154/bin
3155/bin/bash
3156/etc
3157/usr
3158/var
3159";
3160        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3161        assert_eq!(pkg.file_references.len(), 1);
3162        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3163    }
3164
3165    #[test]
3166    fn test_copyright_parser_is_match() {
3167        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3168            "/usr/share/doc/bash/copyright"
3169        )));
3170        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3171            "debian/copyright"
3172        )));
3173        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3174            "src/third_party/gperftools/dist/packages/deb/copyright"
3175        )));
3176        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3177            "copyright.txt"
3178        )));
3179        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3180            "/etc/copyright"
3181        )));
3182        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3183            "/tmp/sample_copyright"
3184        )));
3185    }
3186
3187    #[test]
3188    fn test_detect_debian_copyright_datasource() {
3189        assert_eq!(
3190            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3191            DatasourceId::DebianCopyrightInSource
3192        );
3193        assert_eq!(
3194            detect_debian_copyright_datasource(&PathBuf::from(
3195                "src/third_party/gperftools/dist/packages/deb/copyright"
3196            )),
3197            DatasourceId::DebianCopyrightStandalone
3198        );
3199        assert_eq!(
3200            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3201            DatasourceId::DebianCopyrightInPackage
3202        );
3203        assert_eq!(
3204            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3205            DatasourceId::DebianCopyrightStandalone
3206        );
3207    }
3208
3209    #[test]
3210    fn test_extract_package_name_from_path() {
3211        assert_eq!(
3212            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3213            Some("bash".to_string())
3214        );
3215        assert_eq!(
3216            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3217            Some("libseccomp2".to_string())
3218        );
3219        assert_eq!(
3220            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3221            None
3222        );
3223    }
3224
3225    #[test]
3226    fn test_parse_copyright_dep5_format() {
3227        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3228Upstream-Name: libseccomp
3229Source: https://sourceforge.net/projects/libseccomp/
3230
3231Files: *
3232Copyright: 2012 Paul Moore <pmoore@redhat.com>
3233 2012 Ashley Lai <adlai@us.ibm.com>
3234License: LGPL-2.1
3235
3236License: LGPL-2.1
3237 This library is free software
3238";
3239        let pkg = parse_copyright_file(content, Some("libseccomp"));
3240        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3241        assert_eq!(pkg.namespace, Some("debian".to_string()));
3242        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3243        assert_eq!(
3244            pkg.extracted_license_statement,
3245            Some("LGPL-2.1".to_string())
3246        );
3247        assert!(pkg.parties.len() >= 2);
3248        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3249        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3250    }
3251
3252    #[test]
3253    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3254        let path = PathBuf::from(
3255            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3256        );
3257        let pkg = DebianCopyrightParser::extract_first_package(&path);
3258
3259        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3260        let extracted = pkg
3261            .extracted_license_statement
3262            .as_deref()
3263            .expect("license statement should exist");
3264        assert!(extracted.contains("GPL-2+"));
3265        assert!(!pkg.license_detections.is_empty());
3266
3267        let primary = &pkg.license_detections[0];
3268        assert_eq!(
3269            primary.matches[0].matched_text.as_deref(),
3270            Some("License: GPL-2+")
3271        );
3272        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
3273        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
3274    }
3275
3276    #[test]
3277    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3278        let path = PathBuf::from("testdata/debian/copyright/copyright");
3279        let pkg = DebianCopyrightParser::extract_first_package(&path);
3280
3281        assert_eq!(pkg.license_detections.len(), 1);
3282        assert_eq!(pkg.other_license_detections.len(), 4);
3283
3284        let primary = &pkg.license_detections[0];
3285        assert_eq!(
3286            primary.matches[0].matched_text.as_deref(),
3287            Some("License: LGPL-2.1")
3288        );
3289        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());
3290
3291        let ordered_lines: Vec<usize> = pkg
3292            .other_license_detections
3293            .iter()
3294            .map(|detection| detection.matches[0].start_line.get())
3295            .collect();
3296        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3297
3298        let ordered_texts: Vec<&str> = pkg
3299            .other_license_detections
3300            .iter()
3301            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3302            .collect();
3303        assert_eq!(
3304            ordered_texts,
3305            vec![
3306                "License: LGPL-2.1",
3307                "License: LGPL-2.1",
3308                "License: LGPL-2.1",
3309                "License: LGPL-2.1",
3310            ]
3311        );
3312    }
3313
3314    #[test]
3315    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3316        let path = PathBuf::from(
3317            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3318        );
3319        let pkg = DebianCopyrightParser::extract_first_package(&path);
3320
3321        let zlib = pkg
3322            .other_license_detections
3323            .iter()
3324            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3325            .expect("at least one Zlib license paragraph should be detected");
3326        assert_eq!(
3327            zlib.matches[0].matched_text.as_deref(),
3328            Some("License: Zlib")
3329        );
3330
3331        let last_zlib = pkg
3332            .other_license_detections
3333            .iter()
3334            .rev()
3335            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3336            .expect("bottom standalone Zlib license paragraph should be detected");
3337        assert_eq!(
3338            last_zlib.matches[0].start_line,
3339            LineNumber::new(732).unwrap()
3340        );
3341        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
3342    }
3343
3344    #[test]
3345    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3346        let path =
3347            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3348        let pkg = DebianCopyrightParser::extract_first_package(&path);
3349
3350        assert_eq!(pkg.license_detections.len(), 1);
3351        let primary = &pkg.license_detections[0];
3352        assert_eq!(
3353            primary.matches[0].matched_text.as_deref(),
3354            Some("License: LGPL-3+ or GPL-2+")
3355        );
3356        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
3357        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());
3358
3359        assert!(pkg.other_license_detections.iter().any(|detection| {
3360            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3361        }));
3362    }
3363
3364    #[test]
3365    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3366        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3367        let pkg = parse_copyright_file(content, Some("foo"));
3368
3369        assert_eq!(pkg.license_detections.len(), 1);
3370        let primary = &pkg.license_detections[0];
3371        assert_eq!(
3372            primary.matches[0].matched_text.as_deref(),
3373            Some("License: GPL-2+")
3374        );
3375        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
3376    }
3377
3378    #[test]
3379    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3380        let raw_lines = vec![
3381            "Files: *".to_string(),
3382            "Copyright: 2024 Example Org".to_string(),
3383            "License: Apache-2.0".to_string(),
3384            " Licensed under the Apache License, Version 2.0.".to_string(),
3385        ];
3386
3387        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3388        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3389            .into_iter()
3390            .next()
3391            .expect("reference RFC822 paragraph should parse");
3392
3393        assert_eq!(paragraph.metadata.headers, expected.headers);
3394        assert_eq!(paragraph.metadata.body, expected.body);
3395        assert_eq!(
3396            paragraph.license_header_line,
3397            Some(("License: Apache-2.0".to_string(), 12))
3398        );
3399    }
3400
3401    #[test]
3402    fn test_parse_copyright_unstructured() {
3403        let content = "This package was debianized by John Doe.
3404
3405Upstream Authors:
3406    Jane Smith
3407
3408Copyright:
3409    2009 10gen
3410
3411License:
3412    SSPL
3413";
3414        let pkg = parse_copyright_file(content, Some("mongodb"));
3415        assert_eq!(pkg.name, Some("mongodb".to_string()));
3416        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3417        assert!(!pkg.parties.is_empty());
3418    }
3419
3420    #[test]
3421    fn test_parse_copyright_holders() {
3422        let text = "2012 Paul Moore <pmoore@redhat.com>
34232012 Ashley Lai <adlai@us.ibm.com>
3424Copyright (C) 2015-2018 Example Corp";
3425        let holders = parse_copyright_holders(text);
3426        assert!(holders.len() >= 3);
3427        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3428        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3429    }
3430
3431    #[test]
3432    fn test_parse_copyright_empty() {
3433        let content = "This is just some text without proper copyright info.";
3434        let pkg = parse_copyright_file(content, Some("test"));
3435        assert_eq!(pkg.name, Some("test".to_string()));
3436        assert!(pkg.parties.is_empty());
3437        assert!(pkg.extracted_license_statement.is_none());
3438    }
3439
3440    #[test]
3441    fn test_deb_parser_is_match() {
3442        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3443        assert!(DebianDebParser::is_match(&PathBuf::from(
3444            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3445        )));
3446        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3447        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3448    }
3449
3450    #[test]
3451    fn test_parse_deb_filename_with_arch() {
3452        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3453        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3454        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3455        assert_eq!(pkg.namespace, Some("debian".to_string()));
3456        assert_eq!(
3457            pkg.purl,
3458            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3459        );
3460        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3461    }
3462
3463    #[test]
3464    fn test_parse_deb_filename_without_arch() {
3465        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3466        assert_eq!(pkg.name, Some("package".to_string()));
3467        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3468        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3469    }
3470
3471    #[test]
3472    fn test_extract_deb_archive() {
3473        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3474        if !test_path.exists() {
3475            return;
3476        }
3477
3478        let pkg = DebianDebParser::extract_first_package(&test_path);
3479
3480        assert_eq!(pkg.name, Some("adduser".to_string()));
3481        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3482        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3483        assert!(pkg.description.is_some());
3484        assert!(!pkg.parties.is_empty());
3485
3486        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3487        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3488    }
3489
3490    #[test]
3491    fn test_extract_deb_archive_with_control_tar_xz() {
3492        let deb = create_synthetic_deb_with_control_tar_xz();
3493
3494        let pkg = DebianDebParser::extract_first_package(deb.path());
3495
3496        assert_eq!(pkg.name, Some("synthetic".to_string()));
3497        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3498        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3499        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3500    }
3501
3502    #[test]
3503    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3504        let deb = create_synthetic_deb_with_copyright();
3505
3506        let pkg = DebianDebParser::extract_first_package(deb.path());
3507
3508        assert_eq!(pkg.name, Some("synthetic".to_string()));
3509        assert_eq!(
3510            pkg.extracted_license_statement,
3511            Some("Apache-2.0".to_string())
3512        );
3513        assert!(pkg.parties.iter().any(|party| {
3514            party.role.as_deref() == Some("copyright-holder")
3515                && party.name.as_deref() == Some("Example Org")
3516        }));
3517    }
3518
3519    #[test]
3520    fn test_parse_deb_filename_simple() {
3521        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3522        assert_eq!(pkg.name, Some("adduser".to_string()));
3523        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3524        assert_eq!(pkg.namespace, Some("debian".to_string()));
3525    }
3526
3527    #[test]
3528    fn test_parse_deb_filename_invalid() {
3529        let pkg = parse_deb_filename("invalid.deb");
3530        assert!(pkg.name.is_none());
3531        assert!(pkg.version.is_none());
3532    }
3533
3534    #[test]
3535    fn test_distroless_parser() {
3536        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3537
3538        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3539
3540        if !test_file.exists() {
3541            eprintln!("Warning: Test file not found, skipping test");
3542            return;
3543        }
3544
3545        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3546
3547        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3548        assert_eq!(
3549            pkg.datasource_id,
3550            Some(DatasourceId::DebianDistrolessInstalledDb)
3551        );
3552        assert_eq!(pkg.name, Some("base-files".to_string()));
3553        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3554        assert_eq!(pkg.namespace, Some("debian".to_string()));
3555        assert!(pkg.purl.is_some());
3556        assert!(
3557            pkg.purl
3558                .as_ref()
3559                .unwrap()
3560                .contains("pkg:deb/debian/base-files")
3561        );
3562    }
3563}