Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41    DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
42};
43use crate::parsers::rfc822::{self, Rfc822Metadata};
44use crate::parsers::utils::{read_file_to_string, split_name_email};
45use crate::utils::spdx::combine_license_expressions;
46
47use super::PackageParser;
48use super::license_normalization::{
49    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
50    normalize_declared_license_key,
51};
52
53const PACKAGE_TYPE: PackageType = PackageType::Deb;
54
/// Builds a placeholder `PackageData` carrying only the Debian package type and
/// the given datasource id; every other field takes its `Default` value.
///
/// The parsers in this file return this as a fallback when a file cannot be read
/// or yields no usable package paragraph.
fn default_package_data(datasource_id: DatasourceId) -> PackageData {
    PackageData {
        package_type: Some(PACKAGE_TYPE),
        datasource_id: Some(datasource_id),
        ..Default::default()
    }
}
62
// Namespace detection clues from version strings
/// Substrings that mark a version string as Debian. `detect_namespace` searches
/// for them in the lowercased version, after the Ubuntu clues.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that mark a version string as Ubuntu. `detect_namespace` searches
/// for them in the lowercased version, before the Debian clues.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

// Namespace detection clues from maintainer fields
/// Substrings that mark a maintainer value as Debian. Only consulted when the
/// version gave no answer, and after the Ubuntu maintainer clues.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that mark a maintainer value as Ubuntu. Only consulted when the
/// version gave no answer.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
76
77// Dependency field names and their scope/flags
/// Describes how one relationship header of a control paragraph is reported.
struct DepFieldSpec {
    /// Header name looked up in the paragraph (lowercase).
    field: &'static str,
    /// Scope recorded on every dependency parsed from that header.
    scope: &'static str,
    /// Flag copied to `Dependency::is_runtime`.
    is_runtime: bool,
    /// Flag copied to `Dependency::is_optional` (alternatives are optional regardless).
    is_optional: bool,
}

/// Builds a table row whose scope is the header name itself, which holds for
/// every relationship field handled here.
const fn dep_field(name: &'static str, is_runtime: bool, is_optional: bool) -> DepFieldSpec {
    DepFieldSpec {
        field: name,
        scope: name,
        is_runtime,
        is_optional,
    }
}

/// Relationship headers extracted as dependencies, in the order they are emitted.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships; the last two are optional.
    dep_field("depends", true, false),
    dep_field("pre-depends", true, false),
    dep_field("recommends", true, true),
    dep_field("suggests", true, true),
    // Package-interaction relationships.
    dep_field("breaks", false, false),
    dep_field("conflicts", false, false),
    dep_field("replaces", false, false),
    dep_field("provides", false, false),
    // Build-time relationships.
    dep_field("build-depends", false, false),
    dep_field("build-depends-indep", false, false),
    dep_field("build-conflicts", false, false),
];
153
154// ---------------------------------------------------------------------------
155// DebianControlParser: debian/control files (source + binary paragraphs)
156// ---------------------------------------------------------------------------
157
158pub struct DebianControlParser;
159
160impl PackageParser for DebianControlParser {
161    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
162
163    fn is_match(path: &Path) -> bool {
164        if let Some(name) = path.file_name()
165            && name == "control"
166            && let Some(parent) = path.parent()
167            && let Some(parent_name) = parent.file_name()
168        {
169            return parent_name == "debian";
170        }
171        false
172    }
173
174    fn extract_packages(path: &Path) -> Vec<PackageData> {
175        let content = match read_file_to_string(path) {
176            Ok(c) => c,
177            Err(e) => {
178                warn!("Failed to read debian/control at {:?}: {}", path, e);
179                return vec![default_package_data(DatasourceId::DebianControlInSource)];
180            }
181        };
182
183        let packages = parse_debian_control(&content);
184        if packages.is_empty() {
185            vec![default_package_data(DatasourceId::DebianControlInSource)]
186        } else {
187            packages
188        }
189    }
190}
191
192// ---------------------------------------------------------------------------
193// DebianInstalledParser: /var/lib/dpkg/status
194// ---------------------------------------------------------------------------
195
196pub struct DebianInstalledParser;
197
198impl PackageParser for DebianInstalledParser {
199    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
200
201    fn is_match(path: &Path) -> bool {
202        let path_str = path.to_string_lossy();
203        path_str.ends_with("var/lib/dpkg/status")
204    }
205
206    fn extract_packages(path: &Path) -> Vec<PackageData> {
207        let content = match read_file_to_string(path) {
208            Ok(c) => c,
209            Err(e) => {
210                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
211                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
212            }
213        };
214
215        let packages = parse_dpkg_status(&content);
216        if packages.is_empty() {
217            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
218        } else {
219            packages
220        }
221    }
222}
223
224pub struct DebianDistrolessInstalledParser;
225
226impl PackageParser for DebianDistrolessInstalledParser {
227    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
228
229    fn is_match(path: &Path) -> bool {
230        let path_str = path.to_string_lossy();
231        path_str.contains("var/lib/dpkg/status.d/")
232    }
233
234    fn extract_packages(path: &Path) -> Vec<PackageData> {
235        let content = match read_file_to_string(path) {
236            Ok(c) => c,
237            Err(e) => {
238                warn!("Failed to read distroless status file at {:?}: {}", path, e);
239                return vec![default_package_data(
240                    DatasourceId::DebianDistrolessInstalledDb,
241                )];
242            }
243        };
244
245        vec![parse_distroless_status(&content)]
246    }
247}
248
249fn parse_distroless_status(content: &str) -> PackageData {
250    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
251
252    if paragraphs.is_empty() {
253        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
254    }
255
256    build_package_from_paragraph(
257        &paragraphs[0],
258        None,
259        DatasourceId::DebianDistrolessInstalledDb,
260    )
261    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
262}
263
264// ---------------------------------------------------------------------------
265// Parsing logic
266// ---------------------------------------------------------------------------
267
268/// Parses a debian/control file into PackageData entries.
269///
270/// A debian/control file has a Source paragraph followed by one or more Binary
271/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
272/// into each binary package.
273fn parse_debian_control(content: &str) -> Vec<PackageData> {
274    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
275    if paragraphs.is_empty() {
276        return Vec::new();
277    }
278
279    // Determine if first paragraph is a Source paragraph
280    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
281
282    let (source_paragraph, binary_start) = if has_source {
283        (Some(&paragraphs[0]), 1)
284    } else {
285        (None, 0)
286    };
287
288    // Extract source-level shared metadata
289    let source_meta = source_paragraph.map(extract_source_meta);
290
291    let mut packages = Vec::new();
292
293    for para in &paragraphs[binary_start..] {
294        if let Some(pkg) = build_package_from_paragraph(
295            para,
296            source_meta.as_ref(),
297            DatasourceId::DebianControlInSource,
298        ) {
299            packages.push(pkg);
300        }
301    }
302
303    if packages.is_empty()
304        && let Some(source_para) = source_paragraph
305        && let Some(pkg) = build_package_from_source_paragraph(source_para)
306    {
307        packages.push(pkg);
308    }
309
310    packages
311}
312
313/// Parses a dpkg/status file into PackageData entries.
314///
315/// Each paragraph represents an installed package. Only packages with
316/// `Status: install ok installed` are included.
317fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
318    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
319    let mut packages = Vec::new();
320
321    for para in &paragraphs {
322        let status = rfc822::get_header_first(&para.headers, "status");
323        if status.as_deref() != Some("install ok installed") {
324            continue;
325        }
326
327        if let Some(pkg) =
328            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
329        {
330            packages.push(pkg);
331        }
332    }
333
334    packages
335}
336
337// ---------------------------------------------------------------------------
338// Source paragraph metadata (shared across binary packages)
339// ---------------------------------------------------------------------------
340
/// Metadata taken from one paragraph by `extract_source_meta` and shared with
/// the packages built from the same control file.
struct SourceMeta {
    /// Parties from the `Maintainer`, `Original-Maintainer` and `Uploaders` headers.
    parties: Vec<Party>,
    /// Value of the `Homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `Vcs-Git` header.
    vcs_url: Option<String>,
    /// Value of the `Vcs-Browser` header.
    code_view_url: Option<String>,
    /// Value of the `Bugs` header.
    bug_tracking_url: Option<String>,
}
348
349fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
350    let mut parties = Vec::new();
351
352    // Maintainer
353    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
354        let (name, email) = split_name_email(&maintainer);
355        parties.push(Party {
356            r#type: Some("person".to_string()),
357            role: Some("maintainer".to_string()),
358            name,
359            email,
360            url: None,
361            organization: None,
362            organization_url: None,
363            timezone: None,
364        });
365    }
366
367    // Original-Maintainer
368    if let Some(orig_maintainer) =
369        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
370    {
371        let (name, email) = split_name_email(&orig_maintainer);
372        parties.push(Party {
373            r#type: Some("person".to_string()),
374            role: Some("maintainer".to_string()),
375            name,
376            email,
377            url: None,
378            organization: None,
379            organization_url: None,
380            timezone: None,
381        });
382    }
383
384    // Uploaders (comma-separated)
385    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
386        for uploader in uploaders_str.split(',') {
387            let trimmed = uploader.trim();
388            if !trimmed.is_empty() {
389                let (name, email) = split_name_email(trimmed);
390                parties.push(Party {
391                    r#type: Some("person".to_string()),
392                    role: Some("uploader".to_string()),
393                    name,
394                    email,
395                    url: None,
396                    organization: None,
397                    organization_url: None,
398                    timezone: None,
399                });
400            }
401        }
402    }
403
404    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage");
405
406    // VCS-Git: may contain branch info after space
407    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
408        .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
409
410    let code_view_url = rfc822::get_header_first(&paragraph.headers, "vcs-browser");
411
412    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs");
413
414    SourceMeta {
415        parties,
416        homepage_url,
417        vcs_url,
418        code_view_url,
419        bug_tracking_url,
420    }
421}
422
423// ---------------------------------------------------------------------------
424// Package building
425// ---------------------------------------------------------------------------
426
427fn build_package_from_paragraph(
428    paragraph: &Rfc822Metadata,
429    source_meta: Option<&SourceMeta>,
430    datasource_id: DatasourceId,
431) -> Option<PackageData> {
432    let name = rfc822::get_header_first(&paragraph.headers, "package")?;
433    let version = rfc822::get_header_first(&paragraph.headers, "version");
434    let architecture = rfc822::get_header_first(&paragraph.headers, "architecture");
435    let description = rfc822::get_header_first(&paragraph.headers, "description");
436    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
437    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage");
438    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
439    let section = rfc822::get_header_first(&paragraph.headers, "section");
440    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
441    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
442
443    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
444
445    // Build parties: use source_meta parties if available, otherwise parse from paragraph
446    let parties = if let Some(meta) = source_meta {
447        meta.parties.clone()
448    } else {
449        let mut p = Vec::new();
450        if let Some(m) = &maintainer_str {
451            let (n, e) = split_name_email(m);
452            p.push(Party {
453                r#type: Some("person".to_string()),
454                role: Some("maintainer".to_string()),
455                name: n,
456                email: e,
457                url: None,
458                organization: None,
459                organization_url: None,
460                timezone: None,
461            });
462        }
463        p
464    };
465
466    // Resolve homepage: paragraph's own, or from source metadata
467    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
468    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
469    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
470    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
471
472    // Build PURL
473    let purl = build_debian_purl(
474        &name,
475        version.as_deref(),
476        namespace.as_deref(),
477        architecture.as_deref(),
478    );
479
480    // Parse dependencies from all dependency fields
481    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
482
483    // Keywords from section
484    let keywords = section.into_iter().collect();
485
486    // Source packages
487    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
488
489    // Extra data
490    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
491    if let Some(ma) = &multi_arch
492        && !ma.is_empty()
493    {
494        extra_data.insert(
495            "multi_arch".to_string(),
496            serde_json::Value::String(ma.clone()),
497        );
498    }
499    if let Some(size_str) = &installed_size
500        && let Ok(size) = size_str.parse::<u64>()
501    {
502        extra_data.insert(
503            "installed_size".to_string(),
504            serde_json::Value::Number(serde_json::Number::from(size)),
505        );
506    }
507
508    // Qualifiers for architecture
509    let qualifiers = architecture.as_ref().map(|arch| {
510        let mut q = HashMap::new();
511        q.insert("arch".to_string(), arch.clone());
512        q
513    });
514
515    Some(PackageData {
516        package_type: Some(PACKAGE_TYPE),
517        namespace: namespace.clone(),
518        name: Some(name),
519        version,
520        qualifiers,
521        subpath: None,
522        primary_language: None,
523        description,
524        release_date: None,
525        parties,
526        keywords,
527        homepage_url,
528        download_url: None,
529        size: None,
530        sha1: None,
531        md5: None,
532        sha256: None,
533        sha512: None,
534        bug_tracking_url,
535        code_view_url,
536        vcs_url,
537        copyright: None,
538        holder: None,
539        declared_license_expression: None,
540        declared_license_expression_spdx: None,
541        license_detections: Vec::new(),
542        other_license_expression: None,
543        other_license_expression_spdx: None,
544        other_license_detections: Vec::new(),
545        extracted_license_statement: None,
546        notice_text: None,
547        source_packages,
548        file_references: Vec::new(),
549        is_private: false,
550        is_virtual: false,
551        extra_data: if extra_data.is_empty() {
552            None
553        } else {
554            Some(extra_data)
555        },
556        dependencies,
557        repository_homepage_url: None,
558        repository_download_url: None,
559        api_data_url: None,
560        datasource_id: Some(datasource_id),
561        purl,
562    })
563}
564
565fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
566    let name = rfc822::get_header_first(&paragraph.headers, "source")?;
567    let version = rfc822::get_header_first(&paragraph.headers, "version");
568    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
569
570    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
571    let source_meta = extract_source_meta(paragraph);
572
573    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
574    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
575
576    let section = rfc822::get_header_first(&paragraph.headers, "section");
577    let keywords = section.into_iter().collect();
578
579    Some(PackageData {
580        package_type: Some(PACKAGE_TYPE),
581        namespace: namespace.clone(),
582        name: Some(name),
583        version,
584        qualifiers: None,
585        subpath: None,
586        primary_language: None,
587        description: None,
588        release_date: None,
589        parties: source_meta.parties,
590        keywords,
591        homepage_url: source_meta.homepage_url,
592        download_url: None,
593        size: None,
594        sha1: None,
595        md5: None,
596        sha256: None,
597        sha512: None,
598        bug_tracking_url: source_meta.bug_tracking_url,
599        code_view_url: source_meta.code_view_url,
600        vcs_url: source_meta.vcs_url,
601        copyright: None,
602        holder: None,
603        declared_license_expression: None,
604        declared_license_expression_spdx: None,
605        license_detections: Vec::new(),
606        other_license_expression: None,
607        other_license_expression_spdx: None,
608        other_license_detections: Vec::new(),
609        extracted_license_statement: None,
610        notice_text: None,
611        source_packages: Vec::new(),
612        file_references: Vec::new(),
613        is_private: false,
614        is_virtual: false,
615        extra_data: None,
616        dependencies,
617        repository_homepage_url: None,
618        repository_download_url: None,
619        api_data_url: None,
620        datasource_id: Some(DatasourceId::DebianControlInSource),
621        purl,
622    })
623}
624
625// ---------------------------------------------------------------------------
626// Namespace detection
627// ---------------------------------------------------------------------------
628
629fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
630    // Check version clues first
631    if let Some(ver) = version {
632        let ver_lower = ver.to_lowercase();
633        for clue in VERSION_CLUES_UBUNTU {
634            if ver_lower.contains(clue) {
635                return Some("ubuntu".to_string());
636            }
637        }
638        for clue in VERSION_CLUES_DEBIAN {
639            if ver_lower.contains(clue) {
640                return Some("debian".to_string());
641            }
642        }
643    }
644
645    // Check maintainer clues
646    if let Some(maint) = maintainer {
647        let maint_lower = maint.to_lowercase();
648        for clue in MAINTAINER_CLUES_UBUNTU {
649            if maint_lower.contains(clue) {
650                return Some("ubuntu".to_string());
651            }
652        }
653        for clue in MAINTAINER_CLUES_DEBIAN {
654            if maint_lower.contains(clue) {
655                return Some("debian".to_string());
656            }
657        }
658    }
659
660    // Default to debian
661    Some("debian".to_string())
662}
663
664// ---------------------------------------------------------------------------
665// PURL generation
666// ---------------------------------------------------------------------------
667
/// Builds the Package URL string for a Debian package.
///
/// * `name` - package name (required).
/// * `version` - added as the purl version when present.
/// * `namespace` - added as the purl namespace when present.
/// * `architecture` - added as the `arch` qualifier when present.
///
/// Returns `None` as soon as any step of the `PackageUrl` construction reports
/// an error; no partial purl is returned.
fn build_debian_purl(
    name: &str,
    version: Option<&str>,
    namespace: Option<&str>,
    architecture: Option<&str>,
) -> Option<String> {
    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;

    if let Some(ns) = namespace {
        purl.with_namespace(ns).ok()?;
    }

    if let Some(ver) = version {
        purl.with_version(ver).ok()?;
    }

    if let Some(arch) = architecture {
        purl.add_qualifier("arch", arch).ok()?;
    }

    Some(purl.to_string())
}
690
691// ---------------------------------------------------------------------------
692// Dependency parsing
693// ---------------------------------------------------------------------------
694
695fn parse_all_dependencies(
696    headers: &HashMap<String, Vec<String>>,
697    namespace: Option<&str>,
698) -> Vec<Dependency> {
699    let mut dependencies = Vec::new();
700
701    for spec in DEP_FIELDS {
702        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
703            dependencies.extend(parse_dependency_field(
704                &dep_str,
705                spec.scope,
706                spec.is_runtime,
707                spec.is_optional,
708                namespace,
709            ));
710        }
711    }
712
713    dependencies
714}
715
716/// Parses a Debian dependency field value.
717///
718/// Debian dependencies are comma-separated, with optional version constraints
719/// in parentheses and alternative packages separated by `|`.
720///
721/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
722///
723/// Alternatives (|) are treated as separate optional dependencies.
724fn parse_dependency_field(
725    dep_str: &str,
726    scope: &str,
727    is_runtime: bool,
728    is_optional: bool,
729    namespace: Option<&str>,
730) -> Vec<Dependency> {
731    let mut deps = Vec::new();
732
733    // Regex for parsing individual dependency: name (operator version)
734    // Debian operators: <<, <=, =, >=, >>
735    let dep_re = Regex::new(
736        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
737    )
738    .unwrap();
739
740    for group in dep_str.split(',') {
741        let group = group.trim();
742        if group.is_empty() {
743            continue;
744        }
745
746        // Handle alternatives (|)
747        let alternatives: Vec<&str> = group.split('|').collect();
748        let has_alternatives = alternatives.len() > 1;
749
750        for alt in alternatives {
751            let alt = alt.trim();
752            if alt.is_empty() {
753                continue;
754            }
755
756            if let Some(caps) = dep_re.captures(alt) {
757                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
758                let operator = caps.get(2).map(|m| m.as_str().trim());
759                let version = caps.get(3).map(|m| m.as_str().trim());
760
761                if pkg_name.is_empty() {
762                    continue;
763                }
764
765                // Skip substitution variables like ${shlibs:Depends}
766                if pkg_name.starts_with('$') {
767                    continue;
768                }
769
770                let extracted_requirement = match (operator, version) {
771                    (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
772                    _ => None,
773                };
774
775                let is_pinned = operator.map(|op| op == "=");
776
777                let purl = build_debian_purl(pkg_name, None, namespace, None);
778
779                deps.push(Dependency {
780                    purl,
781                    extracted_requirement,
782                    scope: Some(scope.to_string()),
783                    is_runtime: Some(is_runtime),
784                    is_optional: Some(is_optional || has_alternatives),
785                    is_pinned,
786                    is_direct: Some(true),
787                    resolved_package: None,
788                    extra_data: None,
789                });
790            }
791        }
792    }
793
794    deps
795}
796
797// ---------------------------------------------------------------------------
798// Source field parsing
799// ---------------------------------------------------------------------------
800
801/// Parses the Source field which may contain a version in parentheses.
802///
803/// Format: `source-name` or `source-name (version)`
804fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
805    let Some(source_str) = source else {
806        return Vec::new();
807    };
808
809    let trimmed = source_str.trim();
810    if trimmed.is_empty() {
811        return Vec::new();
812    }
813
814    // Extract name and optional version from "name (version)" format
815    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
816        let name = trimmed[..paren_start].trim();
817        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
818        (
819            name,
820            if version.is_empty() {
821                None
822            } else {
823                Some(version)
824            },
825        )
826    } else {
827        (trimmed, None)
828    };
829
830    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
831        vec![purl]
832    } else {
833        Vec::new()
834    }
835}
836
837// ---------------------------------------------------------------------------
838// Parser registration macros
839// ---------------------------------------------------------------------------
840
// Registration entry for `debian/control` files.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be description, path globs, package type, primary
// language and documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for the dpkg installed-package database.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for per-package status files under `status.d`.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
864
865// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
866// but we register it for the single-package interface too.
867
868// ============================================================================
869// WAVE 2 PARSERS: Additional Debian Format Support
870// ============================================================================
871
872/// Parser for Debian Source Control (.dsc) files
873pub struct DebianDscParser;
874
875impl PackageParser for DebianDscParser {
876    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
877
878    fn is_match(path: &Path) -> bool {
879        path.extension().and_then(|e| e.to_str()) == Some("dsc")
880    }
881
882    fn extract_packages(path: &Path) -> Vec<PackageData> {
883        let content = match read_file_to_string(path) {
884            Ok(c) => c,
885            Err(e) => {
886                warn!("Failed to read .dsc file {:?}: {}", path, e);
887                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
888            }
889        };
890
891        vec![parse_dsc_content(&content)]
892    }
893}
894
// Registration entry for `.dsc` source control files.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; confirm
// the meaning of each argument against the macro definition.
crate::register_parser!(
    "Debian source control file (.dsc)",
    &["**/*.dsc"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
902
/// Removes the OpenPGP cleartext-signature framing from `content`.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines and the
/// first blank line that follow it (as long as nothing has been kept yet), and
/// everything from `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`. Every
/// other line is kept and terminated with `\n`, so unsigned input passes through
/// with normalized line endings.
fn strip_pgp_signature(content: &str) -> String {
    let mut kept = String::new();
    // True from the signed-message marker until the blank line ending its headers.
    let mut in_armor_headers = false;
    // True between the signature begin and end markers.
    let mut inside_signature = false;

    for line in content.lines() {
        match line {
            l if l.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") => in_armor_headers = true,
            l if l.starts_with("-----BEGIN PGP SIGNATURE-----") => inside_signature = true,
            l if l.starts_with("-----END PGP SIGNATURE-----") => inside_signature = false,
            l if in_armor_headers && l.starts_with("Hash:") => {}
            // The blank separator only counts before any payload has been kept.
            "" if in_armor_headers && kept.is_empty() => in_armor_headers = false,
            _ if inside_signature => {}
            _ => {
                kept.push_str(line);
                kept.push('\n');
            }
        }
    }

    kept
}
936
937fn parse_dsc_content(content: &str) -> PackageData {
938    let clean_content = strip_pgp_signature(content);
939    let metadata = rfc822::parse_rfc822_content(&clean_content);
940    let headers = &metadata.headers;
941
942    let name = rfc822::get_header_first(headers, "source");
943    let version = rfc822::get_header_first(headers, "version");
944    let architecture = rfc822::get_header_first(headers, "architecture");
945    let namespace = Some("debian".to_string());
946
947    let mut package = PackageData {
948        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
949        package_type: Some(PACKAGE_TYPE),
950        namespace: namespace.clone(),
951        name: name.clone(),
952        version: version.clone(),
953        description: rfc822::get_header_first(headers, "description"),
954        homepage_url: rfc822::get_header_first(headers, "homepage"),
955        vcs_url: rfc822::get_header_first(headers, "vcs-git"),
956        code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
957        ..Default::default()
958    };
959
960    // Build PURL with architecture qualifier
961    if let (Some(n), Some(v)) = (&name, &version) {
962        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
963    }
964
965    // Set source_packages to point to the source itself (without version)
966    if let Some(n) = &name
967        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
968    {
969        package.source_packages.push(source_purl);
970    }
971
972    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
973        let (name_opt, email_opt) = split_name_email(&maintainer);
974        package.parties.push(Party {
975            r#type: None,
976            role: Some("maintainer".to_string()),
977            name: name_opt,
978            email: email_opt,
979            url: None,
980            organization: None,
981            organization_url: None,
982            timezone: None,
983        });
984    }
985
986    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
987        for uploader in uploaders_str.split(',') {
988            let uploader = uploader.trim();
989            if uploader.is_empty() {
990                continue;
991            }
992            let (name_opt, email_opt) = split_name_email(uploader);
993            package.parties.push(Party {
994                r#type: None,
995                role: Some("uploader".to_string()),
996                name: name_opt,
997                email: email_opt,
998                url: None,
999                organization: None,
1000                organization_url: None,
1001                timezone: None,
1002            });
1003        }
1004    }
1005
1006    // Parse Build-Depends
1007    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1008        package.dependencies.extend(parse_dependency_field(
1009            &build_deps,
1010            "build",
1011            false,
1012            false,
1013            namespace.as_deref(),
1014        ));
1015    }
1016
1017    // Store Standards-Version in extra_data
1018    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1019        let map = package.extra_data.get_or_insert_with(HashMap::new);
1020        map.insert("standards_version".to_string(), standards.into());
1021    }
1022
1023    package
1024}
1025
1026/// Parser for Debian original source tarballs (*.orig.tar.*)
1027pub struct DebianOrigTarParser;
1028
1029impl PackageParser for DebianOrigTarParser {
1030    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1031
1032    fn is_match(path: &Path) -> bool {
1033        path.file_name()
1034            .and_then(|n| n.to_str())
1035            .map(|name| name.contains(".orig.tar."))
1036            .unwrap_or(false)
1037    }
1038
1039    fn extract_packages(path: &Path) -> Vec<PackageData> {
1040        let filename = match path.file_name().and_then(|n| n.to_str()) {
1041            Some(f) => f,
1042            None => {
1043                return vec![default_package_data(
1044                    DatasourceId::DebianOriginalSourceTarball,
1045                )];
1046            }
1047        };
1048
1049        vec![parse_source_tarball_filename(
1050            filename,
1051            DatasourceId::DebianOriginalSourceTarball,
1052        )]
1053    }
1054}
1055
1056crate::register_parser!(
1057    "Debian original source tarball",
1058    &["**/*.orig.tar.*"],
1059    "deb",
1060    "",
1061    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1062);
1063
1064/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1065pub struct DebianDebianTarParser;
1066
1067impl PackageParser for DebianDebianTarParser {
1068    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1069
1070    fn is_match(path: &Path) -> bool {
1071        path.file_name()
1072            .and_then(|n| n.to_str())
1073            .map(|name| name.contains(".debian.tar."))
1074            .unwrap_or(false)
1075    }
1076
1077    fn extract_packages(path: &Path) -> Vec<PackageData> {
1078        let filename = match path.file_name().and_then(|n| n.to_str()) {
1079            Some(f) => f,
1080            None => {
1081                return vec![default_package_data(
1082                    DatasourceId::DebianSourceMetadataTarball,
1083                )];
1084            }
1085        };
1086
1087        vec![parse_source_tarball_filename(
1088            filename,
1089            DatasourceId::DebianSourceMetadataTarball,
1090        )]
1091    }
1092}
1093
1094crate::register_parser!(
1095    "Debian source metadata tarball",
1096    &["**/*.debian.tar.*"],
1097    "deb",
1098    "",
1099    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1100);
1101
1102fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1103    let without_tar_ext = filename
1104        .trim_end_matches(".gz")
1105        .trim_end_matches(".xz")
1106        .trim_end_matches(".bz2")
1107        .trim_end_matches(".tar");
1108
1109    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1110    if parts.len() < 2 {
1111        return default_package_data(datasource_id);
1112    }
1113
1114    let name = parts[0].to_string();
1115    let version_with_suffix = parts[1];
1116
1117    let version = version_with_suffix
1118        .trim_end_matches(".orig")
1119        .trim_end_matches(".debian")
1120        .to_string();
1121
1122    let namespace = Some("debian".to_string());
1123
1124    PackageData {
1125        datasource_id: Some(datasource_id),
1126        package_type: Some(PACKAGE_TYPE),
1127        namespace: namespace.clone(),
1128        name: Some(name.clone()),
1129        version: Some(version.clone()),
1130        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1131        ..Default::default()
1132    }
1133}
1134
1135/// Parser for Debian installed file lists (*.list)
1136pub struct DebianInstalledListParser;
1137
1138impl PackageParser for DebianInstalledListParser {
1139    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1140
1141    fn is_match(path: &Path) -> bool {
1142        path.extension().and_then(|e| e.to_str()) == Some("list")
1143            && path
1144                .to_str()
1145                .map(|p| p.contains("/var/lib/dpkg/info/"))
1146                .unwrap_or(false)
1147    }
1148
1149    fn extract_packages(path: &Path) -> Vec<PackageData> {
1150        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1151            Some(f) => f,
1152            None => {
1153                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1154            }
1155        };
1156
1157        let content = match read_file_to_string(path) {
1158            Ok(c) => c,
1159            Err(e) => {
1160                warn!("Failed to read .list file {:?}: {}", path, e);
1161                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1162            }
1163        };
1164
1165        vec![parse_debian_file_list(
1166            &content,
1167            filename,
1168            DatasourceId::DebianInstalledFilesList,
1169        )]
1170    }
1171}
1172
1173crate::register_parser!(
1174    "Debian installed files list",
1175    &["**/var/lib/dpkg/info/*.list"],
1176    "deb",
1177    "",
1178    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1179);
1180
1181/// Parser for Debian installed MD5 checksum files (*.md5sums)
1182pub struct DebianInstalledMd5sumsParser;
1183
1184impl PackageParser for DebianInstalledMd5sumsParser {
1185    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1186
1187    fn is_match(path: &Path) -> bool {
1188        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1189            && path
1190                .to_str()
1191                .map(|p| p.contains("/var/lib/dpkg/info/"))
1192                .unwrap_or(false)
1193    }
1194
1195    fn extract_packages(path: &Path) -> Vec<PackageData> {
1196        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1197            Some(f) => f,
1198            None => {
1199                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1200            }
1201        };
1202
1203        let content = match read_file_to_string(path) {
1204            Ok(c) => c,
1205            Err(e) => {
1206                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1207                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1208            }
1209        };
1210
1211        vec![parse_debian_file_list(
1212            &content,
1213            filename,
1214            DatasourceId::DebianInstalledMd5Sums,
1215        )]
1216    }
1217}
1218
1219crate::register_parser!(
1220    "Debian installed package md5sums",
1221    &["**/var/lib/dpkg/info/*.md5sums"],
1222    "deb",
1223    "",
1224    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1225);
1226
/// Paths that `parse_debian_file_list` skips when collecting file references:
/// an entry is dropped only when its path is exactly one of these strings.
const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1228
1229fn parse_debian_file_list(
1230    content: &str,
1231    filename: &str,
1232    datasource_id: DatasourceId,
1233) -> PackageData {
1234    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1235        (Some(pkg.to_string()), Some(arch.to_string()))
1236    } else if filename == "md5sums" {
1237        (None, None)
1238    } else {
1239        (Some(filename.to_string()), None)
1240    };
1241
1242    let mut file_references = Vec::new();
1243
1244    for line in content.lines() {
1245        let line = line.trim();
1246        if line.is_empty() || line.starts_with('#') {
1247            continue;
1248        }
1249
1250        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1251            (Some(hash.trim().to_string()), p.trim())
1252        } else {
1253            (None, line)
1254        };
1255
1256        if IGNORED_ROOT_DIRS.contains(&path) {
1257            continue;
1258        }
1259
1260        file_references.push(FileReference {
1261            path: path.to_string(),
1262            size: None,
1263            sha1: None,
1264            md5: md5sum,
1265            sha256: None,
1266            sha512: None,
1267            extra_data: None,
1268        });
1269    }
1270
1271    if file_references.is_empty() {
1272        return default_package_data(datasource_id);
1273    }
1274
1275    let namespace = Some("debian".to_string());
1276    let mut package = PackageData {
1277        datasource_id: Some(datasource_id),
1278        package_type: Some(PACKAGE_TYPE),
1279        namespace: namespace.clone(),
1280        name: name.clone(),
1281        file_references,
1282        ..Default::default()
1283    };
1284
1285    if let Some(n) = &name {
1286        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1287    }
1288
1289    package
1290}
1291
1292/// Parser for Debian machine-readable copyright files (DEP-5 format)
1293pub struct DebianCopyrightParser;
1294
1295impl PackageParser for DebianCopyrightParser {
1296    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1297
1298    fn is_match(path: &Path) -> bool {
1299        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1300            if filename != "copyright" {
1301                return filename.ends_with("_copyright");
1302            }
1303            let path_str = path.to_string_lossy();
1304            path_str.contains("/debian/")
1305                || path_str.contains("/usr/share/doc/")
1306                || path_str.ends_with("debian/copyright")
1307        } else {
1308            false
1309        }
1310    }
1311
1312    fn extract_packages(path: &Path) -> Vec<PackageData> {
1313        let datasource_id = detect_debian_copyright_datasource(path);
1314        let content = match read_file_to_string(path) {
1315            Ok(c) => c,
1316            Err(e) => {
1317                warn!("Failed to read copyright file {:?}: {}", path, e);
1318                return vec![default_package_data(datasource_id)];
1319            }
1320        };
1321
1322        let package_name = extract_package_name_from_path(path);
1323        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1324        package_data.datasource_id = Some(datasource_id);
1325        vec![package_data]
1326    }
1327}
1328
1329crate::register_parser!(
1330    "Debian machine-readable copyright file",
1331    &[
1332        "**/debian/copyright",
1333        "**/usr/share/doc/*/copyright",
1334        "**/*_copyright"
1335    ],
1336    "deb",
1337    "",
1338    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
1339);
1340
1341fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1342    let path_str = path.to_string_lossy();
1343    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1344        DatasourceId::DebianCopyrightInSource
1345    } else if path_str.contains("/usr/share/doc/") {
1346        DatasourceId::DebianCopyrightInPackage
1347    } else {
1348        DatasourceId::DebianCopyrightStandalone
1349    }
1350}
1351
/// Returns the path component that directly follows the first `doc` directory
/// which is itself followed by a normal component (e.g. `libc6` for
/// `/usr/share/doc/libc6/copyright`).
///
/// Returns `None` when there is no such pair, or when the component after
/// `doc` is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let components: Vec<_> = path.components().collect();

    components
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(next)) if dir.to_str() == Some("doc") => {
                // The first qualifying pair decides the result, even when the
                // name turns out not to be UTF-8.
                Some(next.to_str().map(|s| s.to_string()))
            }
            _ => None,
        })
        .flatten()
}
1366
1367fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1368    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1369
1370    let is_dep5 = paragraphs
1371        .first()
1372        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1373        .is_some();
1374
1375    let namespace = Some("debian".to_string());
1376    let mut parties = Vec::new();
1377    let mut license_statements = Vec::new();
1378    let mut primary_license_detection = None;
1379    let mut header_license_detection = None;
1380    let mut other_license_detections = Vec::new();
1381
1382    if is_dep5 {
1383        for para in &paragraphs {
1384            if let Some(copyright_text) =
1385                rfc822::get_header_first(&para.metadata.headers, "copyright")
1386            {
1387                for holder in parse_copyright_holders(&copyright_text) {
1388                    if !holder.is_empty() {
1389                        parties.push(Party {
1390                            r#type: None,
1391                            role: Some("copyright-holder".to_string()),
1392                            name: Some(holder),
1393                            email: None,
1394                            url: None,
1395                            organization: None,
1396                            organization_url: None,
1397                            timezone: None,
1398                        });
1399                    }
1400                }
1401            }
1402
1403            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1404                let license_name = license.lines().next().unwrap_or(&license).trim();
1405                if !license_name.is_empty()
1406                    && !license_statements.contains(&license_name.to_string())
1407                {
1408                    license_statements.push(license_name.to_string());
1409                }
1410
1411                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1412                    let detection =
1413                        build_primary_license_detection(license_name, matched_text, line_no);
1414                    let is_header_paragraph =
1415                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1416                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1417                        == Some("*")
1418                    {
1419                        primary_license_detection = Some(detection);
1420                    } else if is_header_paragraph {
1421                        header_license_detection.get_or_insert(detection);
1422                    } else {
1423                        other_license_detections.push(detection);
1424                    }
1425                }
1426            }
1427        }
1428
1429        if primary_license_detection.is_none() && header_license_detection.is_some() {
1430            primary_license_detection = header_license_detection;
1431        }
1432    } else {
1433        let copyright_block = extract_unstructured_field(content, "Copyright:");
1434        if let Some(text) = copyright_block {
1435            for holder in parse_copyright_holders(&text) {
1436                if !holder.is_empty() {
1437                    parties.push(Party {
1438                        r#type: None,
1439                        role: Some("copyright-holder".to_string()),
1440                        name: Some(holder),
1441                        email: None,
1442                        url: None,
1443                        organization: None,
1444                        organization_url: None,
1445                        timezone: None,
1446                    });
1447                }
1448            }
1449        }
1450
1451        let license_block = extract_unstructured_field(content, "License:");
1452        if let Some(text) = license_block {
1453            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1454        }
1455    }
1456
1457    let extracted_license_statement = if license_statements.is_empty() {
1458        None
1459    } else {
1460        Some(license_statements.join(" AND "))
1461    };
1462
1463    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1464    let declared_license_expression = license_detections
1465        .first()
1466        .map(|detection| detection.license_expression.clone());
1467    let declared_license_expression_spdx = license_detections
1468        .first()
1469        .map(|detection| detection.license_expression_spdx.clone());
1470    let other_license_expression = combine_license_expressions(
1471        other_license_detections
1472            .iter()
1473            .map(|detection| detection.license_expression.clone()),
1474    );
1475    let other_license_expression_spdx = combine_license_expressions(
1476        other_license_detections
1477            .iter()
1478            .map(|detection| detection.license_expression_spdx.clone()),
1479    );
1480
1481    PackageData {
1482        datasource_id: Some(DatasourceId::DebianCopyright),
1483        package_type: Some(PACKAGE_TYPE),
1484        namespace: namespace.clone(),
1485        name: package_name.map(|s| s.to_string()),
1486        parties,
1487        declared_license_expression,
1488        declared_license_expression_spdx,
1489        license_detections,
1490        other_license_expression,
1491        other_license_expression_spdx,
1492        other_license_detections,
1493        extracted_license_statement,
1494        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1495        ..Default::default()
1496    }
1497}
1498
/// One blank-line-delimited paragraph of a Debian copyright file, as produced
/// by `parse_copyright_paragraphs_with_lines`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lower-cased field name; the
    /// `body` is left empty.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (trailing whitespace
    /// removed) together with its 1-based line number in the file, if any.
    license_header_line: Option<(String, usize)>,
}
1504
1505fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1506    let mut paragraphs = Vec::new();
1507    let mut current_lines = Vec::new();
1508    let mut current_start_line = 1usize;
1509
1510    for (idx, line) in content.lines().enumerate() {
1511        let line_no = idx + 1;
1512        if line.is_empty() {
1513            if !current_lines.is_empty() {
1514                paragraphs.push(finalize_copyright_paragraph(
1515                    std::mem::take(&mut current_lines),
1516                    current_start_line,
1517                ));
1518            }
1519            current_start_line = line_no + 1;
1520        } else {
1521            if current_lines.is_empty() {
1522                current_start_line = line_no;
1523            }
1524            current_lines.push(line.to_string());
1525        }
1526    }
1527
1528    if !current_lines.is_empty() {
1529        paragraphs.push(finalize_copyright_paragraph(
1530            current_lines,
1531            current_start_line,
1532        ));
1533    }
1534
1535    paragraphs
1536}
1537
1538fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1539    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1540    let mut current_name: Option<String> = None;
1541    let mut current_value = String::new();
1542    let mut license_header_line = None;
1543
1544    for (idx, line) in raw_lines.iter().enumerate() {
1545        if line.starts_with(' ') || line.starts_with('\t') {
1546            if current_name.is_some() {
1547                current_value.push('\n');
1548                current_value.push_str(line);
1549            }
1550            continue;
1551        }
1552
1553        if let Some(name) = current_name.take() {
1554            add_copyright_header_value(&mut headers, &name, &current_value);
1555            current_value.clear();
1556        }
1557
1558        if let Some((name, value)) = line.split_once(':') {
1559            let normalized_name = name.trim().to_ascii_lowercase();
1560            if normalized_name == "license" && license_header_line.is_none() {
1561                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1562            }
1563            current_name = Some(normalized_name);
1564            current_value = value.trim_start().to_string();
1565        }
1566    }
1567
1568    if let Some(name) = current_name.take() {
1569        add_copyright_header_value(&mut headers, &name, &current_value);
1570    }
1571
1572    CopyrightParagraph {
1573        metadata: Rfc822Metadata {
1574            headers,
1575            body: String::new(),
1576        },
1577        license_header_line,
1578    }
1579}
1580
/// Records `value` under `name` in `headers`.
///
/// The entry for `name` is always created. The value, with trailing
/// whitespace removed, is appended only when something is left after trimming.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_string()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_string()),
    }
}
1588
1589fn build_primary_license_detection(
1590    license_name: &str,
1591    matched_text: String,
1592    line_no: usize,
1593) -> LicenseDetection {
1594    let normalized = normalize_debian_license_name(license_name);
1595
1596    build_declared_license_detection(
1597        &normalized,
1598        DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no),
1599    )
1600}
1601
1602fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1603    match license_name.trim() {
1604        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1605        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1606        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1607        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1608        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1609        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1610        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1611        "public-domain" => {
1612            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1613        }
1614        other => normalize_declared_license_key(other)
1615            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1616    }
1617}
1618
/// Extracts copyright holder names from the text of a copyright statement.
///
/// Each non-empty line is processed independently:
/// 1. Leading copyright markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`)
///    are removed repeatedly, together with any whitespace between them, so
///    that `Copyright (C) 2010 John Doe` is reduced to `2010 John Doe`.
/// 2. Everything before the first alphabetic character (years, ranges,
///    commas, dashes) is skipped.
/// 3. The rest of the line, trimmed, is the holder.
///
/// Lines without any alphabetic character, or whose holder is two bytes or
/// shorter, are ignored. Holders are returned in input order; duplicates are
/// not removed.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

    let mut holders = Vec::new();

    for line in text.lines() {
        let mut rest = line.trim();

        // Markers commonly appear in sequence separated by spaces, so keep
        // stripping until none of them is a prefix any more.
        loop {
            let current = rest;
            match MARKERS
                .iter()
                .find_map(|marker| current.strip_prefix(*marker))
            {
                Some(tail) => rest = tail.trim_start(),
                None => break,
            }
        }

        // Skip the year portion: the holder starts at the first letter.
        let Some(holder_start) = rest.find(char::is_alphabetic) else {
            continue;
        };

        let holder = rest[holder_start..].trim();
        if holder.len() > 2 {
            holders.push(holder.to_string());
        }
    }

    holders
}
1651
/// Collects the text of a `field_name` block from a free-form copyright file.
///
/// Collection starts at the first line beginning with `field_name` (the
/// prefix is removed and the remainder trimmed). It continues over lines
/// that start with whitespace (trimmed), skips empty lines, and also absorbs
/// further lines that begin with `field_name`. It stops at the first other
/// non-blank line. Collected lines are joined with `\n`.
///
/// Returns `None` when the field is absent or contains only whitespace.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut started = false;

    for raw in content.lines() {
        let piece = if raw.starts_with(field_name) {
            started = true;
            raw.trim_start_matches(field_name).trim()
        } else if !started {
            continue;
        } else if raw.starts_with(char::is_whitespace) {
            raw.trim()
        } else if raw.trim().is_empty() {
            continue;
        } else {
            // A new, unindented line ends the field.
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    Some(collected.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_string)
}
1678
1679/// Parser for Debian binary package archives (.deb files)
1680pub struct DebianDebParser;
1681
1682impl PackageParser for DebianDebParser {
1683    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1684
1685    fn is_match(path: &Path) -> bool {
1686        path.extension().and_then(|e| e.to_str()) == Some("deb")
1687    }
1688
1689    fn extract_packages(path: &Path) -> Vec<PackageData> {
1690        // Try to extract metadata from archive contents first
1691        if let Ok(data) = extract_deb_archive(path) {
1692            return vec![data];
1693        }
1694
1695        // Fallback to filename parsing
1696        let filename = match path.file_name().and_then(|n| n.to_str()) {
1697            Some(f) => f,
1698            None => {
1699                return vec![default_package_data(DatasourceId::DebianDeb)];
1700            }
1701        };
1702
1703        vec![parse_deb_filename(filename)]
1704    }
1705}
1706
1707crate::register_parser!(
1708    "Debian binary package archive (.deb)",
1709    &["**/*.deb"],
1710    "deb",
1711    "",
1712    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
1713);
1714
1715fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1716    use flate2::read::GzDecoder;
1717    use liblzma::read::XzDecoder;
1718    use std::io::{Cursor, Read};
1719
1720    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1721
1722    let mut archive = ar::Archive::new(file);
1723    let mut package: Option<PackageData> = None;
1724
1725    while let Some(entry_result) = archive.next_entry() {
1726        let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1727
1728        let entry_name = std::str::from_utf8(entry.header().identifier())
1729            .map_err(|e| format!("Invalid entry name: {}", e))?;
1730        let entry_name = entry_name.trim().to_string();
1731
1732        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1733            let mut control_data = Vec::new();
1734            entry
1735                .read_to_end(&mut control_data)
1736                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1737
1738            if entry_name.ends_with(".gz") {
1739                let decoder = GzDecoder::new(Cursor::new(control_data));
1740                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1741                    package = Some(parsed_package);
1742                }
1743            } else if entry_name.ends_with(".xz") {
1744                let decoder = XzDecoder::new(Cursor::new(control_data));
1745                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1746                    package = Some(parsed_package);
1747                }
1748            }
1749        } else if entry_name.starts_with("data.tar") {
1750            let mut data = Vec::new();
1751            entry
1752                .read_to_end(&mut data)
1753                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1754
1755            let Some(current_package) = package.as_mut() else {
1756                continue;
1757            };
1758
1759            if entry_name.ends_with(".gz") {
1760                let decoder = GzDecoder::new(Cursor::new(data));
1761                merge_deb_data_archive(decoder, current_package)?;
1762            } else if entry_name.ends_with(".xz") {
1763                let decoder = XzDecoder::new(Cursor::new(data));
1764                merge_deb_data_archive(decoder, current_package)?;
1765            }
1766        }
1767    }
1768
1769    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1770}
1771
1772fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1773    use std::io::Read;
1774
1775    let mut tar_archive = tar::Archive::new(reader);
1776
1777    for tar_entry_result in tar_archive
1778        .entries()
1779        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1780    {
1781        let mut tar_entry =
1782            tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1783
1784        let tar_path = tar_entry
1785            .path()
1786            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1787
1788        if tar_path.ends_with("control") {
1789            let mut control_content = String::new();
1790            tar_entry
1791                .read_to_string(&mut control_content)
1792                .map_err(|e| format!("Failed to read control file: {}", e))?;
1793
1794            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1795            if paragraphs.is_empty() {
1796                return Err("No paragraphs in control file".to_string());
1797            }
1798
1799            if let Some(package) =
1800                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
1801            {
1802                return Ok(Some(package));
1803            }
1804
1805            return Err("Failed to parse control file".to_string());
1806        }
1807    }
1808
1809    Ok(None)
1810}
1811
1812fn merge_deb_data_archive<R: std::io::Read>(
1813    reader: R,
1814    package: &mut PackageData,
1815) -> Result<(), String> {
1816    use std::io::Read;
1817
1818    let mut tar_archive = tar::Archive::new(reader);
1819
1820    for tar_entry_result in tar_archive
1821        .entries()
1822        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1823    {
1824        let mut tar_entry =
1825            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1826
1827        let tar_path = tar_entry
1828            .path()
1829            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1830        let tar_path_str = tar_path.to_string_lossy();
1831
1832        if tar_path_str.ends_with(&format!(
1833            "/usr/share/doc/{}/copyright",
1834            package.name.as_deref().unwrap_or_default()
1835        )) || tar_path_str.ends_with(&format!(
1836            "usr/share/doc/{}/copyright",
1837            package.name.as_deref().unwrap_or_default()
1838        )) {
1839            let mut copyright_content = String::new();
1840            tar_entry
1841                .read_to_string(&mut copyright_content)
1842                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1843
1844            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
1845            merge_debian_copyright_into_package(package, &copyright_pkg);
1846            break;
1847        }
1848    }
1849
1850    Ok(())
1851}
1852
1853fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1854    if target.extracted_license_statement.is_none() {
1855        target.extracted_license_statement = copyright.extracted_license_statement.clone();
1856    }
1857
1858    for party in &copyright.parties {
1859        if !target.parties.iter().any(|existing| {
1860            existing.r#type == party.r#type
1861                && existing.role == party.role
1862                && existing.name == party.name
1863                && existing.email == party.email
1864                && existing.url == party.url
1865                && existing.organization == party.organization
1866                && existing.organization_url == party.organization_url
1867                && existing.timezone == party.timezone
1868        }) {
1869            target.parties.push(party.clone());
1870        }
1871    }
1872}
1873
1874fn parse_deb_filename(filename: &str) -> PackageData {
1875    let without_ext = filename.trim_end_matches(".deb");
1876
1877    let parts: Vec<&str> = without_ext.split('_').collect();
1878    if parts.len() < 2 {
1879        return default_package_data(DatasourceId::DebianDeb);
1880    }
1881
1882    let name = parts[0].to_string();
1883    let version = parts[1].to_string();
1884    let architecture = if parts.len() >= 3 {
1885        Some(parts[2].to_string())
1886    } else {
1887        None
1888    };
1889
1890    let namespace = Some("debian".to_string());
1891
1892    PackageData {
1893        datasource_id: Some(DatasourceId::DebianDeb),
1894        package_type: Some(PACKAGE_TYPE),
1895        namespace: namespace.clone(),
1896        name: Some(name.clone()),
1897        version: Some(version.clone()),
1898        purl: build_debian_purl(
1899            &name,
1900            Some(&version),
1901            namespace.as_deref(),
1902            architecture.as_deref(),
1903        ),
1904        ..Default::default()
1905    }
1906}
1907
1908/// Parser for control files inside extracted .deb control tarballs.
1909///
1910/// Matches paths like `*/control.tar.gz-extract/control` and
1911/// `*/control.tar.xz-extract/control` which are created by ExtractCode
1912/// when extracting .deb archives.
1913pub struct DebianControlInExtractedDebParser;
1914
1915impl PackageParser for DebianControlInExtractedDebParser {
1916    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1917
1918    fn is_match(path: &Path) -> bool {
1919        path.file_name()
1920            .and_then(|n| n.to_str())
1921            .is_some_and(|name| name == "control")
1922            && path
1923                .to_str()
1924                .map(|p| {
1925                    p.ends_with("control.tar.gz-extract/control")
1926                        || p.ends_with("control.tar.xz-extract/control")
1927                })
1928                .unwrap_or(false)
1929    }
1930
1931    fn extract_packages(path: &Path) -> Vec<PackageData> {
1932        let content = match read_file_to_string(path) {
1933            Ok(c) => c,
1934            Err(e) => {
1935                warn!(
1936                    "Failed to read control file in extracted deb {:?}: {}",
1937                    path, e
1938                );
1939                return vec![default_package_data(
1940                    DatasourceId::DebianControlExtractedDeb,
1941                )];
1942            }
1943        };
1944
1945        // A control file inside an extracted .deb has a single paragraph
1946        // (unlike debian/control which has source + binary paragraphs)
1947        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1948        if paragraphs.is_empty() {
1949            return vec![default_package_data(
1950                DatasourceId::DebianControlExtractedDeb,
1951            )];
1952        }
1953
1954        if let Some(pkg) = build_package_from_paragraph(
1955            &paragraphs[0],
1956            None,
1957            DatasourceId::DebianControlExtractedDeb,
1958        ) {
1959            vec![pkg]
1960        } else {
1961            vec![default_package_data(
1962                DatasourceId::DebianControlExtractedDeb,
1963            )]
1964        }
1965    }
1966}
1967
1968/// Parser for MD5 checksum files inside extracted .deb control tarballs
1969pub struct DebianMd5sumInPackageParser;
1970
1971impl PackageParser for DebianMd5sumInPackageParser {
1972    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1973
1974    fn is_match(path: &Path) -> bool {
1975        path.file_name()
1976            .and_then(|n| n.to_str())
1977            .is_some_and(|name| name == "md5sums")
1978            && path
1979                .to_str()
1980                .map(|p| {
1981                    p.ends_with("control.tar.gz-extract/md5sums")
1982                        || p.ends_with("control.tar.xz-extract/md5sums")
1983                })
1984                .unwrap_or(false)
1985    }
1986
1987    fn extract_packages(path: &Path) -> Vec<PackageData> {
1988        let content = match read_file_to_string(path) {
1989            Ok(c) => c,
1990            Err(e) => {
1991                warn!("Failed to read md5sums file {:?}: {}", path, e);
1992                return vec![default_package_data(
1993                    DatasourceId::DebianMd5SumsInExtractedDeb,
1994                )];
1995            }
1996        };
1997
1998        let package_name = extract_package_name_from_deb_path(path);
1999
2000        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2001    }
2002}
2003
/// Recovers a package name from the directory two levels above `path`.
///
/// That directory must be named `<something>.deb-extract`; the text before
/// the first `_` of `<something>` is returned. Yields `None` when the path is
/// too shallow, the directory name is not valid UTF-8, or either suffix is
/// missing.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    // `ancestors()` yields the path itself, then its parent, then the grandparent.
    let extract_dir = path.ancestors().nth(2)?;
    let stem = extract_dir
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    let name = stem.split_once('_').map_or(stem, |(head, _)| head);
    Some(name.to_owned())
}
2014
2015fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2016    let mut file_references = Vec::new();
2017
2018    for line in content.lines() {
2019        let line = line.trim();
2020        if line.is_empty() || line.starts_with('#') {
2021            continue;
2022        }
2023
2024        let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find("  ") {
2025            (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
2026        } else if let Some((hash, path)) = line.split_once(' ') {
2027            (Some(hash.trim().to_string()), path.trim())
2028        } else {
2029            (None, line)
2030        };
2031
2032        if IGNORED_ROOT_DIRS.contains(&filepath) {
2033            continue;
2034        }
2035
2036        file_references.push(FileReference {
2037            path: filepath.to_string(),
2038            size: None,
2039            sha1: None,
2040            md5: md5sum,
2041            sha256: None,
2042            sha512: None,
2043            extra_data: None,
2044        });
2045    }
2046
2047    if file_references.is_empty() {
2048        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2049    }
2050
2051    let namespace = Some("debian".to_string());
2052    let mut package = PackageData {
2053        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2054        package_type: Some(PACKAGE_TYPE),
2055        namespace: namespace.clone(),
2056        name: package_name.map(|s| s.to_string()),
2057        file_references,
2058        ..Default::default()
2059    };
2060
2061    if let Some(n) = &package.name {
2062        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2063    }
2064
2065    package
2066}
2067
// Registration for the extracted-.deb `control` parser. The two globs mirror
// the path suffixes accepted by `DebianControlInExtractedDebParser::is_match`.
// NOTE(review): the meaning of each positional argument (including the empty
// fourth one) is defined by `register_parser!`, which is not visible here —
// confirm against the macro before editing.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2078
// Registration for the extracted-.deb `md5sums` parser. The two globs mirror
// the path suffixes accepted by `DebianMd5sumInPackageParser::is_match`.
// NOTE(review): the meaning of each positional argument (including the empty
// fourth one) is defined by `register_parser!`, which is not visible here —
// confirm against the macro before editing.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2089
2090#[cfg(test)]
2091mod tests {
2092    use super::*;
2093    use crate::models::DatasourceId;
2094    use crate::models::PackageType;
2095    use ar::{Builder as ArBuilder, Header as ArHeader};
2096    use flate2::Compression;
2097    use flate2::write::GzEncoder;
2098    use liblzma::write::XzEncoder;
2099    use std::io::Cursor;
2100    use std::path::PathBuf;
2101    use tar::{Builder as TarBuilder, Header as TarHeader};
2102    use tempfile::NamedTempFile;
2103
2104    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2105        let mut control_tar = Vec::new();
2106        {
2107            let encoder = XzEncoder::new(&mut control_tar, 6);
2108            let mut tar_builder = TarBuilder::new(encoder);
2109
2110            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2111            let mut header = TarHeader::new_gnu();
2112            header
2113                .set_path("control")
2114                .expect("control tar path should be valid");
2115            header.set_size(control_content.len() as u64);
2116            header.set_mode(0o644);
2117            header.set_cksum();
2118            tar_builder
2119                .append(&header, Cursor::new(control_content))
2120                .expect("control file should be appended to tar.xz");
2121            tar_builder.finish().expect("control tar.xz should finish");
2122        }
2123
2124        let deb = NamedTempFile::new().expect("temp deb file should be created");
2125        {
2126            let mut builder = ArBuilder::new(
2127                deb.reopen()
2128                    .expect("temporary deb file should reopen for writing"),
2129            );
2130
2131            let debian_binary = b"2.0\n";
2132            let mut debian_binary_header =
2133                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2134            debian_binary_header.set_mode(0o100644);
2135            builder
2136                .append(&debian_binary_header, Cursor::new(debian_binary))
2137                .expect("debian-binary entry should be appended");
2138
2139            let mut control_header =
2140                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2141            control_header.set_mode(0o100644);
2142            builder
2143                .append(&control_header, Cursor::new(control_tar))
2144                .expect("control.tar.xz entry should be appended");
2145        }
2146
2147        deb
2148    }
2149
2150    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2151        let mut control_tar = Vec::new();
2152        {
2153            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2154            let mut tar_builder = TarBuilder::new(encoder);
2155
2156            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2157            let mut header = TarHeader::new_gnu();
2158            header
2159                .set_path("control")
2160                .expect("control tar path should be valid");
2161            header.set_size(control_content.len() as u64);
2162            header.set_mode(0o644);
2163            header.set_cksum();
2164            tar_builder
2165                .append(&header, Cursor::new(control_content))
2166                .expect("control file should be appended to tar.gz");
2167            tar_builder.finish().expect("control tar.gz should finish");
2168        }
2169
2170        let mut data_tar = Vec::new();
2171        {
2172            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2173            let mut tar_builder = TarBuilder::new(encoder);
2174
2175            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2176            let mut header = TarHeader::new_gnu();
2177            header
2178                .set_path("./usr/share/doc/synthetic/copyright")
2179                .expect("copyright path should be valid");
2180            header.set_size(copyright.len() as u64);
2181            header.set_mode(0o644);
2182            header.set_cksum();
2183            tar_builder
2184                .append(&header, Cursor::new(copyright))
2185                .expect("copyright file should be appended to data tar");
2186            tar_builder.finish().expect("data tar.gz should finish");
2187        }
2188
2189        let deb = NamedTempFile::new().expect("temp deb file should be created");
2190        {
2191            let mut builder = ArBuilder::new(
2192                deb.reopen()
2193                    .expect("temporary deb file should reopen for writing"),
2194            );
2195
2196            let debian_binary = b"2.0\n";
2197            let mut debian_binary_header =
2198                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2199            debian_binary_header.set_mode(0o100644);
2200            builder
2201                .append(&debian_binary_header, Cursor::new(debian_binary))
2202                .expect("debian-binary entry should be appended");
2203
2204            let mut control_header =
2205                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2206            control_header.set_mode(0o100644);
2207            builder
2208                .append(&control_header, Cursor::new(control_tar))
2209                .expect("control.tar.gz entry should be appended");
2210
2211            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2212            data_header.set_mode(0o100644);
2213            builder
2214                .append(&data_header, Cursor::new(data_tar))
2215                .expect("data.tar.gz entry should be appended");
2216        }
2217
2218        deb
2219    }
2220
2221    // ====== Namespace detection ======
2222
2223    #[test]
2224    fn test_detect_namespace_from_ubuntu_version() {
2225        assert_eq!(
2226            detect_namespace(Some("1.0-1ubuntu1"), None),
2227            Some("ubuntu".to_string())
2228        );
2229    }
2230
2231    #[test]
2232    fn test_detect_namespace_from_debian_version() {
2233        assert_eq!(
2234            detect_namespace(Some("1.0-1+deb11u1"), None),
2235            Some("debian".to_string())
2236        );
2237    }
2238
2239    #[test]
2240    fn test_detect_namespace_from_ubuntu_maintainer() {
2241        assert_eq!(
2242            detect_namespace(
2243                None,
2244                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2245            ),
2246            Some("ubuntu".to_string())
2247        );
2248    }
2249
2250    #[test]
2251    fn test_detect_namespace_from_debian_maintainer() {
2252        assert_eq!(
2253            detect_namespace(None, Some("John Doe <john@debian.org>")),
2254            Some("debian".to_string())
2255        );
2256    }
2257
2258    #[test]
2259    fn test_detect_namespace_default() {
2260        assert_eq!(
2261            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2262            Some("debian".to_string())
2263        );
2264    }
2265
2266    #[test]
2267    fn test_detect_namespace_version_takes_priority() {
2268        // Version clue should be checked before maintainer
2269        assert_eq!(
2270            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2271            Some("ubuntu".to_string())
2272        );
2273    }
2274
2275    // ====== PURL generation ======
2276
2277    #[test]
2278    fn test_build_purl_basic() {
2279        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2280        assert_eq!(
2281            purl,
2282            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2283        );
2284    }
2285
2286    #[test]
2287    fn test_build_purl_no_version() {
2288        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2289        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2290    }
2291
2292    #[test]
2293    fn test_build_purl_no_arch() {
2294        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2295        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2296    }
2297
2298    #[test]
2299    fn test_build_purl_no_namespace() {
2300        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2301        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2302    }
2303
2304    // ====== Dependency parsing ======
2305
2306    #[test]
2307    fn test_parse_simple_dependency() {
2308        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2309        assert_eq!(deps.len(), 1);
2310        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2311        assert_eq!(deps[0].extracted_requirement, None);
2312        assert_eq!(deps[0].scope, Some("depends".to_string()));
2313    }
2314
2315    #[test]
2316    fn test_parse_dependency_with_version() {
2317        let deps =
2318            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2319        assert_eq!(deps.len(), 1);
2320        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2321        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2322    }
2323
2324    #[test]
2325    fn test_parse_dependency_exact_version() {
2326        let deps = parse_dependency_field(
2327            "libc6 (= 2.31-13+deb11u5)",
2328            "depends",
2329            true,
2330            false,
2331            Some("debian"),
2332        );
2333        assert_eq!(deps.len(), 1);
2334        assert_eq!(deps[0].is_pinned, Some(true));
2335    }
2336
2337    #[test]
2338    fn test_parse_dependency_strict_less() {
2339        let deps =
2340            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2341        assert_eq!(deps.len(), 1);
2342        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2343        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2344    }
2345
2346    #[test]
2347    fn test_parse_multiple_dependencies() {
2348        let deps = parse_dependency_field(
2349            "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2350            "depends",
2351            true,
2352            false,
2353            Some("debian"),
2354        );
2355        assert_eq!(deps.len(), 3);
2356    }
2357
2358    #[test]
2359    fn test_parse_dependency_alternatives() {
2360        let deps = parse_dependency_field(
2361            "libssl1.1 | libssl3",
2362            "depends",
2363            true,
2364            false,
2365            Some("debian"),
2366        );
2367        assert_eq!(deps.len(), 2);
2368        // Alternatives are marked as optional
2369        assert_eq!(deps[0].is_optional, Some(true));
2370        assert_eq!(deps[1].is_optional, Some(true));
2371    }
2372
2373    #[test]
2374    fn test_parse_dependency_skips_substitutions() {
2375        let deps = parse_dependency_field(
2376            "${shlibs:Depends}, ${misc:Depends}, libc6",
2377            "depends",
2378            true,
2379            false,
2380            Some("debian"),
2381        );
2382        assert_eq!(deps.len(), 1);
2383        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2384    }
2385
2386    #[test]
2387    fn test_parse_dependency_with_arch_qualifier() {
2388        // Dependencies can have [arch] qualifiers which we ignore
2389        let deps = parse_dependency_field(
2390            "libc6 (>= 2.17) [amd64]",
2391            "depends",
2392            true,
2393            false,
2394            Some("debian"),
2395        );
2396        assert_eq!(deps.len(), 1);
2397        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2398    }
2399
2400    #[test]
2401    fn test_parse_empty_dependency() {
2402        let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2403        assert!(deps.is_empty());
2404    }
2405
2406    // ====== Source field parsing ======
2407
2408    #[test]
2409    fn test_parse_source_field_name_only() {
2410        let sources = parse_source_field(Some("util-linux"), Some("debian"));
2411        assert_eq!(sources.len(), 1);
2412        assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2413    }
2414
2415    #[test]
2416    fn test_parse_source_field_with_version() {
2417        let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2418        assert_eq!(sources.len(), 1);
2419        assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2420    }
2421
2422    #[test]
2423    fn test_parse_source_field_empty() {
2424        let sources = parse_source_field(None, Some("debian"));
2425        assert!(sources.is_empty());
2426    }
2427
2428    // ====== Control file parsing ======
2429
2430    #[test]
2431    fn test_parse_debian_control_source_and_binary() {
2432        let content = "\
2433Source: curl
2434Section: web
2435Priority: optional
2436Maintainer: Alessandro Ghedini <ghedo@debian.org>
2437Homepage: https://curl.se/
2438Vcs-Browser: https://salsa.debian.org/debian/curl
2439Vcs-Git: https://salsa.debian.org/debian/curl.git
2440Build-Depends: debhelper (>= 12), libssl-dev
2441
2442Package: curl
2443Architecture: amd64
2444Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2445Description: command line tool for transferring data with URL syntax";
2446
2447        let packages = parse_debian_control(content);
2448        assert_eq!(packages.len(), 1);
2449
2450        let pkg = &packages[0];
2451        assert_eq!(pkg.name, Some("curl".to_string()));
2452        assert_eq!(pkg.package_type, Some(PackageType::Deb));
2453        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2454        assert_eq!(
2455            pkg.vcs_url,
2456            Some("https://salsa.debian.org/debian/curl.git".to_string())
2457        );
2458        assert_eq!(
2459            pkg.code_view_url,
2460            Some("https://salsa.debian.org/debian/curl".to_string())
2461        );
2462
2463        // Maintainer from source paragraph
2464        assert_eq!(pkg.parties.len(), 1);
2465        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2466        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2467        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2468
2469        // Dependencies parsed
2470        assert!(!pkg.dependencies.is_empty());
2471    }
2472
2473    #[test]
2474    fn test_parse_debian_control_multiple_binary() {
2475        let content = "\
2476Source: gzip
2477Maintainer: Debian Developer <dev@debian.org>
2478
2479Package: gzip
2480Architecture: any
2481Depends: libc6 (>= 2.17)
2482Description: GNU file compression
2483
2484Package: gzip-win32
2485Architecture: all
2486Description: gzip for Windows";
2487
2488        let packages = parse_debian_control(content);
2489        assert_eq!(packages.len(), 2);
2490        assert_eq!(packages[0].name, Some("gzip".to_string()));
2491        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2492
2493        // Both inherit source maintainer
2494        assert_eq!(packages[0].parties.len(), 1);
2495        assert_eq!(packages[1].parties.len(), 1);
2496    }
2497
2498    #[test]
2499    fn test_parse_debian_control_source_only() {
2500        let content = "\
2501Source: my-package
2502Maintainer: Test User <test@debian.org>
2503Build-Depends: debhelper (>= 13)";
2504
2505        let packages = parse_debian_control(content);
2506        assert_eq!(packages.len(), 1);
2507        assert_eq!(packages[0].name, Some("my-package".to_string()));
2508        // Build-Depends parsed
2509        assert!(!packages[0].dependencies.is_empty());
2510        assert_eq!(
2511            packages[0].dependencies[0].scope,
2512            Some("build-depends".to_string())
2513        );
2514    }
2515
2516    #[test]
2517    fn test_parse_debian_control_with_uploaders() {
2518        let content = "\
2519Source: example
2520Maintainer: Main Dev <main@debian.org>
2521Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2522
2523Package: example
2524Architecture: any
2525Description: test package";
2526
2527        let packages = parse_debian_control(content);
2528        assert_eq!(packages.len(), 1);
2529        // 1 maintainer + 2 uploaders
2530        assert_eq!(packages[0].parties.len(), 3);
2531        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2532        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2533        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2534    }
2535
2536    #[test]
2537    fn test_parse_debian_control_vcs_git_with_branch() {
2538        let content = "\
2539Source: example
2540Maintainer: Dev <dev@debian.org>
2541Vcs-Git: https://salsa.debian.org/example.git -b main
2542
2543Package: example
2544Architecture: any
2545Description: test";
2546
2547        let packages = parse_debian_control(content);
2548        assert_eq!(packages.len(), 1);
2549        // Should only take the URL, not the branch
2550        assert_eq!(
2551            packages[0].vcs_url,
2552            Some("https://salsa.debian.org/example.git".to_string())
2553        );
2554    }
2555
2556    #[test]
2557    fn test_parse_debian_control_multi_arch() {
2558        let content = "\
2559Source: example
2560Maintainer: Dev <dev@debian.org>
2561
2562Package: libexample
2563Architecture: any
2564Multi-Arch: same
2565Description: shared library";
2566
2567        let packages = parse_debian_control(content);
2568        assert_eq!(packages.len(), 1);
2569        let extra = packages[0].extra_data.as_ref().unwrap();
2570        assert_eq!(
2571            extra.get("multi_arch"),
2572            Some(&serde_json::Value::String("same".to_string()))
2573        );
2574    }
2575
2576    // ====== dpkg/status parsing ======
2577
2578    #[test]
2579    fn test_parse_dpkg_status_basic() {
2580        let content = "\
2581Package: base-files
2582Status: install ok installed
2583Priority: required
2584Section: admin
2585Installed-Size: 391
2586Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2587Architecture: amd64
2588Version: 11ubuntu5.6
2589Description: Debian base system miscellaneous files
2590Homepage: https://tracker.debian.org/pkg/base-files
2591
2592Package: not-installed
2593Status: deinstall ok config-files
2594Architecture: amd64
2595Version: 1.0
2596Description: This should be skipped";
2597
2598        let packages = parse_dpkg_status(content);
2599        assert_eq!(packages.len(), 1);
2600
2601        let pkg = &packages[0];
2602        assert_eq!(pkg.name, Some("base-files".to_string()));
2603        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2604        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2605        assert_eq!(
2606            pkg.datasource_id,
2607            Some(DatasourceId::DebianInstalledStatusDb)
2608        );
2609
2610        // Installed-Size in extra_data
2611        let extra = pkg.extra_data.as_ref().unwrap();
2612        assert_eq!(
2613            extra.get("installed_size"),
2614            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2615        );
2616    }
2617
2618    #[test]
2619    fn test_parse_dpkg_status_multiple_installed() {
2620        let content = "\
2621Package: libc6
2622Status: install ok installed
2623Architecture: amd64
2624Version: 2.31-13+deb11u5
2625Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2626Description: GNU C Library
2627
2628Package: zlib1g
2629Status: install ok installed
2630Architecture: amd64
2631Version: 1:1.2.11.dfsg-2+deb11u2
2632Maintainer: Mark Brown <broonie@debian.org>
2633Description: compression library";
2634
2635        let packages = parse_dpkg_status(content);
2636        assert_eq!(packages.len(), 2);
2637        assert_eq!(packages[0].name, Some("libc6".to_string()));
2638        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2639    }
2640
2641    #[test]
2642    fn test_parse_dpkg_status_with_dependencies() {
2643        let content = "\
2644Package: curl
2645Status: install ok installed
2646Architecture: amd64
2647Version: 7.74.0-1.3+deb11u7
2648Maintainer: Alessandro Ghedini <ghedo@debian.org>
2649Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2650Recommends: ca-certificates
2651Description: command line tool for transferring data with URL syntax";
2652
2653        let packages = parse_dpkg_status(content);
2654        assert_eq!(packages.len(), 1);
2655
2656        let deps = &packages[0].dependencies;
2657        // 2 from Depends + 1 from Recommends
2658        assert_eq!(deps.len(), 3);
2659
2660        // Check first dependency
2661        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2662        assert_eq!(deps[0].scope, Some("depends".to_string()));
2663        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2664
2665        // Check recommends
2666        assert_eq!(
2667            deps[2].purl,
2668            Some("pkg:deb/debian/ca-certificates".to_string())
2669        );
2670        assert_eq!(deps[2].scope, Some("recommends".to_string()));
2671        assert_eq!(deps[2].is_optional, Some(true));
2672    }
2673
2674    #[test]
2675    fn test_parse_dpkg_status_with_source() {
2676        let content = "\
2677Package: libncurses6
2678Status: install ok installed
2679Architecture: amd64
2680Source: ncurses (6.2+20201114-2+deb11u1)
2681Version: 6.2+20201114-2+deb11u1
2682Maintainer: Craig Small <csmall@debian.org>
2683Description: shared libraries for terminal handling";
2684
2685        let packages = parse_dpkg_status(content);
2686        assert_eq!(packages.len(), 1);
2687        assert!(!packages[0].source_packages.is_empty());
2688        // Source PURL should include version from parentheses
2689        assert!(packages[0].source_packages[0].contains("ncurses"));
2690    }
2691
2692    #[test]
2693    fn test_parse_dpkg_status_filters_not_installed() {
2694        let content = "\
2695Package: installed-pkg
2696Status: install ok installed
2697Version: 1.0
2698Architecture: amd64
2699Description: installed
2700
2701Package: half-installed
2702Status: install ok half-installed
2703Version: 2.0
2704Architecture: amd64
2705Description: half installed
2706
2707Package: deinstall-pkg
2708Status: deinstall ok config-files
2709Version: 3.0
2710Architecture: amd64
2711Description: deinstalled
2712
2713Package: purge-pkg
2714Status: purge ok not-installed
2715Version: 4.0
2716Architecture: amd64
2717Description: purged";
2718
2719        let packages = parse_dpkg_status(content);
2720        assert_eq!(packages.len(), 1);
2721        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2722    }
2723
2724    #[test]
2725    fn test_parse_dpkg_status_empty() {
2726        let packages = parse_dpkg_status("");
2727        assert!(packages.is_empty());
2728    }
2729
2730    // ====== is_match tests ======
2731
2732    #[test]
2733    fn test_debian_control_is_match() {
2734        assert!(DebianControlParser::is_match(Path::new(
2735            "/path/to/debian/control"
2736        )));
2737        assert!(DebianControlParser::is_match(Path::new("debian/control")));
2738        assert!(!DebianControlParser::is_match(Path::new(
2739            "/path/to/control"
2740        )));
2741        assert!(!DebianControlParser::is_match(Path::new(
2742            "/path/to/debian/changelog"
2743        )));
2744    }
2745
2746    #[test]
2747    fn test_debian_installed_is_match() {
2748        assert!(DebianInstalledParser::is_match(Path::new(
2749            "/var/lib/dpkg/status"
2750        )));
2751        assert!(DebianInstalledParser::is_match(Path::new(
2752            "some/root/var/lib/dpkg/status"
2753        )));
2754        assert!(!DebianInstalledParser::is_match(Path::new(
2755            "/var/lib/dpkg/status.d/something"
2756        )));
2757        assert!(!DebianInstalledParser::is_match(Path::new(
2758            "/var/lib/dpkg/available"
2759        )));
2760    }
2761
2762    // ====== Edge cases ======
2763
2764    #[test]
2765    fn test_parse_debian_control_empty_input() {
2766        let packages = parse_debian_control("");
2767        assert!(packages.is_empty());
2768    }
2769
2770    #[test]
2771    fn test_parse_debian_control_malformed_input() {
2772        let content = "this is not a valid control file\nwith random text";
2773        let packages = parse_debian_control(content);
2774        // Should not panic, may return empty or partial results
2775        assert!(packages.is_empty());
2776    }
2777
2778    #[test]
2779    fn test_dependency_with_epoch_version() {
2780        // Debian versions can have epochs like 1:2.3.4
2781        let deps = parse_dependency_field(
2782            "zlib1g (>= 1:1.2.11)",
2783            "depends",
2784            true,
2785            false,
2786            Some("debian"),
2787        );
2788        assert_eq!(deps.len(), 1);
2789        assert_eq!(
2790            deps[0].extracted_requirement,
2791            Some(">= 1:1.2.11".to_string())
2792        );
2793    }
2794
2795    #[test]
2796    fn test_dependency_with_plus_in_name() {
2797        let deps =
2798            parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2799        assert_eq!(deps.len(), 1);
2800        assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2801    }
2802
2803    #[test]
2804    fn test_dsc_parser_is_match() {
2805        assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2806        assert!(DebianDscParser::is_match(&PathBuf::from(
2807            "adduser_3.118+deb11u1.dsc"
2808        )));
2809        assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2810        assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2811    }
2812
2813    #[test]
2814    fn test_dsc_parser_adduser() {
2815        let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2816        let package = DebianDscParser::extract_first_package(&path);
2817
2818        assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2819        assert_eq!(package.namespace, Some("debian".to_string()));
2820        assert_eq!(package.name, Some("adduser".to_string()));
2821        assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2822        assert_eq!(
2823            package.purl,
2824            Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2825        );
2826        assert_eq!(
2827            package.vcs_url,
2828            Some("https://salsa.debian.org/debian/adduser.git".to_string())
2829        );
2830        assert_eq!(
2831            package.code_view_url,
2832            Some("https://salsa.debian.org/debian/adduser".to_string())
2833        );
2834        assert_eq!(
2835            package.datasource_id,
2836            Some(DatasourceId::DebianSourceControlDsc)
2837        );
2838
2839        assert_eq!(package.parties.len(), 2);
2840        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2841        assert_eq!(
2842            package.parties[0].name,
2843            Some("Debian Adduser Developers".to_string())
2844        );
2845        assert_eq!(
2846            package.parties[0].email,
2847            Some("adduser@packages.debian.org".to_string())
2848        );
2849        assert_eq!(package.parties[0].r#type, None);
2850
2851        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2852        assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2853        assert_eq!(
2854            package.parties[1].email,
2855            Some("mh+debian-packages@zugschlus.de".to_string())
2856        );
2857        assert_eq!(package.parties[1].r#type, None);
2858
2859        assert_eq!(package.source_packages.len(), 1);
2860        assert_eq!(
2861            package.source_packages[0],
2862            "pkg:deb/debian/adduser".to_string()
2863        );
2864
2865        assert!(!package.dependencies.is_empty());
2866        let build_dep_names: Vec<String> = package
2867            .dependencies
2868            .iter()
2869            .filter_map(|d| d.purl.as_ref())
2870            .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2871            .map(|p| p.to_string())
2872            .collect();
2873        assert!(build_dep_names.len() >= 2);
2874    }
2875
2876    #[test]
2877    fn test_dsc_parser_zsh() {
2878        let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2879        let package = DebianDscParser::extract_first_package(&path);
2880
2881        assert_eq!(package.name, Some("zsh".to_string()));
2882        assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2883        assert_eq!(package.namespace, Some("debian".to_string()));
2884        assert!(package.purl.is_some());
2885        assert!(package.purl.as_ref().unwrap().contains("zsh"));
2886        assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2887    }
2888
2889    #[test]
2890    fn test_parse_dsc_content_basic() {
2891        let content = "Format: 3.0 (native)
2892Source: testpkg
2893Binary: testpkg
2894Architecture: amd64
2895Version: 1.0.0
2896Maintainer: Test User <test@example.com>
2897Standards-Version: 4.5.0
2898Build-Depends: debhelper (>= 12)
2899Files:
2900 abc123 1024 testpkg_1.0.0.tar.xz
2901";
2902
2903        let package = parse_dsc_content(content);
2904        assert_eq!(package.name, Some("testpkg".to_string()));
2905        assert_eq!(package.version, Some("1.0.0".to_string()));
2906        assert_eq!(package.namespace, Some("debian".to_string()));
2907        assert_eq!(package.parties.len(), 1);
2908        assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2909        assert_eq!(
2910            package.parties[0].email,
2911            Some("test@example.com".to_string())
2912        );
2913        assert_eq!(package.dependencies.len(), 1);
2914        assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2915    }
2916
2917    #[test]
2918    fn test_parse_dsc_content_with_uploaders() {
2919        let content = "Source: mypkg
2920Version: 2.0
2921Architecture: all
2922Maintainer: Main Dev <main@example.com>
2923Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2924";
2925
2926        let package = parse_dsc_content(content);
2927        assert_eq!(package.parties.len(), 3);
2928        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2929        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2930        assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2931    }
2932
2933    #[test]
2934    fn test_orig_tar_parser_is_match() {
2935        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2936            "package_1.0.orig.tar.gz"
2937        )));
2938        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2939            "abseil_0~20200923.3.orig.tar.xz"
2940        )));
2941        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2942            "package.debian.tar.gz"
2943        )));
2944        assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2945    }
2946
2947    #[test]
2948    fn test_debian_tar_parser_is_match() {
2949        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2950            "package_1.0-1.debian.tar.xz"
2951        )));
2952        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2953            "abseil_20220623.1-1.debian.tar.gz"
2954        )));
2955        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2956            "package.orig.tar.gz"
2957        )));
2958        assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2959    }
2960
2961    #[test]
2962    fn test_parse_orig_tar_filename() {
2963        let pkg = parse_source_tarball_filename(
2964            "abseil_0~20200923.3.orig.tar.gz",
2965            DatasourceId::DebianOriginalSourceTarball,
2966        );
2967        assert_eq!(pkg.name, Some("abseil".to_string()));
2968        assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2969        assert_eq!(pkg.namespace, Some("debian".to_string()));
2970        assert_eq!(
2971            pkg.purl,
2972            Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2973        );
2974        assert_eq!(
2975            pkg.datasource_id,
2976            Some(DatasourceId::DebianOriginalSourceTarball)
2977        );
2978    }
2979
2980    #[test]
2981    fn test_parse_debian_tar_filename() {
2982        let pkg = parse_source_tarball_filename(
2983            "abseil_20220623.1-1.debian.tar.xz",
2984            DatasourceId::DebianSourceMetadataTarball,
2985        );
2986        assert_eq!(pkg.name, Some("abseil".to_string()));
2987        assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2988        assert_eq!(pkg.namespace, Some("debian".to_string()));
2989        assert_eq!(
2990            pkg.purl,
2991            Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2992        );
2993    }
2994
2995    #[test]
2996    fn test_parse_deb_filename() {
2997        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2998        assert_eq!(pkg.name, Some("nginx".to_string()));
2999        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3000
3001        let pkg = parse_deb_filename("invalid.deb");
3002        assert!(pkg.name.is_none());
3003        assert!(pkg.version.is_none());
3004    }
3005
3006    #[test]
3007    fn test_parse_source_tarball_various_compressions() {
3008        let pkg_gz = parse_source_tarball_filename(
3009            "test_1.0.orig.tar.gz",
3010            DatasourceId::DebianOriginalSourceTarball,
3011        );
3012        let pkg_xz = parse_source_tarball_filename(
3013            "test_1.0.orig.tar.xz",
3014            DatasourceId::DebianOriginalSourceTarball,
3015        );
3016        let pkg_bz2 = parse_source_tarball_filename(
3017            "test_1.0.orig.tar.bz2",
3018            DatasourceId::DebianOriginalSourceTarball,
3019        );
3020
3021        assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3022        assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3023        assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3024    }
3025
3026    #[test]
3027    fn test_parse_source_tarball_invalid_format() {
3028        let pkg = parse_source_tarball_filename(
3029            "invalid-no-underscore.tar.gz",
3030            DatasourceId::DebianOriginalSourceTarball,
3031        );
3032        assert!(pkg.name.is_none());
3033        assert!(pkg.version.is_none());
3034    }
3035
3036    #[test]
3037    fn test_list_parser_is_match() {
3038        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3039            "/var/lib/dpkg/info/bash.list"
3040        )));
3041        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3042            "/var/lib/dpkg/info/package:amd64.list"
3043        )));
3044        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3045            "bash.list"
3046        )));
3047        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3048            "/var/lib/dpkg/info/bash.md5sums"
3049        )));
3050    }
3051
3052    #[test]
3053    fn test_md5sums_parser_is_match() {
3054        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3055            "/var/lib/dpkg/info/bash.md5sums"
3056        )));
3057        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3058            "/var/lib/dpkg/info/package:amd64.md5sums"
3059        )));
3060        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3061            "bash.md5sums"
3062        )));
3063        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3064            "/var/lib/dpkg/info/bash.list"
3065        )));
3066    }
3067
3068    #[test]
3069    fn test_parse_debian_file_list_plain_list() {
3070        let content = "/.
3071/bin
3072/bin/bash
3073/usr/bin/bashbug
3074/usr/share/doc/bash/README
3075";
3076        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3077        assert_eq!(pkg.name, Some("bash".to_string()));
3078        assert_eq!(pkg.file_references.len(), 3);
3079        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3080        assert_eq!(pkg.file_references[0].md5, None);
3081        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3082        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3083    }
3084
3085    #[test]
3086    fn test_parse_debian_file_list_md5sums() {
3087        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash
30881c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug
3089f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz
3090";
3091        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3092        assert_eq!(pkg.name, Some("bash".to_string()));
3093        assert_eq!(pkg.file_references.len(), 3);
3094        assert_eq!(pkg.file_references[0].path, "bin/bash");
3095        assert_eq!(
3096            pkg.file_references[0].md5,
3097            Some("77506afebd3b7e19e937a678a185b62e".to_string())
3098        );
3099        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3100        assert_eq!(
3101            pkg.file_references[1].md5,
3102            Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3103        );
3104    }
3105
3106    #[test]
3107    fn test_parse_debian_file_list_with_arch() {
3108        let content = "/usr/bin/foo
3109/usr/lib/x86_64-linux-gnu/libfoo.so
3110";
3111        let pkg = parse_debian_file_list(
3112            content,
3113            "libfoo:amd64",
3114            DatasourceId::DebianInstalledFilesList,
3115        );
3116        assert_eq!(pkg.name, Some("libfoo".to_string()));
3117        assert!(pkg.purl.is_some());
3118        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3119        assert_eq!(pkg.file_references.len(), 2);
3120    }
3121
3122    #[test]
3123    fn test_parse_debian_file_list_skips_comments_and_empty() {
3124        let content = "# This is a comment
3125/bin/bash
3126
3127/usr/bin/bashbug
3128  
3129";
3130        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3131        assert_eq!(pkg.file_references.len(), 2);
3132    }
3133
3134    #[test]
3135    fn test_parse_debian_file_list_md5sums_only() {
3136        let content = "abc123  usr/bin/tool
3137";
3138        let pkg =
3139            parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3140        assert_eq!(pkg.name, None);
3141        assert_eq!(pkg.file_references.len(), 1);
3142    }
3143
3144    #[test]
3145    fn test_parse_debian_file_list_ignores_root_dirs() {
3146        let content = "/.
3147/bin
3148/bin/bash
3149/etc
3150/usr
3151/var
3152";
3153        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3154        assert_eq!(pkg.file_references.len(), 1);
3155        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3156    }
3157
3158    #[test]
3159    fn test_copyright_parser_is_match() {
3160        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3161            "/usr/share/doc/bash/copyright"
3162        )));
3163        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3164            "debian/copyright"
3165        )));
3166        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3167            "copyright.txt"
3168        )));
3169        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3170            "/etc/copyright"
3171        )));
3172        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3173            "/tmp/sample_copyright"
3174        )));
3175    }
3176
3177    #[test]
3178    fn test_detect_debian_copyright_datasource() {
3179        assert_eq!(
3180            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3181            DatasourceId::DebianCopyrightInSource
3182        );
3183        assert_eq!(
3184            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3185            DatasourceId::DebianCopyrightInPackage
3186        );
3187        assert_eq!(
3188            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3189            DatasourceId::DebianCopyrightStandalone
3190        );
3191    }
3192
3193    #[test]
3194    fn test_extract_package_name_from_path() {
3195        assert_eq!(
3196            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3197            Some("bash".to_string())
3198        );
3199        assert_eq!(
3200            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3201            Some("libseccomp2".to_string())
3202        );
3203        assert_eq!(
3204            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3205            None
3206        );
3207    }
3208
3209    #[test]
3210    fn test_parse_copyright_dep5_format() {
3211        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3212Upstream-Name: libseccomp
3213Source: https://sourceforge.net/projects/libseccomp/
3214
3215Files: *
3216Copyright: 2012 Paul Moore <pmoore@redhat.com>
3217 2012 Ashley Lai <adlai@us.ibm.com>
3218License: LGPL-2.1
3219
3220License: LGPL-2.1
3221 This library is free software
3222";
3223        let pkg = parse_copyright_file(content, Some("libseccomp"));
3224        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3225        assert_eq!(pkg.namespace, Some("debian".to_string()));
3226        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3227        assert_eq!(
3228            pkg.extracted_license_statement,
3229            Some("LGPL-2.1".to_string())
3230        );
3231        assert!(pkg.parties.len() >= 2);
3232        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3233        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3234    }
3235
3236    #[test]
3237    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3238        let path = PathBuf::from(
3239            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3240        );
3241        let pkg = DebianCopyrightParser::extract_first_package(&path);
3242
3243        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3244        let extracted = pkg
3245            .extracted_license_statement
3246            .as_deref()
3247            .expect("license statement should exist");
3248        assert!(extracted.contains("GPL-2+"));
3249        assert!(!pkg.license_detections.is_empty());
3250
3251        let primary = &pkg.license_detections[0];
3252        assert_eq!(
3253            primary.matches[0].matched_text.as_deref(),
3254            Some("License: GPL-2+")
3255        );
3256        assert_eq!(primary.matches[0].start_line, 47);
3257        assert_eq!(primary.matches[0].end_line, 47);
3258    }
3259
3260    #[test]
3261    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3262        let path = PathBuf::from("testdata/debian/copyright/copyright");
3263        let pkg = DebianCopyrightParser::extract_first_package(&path);
3264
3265        assert_eq!(pkg.license_detections.len(), 1);
3266        assert_eq!(pkg.other_license_detections.len(), 4);
3267
3268        let primary = &pkg.license_detections[0];
3269        assert_eq!(
3270            primary.matches[0].matched_text.as_deref(),
3271            Some("License: LGPL-2.1")
3272        );
3273        assert_eq!(primary.matches[0].start_line, 11);
3274
3275        let ordered_lines: Vec<usize> = pkg
3276            .other_license_detections
3277            .iter()
3278            .map(|detection| detection.matches[0].start_line)
3279            .collect();
3280        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3281
3282        let ordered_texts: Vec<&str> = pkg
3283            .other_license_detections
3284            .iter()
3285            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3286            .collect();
3287        assert_eq!(
3288            ordered_texts,
3289            vec![
3290                "License: LGPL-2.1",
3291                "License: LGPL-2.1",
3292                "License: LGPL-2.1",
3293                "License: LGPL-2.1",
3294            ]
3295        );
3296    }
3297
3298    #[test]
3299    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3300        let path = PathBuf::from(
3301            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3302        );
3303        let pkg = DebianCopyrightParser::extract_first_package(&path);
3304
3305        let zlib = pkg
3306            .other_license_detections
3307            .iter()
3308            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3309            .expect("at least one Zlib license paragraph should be detected");
3310        assert_eq!(
3311            zlib.matches[0].matched_text.as_deref(),
3312            Some("License: Zlib")
3313        );
3314
3315        let last_zlib = pkg
3316            .other_license_detections
3317            .iter()
3318            .rev()
3319            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3320            .expect("bottom standalone Zlib license paragraph should be detected");
3321        assert_eq!(last_zlib.matches[0].start_line, 732);
3322        assert_eq!(last_zlib.matches[0].end_line, 732);
3323    }
3324
3325    #[test]
3326    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3327        let path =
3328            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3329        let pkg = DebianCopyrightParser::extract_first_package(&path);
3330
3331        assert_eq!(pkg.license_detections.len(), 1);
3332        let primary = &pkg.license_detections[0];
3333        assert_eq!(
3334            primary.matches[0].matched_text.as_deref(),
3335            Some("License: LGPL-3+ or GPL-2+")
3336        );
3337        assert_eq!(primary.matches[0].start_line, 8);
3338        assert_eq!(primary.matches[0].end_line, 8);
3339
3340        assert!(pkg.other_license_detections.iter().any(|detection| {
3341            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3342        }));
3343    }
3344
3345    #[test]
3346    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3347        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3348        let pkg = parse_copyright_file(content, Some("foo"));
3349
3350        assert_eq!(pkg.license_detections.len(), 1);
3351        let primary = &pkg.license_detections[0];
3352        assert_eq!(
3353            primary.matches[0].matched_text.as_deref(),
3354            Some("License: GPL-2+")
3355        );
3356        assert_eq!(primary.matches[0].start_line, 7);
3357    }
3358
3359    #[test]
3360    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3361        let raw_lines = vec![
3362            "Files: *".to_string(),
3363            "Copyright: 2024 Example Org".to_string(),
3364            "License: Apache-2.0".to_string(),
3365            " Licensed under the Apache License, Version 2.0.".to_string(),
3366        ];
3367
3368        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3369        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3370            .into_iter()
3371            .next()
3372            .expect("reference RFC822 paragraph should parse");
3373
3374        assert_eq!(paragraph.metadata.headers, expected.headers);
3375        assert_eq!(paragraph.metadata.body, expected.body);
3376        assert_eq!(
3377            paragraph.license_header_line,
3378            Some(("License: Apache-2.0".to_string(), 12))
3379        );
3380    }
3381
3382    #[test]
3383    fn test_parse_copyright_unstructured() {
3384        let content = "This package was debianized by John Doe.
3385
3386Upstream Authors:
3387    Jane Smith
3388
3389Copyright:
3390    2009 10gen
3391
3392License:
3393    SSPL
3394";
3395        let pkg = parse_copyright_file(content, Some("mongodb"));
3396        assert_eq!(pkg.name, Some("mongodb".to_string()));
3397        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3398        assert!(!pkg.parties.is_empty());
3399    }
3400
3401    #[test]
3402    fn test_parse_copyright_holders() {
3403        let text = "2012 Paul Moore <pmoore@redhat.com>
34042012 Ashley Lai <adlai@us.ibm.com>
3405Copyright (C) 2015-2018 Example Corp";
3406        let holders = parse_copyright_holders(text);
3407        assert!(holders.len() >= 3);
3408        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3409        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3410    }
3411
3412    #[test]
3413    fn test_parse_copyright_empty() {
3414        let content = "This is just some text without proper copyright info.";
3415        let pkg = parse_copyright_file(content, Some("test"));
3416        assert_eq!(pkg.name, Some("test".to_string()));
3417        assert!(pkg.parties.is_empty());
3418        assert!(pkg.extracted_license_statement.is_none());
3419    }
3420
3421    #[test]
3422    fn test_deb_parser_is_match() {
3423        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3424        assert!(DebianDebParser::is_match(&PathBuf::from(
3425            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3426        )));
3427        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3428        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3429    }
3430
3431    #[test]
3432    fn test_parse_deb_filename_with_arch() {
3433        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3434        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3435        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3436        assert_eq!(pkg.namespace, Some("debian".to_string()));
3437        assert_eq!(
3438            pkg.purl,
3439            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3440        );
3441        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3442    }
3443
3444    #[test]
3445    fn test_parse_deb_filename_without_arch() {
3446        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3447        assert_eq!(pkg.name, Some("package".to_string()));
3448        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3449        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3450    }
3451
3452    #[test]
3453    fn test_extract_deb_archive() {
3454        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3455        if !test_path.exists() {
3456            return;
3457        }
3458
3459        let pkg = DebianDebParser::extract_first_package(&test_path);
3460
3461        assert_eq!(pkg.name, Some("adduser".to_string()));
3462        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3463        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3464        assert!(pkg.description.is_some());
3465        assert!(!pkg.parties.is_empty());
3466
3467        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3468        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3469    }
3470
3471    #[test]
3472    fn test_extract_deb_archive_with_control_tar_xz() {
3473        let deb = create_synthetic_deb_with_control_tar_xz();
3474
3475        let pkg = DebianDebParser::extract_first_package(deb.path());
3476
3477        assert_eq!(pkg.name, Some("synthetic".to_string()));
3478        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3479        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3480        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3481    }
3482
3483    #[test]
3484    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3485        let deb = create_synthetic_deb_with_copyright();
3486
3487        let pkg = DebianDebParser::extract_first_package(deb.path());
3488
3489        assert_eq!(pkg.name, Some("synthetic".to_string()));
3490        assert_eq!(
3491            pkg.extracted_license_statement,
3492            Some("Apache-2.0".to_string())
3493        );
3494        assert!(pkg.parties.iter().any(|party| {
3495            party.role.as_deref() == Some("copyright-holder")
3496                && party.name.as_deref() == Some("Example Org")
3497        }));
3498    }
3499
3500    #[test]
3501    fn test_parse_deb_filename_simple() {
3502        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3503        assert_eq!(pkg.name, Some("adduser".to_string()));
3504        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3505        assert_eq!(pkg.namespace, Some("debian".to_string()));
3506    }
3507
3508    #[test]
3509    fn test_parse_deb_filename_invalid() {
3510        let pkg = parse_deb_filename("invalid.deb");
3511        assert!(pkg.name.is_none());
3512        assert!(pkg.version.is_none());
3513    }
3514
3515    #[test]
3516    fn test_distroless_parser() {
3517        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3518
3519        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3520
3521        if !test_file.exists() {
3522            eprintln!("Warning: Test file not found, skipping test");
3523            return;
3524        }
3525
3526        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3527
3528        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3529        assert_eq!(
3530            pkg.datasource_id,
3531            Some(DatasourceId::DebianDistrolessInstalledDb)
3532        );
3533        assert_eq!(pkg.name, Some("base-files".to_string()));
3534        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3535        assert_eq!(pkg.namespace, Some("debian".to_string()));
3536        assert!(pkg.purl.is_some());
3537        assert!(
3538            pkg.purl
3539                .as_ref()
3540                .unwrap()
3541                .contains("pkg:deb/debian/base-files")
3542        );
3543    }
3544}