Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35
36use crate::parser_warn as warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41    DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
42};
43use crate::parsers::rfc822::{self, Rfc822Metadata};
44use crate::parsers::utils::{read_file_to_string, split_name_email};
45use crate::utils::spdx::combine_license_expressions;
46
47use super::PackageParser;
48use super::license_normalization::{
49    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
50    normalize_declared_license_key,
51};
52
53const PACKAGE_TYPE: PackageType = PackageType::Deb;
54
55fn default_package_data(datasource_id: DatasourceId) -> PackageData {
56    PackageData {
57        package_type: Some(PACKAGE_TYPE),
58        datasource_id: Some(datasource_id),
59        ..Default::default()
60    }
61}
62
// Namespace detection clues.
//
// Each entry is a lower-case substring; `detect_namespace` lower-cases the
// candidate text before searching for it.

/// Substrings of a version string that indicate a Debian package.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings of a version string that indicate an Ubuntu package.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings of a maintainer field that indicate a Debian package.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings of a maintainer field that indicate an Ubuntu package.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &[
    "lists.ubuntu.com",
    "@canonical.com",
];
76
/// Describes how one Debian relationship field maps onto dependency metadata.
struct DepFieldSpec {
    /// Header name passed to the RFC 822 lookup.
    field: &'static str,
    /// Scope label recorded on every dependency produced from the field.
    scope: &'static str,
    /// Value recorded as the dependency's `is_runtime` flag.
    is_runtime: bool,
    /// Base value for the dependency's `is_optional` flag.
    is_optional: bool,
}

/// Builds a table entry whose scope label is the same as its field name.
const fn dep_field(name: &'static str, is_runtime: bool, is_optional: bool) -> DepFieldSpec {
    DepFieldSpec {
        field: name,
        scope: name,
        is_runtime,
        is_optional,
    }
}

/// Relationship fields that are turned into dependencies, in extraction order.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships.
    dep_field("depends", true, false),
    dep_field("pre-depends", true, false),
    dep_field("recommends", true, true),
    dep_field("suggests", true, true),
    // Non-runtime package relationships.
    dep_field("breaks", false, false),
    dep_field("conflicts", false, false),
    dep_field("replaces", false, false),
    dep_field("provides", false, false),
    // Build-time relationships.
    dep_field("build-depends", false, false),
    dep_field("build-depends-indep", false, false),
    dep_field("build-conflicts", false, false),
];
153
154// ---------------------------------------------------------------------------
155// DebianControlParser: debian/control files (source + binary paragraphs)
156// ---------------------------------------------------------------------------
157
158pub struct DebianControlParser;
159
160impl PackageParser for DebianControlParser {
161    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
162
163    fn is_match(path: &Path) -> bool {
164        if let Some(name) = path.file_name()
165            && name == "control"
166            && let Some(parent) = path.parent()
167            && let Some(parent_name) = parent.file_name()
168        {
169            return parent_name == "debian";
170        }
171        false
172    }
173
174    fn extract_packages(path: &Path) -> Vec<PackageData> {
175        let content = match read_file_to_string(path) {
176            Ok(c) => c,
177            Err(e) => {
178                warn!("Failed to read debian/control at {:?}: {}", path, e);
179                return vec![default_package_data(DatasourceId::DebianControlInSource)];
180            }
181        };
182
183        let packages = parse_debian_control(&content);
184        if packages.is_empty() {
185            vec![default_package_data(DatasourceId::DebianControlInSource)]
186        } else {
187            packages
188        }
189    }
190}
191
192// ---------------------------------------------------------------------------
193// DebianInstalledParser: /var/lib/dpkg/status
194// ---------------------------------------------------------------------------
195
196pub struct DebianInstalledParser;
197
198impl PackageParser for DebianInstalledParser {
199    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
200
201    fn is_match(path: &Path) -> bool {
202        let path_str = path.to_string_lossy();
203        path_str.ends_with("var/lib/dpkg/status")
204    }
205
206    fn extract_packages(path: &Path) -> Vec<PackageData> {
207        let content = match read_file_to_string(path) {
208            Ok(c) => c,
209            Err(e) => {
210                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
211                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
212            }
213        };
214
215        let packages = parse_dpkg_status(&content);
216        if packages.is_empty() {
217            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
218        } else {
219            packages
220        }
221    }
222}
223
224pub struct DebianDistrolessInstalledParser;
225
226impl PackageParser for DebianDistrolessInstalledParser {
227    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
228
229    fn is_match(path: &Path) -> bool {
230        let path_str = path.to_string_lossy();
231        path_str.contains("var/lib/dpkg/status.d/")
232    }
233
234    fn extract_packages(path: &Path) -> Vec<PackageData> {
235        let content = match read_file_to_string(path) {
236            Ok(c) => c,
237            Err(e) => {
238                warn!("Failed to read distroless status file at {:?}: {}", path, e);
239                return vec![default_package_data(
240                    DatasourceId::DebianDistrolessInstalledDb,
241                )];
242            }
243        };
244
245        vec![parse_distroless_status(&content)]
246    }
247}
248
249fn parse_distroless_status(content: &str) -> PackageData {
250    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
251
252    if paragraphs.is_empty() {
253        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
254    }
255
256    build_package_from_paragraph(
257        &paragraphs[0],
258        None,
259        DatasourceId::DebianDistrolessInstalledDb,
260    )
261    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
262}
263
264// ---------------------------------------------------------------------------
265// Parsing logic
266// ---------------------------------------------------------------------------
267
268/// Parses a debian/control file into PackageData entries.
269///
270/// A debian/control file has a Source paragraph followed by one or more Binary
271/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
272/// into each binary package.
273fn parse_debian_control(content: &str) -> Vec<PackageData> {
274    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
275    if paragraphs.is_empty() {
276        return Vec::new();
277    }
278
279    // Determine if first paragraph is a Source paragraph
280    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
281
282    let (source_paragraph, binary_start) = if has_source {
283        (Some(&paragraphs[0]), 1)
284    } else {
285        (None, 0)
286    };
287
288    // Extract source-level shared metadata
289    let source_meta = source_paragraph.map(extract_source_meta);
290
291    let mut packages = Vec::new();
292
293    for para in &paragraphs[binary_start..] {
294        if let Some(pkg) = build_package_from_paragraph(
295            para,
296            source_meta.as_ref(),
297            DatasourceId::DebianControlInSource,
298        ) {
299            packages.push(pkg);
300        }
301    }
302
303    if packages.is_empty()
304        && let Some(source_para) = source_paragraph
305        && let Some(pkg) = build_package_from_source_paragraph(source_para)
306    {
307        packages.push(pkg);
308    }
309
310    packages
311}
312
313/// Parses a dpkg/status file into PackageData entries.
314///
315/// Each paragraph represents an installed package. Only packages with
316/// `Status: install ok installed` are included.
317fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
318    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
319    let mut packages = Vec::new();
320
321    for para in &paragraphs {
322        let status = rfc822::get_header_first(&para.headers, "status");
323        if status.as_deref() != Some("install ok installed") {
324            continue;
325        }
326
327        if let Some(pkg) =
328            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
329        {
330            packages.push(pkg);
331        }
332    }
333
334    packages
335}
336
337// ---------------------------------------------------------------------------
338// Source paragraph metadata (shared across binary packages)
339// ---------------------------------------------------------------------------
340
341struct SourceMeta {
342    parties: Vec<Party>,
343    homepage_url: Option<String>,
344    vcs_url: Option<String>,
345    code_view_url: Option<String>,
346    bug_tracking_url: Option<String>,
347}
348
349fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
350    let mut parties = Vec::new();
351
352    // Maintainer
353    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
354        let (name, email) = split_name_email(&maintainer);
355        parties.push(Party {
356            r#type: Some("person".to_string()),
357            role: Some("maintainer".to_string()),
358            name,
359            email,
360            url: None,
361            organization: None,
362            organization_url: None,
363            timezone: None,
364        });
365    }
366
367    // Original-Maintainer
368    if let Some(orig_maintainer) =
369        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
370    {
371        let (name, email) = split_name_email(&orig_maintainer);
372        parties.push(Party {
373            r#type: Some("person".to_string()),
374            role: Some("maintainer".to_string()),
375            name,
376            email,
377            url: None,
378            organization: None,
379            organization_url: None,
380            timezone: None,
381        });
382    }
383
384    // Uploaders (comma-separated)
385    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
386        for uploader in uploaders_str.split(',') {
387            let trimmed = uploader.trim();
388            if !trimmed.is_empty() {
389                let (name, email) = split_name_email(trimmed);
390                parties.push(Party {
391                    r#type: Some("person".to_string()),
392                    role: Some("uploader".to_string()),
393                    name,
394                    email,
395                    url: None,
396                    organization: None,
397                    organization_url: None,
398                    timezone: None,
399                });
400            }
401        }
402    }
403
404    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage");
405
406    // VCS-Git: may contain branch info after space
407    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
408        .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
409
410    let code_view_url = rfc822::get_header_first(&paragraph.headers, "vcs-browser");
411
412    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs");
413
414    SourceMeta {
415        parties,
416        homepage_url,
417        vcs_url,
418        code_view_url,
419        bug_tracking_url,
420    }
421}
422
423// ---------------------------------------------------------------------------
424// Package building
425// ---------------------------------------------------------------------------
426
427fn build_package_from_paragraph(
428    paragraph: &Rfc822Metadata,
429    source_meta: Option<&SourceMeta>,
430    datasource_id: DatasourceId,
431) -> Option<PackageData> {
432    let name = rfc822::get_header_first(&paragraph.headers, "package")?;
433    let version = rfc822::get_header_first(&paragraph.headers, "version");
434    let architecture = rfc822::get_header_first(&paragraph.headers, "architecture");
435    let description = rfc822::get_header_first(&paragraph.headers, "description");
436    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
437    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage");
438    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
439    let section = rfc822::get_header_first(&paragraph.headers, "section");
440    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
441    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
442
443    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
444
445    // Build parties: use source_meta parties if available, otherwise parse from paragraph
446    let parties = if let Some(meta) = source_meta {
447        meta.parties.clone()
448    } else {
449        let mut p = Vec::new();
450        if let Some(m) = &maintainer_str {
451            let (n, e) = split_name_email(m);
452            p.push(Party {
453                r#type: Some("person".to_string()),
454                role: Some("maintainer".to_string()),
455                name: n,
456                email: e,
457                url: None,
458                organization: None,
459                organization_url: None,
460                timezone: None,
461            });
462        }
463        p
464    };
465
466    // Resolve homepage: paragraph's own, or from source metadata
467    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
468    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
469    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
470    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
471
472    // Build PURL
473    let purl = build_debian_purl(
474        &name,
475        version.as_deref(),
476        namespace.as_deref(),
477        architecture.as_deref(),
478    );
479
480    // Parse dependencies from all dependency fields
481    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
482
483    // Keywords from section
484    let keywords = section.into_iter().collect();
485
486    // Source packages
487    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
488
489    // Extra data
490    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
491    if let Some(ma) = &multi_arch
492        && !ma.is_empty()
493    {
494        extra_data.insert(
495            "multi_arch".to_string(),
496            serde_json::Value::String(ma.clone()),
497        );
498    }
499    if let Some(size_str) = &installed_size
500        && let Ok(size) = size_str.parse::<u64>()
501    {
502        extra_data.insert(
503            "installed_size".to_string(),
504            serde_json::Value::Number(serde_json::Number::from(size)),
505        );
506    }
507
508    // Qualifiers for architecture
509    let qualifiers = architecture.as_ref().map(|arch| {
510        let mut q = HashMap::new();
511        q.insert("arch".to_string(), arch.clone());
512        q
513    });
514
515    Some(PackageData {
516        package_type: Some(PACKAGE_TYPE),
517        namespace: namespace.clone(),
518        name: Some(name),
519        version,
520        qualifiers,
521        subpath: None,
522        primary_language: None,
523        description,
524        release_date: None,
525        parties,
526        keywords,
527        homepage_url,
528        download_url: None,
529        size: None,
530        sha1: None,
531        md5: None,
532        sha256: None,
533        sha512: None,
534        bug_tracking_url,
535        code_view_url,
536        vcs_url,
537        copyright: None,
538        holder: None,
539        declared_license_expression: None,
540        declared_license_expression_spdx: None,
541        license_detections: Vec::new(),
542        other_license_expression: None,
543        other_license_expression_spdx: None,
544        other_license_detections: Vec::new(),
545        extracted_license_statement: None,
546        notice_text: None,
547        source_packages,
548        file_references: Vec::new(),
549        is_private: false,
550        is_virtual: false,
551        extra_data: if extra_data.is_empty() {
552            None
553        } else {
554            Some(extra_data)
555        },
556        dependencies,
557        repository_homepage_url: None,
558        repository_download_url: None,
559        api_data_url: None,
560        datasource_id: Some(datasource_id),
561        purl,
562    })
563}
564
565fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
566    let name = rfc822::get_header_first(&paragraph.headers, "source")?;
567    let version = rfc822::get_header_first(&paragraph.headers, "version");
568    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
569
570    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
571    let source_meta = extract_source_meta(paragraph);
572
573    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
574    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
575
576    let section = rfc822::get_header_first(&paragraph.headers, "section");
577    let keywords = section.into_iter().collect();
578
579    Some(PackageData {
580        package_type: Some(PACKAGE_TYPE),
581        namespace: namespace.clone(),
582        name: Some(name),
583        version,
584        qualifiers: None,
585        subpath: None,
586        primary_language: None,
587        description: None,
588        release_date: None,
589        parties: source_meta.parties,
590        keywords,
591        homepage_url: source_meta.homepage_url,
592        download_url: None,
593        size: None,
594        sha1: None,
595        md5: None,
596        sha256: None,
597        sha512: None,
598        bug_tracking_url: source_meta.bug_tracking_url,
599        code_view_url: source_meta.code_view_url,
600        vcs_url: source_meta.vcs_url,
601        copyright: None,
602        holder: None,
603        declared_license_expression: None,
604        declared_license_expression_spdx: None,
605        license_detections: Vec::new(),
606        other_license_expression: None,
607        other_license_expression_spdx: None,
608        other_license_detections: Vec::new(),
609        extracted_license_statement: None,
610        notice_text: None,
611        source_packages: Vec::new(),
612        file_references: Vec::new(),
613        is_private: false,
614        is_virtual: false,
615        extra_data: None,
616        dependencies,
617        repository_homepage_url: None,
618        repository_download_url: None,
619        api_data_url: None,
620        datasource_id: Some(DatasourceId::DebianControlInSource),
621        purl,
622    })
623}
624
625// ---------------------------------------------------------------------------
626// Namespace detection
627// ---------------------------------------------------------------------------
628
629fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
630    // Check version clues first
631    if let Some(ver) = version {
632        let ver_lower = ver.to_lowercase();
633        for clue in VERSION_CLUES_UBUNTU {
634            if ver_lower.contains(clue) {
635                return Some("ubuntu".to_string());
636            }
637        }
638        for clue in VERSION_CLUES_DEBIAN {
639            if ver_lower.contains(clue) {
640                return Some("debian".to_string());
641            }
642        }
643    }
644
645    // Check maintainer clues
646    if let Some(maint) = maintainer {
647        let maint_lower = maint.to_lowercase();
648        for clue in MAINTAINER_CLUES_UBUNTU {
649            if maint_lower.contains(clue) {
650                return Some("ubuntu".to_string());
651            }
652        }
653        for clue in MAINTAINER_CLUES_DEBIAN {
654            if maint_lower.contains(clue) {
655                return Some("debian".to_string());
656            }
657        }
658    }
659
660    // Default to debian
661    Some("debian".to_string())
662}
663
664// ---------------------------------------------------------------------------
665// PURL generation
666// ---------------------------------------------------------------------------
667
668fn build_debian_purl(
669    name: &str,
670    version: Option<&str>,
671    namespace: Option<&str>,
672    architecture: Option<&str>,
673) -> Option<String> {
674    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
675
676    if let Some(ns) = namespace {
677        purl.with_namespace(ns).ok()?;
678    }
679
680    if let Some(ver) = version {
681        purl.with_version(ver).ok()?;
682    }
683
684    if let Some(arch) = architecture {
685        purl.add_qualifier("arch", arch).ok()?;
686    }
687
688    Some(purl.to_string())
689}
690
691// ---------------------------------------------------------------------------
692// Dependency parsing
693// ---------------------------------------------------------------------------
694
695fn parse_all_dependencies(
696    headers: &HashMap<String, Vec<String>>,
697    namespace: Option<&str>,
698) -> Vec<Dependency> {
699    let mut dependencies = Vec::new();
700
701    for spec in DEP_FIELDS {
702        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
703            dependencies.extend(parse_dependency_field(
704                &dep_str,
705                spec.scope,
706                spec.is_runtime,
707                spec.is_optional,
708                namespace,
709            ));
710        }
711    }
712
713    dependencies
714}
715
716/// Parses a Debian dependency field value.
717///
718/// Debian dependencies are comma-separated, with optional version constraints
719/// in parentheses and alternative packages separated by `|`.
720///
721/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
722///
723/// Alternatives (|) are treated as separate optional dependencies.
724fn parse_dependency_field(
725    dep_str: &str,
726    scope: &str,
727    is_runtime: bool,
728    is_optional: bool,
729    namespace: Option<&str>,
730) -> Vec<Dependency> {
731    let mut deps = Vec::new();
732
733    // Regex for parsing individual dependency: name (operator version)
734    // Debian operators: <<, <=, =, >=, >>
735    let dep_re = Regex::new(
736        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
737    )
738    .unwrap();
739
740    for group in dep_str.split(',') {
741        let group = group.trim();
742        if group.is_empty() {
743            continue;
744        }
745
746        // Handle alternatives (|)
747        let alternatives: Vec<&str> = group.split('|').collect();
748        let has_alternatives = alternatives.len() > 1;
749
750        for alt in alternatives {
751            let alt = alt.trim();
752            if alt.is_empty() {
753                continue;
754            }
755
756            if let Some(caps) = dep_re.captures(alt) {
757                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
758                let operator = caps.get(2).map(|m| m.as_str().trim());
759                let version = caps.get(3).map(|m| m.as_str().trim());
760
761                if pkg_name.is_empty() {
762                    continue;
763                }
764
765                // Skip substitution variables like ${shlibs:Depends}
766                if pkg_name.starts_with('$') {
767                    continue;
768                }
769
770                let extracted_requirement = match (operator, version) {
771                    (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
772                    _ => None,
773                };
774
775                let is_pinned = operator.map(|op| op == "=");
776
777                let purl = build_debian_purl(pkg_name, None, namespace, None);
778
779                deps.push(Dependency {
780                    purl,
781                    extracted_requirement,
782                    scope: Some(scope.to_string()),
783                    is_runtime: Some(is_runtime),
784                    is_optional: Some(is_optional || has_alternatives),
785                    is_pinned,
786                    is_direct: Some(true),
787                    resolved_package: None,
788                    extra_data: None,
789                });
790            }
791        }
792    }
793
794    deps
795}
796
797// ---------------------------------------------------------------------------
798// Source field parsing
799// ---------------------------------------------------------------------------
800
801/// Parses the Source field which may contain a version in parentheses.
802///
803/// Format: `source-name` or `source-name (version)`
804fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
805    let Some(source_str) = source else {
806        return Vec::new();
807    };
808
809    let trimmed = source_str.trim();
810    if trimmed.is_empty() {
811        return Vec::new();
812    }
813
814    // Extract name and optional version from "name (version)" format
815    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
816        let name = trimmed[..paren_start].trim();
817        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
818        (
819            name,
820            if version.is_empty() {
821                None
822            } else {
823                Some(version)
824            },
825        )
826    } else {
827        (trimmed, None)
828    };
829
830    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
831        vec![purl]
832    } else {
833        Vec::new()
834    }
835}
836
837// ---------------------------------------------------------------------------
838// Parser registration macros
839// ---------------------------------------------------------------------------
840
// NOTE(review): the meaning of each `register_parser!` argument (description,
// path globs, package type, an empty fourth argument, documentation URL) is
// inferred from the values passed here; confirm against the macro definition.

// Registration entry for `debian/control` files.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for the dpkg status database.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration entry for per-package status files under `status.d/`.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
864
865// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
866// but we register it for the single-package interface too.
867
868// ============================================================================
869// WAVE 2 PARSERS: Additional Debian Format Support
870// ============================================================================
871
872/// Parser for Debian Source Control (.dsc) files
873pub struct DebianDscParser;
874
875impl PackageParser for DebianDscParser {
876    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
877
878    fn is_match(path: &Path) -> bool {
879        path.extension().and_then(|e| e.to_str()) == Some("dsc")
880    }
881
882    fn extract_packages(path: &Path) -> Vec<PackageData> {
883        let content = match read_file_to_string(path) {
884            Ok(c) => c,
885            Err(e) => {
886                warn!("Failed to read .dsc file {:?}: {}", path, e);
887                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
888            }
889        };
890
891        vec![parse_dsc_content(&content)]
892    }
893}
894
895crate::register_parser!(
896    "Debian source control file (.dsc)",
897    &["**/*.dsc"],
898    "deb",
899    "",
900    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
901);
902
/// Removes PGP clear-sign armor from `content`, leaving the signed text.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines and
/// the blank line that close the armor header, and everything from
/// `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`. Every kept line is
/// written back followed by `\n`, so unsigned input comes back with
/// normalized line endings and a trailing newline.
fn strip_pgp_signature(content: &str) -> String {
    let mut cleaned = String::new();
    // True between the signed-message marker and the blank line ending the
    // armor header.
    let mut header_open = false;
    // True while inside the trailing signature block.
    let mut skipping_signature = false;

    for line in content.lines() {
        match line {
            l if l.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") => header_open = true,
            l if l.starts_with("-----BEGIN PGP SIGNATURE-----") => skipping_signature = true,
            l if l.starts_with("-----END PGP SIGNATURE-----") => skipping_signature = false,
            // Armor header line: drop it.
            l if header_open && l.starts_with("Hash:") => {}
            // First blank line before any payload ends the armor header.
            l if header_open && l.is_empty() && cleaned.is_empty() => header_open = false,
            l if !skipping_signature => {
                cleaned.push_str(l);
                cleaned.push('\n');
            }
            // Inside the signature block: drop the line.
            _ => {}
        }
    }

    cleaned
}
936
937fn parse_dsc_content(content: &str) -> PackageData {
938    let clean_content = strip_pgp_signature(content);
939    let metadata = rfc822::parse_rfc822_content(&clean_content);
940    let headers = &metadata.headers;
941
942    let name = rfc822::get_header_first(headers, "source");
943    let version = rfc822::get_header_first(headers, "version");
944    let architecture = rfc822::get_header_first(headers, "architecture");
945    let namespace = Some("debian".to_string());
946
947    let mut package = PackageData {
948        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
949        package_type: Some(PACKAGE_TYPE),
950        namespace: namespace.clone(),
951        name: name.clone(),
952        version: version.clone(),
953        description: rfc822::get_header_first(headers, "description"),
954        homepage_url: rfc822::get_header_first(headers, "homepage"),
955        vcs_url: rfc822::get_header_first(headers, "vcs-git"),
956        code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
957        ..Default::default()
958    };
959
960    // Build PURL with architecture qualifier
961    if let (Some(n), Some(v)) = (&name, &version) {
962        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
963    }
964
965    // Set source_packages to point to the source itself (without version)
966    if let Some(n) = &name
967        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
968    {
969        package.source_packages.push(source_purl);
970    }
971
972    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
973        let (name_opt, email_opt) = split_name_email(&maintainer);
974        package.parties.push(Party {
975            r#type: None,
976            role: Some("maintainer".to_string()),
977            name: name_opt,
978            email: email_opt,
979            url: None,
980            organization: None,
981            organization_url: None,
982            timezone: None,
983        });
984    }
985
986    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
987        for uploader in uploaders_str.split(',') {
988            let uploader = uploader.trim();
989            if uploader.is_empty() {
990                continue;
991            }
992            let (name_opt, email_opt) = split_name_email(uploader);
993            package.parties.push(Party {
994                r#type: None,
995                role: Some("uploader".to_string()),
996                name: name_opt,
997                email: email_opt,
998                url: None,
999                organization: None,
1000                organization_url: None,
1001                timezone: None,
1002            });
1003        }
1004    }
1005
1006    // Parse Build-Depends
1007    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1008        package.dependencies.extend(parse_dependency_field(
1009            &build_deps,
1010            "build",
1011            false,
1012            false,
1013            namespace.as_deref(),
1014        ));
1015    }
1016
1017    // Store Standards-Version in extra_data
1018    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1019        let map = package.extra_data.get_or_insert_with(HashMap::new);
1020        map.insert("standards_version".to_string(), standards.into());
1021    }
1022
1023    package
1024}
1025
1026/// Parser for Debian original source tarballs (*.orig.tar.*)
1027pub struct DebianOrigTarParser;
1028
1029impl PackageParser for DebianOrigTarParser {
1030    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1031
1032    fn is_match(path: &Path) -> bool {
1033        path.file_name()
1034            .and_then(|n| n.to_str())
1035            .map(|name| name.contains(".orig.tar."))
1036            .unwrap_or(false)
1037    }
1038
1039    fn extract_packages(path: &Path) -> Vec<PackageData> {
1040        let filename = match path.file_name().and_then(|n| n.to_str()) {
1041            Some(f) => f,
1042            None => {
1043                return vec![default_package_data(
1044                    DatasourceId::DebianOriginalSourceTarball,
1045                )];
1046            }
1047        };
1048
1049        vec![parse_source_tarball_filename(
1050            filename,
1051            DatasourceId::DebianOriginalSourceTarball,
1052        )]
1053    }
1054}
1055
1056crate::register_parser!(
1057    "Debian original source tarball",
1058    &["**/*.orig.tar.*"],
1059    "deb",
1060    "",
1061    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1062);
1063
1064/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1065pub struct DebianDebianTarParser;
1066
1067impl PackageParser for DebianDebianTarParser {
1068    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1069
1070    fn is_match(path: &Path) -> bool {
1071        path.file_name()
1072            .and_then(|n| n.to_str())
1073            .map(|name| name.contains(".debian.tar."))
1074            .unwrap_or(false)
1075    }
1076
1077    fn extract_packages(path: &Path) -> Vec<PackageData> {
1078        let filename = match path.file_name().and_then(|n| n.to_str()) {
1079            Some(f) => f,
1080            None => {
1081                return vec![default_package_data(
1082                    DatasourceId::DebianSourceMetadataTarball,
1083                )];
1084            }
1085        };
1086
1087        vec![parse_source_tarball_filename(
1088            filename,
1089            DatasourceId::DebianSourceMetadataTarball,
1090        )]
1091    }
1092}
1093
1094crate::register_parser!(
1095    "Debian source metadata tarball",
1096    &["**/*.debian.tar.*"],
1097    "deb",
1098    "",
1099    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1100);
1101
1102fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1103    let without_tar_ext = filename
1104        .trim_end_matches(".gz")
1105        .trim_end_matches(".xz")
1106        .trim_end_matches(".bz2")
1107        .trim_end_matches(".tar");
1108
1109    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1110    if parts.len() < 2 {
1111        return default_package_data(datasource_id);
1112    }
1113
1114    let name = parts[0].to_string();
1115    let version_with_suffix = parts[1];
1116
1117    let version = version_with_suffix
1118        .trim_end_matches(".orig")
1119        .trim_end_matches(".debian")
1120        .to_string();
1121
1122    let namespace = Some("debian".to_string());
1123
1124    PackageData {
1125        datasource_id: Some(datasource_id),
1126        package_type: Some(PACKAGE_TYPE),
1127        namespace: namespace.clone(),
1128        name: Some(name.clone()),
1129        version: Some(version.clone()),
1130        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1131        ..Default::default()
1132    }
1133}
1134
1135/// Parser for Debian installed file lists (*.list)
1136pub struct DebianInstalledListParser;
1137
1138impl PackageParser for DebianInstalledListParser {
1139    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1140
1141    fn is_match(path: &Path) -> bool {
1142        path.extension().and_then(|e| e.to_str()) == Some("list")
1143            && path
1144                .to_str()
1145                .map(|p| p.contains("/var/lib/dpkg/info/"))
1146                .unwrap_or(false)
1147    }
1148
1149    fn extract_packages(path: &Path) -> Vec<PackageData> {
1150        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1151            Some(f) => f,
1152            None => {
1153                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1154            }
1155        };
1156
1157        let content = match read_file_to_string(path) {
1158            Ok(c) => c,
1159            Err(e) => {
1160                warn!("Failed to read .list file {:?}: {}", path, e);
1161                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1162            }
1163        };
1164
1165        vec![parse_debian_file_list(
1166            &content,
1167            filename,
1168            DatasourceId::DebianInstalledFilesList,
1169        )]
1170    }
1171}
1172
1173crate::register_parser!(
1174    "Debian installed files list",
1175    &["**/var/lib/dpkg/info/*.list"],
1176    "deb",
1177    "",
1178    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1179);
1180
1181/// Parser for Debian installed MD5 checksum files (*.md5sums)
1182pub struct DebianInstalledMd5sumsParser;
1183
1184impl PackageParser for DebianInstalledMd5sumsParser {
1185    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1186
1187    fn is_match(path: &Path) -> bool {
1188        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1189            && path
1190                .to_str()
1191                .map(|p| p.contains("/var/lib/dpkg/info/"))
1192                .unwrap_or(false)
1193    }
1194
1195    fn extract_packages(path: &Path) -> Vec<PackageData> {
1196        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1197            Some(f) => f,
1198            None => {
1199                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1200            }
1201        };
1202
1203        let content = match read_file_to_string(path) {
1204            Ok(c) => c,
1205            Err(e) => {
1206                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1207                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1208            }
1209        };
1210
1211        vec![parse_debian_file_list(
1212            &content,
1213            filename,
1214            DatasourceId::DebianInstalledMd5Sums,
1215        )]
1216    }
1217}
1218
1219crate::register_parser!(
1220    "Debian installed package md5sums",
1221    &["**/var/lib/dpkg/info/*.md5sums"],
1222    "deb",
1223    "",
1224    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1225);
1226
1227const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1228
1229fn parse_debian_file_list(
1230    content: &str,
1231    filename: &str,
1232    datasource_id: DatasourceId,
1233) -> PackageData {
1234    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1235        (Some(pkg.to_string()), Some(arch.to_string()))
1236    } else if filename == "md5sums" {
1237        (None, None)
1238    } else {
1239        (Some(filename.to_string()), None)
1240    };
1241
1242    let mut file_references = Vec::new();
1243
1244    for line in content.lines() {
1245        let line = line.trim();
1246        if line.is_empty() || line.starts_with('#') {
1247            continue;
1248        }
1249
1250        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1251            (Some(hash.trim().to_string()), p.trim())
1252        } else {
1253            (None, line)
1254        };
1255
1256        if IGNORED_ROOT_DIRS.contains(&path) {
1257            continue;
1258        }
1259
1260        file_references.push(FileReference {
1261            path: path.to_string(),
1262            size: None,
1263            sha1: None,
1264            md5: md5sum,
1265            sha256: None,
1266            sha512: None,
1267            extra_data: None,
1268        });
1269    }
1270
1271    if file_references.is_empty() {
1272        return default_package_data(datasource_id);
1273    }
1274
1275    let namespace = Some("debian".to_string());
1276    let mut package = PackageData {
1277        datasource_id: Some(datasource_id),
1278        package_type: Some(PACKAGE_TYPE),
1279        namespace: namespace.clone(),
1280        name: name.clone(),
1281        file_references,
1282        ..Default::default()
1283    };
1284
1285    if let Some(n) = &name {
1286        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1287    }
1288
1289    package
1290}
1291
1292/// Parser for Debian machine-readable copyright files (DEP-5 format)
1293pub struct DebianCopyrightParser;
1294
1295impl PackageParser for DebianCopyrightParser {
1296    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1297
1298    fn is_match(path: &Path) -> bool {
1299        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1300            if filename != "copyright" {
1301                return false;
1302            }
1303            let path_str = path.to_string_lossy();
1304            path_str.contains("/debian/")
1305                || path_str.contains("/usr/share/doc/")
1306                || path_str.ends_with("debian/copyright")
1307        } else {
1308            false
1309        }
1310    }
1311
1312    fn extract_packages(path: &Path) -> Vec<PackageData> {
1313        let content = match read_file_to_string(path) {
1314            Ok(c) => c,
1315            Err(e) => {
1316                warn!("Failed to read copyright file {:?}: {}", path, e);
1317                return vec![default_package_data(DatasourceId::DebianCopyright)];
1318            }
1319        };
1320
1321        let package_name = extract_package_name_from_path(path);
1322        vec![parse_copyright_file(&content, package_name.as_deref())]
1323    }
1324}
1325
1326crate::register_parser!(
1327    "Debian machine-readable copyright file",
1328    &["**/debian/copyright", "**/usr/share/doc/*/copyright"],
1329    "deb",
1330    "",
1331    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
1332);
1333
/// Returns the path component that directly follows the first `doc`
/// directory which is itself followed by a normal component (as in
/// `/usr/share/doc/<package>/copyright`).
///
/// Yields `None` when no such pair exists or when the following component is
/// not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<Component<'_>> = path.components().collect();

    parts
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(package))
                if dir.to_str() == Some("doc") =>
            {
                // Stop at the first qualifying pair even if the name is not UTF-8.
                Some(package.to_str().map(str::to_string))
            }
            _ => None,
        })
        .flatten()
}
1348
1349fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1350    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1351
1352    let is_dep5 = paragraphs
1353        .first()
1354        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1355        .is_some();
1356
1357    let namespace = Some("debian".to_string());
1358    let mut parties = Vec::new();
1359    let mut license_statements = Vec::new();
1360    let mut primary_license_detection = None;
1361    let mut header_license_detection = None;
1362    let mut other_license_detections = Vec::new();
1363
1364    if is_dep5 {
1365        for para in &paragraphs {
1366            if let Some(copyright_text) =
1367                rfc822::get_header_first(&para.metadata.headers, "copyright")
1368            {
1369                for holder in parse_copyright_holders(&copyright_text) {
1370                    if !holder.is_empty() {
1371                        parties.push(Party {
1372                            r#type: None,
1373                            role: Some("copyright-holder".to_string()),
1374                            name: Some(holder),
1375                            email: None,
1376                            url: None,
1377                            organization: None,
1378                            organization_url: None,
1379                            timezone: None,
1380                        });
1381                    }
1382                }
1383            }
1384
1385            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1386                let license_name = license.lines().next().unwrap_or(&license).trim();
1387                if !license_name.is_empty()
1388                    && !license_statements.contains(&license_name.to_string())
1389                {
1390                    license_statements.push(license_name.to_string());
1391                }
1392
1393                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1394                    let detection =
1395                        build_primary_license_detection(license_name, matched_text, line_no);
1396                    let is_header_paragraph =
1397                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1398                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1399                        == Some("*")
1400                    {
1401                        primary_license_detection = Some(detection);
1402                    } else if is_header_paragraph {
1403                        header_license_detection.get_or_insert(detection);
1404                    } else {
1405                        other_license_detections.push(detection);
1406                    }
1407                }
1408            }
1409        }
1410
1411        if primary_license_detection.is_none() && header_license_detection.is_some() {
1412            primary_license_detection = header_license_detection;
1413        }
1414    } else {
1415        let copyright_block = extract_unstructured_field(content, "Copyright:");
1416        if let Some(text) = copyright_block {
1417            for holder in parse_copyright_holders(&text) {
1418                if !holder.is_empty() {
1419                    parties.push(Party {
1420                        r#type: None,
1421                        role: Some("copyright-holder".to_string()),
1422                        name: Some(holder),
1423                        email: None,
1424                        url: None,
1425                        organization: None,
1426                        organization_url: None,
1427                        timezone: None,
1428                    });
1429                }
1430            }
1431        }
1432
1433        let license_block = extract_unstructured_field(content, "License:");
1434        if let Some(text) = license_block {
1435            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1436        }
1437    }
1438
1439    let extracted_license_statement = if license_statements.is_empty() {
1440        None
1441    } else {
1442        Some(license_statements.join(" AND "))
1443    };
1444
1445    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1446    let declared_license_expression = license_detections
1447        .first()
1448        .map(|detection| detection.license_expression.clone());
1449    let declared_license_expression_spdx = license_detections
1450        .first()
1451        .map(|detection| detection.license_expression_spdx.clone());
1452    let other_license_expression = combine_license_expressions(
1453        other_license_detections
1454            .iter()
1455            .map(|detection| detection.license_expression.clone()),
1456    );
1457    let other_license_expression_spdx = combine_license_expressions(
1458        other_license_detections
1459            .iter()
1460            .map(|detection| detection.license_expression_spdx.clone()),
1461    );
1462
1463    PackageData {
1464        datasource_id: Some(DatasourceId::DebianCopyright),
1465        package_type: Some(PACKAGE_TYPE),
1466        namespace: namespace.clone(),
1467        name: package_name.map(|s| s.to_string()),
1468        parties,
1469        declared_license_expression,
1470        declared_license_expression_spdx,
1471        license_detections,
1472        other_license_expression,
1473        other_license_expression_spdx,
1474        other_license_detections,
1475        extracted_license_statement,
1476        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1477        ..Default::default()
1478    }
1479}
1480
/// One blank-line-delimited paragraph of a Debian `copyright` file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lower-cased field name.
    /// The `body` is always left empty by `finalize_copyright_paragraph`.
    metadata: Rfc822Metadata,
    /// The first `License:` header line of the paragraph (with trailing
    /// whitespace removed) together with its 1-based line number in the file,
    /// or `None` when the paragraph has no such header.
    license_header_line: Option<(String, usize)>,
}
1486
1487fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1488    let mut paragraphs = Vec::new();
1489    let mut current_lines = Vec::new();
1490    let mut current_start_line = 1usize;
1491
1492    for (idx, line) in content.lines().enumerate() {
1493        let line_no = idx + 1;
1494        if line.is_empty() {
1495            if !current_lines.is_empty() {
1496                paragraphs.push(finalize_copyright_paragraph(
1497                    std::mem::take(&mut current_lines),
1498                    current_start_line,
1499                ));
1500            }
1501            current_start_line = line_no + 1;
1502        } else {
1503            if current_lines.is_empty() {
1504                current_start_line = line_no;
1505            }
1506            current_lines.push(line.to_string());
1507        }
1508    }
1509
1510    if !current_lines.is_empty() {
1511        paragraphs.push(finalize_copyright_paragraph(
1512            current_lines,
1513            current_start_line,
1514        ));
1515    }
1516
1517    paragraphs
1518}
1519
1520fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1521    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1522    let mut current_name: Option<String> = None;
1523    let mut current_value = String::new();
1524    let mut license_header_line = None;
1525
1526    for (idx, line) in raw_lines.iter().enumerate() {
1527        if line.starts_with(' ') || line.starts_with('\t') {
1528            if current_name.is_some() {
1529                current_value.push('\n');
1530                current_value.push_str(line);
1531            }
1532            continue;
1533        }
1534
1535        if let Some(name) = current_name.take() {
1536            add_copyright_header_value(&mut headers, &name, &current_value);
1537            current_value.clear();
1538        }
1539
1540        if let Some((name, value)) = line.split_once(':') {
1541            let normalized_name = name.trim().to_ascii_lowercase();
1542            if normalized_name == "license" && license_header_line.is_none() {
1543                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1544            }
1545            current_name = Some(normalized_name);
1546            current_value = value.trim_start().to_string();
1547        }
1548    }
1549
1550    if let Some(name) = current_name.take() {
1551        add_copyright_header_value(&mut headers, &name, &current_value);
1552    }
1553
1554    CopyrightParagraph {
1555        metadata: Rfc822Metadata {
1556            headers,
1557            body: String::new(),
1558        },
1559        license_header_line,
1560    }
1561}
1562
/// Records `value` under `name` in `headers`.
///
/// The key is always created, even when the value is blank; the value itself
/// is stored (with trailing whitespace removed) only when something remains.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1570
1571fn build_primary_license_detection(
1572    license_name: &str,
1573    matched_text: String,
1574    line_no: usize,
1575) -> LicenseDetection {
1576    let normalized = normalize_debian_license_name(license_name);
1577
1578    build_declared_license_detection(
1579        &normalized,
1580        DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no),
1581    )
1582}
1583
1584fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1585    match license_name.trim() {
1586        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1587        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1588        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1589        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1590        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1591        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1592        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1593        "public-domain" => {
1594            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1595        }
1596        other => normalize_declared_license_key(other)
1597            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1598    }
1599}
1600
/// Extracts copyright holder names from free-form copyright text, one
/// candidate per non-empty line.
///
/// For each line, leading copyright markers (`Copyright`, `copyright`, `(C)`,
/// `(c)`, `©`) are removed in any order and combination, with the whitespace
/// between them. Everything up to the first alphabetic character (years,
/// ranges, commas, dashes) is then skipped, and the trimmed remainder is the
/// holder. Lines with no alphabetic character after the markers, or with a
/// holder of two bytes or fewer, are ignored.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: [&str; 5] = ["Copyright", "copyright", "(C)", "(c)", "©"];

    text.lines()
        .filter_map(|line| {
            let mut rest = line.trim();

            // Peel markers until none is left at the front, so that combined
            // forms such as "Copyright (C) 2020 ..." are fully stripped.
            loop {
                let before = rest.len();
                for marker in MARKERS {
                    if let Some(stripped) = rest.strip_prefix(marker) {
                        rest = stripped.trim_start();
                    }
                }
                if rest.len() == before {
                    break;
                }
            }

            // Skip years and punctuation in front of the holder name.
            let start = rest.find(char::is_alphabetic)?;
            let holder = rest[start..].trim();

            (holder.len() > 2).then(|| holder.to_string())
        })
        .collect()
}
1633
/// Extracts the text of a `field_name` block from a free-form copyright file.
///
/// Collection starts at the first line that begins with `field_name`
/// (compared case-sensitively); the rest of that line and every following
/// line that starts with whitespace are gathered, each trimmed and joined
/// with newlines. Empty lines are skipped, and the first non-indented,
/// non-blank line that does not begin with `field_name` ends the block.
/// Returns `None` when nothing but whitespace was collected.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut capturing = false;

    for raw in content.lines() {
        let piece = if raw.starts_with(field_name) {
            capturing = true;
            raw.trim_start_matches(field_name).trim()
        } else if !capturing {
            continue;
        } else if raw.starts_with(char::is_whitespace) {
            raw.trim()
        } else if raw.trim().is_empty() {
            continue;
        } else {
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    match collected.trim() {
        "" => None,
        body => Some(body.to_owned()),
    }
}
1660
1661/// Parser for Debian binary package archives (.deb files)
1662pub struct DebianDebParser;
1663
1664impl PackageParser for DebianDebParser {
1665    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1666
1667    fn is_match(path: &Path) -> bool {
1668        path.extension().and_then(|e| e.to_str()) == Some("deb")
1669    }
1670
1671    fn extract_packages(path: &Path) -> Vec<PackageData> {
1672        // Try to extract metadata from archive contents first
1673        if let Ok(data) = extract_deb_archive(path) {
1674            return vec![data];
1675        }
1676
1677        // Fallback to filename parsing
1678        let filename = match path.file_name().and_then(|n| n.to_str()) {
1679            Some(f) => f,
1680            None => {
1681                return vec![default_package_data(DatasourceId::DebianDeb)];
1682            }
1683        };
1684
1685        vec![parse_deb_filename(filename)]
1686    }
1687}
1688
1689crate::register_parser!(
1690    "Debian binary package archive (.deb)",
1691    &["**/*.deb"],
1692    "deb",
1693    "",
1694    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
1695);
1696
1697fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1698    use flate2::read::GzDecoder;
1699    use liblzma::read::XzDecoder;
1700    use std::io::{Cursor, Read};
1701
1702    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1703
1704    let mut archive = ar::Archive::new(file);
1705    let mut package: Option<PackageData> = None;
1706
1707    while let Some(entry_result) = archive.next_entry() {
1708        let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1709
1710        let entry_name = std::str::from_utf8(entry.header().identifier())
1711            .map_err(|e| format!("Invalid entry name: {}", e))?;
1712        let entry_name = entry_name.trim().to_string();
1713
1714        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1715            let mut control_data = Vec::new();
1716            entry
1717                .read_to_end(&mut control_data)
1718                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1719
1720            if entry_name.ends_with(".gz") {
1721                let decoder = GzDecoder::new(Cursor::new(control_data));
1722                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1723                    package = Some(parsed_package);
1724                }
1725            } else if entry_name.ends_with(".xz") {
1726                let decoder = XzDecoder::new(Cursor::new(control_data));
1727                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1728                    package = Some(parsed_package);
1729                }
1730            }
1731        } else if entry_name.starts_with("data.tar") {
1732            let mut data = Vec::new();
1733            entry
1734                .read_to_end(&mut data)
1735                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1736
1737            let Some(current_package) = package.as_mut() else {
1738                continue;
1739            };
1740
1741            if entry_name.ends_with(".gz") {
1742                let decoder = GzDecoder::new(Cursor::new(data));
1743                merge_deb_data_archive(decoder, current_package)?;
1744            } else if entry_name.ends_with(".xz") {
1745                let decoder = XzDecoder::new(Cursor::new(data));
1746                merge_deb_data_archive(decoder, current_package)?;
1747            }
1748        }
1749    }
1750
1751    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1752}
1753
1754fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1755    use std::io::Read;
1756
1757    let mut tar_archive = tar::Archive::new(reader);
1758
1759    for tar_entry_result in tar_archive
1760        .entries()
1761        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1762    {
1763        let mut tar_entry =
1764            tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1765
1766        let tar_path = tar_entry
1767            .path()
1768            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1769
1770        if tar_path.ends_with("control") {
1771            let mut control_content = String::new();
1772            tar_entry
1773                .read_to_string(&mut control_content)
1774                .map_err(|e| format!("Failed to read control file: {}", e))?;
1775
1776            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1777            if paragraphs.is_empty() {
1778                return Err("No paragraphs in control file".to_string());
1779            }
1780
1781            if let Some(package) =
1782                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
1783            {
1784                return Ok(Some(package));
1785            }
1786
1787            return Err("Failed to parse control file".to_string());
1788        }
1789    }
1790
1791    Ok(None)
1792}
1793
1794fn merge_deb_data_archive<R: std::io::Read>(
1795    reader: R,
1796    package: &mut PackageData,
1797) -> Result<(), String> {
1798    use std::io::Read;
1799
1800    let mut tar_archive = tar::Archive::new(reader);
1801
1802    for tar_entry_result in tar_archive
1803        .entries()
1804        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1805    {
1806        let mut tar_entry =
1807            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1808
1809        let tar_path = tar_entry
1810            .path()
1811            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1812        let tar_path_str = tar_path.to_string_lossy();
1813
1814        if tar_path_str.ends_with(&format!(
1815            "/usr/share/doc/{}/copyright",
1816            package.name.as_deref().unwrap_or_default()
1817        )) || tar_path_str.ends_with(&format!(
1818            "usr/share/doc/{}/copyright",
1819            package.name.as_deref().unwrap_or_default()
1820        )) {
1821            let mut copyright_content = String::new();
1822            tar_entry
1823                .read_to_string(&mut copyright_content)
1824                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1825
1826            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
1827            merge_debian_copyright_into_package(package, &copyright_pkg);
1828            break;
1829        }
1830    }
1831
1832    Ok(())
1833}
1834
1835fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1836    if target.extracted_license_statement.is_none() {
1837        target.extracted_license_statement = copyright.extracted_license_statement.clone();
1838    }
1839
1840    for party in &copyright.parties {
1841        if !target.parties.iter().any(|existing| {
1842            existing.r#type == party.r#type
1843                && existing.role == party.role
1844                && existing.name == party.name
1845                && existing.email == party.email
1846                && existing.url == party.url
1847                && existing.organization == party.organization
1848                && existing.organization_url == party.organization_url
1849                && existing.timezone == party.timezone
1850        }) {
1851            target.parties.push(party.clone());
1852        }
1853    }
1854}
1855
1856fn parse_deb_filename(filename: &str) -> PackageData {
1857    let without_ext = filename.trim_end_matches(".deb");
1858
1859    let parts: Vec<&str> = without_ext.split('_').collect();
1860    if parts.len() < 2 {
1861        return default_package_data(DatasourceId::DebianDeb);
1862    }
1863
1864    let name = parts[0].to_string();
1865    let version = parts[1].to_string();
1866    let architecture = if parts.len() >= 3 {
1867        Some(parts[2].to_string())
1868    } else {
1869        None
1870    };
1871
1872    let namespace = Some("debian".to_string());
1873
1874    PackageData {
1875        datasource_id: Some(DatasourceId::DebianDeb),
1876        package_type: Some(PACKAGE_TYPE),
1877        namespace: namespace.clone(),
1878        name: Some(name.clone()),
1879        version: Some(version.clone()),
1880        purl: build_debian_purl(
1881            &name,
1882            Some(&version),
1883            namespace.as_deref(),
1884            architecture.as_deref(),
1885        ),
1886        ..Default::default()
1887    }
1888}
1889
1890/// Parser for control files inside extracted .deb control tarballs.
1891///
1892/// Matches paths like `*/control.tar.gz-extract/control` and
1893/// `*/control.tar.xz-extract/control` which are created by ExtractCode
1894/// when extracting .deb archives.
1895pub struct DebianControlInExtractedDebParser;
1896
1897impl PackageParser for DebianControlInExtractedDebParser {
1898    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1899
1900    fn is_match(path: &Path) -> bool {
1901        path.file_name()
1902            .and_then(|n| n.to_str())
1903            .is_some_and(|name| name == "control")
1904            && path
1905                .to_str()
1906                .map(|p| {
1907                    p.ends_with("control.tar.gz-extract/control")
1908                        || p.ends_with("control.tar.xz-extract/control")
1909                })
1910                .unwrap_or(false)
1911    }
1912
1913    fn extract_packages(path: &Path) -> Vec<PackageData> {
1914        let content = match read_file_to_string(path) {
1915            Ok(c) => c,
1916            Err(e) => {
1917                warn!(
1918                    "Failed to read control file in extracted deb {:?}: {}",
1919                    path, e
1920                );
1921                return vec![default_package_data(
1922                    DatasourceId::DebianControlExtractedDeb,
1923                )];
1924            }
1925        };
1926
1927        // A control file inside an extracted .deb has a single paragraph
1928        // (unlike debian/control which has source + binary paragraphs)
1929        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1930        if paragraphs.is_empty() {
1931            return vec![default_package_data(
1932                DatasourceId::DebianControlExtractedDeb,
1933            )];
1934        }
1935
1936        if let Some(pkg) = build_package_from_paragraph(
1937            &paragraphs[0],
1938            None,
1939            DatasourceId::DebianControlExtractedDeb,
1940        ) {
1941            vec![pkg]
1942        } else {
1943            vec![default_package_data(
1944                DatasourceId::DebianControlExtractedDeb,
1945            )]
1946        }
1947    }
1948}
1949
1950/// Parser for MD5 checksum files inside extracted .deb control tarballs
1951pub struct DebianMd5sumInPackageParser;
1952
1953impl PackageParser for DebianMd5sumInPackageParser {
1954    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1955
1956    fn is_match(path: &Path) -> bool {
1957        path.file_name()
1958            .and_then(|n| n.to_str())
1959            .is_some_and(|name| name == "md5sums")
1960            && path
1961                .to_str()
1962                .map(|p| {
1963                    p.ends_with("control.tar.gz-extract/md5sums")
1964                        || p.ends_with("control.tar.xz-extract/md5sums")
1965                })
1966                .unwrap_or(false)
1967    }
1968
1969    fn extract_packages(path: &Path) -> Vec<PackageData> {
1970        let content = match read_file_to_string(path) {
1971            Ok(c) => c,
1972            Err(e) => {
1973                warn!("Failed to read md5sums file {:?}: {}", path, e);
1974                return vec![default_package_data(
1975                    DatasourceId::DebianMd5SumsInExtractedDeb,
1976                )];
1977            }
1978        };
1979
1980        let package_name = extract_package_name_from_deb_path(path);
1981
1982        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
1983    }
1984}
1985
/// Recovers the package name from the path of a file inside an extracted
/// `.deb`, e.g. `foo_1.0_amd64.deb-extract/control.tar.gz-extract/md5sums`
/// yields `foo`.
///
/// The grandparent directory of `path` must be named `<stem>.deb-extract`;
/// the name is the part of `<stem>` before the first `_`.
///
/// Returns `None` when the path is too shallow, the directory name is not
/// valid UTF-8 or lacks the `.deb-extract` suffix, or the resulting name
/// would be empty.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let deb_extract_dir = path.parent()?.parent()?.file_name()?.to_str()?;
    let deb_stem = deb_extract_dir
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    // `split` always yields a first field, even an empty one (e.g. for
    // `.deb-extract` or `_1.0.deb-extract`), so reject that case explicitly
    // rather than reporting an empty package name.
    let name = deb_stem.split('_').next().filter(|n| !n.is_empty())?;

    Some(name.to_string())
}
1996
1997fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
1998    let mut file_references = Vec::new();
1999
2000    for line in content.lines() {
2001        let line = line.trim();
2002        if line.is_empty() || line.starts_with('#') {
2003            continue;
2004        }
2005
2006        let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find("  ") {
2007            (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
2008        } else if let Some((hash, path)) = line.split_once(' ') {
2009            (Some(hash.trim().to_string()), path.trim())
2010        } else {
2011            (None, line)
2012        };
2013
2014        if IGNORED_ROOT_DIRS.contains(&filepath) {
2015            continue;
2016        }
2017
2018        file_references.push(FileReference {
2019            path: filepath.to_string(),
2020            size: None,
2021            sha1: None,
2022            md5: md5sum,
2023            sha256: None,
2024            sha512: None,
2025            extra_data: None,
2026        });
2027    }
2028
2029    if file_references.is_empty() {
2030        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2031    }
2032
2033    let namespace = Some("debian".to_string());
2034    let mut package = PackageData {
2035        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2036        package_type: Some(PACKAGE_TYPE),
2037        namespace: namespace.clone(),
2038        name: package_name.map(|s| s.to_string()),
2039        file_references,
2040        ..Default::default()
2041    };
2042
2043    if let Some(n) = &package.name {
2044        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2045    }
2046
2047    package
2048}
2049
2050crate::register_parser!(
2051    "Debian control file in extracted .deb control tarball",
2052    &[
2053        "**/control.tar.gz-extract/control",
2054        "**/control.tar.xz-extract/control"
2055    ],
2056    "deb",
2057    "",
2058    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
2059);
2060
2061crate::register_parser!(
2062    "Debian MD5 checksums in extracted .deb control tarball",
2063    &[
2064        "**/control.tar.gz-extract/md5sums",
2065        "**/control.tar.xz-extract/md5sums"
2066    ],
2067    "deb",
2068    "",
2069    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
2070);
2071
2072#[cfg(test)]
2073mod tests {
2074    use super::*;
2075    use crate::models::DatasourceId;
2076    use crate::models::PackageType;
2077    use ar::{Builder as ArBuilder, Header as ArHeader};
2078    use flate2::Compression;
2079    use flate2::write::GzEncoder;
2080    use liblzma::write::XzEncoder;
2081    use std::io::Cursor;
2082    use std::path::PathBuf;
2083    use tar::{Builder as TarBuilder, Header as TarHeader};
2084    use tempfile::NamedTempFile;
2085
2086    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2087        let mut control_tar = Vec::new();
2088        {
2089            let encoder = XzEncoder::new(&mut control_tar, 6);
2090            let mut tar_builder = TarBuilder::new(encoder);
2091
2092            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2093            let mut header = TarHeader::new_gnu();
2094            header
2095                .set_path("control")
2096                .expect("control tar path should be valid");
2097            header.set_size(control_content.len() as u64);
2098            header.set_mode(0o644);
2099            header.set_cksum();
2100            tar_builder
2101                .append(&header, Cursor::new(control_content))
2102                .expect("control file should be appended to tar.xz");
2103            tar_builder.finish().expect("control tar.xz should finish");
2104        }
2105
2106        let deb = NamedTempFile::new().expect("temp deb file should be created");
2107        {
2108            let mut builder = ArBuilder::new(
2109                deb.reopen()
2110                    .expect("temporary deb file should reopen for writing"),
2111            );
2112
2113            let debian_binary = b"2.0\n";
2114            let mut debian_binary_header =
2115                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2116            debian_binary_header.set_mode(0o100644);
2117            builder
2118                .append(&debian_binary_header, Cursor::new(debian_binary))
2119                .expect("debian-binary entry should be appended");
2120
2121            let mut control_header =
2122                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2123            control_header.set_mode(0o100644);
2124            builder
2125                .append(&control_header, Cursor::new(control_tar))
2126                .expect("control.tar.xz entry should be appended");
2127        }
2128
2129        deb
2130    }
2131
2132    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2133        let mut control_tar = Vec::new();
2134        {
2135            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2136            let mut tar_builder = TarBuilder::new(encoder);
2137
2138            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2139            let mut header = TarHeader::new_gnu();
2140            header
2141                .set_path("control")
2142                .expect("control tar path should be valid");
2143            header.set_size(control_content.len() as u64);
2144            header.set_mode(0o644);
2145            header.set_cksum();
2146            tar_builder
2147                .append(&header, Cursor::new(control_content))
2148                .expect("control file should be appended to tar.gz");
2149            tar_builder.finish().expect("control tar.gz should finish");
2150        }
2151
2152        let mut data_tar = Vec::new();
2153        {
2154            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2155            let mut tar_builder = TarBuilder::new(encoder);
2156
2157            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2158            let mut header = TarHeader::new_gnu();
2159            header
2160                .set_path("./usr/share/doc/synthetic/copyright")
2161                .expect("copyright path should be valid");
2162            header.set_size(copyright.len() as u64);
2163            header.set_mode(0o644);
2164            header.set_cksum();
2165            tar_builder
2166                .append(&header, Cursor::new(copyright))
2167                .expect("copyright file should be appended to data tar");
2168            tar_builder.finish().expect("data tar.gz should finish");
2169        }
2170
2171        let deb = NamedTempFile::new().expect("temp deb file should be created");
2172        {
2173            let mut builder = ArBuilder::new(
2174                deb.reopen()
2175                    .expect("temporary deb file should reopen for writing"),
2176            );
2177
2178            let debian_binary = b"2.0\n";
2179            let mut debian_binary_header =
2180                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2181            debian_binary_header.set_mode(0o100644);
2182            builder
2183                .append(&debian_binary_header, Cursor::new(debian_binary))
2184                .expect("debian-binary entry should be appended");
2185
2186            let mut control_header =
2187                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2188            control_header.set_mode(0o100644);
2189            builder
2190                .append(&control_header, Cursor::new(control_tar))
2191                .expect("control.tar.gz entry should be appended");
2192
2193            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2194            data_header.set_mode(0o100644);
2195            builder
2196                .append(&data_header, Cursor::new(data_tar))
2197                .expect("data.tar.gz entry should be appended");
2198        }
2199
2200        deb
2201    }
2202
2203    // ====== Namespace detection ======
2204
2205    #[test]
2206    fn test_detect_namespace_from_ubuntu_version() {
2207        assert_eq!(
2208            detect_namespace(Some("1.0-1ubuntu1"), None),
2209            Some("ubuntu".to_string())
2210        );
2211    }
2212
2213    #[test]
2214    fn test_detect_namespace_from_debian_version() {
2215        assert_eq!(
2216            detect_namespace(Some("1.0-1+deb11u1"), None),
2217            Some("debian".to_string())
2218        );
2219    }
2220
2221    #[test]
2222    fn test_detect_namespace_from_ubuntu_maintainer() {
2223        assert_eq!(
2224            detect_namespace(
2225                None,
2226                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2227            ),
2228            Some("ubuntu".to_string())
2229        );
2230    }
2231
2232    #[test]
2233    fn test_detect_namespace_from_debian_maintainer() {
2234        assert_eq!(
2235            detect_namespace(None, Some("John Doe <john@debian.org>")),
2236            Some("debian".to_string())
2237        );
2238    }
2239
2240    #[test]
2241    fn test_detect_namespace_default() {
2242        assert_eq!(
2243            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2244            Some("debian".to_string())
2245        );
2246    }
2247
2248    #[test]
2249    fn test_detect_namespace_version_takes_priority() {
2250        // Version clue should be checked before maintainer
2251        assert_eq!(
2252            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2253            Some("ubuntu".to_string())
2254        );
2255    }
2256
2257    // ====== PURL generation ======
2258
2259    #[test]
2260    fn test_build_purl_basic() {
2261        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2262        assert_eq!(
2263            purl,
2264            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2265        );
2266    }
2267
2268    #[test]
2269    fn test_build_purl_no_version() {
2270        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2271        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2272    }
2273
2274    #[test]
2275    fn test_build_purl_no_arch() {
2276        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2277        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2278    }
2279
2280    #[test]
2281    fn test_build_purl_no_namespace() {
2282        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2283        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2284    }
2285
2286    // ====== Dependency parsing ======
2287
2288    #[test]
2289    fn test_parse_simple_dependency() {
2290        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2291        assert_eq!(deps.len(), 1);
2292        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2293        assert_eq!(deps[0].extracted_requirement, None);
2294        assert_eq!(deps[0].scope, Some("depends".to_string()));
2295    }
2296
2297    #[test]
2298    fn test_parse_dependency_with_version() {
2299        let deps =
2300            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2301        assert_eq!(deps.len(), 1);
2302        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2303        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2304    }
2305
2306    #[test]
2307    fn test_parse_dependency_exact_version() {
2308        let deps = parse_dependency_field(
2309            "libc6 (= 2.31-13+deb11u5)",
2310            "depends",
2311            true,
2312            false,
2313            Some("debian"),
2314        );
2315        assert_eq!(deps.len(), 1);
2316        assert_eq!(deps[0].is_pinned, Some(true));
2317    }
2318
2319    #[test]
2320    fn test_parse_dependency_strict_less() {
2321        let deps =
2322            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2323        assert_eq!(deps.len(), 1);
2324        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2325        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2326    }
2327
2328    #[test]
2329    fn test_parse_multiple_dependencies() {
2330        let deps = parse_dependency_field(
2331            "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2332            "depends",
2333            true,
2334            false,
2335            Some("debian"),
2336        );
2337        assert_eq!(deps.len(), 3);
2338    }
2339
2340    #[test]
2341    fn test_parse_dependency_alternatives() {
2342        let deps = parse_dependency_field(
2343            "libssl1.1 | libssl3",
2344            "depends",
2345            true,
2346            false,
2347            Some("debian"),
2348        );
2349        assert_eq!(deps.len(), 2);
2350        // Alternatives are marked as optional
2351        assert_eq!(deps[0].is_optional, Some(true));
2352        assert_eq!(deps[1].is_optional, Some(true));
2353    }
2354
2355    #[test]
2356    fn test_parse_dependency_skips_substitutions() {
2357        let deps = parse_dependency_field(
2358            "${shlibs:Depends}, ${misc:Depends}, libc6",
2359            "depends",
2360            true,
2361            false,
2362            Some("debian"),
2363        );
2364        assert_eq!(deps.len(), 1);
2365        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2366    }
2367
2368    #[test]
2369    fn test_parse_dependency_with_arch_qualifier() {
2370        // Dependencies can have [arch] qualifiers which we ignore
2371        let deps = parse_dependency_field(
2372            "libc6 (>= 2.17) [amd64]",
2373            "depends",
2374            true,
2375            false,
2376            Some("debian"),
2377        );
2378        assert_eq!(deps.len(), 1);
2379        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2380    }
2381
2382    #[test]
2383    fn test_parse_empty_dependency() {
2384        let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2385        assert!(deps.is_empty());
2386    }
2387
2388    // ====== Source field parsing ======
2389
2390    #[test]
2391    fn test_parse_source_field_name_only() {
2392        let sources = parse_source_field(Some("util-linux"), Some("debian"));
2393        assert_eq!(sources.len(), 1);
2394        assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2395    }
2396
2397    #[test]
2398    fn test_parse_source_field_with_version() {
2399        let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2400        assert_eq!(sources.len(), 1);
2401        assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2402    }
2403
2404    #[test]
2405    fn test_parse_source_field_empty() {
2406        let sources = parse_source_field(None, Some("debian"));
2407        assert!(sources.is_empty());
2408    }
2409
2410    // ====== Control file parsing ======
2411
2412    #[test]
2413    fn test_parse_debian_control_source_and_binary() {
2414        let content = "\
2415Source: curl
2416Section: web
2417Priority: optional
2418Maintainer: Alessandro Ghedini <ghedo@debian.org>
2419Homepage: https://curl.se/
2420Vcs-Browser: https://salsa.debian.org/debian/curl
2421Vcs-Git: https://salsa.debian.org/debian/curl.git
2422Build-Depends: debhelper (>= 12), libssl-dev
2423
2424Package: curl
2425Architecture: amd64
2426Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2427Description: command line tool for transferring data with URL syntax";
2428
2429        let packages = parse_debian_control(content);
2430        assert_eq!(packages.len(), 1);
2431
2432        let pkg = &packages[0];
2433        assert_eq!(pkg.name, Some("curl".to_string()));
2434        assert_eq!(pkg.package_type, Some(PackageType::Deb));
2435        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2436        assert_eq!(
2437            pkg.vcs_url,
2438            Some("https://salsa.debian.org/debian/curl.git".to_string())
2439        );
2440        assert_eq!(
2441            pkg.code_view_url,
2442            Some("https://salsa.debian.org/debian/curl".to_string())
2443        );
2444
2445        // Maintainer from source paragraph
2446        assert_eq!(pkg.parties.len(), 1);
2447        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2448        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2449        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2450
2451        // Dependencies parsed
2452        assert!(!pkg.dependencies.is_empty());
2453    }
2454
2455    #[test]
2456    fn test_parse_debian_control_multiple_binary() {
2457        let content = "\
2458Source: gzip
2459Maintainer: Debian Developer <dev@debian.org>
2460
2461Package: gzip
2462Architecture: any
2463Depends: libc6 (>= 2.17)
2464Description: GNU file compression
2465
2466Package: gzip-win32
2467Architecture: all
2468Description: gzip for Windows";
2469
2470        let packages = parse_debian_control(content);
2471        assert_eq!(packages.len(), 2);
2472        assert_eq!(packages[0].name, Some("gzip".to_string()));
2473        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2474
2475        // Both inherit source maintainer
2476        assert_eq!(packages[0].parties.len(), 1);
2477        assert_eq!(packages[1].parties.len(), 1);
2478    }
2479
2480    #[test]
2481    fn test_parse_debian_control_source_only() {
2482        let content = "\
2483Source: my-package
2484Maintainer: Test User <test@debian.org>
2485Build-Depends: debhelper (>= 13)";
2486
2487        let packages = parse_debian_control(content);
2488        assert_eq!(packages.len(), 1);
2489        assert_eq!(packages[0].name, Some("my-package".to_string()));
2490        // Build-Depends parsed
2491        assert!(!packages[0].dependencies.is_empty());
2492        assert_eq!(
2493            packages[0].dependencies[0].scope,
2494            Some("build-depends".to_string())
2495        );
2496    }
2497
2498    #[test]
2499    fn test_parse_debian_control_with_uploaders() {
2500        let content = "\
2501Source: example
2502Maintainer: Main Dev <main@debian.org>
2503Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2504
2505Package: example
2506Architecture: any
2507Description: test package";
2508
2509        let packages = parse_debian_control(content);
2510        assert_eq!(packages.len(), 1);
2511        // 1 maintainer + 2 uploaders
2512        assert_eq!(packages[0].parties.len(), 3);
2513        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2514        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2515        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2516    }
2517
2518    #[test]
2519    fn test_parse_debian_control_vcs_git_with_branch() {
2520        let content = "\
2521Source: example
2522Maintainer: Dev <dev@debian.org>
2523Vcs-Git: https://salsa.debian.org/example.git -b main
2524
2525Package: example
2526Architecture: any
2527Description: test";
2528
2529        let packages = parse_debian_control(content);
2530        assert_eq!(packages.len(), 1);
2531        // Should only take the URL, not the branch
2532        assert_eq!(
2533            packages[0].vcs_url,
2534            Some("https://salsa.debian.org/example.git".to_string())
2535        );
2536    }
2537
2538    #[test]
2539    fn test_parse_debian_control_multi_arch() {
2540        let content = "\
2541Source: example
2542Maintainer: Dev <dev@debian.org>
2543
2544Package: libexample
2545Architecture: any
2546Multi-Arch: same
2547Description: shared library";
2548
2549        let packages = parse_debian_control(content);
2550        assert_eq!(packages.len(), 1);
2551        let extra = packages[0].extra_data.as_ref().unwrap();
2552        assert_eq!(
2553            extra.get("multi_arch"),
2554            Some(&serde_json::Value::String("same".to_string()))
2555        );
2556    }
2557
2558    // ====== dpkg/status parsing ======
2559
2560    #[test]
2561    fn test_parse_dpkg_status_basic() {
2562        let content = "\
2563Package: base-files
2564Status: install ok installed
2565Priority: required
2566Section: admin
2567Installed-Size: 391
2568Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2569Architecture: amd64
2570Version: 11ubuntu5.6
2571Description: Debian base system miscellaneous files
2572Homepage: https://tracker.debian.org/pkg/base-files
2573
2574Package: not-installed
2575Status: deinstall ok config-files
2576Architecture: amd64
2577Version: 1.0
2578Description: This should be skipped";
2579
2580        let packages = parse_dpkg_status(content);
2581        assert_eq!(packages.len(), 1);
2582
2583        let pkg = &packages[0];
2584        assert_eq!(pkg.name, Some("base-files".to_string()));
2585        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2586        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2587        assert_eq!(
2588            pkg.datasource_id,
2589            Some(DatasourceId::DebianInstalledStatusDb)
2590        );
2591
2592        // Installed-Size in extra_data
2593        let extra = pkg.extra_data.as_ref().unwrap();
2594        assert_eq!(
2595            extra.get("installed_size"),
2596            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2597        );
2598    }
2599
2600    #[test]
2601    fn test_parse_dpkg_status_multiple_installed() {
2602        let content = "\
2603Package: libc6
2604Status: install ok installed
2605Architecture: amd64
2606Version: 2.31-13+deb11u5
2607Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2608Description: GNU C Library
2609
2610Package: zlib1g
2611Status: install ok installed
2612Architecture: amd64
2613Version: 1:1.2.11.dfsg-2+deb11u2
2614Maintainer: Mark Brown <broonie@debian.org>
2615Description: compression library";
2616
2617        let packages = parse_dpkg_status(content);
2618        assert_eq!(packages.len(), 2);
2619        assert_eq!(packages[0].name, Some("libc6".to_string()));
2620        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2621    }
2622
2623    #[test]
2624    fn test_parse_dpkg_status_with_dependencies() {
2625        let content = "\
2626Package: curl
2627Status: install ok installed
2628Architecture: amd64
2629Version: 7.74.0-1.3+deb11u7
2630Maintainer: Alessandro Ghedini <ghedo@debian.org>
2631Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2632Recommends: ca-certificates
2633Description: command line tool for transferring data with URL syntax";
2634
2635        let packages = parse_dpkg_status(content);
2636        assert_eq!(packages.len(), 1);
2637
2638        let deps = &packages[0].dependencies;
2639        // 2 from Depends + 1 from Recommends
2640        assert_eq!(deps.len(), 3);
2641
2642        // Check first dependency
2643        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2644        assert_eq!(deps[0].scope, Some("depends".to_string()));
2645        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2646
2647        // Check recommends
2648        assert_eq!(
2649            deps[2].purl,
2650            Some("pkg:deb/debian/ca-certificates".to_string())
2651        );
2652        assert_eq!(deps[2].scope, Some("recommends".to_string()));
2653        assert_eq!(deps[2].is_optional, Some(true));
2654    }
2655
2656    #[test]
2657    fn test_parse_dpkg_status_with_source() {
2658        let content = "\
2659Package: libncurses6
2660Status: install ok installed
2661Architecture: amd64
2662Source: ncurses (6.2+20201114-2+deb11u1)
2663Version: 6.2+20201114-2+deb11u1
2664Maintainer: Craig Small <csmall@debian.org>
2665Description: shared libraries for terminal handling";
2666
2667        let packages = parse_dpkg_status(content);
2668        assert_eq!(packages.len(), 1);
2669        assert!(!packages[0].source_packages.is_empty());
2670        // Source PURL should include version from parentheses
2671        assert!(packages[0].source_packages[0].contains("ncurses"));
2672    }
2673
2674    #[test]
2675    fn test_parse_dpkg_status_filters_not_installed() {
2676        let content = "\
2677Package: installed-pkg
2678Status: install ok installed
2679Version: 1.0
2680Architecture: amd64
2681Description: installed
2682
2683Package: half-installed
2684Status: install ok half-installed
2685Version: 2.0
2686Architecture: amd64
2687Description: half installed
2688
2689Package: deinstall-pkg
2690Status: deinstall ok config-files
2691Version: 3.0
2692Architecture: amd64
2693Description: deinstalled
2694
2695Package: purge-pkg
2696Status: purge ok not-installed
2697Version: 4.0
2698Architecture: amd64
2699Description: purged";
2700
2701        let packages = parse_dpkg_status(content);
2702        assert_eq!(packages.len(), 1);
2703        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2704    }
2705
2706    #[test]
2707    fn test_parse_dpkg_status_empty() {
2708        let packages = parse_dpkg_status("");
2709        assert!(packages.is_empty());
2710    }
2711
2712    // ====== is_match tests ======
2713
2714    #[test]
2715    fn test_debian_control_is_match() {
2716        assert!(DebianControlParser::is_match(Path::new(
2717            "/path/to/debian/control"
2718        )));
2719        assert!(DebianControlParser::is_match(Path::new("debian/control")));
2720        assert!(!DebianControlParser::is_match(Path::new(
2721            "/path/to/control"
2722        )));
2723        assert!(!DebianControlParser::is_match(Path::new(
2724            "/path/to/debian/changelog"
2725        )));
2726    }
2727
2728    #[test]
2729    fn test_debian_installed_is_match() {
2730        assert!(DebianInstalledParser::is_match(Path::new(
2731            "/var/lib/dpkg/status"
2732        )));
2733        assert!(DebianInstalledParser::is_match(Path::new(
2734            "some/root/var/lib/dpkg/status"
2735        )));
2736        assert!(!DebianInstalledParser::is_match(Path::new(
2737            "/var/lib/dpkg/status.d/something"
2738        )));
2739        assert!(!DebianInstalledParser::is_match(Path::new(
2740            "/var/lib/dpkg/available"
2741        )));
2742    }
2743
2744    // ====== Edge cases ======
2745
2746    #[test]
2747    fn test_parse_debian_control_empty_input() {
2748        let packages = parse_debian_control("");
2749        assert!(packages.is_empty());
2750    }
2751
2752    #[test]
2753    fn test_parse_debian_control_malformed_input() {
2754        let content = "this is not a valid control file\nwith random text";
2755        let packages = parse_debian_control(content);
2756        // Should not panic, may return empty or partial results
2757        assert!(packages.is_empty());
2758    }
2759
2760    #[test]
2761    fn test_dependency_with_epoch_version() {
2762        // Debian versions can have epochs like 1:2.3.4
2763        let deps = parse_dependency_field(
2764            "zlib1g (>= 1:1.2.11)",
2765            "depends",
2766            true,
2767            false,
2768            Some("debian"),
2769        );
2770        assert_eq!(deps.len(), 1);
2771        assert_eq!(
2772            deps[0].extracted_requirement,
2773            Some(">= 1:1.2.11".to_string())
2774        );
2775    }
2776
2777    #[test]
2778    fn test_dependency_with_plus_in_name() {
2779        let deps =
2780            parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2781        assert_eq!(deps.len(), 1);
2782        assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2783    }
2784
2785    #[test]
2786    fn test_dsc_parser_is_match() {
2787        assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2788        assert!(DebianDscParser::is_match(&PathBuf::from(
2789            "adduser_3.118+deb11u1.dsc"
2790        )));
2791        assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2792        assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2793    }
2794
2795    #[test]
2796    fn test_dsc_parser_adduser() {
2797        let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2798        let package = DebianDscParser::extract_first_package(&path);
2799
2800        assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2801        assert_eq!(package.namespace, Some("debian".to_string()));
2802        assert_eq!(package.name, Some("adduser".to_string()));
2803        assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2804        assert_eq!(
2805            package.purl,
2806            Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2807        );
2808        assert_eq!(
2809            package.vcs_url,
2810            Some("https://salsa.debian.org/debian/adduser.git".to_string())
2811        );
2812        assert_eq!(
2813            package.code_view_url,
2814            Some("https://salsa.debian.org/debian/adduser".to_string())
2815        );
2816        assert_eq!(
2817            package.datasource_id,
2818            Some(DatasourceId::DebianSourceControlDsc)
2819        );
2820
2821        assert_eq!(package.parties.len(), 2);
2822        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2823        assert_eq!(
2824            package.parties[0].name,
2825            Some("Debian Adduser Developers".to_string())
2826        );
2827        assert_eq!(
2828            package.parties[0].email,
2829            Some("adduser@packages.debian.org".to_string())
2830        );
2831        assert_eq!(package.parties[0].r#type, None);
2832
2833        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2834        assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2835        assert_eq!(
2836            package.parties[1].email,
2837            Some("mh+debian-packages@zugschlus.de".to_string())
2838        );
2839        assert_eq!(package.parties[1].r#type, None);
2840
2841        assert_eq!(package.source_packages.len(), 1);
2842        assert_eq!(
2843            package.source_packages[0],
2844            "pkg:deb/debian/adduser".to_string()
2845        );
2846
2847        assert!(!package.dependencies.is_empty());
2848        let build_dep_names: Vec<String> = package
2849            .dependencies
2850            .iter()
2851            .filter_map(|d| d.purl.as_ref())
2852            .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2853            .map(|p| p.to_string())
2854            .collect();
2855        assert!(build_dep_names.len() >= 2);
2856    }
2857
2858    #[test]
2859    fn test_dsc_parser_zsh() {
2860        let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2861        let package = DebianDscParser::extract_first_package(&path);
2862
2863        assert_eq!(package.name, Some("zsh".to_string()));
2864        assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2865        assert_eq!(package.namespace, Some("debian".to_string()));
2866        assert!(package.purl.is_some());
2867        assert!(package.purl.as_ref().unwrap().contains("zsh"));
2868        assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2869    }
2870
2871    #[test]
2872    fn test_parse_dsc_content_basic() {
2873        let content = "Format: 3.0 (native)
2874Source: testpkg
2875Binary: testpkg
2876Architecture: amd64
2877Version: 1.0.0
2878Maintainer: Test User <test@example.com>
2879Standards-Version: 4.5.0
2880Build-Depends: debhelper (>= 12)
2881Files:
2882 abc123 1024 testpkg_1.0.0.tar.xz
2883";
2884
2885        let package = parse_dsc_content(content);
2886        assert_eq!(package.name, Some("testpkg".to_string()));
2887        assert_eq!(package.version, Some("1.0.0".to_string()));
2888        assert_eq!(package.namespace, Some("debian".to_string()));
2889        assert_eq!(package.parties.len(), 1);
2890        assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2891        assert_eq!(
2892            package.parties[0].email,
2893            Some("test@example.com".to_string())
2894        );
2895        assert_eq!(package.dependencies.len(), 1);
2896        assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2897    }
2898
2899    #[test]
2900    fn test_parse_dsc_content_with_uploaders() {
2901        let content = "Source: mypkg
2902Version: 2.0
2903Architecture: all
2904Maintainer: Main Dev <main@example.com>
2905Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2906";
2907
2908        let package = parse_dsc_content(content);
2909        assert_eq!(package.parties.len(), 3);
2910        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2911        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2912        assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2913    }
2914
2915    #[test]
2916    fn test_orig_tar_parser_is_match() {
2917        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2918            "package_1.0.orig.tar.gz"
2919        )));
2920        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2921            "abseil_0~20200923.3.orig.tar.xz"
2922        )));
2923        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2924            "package.debian.tar.gz"
2925        )));
2926        assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2927    }
2928
2929    #[test]
2930    fn test_debian_tar_parser_is_match() {
2931        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2932            "package_1.0-1.debian.tar.xz"
2933        )));
2934        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2935            "abseil_20220623.1-1.debian.tar.gz"
2936        )));
2937        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2938            "package.orig.tar.gz"
2939        )));
2940        assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2941    }
2942
2943    #[test]
2944    fn test_parse_orig_tar_filename() {
2945        let pkg = parse_source_tarball_filename(
2946            "abseil_0~20200923.3.orig.tar.gz",
2947            DatasourceId::DebianOriginalSourceTarball,
2948        );
2949        assert_eq!(pkg.name, Some("abseil".to_string()));
2950        assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2951        assert_eq!(pkg.namespace, Some("debian".to_string()));
2952        assert_eq!(
2953            pkg.purl,
2954            Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2955        );
2956        assert_eq!(
2957            pkg.datasource_id,
2958            Some(DatasourceId::DebianOriginalSourceTarball)
2959        );
2960    }
2961
2962    #[test]
2963    fn test_parse_debian_tar_filename() {
2964        let pkg = parse_source_tarball_filename(
2965            "abseil_20220623.1-1.debian.tar.xz",
2966            DatasourceId::DebianSourceMetadataTarball,
2967        );
2968        assert_eq!(pkg.name, Some("abseil".to_string()));
2969        assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2970        assert_eq!(pkg.namespace, Some("debian".to_string()));
2971        assert_eq!(
2972            pkg.purl,
2973            Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2974        );
2975    }
2976
2977    #[test]
2978    fn test_parse_deb_filename() {
2979        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2980        assert_eq!(pkg.name, Some("nginx".to_string()));
2981        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
2982
2983        let pkg = parse_deb_filename("invalid.deb");
2984        assert!(pkg.name.is_none());
2985        assert!(pkg.version.is_none());
2986    }
2987
2988    #[test]
2989    fn test_parse_source_tarball_various_compressions() {
2990        let pkg_gz = parse_source_tarball_filename(
2991            "test_1.0.orig.tar.gz",
2992            DatasourceId::DebianOriginalSourceTarball,
2993        );
2994        let pkg_xz = parse_source_tarball_filename(
2995            "test_1.0.orig.tar.xz",
2996            DatasourceId::DebianOriginalSourceTarball,
2997        );
2998        let pkg_bz2 = parse_source_tarball_filename(
2999            "test_1.0.orig.tar.bz2",
3000            DatasourceId::DebianOriginalSourceTarball,
3001        );
3002
3003        assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3004        assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3005        assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3006    }
3007
3008    #[test]
3009    fn test_parse_source_tarball_invalid_format() {
3010        let pkg = parse_source_tarball_filename(
3011            "invalid-no-underscore.tar.gz",
3012            DatasourceId::DebianOriginalSourceTarball,
3013        );
3014        assert!(pkg.name.is_none());
3015        assert!(pkg.version.is_none());
3016    }
3017
3018    #[test]
3019    fn test_list_parser_is_match() {
3020        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3021            "/var/lib/dpkg/info/bash.list"
3022        )));
3023        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3024            "/var/lib/dpkg/info/package:amd64.list"
3025        )));
3026        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3027            "bash.list"
3028        )));
3029        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3030            "/var/lib/dpkg/info/bash.md5sums"
3031        )));
3032    }
3033
3034    #[test]
3035    fn test_md5sums_parser_is_match() {
3036        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3037            "/var/lib/dpkg/info/bash.md5sums"
3038        )));
3039        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3040            "/var/lib/dpkg/info/package:amd64.md5sums"
3041        )));
3042        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3043            "bash.md5sums"
3044        )));
3045        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3046            "/var/lib/dpkg/info/bash.list"
3047        )));
3048    }
3049
3050    #[test]
3051    fn test_parse_debian_file_list_plain_list() {
3052        let content = "/.
3053/bin
3054/bin/bash
3055/usr/bin/bashbug
3056/usr/share/doc/bash/README
3057";
3058        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3059        assert_eq!(pkg.name, Some("bash".to_string()));
3060        assert_eq!(pkg.file_references.len(), 3);
3061        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3062        assert_eq!(pkg.file_references[0].md5, None);
3063        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3064        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3065    }
3066
3067    #[test]
3068    fn test_parse_debian_file_list_md5sums() {
3069        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash
30701c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug
3071f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz
3072";
3073        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3074        assert_eq!(pkg.name, Some("bash".to_string()));
3075        assert_eq!(pkg.file_references.len(), 3);
3076        assert_eq!(pkg.file_references[0].path, "bin/bash");
3077        assert_eq!(
3078            pkg.file_references[0].md5,
3079            Some("77506afebd3b7e19e937a678a185b62e".to_string())
3080        );
3081        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3082        assert_eq!(
3083            pkg.file_references[1].md5,
3084            Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3085        );
3086    }
3087
3088    #[test]
3089    fn test_parse_debian_file_list_with_arch() {
3090        let content = "/usr/bin/foo
3091/usr/lib/x86_64-linux-gnu/libfoo.so
3092";
3093        let pkg = parse_debian_file_list(
3094            content,
3095            "libfoo:amd64",
3096            DatasourceId::DebianInstalledFilesList,
3097        );
3098        assert_eq!(pkg.name, Some("libfoo".to_string()));
3099        assert!(pkg.purl.is_some());
3100        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3101        assert_eq!(pkg.file_references.len(), 2);
3102    }
3103
3104    #[test]
3105    fn test_parse_debian_file_list_skips_comments_and_empty() {
3106        let content = "# This is a comment
3107/bin/bash
3108
3109/usr/bin/bashbug
3110  
3111";
3112        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3113        assert_eq!(pkg.file_references.len(), 2);
3114    }
3115
3116    #[test]
3117    fn test_parse_debian_file_list_md5sums_only() {
3118        let content = "abc123  usr/bin/tool
3119";
3120        let pkg =
3121            parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3122        assert_eq!(pkg.name, None);
3123        assert_eq!(pkg.file_references.len(), 1);
3124    }
3125
3126    #[test]
3127    fn test_parse_debian_file_list_ignores_root_dirs() {
3128        let content = "/.
3129/bin
3130/bin/bash
3131/etc
3132/usr
3133/var
3134";
3135        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3136        assert_eq!(pkg.file_references.len(), 1);
3137        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3138    }
3139
3140    #[test]
3141    fn test_copyright_parser_is_match() {
3142        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3143            "/usr/share/doc/bash/copyright"
3144        )));
3145        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3146            "debian/copyright"
3147        )));
3148        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3149            "copyright.txt"
3150        )));
3151        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3152            "/etc/copyright"
3153        )));
3154    }
3155
3156    #[test]
3157    fn test_extract_package_name_from_path() {
3158        assert_eq!(
3159            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3160            Some("bash".to_string())
3161        );
3162        assert_eq!(
3163            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3164            Some("libseccomp2".to_string())
3165        );
3166        assert_eq!(
3167            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3168            None
3169        );
3170    }
3171
3172    #[test]
3173    fn test_parse_copyright_dep5_format() {
3174        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3175Upstream-Name: libseccomp
3176Source: https://sourceforge.net/projects/libseccomp/
3177
3178Files: *
3179Copyright: 2012 Paul Moore <pmoore@redhat.com>
3180 2012 Ashley Lai <adlai@us.ibm.com>
3181License: LGPL-2.1
3182
3183License: LGPL-2.1
3184 This library is free software
3185";
3186        let pkg = parse_copyright_file(content, Some("libseccomp"));
3187        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3188        assert_eq!(pkg.namespace, Some("debian".to_string()));
3189        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3190        assert_eq!(
3191            pkg.extracted_license_statement,
3192            Some("LGPL-2.1".to_string())
3193        );
3194        assert!(pkg.parties.len() >= 2);
3195        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3196        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3197    }
3198
3199    #[test]
3200    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3201        let path = PathBuf::from(
3202            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3203        );
3204        let pkg = DebianCopyrightParser::extract_first_package(&path);
3205
3206        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3207        let extracted = pkg
3208            .extracted_license_statement
3209            .as_deref()
3210            .expect("license statement should exist");
3211        assert!(extracted.contains("GPL-2+"));
3212        assert!(!pkg.license_detections.is_empty());
3213
3214        let primary = &pkg.license_detections[0];
3215        assert_eq!(
3216            primary.matches[0].matched_text.as_deref(),
3217            Some("License: GPL-2+")
3218        );
3219        assert_eq!(primary.matches[0].start_line, 47);
3220        assert_eq!(primary.matches[0].end_line, 47);
3221    }
3222
3223    #[test]
3224    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3225        let path = PathBuf::from("testdata/debian/copyright/copyright");
3226        let pkg = DebianCopyrightParser::extract_first_package(&path);
3227
3228        assert_eq!(pkg.license_detections.len(), 1);
3229        assert_eq!(pkg.other_license_detections.len(), 4);
3230
3231        let primary = &pkg.license_detections[0];
3232        assert_eq!(
3233            primary.matches[0].matched_text.as_deref(),
3234            Some("License: LGPL-2.1")
3235        );
3236        assert_eq!(primary.matches[0].start_line, 11);
3237
3238        let ordered_lines: Vec<usize> = pkg
3239            .other_license_detections
3240            .iter()
3241            .map(|detection| detection.matches[0].start_line)
3242            .collect();
3243        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3244
3245        let ordered_texts: Vec<&str> = pkg
3246            .other_license_detections
3247            .iter()
3248            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3249            .collect();
3250        assert_eq!(
3251            ordered_texts,
3252            vec![
3253                "License: LGPL-2.1",
3254                "License: LGPL-2.1",
3255                "License: LGPL-2.1",
3256                "License: LGPL-2.1",
3257            ]
3258        );
3259    }
3260
3261    #[test]
3262    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3263        let path = PathBuf::from(
3264            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3265        );
3266        let pkg = DebianCopyrightParser::extract_first_package(&path);
3267
3268        let zlib = pkg
3269            .other_license_detections
3270            .iter()
3271            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3272            .expect("at least one Zlib license paragraph should be detected");
3273        assert_eq!(
3274            zlib.matches[0].matched_text.as_deref(),
3275            Some("License: Zlib")
3276        );
3277
3278        let last_zlib = pkg
3279            .other_license_detections
3280            .iter()
3281            .rev()
3282            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3283            .expect("bottom standalone Zlib license paragraph should be detected");
3284        assert_eq!(last_zlib.matches[0].start_line, 732);
3285        assert_eq!(last_zlib.matches[0].end_line, 732);
3286    }
3287
3288    #[test]
3289    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3290        let path =
3291            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3292        let pkg = DebianCopyrightParser::extract_first_package(&path);
3293
3294        assert_eq!(pkg.license_detections.len(), 1);
3295        let primary = &pkg.license_detections[0];
3296        assert_eq!(
3297            primary.matches[0].matched_text.as_deref(),
3298            Some("License: LGPL-3+ or GPL-2+")
3299        );
3300        assert_eq!(primary.matches[0].start_line, 8);
3301        assert_eq!(primary.matches[0].end_line, 8);
3302
3303        assert!(pkg.other_license_detections.iter().any(|detection| {
3304            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3305        }));
3306    }
3307
3308    #[test]
3309    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3310        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3311        let pkg = parse_copyright_file(content, Some("foo"));
3312
3313        assert_eq!(pkg.license_detections.len(), 1);
3314        let primary = &pkg.license_detections[0];
3315        assert_eq!(
3316            primary.matches[0].matched_text.as_deref(),
3317            Some("License: GPL-2+")
3318        );
3319        assert_eq!(primary.matches[0].start_line, 7);
3320    }
3321
3322    #[test]
3323    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3324        let raw_lines = vec![
3325            "Files: *".to_string(),
3326            "Copyright: 2024 Example Org".to_string(),
3327            "License: Apache-2.0".to_string(),
3328            " Licensed under the Apache License, Version 2.0.".to_string(),
3329        ];
3330
3331        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3332        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3333            .into_iter()
3334            .next()
3335            .expect("reference RFC822 paragraph should parse");
3336
3337        assert_eq!(paragraph.metadata.headers, expected.headers);
3338        assert_eq!(paragraph.metadata.body, expected.body);
3339        assert_eq!(
3340            paragraph.license_header_line,
3341            Some(("License: Apache-2.0".to_string(), 12))
3342        );
3343    }
3344
3345    #[test]
3346    fn test_parse_copyright_unstructured() {
3347        let content = "This package was debianized by John Doe.
3348
3349Upstream Authors:
3350    Jane Smith
3351
3352Copyright:
3353    2009 10gen
3354
3355License:
3356    SSPL
3357";
3358        let pkg = parse_copyright_file(content, Some("mongodb"));
3359        assert_eq!(pkg.name, Some("mongodb".to_string()));
3360        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3361        assert!(!pkg.parties.is_empty());
3362    }
3363
3364    #[test]
3365    fn test_parse_copyright_holders() {
3366        let text = "2012 Paul Moore <pmoore@redhat.com>
33672012 Ashley Lai <adlai@us.ibm.com>
3368Copyright (C) 2015-2018 Example Corp";
3369        let holders = parse_copyright_holders(text);
3370        assert!(holders.len() >= 3);
3371        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3372        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3373    }
3374
3375    #[test]
3376    fn test_parse_copyright_empty() {
3377        let content = "This is just some text without proper copyright info.";
3378        let pkg = parse_copyright_file(content, Some("test"));
3379        assert_eq!(pkg.name, Some("test".to_string()));
3380        assert!(pkg.parties.is_empty());
3381        assert!(pkg.extracted_license_statement.is_none());
3382    }
3383
3384    #[test]
3385    fn test_deb_parser_is_match() {
3386        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3387        assert!(DebianDebParser::is_match(&PathBuf::from(
3388            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3389        )));
3390        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3391        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3392    }
3393
3394    #[test]
3395    fn test_parse_deb_filename_with_arch() {
3396        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3397        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3398        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3399        assert_eq!(pkg.namespace, Some("debian".to_string()));
3400        assert_eq!(
3401            pkg.purl,
3402            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3403        );
3404        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3405    }
3406
3407    #[test]
3408    fn test_parse_deb_filename_without_arch() {
3409        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3410        assert_eq!(pkg.name, Some("package".to_string()));
3411        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3412        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3413    }
3414
3415    #[test]
3416    fn test_extract_deb_archive() {
3417        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3418        if !test_path.exists() {
3419            return;
3420        }
3421
3422        let pkg = DebianDebParser::extract_first_package(&test_path);
3423
3424        assert_eq!(pkg.name, Some("adduser".to_string()));
3425        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3426        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3427        assert!(pkg.description.is_some());
3428        assert!(!pkg.parties.is_empty());
3429
3430        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3431        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3432    }
3433
3434    #[test]
3435    fn test_extract_deb_archive_with_control_tar_xz() {
3436        let deb = create_synthetic_deb_with_control_tar_xz();
3437
3438        let pkg = DebianDebParser::extract_first_package(deb.path());
3439
3440        assert_eq!(pkg.name, Some("synthetic".to_string()));
3441        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3442        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3443        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3444    }
3445
3446    #[test]
3447    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3448        let deb = create_synthetic_deb_with_copyright();
3449
3450        let pkg = DebianDebParser::extract_first_package(deb.path());
3451
3452        assert_eq!(pkg.name, Some("synthetic".to_string()));
3453        assert_eq!(
3454            pkg.extracted_license_statement,
3455            Some("Apache-2.0".to_string())
3456        );
3457        assert!(pkg.parties.iter().any(|party| {
3458            party.role.as_deref() == Some("copyright-holder")
3459                && party.name.as_deref() == Some("Example Org")
3460        }));
3461    }
3462
3463    #[test]
3464    fn test_parse_deb_filename_simple() {
3465        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3466        assert_eq!(pkg.name, Some("adduser".to_string()));
3467        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3468        assert_eq!(pkg.namespace, Some("debian".to_string()));
3469    }
3470
3471    #[test]
3472    fn test_parse_deb_filename_invalid() {
3473        let pkg = parse_deb_filename("invalid.deb");
3474        assert!(pkg.name.is_none());
3475        assert!(pkg.version.is_none());
3476    }
3477
3478    #[test]
3479    fn test_distroless_parser() {
3480        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3481
3482        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3483
3484        if !test_file.exists() {
3485            eprintln!("Warning: Test file not found, skipping test");
3486            return;
3487        }
3488
3489        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3490
3491        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3492        assert_eq!(
3493            pkg.datasource_id,
3494            Some(DatasourceId::DebianDistrolessInstalledDb)
3495        );
3496        assert_eq!(pkg.name, Some("base-files".to_string()));
3497        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3498        assert_eq!(pkg.namespace, Some("debian".to_string()));
3499        assert!(pkg.purl.is_some());
3500        assert!(
3501            pkg.purl
3502                .as_ref()
3503                .unwrap()
3504                .contains("pkg:deb/debian/base-files")
3505        );
3506    }
3507}