Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35
36use log::warn;
37use packageurl::PackageUrl;
38use regex::Regex;
39
40use crate::models::{
41    DatasourceId, Dependency, FileReference, LicenseDetection, Match, PackageData, PackageType,
42    Party,
43};
44use crate::parsers::rfc822::{self, Rfc822Metadata};
45use crate::parsers::utils::{read_file_to_string, split_name_email};
46use crate::utils::spdx::combine_license_expressions;
47
48use super::PackageParser;
49
50const PACKAGE_TYPE: PackageType = PackageType::Deb;
51
52fn default_package_data(datasource_id: DatasourceId) -> PackageData {
53    PackageData {
54        package_type: Some(PACKAGE_TYPE),
55        datasource_id: Some(datasource_id),
56        ..Default::default()
57    }
58}
59
/// Substrings that mark a Debian package when found in a lower-cased version string.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
/// Substrings that mark an Ubuntu package when found in a lower-cased version string.
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

/// Substrings that mark a Debian package when found in a lower-cased maintainer field.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
/// Substrings that mark an Ubuntu package when found in a lower-cased maintainer field.
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
73
/// Describes one dependency-bearing control field and how the dependencies
/// parsed from it are classified.
struct DepFieldSpec {
    /// Header key handed to `rfc822::get_header_first`.
    field: &'static str,
    /// Scope label recorded on every dependency parsed from this field.
    scope: &'static str,
    /// Value stored in `Dependency::is_runtime`.
    is_runtime: bool,
    /// Baseline for `Dependency::is_optional` (alternatives force it to `true`).
    is_optional: bool,
}

impl DepFieldSpec {
    /// Builds a spec whose scope label is the field name itself, which holds
    /// for every entry in `DEP_FIELDS`.
    const fn new(field: &'static str, is_runtime: bool, is_optional: bool) -> Self {
        Self {
            field,
            scope: field,
            is_runtime,
            is_optional,
        }
    }
}

/// All dependency fields that are scanned, in the order their dependencies
/// are emitted.
const DEP_FIELDS: &[DepFieldSpec] = &[
    // Runtime relationships.
    DepFieldSpec::new("depends", true, false),
    DepFieldSpec::new("pre-depends", true, false),
    DepFieldSpec::new("recommends", true, true),
    DepFieldSpec::new("suggests", true, true),
    // Non-runtime package relationships.
    DepFieldSpec::new("breaks", false, false),
    DepFieldSpec::new("conflicts", false, false),
    DepFieldSpec::new("replaces", false, false),
    DepFieldSpec::new("provides", false, false),
    // Build-time relationships.
    DepFieldSpec::new("build-depends", false, false),
    DepFieldSpec::new("build-depends-indep", false, false),
    DepFieldSpec::new("build-conflicts", false, false),
];
150
151// ---------------------------------------------------------------------------
152// DebianControlParser: debian/control files (source + binary paragraphs)
153// ---------------------------------------------------------------------------
154
155pub struct DebianControlParser;
156
157impl PackageParser for DebianControlParser {
158    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
159
160    fn is_match(path: &Path) -> bool {
161        if let Some(name) = path.file_name()
162            && name == "control"
163            && let Some(parent) = path.parent()
164            && let Some(parent_name) = parent.file_name()
165        {
166            return parent_name == "debian";
167        }
168        false
169    }
170
171    fn extract_packages(path: &Path) -> Vec<PackageData> {
172        let content = match read_file_to_string(path) {
173            Ok(c) => c,
174            Err(e) => {
175                warn!("Failed to read debian/control at {:?}: {}", path, e);
176                return Vec::new();
177            }
178        };
179
180        parse_debian_control(&content)
181    }
182}
183
184// ---------------------------------------------------------------------------
185// DebianInstalledParser: /var/lib/dpkg/status
186// ---------------------------------------------------------------------------
187
188pub struct DebianInstalledParser;
189
190impl PackageParser for DebianInstalledParser {
191    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
192
193    fn is_match(path: &Path) -> bool {
194        let path_str = path.to_string_lossy();
195        path_str.ends_with("var/lib/dpkg/status")
196    }
197
198    fn extract_packages(path: &Path) -> Vec<PackageData> {
199        let content = match read_file_to_string(path) {
200            Ok(c) => c,
201            Err(e) => {
202                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
203                return Vec::new();
204            }
205        };
206
207        parse_dpkg_status(&content)
208    }
209}
210
211pub struct DebianDistrolessInstalledParser;
212
213impl PackageParser for DebianDistrolessInstalledParser {
214    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216    fn is_match(path: &Path) -> bool {
217        let path_str = path.to_string_lossy();
218        path_str.contains("var/lib/dpkg/status.d/")
219    }
220
221    fn extract_packages(path: &Path) -> Vec<PackageData> {
222        let content = match read_file_to_string(path) {
223            Ok(c) => c,
224            Err(e) => {
225                warn!("Failed to read distroless status file at {:?}: {}", path, e);
226                return vec![default_package_data(
227                    DatasourceId::DebianDistrolessInstalledDb,
228                )];
229            }
230        };
231
232        vec![parse_distroless_status(&content)]
233    }
234}
235
236fn parse_distroless_status(content: &str) -> PackageData {
237    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
238
239    if paragraphs.is_empty() {
240        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
241    }
242
243    build_package_from_paragraph(
244        &paragraphs[0],
245        None,
246        DatasourceId::DebianDistrolessInstalledDb,
247    )
248    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
249}
250
251// ---------------------------------------------------------------------------
252// Parsing logic
253// ---------------------------------------------------------------------------
254
255/// Parses a debian/control file into PackageData entries.
256///
257/// A debian/control file has a Source paragraph followed by one or more Binary
258/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
259/// into each binary package.
260fn parse_debian_control(content: &str) -> Vec<PackageData> {
261    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
262    if paragraphs.is_empty() {
263        return Vec::new();
264    }
265
266    // Determine if first paragraph is a Source paragraph
267    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
268
269    let (source_paragraph, binary_start) = if has_source {
270        (Some(&paragraphs[0]), 1)
271    } else {
272        (None, 0)
273    };
274
275    // Extract source-level shared metadata
276    let source_meta = source_paragraph.map(extract_source_meta);
277
278    let mut packages = Vec::new();
279
280    for para in &paragraphs[binary_start..] {
281        if let Some(pkg) = build_package_from_paragraph(
282            para,
283            source_meta.as_ref(),
284            DatasourceId::DebianControlInSource,
285        ) {
286            packages.push(pkg);
287        }
288    }
289
290    if packages.is_empty()
291        && let Some(source_para) = source_paragraph
292        && let Some(pkg) = build_package_from_source_paragraph(source_para)
293    {
294        packages.push(pkg);
295    }
296
297    packages
298}
299
300/// Parses a dpkg/status file into PackageData entries.
301///
302/// Each paragraph represents an installed package. Only packages with
303/// `Status: install ok installed` are included.
304fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
305    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
306    let mut packages = Vec::new();
307
308    for para in &paragraphs {
309        let status = rfc822::get_header_first(&para.headers, "status");
310        if status.as_deref() != Some("install ok installed") {
311            continue;
312        }
313
314        if let Some(pkg) =
315            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
316        {
317            packages.push(pkg);
318        }
319    }
320
321    packages
322}
323
324// ---------------------------------------------------------------------------
325// Source paragraph metadata (shared across binary packages)
326// ---------------------------------------------------------------------------
327
328struct SourceMeta {
329    parties: Vec<Party>,
330    homepage_url: Option<String>,
331    vcs_url: Option<String>,
332    code_view_url: Option<String>,
333    bug_tracking_url: Option<String>,
334}
335
336fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
337    let mut parties = Vec::new();
338
339    // Maintainer
340    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
341        let (name, email) = split_name_email(&maintainer);
342        parties.push(Party {
343            r#type: Some("person".to_string()),
344            role: Some("maintainer".to_string()),
345            name,
346            email,
347            url: None,
348            organization: None,
349            organization_url: None,
350            timezone: None,
351        });
352    }
353
354    // Original-Maintainer
355    if let Some(orig_maintainer) =
356        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
357    {
358        let (name, email) = split_name_email(&orig_maintainer);
359        parties.push(Party {
360            r#type: Some("person".to_string()),
361            role: Some("maintainer".to_string()),
362            name,
363            email,
364            url: None,
365            organization: None,
366            organization_url: None,
367            timezone: None,
368        });
369    }
370
371    // Uploaders (comma-separated)
372    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
373        for uploader in uploaders_str.split(',') {
374            let trimmed = uploader.trim();
375            if !trimmed.is_empty() {
376                let (name, email) = split_name_email(trimmed);
377                parties.push(Party {
378                    r#type: Some("person".to_string()),
379                    role: Some("uploader".to_string()),
380                    name,
381                    email,
382                    url: None,
383                    organization: None,
384                    organization_url: None,
385                    timezone: None,
386                });
387            }
388        }
389    }
390
391    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage");
392
393    // VCS-Git: may contain branch info after space
394    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
395        .map(|url| url.split_whitespace().next().unwrap_or(&url).to_string());
396
397    let code_view_url = rfc822::get_header_first(&paragraph.headers, "vcs-browser");
398
399    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs");
400
401    SourceMeta {
402        parties,
403        homepage_url,
404        vcs_url,
405        code_view_url,
406        bug_tracking_url,
407    }
408}
409
410// ---------------------------------------------------------------------------
411// Package building
412// ---------------------------------------------------------------------------
413
414fn build_package_from_paragraph(
415    paragraph: &Rfc822Metadata,
416    source_meta: Option<&SourceMeta>,
417    datasource_id: DatasourceId,
418) -> Option<PackageData> {
419    let name = rfc822::get_header_first(&paragraph.headers, "package")?;
420    let version = rfc822::get_header_first(&paragraph.headers, "version");
421    let architecture = rfc822::get_header_first(&paragraph.headers, "architecture");
422    let description = rfc822::get_header_first(&paragraph.headers, "description");
423    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
424    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage");
425    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
426    let section = rfc822::get_header_first(&paragraph.headers, "section");
427    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
428    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
429
430    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
431
432    // Build parties: use source_meta parties if available, otherwise parse from paragraph
433    let parties = if let Some(meta) = source_meta {
434        meta.parties.clone()
435    } else {
436        let mut p = Vec::new();
437        if let Some(m) = &maintainer_str {
438            let (n, e) = split_name_email(m);
439            p.push(Party {
440                r#type: Some("person".to_string()),
441                role: Some("maintainer".to_string()),
442                name: n,
443                email: e,
444                url: None,
445                organization: None,
446                organization_url: None,
447                timezone: None,
448            });
449        }
450        p
451    };
452
453    // Resolve homepage: paragraph's own, or from source metadata
454    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
455    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
456    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
457    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
458
459    // Build PURL
460    let purl = build_debian_purl(
461        &name,
462        version.as_deref(),
463        namespace.as_deref(),
464        architecture.as_deref(),
465    );
466
467    // Parse dependencies from all dependency fields
468    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
469
470    // Keywords from section
471    let keywords = section.into_iter().collect();
472
473    // Source packages
474    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
475
476    // Extra data
477    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
478    if let Some(ma) = &multi_arch
479        && !ma.is_empty()
480    {
481        extra_data.insert(
482            "multi_arch".to_string(),
483            serde_json::Value::String(ma.clone()),
484        );
485    }
486    if let Some(size_str) = &installed_size
487        && let Ok(size) = size_str.parse::<u64>()
488    {
489        extra_data.insert(
490            "installed_size".to_string(),
491            serde_json::Value::Number(serde_json::Number::from(size)),
492        );
493    }
494
495    // Qualifiers for architecture
496    let qualifiers = architecture.as_ref().map(|arch| {
497        let mut q = HashMap::new();
498        q.insert("arch".to_string(), arch.clone());
499        q
500    });
501
502    Some(PackageData {
503        package_type: Some(PACKAGE_TYPE),
504        namespace: namespace.clone(),
505        name: Some(name),
506        version,
507        qualifiers,
508        subpath: None,
509        primary_language: None,
510        description,
511        release_date: None,
512        parties,
513        keywords,
514        homepage_url,
515        download_url: None,
516        size: None,
517        sha1: None,
518        md5: None,
519        sha256: None,
520        sha512: None,
521        bug_tracking_url,
522        code_view_url,
523        vcs_url,
524        copyright: None,
525        holder: None,
526        declared_license_expression: None,
527        declared_license_expression_spdx: None,
528        license_detections: Vec::new(),
529        other_license_expression: None,
530        other_license_expression_spdx: None,
531        other_license_detections: Vec::new(),
532        extracted_license_statement: None,
533        notice_text: None,
534        source_packages,
535        file_references: Vec::new(),
536        is_private: false,
537        is_virtual: false,
538        extra_data: if extra_data.is_empty() {
539            None
540        } else {
541            Some(extra_data)
542        },
543        dependencies,
544        repository_homepage_url: None,
545        repository_download_url: None,
546        api_data_url: None,
547        datasource_id: Some(datasource_id),
548        purl,
549    })
550}
551
552fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
553    let name = rfc822::get_header_first(&paragraph.headers, "source")?;
554    let version = rfc822::get_header_first(&paragraph.headers, "version");
555    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
556
557    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
558    let source_meta = extract_source_meta(paragraph);
559
560    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
561    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
562
563    let section = rfc822::get_header_first(&paragraph.headers, "section");
564    let keywords = section.into_iter().collect();
565
566    Some(PackageData {
567        package_type: Some(PACKAGE_TYPE),
568        namespace: namespace.clone(),
569        name: Some(name),
570        version,
571        qualifiers: None,
572        subpath: None,
573        primary_language: None,
574        description: None,
575        release_date: None,
576        parties: source_meta.parties,
577        keywords,
578        homepage_url: source_meta.homepage_url,
579        download_url: None,
580        size: None,
581        sha1: None,
582        md5: None,
583        sha256: None,
584        sha512: None,
585        bug_tracking_url: source_meta.bug_tracking_url,
586        code_view_url: source_meta.code_view_url,
587        vcs_url: source_meta.vcs_url,
588        copyright: None,
589        holder: None,
590        declared_license_expression: None,
591        declared_license_expression_spdx: None,
592        license_detections: Vec::new(),
593        other_license_expression: None,
594        other_license_expression_spdx: None,
595        other_license_detections: Vec::new(),
596        extracted_license_statement: None,
597        notice_text: None,
598        source_packages: Vec::new(),
599        file_references: Vec::new(),
600        is_private: false,
601        is_virtual: false,
602        extra_data: None,
603        dependencies,
604        repository_homepage_url: None,
605        repository_download_url: None,
606        api_data_url: None,
607        datasource_id: Some(DatasourceId::DebianControlInSource),
608        purl,
609    })
610}
611
612// ---------------------------------------------------------------------------
613// Namespace detection
614// ---------------------------------------------------------------------------
615
616fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
617    // Check version clues first
618    if let Some(ver) = version {
619        let ver_lower = ver.to_lowercase();
620        for clue in VERSION_CLUES_UBUNTU {
621            if ver_lower.contains(clue) {
622                return Some("ubuntu".to_string());
623            }
624        }
625        for clue in VERSION_CLUES_DEBIAN {
626            if ver_lower.contains(clue) {
627                return Some("debian".to_string());
628            }
629        }
630    }
631
632    // Check maintainer clues
633    if let Some(maint) = maintainer {
634        let maint_lower = maint.to_lowercase();
635        for clue in MAINTAINER_CLUES_UBUNTU {
636            if maint_lower.contains(clue) {
637                return Some("ubuntu".to_string());
638            }
639        }
640        for clue in MAINTAINER_CLUES_DEBIAN {
641            if maint_lower.contains(clue) {
642                return Some("debian".to_string());
643            }
644        }
645    }
646
647    // Default to debian
648    Some("debian".to_string())
649}
650
651// ---------------------------------------------------------------------------
652// PURL generation
653// ---------------------------------------------------------------------------
654
655fn build_debian_purl(
656    name: &str,
657    version: Option<&str>,
658    namespace: Option<&str>,
659    architecture: Option<&str>,
660) -> Option<String> {
661    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
662
663    if let Some(ns) = namespace {
664        purl.with_namespace(ns).ok()?;
665    }
666
667    if let Some(ver) = version {
668        purl.with_version(ver).ok()?;
669    }
670
671    if let Some(arch) = architecture {
672        purl.add_qualifier("arch", arch).ok()?;
673    }
674
675    Some(purl.to_string())
676}
677
678// ---------------------------------------------------------------------------
679// Dependency parsing
680// ---------------------------------------------------------------------------
681
682fn parse_all_dependencies(
683    headers: &HashMap<String, Vec<String>>,
684    namespace: Option<&str>,
685) -> Vec<Dependency> {
686    let mut dependencies = Vec::new();
687
688    for spec in DEP_FIELDS {
689        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
690            dependencies.extend(parse_dependency_field(
691                &dep_str,
692                spec.scope,
693                spec.is_runtime,
694                spec.is_optional,
695                namespace,
696            ));
697        }
698    }
699
700    dependencies
701}
702
703/// Parses a Debian dependency field value.
704///
705/// Debian dependencies are comma-separated, with optional version constraints
706/// in parentheses and alternative packages separated by `|`.
707///
708/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
709///
710/// Alternatives (|) are treated as separate optional dependencies.
711fn parse_dependency_field(
712    dep_str: &str,
713    scope: &str,
714    is_runtime: bool,
715    is_optional: bool,
716    namespace: Option<&str>,
717) -> Vec<Dependency> {
718    let mut deps = Vec::new();
719
720    // Regex for parsing individual dependency: name (operator version)
721    // Debian operators: <<, <=, =, >=, >>
722    let dep_re = Regex::new(
723        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
724    )
725    .unwrap();
726
727    for group in dep_str.split(',') {
728        let group = group.trim();
729        if group.is_empty() {
730            continue;
731        }
732
733        // Handle alternatives (|)
734        let alternatives: Vec<&str> = group.split('|').collect();
735        let has_alternatives = alternatives.len() > 1;
736
737        for alt in alternatives {
738            let alt = alt.trim();
739            if alt.is_empty() {
740                continue;
741            }
742
743            if let Some(caps) = dep_re.captures(alt) {
744                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
745                let operator = caps.get(2).map(|m| m.as_str().trim());
746                let version = caps.get(3).map(|m| m.as_str().trim());
747
748                if pkg_name.is_empty() {
749                    continue;
750                }
751
752                // Skip substitution variables like ${shlibs:Depends}
753                if pkg_name.starts_with('$') {
754                    continue;
755                }
756
757                let extracted_requirement = match (operator, version) {
758                    (Some(op), Some(ver)) => Some(format!("{} {}", op, ver)),
759                    _ => None,
760                };
761
762                let is_pinned = operator.map(|op| op == "=");
763
764                let purl = build_debian_purl(pkg_name, None, namespace, None);
765
766                deps.push(Dependency {
767                    purl,
768                    extracted_requirement,
769                    scope: Some(scope.to_string()),
770                    is_runtime: Some(is_runtime),
771                    is_optional: Some(is_optional || has_alternatives),
772                    is_pinned,
773                    is_direct: Some(true),
774                    resolved_package: None,
775                    extra_data: None,
776                });
777            }
778        }
779    }
780
781    deps
782}
783
784// ---------------------------------------------------------------------------
785// Source field parsing
786// ---------------------------------------------------------------------------
787
788/// Parses the Source field which may contain a version in parentheses.
789///
790/// Format: `source-name` or `source-name (version)`
791fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
792    let Some(source_str) = source else {
793        return Vec::new();
794    };
795
796    let trimmed = source_str.trim();
797    if trimmed.is_empty() {
798        return Vec::new();
799    }
800
801    // Extract name and optional version from "name (version)" format
802    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
803        let name = trimmed[..paren_start].trim();
804        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
805        (
806            name,
807            if version.is_empty() {
808                None
809            } else {
810                Some(version)
811            },
812        )
813    } else {
814        (trimmed, None)
815    };
816
817    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
818        vec![purl]
819    } else {
820        Vec::new()
821    }
822}
823
824// ---------------------------------------------------------------------------
825// Parser registration macros
826// ---------------------------------------------------------------------------
827
// Registers descriptive metadata for the debian/control parser.
// NOTE(review): the meaning of each argument is defined by `register_parser!`,
// which is not visible here. They appear to be: description, path patterns,
// package type, primary language (empty), documentation URL — confirm against
// the macro definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
835
836// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
837// but we register it for the single-package interface too.
838
839// ============================================================================
840// WAVE 2 PARSERS: Additional Debian Format Support
841// ============================================================================
842
843/// Parser for Debian Source Control (.dsc) files
844pub struct DebianDscParser;
845
846impl PackageParser for DebianDscParser {
847    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
848
849    fn is_match(path: &Path) -> bool {
850        path.extension().and_then(|e| e.to_str()) == Some("dsc")
851    }
852
853    fn extract_packages(path: &Path) -> Vec<PackageData> {
854        let content = match read_file_to_string(path) {
855            Ok(c) => c,
856            Err(e) => {
857                warn!("Failed to read .dsc file {:?}: {}", path, e);
858                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
859            }
860        };
861
862        vec![parse_dsc_content(&content)]
863    }
864}
865
/// Removes PGP cleartext-signature framing from `content`.
///
/// Dropped lines: the `BEGIN PGP SIGNED MESSAGE` marker, `Hash:` lines seen
/// while still in the header that follows it, the first empty line that ends
/// that header (only if nothing has been kept yet), and everything from
/// `BEGIN PGP SIGNATURE` through `END PGP SIGNATURE`. Every kept line is
/// emitted followed by `\n`. Content without any PGP markers passes through
/// with line endings normalised to `\n`.
fn strip_pgp_signature(content: &str) -> String {
    const SIGNED_MESSAGE_BEGIN: &str = "-----BEGIN PGP SIGNED MESSAGE-----";
    const SIGNATURE_BEGIN: &str = "-----BEGIN PGP SIGNATURE-----";
    const SIGNATURE_END: &str = "-----END PGP SIGNATURE-----";

    let mut body = String::new();
    let mut in_armor_header = false;
    let mut in_signature = false;

    for line in content.lines() {
        if line.starts_with(SIGNED_MESSAGE_BEGIN) {
            in_armor_header = true;
        } else if line.starts_with(SIGNATURE_BEGIN) {
            in_signature = true;
        } else if line.starts_with(SIGNATURE_END) {
            in_signature = false;
        } else if in_armor_header && line.starts_with("Hash:") {
            // Armor header line: not part of the signed payload.
            continue;
        } else if in_armor_header && line.is_empty() && body.is_empty() {
            // The blank separator ends the armor header.
            in_armor_header = false;
        } else if !in_signature {
            body.push_str(line);
            body.push('\n');
        }
    }

    body
}
899
900fn parse_dsc_content(content: &str) -> PackageData {
901    let clean_content = strip_pgp_signature(content);
902    let metadata = rfc822::parse_rfc822_content(&clean_content);
903    let headers = &metadata.headers;
904
905    let name = rfc822::get_header_first(headers, "source");
906    let version = rfc822::get_header_first(headers, "version");
907    let architecture = rfc822::get_header_first(headers, "architecture");
908    let namespace = Some("debian".to_string());
909
910    let mut package = PackageData {
911        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
912        package_type: Some(PACKAGE_TYPE),
913        namespace: namespace.clone(),
914        name: name.clone(),
915        version: version.clone(),
916        description: rfc822::get_header_first(headers, "description"),
917        homepage_url: rfc822::get_header_first(headers, "homepage"),
918        vcs_url: rfc822::get_header_first(headers, "vcs-git"),
919        code_view_url: rfc822::get_header_first(headers, "vcs-browser"),
920        ..Default::default()
921    };
922
923    // Build PURL with architecture qualifier
924    if let (Some(n), Some(v)) = (&name, &version) {
925        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
926    }
927
928    // Set source_packages to point to the source itself (without version)
929    if let Some(n) = &name
930        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
931    {
932        package.source_packages.push(source_purl);
933    }
934
935    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
936        let (name_opt, email_opt) = split_name_email(&maintainer);
937        package.parties.push(Party {
938            r#type: None,
939            role: Some("maintainer".to_string()),
940            name: name_opt,
941            email: email_opt,
942            url: None,
943            organization: None,
944            organization_url: None,
945            timezone: None,
946        });
947    }
948
949    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
950        for uploader in uploaders_str.split(',') {
951            let uploader = uploader.trim();
952            if uploader.is_empty() {
953                continue;
954            }
955            let (name_opt, email_opt) = split_name_email(uploader);
956            package.parties.push(Party {
957                r#type: None,
958                role: Some("uploader".to_string()),
959                name: name_opt,
960                email: email_opt,
961                url: None,
962                organization: None,
963                organization_url: None,
964                timezone: None,
965            });
966        }
967    }
968
969    // Parse Build-Depends
970    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
971        package.dependencies.extend(parse_dependency_field(
972            &build_deps,
973            "build",
974            false,
975            false,
976            namespace.as_deref(),
977        ));
978    }
979
980    // Store Standards-Version in extra_data
981    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
982        let map = package.extra_data.get_or_insert_with(HashMap::new);
983        map.insert("standards_version".to_string(), standards.into());
984    }
985
986    package
987}
988
989/// Parser for Debian original source tarballs (*.orig.tar.*)
990pub struct DebianOrigTarParser;
991
992impl PackageParser for DebianOrigTarParser {
993    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
994
995    fn is_match(path: &Path) -> bool {
996        path.file_name()
997            .and_then(|n| n.to_str())
998            .map(|name| name.contains(".orig.tar."))
999            .unwrap_or(false)
1000    }
1001
1002    fn extract_packages(path: &Path) -> Vec<PackageData> {
1003        let filename = match path.file_name().and_then(|n| n.to_str()) {
1004            Some(f) => f,
1005            None => {
1006                return vec![default_package_data(
1007                    DatasourceId::DebianOriginalSourceTarball,
1008                )];
1009            }
1010        };
1011
1012        vec![parse_source_tarball_filename(
1013            filename,
1014            DatasourceId::DebianOriginalSourceTarball,
1015        )]
1016    }
1017}
1018
1019/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1020pub struct DebianDebianTarParser;
1021
1022impl PackageParser for DebianDebianTarParser {
1023    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1024
1025    fn is_match(path: &Path) -> bool {
1026        path.file_name()
1027            .and_then(|n| n.to_str())
1028            .map(|name| name.contains(".debian.tar."))
1029            .unwrap_or(false)
1030    }
1031
1032    fn extract_packages(path: &Path) -> Vec<PackageData> {
1033        let filename = match path.file_name().and_then(|n| n.to_str()) {
1034            Some(f) => f,
1035            None => {
1036                return vec![default_package_data(
1037                    DatasourceId::DebianSourceMetadataTarball,
1038                )];
1039            }
1040        };
1041
1042        vec![parse_source_tarball_filename(
1043            filename,
1044            DatasourceId::DebianSourceMetadataTarball,
1045        )]
1046    }
1047}
1048
1049fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1050    let without_tar_ext = filename
1051        .trim_end_matches(".gz")
1052        .trim_end_matches(".xz")
1053        .trim_end_matches(".bz2")
1054        .trim_end_matches(".tar");
1055
1056    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1057    if parts.len() < 2 {
1058        return default_package_data(datasource_id);
1059    }
1060
1061    let name = parts[0].to_string();
1062    let version_with_suffix = parts[1];
1063
1064    let version = version_with_suffix
1065        .trim_end_matches(".orig")
1066        .trim_end_matches(".debian")
1067        .to_string();
1068
1069    let namespace = Some("debian".to_string());
1070
1071    PackageData {
1072        datasource_id: Some(datasource_id),
1073        package_type: Some(PACKAGE_TYPE),
1074        namespace: namespace.clone(),
1075        name: Some(name.clone()),
1076        version: Some(version.clone()),
1077        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1078        ..Default::default()
1079    }
1080}
1081
1082/// Parser for Debian installed file lists (*.list)
1083pub struct DebianInstalledListParser;
1084
1085impl PackageParser for DebianInstalledListParser {
1086    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1087
1088    fn is_match(path: &Path) -> bool {
1089        path.extension().and_then(|e| e.to_str()) == Some("list")
1090            && path
1091                .to_str()
1092                .map(|p| p.contains("/var/lib/dpkg/info/"))
1093                .unwrap_or(false)
1094    }
1095
1096    fn extract_packages(path: &Path) -> Vec<PackageData> {
1097        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1098            Some(f) => f,
1099            None => {
1100                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1101            }
1102        };
1103
1104        let content = match read_file_to_string(path) {
1105            Ok(c) => c,
1106            Err(e) => {
1107                warn!("Failed to read .list file {:?}: {}", path, e);
1108                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1109            }
1110        };
1111
1112        vec![parse_debian_file_list(
1113            &content,
1114            filename,
1115            DatasourceId::DebianInstalledFilesList,
1116        )]
1117    }
1118}
1119
1120/// Parser for Debian installed MD5 checksum files (*.md5sums)
1121pub struct DebianInstalledMd5sumsParser;
1122
1123impl PackageParser for DebianInstalledMd5sumsParser {
1124    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1125
1126    fn is_match(path: &Path) -> bool {
1127        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1128            && path
1129                .to_str()
1130                .map(|p| p.contains("/var/lib/dpkg/info/"))
1131                .unwrap_or(false)
1132    }
1133
1134    fn extract_packages(path: &Path) -> Vec<PackageData> {
1135        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1136            Some(f) => f,
1137            None => {
1138                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1139            }
1140        };
1141
1142        let content = match read_file_to_string(path) {
1143            Ok(c) => c,
1144            Err(e) => {
1145                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1146                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1147            }
1148        };
1149
1150        vec![parse_debian_file_list(
1151            &content,
1152            filename,
1153            DatasourceId::DebianInstalledMd5Sums,
1154        )]
1155    }
1156}
1157
1158const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1159
1160fn parse_debian_file_list(
1161    content: &str,
1162    filename: &str,
1163    datasource_id: DatasourceId,
1164) -> PackageData {
1165    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1166        (Some(pkg.to_string()), Some(arch.to_string()))
1167    } else if filename == "md5sums" {
1168        (None, None)
1169    } else {
1170        (Some(filename.to_string()), None)
1171    };
1172
1173    let mut file_references = Vec::new();
1174
1175    for line in content.lines() {
1176        let line = line.trim();
1177        if line.is_empty() || line.starts_with('#') {
1178            continue;
1179        }
1180
1181        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1182            (Some(hash.trim().to_string()), p.trim())
1183        } else {
1184            (None, line)
1185        };
1186
1187        if IGNORED_ROOT_DIRS.contains(&path) {
1188            continue;
1189        }
1190
1191        file_references.push(FileReference {
1192            path: path.to_string(),
1193            size: None,
1194            sha1: None,
1195            md5: md5sum,
1196            sha256: None,
1197            sha512: None,
1198            extra_data: None,
1199        });
1200    }
1201
1202    if file_references.is_empty() {
1203        return default_package_data(datasource_id);
1204    }
1205
1206    let namespace = Some("debian".to_string());
1207    let mut package = PackageData {
1208        datasource_id: Some(datasource_id),
1209        package_type: Some(PACKAGE_TYPE),
1210        namespace: namespace.clone(),
1211        name: name.clone(),
1212        file_references,
1213        ..Default::default()
1214    };
1215
1216    if let Some(n) = &name {
1217        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1218    }
1219
1220    package
1221}
1222
1223/// Parser for Debian machine-readable copyright files (DEP-5 format)
1224pub struct DebianCopyrightParser;
1225
1226impl PackageParser for DebianCopyrightParser {
1227    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1228
1229    fn is_match(path: &Path) -> bool {
1230        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1231            if filename != "copyright" {
1232                return false;
1233            }
1234            let path_str = path.to_string_lossy();
1235            path_str.contains("/debian/")
1236                || path_str.contains("/usr/share/doc/")
1237                || path_str.ends_with("debian/copyright")
1238        } else {
1239            false
1240        }
1241    }
1242
1243    fn extract_packages(path: &Path) -> Vec<PackageData> {
1244        let content = match read_file_to_string(path) {
1245            Ok(c) => c,
1246            Err(e) => {
1247                warn!("Failed to read copyright file {:?}: {}", path, e);
1248                return vec![default_package_data(DatasourceId::DebianCopyright)];
1249            }
1250        };
1251
1252        let package_name = extract_package_name_from_path(path);
1253        vec![parse_copyright_file(&content, package_name.as_deref())]
1254    }
1255}
1256
/// Returns the path component that directly follows the first `doc`
/// component (as in `/usr/share/doc/<package>/copyright`).
///
/// Yields `None` when no `doc` component is followed by a normal component,
/// or when that following component is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    let components: Vec<_> = path.components().collect();

    // Look at each adjacent pair and keep the first one shaped like `doc/<next>`.
    let package_dir = components
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (std::path::Component::Normal(dir), std::path::Component::Normal(next))
                if dir.to_str() == Some("doc") =>
            {
                Some(next)
            }
            _ => None,
        })?;

    package_dir.to_str().map(str::to_owned)
}
1271
1272fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1273    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1274
1275    let is_dep5 = paragraphs
1276        .first()
1277        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1278        .is_some();
1279
1280    let namespace = Some("debian".to_string());
1281    let mut parties = Vec::new();
1282    let mut license_statements = Vec::new();
1283    let mut primary_license_detection = None;
1284    let mut header_license_detection = None;
1285    let mut other_license_detections = Vec::new();
1286
1287    if is_dep5 {
1288        for para in &paragraphs {
1289            if let Some(copyright_text) =
1290                rfc822::get_header_first(&para.metadata.headers, "copyright")
1291            {
1292                for holder in parse_copyright_holders(&copyright_text) {
1293                    if !holder.is_empty() {
1294                        parties.push(Party {
1295                            r#type: None,
1296                            role: Some("copyright-holder".to_string()),
1297                            name: Some(holder),
1298                            email: None,
1299                            url: None,
1300                            organization: None,
1301                            organization_url: None,
1302                            timezone: None,
1303                        });
1304                    }
1305                }
1306            }
1307
1308            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1309                let license_name = license.lines().next().unwrap_or(&license).trim();
1310                if !license_name.is_empty()
1311                    && !license_statements.contains(&license_name.to_string())
1312                {
1313                    license_statements.push(license_name.to_string());
1314                }
1315
1316                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1317                    let detection =
1318                        build_primary_license_detection(license_name, matched_text, line_no);
1319                    let is_header_paragraph =
1320                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1321                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1322                        == Some("*")
1323                    {
1324                        primary_license_detection = Some(detection);
1325                    } else if is_header_paragraph {
1326                        header_license_detection.get_or_insert(detection);
1327                    } else {
1328                        other_license_detections.push(detection);
1329                    }
1330                }
1331            }
1332        }
1333
1334        if primary_license_detection.is_none() && header_license_detection.is_some() {
1335            primary_license_detection = header_license_detection;
1336        }
1337    } else {
1338        let copyright_block = extract_unstructured_field(content, "Copyright:");
1339        if let Some(text) = copyright_block {
1340            for holder in parse_copyright_holders(&text) {
1341                if !holder.is_empty() {
1342                    parties.push(Party {
1343                        r#type: None,
1344                        role: Some("copyright-holder".to_string()),
1345                        name: Some(holder),
1346                        email: None,
1347                        url: None,
1348                        organization: None,
1349                        organization_url: None,
1350                        timezone: None,
1351                    });
1352                }
1353            }
1354        }
1355
1356        let license_block = extract_unstructured_field(content, "License:");
1357        if let Some(text) = license_block {
1358            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1359        }
1360    }
1361
1362    let extracted_license_statement = if license_statements.is_empty() {
1363        None
1364    } else {
1365        Some(license_statements.join(" AND "))
1366    };
1367
1368    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1369    let declared_license_expression = license_detections
1370        .first()
1371        .map(|detection| detection.license_expression.clone());
1372    let declared_license_expression_spdx = license_detections
1373        .first()
1374        .map(|detection| detection.license_expression_spdx.clone());
1375    let other_license_expression = combine_license_expressions(
1376        other_license_detections
1377            .iter()
1378            .map(|detection| detection.license_expression.clone()),
1379    );
1380    let other_license_expression_spdx = combine_license_expressions(
1381        other_license_detections
1382            .iter()
1383            .map(|detection| detection.license_expression_spdx.clone()),
1384    );
1385
1386    PackageData {
1387        datasource_id: Some(DatasourceId::DebianCopyright),
1388        package_type: Some(PACKAGE_TYPE),
1389        namespace: namespace.clone(),
1390        name: package_name.map(|s| s.to_string()),
1391        parties,
1392        declared_license_expression,
1393        declared_license_expression_spdx,
1394        license_detections,
1395        other_license_expression,
1396        other_license_expression_spdx,
1397        other_license_detections,
1398        extracted_license_statement,
1399        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1400        ..Default::default()
1401    }
1402}
1403
/// One blank-line-delimited paragraph of a Debian `copyright` file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Fields of the paragraph, keyed by lowercased field name.
    metadata: Rfc822Metadata,
    /// Text and 1-based line number of the paragraph's first `License:` line, if any.
    license_header_line: Option<(String, usize)>,
}
1409
1410fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1411    let mut paragraphs = Vec::new();
1412    let mut current_lines = Vec::new();
1413    let mut current_start_line = 1usize;
1414
1415    for (idx, line) in content.lines().enumerate() {
1416        let line_no = idx + 1;
1417        if line.is_empty() {
1418            if !current_lines.is_empty() {
1419                paragraphs.push(finalize_copyright_paragraph(
1420                    std::mem::take(&mut current_lines),
1421                    current_start_line,
1422                ));
1423            }
1424            current_start_line = line_no + 1;
1425        } else {
1426            if current_lines.is_empty() {
1427                current_start_line = line_no;
1428            }
1429            current_lines.push(line.to_string());
1430        }
1431    }
1432
1433    if !current_lines.is_empty() {
1434        paragraphs.push(finalize_copyright_paragraph(
1435            current_lines,
1436            current_start_line,
1437        ));
1438    }
1439
1440    paragraphs
1441}
1442
1443fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1444    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1445    let mut current_name: Option<String> = None;
1446    let mut current_value = String::new();
1447    let mut license_header_line = None;
1448
1449    for (idx, line) in raw_lines.iter().enumerate() {
1450        if line.starts_with(' ') || line.starts_with('\t') {
1451            if current_name.is_some() {
1452                current_value.push('\n');
1453                current_value.push_str(line);
1454            }
1455            continue;
1456        }
1457
1458        if let Some(name) = current_name.take() {
1459            add_copyright_header_value(&mut headers, &name, &current_value);
1460            current_value.clear();
1461        }
1462
1463        if let Some((name, value)) = line.split_once(':') {
1464            let normalized_name = name.trim().to_ascii_lowercase();
1465            if normalized_name == "license" && license_header_line.is_none() {
1466                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1467            }
1468            current_name = Some(normalized_name);
1469            current_value = value.trim_start().to_string();
1470        }
1471    }
1472
1473    if let Some(name) = current_name.take() {
1474        add_copyright_header_value(&mut headers, &name, &current_value);
1475    }
1476
1477    CopyrightParagraph {
1478        metadata: Rfc822Metadata {
1479            headers,
1480            body: String::new(),
1481        },
1482        license_header_line,
1483    }
1484}
1485
/// Records `value` under `name`, with trailing whitespace removed.
///
/// The entry for `name` is created even when the trimmed value is empty, so
/// the field is known to exist but has no values.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1493
1494fn build_primary_license_detection(
1495    license_name: &str,
1496    matched_text: String,
1497    line_no: usize,
1498) -> LicenseDetection {
1499    let (license_expression, license_expression_spdx) = normalize_debian_license_name(license_name);
1500
1501    LicenseDetection {
1502        license_expression: license_expression.clone(),
1503        license_expression_spdx: license_expression_spdx.clone(),
1504        matches: vec![Match {
1505            license_expression,
1506            license_expression_spdx,
1507            from_file: None,
1508            start_line: line_no,
1509            end_line: line_no,
1510            matcher: Some("1-spdx-id".to_string()),
1511            score: 100.0,
1512            matched_length: Some(license_name.split_whitespace().count()),
1513            match_coverage: Some(100.0),
1514            rule_relevance: Some(100),
1515            rule_identifier: None,
1516            rule_url: None,
1517            matched_text: Some(matched_text),
1518        }],
1519        identifier: None,
1520    }
1521}
1522
/// Maps a Debian license short name to `(license key, SPDX identifier)`.
///
/// Surrounding whitespace is ignored. Names without a known mapping fall back
/// to `(lowercased name, name as written)`.
///
/// The GPL and LGPL families are covered in both their "only" and "or later"
/// (`+`) forms, so that e.g. `GPL-3+` does not fall through to the fallback
/// and produce the invalid key `gpl-3+`.
fn normalize_debian_license_name(license_name: &str) -> (String, String) {
    let (key, spdx) = match license_name.trim() {
        "GPL-1" => ("gpl-1.0", "GPL-1.0-only"),
        "GPL-1+" => ("gpl-1.0-plus", "GPL-1.0-or-later"),
        "GPL-2" => ("gpl-2.0", "GPL-2.0-only"),
        "GPL-2+" => ("gpl-2.0-plus", "GPL-2.0-or-later"),
        "GPL-3" => ("gpl-3.0", "GPL-3.0-only"),
        "GPL-3+" => ("gpl-3.0-plus", "GPL-3.0-or-later"),
        "LGPL-2" => ("lgpl-2.0", "LGPL-2.0-only"),
        "LGPL-2+" => ("lgpl-2.0-plus", "LGPL-2.0-or-later"),
        "LGPL-2.1" => ("lgpl-2.1", "LGPL-2.1-only"),
        "LGPL-2.1+" => ("lgpl-2.1-plus", "LGPL-2.1-or-later"),
        "LGPL-3" => ("lgpl-3.0", "LGPL-3.0-only"),
        "LGPL-3+" => ("lgpl-3.0-plus", "LGPL-3.0-or-later"),
        // "Expat" is the Debian short name for the license SPDX calls MIT.
        "MIT" | "Expat" => ("mit", "MIT"),
        "BSD-4-clause" => ("bsd-original-uc", "BSD-4-Clause-UC"),
        "public-domain" => ("public-domain", "LicenseRef-provenant-public-domain"),
        other => return (other.to_ascii_lowercase(), other.to_string()),
    };

    (key.to_string(), spdx.to_string())
}
1540
/// Extracts copyright holder names from a copyright statement, one per line.
///
/// For every non-blank line:
/// 1. leading copyright markers (`Copyright`, `copyright`, `(C)`, `(c)`, `©`)
///    are removed, in any order and with any whitespace between them;
/// 2. everything before the first alphabetic character (years, ranges,
///    punctuation) is skipped;
/// 3. the remainder is the holder, kept only if it is longer than two bytes.
///
/// Lines that contain no alphabetic character after the markers yield nothing.
fn parse_copyright_holders(text: &str) -> Vec<String> {
    const MARKERS: &[&str] = &["Copyright", "copyright", "(C)", "(c)", "©"];

    let mut holders = Vec::new();

    for line in text.lines() {
        let mut rest = line.trim();
        if rest.is_empty() {
            continue;
        }

        // Peel markers until none is left. Whitespace is skipped after each
        // one so that "Copyright (C) 2020 Foo" loses both markers, not just
        // the first.
        loop {
            let stripped = MARKERS.iter().find_map(|marker| rest.strip_prefix(*marker));
            match stripped {
                Some(tail) => rest = tail.trim_start(),
                None => break,
            }
        }

        // Skip years and separators: the holder starts at the first letter.
        if let Some(holder_start) = rest.find(char::is_alphabetic) {
            let holder = &rest[holder_start..];
            if holder.len() > 2 {
                holders.push(holder.to_string());
            }
        }
    }

    holders
}
1573
/// Collects the text of `field_name` from a free-form copyright file.
///
/// Collection starts at the first line beginning with `field_name` (that
/// prefix is removed) and continues over indented lines. Empty lines are
/// skipped, further lines beginning with `field_name` are appended, and any
/// other non-indented line ends the field. Each collected line is trimmed and
/// the lines are joined with `\n`.
///
/// Returns `None` when nothing but whitespace was collected.
fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
    let mut collected = String::new();
    let mut seen_field = false;

    for line in content.lines() {
        let piece = if line.starts_with(field_name) {
            seen_field = true;
            line.trim_start_matches(field_name).trim()
        } else if !seen_field {
            continue;
        } else if line.starts_with(char::is_whitespace) {
            line.trim()
        } else if line.trim().is_empty() {
            continue;
        } else {
            // A different, non-indented line: the field is over.
            break;
        };

        collected.push_str(piece);
        collected.push('\n');
    }

    Some(collected.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_owned)
}
1600
1601/// Parser for Debian binary package archives (.deb files)
1602pub struct DebianDebParser;
1603
1604impl PackageParser for DebianDebParser {
1605    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1606
1607    fn is_match(path: &Path) -> bool {
1608        path.extension().and_then(|e| e.to_str()) == Some("deb")
1609    }
1610
1611    fn extract_packages(path: &Path) -> Vec<PackageData> {
1612        // Try to extract metadata from archive contents first
1613        if let Ok(data) = extract_deb_archive(path) {
1614            return vec![data];
1615        }
1616
1617        // Fallback to filename parsing
1618        let filename = match path.file_name().and_then(|n| n.to_str()) {
1619            Some(f) => f,
1620            None => {
1621                return vec![default_package_data(DatasourceId::DebianDeb)];
1622            }
1623        };
1624
1625        vec![parse_deb_filename(filename)]
1626    }
1627}
1628
1629fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1630    use flate2::read::GzDecoder;
1631    use liblzma::read::XzDecoder;
1632    use std::io::{Cursor, Read};
1633
1634    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1635
1636    let mut archive = ar::Archive::new(file);
1637    let mut package: Option<PackageData> = None;
1638
1639    while let Some(entry_result) = archive.next_entry() {
1640        let mut entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1641
1642        let entry_name = std::str::from_utf8(entry.header().identifier())
1643            .map_err(|e| format!("Invalid entry name: {}", e))?;
1644        let entry_name = entry_name.trim().to_string();
1645
1646        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1647            let mut control_data = Vec::new();
1648            entry
1649                .read_to_end(&mut control_data)
1650                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1651
1652            if entry_name.ends_with(".gz") {
1653                let decoder = GzDecoder::new(Cursor::new(control_data));
1654                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1655                    package = Some(parsed_package);
1656                }
1657            } else if entry_name.ends_with(".xz") {
1658                let decoder = XzDecoder::new(Cursor::new(control_data));
1659                if let Some(parsed_package) = parse_control_tar_archive(decoder)? {
1660                    package = Some(parsed_package);
1661                }
1662            }
1663        } else if entry_name.starts_with("data.tar") {
1664            let mut data = Vec::new();
1665            entry
1666                .read_to_end(&mut data)
1667                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1668
1669            let Some(current_package) = package.as_mut() else {
1670                continue;
1671            };
1672
1673            if entry_name.ends_with(".gz") {
1674                let decoder = GzDecoder::new(Cursor::new(data));
1675                merge_deb_data_archive(decoder, current_package)?;
1676            } else if entry_name.ends_with(".xz") {
1677                let decoder = XzDecoder::new(Cursor::new(data));
1678                merge_deb_data_archive(decoder, current_package)?;
1679            }
1680        }
1681    }
1682
1683    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1684}
1685
1686fn parse_control_tar_archive<R: std::io::Read>(reader: R) -> Result<Option<PackageData>, String> {
1687    use std::io::Read;
1688
1689    let mut tar_archive = tar::Archive::new(reader);
1690
1691    for tar_entry_result in tar_archive
1692        .entries()
1693        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1694    {
1695        let mut tar_entry =
1696            tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1697
1698        let tar_path = tar_entry
1699            .path()
1700            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1701
1702        if tar_path.ends_with("control") {
1703            let mut control_content = String::new();
1704            tar_entry
1705                .read_to_string(&mut control_content)
1706                .map_err(|e| format!("Failed to read control file: {}", e))?;
1707
1708            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
1709            if paragraphs.is_empty() {
1710                return Err("No paragraphs in control file".to_string());
1711            }
1712
1713            if let Some(package) =
1714                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
1715            {
1716                return Ok(Some(package));
1717            }
1718
1719            return Err("Failed to parse control file".to_string());
1720        }
1721    }
1722
1723    Ok(None)
1724}
1725
1726fn merge_deb_data_archive<R: std::io::Read>(
1727    reader: R,
1728    package: &mut PackageData,
1729) -> Result<(), String> {
1730    use std::io::Read;
1731
1732    let mut tar_archive = tar::Archive::new(reader);
1733
1734    for tar_entry_result in tar_archive
1735        .entries()
1736        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
1737    {
1738        let mut tar_entry =
1739            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
1740
1741        let tar_path = tar_entry
1742            .path()
1743            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
1744        let tar_path_str = tar_path.to_string_lossy();
1745
1746        if tar_path_str.ends_with(&format!(
1747            "/usr/share/doc/{}/copyright",
1748            package.name.as_deref().unwrap_or_default()
1749        )) || tar_path_str.ends_with(&format!(
1750            "usr/share/doc/{}/copyright",
1751            package.name.as_deref().unwrap_or_default()
1752        )) {
1753            let mut copyright_content = String::new();
1754            tar_entry
1755                .read_to_string(&mut copyright_content)
1756                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
1757
1758            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
1759            merge_debian_copyright_into_package(package, &copyright_pkg);
1760            break;
1761        }
1762    }
1763
1764    Ok(())
1765}
1766
1767fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
1768    if target.extracted_license_statement.is_none() {
1769        target.extracted_license_statement = copyright.extracted_license_statement.clone();
1770    }
1771
1772    for party in &copyright.parties {
1773        if !target.parties.iter().any(|existing| {
1774            existing.r#type == party.r#type
1775                && existing.role == party.role
1776                && existing.name == party.name
1777                && existing.email == party.email
1778                && existing.url == party.url
1779                && existing.organization == party.organization
1780                && existing.organization_url == party.organization_url
1781                && existing.timezone == party.timezone
1782        }) {
1783            target.parties.push(party.clone());
1784        }
1785    }
1786}
1787
1788fn parse_deb_filename(filename: &str) -> PackageData {
1789    let without_ext = filename.trim_end_matches(".deb");
1790
1791    let parts: Vec<&str> = without_ext.split('_').collect();
1792    if parts.len() < 2 {
1793        return default_package_data(DatasourceId::DebianDeb);
1794    }
1795
1796    let name = parts[0].to_string();
1797    let version = parts[1].to_string();
1798    let architecture = if parts.len() >= 3 {
1799        Some(parts[2].to_string())
1800    } else {
1801        None
1802    };
1803
1804    let namespace = Some("debian".to_string());
1805
1806    PackageData {
1807        datasource_id: Some(DatasourceId::DebianDeb),
1808        package_type: Some(PACKAGE_TYPE),
1809        namespace: namespace.clone(),
1810        name: Some(name.clone()),
1811        version: Some(version.clone()),
1812        purl: build_debian_purl(
1813            &name,
1814            Some(&version),
1815            namespace.as_deref(),
1816            architecture.as_deref(),
1817        ),
1818        ..Default::default()
1819    }
1820}
1821
1822/// Parser for control files inside extracted .deb control tarballs.
1823///
1824/// Matches paths like `*/control.tar.gz-extract/control` and
1825/// `*/control.tar.xz-extract/control` which are created by ExtractCode
1826/// when extracting .deb archives.
1827pub struct DebianControlInExtractedDebParser;
1828
1829impl PackageParser for DebianControlInExtractedDebParser {
1830    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1831
1832    fn is_match(path: &Path) -> bool {
1833        path.file_name()
1834            .and_then(|n| n.to_str())
1835            .is_some_and(|name| name == "control")
1836            && path
1837                .to_str()
1838                .map(|p| {
1839                    p.ends_with("control.tar.gz-extract/control")
1840                        || p.ends_with("control.tar.xz-extract/control")
1841                })
1842                .unwrap_or(false)
1843    }
1844
1845    fn extract_packages(path: &Path) -> Vec<PackageData> {
1846        let content = match read_file_to_string(path) {
1847            Ok(c) => c,
1848            Err(e) => {
1849                warn!(
1850                    "Failed to read control file in extracted deb {:?}: {}",
1851                    path, e
1852                );
1853                return vec![default_package_data(
1854                    DatasourceId::DebianControlExtractedDeb,
1855                )];
1856            }
1857        };
1858
1859        // A control file inside an extracted .deb has a single paragraph
1860        // (unlike debian/control which has source + binary paragraphs)
1861        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
1862        if paragraphs.is_empty() {
1863            return vec![default_package_data(
1864                DatasourceId::DebianControlExtractedDeb,
1865            )];
1866        }
1867
1868        if let Some(pkg) = build_package_from_paragraph(
1869            &paragraphs[0],
1870            None,
1871            DatasourceId::DebianControlExtractedDeb,
1872        ) {
1873            vec![pkg]
1874        } else {
1875            vec![default_package_data(
1876                DatasourceId::DebianControlExtractedDeb,
1877            )]
1878        }
1879    }
1880}
1881
1882/// Parser for MD5 checksum files inside extracted .deb control tarballs
1883pub struct DebianMd5sumInPackageParser;
1884
1885impl PackageParser for DebianMd5sumInPackageParser {
1886    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1887
1888    fn is_match(path: &Path) -> bool {
1889        path.file_name()
1890            .and_then(|n| n.to_str())
1891            .is_some_and(|name| name == "md5sums")
1892            && path
1893                .to_str()
1894                .map(|p| {
1895                    p.ends_with("control.tar.gz-extract/md5sums")
1896                        || p.ends_with("control.tar.xz-extract/md5sums")
1897                })
1898                .unwrap_or(false)
1899    }
1900
1901    fn extract_packages(path: &Path) -> Vec<PackageData> {
1902        let content = match read_file_to_string(path) {
1903            Ok(c) => c,
1904            Err(e) => {
1905                warn!("Failed to read md5sums file {:?}: {}", path, e);
1906                return vec![default_package_data(
1907                    DatasourceId::DebianMd5SumsInExtractedDeb,
1908                )];
1909            }
1910        };
1911
1912        let package_name = extract_package_name_from_deb_path(path);
1913
1914        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
1915    }
1916}
1917
/// Recovers the package name from the path of a file inside an extracted
/// `.deb`, e.g. `.../curl_7.68.0_amd64.deb-extract/control.tar.gz-extract/md5sums`.
///
/// The grandparent directory name must end in `.deb-extract`; the name is the
/// text before the first `_` of what remains. Returns `None` when the path is
/// too shallow, the directory name is not valid UTF-8 or lacks the expected
/// suffixes, or the resulting name is empty.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let extract_dir = path.parent()?.parent()?.file_name()?.to_str()?;
    let deb_stem = extract_dir
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    // `split` always yields a first item, possibly empty; an empty package
    // name is not a usable result, so it is reported as `None`.
    let name = deb_stem.split('_').next().filter(|name| !name.is_empty())?;

    Some(name.to_string())
}
1928
1929fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
1930    let mut file_references = Vec::new();
1931
1932    for line in content.lines() {
1933        let line = line.trim();
1934        if line.is_empty() || line.starts_with('#') {
1935            continue;
1936        }
1937
1938        let (md5sum, filepath): (Option<String>, &str) = if let Some(idx) = line.find("  ") {
1939            (Some(line[..idx].trim().to_string()), line[idx + 2..].trim())
1940        } else if let Some((hash, path)) = line.split_once(' ') {
1941            (Some(hash.trim().to_string()), path.trim())
1942        } else {
1943            (None, line)
1944        };
1945
1946        if IGNORED_ROOT_DIRS.contains(&filepath) {
1947            continue;
1948        }
1949
1950        file_references.push(FileReference {
1951            path: filepath.to_string(),
1952            size: None,
1953            sha1: None,
1954            md5: md5sum,
1955            sha256: None,
1956            sha512: None,
1957            extra_data: None,
1958        });
1959    }
1960
1961    if file_references.is_empty() {
1962        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
1963    }
1964
1965    let namespace = Some("debian".to_string());
1966    let mut package = PackageData {
1967        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
1968        package_type: Some(PACKAGE_TYPE),
1969        namespace: namespace.clone(),
1970        name: package_name.map(|s| s.to_string()),
1971        file_references,
1972        ..Default::default()
1973    };
1974
1975    if let Some(n) = &package.name {
1976        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
1977    }
1978
1979    package
1980}
1981
// Registration entry for control files found in an extracted .deb control
// tarball. The glob patterns mirror the two path suffixes accepted by
// `DebianControlInExtractedDebParser::is_match`.
// NOTE(review): the argument order looks like description, path patterns,
// package type, an (empty) fourth field, and a documentation URL; confirm the
// meaning of the fourth field against the `register_parser!` definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
1992
// Registration entry for md5sums files found in an extracted .deb control
// tarball. The glob patterns mirror the two path suffixes accepted by
// `DebianMd5sumInPackageParser::is_match`.
// NOTE(review): the argument order looks like description, path patterns,
// package type, an (empty) fourth field, and a documentation URL; confirm the
// meaning of the fourth field against the `register_parser!` definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2003
2004#[cfg(test)]
2005mod tests {
2006    use super::*;
2007    use crate::models::DatasourceId;
2008    use crate::models::PackageType;
2009    use ar::{Builder as ArBuilder, Header as ArHeader};
2010    use flate2::Compression;
2011    use flate2::write::GzEncoder;
2012    use liblzma::write::XzEncoder;
2013    use std::io::Cursor;
2014    use std::path::PathBuf;
2015    use tar::{Builder as TarBuilder, Header as TarHeader};
2016    use tempfile::NamedTempFile;
2017
2018    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2019        let mut control_tar = Vec::new();
2020        {
2021            let encoder = XzEncoder::new(&mut control_tar, 6);
2022            let mut tar_builder = TarBuilder::new(encoder);
2023
2024            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2025            let mut header = TarHeader::new_gnu();
2026            header
2027                .set_path("control")
2028                .expect("control tar path should be valid");
2029            header.set_size(control_content.len() as u64);
2030            header.set_mode(0o644);
2031            header.set_cksum();
2032            tar_builder
2033                .append(&header, Cursor::new(control_content))
2034                .expect("control file should be appended to tar.xz");
2035            tar_builder.finish().expect("control tar.xz should finish");
2036        }
2037
2038        let deb = NamedTempFile::new().expect("temp deb file should be created");
2039        {
2040            let mut builder = ArBuilder::new(
2041                deb.reopen()
2042                    .expect("temporary deb file should reopen for writing"),
2043            );
2044
2045            let debian_binary = b"2.0\n";
2046            let mut debian_binary_header =
2047                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2048            debian_binary_header.set_mode(0o100644);
2049            builder
2050                .append(&debian_binary_header, Cursor::new(debian_binary))
2051                .expect("debian-binary entry should be appended");
2052
2053            let mut control_header =
2054                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2055            control_header.set_mode(0o100644);
2056            builder
2057                .append(&control_header, Cursor::new(control_tar))
2058                .expect("control.tar.xz entry should be appended");
2059        }
2060
2061        deb
2062    }
2063
2064    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2065        let mut control_tar = Vec::new();
2066        {
2067            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2068            let mut tar_builder = TarBuilder::new(encoder);
2069
2070            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2071            let mut header = TarHeader::new_gnu();
2072            header
2073                .set_path("control")
2074                .expect("control tar path should be valid");
2075            header.set_size(control_content.len() as u64);
2076            header.set_mode(0o644);
2077            header.set_cksum();
2078            tar_builder
2079                .append(&header, Cursor::new(control_content))
2080                .expect("control file should be appended to tar.gz");
2081            tar_builder.finish().expect("control tar.gz should finish");
2082        }
2083
2084        let mut data_tar = Vec::new();
2085        {
2086            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2087            let mut tar_builder = TarBuilder::new(encoder);
2088
2089            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2090            let mut header = TarHeader::new_gnu();
2091            header
2092                .set_path("./usr/share/doc/synthetic/copyright")
2093                .expect("copyright path should be valid");
2094            header.set_size(copyright.len() as u64);
2095            header.set_mode(0o644);
2096            header.set_cksum();
2097            tar_builder
2098                .append(&header, Cursor::new(copyright))
2099                .expect("copyright file should be appended to data tar");
2100            tar_builder.finish().expect("data tar.gz should finish");
2101        }
2102
2103        let deb = NamedTempFile::new().expect("temp deb file should be created");
2104        {
2105            let mut builder = ArBuilder::new(
2106                deb.reopen()
2107                    .expect("temporary deb file should reopen for writing"),
2108            );
2109
2110            let debian_binary = b"2.0\n";
2111            let mut debian_binary_header =
2112                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2113            debian_binary_header.set_mode(0o100644);
2114            builder
2115                .append(&debian_binary_header, Cursor::new(debian_binary))
2116                .expect("debian-binary entry should be appended");
2117
2118            let mut control_header =
2119                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2120            control_header.set_mode(0o100644);
2121            builder
2122                .append(&control_header, Cursor::new(control_tar))
2123                .expect("control.tar.gz entry should be appended");
2124
2125            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2126            data_header.set_mode(0o100644);
2127            builder
2128                .append(&data_header, Cursor::new(data_tar))
2129                .expect("data.tar.gz entry should be appended");
2130        }
2131
2132        deb
2133    }
2134
2135    // ====== Namespace detection ======
2136
2137    #[test]
2138    fn test_detect_namespace_from_ubuntu_version() {
2139        assert_eq!(
2140            detect_namespace(Some("1.0-1ubuntu1"), None),
2141            Some("ubuntu".to_string())
2142        );
2143    }
2144
2145    #[test]
2146    fn test_detect_namespace_from_debian_version() {
2147        assert_eq!(
2148            detect_namespace(Some("1.0-1+deb11u1"), None),
2149            Some("debian".to_string())
2150        );
2151    }
2152
2153    #[test]
2154    fn test_detect_namespace_from_ubuntu_maintainer() {
2155        assert_eq!(
2156            detect_namespace(
2157                None,
2158                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2159            ),
2160            Some("ubuntu".to_string())
2161        );
2162    }
2163
2164    #[test]
2165    fn test_detect_namespace_from_debian_maintainer() {
2166        assert_eq!(
2167            detect_namespace(None, Some("John Doe <john@debian.org>")),
2168            Some("debian".to_string())
2169        );
2170    }
2171
2172    #[test]
2173    fn test_detect_namespace_default() {
2174        assert_eq!(
2175            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2176            Some("debian".to_string())
2177        );
2178    }
2179
2180    #[test]
2181    fn test_detect_namespace_version_takes_priority() {
2182        // Version clue should be checked before maintainer
2183        assert_eq!(
2184            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2185            Some("ubuntu".to_string())
2186        );
2187    }
2188
2189    // ====== PURL generation ======
2190
2191    #[test]
2192    fn test_build_purl_basic() {
2193        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2194        assert_eq!(
2195            purl,
2196            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2197        );
2198    }
2199
2200    #[test]
2201    fn test_build_purl_no_version() {
2202        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2203        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2204    }
2205
2206    #[test]
2207    fn test_build_purl_no_arch() {
2208        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2209        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2210    }
2211
2212    #[test]
2213    fn test_build_purl_no_namespace() {
2214        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2215        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2216    }
2217
2218    // ====== Dependency parsing ======
2219
2220    #[test]
2221    fn test_parse_simple_dependency() {
2222        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2223        assert_eq!(deps.len(), 1);
2224        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2225        assert_eq!(deps[0].extracted_requirement, None);
2226        assert_eq!(deps[0].scope, Some("depends".to_string()));
2227    }
2228
2229    #[test]
2230    fn test_parse_dependency_with_version() {
2231        let deps =
2232            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2233        assert_eq!(deps.len(), 1);
2234        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2235        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2236    }
2237
2238    #[test]
2239    fn test_parse_dependency_exact_version() {
2240        let deps = parse_dependency_field(
2241            "libc6 (= 2.31-13+deb11u5)",
2242            "depends",
2243            true,
2244            false,
2245            Some("debian"),
2246        );
2247        assert_eq!(deps.len(), 1);
2248        assert_eq!(deps[0].is_pinned, Some(true));
2249    }
2250
2251    #[test]
2252    fn test_parse_dependency_strict_less() {
2253        let deps =
2254            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2255        assert_eq!(deps.len(), 1);
2256        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2257        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2258    }
2259
2260    #[test]
2261    fn test_parse_multiple_dependencies() {
2262        let deps = parse_dependency_field(
2263            "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2264            "depends",
2265            true,
2266            false,
2267            Some("debian"),
2268        );
2269        assert_eq!(deps.len(), 3);
2270    }
2271
2272    #[test]
2273    fn test_parse_dependency_alternatives() {
2274        let deps = parse_dependency_field(
2275            "libssl1.1 | libssl3",
2276            "depends",
2277            true,
2278            false,
2279            Some("debian"),
2280        );
2281        assert_eq!(deps.len(), 2);
2282        // Alternatives are marked as optional
2283        assert_eq!(deps[0].is_optional, Some(true));
2284        assert_eq!(deps[1].is_optional, Some(true));
2285    }
2286
2287    #[test]
2288    fn test_parse_dependency_skips_substitutions() {
2289        let deps = parse_dependency_field(
2290            "${shlibs:Depends}, ${misc:Depends}, libc6",
2291            "depends",
2292            true,
2293            false,
2294            Some("debian"),
2295        );
2296        assert_eq!(deps.len(), 1);
2297        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2298    }
2299
2300    #[test]
2301    fn test_parse_dependency_with_arch_qualifier() {
2302        // Dependencies can have [arch] qualifiers which we ignore
2303        let deps = parse_dependency_field(
2304            "libc6 (>= 2.17) [amd64]",
2305            "depends",
2306            true,
2307            false,
2308            Some("debian"),
2309        );
2310        assert_eq!(deps.len(), 1);
2311        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2312    }
2313
2314    #[test]
2315    fn test_parse_empty_dependency() {
2316        let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2317        assert!(deps.is_empty());
2318    }
2319
2320    // ====== Source field parsing ======
2321
2322    #[test]
2323    fn test_parse_source_field_name_only() {
2324        let sources = parse_source_field(Some("util-linux"), Some("debian"));
2325        assert_eq!(sources.len(), 1);
2326        assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2327    }
2328
2329    #[test]
2330    fn test_parse_source_field_with_version() {
2331        let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2332        assert_eq!(sources.len(), 1);
2333        assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2334    }
2335
2336    #[test]
2337    fn test_parse_source_field_empty() {
2338        let sources = parse_source_field(None, Some("debian"));
2339        assert!(sources.is_empty());
2340    }
2341
2342    // ====== Control file parsing ======
2343
2344    #[test]
2345    fn test_parse_debian_control_source_and_binary() {
2346        let content = "\
2347Source: curl
2348Section: web
2349Priority: optional
2350Maintainer: Alessandro Ghedini <ghedo@debian.org>
2351Homepage: https://curl.se/
2352Vcs-Browser: https://salsa.debian.org/debian/curl
2353Vcs-Git: https://salsa.debian.org/debian/curl.git
2354Build-Depends: debhelper (>= 12), libssl-dev
2355
2356Package: curl
2357Architecture: amd64
2358Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2359Description: command line tool for transferring data with URL syntax";
2360
2361        let packages = parse_debian_control(content);
2362        assert_eq!(packages.len(), 1);
2363
2364        let pkg = &packages[0];
2365        assert_eq!(pkg.name, Some("curl".to_string()));
2366        assert_eq!(pkg.package_type, Some(PackageType::Deb));
2367        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2368        assert_eq!(
2369            pkg.vcs_url,
2370            Some("https://salsa.debian.org/debian/curl.git".to_string())
2371        );
2372        assert_eq!(
2373            pkg.code_view_url,
2374            Some("https://salsa.debian.org/debian/curl".to_string())
2375        );
2376
2377        // Maintainer from source paragraph
2378        assert_eq!(pkg.parties.len(), 1);
2379        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2380        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2381        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2382
2383        // Dependencies parsed
2384        assert!(!pkg.dependencies.is_empty());
2385    }
2386
2387    #[test]
2388    fn test_parse_debian_control_multiple_binary() {
2389        let content = "\
2390Source: gzip
2391Maintainer: Debian Developer <dev@debian.org>
2392
2393Package: gzip
2394Architecture: any
2395Depends: libc6 (>= 2.17)
2396Description: GNU file compression
2397
2398Package: gzip-win32
2399Architecture: all
2400Description: gzip for Windows";
2401
2402        let packages = parse_debian_control(content);
2403        assert_eq!(packages.len(), 2);
2404        assert_eq!(packages[0].name, Some("gzip".to_string()));
2405        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2406
2407        // Both inherit source maintainer
2408        assert_eq!(packages[0].parties.len(), 1);
2409        assert_eq!(packages[1].parties.len(), 1);
2410    }
2411
2412    #[test]
2413    fn test_parse_debian_control_source_only() {
2414        let content = "\
2415Source: my-package
2416Maintainer: Test User <test@debian.org>
2417Build-Depends: debhelper (>= 13)";
2418
2419        let packages = parse_debian_control(content);
2420        assert_eq!(packages.len(), 1);
2421        assert_eq!(packages[0].name, Some("my-package".to_string()));
2422        // Build-Depends parsed
2423        assert!(!packages[0].dependencies.is_empty());
2424        assert_eq!(
2425            packages[0].dependencies[0].scope,
2426            Some("build-depends".to_string())
2427        );
2428    }
2429
2430    #[test]
2431    fn test_parse_debian_control_with_uploaders() {
2432        let content = "\
2433Source: example
2434Maintainer: Main Dev <main@debian.org>
2435Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2436
2437Package: example
2438Architecture: any
2439Description: test package";
2440
2441        let packages = parse_debian_control(content);
2442        assert_eq!(packages.len(), 1);
2443        // 1 maintainer + 2 uploaders
2444        assert_eq!(packages[0].parties.len(), 3);
2445        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2446        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2447        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2448    }
2449
2450    #[test]
2451    fn test_parse_debian_control_vcs_git_with_branch() {
2452        let content = "\
2453Source: example
2454Maintainer: Dev <dev@debian.org>
2455Vcs-Git: https://salsa.debian.org/example.git -b main
2456
2457Package: example
2458Architecture: any
2459Description: test";
2460
2461        let packages = parse_debian_control(content);
2462        assert_eq!(packages.len(), 1);
2463        // Should only take the URL, not the branch
2464        assert_eq!(
2465            packages[0].vcs_url,
2466            Some("https://salsa.debian.org/example.git".to_string())
2467        );
2468    }
2469
2470    #[test]
2471    fn test_parse_debian_control_multi_arch() {
2472        let content = "\
2473Source: example
2474Maintainer: Dev <dev@debian.org>
2475
2476Package: libexample
2477Architecture: any
2478Multi-Arch: same
2479Description: shared library";
2480
2481        let packages = parse_debian_control(content);
2482        assert_eq!(packages.len(), 1);
2483        let extra = packages[0].extra_data.as_ref().unwrap();
2484        assert_eq!(
2485            extra.get("multi_arch"),
2486            Some(&serde_json::Value::String("same".to_string()))
2487        );
2488    }
2489
2490    // ====== dpkg/status parsing ======
2491
2492    #[test]
2493    fn test_parse_dpkg_status_basic() {
2494        let content = "\
2495Package: base-files
2496Status: install ok installed
2497Priority: required
2498Section: admin
2499Installed-Size: 391
2500Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2501Architecture: amd64
2502Version: 11ubuntu5.6
2503Description: Debian base system miscellaneous files
2504Homepage: https://tracker.debian.org/pkg/base-files
2505
2506Package: not-installed
2507Status: deinstall ok config-files
2508Architecture: amd64
2509Version: 1.0
2510Description: This should be skipped";
2511
2512        let packages = parse_dpkg_status(content);
2513        assert_eq!(packages.len(), 1);
2514
2515        let pkg = &packages[0];
2516        assert_eq!(pkg.name, Some("base-files".to_string()));
2517        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2518        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2519        assert_eq!(
2520            pkg.datasource_id,
2521            Some(DatasourceId::DebianInstalledStatusDb)
2522        );
2523
2524        // Installed-Size in extra_data
2525        let extra = pkg.extra_data.as_ref().unwrap();
2526        assert_eq!(
2527            extra.get("installed_size"),
2528            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2529        );
2530    }
2531
2532    #[test]
2533    fn test_parse_dpkg_status_multiple_installed() {
2534        let content = "\
2535Package: libc6
2536Status: install ok installed
2537Architecture: amd64
2538Version: 2.31-13+deb11u5
2539Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2540Description: GNU C Library
2541
2542Package: zlib1g
2543Status: install ok installed
2544Architecture: amd64
2545Version: 1:1.2.11.dfsg-2+deb11u2
2546Maintainer: Mark Brown <broonie@debian.org>
2547Description: compression library";
2548
2549        let packages = parse_dpkg_status(content);
2550        assert_eq!(packages.len(), 2);
2551        assert_eq!(packages[0].name, Some("libc6".to_string()));
2552        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2553    }
2554
2555    #[test]
2556    fn test_parse_dpkg_status_with_dependencies() {
2557        let content = "\
2558Package: curl
2559Status: install ok installed
2560Architecture: amd64
2561Version: 7.74.0-1.3+deb11u7
2562Maintainer: Alessandro Ghedini <ghedo@debian.org>
2563Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2564Recommends: ca-certificates
2565Description: command line tool for transferring data with URL syntax";
2566
2567        let packages = parse_dpkg_status(content);
2568        assert_eq!(packages.len(), 1);
2569
2570        let deps = &packages[0].dependencies;
2571        // 2 from Depends + 1 from Recommends
2572        assert_eq!(deps.len(), 3);
2573
2574        // Check first dependency
2575        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2576        assert_eq!(deps[0].scope, Some("depends".to_string()));
2577        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2578
2579        // Check recommends
2580        assert_eq!(
2581            deps[2].purl,
2582            Some("pkg:deb/debian/ca-certificates".to_string())
2583        );
2584        assert_eq!(deps[2].scope, Some("recommends".to_string()));
2585        assert_eq!(deps[2].is_optional, Some(true));
2586    }
2587
2588    #[test]
2589    fn test_parse_dpkg_status_with_source() {
2590        let content = "\
2591Package: libncurses6
2592Status: install ok installed
2593Architecture: amd64
2594Source: ncurses (6.2+20201114-2+deb11u1)
2595Version: 6.2+20201114-2+deb11u1
2596Maintainer: Craig Small <csmall@debian.org>
2597Description: shared libraries for terminal handling";
2598
2599        let packages = parse_dpkg_status(content);
2600        assert_eq!(packages.len(), 1);
2601        assert!(!packages[0].source_packages.is_empty());
2602        // Source PURL should include version from parentheses
2603        assert!(packages[0].source_packages[0].contains("ncurses"));
2604    }
2605
2606    #[test]
2607    fn test_parse_dpkg_status_filters_not_installed() {
2608        let content = "\
2609Package: installed-pkg
2610Status: install ok installed
2611Version: 1.0
2612Architecture: amd64
2613Description: installed
2614
2615Package: half-installed
2616Status: install ok half-installed
2617Version: 2.0
2618Architecture: amd64
2619Description: half installed
2620
2621Package: deinstall-pkg
2622Status: deinstall ok config-files
2623Version: 3.0
2624Architecture: amd64
2625Description: deinstalled
2626
2627Package: purge-pkg
2628Status: purge ok not-installed
2629Version: 4.0
2630Architecture: amd64
2631Description: purged";
2632
2633        let packages = parse_dpkg_status(content);
2634        assert_eq!(packages.len(), 1);
2635        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2636    }
2637
2638    #[test]
2639    fn test_parse_dpkg_status_empty() {
2640        let packages = parse_dpkg_status("");
2641        assert!(packages.is_empty());
2642    }
2643
2644    // ====== is_match tests ======
2645
2646    #[test]
2647    fn test_debian_control_is_match() {
2648        assert!(DebianControlParser::is_match(Path::new(
2649            "/path/to/debian/control"
2650        )));
2651        assert!(DebianControlParser::is_match(Path::new("debian/control")));
2652        assert!(!DebianControlParser::is_match(Path::new(
2653            "/path/to/control"
2654        )));
2655        assert!(!DebianControlParser::is_match(Path::new(
2656            "/path/to/debian/changelog"
2657        )));
2658    }
2659
2660    #[test]
2661    fn test_debian_installed_is_match() {
2662        assert!(DebianInstalledParser::is_match(Path::new(
2663            "/var/lib/dpkg/status"
2664        )));
2665        assert!(DebianInstalledParser::is_match(Path::new(
2666            "some/root/var/lib/dpkg/status"
2667        )));
2668        assert!(!DebianInstalledParser::is_match(Path::new(
2669            "/var/lib/dpkg/status.d/something"
2670        )));
2671        assert!(!DebianInstalledParser::is_match(Path::new(
2672            "/var/lib/dpkg/available"
2673        )));
2674    }
2675
2676    // ====== Edge cases ======
2677
2678    #[test]
2679    fn test_parse_debian_control_empty_input() {
2680        let packages = parse_debian_control("");
2681        assert!(packages.is_empty());
2682    }
2683
2684    #[test]
2685    fn test_parse_debian_control_malformed_input() {
2686        let content = "this is not a valid control file\nwith random text";
2687        let packages = parse_debian_control(content);
2688        // Should not panic, may return empty or partial results
2689        assert!(packages.is_empty());
2690    }
2691
2692    #[test]
2693    fn test_dependency_with_epoch_version() {
2694        // Debian versions can have epochs like 1:2.3.4
2695        let deps = parse_dependency_field(
2696            "zlib1g (>= 1:1.2.11)",
2697            "depends",
2698            true,
2699            false,
2700            Some("debian"),
2701        );
2702        assert_eq!(deps.len(), 1);
2703        assert_eq!(
2704            deps[0].extracted_requirement,
2705            Some(">= 1:1.2.11".to_string())
2706        );
2707    }
2708
2709    #[test]
2710    fn test_dependency_with_plus_in_name() {
2711        let deps =
2712            parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
2713        assert_eq!(deps.len(), 1);
2714        assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
2715    }
2716
2717    #[test]
2718    fn test_dsc_parser_is_match() {
2719        assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
2720        assert!(DebianDscParser::is_match(&PathBuf::from(
2721            "adduser_3.118+deb11u1.dsc"
2722        )));
2723        assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
2724        assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
2725    }
2726
2727    #[test]
2728    fn test_dsc_parser_adduser() {
2729        let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
2730        let package = DebianDscParser::extract_first_package(&path);
2731
2732        assert_eq!(package.package_type, Some(PACKAGE_TYPE));
2733        assert_eq!(package.namespace, Some("debian".to_string()));
2734        assert_eq!(package.name, Some("adduser".to_string()));
2735        assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
2736        assert_eq!(
2737            package.purl,
2738            Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
2739        );
2740        assert_eq!(
2741            package.vcs_url,
2742            Some("https://salsa.debian.org/debian/adduser.git".to_string())
2743        );
2744        assert_eq!(
2745            package.code_view_url,
2746            Some("https://salsa.debian.org/debian/adduser".to_string())
2747        );
2748        assert_eq!(
2749            package.datasource_id,
2750            Some(DatasourceId::DebianSourceControlDsc)
2751        );
2752
2753        assert_eq!(package.parties.len(), 2);
2754        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2755        assert_eq!(
2756            package.parties[0].name,
2757            Some("Debian Adduser Developers".to_string())
2758        );
2759        assert_eq!(
2760            package.parties[0].email,
2761            Some("adduser@packages.debian.org".to_string())
2762        );
2763        assert_eq!(package.parties[0].r#type, None);
2764
2765        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2766        assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
2767        assert_eq!(
2768            package.parties[1].email,
2769            Some("mh+debian-packages@zugschlus.de".to_string())
2770        );
2771        assert_eq!(package.parties[1].r#type, None);
2772
2773        assert_eq!(package.source_packages.len(), 1);
2774        assert_eq!(
2775            package.source_packages[0],
2776            "pkg:deb/debian/adduser".to_string()
2777        );
2778
2779        assert!(!package.dependencies.is_empty());
2780        let build_dep_names: Vec<String> = package
2781            .dependencies
2782            .iter()
2783            .filter_map(|d| d.purl.as_ref())
2784            .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
2785            .map(|p| p.to_string())
2786            .collect();
2787        assert!(build_dep_names.len() >= 2);
2788    }
2789
2790    #[test]
2791    fn test_dsc_parser_zsh() {
2792        let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
2793        let package = DebianDscParser::extract_first_package(&path);
2794
2795        assert_eq!(package.name, Some("zsh".to_string()));
2796        assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
2797        assert_eq!(package.namespace, Some("debian".to_string()));
2798        assert!(package.purl.is_some());
2799        assert!(package.purl.as_ref().unwrap().contains("zsh"));
2800        assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
2801    }
2802
2803    #[test]
2804    fn test_parse_dsc_content_basic() {
2805        let content = "Format: 3.0 (native)
2806Source: testpkg
2807Binary: testpkg
2808Architecture: amd64
2809Version: 1.0.0
2810Maintainer: Test User <test@example.com>
2811Standards-Version: 4.5.0
2812Build-Depends: debhelper (>= 12)
2813Files:
2814 abc123 1024 testpkg_1.0.0.tar.xz
2815";
2816
2817        let package = parse_dsc_content(content);
2818        assert_eq!(package.name, Some("testpkg".to_string()));
2819        assert_eq!(package.version, Some("1.0.0".to_string()));
2820        assert_eq!(package.namespace, Some("debian".to_string()));
2821        assert_eq!(package.parties.len(), 1);
2822        assert_eq!(package.parties[0].name, Some("Test User".to_string()));
2823        assert_eq!(
2824            package.parties[0].email,
2825            Some("test@example.com".to_string())
2826        );
2827        assert_eq!(package.dependencies.len(), 1);
2828        assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
2829    }
2830
2831    #[test]
2832    fn test_parse_dsc_content_with_uploaders() {
2833        let content = "Source: mypkg
2834Version: 2.0
2835Architecture: all
2836Maintainer: Main Dev <main@example.com>
2837Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
2838";
2839
2840        let package = parse_dsc_content(content);
2841        assert_eq!(package.parties.len(), 3);
2842        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
2843        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
2844        assert_eq!(package.parties[2].role, Some("uploader".to_string()));
2845    }
2846
2847    #[test]
2848    fn test_orig_tar_parser_is_match() {
2849        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2850            "package_1.0.orig.tar.gz"
2851        )));
2852        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
2853            "abseil_0~20200923.3.orig.tar.xz"
2854        )));
2855        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
2856            "package.debian.tar.gz"
2857        )));
2858        assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
2859    }
2860
2861    #[test]
2862    fn test_debian_tar_parser_is_match() {
2863        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2864            "package_1.0-1.debian.tar.xz"
2865        )));
2866        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
2867            "abseil_20220623.1-1.debian.tar.gz"
2868        )));
2869        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
2870            "package.orig.tar.gz"
2871        )));
2872        assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
2873    }
2874
2875    #[test]
2876    fn test_parse_orig_tar_filename() {
2877        let pkg = parse_source_tarball_filename(
2878            "abseil_0~20200923.3.orig.tar.gz",
2879            DatasourceId::DebianOriginalSourceTarball,
2880        );
2881        assert_eq!(pkg.name, Some("abseil".to_string()));
2882        assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
2883        assert_eq!(pkg.namespace, Some("debian".to_string()));
2884        assert_eq!(
2885            pkg.purl,
2886            Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
2887        );
2888        assert_eq!(
2889            pkg.datasource_id,
2890            Some(DatasourceId::DebianOriginalSourceTarball)
2891        );
2892    }
2893
2894    #[test]
2895    fn test_parse_debian_tar_filename() {
2896        let pkg = parse_source_tarball_filename(
2897            "abseil_20220623.1-1.debian.tar.xz",
2898            DatasourceId::DebianSourceMetadataTarball,
2899        );
2900        assert_eq!(pkg.name, Some("abseil".to_string()));
2901        assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
2902        assert_eq!(pkg.namespace, Some("debian".to_string()));
2903        assert_eq!(
2904            pkg.purl,
2905            Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
2906        );
2907    }
2908
2909    #[test]
2910    fn test_parse_deb_filename() {
2911        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
2912        assert_eq!(pkg.name, Some("nginx".to_string()));
2913        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
2914
2915        let pkg = parse_deb_filename("invalid.deb");
2916        assert!(pkg.name.is_none());
2917        assert!(pkg.version.is_none());
2918    }
2919
2920    #[test]
2921    fn test_parse_source_tarball_various_compressions() {
2922        let pkg_gz = parse_source_tarball_filename(
2923            "test_1.0.orig.tar.gz",
2924            DatasourceId::DebianOriginalSourceTarball,
2925        );
2926        let pkg_xz = parse_source_tarball_filename(
2927            "test_1.0.orig.tar.xz",
2928            DatasourceId::DebianOriginalSourceTarball,
2929        );
2930        let pkg_bz2 = parse_source_tarball_filename(
2931            "test_1.0.orig.tar.bz2",
2932            DatasourceId::DebianOriginalSourceTarball,
2933        );
2934
2935        assert_eq!(pkg_gz.version, Some("1.0".to_string()));
2936        assert_eq!(pkg_xz.version, Some("1.0".to_string()));
2937        assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
2938    }
2939
2940    #[test]
2941    fn test_parse_source_tarball_invalid_format() {
2942        let pkg = parse_source_tarball_filename(
2943            "invalid-no-underscore.tar.gz",
2944            DatasourceId::DebianOriginalSourceTarball,
2945        );
2946        assert!(pkg.name.is_none());
2947        assert!(pkg.version.is_none());
2948    }
2949
2950    #[test]
2951    fn test_list_parser_is_match() {
2952        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2953            "/var/lib/dpkg/info/bash.list"
2954        )));
2955        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
2956            "/var/lib/dpkg/info/package:amd64.list"
2957        )));
2958        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2959            "bash.list"
2960        )));
2961        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
2962            "/var/lib/dpkg/info/bash.md5sums"
2963        )));
2964    }
2965
2966    #[test]
2967    fn test_md5sums_parser_is_match() {
2968        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2969            "/var/lib/dpkg/info/bash.md5sums"
2970        )));
2971        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2972            "/var/lib/dpkg/info/package:amd64.md5sums"
2973        )));
2974        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2975            "bash.md5sums"
2976        )));
2977        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
2978            "/var/lib/dpkg/info/bash.list"
2979        )));
2980    }
2981
2982    #[test]
2983    fn test_parse_debian_file_list_plain_list() {
2984        let content = "/.
2985/bin
2986/bin/bash
2987/usr/bin/bashbug
2988/usr/share/doc/bash/README
2989";
2990        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
2991        assert_eq!(pkg.name, Some("bash".to_string()));
2992        assert_eq!(pkg.file_references.len(), 3);
2993        assert_eq!(pkg.file_references[0].path, "/bin/bash");
2994        assert_eq!(pkg.file_references[0].md5, None);
2995        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
2996        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
2997    }
2998
2999    #[test]
3000    fn test_parse_debian_file_list_md5sums() {
3001        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash
30021c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug
3003f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz
3004";
3005        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3006        assert_eq!(pkg.name, Some("bash".to_string()));
3007        assert_eq!(pkg.file_references.len(), 3);
3008        assert_eq!(pkg.file_references[0].path, "bin/bash");
3009        assert_eq!(
3010            pkg.file_references[0].md5,
3011            Some("77506afebd3b7e19e937a678a185b62e".to_string())
3012        );
3013        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3014        assert_eq!(
3015            pkg.file_references[1].md5,
3016            Some("1c77d2031971b4e4c512ac952102cd85".to_string())
3017        );
3018    }
3019
3020    #[test]
3021    fn test_parse_debian_file_list_with_arch() {
3022        let content = "/usr/bin/foo
3023/usr/lib/x86_64-linux-gnu/libfoo.so
3024";
3025        let pkg = parse_debian_file_list(
3026            content,
3027            "libfoo:amd64",
3028            DatasourceId::DebianInstalledFilesList,
3029        );
3030        assert_eq!(pkg.name, Some("libfoo".to_string()));
3031        assert!(pkg.purl.is_some());
3032        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3033        assert_eq!(pkg.file_references.len(), 2);
3034    }
3035
3036    #[test]
3037    fn test_parse_debian_file_list_skips_comments_and_empty() {
3038        let content = "# This is a comment
3039/bin/bash
3040
3041/usr/bin/bashbug
3042  
3043";
3044        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3045        assert_eq!(pkg.file_references.len(), 2);
3046    }
3047
3048    #[test]
3049    fn test_parse_debian_file_list_md5sums_only() {
3050        let content = "abc123  usr/bin/tool
3051";
3052        let pkg =
3053            parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3054        assert_eq!(pkg.name, None);
3055        assert_eq!(pkg.file_references.len(), 1);
3056    }
3057
3058    #[test]
3059    fn test_parse_debian_file_list_ignores_root_dirs() {
3060        let content = "/.
3061/bin
3062/bin/bash
3063/etc
3064/usr
3065/var
3066";
3067        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3068        assert_eq!(pkg.file_references.len(), 1);
3069        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3070    }
3071
3072    #[test]
3073    fn test_copyright_parser_is_match() {
3074        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3075            "/usr/share/doc/bash/copyright"
3076        )));
3077        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3078            "debian/copyright"
3079        )));
3080        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3081            "copyright.txt"
3082        )));
3083        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3084            "/etc/copyright"
3085        )));
3086    }
3087
3088    #[test]
3089    fn test_extract_package_name_from_path() {
3090        assert_eq!(
3091            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3092            Some("bash".to_string())
3093        );
3094        assert_eq!(
3095            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3096            Some("libseccomp2".to_string())
3097        );
3098        assert_eq!(
3099            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3100            None
3101        );
3102    }
3103
3104    #[test]
3105    fn test_parse_copyright_dep5_format() {
3106        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3107Upstream-Name: libseccomp
3108Source: https://sourceforge.net/projects/libseccomp/
3109
3110Files: *
3111Copyright: 2012 Paul Moore <pmoore@redhat.com>
3112 2012 Ashley Lai <adlai@us.ibm.com>
3113License: LGPL-2.1
3114
3115License: LGPL-2.1
3116 This library is free software
3117";
3118        let pkg = parse_copyright_file(content, Some("libseccomp"));
3119        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3120        assert_eq!(pkg.namespace, Some("debian".to_string()));
3121        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3122        assert_eq!(
3123            pkg.extracted_license_statement,
3124            Some("LGPL-2.1".to_string())
3125        );
3126        assert!(pkg.parties.len() >= 2);
3127        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3128        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3129    }
3130
3131    #[test]
3132    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3133        let path = PathBuf::from(
3134            "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3135        );
3136        let pkg = DebianCopyrightParser::extract_first_package(&path);
3137
3138        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3139        let extracted = pkg
3140            .extracted_license_statement
3141            .as_deref()
3142            .expect("license statement should exist");
3143        assert!(extracted.contains("GPL-2+"));
3144        assert!(!pkg.license_detections.is_empty());
3145
3146        let primary = &pkg.license_detections[0];
3147        assert_eq!(
3148            primary.matches[0].matched_text.as_deref(),
3149            Some("License: GPL-2+")
3150        );
3151        assert_eq!(primary.matches[0].start_line, 47);
3152        assert_eq!(primary.matches[0].end_line, 47);
3153    }
3154
3155    #[test]
3156    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3157        let path = PathBuf::from("testdata/debian/copyright/copyright");
3158        let pkg = DebianCopyrightParser::extract_first_package(&path);
3159
3160        assert_eq!(pkg.license_detections.len(), 1);
3161        assert_eq!(pkg.other_license_detections.len(), 4);
3162
3163        let primary = &pkg.license_detections[0];
3164        assert_eq!(
3165            primary.matches[0].matched_text.as_deref(),
3166            Some("License: LGPL-2.1")
3167        );
3168        assert_eq!(primary.matches[0].start_line, 11);
3169
3170        let ordered_lines: Vec<usize> = pkg
3171            .other_license_detections
3172            .iter()
3173            .map(|detection| detection.matches[0].start_line)
3174            .collect();
3175        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3176
3177        let ordered_texts: Vec<&str> = pkg
3178            .other_license_detections
3179            .iter()
3180            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3181            .collect();
3182        assert_eq!(
3183            ordered_texts,
3184            vec![
3185                "License: LGPL-2.1",
3186                "License: LGPL-2.1",
3187                "License: LGPL-2.1",
3188                "License: LGPL-2.1",
3189            ]
3190        );
3191    }
3192
3193    #[test]
3194    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3195        let path = PathBuf::from(
3196            "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-2019-11-15/main/c/clamav/stable_copyright",
3197        );
3198        let pkg = DebianCopyrightParser::extract_first_package(&path);
3199
3200        let zlib = pkg
3201            .other_license_detections
3202            .iter()
3203            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3204            .expect("at least one Zlib license paragraph should be detected");
3205        assert_eq!(
3206            zlib.matches[0].matched_text.as_deref(),
3207            Some("License: Zlib")
3208        );
3209
3210        let last_zlib = pkg
3211            .other_license_detections
3212            .iter()
3213            .rev()
3214            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3215            .expect("bottom standalone Zlib license paragraph should be detected");
3216        assert_eq!(last_zlib.matches[0].start_line, 732);
3217        assert_eq!(last_zlib.matches[0].end_line, 732);
3218    }
3219
3220    #[test]
3221    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3222        let path = PathBuf::from(
3223            "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/crafted_for_tests/test_license_nameless",
3224        );
3225        let pkg = DebianCopyrightParser::extract_first_package(&path);
3226
3227        assert_eq!(pkg.license_detections.len(), 1);
3228        let primary = &pkg.license_detections[0];
3229        assert_eq!(
3230            primary.matches[0].matched_text.as_deref(),
3231            Some("License: LGPL-3+ or GPL-2+")
3232        );
3233        assert_eq!(primary.matches[0].start_line, 8);
3234        assert_eq!(primary.matches[0].end_line, 8);
3235
3236        assert!(pkg.other_license_detections.iter().any(|detection| {
3237            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3238        }));
3239    }
3240
3241    #[test]
3242    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3243        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3244        let pkg = parse_copyright_file(content, Some("foo"));
3245
3246        assert_eq!(pkg.license_detections.len(), 1);
3247        let primary = &pkg.license_detections[0];
3248        assert_eq!(
3249            primary.matches[0].matched_text.as_deref(),
3250            Some("License: GPL-2+")
3251        );
3252        assert_eq!(primary.matches[0].start_line, 7);
3253    }
3254
3255    #[test]
3256    #[ignore = "performance probe for Debian copyright parsing"]
3257    fn test_debian_copyright_perf_guardrail_large_dep5_fixtures() {
3258        use std::hint::black_box;
3259        use std::time::Instant;
3260
3261        let fixtures = [
3262            (
3263                "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3264                Some("bsdutils"),
3265                47usize,
3266            ),
3267            (
3268                "reference/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-2019-11-15/main/c/clamav/stable_copyright",
3269                Some("clamav"),
3270                47usize,
3271            ),
3272        ];
3273
3274        let iterations = 100usize;
3275        let start = Instant::now();
3276
3277        for _ in 0..iterations {
3278            for (path, package_name, expected_line) in fixtures {
3279                let content =
3280                    read_file_to_string(Path::new(path)).expect("fixture should be readable");
3281                let pkg = black_box(parse_copyright_file(&content, package_name));
3282                assert!(!pkg.license_detections.is_empty());
3283                assert_eq!(
3284                    pkg.license_detections[0].matches[0].start_line,
3285                    expected_line
3286                );
3287            }
3288        }
3289
3290        eprintln!(
3291            "Debian copyright perf probe: parsed {} fixtures x {} iterations in {:?}",
3292            fixtures.len(),
3293            iterations,
3294            start.elapsed()
3295        );
3296    }
3297
3298    #[test]
3299    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3300        let raw_lines = vec![
3301            "Files: *".to_string(),
3302            "Copyright: 2024 Example Org".to_string(),
3303            "License: Apache-2.0".to_string(),
3304            " Licensed under the Apache License, Version 2.0.".to_string(),
3305        ];
3306
3307        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3308        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3309            .into_iter()
3310            .next()
3311            .expect("reference RFC822 paragraph should parse");
3312
3313        assert_eq!(paragraph.metadata.headers, expected.headers);
3314        assert_eq!(paragraph.metadata.body, expected.body);
3315        assert_eq!(
3316            paragraph.license_header_line,
3317            Some(("License: Apache-2.0".to_string(), 12))
3318        );
3319    }
3320
3321    #[test]
3322    fn test_parse_copyright_unstructured() {
3323        let content = "This package was debianized by John Doe.
3324
3325Upstream Authors:
3326    Jane Smith
3327
3328Copyright:
3329    2009 10gen
3330
3331License:
3332    SSPL
3333";
3334        let pkg = parse_copyright_file(content, Some("mongodb"));
3335        assert_eq!(pkg.name, Some("mongodb".to_string()));
3336        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3337        assert!(!pkg.parties.is_empty());
3338    }
3339
3340    #[test]
3341    fn test_parse_copyright_holders() {
3342        let text = "2012 Paul Moore <pmoore@redhat.com>
33432012 Ashley Lai <adlai@us.ibm.com>
3344Copyright (C) 2015-2018 Example Corp";
3345        let holders = parse_copyright_holders(text);
3346        assert!(holders.len() >= 3);
3347        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3348        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3349    }
3350
3351    #[test]
3352    fn test_parse_copyright_empty() {
3353        let content = "This is just some text without proper copyright info.";
3354        let pkg = parse_copyright_file(content, Some("test"));
3355        assert_eq!(pkg.name, Some("test".to_string()));
3356        assert!(pkg.parties.is_empty());
3357        assert!(pkg.extracted_license_statement.is_none());
3358    }
3359
3360    #[test]
3361    fn test_deb_parser_is_match() {
3362        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3363        assert!(DebianDebParser::is_match(&PathBuf::from(
3364            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3365        )));
3366        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3367        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3368    }
3369
3370    #[test]
3371    fn test_parse_deb_filename_with_arch() {
3372        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3373        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3374        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3375        assert_eq!(pkg.namespace, Some("debian".to_string()));
3376        assert_eq!(
3377            pkg.purl,
3378            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3379        );
3380        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3381    }
3382
3383    #[test]
3384    fn test_parse_deb_filename_without_arch() {
3385        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3386        assert_eq!(pkg.name, Some("package".to_string()));
3387        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3388        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3389    }
3390
3391    #[test]
3392    fn test_extract_deb_archive() {
3393        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3394        if !test_path.exists() {
3395            return;
3396        }
3397
3398        let pkg = DebianDebParser::extract_first_package(&test_path);
3399
3400        assert_eq!(pkg.name, Some("adduser".to_string()));
3401        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3402        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3403        assert!(pkg.description.is_some());
3404        assert!(!pkg.parties.is_empty());
3405
3406        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3407        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3408    }
3409
3410    #[test]
3411    fn test_extract_deb_archive_with_control_tar_xz() {
3412        let deb = create_synthetic_deb_with_control_tar_xz();
3413
3414        let pkg = DebianDebParser::extract_first_package(deb.path());
3415
3416        assert_eq!(pkg.name, Some("synthetic".to_string()));
3417        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3418        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3419        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3420    }
3421
3422    #[test]
3423    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3424        let deb = create_synthetic_deb_with_copyright();
3425
3426        let pkg = DebianDebParser::extract_first_package(deb.path());
3427
3428        assert_eq!(pkg.name, Some("synthetic".to_string()));
3429        assert_eq!(
3430            pkg.extracted_license_statement,
3431            Some("Apache-2.0".to_string())
3432        );
3433        assert!(pkg.parties.iter().any(|party| {
3434            party.role.as_deref() == Some("copyright-holder")
3435                && party.name.as_deref() == Some("Example Org")
3436        }));
3437    }
3438
3439    #[test]
3440    fn test_parse_deb_filename_simple() {
3441        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3442        assert_eq!(pkg.name, Some("adduser".to_string()));
3443        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3444        assert_eq!(pkg.namespace, Some("debian".to_string()));
3445    }
3446
3447    #[test]
3448    fn test_parse_deb_filename_invalid() {
3449        let pkg = parse_deb_filename("invalid.deb");
3450        assert!(pkg.name.is_none());
3451        assert!(pkg.version.is_none());
3452    }
3453
3454    #[test]
3455    fn test_distroless_parser() {
3456        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3457
3458        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3459
3460        if !test_file.exists() {
3461            eprintln!("Warning: Test file not found, skipping test");
3462            return;
3463        }
3464
3465        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3466
3467        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3468        assert_eq!(
3469            pkg.datasource_id,
3470            Some(DatasourceId::DebianDistrolessInstalledDb)
3471        );
3472        assert_eq!(pkg.name, Some("base-files".to_string()));
3473        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3474        assert_eq!(pkg.namespace, Some("debian".to_string()));
3475        assert!(pkg.purl.is_some());
3476        assert!(
3477            pkg.purl
3478                .as_ref()
3479                .unwrap()
3480                .contains("pkg:deb/debian/base-files")
3481        );
3482    }
3483}