Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35use std::sync::LazyLock;
36
37use crate::parser_warn as warn;
38use packageurl::PackageUrl;
39use regex::Regex;
40
41use crate::models::{
42    DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
43    PackageType, Party,
44};
45use crate::parsers::rfc822::{self, Rfc822Metadata};
46use crate::parsers::utils::{
47    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
48};
49use crate::utils::spdx::combine_license_expressions;
50
51use super::PackageParser;
52use super::license_normalization::{
53    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
54    normalize_declared_license_key,
55};
56
/// Package type reported by the parsers in this file and used as the PURL type.
const PACKAGE_TYPE: PackageType = PackageType::Deb;

// Size and ratio limits. Their use sites are not in the portion of the file
// reviewed here.
// NOTE(review): presumably guards applied while unpacking `.deb` / tarball
// archives — confirm against the archive-handling code.

/// 1 GiB, expressed in bytes.
const MAX_ARCHIVE_SIZE: u64 = 1024 * 1024 * 1024;
/// 50 MiB, expressed in bytes.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// NOTE(review): looks like an upper bound on uncompressed/compressed size — confirm.
const MAX_COMPRESSION_RATIO: usize = 100;
62
63static DEP_RE: LazyLock<Regex> = LazyLock::new(|| {
64    Regex::new(
65        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
66    )
67    .expect("compile-time constant dependency regex")
68});
69
70fn default_package_data(datasource_id: DatasourceId) -> PackageData {
71    PackageData {
72        package_type: Some(PACKAGE_TYPE),
73        datasource_id: Some(datasource_id),
74        ..Default::default()
75    }
76}
77
// Namespace detection clues from version strings.
// `detect_namespace` lowercases the version and looks for these as substrings,
// trying the Ubuntu clues before the Debian ones.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

// Namespace detection clues from maintainer fields.
// Only consulted when the version gave no hit; matched as lowercase substrings,
// again with the Ubuntu clues taking precedence.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
91
/// One relationship field of a control paragraph and the flags applied to
/// every dependency parsed out of it.
struct DepFieldSpec {
    /// Lower-case header name to look up in the paragraph.
    field: &'static str,
    /// Scope string recorded on each resulting dependency.
    scope: &'static str,
    /// Whether dependencies from this field are flagged as runtime.
    is_runtime: bool,
    /// Whether dependencies from this field are flagged as optional.
    is_optional: bool,
}

impl DepFieldSpec {
    /// Mandatory runtime relationship; the scope equals the field name.
    const fn runtime(field: &'static str) -> Self {
        Self {
            field,
            scope: field,
            is_runtime: true,
            is_optional: false,
        }
    }

    /// Optional runtime relationship; the scope equals the field name.
    const fn runtime_optional(field: &'static str) -> Self {
        Self {
            field,
            scope: field,
            is_runtime: true,
            is_optional: true,
        }
    }

    /// Non-runtime, non-optional relationship; the scope equals the field name.
    const fn non_runtime(field: &'static str) -> Self {
        Self {
            field,
            scope: field,
            is_runtime: false,
            is_optional: false,
        }
    }
}

/// Relationship fields that are scanned for dependencies, in output order.
const DEP_FIELDS: &[DepFieldSpec] = &[
    DepFieldSpec::runtime("depends"),
    DepFieldSpec::runtime("pre-depends"),
    DepFieldSpec::runtime_optional("recommends"),
    DepFieldSpec::runtime_optional("suggests"),
    DepFieldSpec::non_runtime("breaks"),
    DepFieldSpec::non_runtime("conflicts"),
    DepFieldSpec::non_runtime("replaces"),
    DepFieldSpec::non_runtime("provides"),
    DepFieldSpec::non_runtime("build-depends"),
    DepFieldSpec::non_runtime("build-depends-indep"),
    DepFieldSpec::non_runtime("build-conflicts"),
];
168
169// ---------------------------------------------------------------------------
170// DebianControlParser: debian/control files (source + binary paragraphs)
171// ---------------------------------------------------------------------------
172
173pub struct DebianControlParser;
174
175impl PackageParser for DebianControlParser {
176    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
177
178    fn is_match(path: &Path) -> bool {
179        if let Some(name) = path.file_name()
180            && name == "control"
181            && let Some(parent) = path.parent()
182            && let Some(parent_name) = parent.file_name()
183        {
184            return parent_name == "debian";
185        }
186        false
187    }
188
189    fn extract_packages(path: &Path) -> Vec<PackageData> {
190        let content = match read_file_to_string(path, None) {
191            Ok(c) => c,
192            Err(e) => {
193                warn!("Failed to read debian/control at {:?}: {}", path, e);
194                return vec![default_package_data(DatasourceId::DebianControlInSource)];
195            }
196        };
197
198        let packages = parse_debian_control(&content);
199        if packages.is_empty() {
200            vec![default_package_data(DatasourceId::DebianControlInSource)]
201        } else {
202            packages
203        }
204    }
205}
206
207// ---------------------------------------------------------------------------
208// DebianInstalledParser: /var/lib/dpkg/status
209// ---------------------------------------------------------------------------
210
211pub struct DebianInstalledParser;
212
213impl PackageParser for DebianInstalledParser {
214    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216    fn is_match(path: &Path) -> bool {
217        let path_str = path.to_string_lossy();
218        path_str.ends_with("var/lib/dpkg/status")
219    }
220
221    fn extract_packages(path: &Path) -> Vec<PackageData> {
222        let content = match read_file_to_string(path, None) {
223            Ok(c) => c,
224            Err(e) => {
225                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
226                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
227            }
228        };
229
230        let packages = parse_dpkg_status(&content);
231        if packages.is_empty() {
232            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
233        } else {
234            packages
235        }
236    }
237}
238
239pub struct DebianDistrolessInstalledParser;
240
241impl PackageParser for DebianDistrolessInstalledParser {
242    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
243
244    fn is_match(path: &Path) -> bool {
245        let path_str = path.to_string_lossy();
246        path_str.contains("var/lib/dpkg/status.d/")
247    }
248
249    fn extract_packages(path: &Path) -> Vec<PackageData> {
250        let content = match read_file_to_string(path, None) {
251            Ok(c) => c,
252            Err(e) => {
253                warn!("Failed to read distroless status file at {:?}: {}", path, e);
254                return vec![default_package_data(
255                    DatasourceId::DebianDistrolessInstalledDb,
256                )];
257            }
258        };
259
260        vec![parse_distroless_status(&content)]
261    }
262}
263
264fn parse_distroless_status(content: &str) -> PackageData {
265    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
266
267    if paragraphs.is_empty() {
268        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
269    }
270
271    build_package_from_paragraph(
272        &paragraphs[0],
273        None,
274        DatasourceId::DebianDistrolessInstalledDb,
275    )
276    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
277}
278
279// ---------------------------------------------------------------------------
280// Parsing logic
281// ---------------------------------------------------------------------------
282
283/// Parses a debian/control file into PackageData entries.
284///
285/// A debian/control file has a Source paragraph followed by one or more Binary
286/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
287/// into each binary package.
288fn parse_debian_control(content: &str) -> Vec<PackageData> {
289    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
290    if paragraphs.is_empty() {
291        return Vec::new();
292    }
293
294    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
295
296    let (source_paragraph, binary_start) = if has_source {
297        (Some(&paragraphs[0]), 1)
298    } else {
299        (None, 0)
300    };
301
302    let source_meta = source_paragraph.map(extract_source_meta);
303
304    let mut packages = Vec::new();
305    let mut count = 0usize;
306
307    for para in &paragraphs[binary_start..] {
308        count += 1;
309        if count > MAX_ITERATION_COUNT {
310            warn!("parse_debian_control: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
311            break;
312        }
313        if let Some(pkg) = build_package_from_paragraph(
314            para,
315            source_meta.as_ref(),
316            DatasourceId::DebianControlInSource,
317        ) {
318            packages.push(pkg);
319        }
320    }
321
322    if packages.is_empty()
323        && let Some(source_para) = source_paragraph
324        && let Some(pkg) = build_package_from_source_paragraph(source_para)
325    {
326        packages.push(pkg);
327    }
328
329    packages
330}
331
332/// Parses a dpkg/status file into PackageData entries.
333///
334/// Each paragraph represents an installed package. Only packages with
335/// `Status: install ok installed` are included.
336fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
337    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
338    let mut packages = Vec::new();
339    let mut count = 0usize;
340
341    for para in &paragraphs {
342        count += 1;
343        if count > MAX_ITERATION_COUNT {
344            warn!("parse_dpkg_status: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
345            break;
346        }
347        let status = rfc822::get_header_first(&para.headers, "status");
348        if status.as_deref() != Some("install ok installed") {
349            continue;
350        }
351
352        if let Some(pkg) =
353            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
354        {
355            packages.push(pkg);
356        }
357    }
358
359    packages
360}
361
362// ---------------------------------------------------------------------------
363// Source paragraph metadata (shared across binary packages)
364// ---------------------------------------------------------------------------
365
366struct SourceMeta {
367    parties: Vec<Party>,
368    homepage_url: Option<String>,
369    vcs_url: Option<String>,
370    code_view_url: Option<String>,
371    bug_tracking_url: Option<String>,
372}
373
374fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
375    let mut parties = Vec::new();
376
377    // Maintainer
378    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
379        let (name, email) = split_name_email(&maintainer);
380        parties.push(Party {
381            r#type: Some("person".to_string()),
382            role: Some("maintainer".to_string()),
383            name,
384            email,
385            url: None,
386            organization: None,
387            organization_url: None,
388            timezone: None,
389        });
390    }
391
392    // Original-Maintainer
393    if let Some(orig_maintainer) =
394        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
395    {
396        let (name, email) = split_name_email(&orig_maintainer);
397        parties.push(Party {
398            r#type: Some("person".to_string()),
399            role: Some("maintainer".to_string()),
400            name,
401            email,
402            url: None,
403            organization: None,
404            organization_url: None,
405            timezone: None,
406        });
407    }
408
409    // Uploaders (comma-separated)
410    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
411        for uploader in uploaders_str.split(',') {
412            let trimmed = uploader.trim();
413            if !trimmed.is_empty() {
414                let (name, email) = split_name_email(trimmed);
415                parties.push(Party {
416                    r#type: Some("person".to_string()),
417                    role: Some("uploader".to_string()),
418                    name,
419                    email,
420                    url: None,
421                    organization: None,
422                    organization_url: None,
423                    timezone: None,
424                });
425            }
426        }
427    }
428
429    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
430
431    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
432        .map(|url| truncate_field(url.split_whitespace().next().unwrap_or(&url).to_string()));
433
434    let code_view_url =
435        rfc822::get_header_first(&paragraph.headers, "vcs-browser").map(truncate_field);
436
437    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs").map(truncate_field);
438
439    SourceMeta {
440        parties,
441        homepage_url,
442        vcs_url,
443        code_view_url,
444        bug_tracking_url,
445    }
446}
447
448// ---------------------------------------------------------------------------
449// Package building
450// ---------------------------------------------------------------------------
451
452fn build_package_from_paragraph(
453    paragraph: &Rfc822Metadata,
454    source_meta: Option<&SourceMeta>,
455    datasource_id: DatasourceId,
456) -> Option<PackageData> {
457    let name = rfc822::get_header_first(&paragraph.headers, "package").map(truncate_field)?;
458    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
459    let architecture =
460        rfc822::get_header_first(&paragraph.headers, "architecture").map(truncate_field);
461    let description =
462        rfc822::get_header_first(&paragraph.headers, "description").map(truncate_field);
463    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
464    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
465    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
466    let section = rfc822::get_header_first(&paragraph.headers, "section");
467    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
468    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
469
470    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
471
472    // Build parties: use source_meta parties if available, otherwise parse from paragraph
473    let parties = if let Some(meta) = source_meta {
474        meta.parties.clone()
475    } else {
476        let mut p = Vec::new();
477        if let Some(m) = &maintainer_str {
478            let (n, e) = split_name_email(m);
479            p.push(Party {
480                r#type: Some("person".to_string()),
481                role: Some("maintainer".to_string()),
482                name: n,
483                email: e,
484                url: None,
485                organization: None,
486                organization_url: None,
487                timezone: None,
488            });
489        }
490        p
491    };
492
493    // Resolve homepage: paragraph's own, or from source metadata
494    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
495    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
496    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
497    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
498
499    // Build PURL
500    let purl = build_debian_purl(
501        &name,
502        version.as_deref(),
503        namespace.as_deref(),
504        architecture.as_deref(),
505    );
506
507    // Parse dependencies from all dependency fields
508    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
509
510    // Keywords from section
511    let keywords = section.into_iter().collect();
512
513    // Source packages
514    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
515
516    // Extra data
517    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
518    if let Some(ma) = &multi_arch
519        && !ma.is_empty()
520    {
521        extra_data.insert(
522            "multi_arch".to_string(),
523            serde_json::Value::String(ma.clone()),
524        );
525    }
526    if let Some(size_str) = &installed_size
527        && let Ok(size) = size_str.parse::<u64>()
528    {
529        extra_data.insert(
530            "installed_size".to_string(),
531            serde_json::Value::Number(serde_json::Number::from(size)),
532        );
533    }
534
535    // Qualifiers for architecture
536    let qualifiers = architecture.as_ref().map(|arch| {
537        let mut q = HashMap::new();
538        q.insert("arch".to_string(), arch.clone());
539        q
540    });
541
542    Some(PackageData {
543        package_type: Some(PACKAGE_TYPE),
544        namespace: namespace.clone(),
545        name: Some(name),
546        version,
547        qualifiers,
548        subpath: None,
549        primary_language: None,
550        description,
551        release_date: None,
552        parties,
553        keywords,
554        homepage_url,
555        download_url: None,
556        size: None,
557        sha1: None,
558        md5: None,
559        sha256: None,
560        sha512: None,
561        bug_tracking_url,
562        code_view_url,
563        vcs_url,
564        copyright: None,
565        holder: None,
566        declared_license_expression: None,
567        declared_license_expression_spdx: None,
568        license_detections: Vec::new(),
569        other_license_expression: None,
570        other_license_expression_spdx: None,
571        other_license_detections: Vec::new(),
572        extracted_license_statement: None,
573        notice_text: None,
574        source_packages,
575        file_references: Vec::new(),
576        is_private: false,
577        is_virtual: false,
578        extra_data: if extra_data.is_empty() {
579            None
580        } else {
581            Some(extra_data)
582        },
583        dependencies,
584        repository_homepage_url: None,
585        repository_download_url: None,
586        api_data_url: None,
587        datasource_id: Some(datasource_id),
588        purl,
589    })
590}
591
592fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
593    let name = rfc822::get_header_first(&paragraph.headers, "source").map(truncate_field)?;
594    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
595    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
596
597    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
598    let source_meta = extract_source_meta(paragraph);
599
600    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
601    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
602
603    let section = rfc822::get_header_first(&paragraph.headers, "section");
604    let keywords = section.into_iter().collect();
605
606    Some(PackageData {
607        package_type: Some(PACKAGE_TYPE),
608        namespace: namespace.clone(),
609        name: Some(name),
610        version,
611        qualifiers: None,
612        subpath: None,
613        primary_language: None,
614        description: None,
615        release_date: None,
616        parties: source_meta.parties,
617        keywords,
618        homepage_url: source_meta.homepage_url,
619        download_url: None,
620        size: None,
621        sha1: None,
622        md5: None,
623        sha256: None,
624        sha512: None,
625        bug_tracking_url: source_meta.bug_tracking_url,
626        code_view_url: source_meta.code_view_url,
627        vcs_url: source_meta.vcs_url,
628        copyright: None,
629        holder: None,
630        declared_license_expression: None,
631        declared_license_expression_spdx: None,
632        license_detections: Vec::new(),
633        other_license_expression: None,
634        other_license_expression_spdx: None,
635        other_license_detections: Vec::new(),
636        extracted_license_statement: None,
637        notice_text: None,
638        source_packages: Vec::new(),
639        file_references: Vec::new(),
640        is_private: false,
641        is_virtual: false,
642        extra_data: None,
643        dependencies,
644        repository_homepage_url: None,
645        repository_download_url: None,
646        api_data_url: None,
647        datasource_id: Some(DatasourceId::DebianControlInSource),
648        purl,
649    })
650}
651
652// ---------------------------------------------------------------------------
653// Namespace detection
654// ---------------------------------------------------------------------------
655
656fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
657    // Check version clues first
658    if let Some(ver) = version {
659        let ver_lower = ver.to_lowercase();
660        for clue in VERSION_CLUES_UBUNTU {
661            if ver_lower.contains(clue) {
662                return Some("ubuntu".to_string());
663            }
664        }
665        for clue in VERSION_CLUES_DEBIAN {
666            if ver_lower.contains(clue) {
667                return Some("debian".to_string());
668            }
669        }
670    }
671
672    // Check maintainer clues
673    if let Some(maint) = maintainer {
674        let maint_lower = maint.to_lowercase();
675        for clue in MAINTAINER_CLUES_UBUNTU {
676            if maint_lower.contains(clue) {
677                return Some("ubuntu".to_string());
678            }
679        }
680        for clue in MAINTAINER_CLUES_DEBIAN {
681            if maint_lower.contains(clue) {
682                return Some("debian".to_string());
683            }
684        }
685    }
686
687    // Default to debian
688    Some("debian".to_string())
689}
690
691// ---------------------------------------------------------------------------
692// PURL generation
693// ---------------------------------------------------------------------------
694
695fn build_debian_purl(
696    name: &str,
697    version: Option<&str>,
698    namespace: Option<&str>,
699    architecture: Option<&str>,
700) -> Option<String> {
701    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
702
703    if let Some(ns) = namespace {
704        purl.with_namespace(ns).ok()?;
705    }
706
707    if let Some(ver) = version {
708        purl.with_version(ver).ok()?;
709    }
710
711    if let Some(arch) = architecture {
712        purl.add_qualifier("arch", arch).ok()?;
713    }
714
715    Some(purl.to_string())
716}
717
718// ---------------------------------------------------------------------------
719// Dependency parsing
720// ---------------------------------------------------------------------------
721
722fn parse_all_dependencies(
723    headers: &HashMap<String, Vec<String>>,
724    namespace: Option<&str>,
725) -> Vec<Dependency> {
726    let mut dependencies = Vec::new();
727
728    for spec in DEP_FIELDS {
729        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
730            dependencies.extend(parse_dependency_field(
731                &dep_str,
732                spec.scope,
733                spec.is_runtime,
734                spec.is_optional,
735                namespace,
736            ));
737        }
738    }
739
740    dependencies
741}
742
743/// Parses a Debian dependency field value.
744///
745/// Debian dependencies are comma-separated, with optional version constraints
746/// in parentheses and alternative packages separated by `|`.
747///
748/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
749///
750/// Alternatives (|) are treated as separate optional dependencies.
751fn parse_dependency_field(
752    dep_str: &str,
753    scope: &str,
754    is_runtime: bool,
755    is_optional: bool,
756    namespace: Option<&str>,
757) -> Vec<Dependency> {
758    let mut deps = Vec::new();
759
760    for group in dep_str.split(',').take(MAX_ITERATION_COUNT) {
761        let group = group.trim();
762        if group.is_empty() {
763            continue;
764        }
765
766        let alternatives: Vec<&str> = group.split('|').collect();
767        let has_alternatives = alternatives.len() > 1;
768
769        for alt in alternatives {
770            let alt = alt.trim();
771            if alt.is_empty() {
772                continue;
773            }
774
775            if let Some(caps) = DEP_RE.captures(alt) {
776                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
777                let operator = caps.get(2).map(|m| m.as_str().trim());
778                let version = caps.get(3).map(|m| m.as_str().trim());
779
780                if pkg_name.is_empty() {
781                    continue;
782                }
783
784                if pkg_name.starts_with('$') {
785                    continue;
786                }
787
788                let extracted_requirement = match (operator, version) {
789                    (Some(op), Some(ver)) => Some(truncate_field(format!("{} {}", op, ver))),
790                    _ => None,
791                };
792
793                let is_pinned = operator.map(|op| op == "=");
794
795                let purl = build_debian_purl(pkg_name, None, namespace, None);
796
797                deps.push(Dependency {
798                    purl,
799                    extracted_requirement,
800                    scope: Some(scope.to_string()),
801                    is_runtime: Some(is_runtime),
802                    is_optional: Some(is_optional || has_alternatives),
803                    is_pinned,
804                    is_direct: Some(true),
805                    resolved_package: None,
806                    extra_data: None,
807                });
808            }
809        }
810    }
811
812    deps
813}
814
815// ---------------------------------------------------------------------------
816// Source field parsing
817// ---------------------------------------------------------------------------
818
819/// Parses the Source field which may contain a version in parentheses.
820///
821/// Format: `source-name` or `source-name (version)`
822fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
823    let Some(source_str) = source else {
824        return Vec::new();
825    };
826
827    let trimmed = source_str.trim();
828    if trimmed.is_empty() {
829        return Vec::new();
830    }
831
832    // Extract name and optional version from "name (version)" format
833    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
834        let name = trimmed[..paren_start].trim();
835        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
836        (
837            name,
838            if version.is_empty() {
839                None
840            } else {
841                Some(version)
842            },
843        )
844    } else {
845        (trimmed, None)
846    };
847
848    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
849        vec![purl]
850    } else {
851        Vec::new()
852    }
853}
854
855// ---------------------------------------------------------------------------
856// Parser registration macros
857// ---------------------------------------------------------------------------
858
// Registration metadata for the three control-file parsers defined above.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path glob patterns, package type,
// primary language, documentation URL) — confirm against the macro definition.

// debian/control in a source tree.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// dpkg installed-package database.
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Per-package status files under status.d.
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
882
883// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
884// but we register it for the single-package interface too.
885
886// ============================================================================
887// WAVE 2 PARSERS: Additional Debian Format Support
888// ============================================================================
889
890/// Parser for Debian Source Control (.dsc) files
891pub struct DebianDscParser;
892
893impl PackageParser for DebianDscParser {
894    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
895
896    fn is_match(path: &Path) -> bool {
897        path.extension().and_then(|e| e.to_str()) == Some("dsc")
898    }
899
900    fn extract_packages(path: &Path) -> Vec<PackageData> {
901        let content = match read_file_to_string(path, None) {
902            Ok(c) => c,
903            Err(e) => {
904                warn!("Failed to read .dsc file {:?}: {}", path, e);
905                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
906            }
907        };
908
909        vec![parse_dsc_content(&content)]
910    }
911}
912
// Registration metadata for the `.dsc` parser.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path glob patterns, package type,
// primary language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Debian source control file (.dsc)",
    &["**/*.dsc"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
920
921fn strip_pgp_signature(content: &str) -> String {
922    let mut result = String::new();
923    let mut in_pgp_block = false;
924    let mut in_signature = false;
925    let mut count = 0usize;
926
927    for line in content.lines() {
928        count += 1;
929        if count > MAX_ITERATION_COUNT {
930            warn!("strip_pgp_signature: exceeded MAX_ITERATION_COUNT lines, stopping");
931            break;
932        }
933        if line.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") {
934            in_pgp_block = true;
935            continue;
936        }
937        if line.starts_with("-----BEGIN PGP SIGNATURE-----") {
938            in_signature = true;
939            continue;
940        }
941        if line.starts_with("-----END PGP SIGNATURE-----") {
942            in_signature = false;
943            continue;
944        }
945        if in_pgp_block && line.starts_with("Hash:") {
946            continue;
947        }
948        if in_pgp_block && line.is_empty() && result.is_empty() {
949            in_pgp_block = false;
950            continue;
951        }
952        if !in_signature {
953            result.push_str(line);
954            result.push('\n');
955        }
956    }
957
958    result
959}
960
961fn parse_dsc_content(content: &str) -> PackageData {
962    let clean_content = strip_pgp_signature(content);
963    let metadata = rfc822::parse_rfc822_content(&clean_content);
964    let headers = &metadata.headers;
965
966    let name = rfc822::get_header_first(headers, "source").map(truncate_field);
967    let version = rfc822::get_header_first(headers, "version").map(truncate_field);
968    let architecture = rfc822::get_header_first(headers, "architecture").map(truncate_field);
969    let namespace = Some("debian".to_string());
970
971    let mut package = PackageData {
972        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
973        package_type: Some(PACKAGE_TYPE),
974        namespace: namespace.clone(),
975        name: name.clone(),
976        version: version.clone(),
977        description: rfc822::get_header_first(headers, "description").map(truncate_field),
978        homepage_url: rfc822::get_header_first(headers, "homepage").map(truncate_field),
979        vcs_url: rfc822::get_header_first(headers, "vcs-git").map(truncate_field),
980        code_view_url: rfc822::get_header_first(headers, "vcs-browser").map(truncate_field),
981        ..Default::default()
982    };
983
984    // Build PURL with architecture qualifier
985    if let (Some(n), Some(v)) = (&name, &version) {
986        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
987    }
988
989    // Set source_packages to point to the source itself (without version)
990    if let Some(n) = &name
991        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
992    {
993        package.source_packages.push(source_purl);
994    }
995
996    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
997        let (name_opt, email_opt) = split_name_email(&maintainer);
998        package.parties.push(Party {
999            r#type: None,
1000            role: Some("maintainer".to_string()),
1001            name: name_opt,
1002            email: email_opt,
1003            url: None,
1004            organization: None,
1005            organization_url: None,
1006            timezone: None,
1007        });
1008    }
1009
1010    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
1011        for uploader in uploaders_str.split(',') {
1012            let uploader = uploader.trim();
1013            if uploader.is_empty() {
1014                continue;
1015            }
1016            let (name_opt, email_opt) = split_name_email(uploader);
1017            package.parties.push(Party {
1018                r#type: None,
1019                role: Some("uploader".to_string()),
1020                name: name_opt,
1021                email: email_opt,
1022                url: None,
1023                organization: None,
1024                organization_url: None,
1025                timezone: None,
1026            });
1027        }
1028    }
1029
1030    // Parse Build-Depends
1031    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1032        package.dependencies.extend(parse_dependency_field(
1033            &build_deps,
1034            "build",
1035            false,
1036            false,
1037            namespace.as_deref(),
1038        ));
1039    }
1040
1041    // Store Standards-Version in extra_data
1042    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1043        let map = package.extra_data.get_or_insert_with(HashMap::new);
1044        map.insert("standards_version".to_string(), standards.into());
1045    }
1046
1047    package
1048}
1049
1050/// Parser for Debian original source tarballs (*.orig.tar.*)
1051pub struct DebianOrigTarParser;
1052
1053impl PackageParser for DebianOrigTarParser {
1054    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1055
1056    fn is_match(path: &Path) -> bool {
1057        path.file_name()
1058            .and_then(|n| n.to_str())
1059            .map(|name| name.contains(".orig.tar."))
1060            .unwrap_or(false)
1061    }
1062
1063    fn extract_packages(path: &Path) -> Vec<PackageData> {
1064        let filename = match path.file_name().and_then(|n| n.to_str()) {
1065            Some(f) => f,
1066            None => {
1067                return vec![default_package_data(
1068                    DatasourceId::DebianOriginalSourceTarball,
1069                )];
1070            }
1071        };
1072
1073        vec![parse_source_tarball_filename(
1074            filename,
1075            DatasourceId::DebianOriginalSourceTarball,
1076        )]
1077    }
1078}
1079
1080crate::register_parser!(
1081    "Debian original source tarball",
1082    &["**/*.orig.tar.*"],
1083    "deb",
1084    "",
1085    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1086);
1087
1088/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1089pub struct DebianDebianTarParser;
1090
1091impl PackageParser for DebianDebianTarParser {
1092    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1093
1094    fn is_match(path: &Path) -> bool {
1095        path.file_name()
1096            .and_then(|n| n.to_str())
1097            .map(|name| name.contains(".debian.tar."))
1098            .unwrap_or(false)
1099    }
1100
1101    fn extract_packages(path: &Path) -> Vec<PackageData> {
1102        let filename = match path.file_name().and_then(|n| n.to_str()) {
1103            Some(f) => f,
1104            None => {
1105                return vec![default_package_data(
1106                    DatasourceId::DebianSourceMetadataTarball,
1107                )];
1108            }
1109        };
1110
1111        vec![parse_source_tarball_filename(
1112            filename,
1113            DatasourceId::DebianSourceMetadataTarball,
1114        )]
1115    }
1116}
1117
1118crate::register_parser!(
1119    "Debian source metadata tarball",
1120    &["**/*.debian.tar.*"],
1121    "deb",
1122    "",
1123    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1124);
1125
1126fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1127    let without_tar_ext = filename
1128        .trim_end_matches(".gz")
1129        .trim_end_matches(".xz")
1130        .trim_end_matches(".bz2")
1131        .trim_end_matches(".tar");
1132
1133    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1134    if parts.len() < 2 {
1135        return default_package_data(datasource_id);
1136    }
1137
1138    let name = truncate_field(parts[0].to_string());
1139    let version_with_suffix = parts[1];
1140
1141    let version = version_with_suffix
1142        .trim_end_matches(".orig")
1143        .trim_end_matches(".debian")
1144        .to_string();
1145    let version = truncate_field(version);
1146
1147    let namespace = Some("debian".to_string());
1148
1149    PackageData {
1150        datasource_id: Some(datasource_id),
1151        package_type: Some(PACKAGE_TYPE),
1152        namespace: namespace.clone(),
1153        name: Some(name.clone()),
1154        version: Some(version.clone()),
1155        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1156        ..Default::default()
1157    }
1158}
1159
1160/// Parser for Debian installed file lists (*.list)
1161pub struct DebianInstalledListParser;
1162
1163impl PackageParser for DebianInstalledListParser {
1164    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1165
1166    fn is_match(path: &Path) -> bool {
1167        path.extension().and_then(|e| e.to_str()) == Some("list")
1168            && path
1169                .to_str()
1170                .map(|p| p.contains("/var/lib/dpkg/info/"))
1171                .unwrap_or(false)
1172    }
1173
1174    fn extract_packages(path: &Path) -> Vec<PackageData> {
1175        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1176            Some(f) => f,
1177            None => {
1178                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1179            }
1180        };
1181
1182        let content = match read_file_to_string(path, None) {
1183            Ok(c) => c,
1184            Err(e) => {
1185                warn!("Failed to read .list file {:?}: {}", path, e);
1186                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1187            }
1188        };
1189
1190        vec![parse_debian_file_list(
1191            &content,
1192            filename,
1193            DatasourceId::DebianInstalledFilesList,
1194        )]
1195    }
1196}
1197
1198crate::register_parser!(
1199    "Debian installed files list",
1200    &["**/var/lib/dpkg/info/*.list"],
1201    "deb",
1202    "",
1203    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1204);
1205
1206/// Parser for Debian installed MD5 checksum files (*.md5sums)
1207pub struct DebianInstalledMd5sumsParser;
1208
1209impl PackageParser for DebianInstalledMd5sumsParser {
1210    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1211
1212    fn is_match(path: &Path) -> bool {
1213        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1214            && path
1215                .to_str()
1216                .map(|p| p.contains("/var/lib/dpkg/info/"))
1217                .unwrap_or(false)
1218    }
1219
1220    fn extract_packages(path: &Path) -> Vec<PackageData> {
1221        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1222            Some(f) => f,
1223            None => {
1224                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1225            }
1226        };
1227
1228        let content = match read_file_to_string(path, None) {
1229            Ok(c) => c,
1230            Err(e) => {
1231                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1232                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1233            }
1234        };
1235
1236        vec![parse_debian_file_list(
1237            &content,
1238            filename,
1239            DatasourceId::DebianInstalledMd5Sums,
1240        )]
1241    }
1242}
1243
1244crate::register_parser!(
1245    "Debian installed package md5sums",
1246    &["**/var/lib/dpkg/info/*.md5sums"],
1247    "deb",
1248    "",
1249    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1250);
1251
1252const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1253
1254fn parse_debian_file_list(
1255    content: &str,
1256    filename: &str,
1257    datasource_id: DatasourceId,
1258) -> PackageData {
1259    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1260        (
1261            Some(truncate_field(pkg.to_string())),
1262            Some(arch.to_string()),
1263        )
1264    } else if filename == "md5sums" {
1265        (None, None)
1266    } else {
1267        (Some(truncate_field(filename.to_string())), None)
1268    };
1269
1270    let mut file_references = Vec::new();
1271    let mut count = 0usize;
1272
1273    for line in content.lines() {
1274        count += 1;
1275        if count > MAX_ITERATION_COUNT {
1276            warn!("parse_debian_file_list: exceeded MAX_ITERATION_COUNT lines, stopping");
1277            break;
1278        }
1279        let line = line.trim();
1280        if line.is_empty() || line.starts_with('#') {
1281            continue;
1282        }
1283
1284        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1285            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1286        } else {
1287            (None, line)
1288        };
1289
1290        if IGNORED_ROOT_DIRS.contains(&path) {
1291            continue;
1292        }
1293
1294        file_references.push(FileReference {
1295            path: path.to_string(),
1296            size: None,
1297            sha1: None,
1298            md5: md5sum,
1299            sha256: None,
1300            sha512: None,
1301            extra_data: None,
1302        });
1303    }
1304
1305    if file_references.is_empty() {
1306        return default_package_data(datasource_id);
1307    }
1308
1309    let namespace = Some("debian".to_string());
1310    let mut package = PackageData {
1311        datasource_id: Some(datasource_id),
1312        package_type: Some(PACKAGE_TYPE),
1313        namespace: namespace.clone(),
1314        name: name.clone(),
1315        file_references,
1316        ..Default::default()
1317    };
1318
1319    if let Some(n) = &name {
1320        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1321    }
1322
1323    package
1324}
1325
1326/// Parser for Debian machine-readable copyright files (DEP-5 format)
1327pub struct DebianCopyrightParser;
1328
1329impl PackageParser for DebianCopyrightParser {
1330    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1331
1332    fn is_match(path: &Path) -> bool {
1333        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1334            if filename != "copyright" {
1335                return filename.ends_with("_copyright");
1336            }
1337            let path_str = path.to_string_lossy();
1338            path_str.contains("/debian/")
1339                || path_str.contains("/ports/")
1340                || path_str.starts_with("ports/")
1341                || path_str.contains("/packages/deb/")
1342                || path_str.contains("/usr/share/doc/")
1343                || path_str.ends_with("debian/copyright")
1344        } else {
1345            false
1346        }
1347    }
1348
1349    fn extract_packages(path: &Path) -> Vec<PackageData> {
1350        let datasource_id = detect_debian_copyright_datasource(path);
1351        let content = match read_file_to_string(path, None) {
1352            Ok(c) => c,
1353            Err(e) => {
1354                warn!("Failed to read copyright file {:?}: {}", path, e);
1355                return vec![default_package_data(datasource_id)];
1356            }
1357        };
1358
1359        let package_name = extract_package_name_from_path(path)
1360            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
1361        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1362        package_data.datasource_id = Some(datasource_id);
1363        vec![package_data]
1364    }
1365}
1366
1367crate::register_parser!(
1368    "Debian machine-readable copyright file",
1369    &[
1370        "**/debian/copyright",
1371        "**/ports/*/copyright",
1372        "**/packages/deb/copyright",
1373        "**/usr/share/doc/*/copyright",
1374        "**/*_copyright"
1375    ],
1376    "deb",
1377    "",
1378    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
1379);
1380
1381fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1382    let path_str = path.to_string_lossy();
1383    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1384        DatasourceId::DebianCopyrightInSource
1385    } else if path_str.contains("/usr/share/doc/") {
1386        DatasourceId::DebianCopyrightInPackage
1387    } else {
1388        DatasourceId::DebianCopyrightStandalone
1389    }
1390}
1391
/// Returns the path component that directly follows the first `doc`
/// directory which is itself followed by a normal component
/// (e.g. `/usr/share/doc/<package>/copyright`).
///
/// Yields `None` when there is no such pair, or when the component after
/// `doc` is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let parts: Vec<_> = path.components().collect();

    parts
        .windows(2)
        .find_map(|pair| match (pair[0], pair[1]) {
            (Component::Normal(dir), Component::Normal(pkg)) if dir.to_str() == Some("doc") => {
                // The outer `Some` stops the search at the first match even
                // when the name cannot be converted.
                Some(pkg.to_str().map(str::to_owned))
            }
            _ => None,
        })
        .flatten()
}
1406
1407fn extract_standalone_package_name_from_path(
1408    path: &Path,
1409    datasource_id: DatasourceId,
1410) -> Option<String> {
1411    if datasource_id != DatasourceId::DebianCopyrightStandalone {
1412        return None;
1413    }
1414
1415    path.file_name()
1416        .and_then(|name| name.to_str())
1417        .filter(|name| *name == "copyright")?;
1418
1419    path.parent()
1420        .and_then(|parent| parent.file_name())
1421        .and_then(|name| name.to_str())
1422        .map(str::to_string)
1423}
1424
1425fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1426    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1427
1428    let is_dep5 = paragraphs
1429        .first()
1430        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1431        .is_some();
1432
1433    let namespace = Some("debian".to_string());
1434    let mut parties = Vec::new();
1435    let mut license_statements = Vec::new();
1436    let mut primary_license_detection = None;
1437    let mut header_license_detection = None;
1438    let mut other_license_detections = Vec::new();
1439
1440    if is_dep5 {
1441        let mut para_count = 0usize;
1442        for para in &paragraphs {
1443            para_count += 1;
1444            if para_count > MAX_ITERATION_COUNT {
1445                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
1446                break;
1447            }
1448            if let Some(copyright_text) =
1449                rfc822::get_header_first(&para.metadata.headers, "copyright")
1450            {
1451                for holder in parse_copyright_holders(&copyright_text) {
1452                    if !holder.is_empty() {
1453                        parties.push(Party {
1454                            r#type: None,
1455                            role: Some("copyright-holder".to_string()),
1456                            name: Some(holder),
1457                            email: None,
1458                            url: None,
1459                            organization: None,
1460                            organization_url: None,
1461                            timezone: None,
1462                        });
1463                    }
1464                }
1465            }
1466
1467            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1468                let license_name = license.lines().next().unwrap_or(&license).trim();
1469                if !license_name.is_empty()
1470                    && !license_statements.contains(&license_name.to_string())
1471                {
1472                    license_statements.push(license_name.to_string());
1473                }
1474
1475                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1476                    let detection =
1477                        build_primary_license_detection(license_name, matched_text, line_no);
1478                    let is_header_paragraph =
1479                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1480                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1481                        == Some("*")
1482                    {
1483                        primary_license_detection = Some(detection);
1484                    } else if is_header_paragraph {
1485                        header_license_detection.get_or_insert(detection);
1486                    } else {
1487                        other_license_detections.push(detection);
1488                    }
1489                }
1490            }
1491        }
1492
1493        if primary_license_detection.is_none() && header_license_detection.is_some() {
1494            primary_license_detection = header_license_detection;
1495        }
1496    } else {
1497        let copyright_block = extract_unstructured_field(content, "Copyright:");
1498        if let Some(text) = copyright_block {
1499            for holder in parse_copyright_holders(&text) {
1500                if !holder.is_empty() {
1501                    parties.push(Party {
1502                        r#type: None,
1503                        role: Some("copyright-holder".to_string()),
1504                        name: Some(holder),
1505                        email: None,
1506                        url: None,
1507                        organization: None,
1508                        organization_url: None,
1509                        timezone: None,
1510                    });
1511                }
1512            }
1513        }
1514
1515        let license_block = extract_unstructured_field(content, "License:");
1516        if let Some(text) = license_block {
1517            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1518        }
1519    }
1520
1521    let extracted_license_statement = if license_statements.is_empty() {
1522        None
1523    } else {
1524        Some(truncate_field(license_statements.join(" AND ")))
1525    };
1526
1527    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1528    let declared_license_expression = license_detections
1529        .first()
1530        .map(|detection| detection.license_expression.clone());
1531    let declared_license_expression_spdx = license_detections
1532        .first()
1533        .map(|detection| detection.license_expression_spdx.clone());
1534    let other_license_expression = combine_license_expressions(
1535        other_license_detections
1536            .iter()
1537            .map(|detection| detection.license_expression.clone()),
1538    );
1539    let other_license_expression_spdx = combine_license_expressions(
1540        other_license_detections
1541            .iter()
1542            .map(|detection| detection.license_expression_spdx.clone()),
1543    );
1544
1545    PackageData {
1546        datasource_id: Some(DatasourceId::DebianCopyright),
1547        package_type: Some(PACKAGE_TYPE),
1548        namespace: namespace.clone(),
1549        name: package_name.map(|s| truncate_field(s.to_string())),
1550        parties,
1551        declared_license_expression,
1552        declared_license_expression_spdx,
1553        license_detections,
1554        other_license_expression,
1555        other_license_expression_spdx,
1556        other_license_detections,
1557        extracted_license_statement,
1558        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1559        ..Default::default()
1560    }
1561}
1562
/// One blank-line-delimited paragraph of a copyright file.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Headers parsed from the paragraph, keyed by lowercased field name.
    /// The `body` is always left empty by `finalize_copyright_paragraph`.
    metadata: Rfc822Metadata,
    /// Text (trailing whitespace removed) and 1-based line number of the
    /// paragraph's first `License:` header line, if it has one.
    license_header_line: Option<(String, usize)>,
}
1568
1569fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1570    let mut paragraphs = Vec::new();
1571    let mut current_lines = Vec::new();
1572    let mut current_start_line = 1usize;
1573    let mut count = 0usize;
1574
1575    for (idx, line) in content.lines().enumerate() {
1576        count += 1;
1577        if count > MAX_ITERATION_COUNT {
1578            warn!(
1579                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
1580            );
1581            break;
1582        }
1583        let line_no = idx + 1;
1584        if line.is_empty() {
1585            if !current_lines.is_empty() {
1586                paragraphs.push(finalize_copyright_paragraph(
1587                    std::mem::take(&mut current_lines),
1588                    current_start_line,
1589                ));
1590            }
1591            current_start_line = line_no + 1;
1592        } else {
1593            if current_lines.is_empty() {
1594                current_start_line = line_no;
1595            }
1596            current_lines.push(line.to_string());
1597        }
1598    }
1599
1600    if !current_lines.is_empty() {
1601        paragraphs.push(finalize_copyright_paragraph(
1602            current_lines,
1603            current_start_line,
1604        ));
1605    }
1606
1607    paragraphs
1608}
1609
1610fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1611    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1612    let mut current_name: Option<String> = None;
1613    let mut current_value = String::new();
1614    let mut license_header_line = None;
1615
1616    for (idx, line) in raw_lines.iter().enumerate() {
1617        if line.starts_with(' ') || line.starts_with('\t') {
1618            if current_name.is_some() {
1619                current_value.push('\n');
1620                current_value.push_str(line);
1621            }
1622            continue;
1623        }
1624
1625        if let Some(name) = current_name.take() {
1626            add_copyright_header_value(&mut headers, &name, &current_value);
1627            current_value.clear();
1628        }
1629
1630        if let Some((name, value)) = line.split_once(':') {
1631            let normalized_name = name.trim().to_ascii_lowercase();
1632            if normalized_name == "license" && license_header_line.is_none() {
1633                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1634            }
1635            current_name = Some(normalized_name);
1636            current_value = value.trim_start().to_string();
1637        }
1638    }
1639
1640    if let Some(name) = current_name.take() {
1641        add_copyright_header_value(&mut headers, &name, &current_value);
1642    }
1643
1644    CopyrightParagraph {
1645        metadata: Rfc822Metadata {
1646            headers,
1647            body: String::new(),
1648        },
1649        license_header_line,
1650    }
1651}
1652
/// Records `value` under `name`, with trailing whitespace removed.
///
/// The key is always created, even when the trimmed value is empty; in that
/// case no value is appended to its list.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();

    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1660
1661fn build_primary_license_detection(
1662    license_name: &str,
1663    matched_text: String,
1664    line_no: usize,
1665) -> LicenseDetection {
1666    let normalized = normalize_debian_license_name(license_name);
1667    let line = match LineNumber::new(line_no) {
1668        Some(l) => l,
1669        None => {
1670            warn!(
1671                "build_primary_license_detection: line number {} out of range, clamping to 1",
1672                line_no
1673            );
1674            LineNumber::new(1).expect("1 is a valid line number")
1675        }
1676    };
1677
1678    build_declared_license_detection(
1679        &normalized,
1680        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1681    )
1682}
1683
1684fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1685    match license_name.trim() {
1686        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1687        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1688        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1689        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1690        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1691        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1692        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1693        "public-domain" => {
1694            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1695        }
1696        other => normalize_declared_license_key(other)
1697            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1698    }
1699}
1700
1701fn parse_copyright_holders(text: &str) -> Vec<String> {
1702    let mut holders = Vec::new();
1703    let mut count = 0usize;
1704
1705    for line in text.lines() {
1706        count += 1;
1707        if count > MAX_ITERATION_COUNT {
1708            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
1709            break;
1710        }
1711        let line = line.trim();
1712        if line.is_empty() {
1713            continue;
1714        }
1715
1716        let cleaned = line
1717            .trim_start_matches("Copyright")
1718            .trim_start_matches("copyright")
1719            .trim_start_matches("(C)")
1720            .trim_start_matches("(c)")
1721            .trim_start_matches("©")
1722            .trim();
1723
1724        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
1725            let without_years = &cleaned[year_end..];
1726            let holder = without_years
1727                .trim_start_matches(',')
1728                .trim_start_matches('-')
1729                .trim();
1730
1731            if !holder.is_empty() && holder.len() > 2 {
1732                holders.push(holder.to_string());
1733            }
1734        }
1735    }
1736
1737    holders
1738}
1739
1740fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
1741    let mut in_field = false;
1742    let mut field_content = String::new();
1743    let mut count = 0usize;
1744
1745    for line in content.lines() {
1746        count += 1;
1747        if count > MAX_ITERATION_COUNT {
1748            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
1749            break;
1750        }
1751        if line.starts_with(field_name) {
1752            in_field = true;
1753            field_content.push_str(line.trim_start_matches(field_name).trim());
1754            field_content.push('\n');
1755        } else if in_field {
1756            if line.starts_with(char::is_whitespace) {
1757                field_content.push_str(line.trim());
1758                field_content.push('\n');
1759            } else if !line.trim().is_empty() {
1760                break;
1761            }
1762        }
1763    }
1764
1765    let trimmed = field_content.trim();
1766    if trimmed.is_empty() {
1767        None
1768    } else {
1769        Some(truncate_field(trimmed.to_string()))
1770    }
1771}
1772
1773/// Parser for Debian binary package archives (.deb files)
1774pub struct DebianDebParser;
1775
1776impl PackageParser for DebianDebParser {
1777    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1778
1779    fn is_match(path: &Path) -> bool {
1780        path.extension().and_then(|e| e.to_str()) == Some("deb")
1781    }
1782
1783    fn extract_packages(path: &Path) -> Vec<PackageData> {
1784        // Try to extract metadata from archive contents first
1785        if let Ok(data) = extract_deb_archive(path) {
1786            return vec![data];
1787        }
1788
1789        // Fallback to filename parsing
1790        let filename = match path.file_name().and_then(|n| n.to_str()) {
1791            Some(f) => f,
1792            None => {
1793                return vec![default_package_data(DatasourceId::DebianDeb)];
1794            }
1795        };
1796
1797        vec![parse_deb_filename(filename)]
1798    }
1799}
1800
1801crate::register_parser!(
1802    "Debian binary package archive (.deb)",
1803    &["**/*.deb"],
1804    "deb",
1805    "",
1806    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
1807);
1808
1809fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1810    use flate2::read::GzDecoder;
1811    use liblzma::read::XzDecoder;
1812    use std::io::{Cursor, Read};
1813
1814    let file_metadata =
1815        std::fs::metadata(path).map_err(|e| format!("Failed to stat .deb file: {}", e))?;
1816    if file_metadata.len() > MAX_ARCHIVE_SIZE {
1817        return Err(format!(
1818            ".deb file exceeds MAX_ARCHIVE_SIZE ({} bytes)",
1819            file_metadata.len()
1820        ));
1821    }
1822    let compressed_size = file_metadata.len() as usize;
1823
1824    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1825
1826    let mut archive = ar::Archive::new(file);
1827    let mut package: Option<PackageData> = None;
1828    let mut total_extracted: usize = 0;
1829
1830    while let Some(entry_result) = archive.next_entry() {
1831        let entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1832
1833        let entry_name_raw = entry.header().identifier();
1834        let entry_name = String::from_utf8_lossy(entry_name_raw);
1835        let had_replacement = entry_name_raw.iter().any(|&b| b > 127);
1836        if had_replacement {
1837            warn!(
1838                "extract_deb_archive: non-UTF-8 bytes in entry name replaced with lossy conversion"
1839            );
1840        }
1841        let entry_name = entry_name.trim().to_string();
1842
1843        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1844            let entry_size = entry.header().size();
1845            if entry_size > MAX_FILE_SIZE {
1846                warn!(
1847                    "extract_deb_archive: control tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1848                    entry_size
1849                );
1850                continue;
1851            }
1852            let mut control_data = Vec::new();
1853            entry
1854                .take(MAX_FILE_SIZE)
1855                .read_to_end(&mut control_data)
1856                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1857
1858            total_extracted += control_data.len();
1859            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1860                warn!(
1861                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1862                );
1863                break;
1864            }
1865            if total_extracted > MAX_ARCHIVE_SIZE as usize {
1866                warn!(
1867                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1868                );
1869                break;
1870            }
1871
1872            if entry_name.ends_with(".gz") {
1873                let decoder = GzDecoder::new(Cursor::new(control_data));
1874                if let Some(parsed_package) =
1875                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1876                {
1877                    package = Some(parsed_package);
1878                }
1879            } else if entry_name.ends_with(".xz") {
1880                let decoder = XzDecoder::new(Cursor::new(control_data));
1881                if let Some(parsed_package) =
1882                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1883                {
1884                    package = Some(parsed_package);
1885                }
1886            }
1887        } else if entry_name.starts_with("data.tar") {
1888            let entry_size = entry.header().size();
1889            if entry_size > MAX_FILE_SIZE {
1890                warn!(
1891                    "extract_deb_archive: data tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1892                    entry_size
1893                );
1894                continue;
1895            }
1896            let mut data = Vec::new();
1897            entry
1898                .take(MAX_FILE_SIZE)
1899                .read_to_end(&mut data)
1900                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1901
1902            total_extracted += data.len();
1903            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1904                warn!(
1905                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1906                );
1907                break;
1908            }
1909            if total_extracted > MAX_ARCHIVE_SIZE as usize {
1910                warn!(
1911                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1912                );
1913                break;
1914            }
1915
1916            let Some(current_package) = package.as_mut() else {
1917                continue;
1918            };
1919
1920            if entry_name.ends_with(".gz") {
1921                let decoder = GzDecoder::new(Cursor::new(data));
1922                merge_deb_data_archive(
1923                    decoder,
1924                    current_package,
1925                    &mut total_extracted,
1926                    compressed_size,
1927                )?;
1928            } else if entry_name.ends_with(".xz") {
1929                let decoder = XzDecoder::new(Cursor::new(data));
1930                merge_deb_data_archive(
1931                    decoder,
1932                    current_package,
1933                    &mut total_extracted,
1934                    compressed_size,
1935                )?;
1936            }
1937        }
1938    }
1939
1940    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1941}
1942
1943fn parse_control_tar_archive<R: std::io::Read>(
1944    reader: R,
1945    total_extracted: &mut usize,
1946    compressed_size: usize,
1947) -> Result<Option<PackageData>, String> {
1948    use std::io::Read;
1949
1950    let mut tar_archive = tar::Archive::new(reader);
1951
1952    for tar_entry_result in tar_archive
1953        .entries()
1954        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1955    {
1956        let tar_entry = tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1957
1958        let tar_path = tar_entry
1959            .path()
1960            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1961
1962        if tar_path
1963            .components()
1964            .any(|c| matches!(c, std::path::Component::ParentDir))
1965        {
1966            warn!(
1967                "parse_control_tar_archive: skipping tar entry with path traversal: {:?}",
1968                tar_path
1969            );
1970            continue;
1971        }
1972
1973        if tar_entry.size() > MAX_FILE_SIZE {
1974            warn!(
1975                "parse_control_tar_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1976                tar_entry.size()
1977            );
1978            continue;
1979        }
1980
1981        if tar_path.ends_with("control") {
1982            let mut control_content = String::new();
1983            tar_entry
1984                .take(MAX_FILE_SIZE)
1985                .read_to_string(&mut control_content)
1986                .map_err(|e| format!("Failed to read control file: {}", e))?;
1987
1988            *total_extracted += control_content.len();
1989            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1990                warn!(
1991                    "parse_control_tar_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1992                );
1993                return Ok(None);
1994            }
1995            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
1996                warn!(
1997                    "parse_control_tar_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1998                );
1999                return Ok(None);
2000            }
2001
2002            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
2003            if paragraphs.is_empty() {
2004                return Err("No paragraphs in control file".to_string());
2005            }
2006
2007            if let Some(package) =
2008                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
2009            {
2010                return Ok(Some(package));
2011            }
2012
2013            return Err("Failed to parse control file".to_string());
2014        }
2015    }
2016
2017    Ok(None)
2018}
2019
2020fn merge_deb_data_archive<R: std::io::Read>(
2021    reader: R,
2022    package: &mut PackageData,
2023    total_extracted: &mut usize,
2024    compressed_size: usize,
2025) -> Result<(), String> {
2026    use std::io::Read;
2027
2028    let mut tar_archive = tar::Archive::new(reader);
2029
2030    for tar_entry_result in tar_archive
2031        .entries()
2032        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
2033    {
2034        let tar_entry =
2035            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
2036
2037        let tar_path = tar_entry
2038            .path()
2039            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
2040
2041        if tar_path
2042            .components()
2043            .any(|c| matches!(c, std::path::Component::ParentDir))
2044        {
2045            warn!(
2046                "merge_deb_data_archive: skipping tar entry with path traversal: {:?}",
2047                tar_path
2048            );
2049            continue;
2050        }
2051
2052        if tar_entry.size() > MAX_FILE_SIZE {
2053            warn!(
2054                "merge_deb_data_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
2055                tar_entry.size()
2056            );
2057            continue;
2058        }
2059
2060        let tar_path_str = tar_path.to_string_lossy();
2061
2062        if tar_path_str.ends_with(&format!(
2063            "/usr/share/doc/{}/copyright",
2064            package.name.as_deref().unwrap_or_default()
2065        )) || tar_path_str.ends_with(&format!(
2066            "usr/share/doc/{}/copyright",
2067            package.name.as_deref().unwrap_or_default()
2068        )) {
2069            let mut copyright_content = String::new();
2070            tar_entry
2071                .take(MAX_FILE_SIZE)
2072                .read_to_string(&mut copyright_content)
2073                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
2074
2075            *total_extracted += copyright_content.len();
2076            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
2077                warn!(
2078                    "merge_deb_data_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
2079                );
2080                return Ok(());
2081            }
2082            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
2083                warn!(
2084                    "merge_deb_data_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
2085                );
2086                return Ok(());
2087            }
2088
2089            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
2090            merge_debian_copyright_into_package(package, &copyright_pkg);
2091            break;
2092        }
2093    }
2094
2095    Ok(())
2096}
2097
2098fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
2099    if target.extracted_license_statement.is_none() {
2100        target.extracted_license_statement = copyright.extracted_license_statement.clone();
2101    }
2102
2103    if target.declared_license_expression.is_none() {
2104        target.declared_license_expression = copyright.declared_license_expression.clone();
2105    }
2106    if target.declared_license_expression_spdx.is_none() {
2107        target.declared_license_expression_spdx =
2108            copyright.declared_license_expression_spdx.clone();
2109    }
2110    if target.license_detections.is_empty() {
2111        target.license_detections = copyright.license_detections.clone();
2112    }
2113    if target.other_license_expression.is_none() {
2114        target.other_license_expression = copyright.other_license_expression.clone();
2115    }
2116    if target.other_license_expression_spdx.is_none() {
2117        target.other_license_expression_spdx = copyright.other_license_expression_spdx.clone();
2118    }
2119    if target.other_license_detections.is_empty() {
2120        target.other_license_detections = copyright.other_license_detections.clone();
2121    }
2122
2123    for party in &copyright.parties {
2124        if !target.parties.iter().any(|existing| {
2125            existing.r#type == party.r#type
2126                && existing.role == party.role
2127                && existing.name == party.name
2128                && existing.email == party.email
2129                && existing.url == party.url
2130                && existing.organization == party.organization
2131                && existing.organization_url == party.organization_url
2132                && existing.timezone == party.timezone
2133        }) {
2134            target.parties.push(party.clone());
2135        }
2136    }
2137}
2138
2139fn parse_deb_filename(filename: &str) -> PackageData {
2140    let without_ext = filename.trim_end_matches(".deb");
2141
2142    let parts: Vec<&str> = without_ext.split('_').collect();
2143    if parts.len() < 2 {
2144        return default_package_data(DatasourceId::DebianDeb);
2145    }
2146
2147    let name = truncate_field(parts[0].to_string());
2148    let version = truncate_field(parts[1].to_string());
2149    let architecture = if parts.len() >= 3 {
2150        Some(truncate_field(parts[2].to_string()))
2151    } else {
2152        None
2153    };
2154
2155    let namespace = Some("debian".to_string());
2156
2157    PackageData {
2158        datasource_id: Some(DatasourceId::DebianDeb),
2159        package_type: Some(PACKAGE_TYPE),
2160        namespace: namespace.clone(),
2161        name: Some(name.clone()),
2162        version: Some(version.clone()),
2163        purl: build_debian_purl(
2164            &name,
2165            Some(&version),
2166            namespace.as_deref(),
2167            architecture.as_deref(),
2168        ),
2169        ..Default::default()
2170    }
2171}
2172
2173/// Parser for control files inside extracted .deb control tarballs.
2174///
2175/// Matches paths like `*/control.tar.gz-extract/control` and
2176/// `*/control.tar.xz-extract/control` which are created by ExtractCode
2177/// when extracting .deb archives.
2178pub struct DebianControlInExtractedDebParser;
2179
2180impl PackageParser for DebianControlInExtractedDebParser {
2181    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2182
2183    fn is_match(path: &Path) -> bool {
2184        path.file_name()
2185            .and_then(|n| n.to_str())
2186            .is_some_and(|name| name == "control")
2187            && path
2188                .to_str()
2189                .map(|p| {
2190                    p.ends_with("control.tar.gz-extract/control")
2191                        || p.ends_with("control.tar.xz-extract/control")
2192                })
2193                .unwrap_or(false)
2194    }
2195
2196    fn extract_packages(path: &Path) -> Vec<PackageData> {
2197        let content = match read_file_to_string(path, None) {
2198            Ok(c) => c,
2199            Err(e) => {
2200                warn!(
2201                    "Failed to read control file in extracted deb {:?}: {}",
2202                    path, e
2203                );
2204                return vec![default_package_data(
2205                    DatasourceId::DebianControlExtractedDeb,
2206                )];
2207            }
2208        };
2209
2210        // A control file inside an extracted .deb has a single paragraph
2211        // (unlike debian/control which has source + binary paragraphs)
2212        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
2213        if paragraphs.is_empty() {
2214            return vec![default_package_data(
2215                DatasourceId::DebianControlExtractedDeb,
2216            )];
2217        }
2218
2219        if let Some(pkg) = build_package_from_paragraph(
2220            &paragraphs[0],
2221            None,
2222            DatasourceId::DebianControlExtractedDeb,
2223        ) {
2224            vec![pkg]
2225        } else {
2226            vec![default_package_data(
2227                DatasourceId::DebianControlExtractedDeb,
2228            )]
2229        }
2230    }
2231}
2232
2233/// Parser for MD5 checksum files inside extracted .deb control tarballs
2234pub struct DebianMd5sumInPackageParser;
2235
2236impl PackageParser for DebianMd5sumInPackageParser {
2237    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2238
2239    fn is_match(path: &Path) -> bool {
2240        path.file_name()
2241            .and_then(|n| n.to_str())
2242            .is_some_and(|name| name == "md5sums")
2243            && path
2244                .to_str()
2245                .map(|p| {
2246                    p.ends_with("control.tar.gz-extract/md5sums")
2247                        || p.ends_with("control.tar.xz-extract/md5sums")
2248                })
2249                .unwrap_or(false)
2250    }
2251
2252    fn extract_packages(path: &Path) -> Vec<PackageData> {
2253        let content = match read_file_to_string(path, None) {
2254            Ok(c) => c,
2255            Err(e) => {
2256                warn!("Failed to read md5sums file {:?}: {}", path, e);
2257                return vec![default_package_data(
2258                    DatasourceId::DebianMd5SumsInExtractedDeb,
2259                )];
2260            }
2261        };
2262
2263        let package_name = extract_package_name_from_deb_path(path);
2264
2265        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2266    }
2267}
2268
/// Recovers the package name from the directory an extracted `.deb` was unpacked into.
///
/// The grandparent directory of `path` must be named `<something>.deb-extract`; the result is
/// the part of `<something>` before its first `_`. Returns `None` when the path has no
/// grandparent, when the directory name is not valid UTF-8, or when it lacks the
/// `.deb-extract` suffix.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    path.ancestors()
        // 0 = the file itself, 1 = its parent, 2 = the `.deb-extract` directory.
        .nth(2)?
        .file_name()?
        .to_str()?
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?
        .split('_')
        .next()
        .map(str::to_owned)
}
2279
2280fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2281    let mut file_references = Vec::new();
2282    let mut count = 0usize;
2283
2284    for line in content.lines() {
2285        count += 1;
2286        if count > MAX_ITERATION_COUNT {
2287            warn!("parse_md5sums_in_package: exceeded MAX_ITERATION_COUNT lines, stopping");
2288            break;
2289        }
2290        let line = line.trim();
2291        if line.is_empty() || line.starts_with('#') {
2292            continue;
2293        }
2294
2295        let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
2296            (
2297                Md5Digest::from_hex(line[..idx].trim()).ok(),
2298                line[idx + 2..].trim(),
2299            )
2300        } else if let Some((hash, path)) = line.split_once(' ') {
2301            (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2302        } else {
2303            (None, line)
2304        };
2305
2306        if IGNORED_ROOT_DIRS.contains(&filepath) {
2307            continue;
2308        }
2309
2310        file_references.push(FileReference {
2311            path: filepath.to_string(),
2312            size: None,
2313            sha1: None,
2314            md5: md5sum,
2315            sha256: None,
2316            sha512: None,
2317            extra_data: None,
2318        });
2319    }
2320
2321    if file_references.is_empty() {
2322        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2323    }
2324
2325    let namespace = Some("debian".to_string());
2326    let mut package = PackageData {
2327        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2328        package_type: Some(PACKAGE_TYPE),
2329        namespace: namespace.clone(),
2330        name: package_name.map(|s| truncate_field(s.to_string())),
2331        file_references,
2332        ..Default::default()
2333    };
2334
2335    if let Some(n) = &package.name {
2336        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2337    }
2338
2339    package
2340}
2341
// Registration metadata for control files unpacked from a .deb control tarball. The globs
// mirror the path suffixes accepted by `DebianControlInExtractedDebParser::is_match`.
// NOTE(review): the meaning of the third ("deb") and fourth ("") arguments is defined by
// `register_parser!`, which is not visible here — confirm before changing them.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// Registration metadata for md5sums files unpacked from a .deb control tarball. The globs
// mirror the path suffixes accepted by `DebianMd5sumInPackageParser::is_match`.
// NOTE(review): third and fourth arguments as above — confirm against `register_parser!`.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2363
2364#[cfg(test)]
2365mod tests {
2366    use super::*;
2367    use crate::models::DatasourceId;
2368    use crate::models::PackageType;
2369    use ar::{Builder as ArBuilder, Header as ArHeader};
2370    use flate2::Compression;
2371    use flate2::write::GzEncoder;
2372    use liblzma::write::XzEncoder;
2373    use std::io::Cursor;
2374    use std::path::PathBuf;
2375    use tar::{Builder as TarBuilder, Header as TarHeader};
2376    use tempfile::NamedTempFile;
2377
2378    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2379        let mut control_tar = Vec::new();
2380        {
2381            let encoder = XzEncoder::new(&mut control_tar, 6);
2382            let mut tar_builder = TarBuilder::new(encoder);
2383
2384            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2385            let mut header = TarHeader::new_gnu();
2386            header
2387                .set_path("control")
2388                .expect("control tar path should be valid");
2389            header.set_size(control_content.len() as u64);
2390            header.set_mode(0o644);
2391            header.set_cksum();
2392            tar_builder
2393                .append(&header, Cursor::new(control_content))
2394                .expect("control file should be appended to tar.xz");
2395            tar_builder.finish().expect("control tar.xz should finish");
2396        }
2397
2398        let deb = NamedTempFile::new().expect("temp deb file should be created");
2399        {
2400            let mut builder = ArBuilder::new(
2401                deb.reopen()
2402                    .expect("temporary deb file should reopen for writing"),
2403            );
2404
2405            let debian_binary = b"2.0\n";
2406            let mut debian_binary_header =
2407                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2408            debian_binary_header.set_mode(0o100644);
2409            builder
2410                .append(&debian_binary_header, Cursor::new(debian_binary))
2411                .expect("debian-binary entry should be appended");
2412
2413            let mut control_header =
2414                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2415            control_header.set_mode(0o100644);
2416            builder
2417                .append(&control_header, Cursor::new(control_tar))
2418                .expect("control.tar.xz entry should be appended");
2419        }
2420
2421        deb
2422    }
2423
2424    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2425        let mut control_tar = Vec::new();
2426        {
2427            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2428            let mut tar_builder = TarBuilder::new(encoder);
2429
2430            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2431            let mut header = TarHeader::new_gnu();
2432            header
2433                .set_path("control")
2434                .expect("control tar path should be valid");
2435            header.set_size(control_content.len() as u64);
2436            header.set_mode(0o644);
2437            header.set_cksum();
2438            tar_builder
2439                .append(&header, Cursor::new(control_content))
2440                .expect("control file should be appended to tar.gz");
2441            tar_builder.finish().expect("control tar.gz should finish");
2442        }
2443
2444        let mut data_tar = Vec::new();
2445        {
2446            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2447            let mut tar_builder = TarBuilder::new(encoder);
2448
2449            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2450            let mut header = TarHeader::new_gnu();
2451            header
2452                .set_path("./usr/share/doc/synthetic/copyright")
2453                .expect("copyright path should be valid");
2454            header.set_size(copyright.len() as u64);
2455            header.set_mode(0o644);
2456            header.set_cksum();
2457            tar_builder
2458                .append(&header, Cursor::new(copyright))
2459                .expect("copyright file should be appended to data tar");
2460            tar_builder.finish().expect("data tar.gz should finish");
2461        }
2462
2463        let deb = NamedTempFile::new().expect("temp deb file should be created");
2464        {
2465            let mut builder = ArBuilder::new(
2466                deb.reopen()
2467                    .expect("temporary deb file should reopen for writing"),
2468            );
2469
2470            let debian_binary = b"2.0\n";
2471            let mut debian_binary_header =
2472                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2473            debian_binary_header.set_mode(0o100644);
2474            builder
2475                .append(&debian_binary_header, Cursor::new(debian_binary))
2476                .expect("debian-binary entry should be appended");
2477
2478            let mut control_header =
2479                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2480            control_header.set_mode(0o100644);
2481            builder
2482                .append(&control_header, Cursor::new(control_tar))
2483                .expect("control.tar.gz entry should be appended");
2484
2485            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2486            data_header.set_mode(0o100644);
2487            builder
2488                .append(&data_header, Cursor::new(data_tar))
2489                .expect("data.tar.gz entry should be appended");
2490        }
2491
2492        deb
2493    }
2494
2495    // ====== Namespace detection ======
2496
2497    #[test]
2498    fn test_detect_namespace_from_ubuntu_version() {
2499        assert_eq!(
2500            detect_namespace(Some("1.0-1ubuntu1"), None),
2501            Some("ubuntu".to_string())
2502        );
2503    }
2504
2505    #[test]
2506    fn test_detect_namespace_from_debian_version() {
2507        assert_eq!(
2508            detect_namespace(Some("1.0-1+deb11u1"), None),
2509            Some("debian".to_string())
2510        );
2511    }
2512
2513    #[test]
2514    fn test_detect_namespace_from_ubuntu_maintainer() {
2515        assert_eq!(
2516            detect_namespace(
2517                None,
2518                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2519            ),
2520            Some("ubuntu".to_string())
2521        );
2522    }
2523
2524    #[test]
2525    fn test_detect_namespace_from_debian_maintainer() {
2526        assert_eq!(
2527            detect_namespace(None, Some("John Doe <john@debian.org>")),
2528            Some("debian".to_string())
2529        );
2530    }
2531
2532    #[test]
2533    fn test_detect_namespace_default() {
2534        assert_eq!(
2535            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2536            Some("debian".to_string())
2537        );
2538    }
2539
2540    #[test]
2541    fn test_detect_namespace_version_takes_priority() {
2542        // Version clue should be checked before maintainer
2543        assert_eq!(
2544            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2545            Some("ubuntu".to_string())
2546        );
2547    }
2548
2549    // ====== PURL generation ======
2550
2551    #[test]
2552    fn test_build_purl_basic() {
2553        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2554        assert_eq!(
2555            purl,
2556            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2557        );
2558    }
2559
2560    #[test]
2561    fn test_build_purl_no_version() {
2562        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2563        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2564    }
2565
2566    #[test]
2567    fn test_build_purl_no_arch() {
2568        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2569        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2570    }
2571
2572    #[test]
2573    fn test_build_purl_no_namespace() {
2574        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2575        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2576    }
2577
2578    // ====== Dependency parsing ======
2579
2580    #[test]
2581    fn test_parse_simple_dependency() {
2582        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2583        assert_eq!(deps.len(), 1);
2584        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2585        assert_eq!(deps[0].extracted_requirement, None);
2586        assert_eq!(deps[0].scope, Some("depends".to_string()));
2587    }
2588
2589    #[test]
2590    fn test_parse_dependency_with_version() {
2591        let deps =
2592            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2593        assert_eq!(deps.len(), 1);
2594        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2595        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2596    }
2597
2598    #[test]
2599    fn test_parse_dependency_exact_version() {
2600        let deps = parse_dependency_field(
2601            "libc6 (= 2.31-13+deb11u5)",
2602            "depends",
2603            true,
2604            false,
2605            Some("debian"),
2606        );
2607        assert_eq!(deps.len(), 1);
2608        assert_eq!(deps[0].is_pinned, Some(true));
2609    }
2610
2611    #[test]
2612    fn test_parse_dependency_strict_less() {
2613        let deps =
2614            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2615        assert_eq!(deps.len(), 1);
2616        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2617        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2618    }
2619
2620    #[test]
2621    fn test_parse_multiple_dependencies() {
2622        let deps = parse_dependency_field(
2623            "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)",
2624            "depends",
2625            true,
2626            false,
2627            Some("debian"),
2628        );
2629        assert_eq!(deps.len(), 3);
2630    }
2631
2632    #[test]
2633    fn test_parse_dependency_alternatives() {
2634        let deps = parse_dependency_field(
2635            "libssl1.1 | libssl3",
2636            "depends",
2637            true,
2638            false,
2639            Some("debian"),
2640        );
2641        assert_eq!(deps.len(), 2);
2642        // Alternatives are marked as optional
2643        assert_eq!(deps[0].is_optional, Some(true));
2644        assert_eq!(deps[1].is_optional, Some(true));
2645    }
2646
2647    #[test]
2648    fn test_parse_dependency_skips_substitutions() {
2649        let deps = parse_dependency_field(
2650            "${shlibs:Depends}, ${misc:Depends}, libc6",
2651            "depends",
2652            true,
2653            false,
2654            Some("debian"),
2655        );
2656        assert_eq!(deps.len(), 1);
2657        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2658    }
2659
2660    #[test]
2661    fn test_parse_dependency_with_arch_qualifier() {
2662        // Dependencies can have [arch] qualifiers which we ignore
2663        let deps = parse_dependency_field(
2664            "libc6 (>= 2.17) [amd64]",
2665            "depends",
2666            true,
2667            false,
2668            Some("debian"),
2669        );
2670        assert_eq!(deps.len(), 1);
2671        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2672    }
2673
2674    #[test]
2675    fn test_parse_empty_dependency() {
2676        let deps = parse_dependency_field("", "depends", true, false, Some("debian"));
2677        assert!(deps.is_empty());
2678    }
2679
2680    // ====== Source field parsing ======
2681
2682    #[test]
2683    fn test_parse_source_field_name_only() {
2684        let sources = parse_source_field(Some("util-linux"), Some("debian"));
2685        assert_eq!(sources.len(), 1);
2686        assert_eq!(sources[0], "pkg:deb/debian/util-linux");
2687    }
2688
2689    #[test]
2690    fn test_parse_source_field_with_version() {
2691        let sources = parse_source_field(Some("util-linux (2.36.1-8+deb11u1)"), Some("debian"));
2692        assert_eq!(sources.len(), 1);
2693        assert_eq!(sources[0], "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1");
2694    }
2695
2696    #[test]
2697    fn test_parse_source_field_empty() {
2698        let sources = parse_source_field(None, Some("debian"));
2699        assert!(sources.is_empty());
2700    }
2701
2702    // ====== Control file parsing ======
2703
2704    #[test]
2705    fn test_parse_debian_control_source_and_binary() {
2706        let content = "\
2707Source: curl
2708Section: web
2709Priority: optional
2710Maintainer: Alessandro Ghedini <ghedo@debian.org>
2711Homepage: https://curl.se/
2712Vcs-Browser: https://salsa.debian.org/debian/curl
2713Vcs-Git: https://salsa.debian.org/debian/curl.git
2714Build-Depends: debhelper (>= 12), libssl-dev
2715
2716Package: curl
2717Architecture: amd64
2718Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
2719Description: command line tool for transferring data with URL syntax";
2720
2721        let packages = parse_debian_control(content);
2722        assert_eq!(packages.len(), 1);
2723
2724        let pkg = &packages[0];
2725        assert_eq!(pkg.name, Some("curl".to_string()));
2726        assert_eq!(pkg.package_type, Some(PackageType::Deb));
2727        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
2728        assert_eq!(
2729            pkg.vcs_url,
2730            Some("https://salsa.debian.org/debian/curl.git".to_string())
2731        );
2732        assert_eq!(
2733            pkg.code_view_url,
2734            Some("https://salsa.debian.org/debian/curl".to_string())
2735        );
2736
2737        // Maintainer from source paragraph
2738        assert_eq!(pkg.parties.len(), 1);
2739        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
2740        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
2741        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));
2742
2743        // Dependencies parsed
2744        assert!(!pkg.dependencies.is_empty());
2745    }
2746
2747    #[test]
2748    fn test_parse_debian_control_multiple_binary() {
2749        let content = "\
2750Source: gzip
2751Maintainer: Debian Developer <dev@debian.org>
2752
2753Package: gzip
2754Architecture: any
2755Depends: libc6 (>= 2.17)
2756Description: GNU file compression
2757
2758Package: gzip-win32
2759Architecture: all
2760Description: gzip for Windows";
2761
2762        let packages = parse_debian_control(content);
2763        assert_eq!(packages.len(), 2);
2764        assert_eq!(packages[0].name, Some("gzip".to_string()));
2765        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));
2766
2767        // Both inherit source maintainer
2768        assert_eq!(packages[0].parties.len(), 1);
2769        assert_eq!(packages[1].parties.len(), 1);
2770    }
2771
2772    #[test]
2773    fn test_parse_debian_control_source_only() {
2774        let content = "\
2775Source: my-package
2776Maintainer: Test User <test@debian.org>
2777Build-Depends: debhelper (>= 13)";
2778
2779        let packages = parse_debian_control(content);
2780        assert_eq!(packages.len(), 1);
2781        assert_eq!(packages[0].name, Some("my-package".to_string()));
2782        // Build-Depends parsed
2783        assert!(!packages[0].dependencies.is_empty());
2784        assert_eq!(
2785            packages[0].dependencies[0].scope,
2786            Some("build-depends".to_string())
2787        );
2788    }
2789
2790    #[test]
2791    fn test_parse_debian_control_with_uploaders() {
2792        let content = "\
2793Source: example
2794Maintainer: Main Dev <main@debian.org>
2795Uploaders: Alice <alice@example.com>, Bob <bob@example.com>
2796
2797Package: example
2798Architecture: any
2799Description: test package";
2800
2801        let packages = parse_debian_control(content);
2802        assert_eq!(packages.len(), 1);
2803        // 1 maintainer + 2 uploaders
2804        assert_eq!(packages[0].parties.len(), 3);
2805        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
2806        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
2807        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
2808    }
2809
2810    #[test]
2811    fn test_parse_debian_control_vcs_git_with_branch() {
2812        let content = "\
2813Source: example
2814Maintainer: Dev <dev@debian.org>
2815Vcs-Git: https://salsa.debian.org/example.git -b main
2816
2817Package: example
2818Architecture: any
2819Description: test";
2820
2821        let packages = parse_debian_control(content);
2822        assert_eq!(packages.len(), 1);
2823        // Should only take the URL, not the branch
2824        assert_eq!(
2825            packages[0].vcs_url,
2826            Some("https://salsa.debian.org/example.git".to_string())
2827        );
2828    }
2829
2830    #[test]
2831    fn test_parse_debian_control_multi_arch() {
2832        let content = "\
2833Source: example
2834Maintainer: Dev <dev@debian.org>
2835
2836Package: libexample
2837Architecture: any
2838Multi-Arch: same
2839Description: shared library";
2840
2841        let packages = parse_debian_control(content);
2842        assert_eq!(packages.len(), 1);
2843        let extra = packages[0].extra_data.as_ref().unwrap();
2844        assert_eq!(
2845            extra.get("multi_arch"),
2846            Some(&serde_json::Value::String("same".to_string()))
2847        );
2848    }
2849
2850    // ====== dpkg/status parsing ======
2851
2852    #[test]
2853    fn test_parse_dpkg_status_basic() {
2854        let content = "\
2855Package: base-files
2856Status: install ok installed
2857Priority: required
2858Section: admin
2859Installed-Size: 391
2860Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
2861Architecture: amd64
2862Version: 11ubuntu5.6
2863Description: Debian base system miscellaneous files
2864Homepage: https://tracker.debian.org/pkg/base-files
2865
2866Package: not-installed
2867Status: deinstall ok config-files
2868Architecture: amd64
2869Version: 1.0
2870Description: This should be skipped";
2871
2872        let packages = parse_dpkg_status(content);
2873        assert_eq!(packages.len(), 1);
2874
2875        let pkg = &packages[0];
2876        assert_eq!(pkg.name, Some("base-files".to_string()));
2877        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
2878        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
2879        assert_eq!(
2880            pkg.datasource_id,
2881            Some(DatasourceId::DebianInstalledStatusDb)
2882        );
2883
2884        // Installed-Size in extra_data
2885        let extra = pkg.extra_data.as_ref().unwrap();
2886        assert_eq!(
2887            extra.get("installed_size"),
2888            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
2889        );
2890    }
2891
2892    #[test]
2893    fn test_parse_dpkg_status_multiple_installed() {
2894        let content = "\
2895Package: libc6
2896Status: install ok installed
2897Architecture: amd64
2898Version: 2.31-13+deb11u5
2899Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
2900Description: GNU C Library
2901
2902Package: zlib1g
2903Status: install ok installed
2904Architecture: amd64
2905Version: 1:1.2.11.dfsg-2+deb11u2
2906Maintainer: Mark Brown <broonie@debian.org>
2907Description: compression library";
2908
2909        let packages = parse_dpkg_status(content);
2910        assert_eq!(packages.len(), 2);
2911        assert_eq!(packages[0].name, Some("libc6".to_string()));
2912        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
2913    }
2914
2915    #[test]
2916    fn test_parse_dpkg_status_with_dependencies() {
2917        let content = "\
2918Package: curl
2919Status: install ok installed
2920Architecture: amd64
2921Version: 7.74.0-1.3+deb11u7
2922Maintainer: Alessandro Ghedini <ghedo@debian.org>
2923Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
2924Recommends: ca-certificates
2925Description: command line tool for transferring data with URL syntax";
2926
2927        let packages = parse_dpkg_status(content);
2928        assert_eq!(packages.len(), 1);
2929
2930        let deps = &packages[0].dependencies;
2931        // 2 from Depends + 1 from Recommends
2932        assert_eq!(deps.len(), 3);
2933
2934        // Check first dependency
2935        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2936        assert_eq!(deps[0].scope, Some("depends".to_string()));
2937        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2938
2939        // Check recommends
2940        assert_eq!(
2941            deps[2].purl,
2942            Some("pkg:deb/debian/ca-certificates".to_string())
2943        );
2944        assert_eq!(deps[2].scope, Some("recommends".to_string()));
2945        assert_eq!(deps[2].is_optional, Some(true));
2946    }
2947
2948    #[test]
2949    fn test_parse_dpkg_status_with_source() {
2950        let content = "\
2951Package: libncurses6
2952Status: install ok installed
2953Architecture: amd64
2954Source: ncurses (6.2+20201114-2+deb11u1)
2955Version: 6.2+20201114-2+deb11u1
2956Maintainer: Craig Small <csmall@debian.org>
2957Description: shared libraries for terminal handling";
2958
2959        let packages = parse_dpkg_status(content);
2960        assert_eq!(packages.len(), 1);
2961        assert!(!packages[0].source_packages.is_empty());
2962        // Source PURL should include version from parentheses
2963        assert!(packages[0].source_packages[0].contains("ncurses"));
2964    }
2965
2966    #[test]
2967    fn test_parse_dpkg_status_filters_not_installed() {
2968        let content = "\
2969Package: installed-pkg
2970Status: install ok installed
2971Version: 1.0
2972Architecture: amd64
2973Description: installed
2974
2975Package: half-installed
2976Status: install ok half-installed
2977Version: 2.0
2978Architecture: amd64
2979Description: half installed
2980
2981Package: deinstall-pkg
2982Status: deinstall ok config-files
2983Version: 3.0
2984Architecture: amd64
2985Description: deinstalled
2986
2987Package: purge-pkg
2988Status: purge ok not-installed
2989Version: 4.0
2990Architecture: amd64
2991Description: purged";
2992
2993        let packages = parse_dpkg_status(content);
2994        assert_eq!(packages.len(), 1);
2995        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
2996    }
2997
2998    #[test]
2999    fn test_parse_dpkg_status_empty() {
3000        let packages = parse_dpkg_status("");
3001        assert!(packages.is_empty());
3002    }
3003
3004    // ====== is_match tests ======
3005
3006    #[test]
3007    fn test_debian_control_is_match() {
3008        assert!(DebianControlParser::is_match(Path::new(
3009            "/path/to/debian/control"
3010        )));
3011        assert!(DebianControlParser::is_match(Path::new("debian/control")));
3012        assert!(!DebianControlParser::is_match(Path::new(
3013            "/path/to/control"
3014        )));
3015        assert!(!DebianControlParser::is_match(Path::new(
3016            "/path/to/debian/changelog"
3017        )));
3018    }
3019
3020    #[test]
3021    fn test_debian_installed_is_match() {
3022        assert!(DebianInstalledParser::is_match(Path::new(
3023            "/var/lib/dpkg/status"
3024        )));
3025        assert!(DebianInstalledParser::is_match(Path::new(
3026            "some/root/var/lib/dpkg/status"
3027        )));
3028        assert!(!DebianInstalledParser::is_match(Path::new(
3029            "/var/lib/dpkg/status.d/something"
3030        )));
3031        assert!(!DebianInstalledParser::is_match(Path::new(
3032            "/var/lib/dpkg/available"
3033        )));
3034    }
3035
3036    // ====== Edge cases ======
3037
3038    #[test]
3039    fn test_parse_debian_control_empty_input() {
3040        let packages = parse_debian_control("");
3041        assert!(packages.is_empty());
3042    }
3043
3044    #[test]
3045    fn test_parse_debian_control_malformed_input() {
3046        let content = "this is not a valid control file\nwith random text";
3047        let packages = parse_debian_control(content);
3048        // Should not panic, may return empty or partial results
3049        assert!(packages.is_empty());
3050    }
3051
3052    #[test]
3053    fn test_dependency_with_epoch_version() {
3054        // Debian versions can have epochs like 1:2.3.4
3055        let deps = parse_dependency_field(
3056            "zlib1g (>= 1:1.2.11)",
3057            "depends",
3058            true,
3059            false,
3060            Some("debian"),
3061        );
3062        assert_eq!(deps.len(), 1);
3063        assert_eq!(
3064            deps[0].extracted_requirement,
3065            Some(">= 1:1.2.11".to_string())
3066        );
3067    }
3068
3069    #[test]
3070    fn test_dependency_with_plus_in_name() {
3071        let deps =
3072            parse_dependency_field("libstdc++6 (>= 10)", "depends", true, false, Some("debian"));
3073        assert_eq!(deps.len(), 1);
3074        assert!(deps[0].purl.as_ref().unwrap().contains("libstdc%2B%2B6"));
3075    }
3076
3077    #[test]
3078    fn test_dsc_parser_is_match() {
3079        assert!(DebianDscParser::is_match(&PathBuf::from("package.dsc")));
3080        assert!(DebianDscParser::is_match(&PathBuf::from(
3081            "adduser_3.118+deb11u1.dsc"
3082        )));
3083        assert!(!DebianDscParser::is_match(&PathBuf::from("control")));
3084        assert!(!DebianDscParser::is_match(&PathBuf::from("package.txt")));
3085    }
3086
3087    #[test]
3088    fn test_dsc_parser_adduser() {
3089        let path = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
3090        let package = DebianDscParser::extract_first_package(&path);
3091
3092        assert_eq!(package.package_type, Some(PACKAGE_TYPE));
3093        assert_eq!(package.namespace, Some("debian".to_string()));
3094        assert_eq!(package.name, Some("adduser".to_string()));
3095        assert_eq!(package.version, Some("3.118+deb11u1".to_string()));
3096        assert_eq!(
3097            package.purl,
3098            Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all".to_string())
3099        );
3100        assert_eq!(
3101            package.vcs_url,
3102            Some("https://salsa.debian.org/debian/adduser.git".to_string())
3103        );
3104        assert_eq!(
3105            package.code_view_url,
3106            Some("https://salsa.debian.org/debian/adduser".to_string())
3107        );
3108        assert_eq!(
3109            package.datasource_id,
3110            Some(DatasourceId::DebianSourceControlDsc)
3111        );
3112
3113        assert_eq!(package.parties.len(), 2);
3114        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3115        assert_eq!(
3116            package.parties[0].name,
3117            Some("Debian Adduser Developers".to_string())
3118        );
3119        assert_eq!(
3120            package.parties[0].email,
3121            Some("adduser@packages.debian.org".to_string())
3122        );
3123        assert_eq!(package.parties[0].r#type, None);
3124
3125        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3126        assert_eq!(package.parties[1].name, Some("Marc Haber".to_string()));
3127        assert_eq!(
3128            package.parties[1].email,
3129            Some("mh+debian-packages@zugschlus.de".to_string())
3130        );
3131        assert_eq!(package.parties[1].r#type, None);
3132
3133        assert_eq!(package.source_packages.len(), 1);
3134        assert_eq!(
3135            package.source_packages[0],
3136            "pkg:deb/debian/adduser".to_string()
3137        );
3138
3139        assert!(!package.dependencies.is_empty());
3140        let build_dep_names: Vec<String> = package
3141            .dependencies
3142            .iter()
3143            .filter_map(|d| d.purl.as_ref())
3144            .filter(|p| p.contains("po-debconf") || p.contains("debhelper"))
3145            .map(|p| p.to_string())
3146            .collect();
3147        assert!(build_dep_names.len() >= 2);
3148    }
3149
3150    #[test]
3151    fn test_dsc_parser_zsh() {
3152        let path = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
3153        let package = DebianDscParser::extract_first_package(&path);
3154
3155        assert_eq!(package.name, Some("zsh".to_string()));
3156        assert_eq!(package.version, Some("5.7.1-1+deb10u1".to_string()));
3157        assert_eq!(package.namespace, Some("debian".to_string()));
3158        assert!(package.purl.is_some());
3159        assert!(package.purl.as_ref().unwrap().contains("zsh"));
3160        assert!(package.purl.as_ref().unwrap().contains("5.7.1"));
3161    }
3162
3163    #[test]
3164    fn test_parse_dsc_content_basic() {
3165        let content = "Format: 3.0 (native)
3166Source: testpkg
3167Binary: testpkg
3168Architecture: amd64
3169Version: 1.0.0
3170Maintainer: Test User <test@example.com>
3171Standards-Version: 4.5.0
3172Build-Depends: debhelper (>= 12)
3173Files:
3174 abc123 1024 testpkg_1.0.0.tar.xz
3175";
3176
3177        let package = parse_dsc_content(content);
3178        assert_eq!(package.name, Some("testpkg".to_string()));
3179        assert_eq!(package.version, Some("1.0.0".to_string()));
3180        assert_eq!(package.namespace, Some("debian".to_string()));
3181        assert_eq!(package.parties.len(), 1);
3182        assert_eq!(package.parties[0].name, Some("Test User".to_string()));
3183        assert_eq!(
3184            package.parties[0].email,
3185            Some("test@example.com".to_string())
3186        );
3187        assert_eq!(package.dependencies.len(), 1);
3188        assert!(package.purl.as_ref().unwrap().contains("arch=amd64"));
3189    }
3190
3191    #[test]
3192    fn test_parse_dsc_content_with_uploaders() {
3193        let content = "Source: mypkg
3194Version: 2.0
3195Architecture: all
3196Maintainer: Main Dev <main@example.com>
3197Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>
3198";
3199
3200        let package = parse_dsc_content(content);
3201        assert_eq!(package.parties.len(), 3);
3202        assert_eq!(package.parties[0].role, Some("maintainer".to_string()));
3203        assert_eq!(package.parties[1].role, Some("uploader".to_string()));
3204        assert_eq!(package.parties[2].role, Some("uploader".to_string()));
3205    }
3206
3207    #[test]
3208    fn test_orig_tar_parser_is_match() {
3209        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3210            "package_1.0.orig.tar.gz"
3211        )));
3212        assert!(DebianOrigTarParser::is_match(&PathBuf::from(
3213            "abseil_0~20200923.3.orig.tar.xz"
3214        )));
3215        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(
3216            "package.debian.tar.gz"
3217        )));
3218        assert!(!DebianOrigTarParser::is_match(&PathBuf::from("control")));
3219    }
3220
3221    #[test]
3222    fn test_debian_tar_parser_is_match() {
3223        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3224            "package_1.0-1.debian.tar.xz"
3225        )));
3226        assert!(DebianDebianTarParser::is_match(&PathBuf::from(
3227            "abseil_20220623.1-1.debian.tar.gz"
3228        )));
3229        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(
3230            "package.orig.tar.gz"
3231        )));
3232        assert!(!DebianDebianTarParser::is_match(&PathBuf::from("control")));
3233    }
3234
3235    #[test]
3236    fn test_parse_orig_tar_filename() {
3237        let pkg = parse_source_tarball_filename(
3238            "abseil_0~20200923.3.orig.tar.gz",
3239            DatasourceId::DebianOriginalSourceTarball,
3240        );
3241        assert_eq!(pkg.name, Some("abseil".to_string()));
3242        assert_eq!(pkg.version, Some("0~20200923.3".to_string()));
3243        assert_eq!(pkg.namespace, Some("debian".to_string()));
3244        assert_eq!(
3245            pkg.purl,
3246            Some("pkg:deb/debian/abseil@0~20200923.3".to_string())
3247        );
3248        assert_eq!(
3249            pkg.datasource_id,
3250            Some(DatasourceId::DebianOriginalSourceTarball)
3251        );
3252    }
3253
3254    #[test]
3255    fn test_parse_debian_tar_filename() {
3256        let pkg = parse_source_tarball_filename(
3257            "abseil_20220623.1-1.debian.tar.xz",
3258            DatasourceId::DebianSourceMetadataTarball,
3259        );
3260        assert_eq!(pkg.name, Some("abseil".to_string()));
3261        assert_eq!(pkg.version, Some("20220623.1-1".to_string()));
3262        assert_eq!(pkg.namespace, Some("debian".to_string()));
3263        assert_eq!(
3264            pkg.purl,
3265            Some("pkg:deb/debian/abseil@20220623.1-1".to_string())
3266        );
3267    }
3268
3269    #[test]
3270    fn test_parse_deb_filename() {
3271        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
3272        assert_eq!(pkg.name, Some("nginx".to_string()));
3273        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
3274
3275        let pkg = parse_deb_filename("invalid.deb");
3276        assert!(pkg.name.is_none());
3277        assert!(pkg.version.is_none());
3278    }
3279
3280    #[test]
3281    fn test_parse_source_tarball_various_compressions() {
3282        let pkg_gz = parse_source_tarball_filename(
3283            "test_1.0.orig.tar.gz",
3284            DatasourceId::DebianOriginalSourceTarball,
3285        );
3286        let pkg_xz = parse_source_tarball_filename(
3287            "test_1.0.orig.tar.xz",
3288            DatasourceId::DebianOriginalSourceTarball,
3289        );
3290        let pkg_bz2 = parse_source_tarball_filename(
3291            "test_1.0.orig.tar.bz2",
3292            DatasourceId::DebianOriginalSourceTarball,
3293        );
3294
3295        assert_eq!(pkg_gz.version, Some("1.0".to_string()));
3296        assert_eq!(pkg_xz.version, Some("1.0".to_string()));
3297        assert_eq!(pkg_bz2.version, Some("1.0".to_string()));
3298    }
3299
3300    #[test]
3301    fn test_parse_source_tarball_invalid_format() {
3302        let pkg = parse_source_tarball_filename(
3303            "invalid-no-underscore.tar.gz",
3304            DatasourceId::DebianOriginalSourceTarball,
3305        );
3306        assert!(pkg.name.is_none());
3307        assert!(pkg.version.is_none());
3308    }
3309
3310    #[test]
3311    fn test_list_parser_is_match() {
3312        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3313            "/var/lib/dpkg/info/bash.list"
3314        )));
3315        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
3316            "/var/lib/dpkg/info/package:amd64.list"
3317        )));
3318        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3319            "bash.list"
3320        )));
3321        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
3322            "/var/lib/dpkg/info/bash.md5sums"
3323        )));
3324    }
3325
3326    #[test]
3327    fn test_md5sums_parser_is_match() {
3328        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3329            "/var/lib/dpkg/info/bash.md5sums"
3330        )));
3331        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3332            "/var/lib/dpkg/info/package:amd64.md5sums"
3333        )));
3334        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3335            "bash.md5sums"
3336        )));
3337        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
3338            "/var/lib/dpkg/info/bash.list"
3339        )));
3340    }
3341
3342    #[test]
3343    fn test_parse_debian_file_list_plain_list() {
3344        let content = "/.
3345/bin
3346/bin/bash
3347/usr/bin/bashbug
3348/usr/share/doc/bash/README
3349";
3350        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3351        assert_eq!(pkg.name, Some("bash".to_string()));
3352        assert_eq!(pkg.file_references.len(), 3);
3353        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3354        assert_eq!(pkg.file_references[0].md5, None);
3355        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
3356        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
3357    }
3358
3359    #[test]
3360    fn test_parse_debian_file_list_md5sums() {
3361        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash
33621c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug
3363f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz
3364";
3365        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3366        assert_eq!(pkg.name, Some("bash".to_string()));
3367        assert_eq!(pkg.file_references.len(), 3);
3368        assert_eq!(pkg.file_references[0].path, "bin/bash");
3369        assert_eq!(
3370            pkg.file_references[0].md5,
3371            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
3372        );
3373        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
3374        assert_eq!(
3375            pkg.file_references[1].md5,
3376            Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
3377        );
3378    }
3379
3380    #[test]
3381    fn test_parse_debian_file_list_with_arch() {
3382        let content = "/usr/bin/foo
3383/usr/lib/x86_64-linux-gnu/libfoo.so
3384";
3385        let pkg = parse_debian_file_list(
3386            content,
3387            "libfoo:amd64",
3388            DatasourceId::DebianInstalledFilesList,
3389        );
3390        assert_eq!(pkg.name, Some("libfoo".to_string()));
3391        assert!(pkg.purl.is_some());
3392        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
3393        assert_eq!(pkg.file_references.len(), 2);
3394    }
3395
3396    #[test]
3397    fn test_parse_debian_file_list_skips_comments_and_empty() {
3398        let content = "# This is a comment
3399/bin/bash
3400
3401/usr/bin/bashbug
3402  
3403";
3404        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3405        assert_eq!(pkg.file_references.len(), 2);
3406    }
3407
3408    #[test]
3409    fn test_parse_debian_file_list_md5sums_only() {
3410        let content = "abc123  usr/bin/tool
3411";
3412        let pkg =
3413            parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledFilesList);
3414        assert_eq!(pkg.name, None);
3415        assert_eq!(pkg.file_references.len(), 1);
3416    }
3417
3418    #[test]
3419    fn test_parse_debian_file_list_ignores_root_dirs() {
3420        let content = "/.
3421/bin
3422/bin/bash
3423/etc
3424/usr
3425/var
3426";
3427        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
3428        assert_eq!(pkg.file_references.len(), 1);
3429        assert_eq!(pkg.file_references[0].path, "/bin/bash");
3430    }
3431
3432    #[test]
3433    fn test_copyright_parser_is_match() {
3434        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3435            "/usr/share/doc/bash/copyright"
3436        )));
3437        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3438            "debian/copyright"
3439        )));
3440        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3441            "src/third_party/gperftools/dist/packages/deb/copyright"
3442        )));
3443        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3444            "ports/zlib/copyright"
3445        )));
3446        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3447            "copyright.txt"
3448        )));
3449        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(
3450            "/etc/copyright"
3451        )));
3452        assert!(DebianCopyrightParser::is_match(&PathBuf::from(
3453            "/tmp/sample_copyright"
3454        )));
3455    }
3456
3457    #[test]
3458    fn test_detect_debian_copyright_datasource() {
3459        assert_eq!(
3460            detect_debian_copyright_datasource(&PathBuf::from("debian/copyright")),
3461            DatasourceId::DebianCopyrightInSource
3462        );
3463        assert_eq!(
3464            detect_debian_copyright_datasource(&PathBuf::from(
3465                "src/third_party/gperftools/dist/packages/deb/copyright"
3466            )),
3467            DatasourceId::DebianCopyrightStandalone
3468        );
3469        assert_eq!(
3470            detect_debian_copyright_datasource(&PathBuf::from("ports/zlib/copyright")),
3471            DatasourceId::DebianCopyrightStandalone
3472        );
3473        assert_eq!(
3474            detect_debian_copyright_datasource(&PathBuf::from("/usr/share/doc/bash/copyright")),
3475            DatasourceId::DebianCopyrightInPackage
3476        );
3477        assert_eq!(
3478            detect_debian_copyright_datasource(&PathBuf::from("stable_copyright")),
3479            DatasourceId::DebianCopyrightStandalone
3480        );
3481    }
3482
3483    #[test]
3484    fn test_extract_package_name_from_path() {
3485        assert_eq!(
3486            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/bash/copyright")),
3487            Some("bash".to_string())
3488        );
3489        assert_eq!(
3490            extract_package_name_from_path(&PathBuf::from("/usr/share/doc/libseccomp2/copyright")),
3491            Some("libseccomp2".to_string())
3492        );
3493        assert_eq!(
3494            extract_package_name_from_path(&PathBuf::from("debian/copyright")),
3495            None
3496        );
3497        assert_eq!(
3498            extract_standalone_package_name_from_path(
3499                &PathBuf::from("ports/zlib/copyright"),
3500                DatasourceId::DebianCopyrightStandalone,
3501            ),
3502            Some("zlib".to_string())
3503        );
3504    }
3505
3506    #[test]
3507    fn test_parse_copyright_dep5_format() {
3508        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3509Upstream-Name: libseccomp
3510Source: https://sourceforge.net/projects/libseccomp/
3511
3512Files: *
3513Copyright: 2012 Paul Moore <pmoore@redhat.com>
3514 2012 Ashley Lai <adlai@us.ibm.com>
3515License: LGPL-2.1
3516
3517License: LGPL-2.1
3518 This library is free software
3519";
3520        let pkg = parse_copyright_file(content, Some("libseccomp"));
3521        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3522        assert_eq!(pkg.namespace, Some("debian".to_string()));
3523        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3524        assert_eq!(
3525            pkg.extracted_license_statement,
3526            Some("LGPL-2.1".to_string())
3527        );
3528        assert!(pkg.parties.len() >= 2);
3529        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3530        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3531    }
3532
3533    #[test]
3534    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3535        let path = PathBuf::from(
3536            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3537        );
3538        let pkg = DebianCopyrightParser::extract_first_package(&path);
3539
3540        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3541        let extracted = pkg
3542            .extracted_license_statement
3543            .as_deref()
3544            .expect("license statement should exist");
3545        assert!(extracted.contains("GPL-2+"));
3546        assert!(!pkg.license_detections.is_empty());
3547
3548        let primary = &pkg.license_detections[0];
3549        assert_eq!(
3550            primary.matches[0].matched_text.as_deref(),
3551            Some("License: GPL-2+")
3552        );
3553        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
3554        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
3555    }
3556
3557    #[test]
3558    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3559        let path = PathBuf::from("testdata/debian/copyright/copyright");
3560        let pkg = DebianCopyrightParser::extract_first_package(&path);
3561
3562        assert_eq!(pkg.license_detections.len(), 1);
3563        assert_eq!(pkg.other_license_detections.len(), 4);
3564
3565        let primary = &pkg.license_detections[0];
3566        assert_eq!(
3567            primary.matches[0].matched_text.as_deref(),
3568            Some("License: LGPL-2.1")
3569        );
3570        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());
3571
3572        let ordered_lines: Vec<usize> = pkg
3573            .other_license_detections
3574            .iter()
3575            .map(|detection| detection.matches[0].start_line.get())
3576            .collect();
3577        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3578
3579        let ordered_texts: Vec<&str> = pkg
3580            .other_license_detections
3581            .iter()
3582            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3583            .collect();
3584        assert_eq!(
3585            ordered_texts,
3586            vec![
3587                "License: LGPL-2.1",
3588                "License: LGPL-2.1",
3589                "License: LGPL-2.1",
3590                "License: LGPL-2.1",
3591            ]
3592        );
3593    }
3594
3595    #[test]
3596    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3597        let path = PathBuf::from(
3598            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3599        );
3600        let pkg = DebianCopyrightParser::extract_first_package(&path);
3601
3602        let zlib = pkg
3603            .other_license_detections
3604            .iter()
3605            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3606            .expect("at least one Zlib license paragraph should be detected");
3607        assert_eq!(
3608            zlib.matches[0].matched_text.as_deref(),
3609            Some("License: Zlib")
3610        );
3611
3612        let last_zlib = pkg
3613            .other_license_detections
3614            .iter()
3615            .rev()
3616            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3617            .expect("bottom standalone Zlib license paragraph should be detected");
3618        assert_eq!(
3619            last_zlib.matches[0].start_line,
3620            LineNumber::new(732).unwrap()
3621        );
3622        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
3623    }
3624
3625    #[test]
3626    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3627        let path =
3628            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3629        let pkg = DebianCopyrightParser::extract_first_package(&path);
3630
3631        assert_eq!(pkg.license_detections.len(), 1);
3632        let primary = &pkg.license_detections[0];
3633        assert_eq!(
3634            primary.matches[0].matched_text.as_deref(),
3635            Some("License: LGPL-3+ or GPL-2+")
3636        );
3637        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
3638        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());
3639
3640        assert!(pkg.other_license_detections.iter().any(|detection| {
3641            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3642        }));
3643    }
3644
3645    #[test]
3646    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3647        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3648        let pkg = parse_copyright_file(content, Some("foo"));
3649
3650        assert_eq!(pkg.license_detections.len(), 1);
3651        let primary = &pkg.license_detections[0];
3652        assert_eq!(
3653            primary.matches[0].matched_text.as_deref(),
3654            Some("License: GPL-2+")
3655        );
3656        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
3657    }
3658
3659    #[test]
3660    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3661        let raw_lines = vec![
3662            "Files: *".to_string(),
3663            "Copyright: 2024 Example Org".to_string(),
3664            "License: Apache-2.0".to_string(),
3665            " Licensed under the Apache License, Version 2.0.".to_string(),
3666        ];
3667
3668        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3669        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3670            .into_iter()
3671            .next()
3672            .expect("reference RFC822 paragraph should parse");
3673
3674        assert_eq!(paragraph.metadata.headers, expected.headers);
3675        assert_eq!(paragraph.metadata.body, expected.body);
3676        assert_eq!(
3677            paragraph.license_header_line,
3678            Some(("License: Apache-2.0".to_string(), 12))
3679        );
3680    }
3681
3682    #[test]
3683    fn test_parse_copyright_unstructured() {
3684        let content = "This package was debianized by John Doe.
3685
3686Upstream Authors:
3687    Jane Smith
3688
3689Copyright:
3690    2009 10gen
3691
3692License:
3693    SSPL
3694";
3695        let pkg = parse_copyright_file(content, Some("mongodb"));
3696        assert_eq!(pkg.name, Some("mongodb".to_string()));
3697        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3698        assert!(!pkg.parties.is_empty());
3699    }
3700
3701    #[test]
3702    fn test_parse_copyright_holders() {
3703        let text = "2012 Paul Moore <pmoore@redhat.com>
37042012 Ashley Lai <adlai@us.ibm.com>
3705Copyright (C) 2015-2018 Example Corp";
3706        let holders = parse_copyright_holders(text);
3707        assert!(holders.len() >= 3);
3708        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3709        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3710    }
3711
3712    #[test]
3713    fn test_parse_copyright_empty() {
3714        let content = "This is just some text without proper copyright info.";
3715        let pkg = parse_copyright_file(content, Some("test"));
3716        assert_eq!(pkg.name, Some("test".to_string()));
3717        assert!(pkg.parties.is_empty());
3718        assert!(pkg.extracted_license_statement.is_none());
3719    }
3720
3721    #[test]
3722    fn test_merge_debian_copyright_into_package_preserves_license_fields() {
3723        let copyright = parse_copyright_file(
3724            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n\
3725             Upstream-Name: demo\n\n\
3726             Files: *\n\
3727             Copyright: 2024 Example\n\
3728             License: MIT\n\n\
3729             Files: debian/*\n\
3730             Copyright: 2024 Debian Example\n\
3731             License: Apache-2.0\n",
3732            Some("demo"),
3733        );
3734        let mut target = default_package_data(DatasourceId::DebianDeb);
3735
3736        merge_debian_copyright_into_package(&mut target, &copyright);
3737
3738        assert_eq!(target.declared_license_expression.as_deref(), Some("mit"));
3739        assert_eq!(
3740            target.declared_license_expression_spdx.as_deref(),
3741            Some("MIT")
3742        );
3743        assert_eq!(
3744            target.other_license_expression.as_deref(),
3745            Some("apache-2.0")
3746        );
3747        assert_eq!(
3748            target.other_license_expression_spdx.as_deref(),
3749            Some("Apache-2.0")
3750        );
3751        assert_eq!(target.license_detections.len(), 1);
3752        assert_eq!(target.other_license_detections.len(), 1);
3753    }
3754
3755    #[test]
3756    fn test_deb_parser_is_match() {
3757        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3758        assert!(DebianDebParser::is_match(&PathBuf::from(
3759            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3760        )));
3761        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3762        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3763    }
3764
3765    #[test]
3766    fn test_parse_deb_filename_with_arch() {
3767        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3768        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3769        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3770        assert_eq!(pkg.namespace, Some("debian".to_string()));
3771        assert_eq!(
3772            pkg.purl,
3773            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3774        );
3775        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3776    }
3777
3778    #[test]
3779    fn test_parse_deb_filename_without_arch() {
3780        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3781        assert_eq!(pkg.name, Some("package".to_string()));
3782        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3783        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3784    }
3785
3786    #[test]
3787    fn test_extract_deb_archive() {
3788        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3789        if !test_path.exists() {
3790            return;
3791        }
3792
3793        let pkg = DebianDebParser::extract_first_package(&test_path);
3794
3795        assert_eq!(pkg.name, Some("adduser".to_string()));
3796        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3797        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3798        assert!(pkg.description.is_some());
3799        assert!(!pkg.parties.is_empty());
3800
3801        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3802        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3803    }
3804
3805    #[test]
3806    fn test_extract_deb_archive_with_control_tar_xz() {
3807        let deb = create_synthetic_deb_with_control_tar_xz();
3808
3809        let pkg = DebianDebParser::extract_first_package(deb.path());
3810
3811        assert_eq!(pkg.name, Some("synthetic".to_string()));
3812        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3813        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3814        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3815    }
3816
3817    #[test]
3818    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3819        let deb = create_synthetic_deb_with_copyright();
3820
3821        let pkg = DebianDebParser::extract_first_package(deb.path());
3822
3823        assert_eq!(pkg.name, Some("synthetic".to_string()));
3824        assert_eq!(
3825            pkg.extracted_license_statement,
3826            Some("Apache-2.0".to_string())
3827        );
3828        assert!(pkg.parties.iter().any(|party| {
3829            party.role.as_deref() == Some("copyright-holder")
3830                && party.name.as_deref() == Some("Example Org")
3831        }));
3832    }
3833
3834    #[test]
3835    fn test_parse_deb_filename_simple() {
3836        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3837        assert_eq!(pkg.name, Some("adduser".to_string()));
3838        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3839        assert_eq!(pkg.namespace, Some("debian".to_string()));
3840    }
3841
3842    #[test]
3843    fn test_parse_deb_filename_invalid() {
3844        let pkg = parse_deb_filename("invalid.deb");
3845        assert!(pkg.name.is_none());
3846        assert!(pkg.version.is_none());
3847    }
3848
3849    #[test]
3850    fn test_distroless_parser() {
3851        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3852
3853        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3854
3855        if !test_file.exists() {
3856            eprintln!("Warning: Test file not found, skipping test");
3857            return;
3858        }
3859
3860        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3861
3862        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3863        assert_eq!(
3864            pkg.datasource_id,
3865            Some(DatasourceId::DebianDistrolessInstalledDb)
3866        );
3867        assert_eq!(pkg.name, Some("base-files".to_string()));
3868        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3869        assert_eq!(pkg.namespace, Some("debian".to_string()));
3870        assert!(pkg.purl.is_some());
3871        assert!(
3872            pkg.purl
3873                .as_ref()
3874                .unwrap()
3875                .contains("pkg:deb/debian/base-files")
3876        );
3877    }
3878}