Skip to main content

provenant/parsers/
debian.rs

1//! Parser for Debian package metadata files.
2//!
3//! Extracts package metadata from Debian package management files using RFC 822
4//! format parsing for control files and installed package databases.
5//!
6//! # Supported Formats
7//! - `debian/control` (Source package control files - multi-paragraph)
8//! - `/var/lib/dpkg/status` (Installed package database - multi-paragraph)
9//! - `/var/lib/dpkg/status.d/*` (Distroless installed packages)
10//! - `*.dsc` (Debian source control files)
11//! - `*.orig.tar.*` (Original upstream tarballs)
12//! - `*.debian.tar.*` (Debian packaging tarballs)
13//! - `/var/lib/dpkg/info/*.list` (Installed file lists)
14//! - `/var/lib/dpkg/info/*.md5sums` (Installed file checksums)
15//! - `debian/copyright` (Copyright/license declarations)
16//! - `*.deb` (Debian binary package archives)
17//! - `control` (extracted from .deb archives)
18//! - `md5sums` (extracted from .deb archives)
19//!
20//! # Key Features
21//! - RFC 822 format parsing for control files
22//! - Dependency extraction with scope tracking (Depends, Build-Depends, etc.)
23//! - Debian vs Ubuntu namespace detection from version and maintainer fields
24//! - Multi-paragraph record parsing for package databases
25//! - License and copyright information extraction
26//! - Package URL (purl) generation with namespace
27//!
28//! # Implementation Notes
29//! - Uses RFC 822 parser from `crate::parsers::rfc822` module
30//! - Multi-paragraph records separated by blank lines
31//! - Graceful error handling with `warn!()` logs
32
33use std::collections::HashMap;
34use std::path::Path;
35use std::sync::LazyLock;
36
37use crate::parser_warn as warn;
38use packageurl::PackageUrl;
39use regex::Regex;
40
41use crate::models::{
42    DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData,
43    PackageType, Party,
44};
45use crate::parsers::rfc822::{self, Rfc822Metadata};
46use crate::parsers::utils::{
47    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
48};
49use crate::utils::spdx::combine_license_expressions;
50
51use super::PackageParser;
52use super::license_normalization::{
53    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_detection,
54    normalize_declared_license_key,
55};
56
/// Package type used by every parser in this file and as the purl type in
/// `build_debian_purl` (via `PACKAGE_TYPE.as_str()`).
const PACKAGE_TYPE: PackageType = PackageType::Deb;
58
// Resource ceilings. NOTE(review): nothing in the visible part of this file reads
// these; presumably they guard archive handling further down — confirm at the use sites.

/// 1 GiB when interpreted as a byte count.
const MAX_ARCHIVE_SIZE: u64 = 1 << 30;
/// 50 MiB when interpreted as a byte count.
const MAX_FILE_SIZE: u64 = 50 << 20;
/// Largest accepted ratio (plain integer factor).
const MAX_COMPRESSION_RATIO: usize = 100;
62
63static DEP_RE: LazyLock<Regex> = LazyLock::new(|| {
64    Regex::new(
65        r"^\s*([a-zA-Z0-9][a-zA-Z0-9.+\-]+)\s*(?:\(([<>=!]+)\s*([^)]+)\))?\s*(?:\[.*\])?\s*$",
66    )
67    .expect("compile-time constant dependency regex")
68});
69
70fn default_package_data(datasource_id: DatasourceId) -> PackageData {
71    PackageData {
72        package_type: Some(PACKAGE_TYPE),
73        datasource_id: Some(datasource_id),
74        ..Default::default()
75    }
76}
77
// Namespace detection clues from version strings.
// `detect_namespace` lowercases the version and does a plain substring search,
// trying the Ubuntu clues before the Debian ones.
const VERSION_CLUES_DEBIAN: &[&str] = &["deb"];
const VERSION_CLUES_UBUNTU: &[&str] = &["ubuntu"];

// Namespace detection clues from maintainer fields.
// Only consulted by `detect_namespace` when the version gave no answer; matching is
// again a substring search on the lowercased value, Ubuntu clues first.
const MAINTAINER_CLUES_DEBIAN: &[&str] = &[
    "packages.debian.org",
    "lists.debian.org",
    "lists.alioth.debian.org",
    "@debian.org",
    "debian-init-diversity@",
];
const MAINTAINER_CLUES_UBUNTU: &[&str] = &["lists.ubuntu.com", "@canonical.com"];
91
/// Describes one relationship field of a control paragraph and the flags that
/// every dependency extracted from it receives.
struct DepFieldSpec {
    /// Lowercase header name looked up in the paragraph.
    field: &'static str,
    /// Scope label stored on each resulting dependency.
    scope: &'static str,
    is_runtime: bool,
    is_optional: bool,
}

impl DepFieldSpec {
    /// Creates a spec whose scope label is the field name itself, which holds for
    /// every entry of `DEP_FIELDS`.
    const fn new(field: &'static str, is_runtime: bool, is_optional: bool) -> Self {
        Self {
            field,
            scope: field,
            is_runtime,
            is_optional,
        }
    }
}

/// Relationship fields scanned for dependencies, in output order.
const DEP_FIELDS: &[DepFieldSpec] = &[
    //                field                  runtime optional
    DepFieldSpec::new("depends", true, false),
    DepFieldSpec::new("pre-depends", true, false),
    DepFieldSpec::new("recommends", true, true),
    DepFieldSpec::new("suggests", true, true),
    DepFieldSpec::new("breaks", false, false),
    DepFieldSpec::new("conflicts", false, false),
    DepFieldSpec::new("replaces", false, false),
    DepFieldSpec::new("provides", false, false),
    DepFieldSpec::new("build-depends", false, false),
    DepFieldSpec::new("build-depends-indep", false, false),
    DepFieldSpec::new("build-conflicts", false, false),
];
168
169// ---------------------------------------------------------------------------
170// DebianControlParser: debian/control files (source + binary paragraphs)
171// ---------------------------------------------------------------------------
172
173pub struct DebianControlParser;
174
175impl PackageParser for DebianControlParser {
176    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
177
178    fn is_match(path: &Path) -> bool {
179        if let Some(name) = path.file_name()
180            && name == "control"
181            && let Some(parent) = path.parent()
182            && let Some(parent_name) = parent.file_name()
183        {
184            return parent_name == "debian";
185        }
186        false
187    }
188
189    fn extract_packages(path: &Path) -> Vec<PackageData> {
190        let content = match read_file_to_string(path, None) {
191            Ok(c) => c,
192            Err(e) => {
193                warn!("Failed to read debian/control at {:?}: {}", path, e);
194                return vec![default_package_data(DatasourceId::DebianControlInSource)];
195            }
196        };
197
198        let packages = parse_debian_control(&content);
199        if packages.is_empty() {
200            vec![default_package_data(DatasourceId::DebianControlInSource)]
201        } else {
202            packages
203        }
204    }
205}
206
207// ---------------------------------------------------------------------------
208// DebianInstalledParser: /var/lib/dpkg/status
209// ---------------------------------------------------------------------------
210
211pub struct DebianInstalledParser;
212
213impl PackageParser for DebianInstalledParser {
214    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
215
216    fn is_match(path: &Path) -> bool {
217        let path_str = path.to_string_lossy();
218        path_str.ends_with("var/lib/dpkg/status")
219    }
220
221    fn extract_packages(path: &Path) -> Vec<PackageData> {
222        let content = match read_file_to_string(path, None) {
223            Ok(c) => c,
224            Err(e) => {
225                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
226                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
227            }
228        };
229
230        let packages = parse_dpkg_status(&content);
231        if packages.is_empty() {
232            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
233        } else {
234            packages
235        }
236    }
237}
238
239pub struct DebianDistrolessInstalledParser;
240
241impl PackageParser for DebianDistrolessInstalledParser {
242    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
243
244    fn is_match(path: &Path) -> bool {
245        let path_str = path.to_string_lossy();
246        path_str.contains("var/lib/dpkg/status.d/")
247    }
248
249    fn extract_packages(path: &Path) -> Vec<PackageData> {
250        let content = match read_file_to_string(path, None) {
251            Ok(c) => c,
252            Err(e) => {
253                warn!("Failed to read distroless status file at {:?}: {}", path, e);
254                return vec![default_package_data(
255                    DatasourceId::DebianDistrolessInstalledDb,
256                )];
257            }
258        };
259
260        vec![parse_distroless_status(&content)]
261    }
262}
263
264fn parse_distroless_status(content: &str) -> PackageData {
265    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
266
267    if paragraphs.is_empty() {
268        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
269    }
270
271    build_package_from_paragraph(
272        &paragraphs[0],
273        None,
274        DatasourceId::DebianDistrolessInstalledDb,
275    )
276    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
277}
278
279// ---------------------------------------------------------------------------
280// Parsing logic
281// ---------------------------------------------------------------------------
282
283/// Parses a debian/control file into PackageData entries.
284///
285/// A debian/control file has a Source paragraph followed by one or more Binary
286/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
287/// into each binary package.
288fn parse_debian_control(content: &str) -> Vec<PackageData> {
289    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
290    if paragraphs.is_empty() {
291        return Vec::new();
292    }
293
294    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
295
296    let (source_paragraph, binary_start) = if has_source {
297        (Some(&paragraphs[0]), 1)
298    } else {
299        (None, 0)
300    };
301
302    let source_meta = source_paragraph.map(extract_source_meta);
303
304    let mut packages = Vec::new();
305    let mut count = 0usize;
306
307    for para in &paragraphs[binary_start..] {
308        count += 1;
309        if count > MAX_ITERATION_COUNT {
310            warn!("parse_debian_control: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
311            break;
312        }
313        if let Some(pkg) = build_package_from_paragraph(
314            para,
315            source_meta.as_ref(),
316            DatasourceId::DebianControlInSource,
317        ) {
318            packages.push(pkg);
319        }
320    }
321
322    if packages.is_empty()
323        && let Some(source_para) = source_paragraph
324        && let Some(pkg) = build_package_from_source_paragraph(source_para)
325    {
326        packages.push(pkg);
327    }
328
329    packages
330}
331
332/// Parses a dpkg/status file into PackageData entries.
333///
334/// Each paragraph represents an installed package. Only packages with
335/// `Status: install ok installed` are included.
336fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
337    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
338    let mut packages = Vec::new();
339    let mut count = 0usize;
340
341    for para in &paragraphs {
342        count += 1;
343        if count > MAX_ITERATION_COUNT {
344            warn!("parse_dpkg_status: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
345            break;
346        }
347        let status = rfc822::get_header_first(&para.headers, "status");
348        if status.as_deref() != Some("install ok installed") {
349            continue;
350        }
351
352        if let Some(pkg) =
353            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
354        {
355            packages.push(pkg);
356        }
357    }
358
359    packages
360}
361
362// ---------------------------------------------------------------------------
363// Source paragraph metadata (shared across binary packages)
364// ---------------------------------------------------------------------------
365
/// Metadata taken from the source paragraph of a control file and shared with
/// the binary packages built from the same file (see `extract_source_meta`).
struct SourceMeta {
    /// Maintainer, original maintainer and uploaders, in that order.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
373
374fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
375    let mut parties = Vec::new();
376
377    // Maintainer
378    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
379        let (name, email) = split_name_email(&maintainer);
380        parties.push(Party {
381            r#type: Some("person".to_string()),
382            role: Some("maintainer".to_string()),
383            name,
384            email,
385            url: None,
386            organization: None,
387            organization_url: None,
388            timezone: None,
389        });
390    }
391
392    // Original-Maintainer
393    if let Some(orig_maintainer) =
394        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
395    {
396        let (name, email) = split_name_email(&orig_maintainer);
397        parties.push(Party {
398            r#type: Some("person".to_string()),
399            role: Some("maintainer".to_string()),
400            name,
401            email,
402            url: None,
403            organization: None,
404            organization_url: None,
405            timezone: None,
406        });
407    }
408
409    // Uploaders (comma-separated)
410    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
411        for uploader in uploaders_str.split(',') {
412            let trimmed = uploader.trim();
413            if !trimmed.is_empty() {
414                let (name, email) = split_name_email(trimmed);
415                parties.push(Party {
416                    r#type: Some("person".to_string()),
417                    role: Some("uploader".to_string()),
418                    name,
419                    email,
420                    url: None,
421                    organization: None,
422                    organization_url: None,
423                    timezone: None,
424                });
425            }
426        }
427    }
428
429    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
430
431    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
432        .map(|url| truncate_field(url.split_whitespace().next().unwrap_or(&url).to_string()));
433
434    let code_view_url =
435        rfc822::get_header_first(&paragraph.headers, "vcs-browser").map(truncate_field);
436
437    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs").map(truncate_field);
438
439    SourceMeta {
440        parties,
441        homepage_url,
442        vcs_url,
443        code_view_url,
444        bug_tracking_url,
445    }
446}
447
448// ---------------------------------------------------------------------------
449// Package building
450// ---------------------------------------------------------------------------
451
452fn build_package_from_paragraph(
453    paragraph: &Rfc822Metadata,
454    source_meta: Option<&SourceMeta>,
455    datasource_id: DatasourceId,
456) -> Option<PackageData> {
457    let name = rfc822::get_header_first(&paragraph.headers, "package").map(truncate_field)?;
458    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
459    let architecture =
460        rfc822::get_header_first(&paragraph.headers, "architecture").map(truncate_field);
461    let description =
462        rfc822::get_header_first(&paragraph.headers, "description").map(truncate_field);
463    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
464    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
465    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
466    let section = rfc822::get_header_first(&paragraph.headers, "section");
467    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
468    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
469
470    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
471
472    // Build parties: use source_meta parties if available, otherwise parse from paragraph
473    let parties = if let Some(meta) = source_meta {
474        meta.parties.clone()
475    } else {
476        let mut p = Vec::new();
477        if let Some(m) = &maintainer_str {
478            let (n, e) = split_name_email(m);
479            p.push(Party {
480                r#type: Some("person".to_string()),
481                role: Some("maintainer".to_string()),
482                name: n,
483                email: e,
484                url: None,
485                organization: None,
486                organization_url: None,
487                timezone: None,
488            });
489        }
490        p
491    };
492
493    // Resolve homepage: paragraph's own, or from source metadata
494    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
495    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
496    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
497    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
498
499    // Build PURL
500    let purl = build_debian_purl(
501        &name,
502        version.as_deref(),
503        namespace.as_deref(),
504        architecture.as_deref(),
505    );
506
507    // Parse dependencies from all dependency fields
508    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
509
510    // Keywords from section
511    let keywords = section.into_iter().collect();
512
513    // Source packages
514    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
515
516    // Extra data
517    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
518    if let Some(ma) = &multi_arch
519        && !ma.is_empty()
520    {
521        extra_data.insert(
522            "multi_arch".to_string(),
523            serde_json::Value::String(ma.clone()),
524        );
525    }
526    if let Some(size_str) = &installed_size
527        && let Ok(size) = size_str.parse::<u64>()
528    {
529        extra_data.insert(
530            "installed_size".to_string(),
531            serde_json::Value::Number(serde_json::Number::from(size)),
532        );
533    }
534
535    // Qualifiers for architecture
536    let qualifiers = architecture.as_ref().map(|arch| {
537        let mut q = HashMap::new();
538        q.insert("arch".to_string(), arch.clone());
539        q
540    });
541
542    Some(PackageData {
543        package_type: Some(PACKAGE_TYPE),
544        namespace: namespace.clone(),
545        name: Some(name),
546        version,
547        qualifiers,
548        subpath: None,
549        primary_language: None,
550        description,
551        release_date: None,
552        parties,
553        keywords,
554        homepage_url,
555        download_url: None,
556        size: None,
557        sha1: None,
558        md5: None,
559        sha256: None,
560        sha512: None,
561        bug_tracking_url,
562        code_view_url,
563        vcs_url,
564        copyright: None,
565        holder: None,
566        declared_license_expression: None,
567        declared_license_expression_spdx: None,
568        license_detections: Vec::new(),
569        other_license_expression: None,
570        other_license_expression_spdx: None,
571        other_license_detections: Vec::new(),
572        extracted_license_statement: None,
573        notice_text: None,
574        source_packages,
575        file_references: Vec::new(),
576        is_private: false,
577        is_virtual: false,
578        extra_data: if extra_data.is_empty() {
579            None
580        } else {
581            Some(extra_data)
582        },
583        dependencies,
584        repository_homepage_url: None,
585        repository_download_url: None,
586        api_data_url: None,
587        datasource_id: Some(datasource_id),
588        purl,
589    })
590}
591
592fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
593    let name = rfc822::get_header_first(&paragraph.headers, "source").map(truncate_field)?;
594    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
595    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
596
597    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
598    let source_meta = extract_source_meta(paragraph);
599
600    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
601    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
602
603    let section = rfc822::get_header_first(&paragraph.headers, "section");
604    let keywords = section.into_iter().collect();
605
606    Some(PackageData {
607        package_type: Some(PACKAGE_TYPE),
608        namespace: namespace.clone(),
609        name: Some(name),
610        version,
611        qualifiers: None,
612        subpath: None,
613        primary_language: None,
614        description: None,
615        release_date: None,
616        parties: source_meta.parties,
617        keywords,
618        homepage_url: source_meta.homepage_url,
619        download_url: None,
620        size: None,
621        sha1: None,
622        md5: None,
623        sha256: None,
624        sha512: None,
625        bug_tracking_url: source_meta.bug_tracking_url,
626        code_view_url: source_meta.code_view_url,
627        vcs_url: source_meta.vcs_url,
628        copyright: None,
629        holder: None,
630        declared_license_expression: None,
631        declared_license_expression_spdx: None,
632        license_detections: Vec::new(),
633        other_license_expression: None,
634        other_license_expression_spdx: None,
635        other_license_detections: Vec::new(),
636        extracted_license_statement: None,
637        notice_text: None,
638        source_packages: Vec::new(),
639        file_references: Vec::new(),
640        is_private: false,
641        is_virtual: false,
642        extra_data: None,
643        dependencies,
644        repository_homepage_url: None,
645        repository_download_url: None,
646        api_data_url: None,
647        datasource_id: Some(DatasourceId::DebianControlInSource),
648        purl,
649    })
650}
651
652// ---------------------------------------------------------------------------
653// Namespace detection
654// ---------------------------------------------------------------------------
655
656fn detect_namespace(version: Option<&str>, maintainer: Option<&str>) -> Option<String> {
657    // Check version clues first
658    if let Some(ver) = version {
659        let ver_lower = ver.to_lowercase();
660        for clue in VERSION_CLUES_UBUNTU {
661            if ver_lower.contains(clue) {
662                return Some("ubuntu".to_string());
663            }
664        }
665        for clue in VERSION_CLUES_DEBIAN {
666            if ver_lower.contains(clue) {
667                return Some("debian".to_string());
668            }
669        }
670    }
671
672    // Check maintainer clues
673    if let Some(maint) = maintainer {
674        let maint_lower = maint.to_lowercase();
675        for clue in MAINTAINER_CLUES_UBUNTU {
676            if maint_lower.contains(clue) {
677                return Some("ubuntu".to_string());
678            }
679        }
680        for clue in MAINTAINER_CLUES_DEBIAN {
681            if maint_lower.contains(clue) {
682                return Some("debian".to_string());
683            }
684        }
685    }
686
687    // Default to debian
688    Some("debian".to_string())
689}
690
691// ---------------------------------------------------------------------------
692// PURL generation
693// ---------------------------------------------------------------------------
694
695fn build_debian_purl(
696    name: &str,
697    version: Option<&str>,
698    namespace: Option<&str>,
699    architecture: Option<&str>,
700) -> Option<String> {
701    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
702
703    if let Some(ns) = namespace {
704        purl.with_namespace(ns).ok()?;
705    }
706
707    if let Some(ver) = version {
708        purl.with_version(ver).ok()?;
709    }
710
711    if let Some(arch) = architecture {
712        purl.add_qualifier("arch", arch).ok()?;
713    }
714
715    Some(purl.to_string())
716}
717
718// ---------------------------------------------------------------------------
719// Dependency parsing
720// ---------------------------------------------------------------------------
721
722fn parse_all_dependencies(
723    headers: &HashMap<String, Vec<String>>,
724    namespace: Option<&str>,
725) -> Vec<Dependency> {
726    let mut dependencies = Vec::new();
727
728    for spec in DEP_FIELDS {
729        if let Some(dep_str) = rfc822::get_header_first(headers, spec.field) {
730            dependencies.extend(parse_dependency_field(
731                &dep_str,
732                spec.scope,
733                spec.is_runtime,
734                spec.is_optional,
735                namespace,
736            ));
737        }
738    }
739
740    dependencies
741}
742
743/// Parses a Debian dependency field value.
744///
745/// Debian dependencies are comma-separated, with optional version constraints
746/// in parentheses and alternative packages separated by `|`.
747///
748/// Format: `pkg1 (>= 1.0), pkg2 | pkg3 (<< 2.0), pkg4`
749///
750/// Alternatives (|) are treated as separate optional dependencies.
751fn parse_dependency_field(
752    dep_str: &str,
753    scope: &str,
754    is_runtime: bool,
755    is_optional: bool,
756    namespace: Option<&str>,
757) -> Vec<Dependency> {
758    let mut deps = Vec::new();
759
760    for group in dep_str.split(',').take(MAX_ITERATION_COUNT) {
761        let group = group.trim();
762        if group.is_empty() {
763            continue;
764        }
765
766        let alternatives: Vec<&str> = group.split('|').collect();
767        let has_alternatives = alternatives.len() > 1;
768
769        for alt in alternatives {
770            let alt = alt.trim();
771            if alt.is_empty() {
772                continue;
773            }
774
775            if let Some(caps) = DEP_RE.captures(alt) {
776                let pkg_name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
777                let operator = caps.get(2).map(|m| m.as_str().trim());
778                let version = caps.get(3).map(|m| m.as_str().trim());
779
780                if pkg_name.is_empty() {
781                    continue;
782                }
783
784                if pkg_name.starts_with('$') {
785                    continue;
786                }
787
788                let extracted_requirement = match (operator, version) {
789                    (Some(op), Some(ver)) => Some(truncate_field(format!("{} {}", op, ver))),
790                    _ => None,
791                };
792
793                let is_pinned = operator.map(|op| op == "=");
794
795                let purl = build_debian_purl(pkg_name, None, namespace, None);
796
797                deps.push(Dependency {
798                    purl,
799                    extracted_requirement,
800                    scope: Some(scope.to_string()),
801                    is_runtime: Some(is_runtime),
802                    is_optional: Some(is_optional || has_alternatives),
803                    is_pinned,
804                    is_direct: Some(true),
805                    resolved_package: None,
806                    extra_data: None,
807                });
808            }
809        }
810    }
811
812    deps
813}
814
815// ---------------------------------------------------------------------------
816// Source field parsing
817// ---------------------------------------------------------------------------
818
819/// Parses the Source field which may contain a version in parentheses.
820///
821/// Format: `source-name` or `source-name (version)`
822fn parse_source_field(source: Option<&str>, namespace: Option<&str>) -> Vec<String> {
823    let Some(source_str) = source else {
824        return Vec::new();
825    };
826
827    let trimmed = source_str.trim();
828    if trimmed.is_empty() {
829        return Vec::new();
830    }
831
832    // Extract name and optional version from "name (version)" format
833    let (name, version) = if let Some(paren_start) = trimmed.find(" (") {
834        let name = trimmed[..paren_start].trim();
835        let version = trimmed[paren_start + 2..].trim_end_matches(')').trim();
836        (
837            name,
838            if version.is_empty() {
839                None
840            } else {
841                Some(version)
842            },
843        )
844    } else {
845        (trimmed, None)
846    };
847
848    if let Some(purl) = build_debian_purl(name, version, namespace, None) {
849        vec![purl]
850    } else {
851        Vec::new()
852    }
853}
854
855// ---------------------------------------------------------------------------
856// Parser registration macros
857// ---------------------------------------------------------------------------
858
// Registry entries for the three parsers above (control file, dpkg status database,
// distroless status.d files).
// NOTE(review): the argument meanings are defined by `crate::register_parser!`, which is
// not visible here; they appear to be a description, path glob patterns, the package
// type, a string left empty for all Debian parsers, and a documentation URL — confirm
// against the macro definition.
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
882
883// Note: DebianInstalledParser uses try_parse_installed for Vec<PackageData>,
884// but we register it for the single-package interface too.
885
886// ============================================================================
887// WAVE 2 PARSERS: Additional Debian Format Support
888// ============================================================================
889
890/// Parser for Debian Source Control (.dsc) files
891pub struct DebianDscParser;
892
893impl PackageParser for DebianDscParser {
894    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
895
896    fn is_match(path: &Path) -> bool {
897        path.extension().and_then(|e| e.to_str()) == Some("dsc")
898    }
899
900    fn extract_packages(path: &Path) -> Vec<PackageData> {
901        let content = match read_file_to_string(path, None) {
902            Ok(c) => c,
903            Err(e) => {
904                warn!("Failed to read .dsc file {:?}: {}", path, e);
905                return vec![default_package_data(DatasourceId::DebianSourceControlDsc)];
906            }
907        };
908
909        vec![parse_dsc_content(&content)]
910    }
911}
912
// Registry entry for `DebianDscParser`.
// NOTE(review): argument meanings come from `crate::register_parser!` (not visible
// here) — confirm against the macro definition.
crate::register_parser!(
    "Debian source control file (.dsc)",
    &["**/*.dsc"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
920
921fn strip_pgp_signature(content: &str) -> String {
922    let mut result = String::new();
923    let mut in_pgp_block = false;
924    let mut in_signature = false;
925    let mut count = 0usize;
926
927    for line in content.lines() {
928        count += 1;
929        if count > MAX_ITERATION_COUNT {
930            warn!("strip_pgp_signature: exceeded MAX_ITERATION_COUNT lines, stopping");
931            break;
932        }
933        if line.starts_with("-----BEGIN PGP SIGNED MESSAGE-----") {
934            in_pgp_block = true;
935            continue;
936        }
937        if line.starts_with("-----BEGIN PGP SIGNATURE-----") {
938            in_signature = true;
939            continue;
940        }
941        if line.starts_with("-----END PGP SIGNATURE-----") {
942            in_signature = false;
943            continue;
944        }
945        if in_pgp_block && line.starts_with("Hash:") {
946            continue;
947        }
948        if in_pgp_block && line.is_empty() && result.is_empty() {
949            in_pgp_block = false;
950            continue;
951        }
952        if !in_signature {
953            result.push_str(line);
954            result.push('\n');
955        }
956    }
957
958    result
959}
960
961fn parse_dsc_content(content: &str) -> PackageData {
962    let clean_content = strip_pgp_signature(content);
963    let metadata = rfc822::parse_rfc822_content(&clean_content);
964    let headers = &metadata.headers;
965
966    let name = rfc822::get_header_first(headers, "source").map(truncate_field);
967    let version = rfc822::get_header_first(headers, "version").map(truncate_field);
968    let architecture = rfc822::get_header_first(headers, "architecture").map(truncate_field);
969    let namespace = Some("debian".to_string());
970
971    let mut package = PackageData {
972        datasource_id: Some(DatasourceId::DebianSourceControlDsc),
973        package_type: Some(PACKAGE_TYPE),
974        namespace: namespace.clone(),
975        name: name.clone(),
976        version: version.clone(),
977        description: rfc822::get_header_first(headers, "description").map(truncate_field),
978        homepage_url: rfc822::get_header_first(headers, "homepage").map(truncate_field),
979        vcs_url: rfc822::get_header_first(headers, "vcs-git").map(truncate_field),
980        code_view_url: rfc822::get_header_first(headers, "vcs-browser").map(truncate_field),
981        ..Default::default()
982    };
983
984    // Build PURL with architecture qualifier
985    if let (Some(n), Some(v)) = (&name, &version) {
986        package.purl = build_debian_purl(n, Some(v), namespace.as_deref(), architecture.as_deref());
987    }
988
989    // Set source_packages to point to the source itself (without version)
990    if let Some(n) = &name
991        && let Some(source_purl) = build_debian_purl(n, None, namespace.as_deref(), None)
992    {
993        package.source_packages.push(source_purl);
994    }
995
996    if let Some(maintainer) = rfc822::get_header_first(headers, "maintainer") {
997        let (name_opt, email_opt) = split_name_email(&maintainer);
998        package.parties.push(Party {
999            r#type: None,
1000            role: Some("maintainer".to_string()),
1001            name: name_opt,
1002            email: email_opt,
1003            url: None,
1004            organization: None,
1005            organization_url: None,
1006            timezone: None,
1007        });
1008    }
1009
1010    if let Some(uploaders_str) = rfc822::get_header_first(headers, "uploaders") {
1011        for uploader in uploaders_str.split(',') {
1012            let uploader = uploader.trim();
1013            if uploader.is_empty() {
1014                continue;
1015            }
1016            let (name_opt, email_opt) = split_name_email(uploader);
1017            package.parties.push(Party {
1018                r#type: None,
1019                role: Some("uploader".to_string()),
1020                name: name_opt,
1021                email: email_opt,
1022                url: None,
1023                organization: None,
1024                organization_url: None,
1025                timezone: None,
1026            });
1027        }
1028    }
1029
1030    // Parse Build-Depends
1031    if let Some(build_deps) = rfc822::get_header_first(headers, "build-depends") {
1032        package.dependencies.extend(parse_dependency_field(
1033            &build_deps,
1034            "build",
1035            false,
1036            false,
1037            namespace.as_deref(),
1038        ));
1039    }
1040
1041    // Store Standards-Version in extra_data
1042    if let Some(standards) = rfc822::get_header_first(headers, "standards-version") {
1043        let map = package.extra_data.get_or_insert_with(HashMap::new);
1044        map.insert("standards_version".to_string(), standards.into());
1045    }
1046
1047    package
1048}
1049
1050/// Parser for Debian original source tarballs (*.orig.tar.*)
1051pub struct DebianOrigTarParser;
1052
1053impl PackageParser for DebianOrigTarParser {
1054    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1055
1056    fn is_match(path: &Path) -> bool {
1057        path.file_name()
1058            .and_then(|n| n.to_str())
1059            .map(|name| name.contains(".orig.tar."))
1060            .unwrap_or(false)
1061    }
1062
1063    fn extract_packages(path: &Path) -> Vec<PackageData> {
1064        let filename = match path.file_name().and_then(|n| n.to_str()) {
1065            Some(f) => f,
1066            None => {
1067                return vec![default_package_data(
1068                    DatasourceId::DebianOriginalSourceTarball,
1069                )];
1070            }
1071        };
1072
1073        vec![parse_source_tarball_filename(
1074            filename,
1075            DatasourceId::DebianOriginalSourceTarball,
1076        )]
1077    }
1078}
1079
1080crate::register_parser!(
1081    "Debian original source tarball",
1082    &["**/*.orig.tar.*"],
1083    "deb",
1084    "",
1085    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1086);
1087
1088/// Parser for Debian source package metadata tarballs (*.debian.tar.*)
1089pub struct DebianDebianTarParser;
1090
1091impl PackageParser for DebianDebianTarParser {
1092    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1093
1094    fn is_match(path: &Path) -> bool {
1095        path.file_name()
1096            .and_then(|n| n.to_str())
1097            .map(|name| name.contains(".debian.tar."))
1098            .unwrap_or(false)
1099    }
1100
1101    fn extract_packages(path: &Path) -> Vec<PackageData> {
1102        let filename = match path.file_name().and_then(|n| n.to_str()) {
1103            Some(f) => f,
1104            None => {
1105                return vec![default_package_data(
1106                    DatasourceId::DebianSourceMetadataTarball,
1107                )];
1108            }
1109        };
1110
1111        vec![parse_source_tarball_filename(
1112            filename,
1113            DatasourceId::DebianSourceMetadataTarball,
1114        )]
1115    }
1116}
1117
1118crate::register_parser!(
1119    "Debian source metadata tarball",
1120    &["**/*.debian.tar.*"],
1121    "deb",
1122    "",
1123    Some("https://www.debian.org/doc/debian-policy/ch-source.html"),
1124);
1125
1126fn parse_source_tarball_filename(filename: &str, datasource_id: DatasourceId) -> PackageData {
1127    let without_tar_ext = filename
1128        .trim_end_matches(".gz")
1129        .trim_end_matches(".xz")
1130        .trim_end_matches(".bz2")
1131        .trim_end_matches(".tar");
1132
1133    let parts: Vec<&str> = without_tar_ext.splitn(2, '_').collect();
1134    if parts.len() < 2 {
1135        return default_package_data(datasource_id);
1136    }
1137
1138    let name = truncate_field(parts[0].to_string());
1139    let version_with_suffix = parts[1];
1140
1141    let version = version_with_suffix
1142        .trim_end_matches(".orig")
1143        .trim_end_matches(".debian")
1144        .to_string();
1145    let version = truncate_field(version);
1146
1147    let namespace = Some("debian".to_string());
1148
1149    PackageData {
1150        datasource_id: Some(datasource_id),
1151        package_type: Some(PACKAGE_TYPE),
1152        namespace: namespace.clone(),
1153        name: Some(name.clone()),
1154        version: Some(version.clone()),
1155        purl: build_debian_purl(&name, Some(&version), namespace.as_deref(), None),
1156        ..Default::default()
1157    }
1158}
1159
1160/// Parser for Debian installed file lists (*.list)
1161pub struct DebianInstalledListParser;
1162
1163impl PackageParser for DebianInstalledListParser {
1164    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1165
1166    fn is_match(path: &Path) -> bool {
1167        path.extension().and_then(|e| e.to_str()) == Some("list")
1168            && path
1169                .to_str()
1170                .map(|p| p.contains("/var/lib/dpkg/info/"))
1171                .unwrap_or(false)
1172    }
1173
1174    fn extract_packages(path: &Path) -> Vec<PackageData> {
1175        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1176            Some(f) => f,
1177            None => {
1178                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1179            }
1180        };
1181
1182        let content = match read_file_to_string(path, None) {
1183            Ok(c) => c,
1184            Err(e) => {
1185                warn!("Failed to read .list file {:?}: {}", path, e);
1186                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
1187            }
1188        };
1189
1190        vec![parse_debian_file_list(
1191            &content,
1192            filename,
1193            DatasourceId::DebianInstalledFilesList,
1194        )]
1195    }
1196}
1197
1198crate::register_parser!(
1199    "Debian installed files list",
1200    &["**/var/lib/dpkg/info/*.list"],
1201    "deb",
1202    "",
1203    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1204);
1205
1206/// Parser for Debian installed MD5 checksum files (*.md5sums)
1207pub struct DebianInstalledMd5sumsParser;
1208
1209impl PackageParser for DebianInstalledMd5sumsParser {
1210    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1211
1212    fn is_match(path: &Path) -> bool {
1213        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
1214            && path
1215                .to_str()
1216                .map(|p| p.contains("/var/lib/dpkg/info/"))
1217                .unwrap_or(false)
1218    }
1219
1220    fn extract_packages(path: &Path) -> Vec<PackageData> {
1221        let filename = match path.file_stem().and_then(|s| s.to_str()) {
1222            Some(f) => f,
1223            None => {
1224                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1225            }
1226        };
1227
1228        let content = match read_file_to_string(path, None) {
1229            Ok(c) => c,
1230            Err(e) => {
1231                warn!("Failed to read .md5sums file {:?}: {}", path, e);
1232                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
1233            }
1234        };
1235
1236        vec![parse_debian_file_list(
1237            &content,
1238            filename,
1239            DatasourceId::DebianInstalledMd5Sums,
1240        )]
1241    }
1242}
1243
1244crate::register_parser!(
1245    "Debian installed package md5sums",
1246    &["**/var/lib/dpkg/info/*.md5sums"],
1247    "deb",
1248    "",
1249    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
1250);
1251
1252const IGNORED_ROOT_DIRS: &[&str] = &["/.", "/bin", "/etc", "/lib", "/sbin", "/usr", "/var"];
1253
1254fn parse_debian_file_list(
1255    content: &str,
1256    filename: &str,
1257    datasource_id: DatasourceId,
1258) -> PackageData {
1259    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
1260        (
1261            Some(truncate_field(pkg.to_string())),
1262            Some(arch.to_string()),
1263        )
1264    } else if filename == "md5sums" {
1265        (None, None)
1266    } else {
1267        (Some(truncate_field(filename.to_string())), None)
1268    };
1269
1270    let mut file_references = Vec::new();
1271    let mut count = 0usize;
1272
1273    for line in content.lines() {
1274        count += 1;
1275        if count > MAX_ITERATION_COUNT {
1276            warn!("parse_debian_file_list: exceeded MAX_ITERATION_COUNT lines, stopping");
1277            break;
1278        }
1279        let line = line.trim();
1280        if line.is_empty() || line.starts_with('#') {
1281            continue;
1282        }
1283
1284        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
1285            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
1286        } else {
1287            (None, line)
1288        };
1289
1290        if IGNORED_ROOT_DIRS.contains(&path) {
1291            continue;
1292        }
1293
1294        file_references.push(FileReference {
1295            path: path.to_string(),
1296            size: None,
1297            sha1: None,
1298            md5: md5sum,
1299            sha256: None,
1300            sha512: None,
1301            extra_data: None,
1302        });
1303    }
1304
1305    if file_references.is_empty() {
1306        return default_package_data(datasource_id);
1307    }
1308
1309    let namespace = Some("debian".to_string());
1310    let mut package = PackageData {
1311        datasource_id: Some(datasource_id),
1312        package_type: Some(PACKAGE_TYPE),
1313        namespace: namespace.clone(),
1314        name: name.clone(),
1315        file_references,
1316        ..Default::default()
1317    };
1318
1319    if let Some(n) = &name {
1320        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
1321    }
1322
1323    package
1324}
1325
1326/// Parser for Debian machine-readable copyright files (DEP-5 format)
1327pub struct DebianCopyrightParser;
1328
1329impl PackageParser for DebianCopyrightParser {
1330    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1331
1332    fn is_match(path: &Path) -> bool {
1333        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
1334            if filename != "copyright" {
1335                return filename.ends_with("_copyright");
1336            }
1337            let path_str = path.to_string_lossy();
1338            path_str.contains("/debian/")
1339                || path_str.contains("/ports/")
1340                || path_str.starts_with("ports/")
1341                || path_str.contains("/packages/deb/")
1342                || path_str.contains("/usr/share/doc/")
1343                || path_str.ends_with("debian/copyright")
1344        } else {
1345            false
1346        }
1347    }
1348
1349    fn extract_packages(path: &Path) -> Vec<PackageData> {
1350        let datasource_id = detect_debian_copyright_datasource(path);
1351        let content = match read_file_to_string(path, None) {
1352            Ok(c) => c,
1353            Err(e) => {
1354                warn!("Failed to read copyright file {:?}: {}", path, e);
1355                return vec![default_package_data(datasource_id)];
1356            }
1357        };
1358
1359        let package_name = extract_package_name_from_path(path)
1360            .or_else(|| extract_standalone_package_name_from_path(path, datasource_id));
1361        let mut package_data = parse_copyright_file(&content, package_name.as_deref());
1362        package_data.datasource_id = Some(datasource_id);
1363        vec![package_data]
1364    }
1365}
1366
1367crate::register_parser!(
1368    "Debian machine-readable copyright file",
1369    &[
1370        "**/debian/copyright",
1371        "**/ports/*/copyright",
1372        "**/packages/deb/copyright",
1373        "**/usr/share/doc/*/copyright",
1374        "**/*_copyright"
1375    ],
1376    "deb",
1377    "",
1378    Some("https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"),
1379);
1380
1381fn detect_debian_copyright_datasource(path: &Path) -> DatasourceId {
1382    let path_str = path.to_string_lossy();
1383    if path_str.contains("/debian/") || path_str.ends_with("debian/copyright") {
1384        DatasourceId::DebianCopyrightInSource
1385    } else if path_str.contains("/usr/share/doc/") {
1386        DatasourceId::DebianCopyrightInPackage
1387    } else {
1388        DatasourceId::DebianCopyrightStandalone
1389    }
1390}
1391
/// Returns the name of the directory that directly follows the first `doc` directory in
/// `path`, e.g. `libfoo` for `/usr/share/doc/libfoo/copyright`.
///
/// The final path component is the file itself and is never considered, so a file sitting
/// directly inside a `doc` directory does not have its own name reported as the package.
/// Returns `None` when no such directory pair exists or the candidate is not valid UTF-8.
fn extract_package_name_from_path(path: &Path) -> Option<String> {
    use std::path::Component;

    let components: Vec<Component<'_>> = path.components().collect();
    // Drop the file name: only directories can name the package.
    let (_file, directories) = components.split_last()?;

    for pair in directories.windows(2) {
        if let [Component::Normal(dir), Component::Normal(package)] = pair {
            if dir.to_str() == Some("doc") {
                return package.to_str().map(str::to_string);
            }
        }
    }
    None
}
1406
1407fn extract_standalone_package_name_from_path(
1408    path: &Path,
1409    datasource_id: DatasourceId,
1410) -> Option<String> {
1411    if datasource_id != DatasourceId::DebianCopyrightStandalone {
1412        return None;
1413    }
1414
1415    path.file_name()
1416        .and_then(|name| name.to_str())
1417        .filter(|name| *name == "copyright")?;
1418
1419    path.parent()
1420        .and_then(|parent| parent.file_name())
1421        .and_then(|name| name.to_str())
1422        .map(str::to_string)
1423}
1424
1425fn parse_copyright_file(content: &str, package_name: Option<&str>) -> PackageData {
1426    let paragraphs = parse_copyright_paragraphs_with_lines(content);
1427
1428    let is_dep5 = paragraphs
1429        .first()
1430        .and_then(|p| rfc822::get_header_first(&p.metadata.headers, "format"))
1431        .is_some();
1432
1433    let namespace = Some("debian".to_string());
1434    let mut parties = Vec::new();
1435    let mut license_statements = Vec::new();
1436    let mut primary_license_detection = None;
1437    let mut header_license_detection = None;
1438    let mut other_license_detections = Vec::new();
1439
1440    if is_dep5 {
1441        let mut para_count = 0usize;
1442        for para in &paragraphs {
1443            para_count += 1;
1444            if para_count > MAX_ITERATION_COUNT {
1445                warn!("parse_copyright_file: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
1446                break;
1447            }
1448            if let Some(copyright_text) =
1449                rfc822::get_header_first(&para.metadata.headers, "copyright")
1450            {
1451                for holder in parse_copyright_holders(&copyright_text) {
1452                    if !holder.is_empty() {
1453                        parties.push(Party {
1454                            r#type: None,
1455                            role: Some("copyright-holder".to_string()),
1456                            name: Some(holder),
1457                            email: None,
1458                            url: None,
1459                            organization: None,
1460                            organization_url: None,
1461                            timezone: None,
1462                        });
1463                    }
1464                }
1465            }
1466
1467            if let Some(license) = rfc822::get_header_first(&para.metadata.headers, "license") {
1468                let license_name = license.lines().next().unwrap_or(&license).trim();
1469                if !license_name.is_empty()
1470                    && !license_statements.contains(&license_name.to_string())
1471                {
1472                    license_statements.push(license_name.to_string());
1473                }
1474
1475                if let Some((matched_text, line_no)) = para.license_header_line.clone() {
1476                    let detection =
1477                        build_primary_license_detection(license_name, matched_text, line_no);
1478                    let is_header_paragraph =
1479                        rfc822::get_header_first(&para.metadata.headers, "format").is_some();
1480                    if rfc822::get_header_first(&para.metadata.headers, "files").as_deref()
1481                        == Some("*")
1482                    {
1483                        primary_license_detection = Some(detection);
1484                    } else if is_header_paragraph {
1485                        header_license_detection.get_or_insert(detection);
1486                    } else {
1487                        other_license_detections.push(detection);
1488                    }
1489                }
1490            }
1491        }
1492
1493        if primary_license_detection.is_none() && header_license_detection.is_some() {
1494            primary_license_detection = header_license_detection;
1495        }
1496    } else {
1497        let copyright_block = extract_unstructured_field(content, "Copyright:");
1498        if let Some(text) = copyright_block {
1499            for holder in parse_copyright_holders(&text) {
1500                if !holder.is_empty() {
1501                    parties.push(Party {
1502                        r#type: None,
1503                        role: Some("copyright-holder".to_string()),
1504                        name: Some(holder),
1505                        email: None,
1506                        url: None,
1507                        organization: None,
1508                        organization_url: None,
1509                        timezone: None,
1510                    });
1511                }
1512            }
1513        }
1514
1515        let license_block = extract_unstructured_field(content, "License:");
1516        if let Some(text) = license_block {
1517            license_statements.push(text.lines().next().unwrap_or(&text).trim().to_string());
1518        }
1519    }
1520
1521    let extracted_license_statement = if license_statements.is_empty() {
1522        None
1523    } else {
1524        Some(truncate_field(license_statements.join(" AND ")))
1525    };
1526
1527    let license_detections = primary_license_detection.into_iter().collect::<Vec<_>>();
1528    let declared_license_expression = license_detections
1529        .first()
1530        .map(|detection| detection.license_expression.clone());
1531    let declared_license_expression_spdx = license_detections
1532        .first()
1533        .map(|detection| detection.license_expression_spdx.clone());
1534    let other_license_expression = combine_license_expressions(
1535        other_license_detections
1536            .iter()
1537            .map(|detection| detection.license_expression.clone()),
1538    );
1539    let other_license_expression_spdx = combine_license_expressions(
1540        other_license_detections
1541            .iter()
1542            .map(|detection| detection.license_expression_spdx.clone()),
1543    );
1544
1545    PackageData {
1546        datasource_id: Some(DatasourceId::DebianCopyright),
1547        package_type: Some(PACKAGE_TYPE),
1548        namespace: namespace.clone(),
1549        name: package_name.map(|s| truncate_field(s.to_string())),
1550        parties,
1551        declared_license_expression,
1552        declared_license_expression_spdx,
1553        license_detections,
1554        other_license_expression,
1555        other_license_expression_spdx,
1556        other_license_detections,
1557        extracted_license_statement,
1558        purl: package_name.and_then(|n| build_debian_purl(n, None, namespace.as_deref(), None)),
1559        ..Default::default()
1560    }
1561}
1562
/// One blank-line-delimited paragraph of a copyright file, as produced by
/// `finalize_copyright_paragraph`.
#[derive(Debug)]
struct CopyrightParagraph {
    /// Header fields of the paragraph, keyed by lower-cased field name; the body is left
    /// empty.
    metadata: Rfc822Metadata,
    /// The paragraph's first `License` header line (trailing whitespace removed) together
    /// with its line number, counted from 1 at the start of the file.
    license_header_line: Option<(String, usize)>,
}
1568
1569fn parse_copyright_paragraphs_with_lines(content: &str) -> Vec<CopyrightParagraph> {
1570    let mut paragraphs = Vec::new();
1571    let mut current_lines = Vec::new();
1572    let mut current_start_line = 1usize;
1573    let mut count = 0usize;
1574
1575    for (idx, line) in content.lines().enumerate() {
1576        count += 1;
1577        if count > MAX_ITERATION_COUNT {
1578            warn!(
1579                "parse_copyright_paragraphs_with_lines: exceeded MAX_ITERATION_COUNT lines, stopping"
1580            );
1581            break;
1582        }
1583        let line_no = idx + 1;
1584        if line.is_empty() {
1585            if !current_lines.is_empty() {
1586                paragraphs.push(finalize_copyright_paragraph(
1587                    std::mem::take(&mut current_lines),
1588                    current_start_line,
1589                ));
1590            }
1591            current_start_line = line_no + 1;
1592        } else {
1593            if current_lines.is_empty() {
1594                current_start_line = line_no;
1595            }
1596            current_lines.push(line.to_string());
1597        }
1598    }
1599
1600    if !current_lines.is_empty() {
1601        paragraphs.push(finalize_copyright_paragraph(
1602            current_lines,
1603            current_start_line,
1604        ));
1605    }
1606
1607    paragraphs
1608}
1609
1610fn finalize_copyright_paragraph(raw_lines: Vec<String>, start_line: usize) -> CopyrightParagraph {
1611    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
1612    let mut current_name: Option<String> = None;
1613    let mut current_value = String::new();
1614    let mut license_header_line = None;
1615
1616    for (idx, line) in raw_lines.iter().enumerate() {
1617        if line.starts_with(' ') || line.starts_with('\t') {
1618            if current_name.is_some() {
1619                current_value.push('\n');
1620                current_value.push_str(line);
1621            }
1622            continue;
1623        }
1624
1625        if let Some(name) = current_name.take() {
1626            add_copyright_header_value(&mut headers, &name, &current_value);
1627            current_value.clear();
1628        }
1629
1630        if let Some((name, value)) = line.split_once(':') {
1631            let normalized_name = name.trim().to_ascii_lowercase();
1632            if normalized_name == "license" && license_header_line.is_none() {
1633                license_header_line = Some((line.trim_end().to_string(), start_line + idx));
1634            }
1635            current_name = Some(normalized_name);
1636            current_value = value.trim_start().to_string();
1637        }
1638    }
1639
1640    if let Some(name) = current_name.take() {
1641        add_copyright_header_value(&mut headers, &name, &current_value);
1642    }
1643
1644    CopyrightParagraph {
1645        metadata: Rfc822Metadata {
1646            headers,
1647            body: String::new(),
1648        },
1649        license_header_line,
1650    }
1651}
1652
/// Records `value` under `name`, with trailing whitespace removed.
///
/// The entry for `name` is always created; a value that is empty after trimming is not
/// stored, leaving the entry's list unchanged.
fn add_copyright_header_value(headers: &mut HashMap<String, Vec<String>>, name: &str, value: &str) {
    let values = headers.entry(name.to_owned()).or_default();
    match value.trim_end() {
        "" => {}
        kept => values.push(kept.to_owned()),
    }
}
1660
1661fn build_primary_license_detection(
1662    license_name: &str,
1663    matched_text: String,
1664    line_no: usize,
1665) -> LicenseDetection {
1666    let normalized = normalize_debian_license_name(license_name);
1667    let line = match LineNumber::new(line_no) {
1668        Some(l) => l,
1669        None => {
1670            warn!(
1671                "build_primary_license_detection: line number {} out of range, clamping to 1",
1672                line_no
1673            );
1674            LineNumber::new(1).expect("1 is a valid line number")
1675        }
1676    };
1677
1678    build_declared_license_detection(
1679        &normalized,
1680        DeclaredLicenseMatchMetadata::new(&matched_text, line, line),
1681    )
1682}
1683
1684fn normalize_debian_license_name(license_name: &str) -> NormalizedDeclaredLicense {
1685    match license_name.trim() {
1686        "GPL-2+" => NormalizedDeclaredLicense::new("gpl-2.0-plus", "GPL-2.0-or-later"),
1687        "GPL-2" => NormalizedDeclaredLicense::new("gpl-2.0", "GPL-2.0-only"),
1688        "LGPL-2+" => NormalizedDeclaredLicense::new("lgpl-2.0-plus", "LGPL-2.0-or-later"),
1689        "LGPL-2.1" => NormalizedDeclaredLicense::new("lgpl-2.1", "LGPL-2.1-only"),
1690        "LGPL-2.1+" => NormalizedDeclaredLicense::new("lgpl-2.1-plus", "LGPL-2.1-or-later"),
1691        "LGPL-3+" => NormalizedDeclaredLicense::new("lgpl-3.0-plus", "LGPL-3.0-or-later"),
1692        "BSD-4-clause" => NormalizedDeclaredLicense::new("bsd-original-uc", "BSD-4-Clause-UC"),
1693        "public-domain" => {
1694            NormalizedDeclaredLicense::new("public-domain", "LicenseRef-provenant-public-domain")
1695        }
1696        other => normalize_declared_license_key(other)
1697            .unwrap_or_else(|| NormalizedDeclaredLicense::new(other.to_ascii_lowercase(), other)),
1698    }
1699}
1700
1701fn parse_copyright_holders(text: &str) -> Vec<String> {
1702    let mut holders = Vec::new();
1703    let mut count = 0usize;
1704
1705    for line in text.lines() {
1706        count += 1;
1707        if count > MAX_ITERATION_COUNT {
1708            warn!("parse_copyright_holders: exceeded MAX_ITERATION_COUNT lines, stopping");
1709            break;
1710        }
1711        let line = line.trim();
1712        if line.is_empty() {
1713            continue;
1714        }
1715
1716        let cleaned = line
1717            .trim_start_matches("Copyright")
1718            .trim_start_matches("copyright")
1719            .trim_start_matches("(C)")
1720            .trim_start_matches("(c)")
1721            .trim_start_matches("©")
1722            .trim();
1723
1724        if let Some(year_end) = cleaned.find(char::is_alphabetic) {
1725            let without_years = &cleaned[year_end..];
1726            let holder = without_years
1727                .trim_start_matches(',')
1728                .trim_start_matches('-')
1729                .trim();
1730
1731            if !holder.is_empty() && holder.len() > 2 {
1732                holders.push(holder.to_string());
1733            }
1734        }
1735    }
1736
1737    holders
1738}
1739
1740fn extract_unstructured_field(content: &str, field_name: &str) -> Option<String> {
1741    let mut in_field = false;
1742    let mut field_content = String::new();
1743    let mut count = 0usize;
1744
1745    for line in content.lines() {
1746        count += 1;
1747        if count > MAX_ITERATION_COUNT {
1748            warn!("extract_unstructured_field: exceeded MAX_ITERATION_COUNT lines, stopping");
1749            break;
1750        }
1751        if line.starts_with(field_name) {
1752            in_field = true;
1753            field_content.push_str(line.trim_start_matches(field_name).trim());
1754            field_content.push('\n');
1755        } else if in_field {
1756            if line.starts_with(char::is_whitespace) {
1757                field_content.push_str(line.trim());
1758                field_content.push('\n');
1759            } else if !line.trim().is_empty() {
1760                break;
1761            }
1762        }
1763    }
1764
1765    let trimmed = field_content.trim();
1766    if trimmed.is_empty() {
1767        None
1768    } else {
1769        Some(truncate_field(trimmed.to_string()))
1770    }
1771}
1772
1773/// Parser for Debian binary package archives (.deb files)
1774pub struct DebianDebParser;
1775
1776impl PackageParser for DebianDebParser {
1777    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1778
1779    fn is_match(path: &Path) -> bool {
1780        path.extension().and_then(|e| e.to_str()) == Some("deb")
1781    }
1782
1783    fn extract_packages(path: &Path) -> Vec<PackageData> {
1784        // Try to extract metadata from archive contents first
1785        if let Ok(data) = extract_deb_archive(path) {
1786            return vec![data];
1787        }
1788
1789        // Fallback to filename parsing
1790        let filename = match path.file_name().and_then(|n| n.to_str()) {
1791            Some(f) => f,
1792            None => {
1793                return vec![default_package_data(DatasourceId::DebianDeb)];
1794            }
1795        };
1796
1797        vec![parse_deb_filename(filename)]
1798    }
1799}
1800
// Invokes `register_parser!` for the `.deb` archive parser.
// NOTE(review): the arguments look like description, path globs, a short type
// tag, an empty extra string, and a documentation URL — confirm against the
// macro definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
1808
1809fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
1810    use flate2::read::GzDecoder;
1811    use liblzma::read::XzDecoder;
1812    use std::io::{Cursor, Read};
1813
1814    let file_metadata =
1815        std::fs::metadata(path).map_err(|e| format!("Failed to stat .deb file: {}", e))?;
1816    if file_metadata.len() > MAX_ARCHIVE_SIZE {
1817        return Err(format!(
1818            ".deb file exceeds MAX_ARCHIVE_SIZE ({} bytes)",
1819            file_metadata.len()
1820        ));
1821    }
1822    let compressed_size = file_metadata.len() as usize;
1823
1824    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
1825
1826    let mut archive = ar::Archive::new(file);
1827    let mut package: Option<PackageData> = None;
1828    let mut total_extracted: usize = 0;
1829
1830    while let Some(entry_result) = archive.next_entry() {
1831        let entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
1832
1833        let entry_name_raw = entry.header().identifier();
1834        let entry_name = String::from_utf8_lossy(entry_name_raw);
1835        let had_replacement = entry_name_raw.iter().any(|&b| b > 127);
1836        if had_replacement {
1837            warn!(
1838                "extract_deb_archive: non-UTF-8 bytes in entry name replaced with lossy conversion"
1839            );
1840        }
1841        let entry_name = entry_name.trim().to_string();
1842
1843        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
1844            let entry_size = entry.header().size();
1845            if entry_size > MAX_FILE_SIZE {
1846                warn!(
1847                    "extract_deb_archive: control tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1848                    entry_size
1849                );
1850                continue;
1851            }
1852            let mut control_data = Vec::new();
1853            entry
1854                .take(MAX_FILE_SIZE)
1855                .read_to_end(&mut control_data)
1856                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
1857
1858            total_extracted += control_data.len();
1859            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1860                warn!(
1861                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1862                );
1863                break;
1864            }
1865            if total_extracted > MAX_ARCHIVE_SIZE as usize {
1866                warn!(
1867                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1868                );
1869                break;
1870            }
1871
1872            if entry_name.ends_with(".gz") {
1873                let decoder = GzDecoder::new(Cursor::new(control_data));
1874                if let Some(parsed_package) =
1875                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1876                {
1877                    package = Some(parsed_package);
1878                }
1879            } else if entry_name.ends_with(".xz") {
1880                let decoder = XzDecoder::new(Cursor::new(control_data));
1881                if let Some(parsed_package) =
1882                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
1883                {
1884                    package = Some(parsed_package);
1885                }
1886            }
1887        } else if entry_name.starts_with("data.tar") {
1888            let entry_size = entry.header().size();
1889            if entry_size > MAX_FILE_SIZE {
1890                warn!(
1891                    "extract_deb_archive: data tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1892                    entry_size
1893                );
1894                continue;
1895            }
1896            let mut data = Vec::new();
1897            entry
1898                .take(MAX_FILE_SIZE)
1899                .read_to_end(&mut data)
1900                .map_err(|e| format!("Failed to read data archive: {}", e))?;
1901
1902            total_extracted += data.len();
1903            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1904                warn!(
1905                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1906                );
1907                break;
1908            }
1909            if total_extracted > MAX_ARCHIVE_SIZE as usize {
1910                warn!(
1911                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1912                );
1913                break;
1914            }
1915
1916            let Some(current_package) = package.as_mut() else {
1917                continue;
1918            };
1919
1920            if entry_name.ends_with(".gz") {
1921                let decoder = GzDecoder::new(Cursor::new(data));
1922                merge_deb_data_archive(
1923                    decoder,
1924                    current_package,
1925                    &mut total_extracted,
1926                    compressed_size,
1927                )?;
1928            } else if entry_name.ends_with(".xz") {
1929                let decoder = XzDecoder::new(Cursor::new(data));
1930                merge_deb_data_archive(
1931                    decoder,
1932                    current_package,
1933                    &mut total_extracted,
1934                    compressed_size,
1935                )?;
1936            }
1937        }
1938    }
1939
1940    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
1941}
1942
1943fn parse_control_tar_archive<R: std::io::Read>(
1944    reader: R,
1945    total_extracted: &mut usize,
1946    compressed_size: usize,
1947) -> Result<Option<PackageData>, String> {
1948    use std::io::Read;
1949
1950    let mut tar_archive = tar::Archive::new(reader);
1951
1952    for tar_entry_result in tar_archive
1953        .entries()
1954        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1955    {
1956        let tar_entry = tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1957
1958        let tar_path = tar_entry
1959            .path()
1960            .map_err(|e| format!("Failed to get tar path: {}", e))?;
1961
1962        if tar_path
1963            .components()
1964            .any(|c| matches!(c, std::path::Component::ParentDir))
1965        {
1966            warn!(
1967                "parse_control_tar_archive: skipping tar entry with path traversal: {:?}",
1968                tar_path
1969            );
1970            continue;
1971        }
1972
1973        if tar_entry.size() > MAX_FILE_SIZE {
1974            warn!(
1975                "parse_control_tar_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
1976                tar_entry.size()
1977            );
1978            continue;
1979        }
1980
1981        if tar_path.ends_with("control") {
1982            let mut control_content = String::new();
1983            tar_entry
1984                .take(MAX_FILE_SIZE)
1985                .read_to_string(&mut control_content)
1986                .map_err(|e| format!("Failed to read control file: {}", e))?;
1987
1988            *total_extracted += control_content.len();
1989            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
1990                warn!(
1991                    "parse_control_tar_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
1992                );
1993                return Ok(None);
1994            }
1995            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
1996                warn!(
1997                    "parse_control_tar_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
1998                );
1999                return Ok(None);
2000            }
2001
2002            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
2003            if paragraphs.is_empty() {
2004                return Err("No paragraphs in control file".to_string());
2005            }
2006
2007            if let Some(package) =
2008                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
2009            {
2010                return Ok(Some(package));
2011            }
2012
2013            return Err("Failed to parse control file".to_string());
2014        }
2015    }
2016
2017    Ok(None)
2018}
2019
2020fn merge_deb_data_archive<R: std::io::Read>(
2021    reader: R,
2022    package: &mut PackageData,
2023    total_extracted: &mut usize,
2024    compressed_size: usize,
2025) -> Result<(), String> {
2026    use std::io::Read;
2027
2028    let mut tar_archive = tar::Archive::new(reader);
2029
2030    for tar_entry_result in tar_archive
2031        .entries()
2032        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
2033    {
2034        let tar_entry =
2035            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
2036
2037        let tar_path = tar_entry
2038            .path()
2039            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
2040
2041        if tar_path
2042            .components()
2043            .any(|c| matches!(c, std::path::Component::ParentDir))
2044        {
2045            warn!(
2046                "merge_deb_data_archive: skipping tar entry with path traversal: {:?}",
2047                tar_path
2048            );
2049            continue;
2050        }
2051
2052        if tar_entry.size() > MAX_FILE_SIZE {
2053            warn!(
2054                "merge_deb_data_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
2055                tar_entry.size()
2056            );
2057            continue;
2058        }
2059
2060        let tar_path_str = tar_path.to_string_lossy();
2061
2062        if tar_path_str.ends_with(&format!(
2063            "/usr/share/doc/{}/copyright",
2064            package.name.as_deref().unwrap_or_default()
2065        )) || tar_path_str.ends_with(&format!(
2066            "usr/share/doc/{}/copyright",
2067            package.name.as_deref().unwrap_or_default()
2068        )) {
2069            let mut copyright_content = String::new();
2070            tar_entry
2071                .take(MAX_FILE_SIZE)
2072                .read_to_string(&mut copyright_content)
2073                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
2074
2075            *total_extracted += copyright_content.len();
2076            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
2077                warn!(
2078                    "merge_deb_data_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
2079                );
2080                return Ok(());
2081            }
2082            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
2083                warn!(
2084                    "merge_deb_data_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
2085                );
2086                return Ok(());
2087            }
2088
2089            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
2090            merge_debian_copyright_into_package(package, &copyright_pkg);
2091            break;
2092        }
2093    }
2094
2095    Ok(())
2096}
2097
2098fn merge_debian_copyright_into_package(target: &mut PackageData, copyright: &PackageData) {
2099    if target.extracted_license_statement.is_none() {
2100        target.extracted_license_statement = copyright.extracted_license_statement.clone();
2101    }
2102
2103    for party in &copyright.parties {
2104        if !target.parties.iter().any(|existing| {
2105            existing.r#type == party.r#type
2106                && existing.role == party.role
2107                && existing.name == party.name
2108                && existing.email == party.email
2109                && existing.url == party.url
2110                && existing.organization == party.organization
2111                && existing.organization_url == party.organization_url
2112                && existing.timezone == party.timezone
2113        }) {
2114            target.parties.push(party.clone());
2115        }
2116    }
2117}
2118
2119fn parse_deb_filename(filename: &str) -> PackageData {
2120    let without_ext = filename.trim_end_matches(".deb");
2121
2122    let parts: Vec<&str> = without_ext.split('_').collect();
2123    if parts.len() < 2 {
2124        return default_package_data(DatasourceId::DebianDeb);
2125    }
2126
2127    let name = truncate_field(parts[0].to_string());
2128    let version = truncate_field(parts[1].to_string());
2129    let architecture = if parts.len() >= 3 {
2130        Some(truncate_field(parts[2].to_string()))
2131    } else {
2132        None
2133    };
2134
2135    let namespace = Some("debian".to_string());
2136
2137    PackageData {
2138        datasource_id: Some(DatasourceId::DebianDeb),
2139        package_type: Some(PACKAGE_TYPE),
2140        namespace: namespace.clone(),
2141        name: Some(name.clone()),
2142        version: Some(version.clone()),
2143        purl: build_debian_purl(
2144            &name,
2145            Some(&version),
2146            namespace.as_deref(),
2147            architecture.as_deref(),
2148        ),
2149        ..Default::default()
2150    }
2151}
2152
2153/// Parser for control files inside extracted .deb control tarballs.
2154///
2155/// Matches paths like `*/control.tar.gz-extract/control` and
2156/// `*/control.tar.xz-extract/control` which are created by ExtractCode
2157/// when extracting .deb archives.
2158pub struct DebianControlInExtractedDebParser;
2159
2160impl PackageParser for DebianControlInExtractedDebParser {
2161    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2162
2163    fn is_match(path: &Path) -> bool {
2164        path.file_name()
2165            .and_then(|n| n.to_str())
2166            .is_some_and(|name| name == "control")
2167            && path
2168                .to_str()
2169                .map(|p| {
2170                    p.ends_with("control.tar.gz-extract/control")
2171                        || p.ends_with("control.tar.xz-extract/control")
2172                })
2173                .unwrap_or(false)
2174    }
2175
2176    fn extract_packages(path: &Path) -> Vec<PackageData> {
2177        let content = match read_file_to_string(path, None) {
2178            Ok(c) => c,
2179            Err(e) => {
2180                warn!(
2181                    "Failed to read control file in extracted deb {:?}: {}",
2182                    path, e
2183                );
2184                return vec![default_package_data(
2185                    DatasourceId::DebianControlExtractedDeb,
2186                )];
2187            }
2188        };
2189
2190        // A control file inside an extracted .deb has a single paragraph
2191        // (unlike debian/control which has source + binary paragraphs)
2192        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
2193        if paragraphs.is_empty() {
2194            return vec![default_package_data(
2195                DatasourceId::DebianControlExtractedDeb,
2196            )];
2197        }
2198
2199        if let Some(pkg) = build_package_from_paragraph(
2200            &paragraphs[0],
2201            None,
2202            DatasourceId::DebianControlExtractedDeb,
2203        ) {
2204            vec![pkg]
2205        } else {
2206            vec![default_package_data(
2207                DatasourceId::DebianControlExtractedDeb,
2208            )]
2209        }
2210    }
2211}
2212
2213/// Parser for MD5 checksum files inside extracted .deb control tarballs
2214pub struct DebianMd5sumInPackageParser;
2215
2216impl PackageParser for DebianMd5sumInPackageParser {
2217    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2218
2219    fn is_match(path: &Path) -> bool {
2220        path.file_name()
2221            .and_then(|n| n.to_str())
2222            .is_some_and(|name| name == "md5sums")
2223            && path
2224                .to_str()
2225                .map(|p| {
2226                    p.ends_with("control.tar.gz-extract/md5sums")
2227                        || p.ends_with("control.tar.xz-extract/md5sums")
2228                })
2229                .unwrap_or(false)
2230    }
2231
2232    fn extract_packages(path: &Path) -> Vec<PackageData> {
2233        let content = match read_file_to_string(path, None) {
2234            Ok(c) => c,
2235            Err(e) => {
2236                warn!("Failed to read md5sums file {:?}: {}", path, e);
2237                return vec![default_package_data(
2238                    DatasourceId::DebianMd5SumsInExtractedDeb,
2239                )];
2240            }
2241        };
2242
2243        let package_name = extract_package_name_from_deb_path(path);
2244
2245        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
2246    }
2247}
2248
/// Derives a package name from the directory two levels above `path`.
///
/// That directory's name must be UTF-8 and end in `.deb-extract`, as in
/// `<name>_<version>_<arch>.deb-extract/control.tar.gz-extract/md5sums`.
/// The result is the part of the directory name before the first `_`.
/// Returns `None` when the path is too shallow or the name does not fit.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    path.ancestors()
        // `ancestors` yields the path itself, its parent, then the grandparent.
        .nth(2)
        .and_then(Path::file_name)
        .and_then(|dirname| dirname.to_str())
        .and_then(|dirname| dirname.strip_suffix("-extract"))
        .and_then(|archive_name| archive_name.strip_suffix(".deb"))
        .and_then(|stem| stem.split('_').next())
        .map(str::to_string)
}
2259
2260fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
2261    let mut file_references = Vec::new();
2262    let mut count = 0usize;
2263
2264    for line in content.lines() {
2265        count += 1;
2266        if count > MAX_ITERATION_COUNT {
2267            warn!("parse_md5sums_in_package: exceeded MAX_ITERATION_COUNT lines, stopping");
2268            break;
2269        }
2270        let line = line.trim();
2271        if line.is_empty() || line.starts_with('#') {
2272            continue;
2273        }
2274
2275        let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
2276            (
2277                Md5Digest::from_hex(line[..idx].trim()).ok(),
2278                line[idx + 2..].trim(),
2279            )
2280        } else if let Some((hash, path)) = line.split_once(' ') {
2281            (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
2282        } else {
2283            (None, line)
2284        };
2285
2286        if IGNORED_ROOT_DIRS.contains(&filepath) {
2287            continue;
2288        }
2289
2290        file_references.push(FileReference {
2291            path: filepath.to_string(),
2292            size: None,
2293            sha1: None,
2294            md5: md5sum,
2295            sha256: None,
2296            sha512: None,
2297            extra_data: None,
2298        });
2299    }
2300
2301    if file_references.is_empty() {
2302        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
2303    }
2304
2305    let namespace = Some("debian".to_string());
2306    let mut package = PackageData {
2307        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
2308        package_type: Some(PACKAGE_TYPE),
2309        namespace: namespace.clone(),
2310        name: package_name.map(|s| truncate_field(s.to_string())),
2311        file_references,
2312        ..Default::default()
2313    };
2314
2315    if let Some(n) = &package.name {
2316        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
2317    }
2318
2319    package
2320}
2321
// Invokes `register_parser!` for control files in extracted `.deb` control
// tarballs; the globs mirror the suffixes accepted by
// `DebianControlInExtractedDebParser::is_match`.
// NOTE(review): the meaning of the `"deb"` and `""` arguments is defined by
// the macro — confirm there.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2332
// Invokes `register_parser!` for `md5sums` files in extracted `.deb` control
// tarballs; the globs mirror the suffixes accepted by
// `DebianMd5sumInPackageParser::is_match`.
// NOTE(review): the meaning of the `"deb"` and `""` arguments is defined by
// the macro — confirm there.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
2343
2344#[cfg(test)]
2345mod tests {
2346    use super::*;
2347    use crate::models::DatasourceId;
2348    use crate::models::PackageType;
2349    use ar::{Builder as ArBuilder, Header as ArHeader};
2350    use flate2::Compression;
2351    use flate2::write::GzEncoder;
2352    use liblzma::write::XzEncoder;
2353    use std::io::Cursor;
2354    use std::path::PathBuf;
2355    use tar::{Builder as TarBuilder, Header as TarHeader};
2356    use tempfile::NamedTempFile;
2357
2358    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
2359        let mut control_tar = Vec::new();
2360        {
2361            let encoder = XzEncoder::new(&mut control_tar, 6);
2362            let mut tar_builder = TarBuilder::new(encoder);
2363
2364            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
2365            let mut header = TarHeader::new_gnu();
2366            header
2367                .set_path("control")
2368                .expect("control tar path should be valid");
2369            header.set_size(control_content.len() as u64);
2370            header.set_mode(0o644);
2371            header.set_cksum();
2372            tar_builder
2373                .append(&header, Cursor::new(control_content))
2374                .expect("control file should be appended to tar.xz");
2375            tar_builder.finish().expect("control tar.xz should finish");
2376        }
2377
2378        let deb = NamedTempFile::new().expect("temp deb file should be created");
2379        {
2380            let mut builder = ArBuilder::new(
2381                deb.reopen()
2382                    .expect("temporary deb file should reopen for writing"),
2383            );
2384
2385            let debian_binary = b"2.0\n";
2386            let mut debian_binary_header =
2387                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2388            debian_binary_header.set_mode(0o100644);
2389            builder
2390                .append(&debian_binary_header, Cursor::new(debian_binary))
2391                .expect("debian-binary entry should be appended");
2392
2393            let mut control_header =
2394                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
2395            control_header.set_mode(0o100644);
2396            builder
2397                .append(&control_header, Cursor::new(control_tar))
2398                .expect("control.tar.xz entry should be appended");
2399        }
2400
2401        deb
2402    }
2403
2404    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
2405        let mut control_tar = Vec::new();
2406        {
2407            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
2408            let mut tar_builder = TarBuilder::new(encoder);
2409
2410            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
2411            let mut header = TarHeader::new_gnu();
2412            header
2413                .set_path("control")
2414                .expect("control tar path should be valid");
2415            header.set_size(control_content.len() as u64);
2416            header.set_mode(0o644);
2417            header.set_cksum();
2418            tar_builder
2419                .append(&header, Cursor::new(control_content))
2420                .expect("control file should be appended to tar.gz");
2421            tar_builder.finish().expect("control tar.gz should finish");
2422        }
2423
2424        let mut data_tar = Vec::new();
2425        {
2426            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
2427            let mut tar_builder = TarBuilder::new(encoder);
2428
2429            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
2430            let mut header = TarHeader::new_gnu();
2431            header
2432                .set_path("./usr/share/doc/synthetic/copyright")
2433                .expect("copyright path should be valid");
2434            header.set_size(copyright.len() as u64);
2435            header.set_mode(0o644);
2436            header.set_cksum();
2437            tar_builder
2438                .append(&header, Cursor::new(copyright))
2439                .expect("copyright file should be appended to data tar");
2440            tar_builder.finish().expect("data tar.gz should finish");
2441        }
2442
2443        let deb = NamedTempFile::new().expect("temp deb file should be created");
2444        {
2445            let mut builder = ArBuilder::new(
2446                deb.reopen()
2447                    .expect("temporary deb file should reopen for writing"),
2448            );
2449
2450            let debian_binary = b"2.0\n";
2451            let mut debian_binary_header =
2452                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
2453            debian_binary_header.set_mode(0o100644);
2454            builder
2455                .append(&debian_binary_header, Cursor::new(debian_binary))
2456                .expect("debian-binary entry should be appended");
2457
2458            let mut control_header =
2459                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
2460            control_header.set_mode(0o100644);
2461            builder
2462                .append(&control_header, Cursor::new(control_tar))
2463                .expect("control.tar.gz entry should be appended");
2464
2465            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
2466            data_header.set_mode(0o100644);
2467            builder
2468                .append(&data_header, Cursor::new(data_tar))
2469                .expect("data.tar.gz entry should be appended");
2470        }
2471
2472        deb
2473    }
2474
2475    // ====== Namespace detection ======
2476
2477    #[test]
2478    fn test_detect_namespace_from_ubuntu_version() {
2479        assert_eq!(
2480            detect_namespace(Some("1.0-1ubuntu1"), None),
2481            Some("ubuntu".to_string())
2482        );
2483    }
2484
2485    #[test]
2486    fn test_detect_namespace_from_debian_version() {
2487        assert_eq!(
2488            detect_namespace(Some("1.0-1+deb11u1"), None),
2489            Some("debian".to_string())
2490        );
2491    }
2492
2493    #[test]
2494    fn test_detect_namespace_from_ubuntu_maintainer() {
2495        assert_eq!(
2496            detect_namespace(
2497                None,
2498                Some("Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>")
2499            ),
2500            Some("ubuntu".to_string())
2501        );
2502    }
2503
2504    #[test]
2505    fn test_detect_namespace_from_debian_maintainer() {
2506        assert_eq!(
2507            detect_namespace(None, Some("John Doe <john@debian.org>")),
2508            Some("debian".to_string())
2509        );
2510    }
2511
2512    #[test]
2513    fn test_detect_namespace_default() {
2514        assert_eq!(
2515            detect_namespace(None, Some("Unknown <unknown@example.com>")),
2516            Some("debian".to_string())
2517        );
2518    }
2519
2520    #[test]
2521    fn test_detect_namespace_version_takes_priority() {
2522        // Version clue should be checked before maintainer
2523        assert_eq!(
2524            detect_namespace(Some("1.0ubuntu1"), Some("maintainer@debian.org")),
2525            Some("ubuntu".to_string())
2526        );
2527    }
2528
2529    // ====== PURL generation ======
2530
2531    #[test]
2532    fn test_build_purl_basic() {
2533        let purl = build_debian_purl("curl", Some("7.68.0-1"), Some("debian"), Some("amd64"));
2534        assert_eq!(
2535            purl,
2536            Some("pkg:deb/debian/curl@7.68.0-1?arch=amd64".to_string())
2537        );
2538    }
2539
2540    #[test]
2541    fn test_build_purl_no_version() {
2542        let purl = build_debian_purl("curl", None, Some("debian"), Some("any"));
2543        assert_eq!(purl, Some("pkg:deb/debian/curl?arch=any".to_string()));
2544    }
2545
2546    #[test]
2547    fn test_build_purl_no_arch() {
2548        let purl = build_debian_purl("curl", Some("7.68.0"), Some("ubuntu"), None);
2549        assert_eq!(purl, Some("pkg:deb/ubuntu/curl@7.68.0".to_string()));
2550    }
2551
2552    #[test]
2553    fn test_build_purl_no_namespace() {
2554        let purl = build_debian_purl("curl", Some("7.68.0"), None, None);
2555        assert_eq!(purl, Some("pkg:deb/curl@7.68.0".to_string()));
2556    }
2557
2558    // ====== Dependency parsing ======
2559
2560    #[test]
2561    fn test_parse_simple_dependency() {
2562        let deps = parse_dependency_field("libc6", "depends", true, false, Some("debian"));
2563        assert_eq!(deps.len(), 1);
2564        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2565        assert_eq!(deps[0].extracted_requirement, None);
2566        assert_eq!(deps[0].scope, Some("depends".to_string()));
2567    }
2568
2569    #[test]
2570    fn test_parse_dependency_with_version() {
2571        let deps =
2572            parse_dependency_field("libc6 (>= 2.17)", "depends", true, false, Some("debian"));
2573        assert_eq!(deps.len(), 1);
2574        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
2575        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));
2576    }
2577
2578    #[test]
2579    fn test_parse_dependency_exact_version() {
2580        let deps = parse_dependency_field(
2581            "libc6 (= 2.31-13+deb11u5)",
2582            "depends",
2583            true,
2584            false,
2585            Some("debian"),
2586        );
2587        assert_eq!(deps.len(), 1);
2588        assert_eq!(deps[0].is_pinned, Some(true));
2589    }
2590
2591    #[test]
2592    fn test_parse_dependency_strict_less() {
2593        let deps =
2594            parse_dependency_field("libgcc-s1 (<< 12)", "breaks", false, false, Some("debian"));
2595        assert_eq!(deps.len(), 1);
2596        assert_eq!(deps[0].extracted_requirement, Some("<< 12".to_string()));
2597        assert_eq!(deps[0].scope, Some("breaks".to_string()));
2598    }
2599
/// A comma-separated field produces one dependency per entry.
#[test]
fn test_parse_multiple_dependencies() {
    let field = "libc6 (>= 2.17), libssl1.1 (>= 1.1.0), zlib1g (>= 1:1.2.0)";
    let parsed = parse_dependency_field(field, "depends", true, false, Some("debian"));
    assert_eq!(parsed.len(), 3);
}
2611
/// `a | b` alternatives each become their own dependency, and every one of
/// them is flagged optional.
#[test]
fn test_parse_dependency_alternatives() {
    let field = "libssl1.1 | libssl3";
    let alternatives = parse_dependency_field(field, "depends", true, false, Some("debian"));

    assert_eq!(alternatives.len(), 2);
    for alternative in alternatives.iter() {
        assert_eq!(alternative.is_optional, Some(true));
    }
}
2626
/// `${...}` substitution variables are dropped; only the literal package
/// name survives.
#[test]
fn test_parse_dependency_skips_substitutions() {
    let field = "${shlibs:Depends}, ${misc:Depends}, libc6";
    let parsed = parse_dependency_field(field, "depends", true, false, Some("debian"));

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].purl.as_deref(), Some("pkg:deb/debian/libc6"));
}
2639
/// A trailing `[arch]` qualifier does not affect the resulting purl.
#[test]
fn test_parse_dependency_with_arch_qualifier() {
    let field = "libc6 (>= 2.17) [amd64]";
    let parsed = parse_dependency_field(field, "depends", true, false, Some("debian"));

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].purl.as_deref(), Some("pkg:deb/debian/libc6"));
}
2653
/// An empty field value produces no dependencies.
#[test]
fn test_parse_empty_dependency() {
    let parsed = parse_dependency_field("", "depends", true, false, Some("debian"));
    assert!(parsed.is_empty());
}
2659
2660    // ====== Source field parsing ======
2661
/// A `Source` value holding only a name maps to a single unversioned purl.
#[test]
fn test_parse_source_field_name_only() {
    let source_purls = parse_source_field(Some("util-linux"), Some("debian"));

    assert_eq!(source_purls.len(), 1);
    assert_eq!(source_purls[0], "pkg:deb/debian/util-linux");
}
2668
/// A `name (version)` value yields a versioned purl with `+` percent-encoded.
#[test]
fn test_parse_source_field_with_version() {
    let raw = "util-linux (2.36.1-8+deb11u1)";
    let source_purls = parse_source_field(Some(raw), Some("debian"));

    assert_eq!(source_purls.len(), 1);
    assert_eq!(
        source_purls[0],
        "pkg:deb/debian/util-linux@2.36.1-8%2Bdeb11u1"
    );
}
2675
/// A missing `Source` field produces no source purls.
#[test]
fn test_parse_source_field_empty() {
    let source_purls = parse_source_field(None, Some("debian"));
    assert!(source_purls.is_empty());
}
2681
2682    // ====== Control file parsing ======
2683
/// A source paragraph followed by one binary paragraph yields a single
/// package whose URLs and maintainer come from the source paragraph.
#[test]
fn test_parse_debian_control_source_and_binary() {
    let control = concat!(
        "Source: curl\n",
        "Section: web\n",
        "Priority: optional\n",
        "Maintainer: Alessandro Ghedini <ghedo@debian.org>\n",
        "Homepage: https://curl.se/\n",
        "Vcs-Browser: https://salsa.debian.org/debian/curl\n",
        "Vcs-Git: https://salsa.debian.org/debian/curl.git\n",
        "Build-Depends: debhelper (>= 12), libssl-dev\n",
        "\n",
        "Package: curl\n",
        "Architecture: amd64\n",
        "Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})\n",
        "Description: command line tool for transferring data with URL syntax",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 1);

    let curl = &packages[0];
    assert_eq!(curl.name.as_deref(), Some("curl"));
    assert_eq!(curl.package_type, Some(PackageType::Deb));
    assert_eq!(curl.homepage_url.as_deref(), Some("https://curl.se/"));
    assert_eq!(
        curl.vcs_url.as_deref(),
        Some("https://salsa.debian.org/debian/curl.git")
    );
    assert_eq!(
        curl.code_view_url.as_deref(),
        Some("https://salsa.debian.org/debian/curl")
    );

    // The maintainer is declared only in the source paragraph.
    assert_eq!(curl.parties.len(), 1);
    let maintainer = &curl.parties[0];
    assert_eq!(maintainer.role.as_deref(), Some("maintainer"));
    assert_eq!(maintainer.name.as_deref(), Some("Alessandro Ghedini"));
    assert_eq!(maintainer.email.as_deref(), Some("ghedo@debian.org"));

    // At least one dependency was extracted.
    assert!(!curl.dependencies.is_empty());
}
2726
/// Two binary paragraphs under one source paragraph yield two packages, in
/// file order, each carrying exactly one party.
#[test]
fn test_parse_debian_control_multiple_binary() {
    let control = concat!(
        "Source: gzip\n",
        "Maintainer: Debian Developer <dev@debian.org>\n",
        "\n",
        "Package: gzip\n",
        "Architecture: any\n",
        "Depends: libc6 (>= 2.17)\n",
        "Description: GNU file compression\n",
        "\n",
        "Package: gzip-win32\n",
        "Architecture: all\n",
        "Description: gzip for Windows",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 2);

    let (first, second) = (&packages[0], &packages[1]);
    assert_eq!(first.name.as_deref(), Some("gzip"));
    assert_eq!(second.name.as_deref(), Some("gzip-win32"));

    // The only Maintainer field lives in the source paragraph.
    assert_eq!(first.parties.len(), 1);
    assert_eq!(second.parties.len(), 1);
}
2751
/// A control file with only a source paragraph still yields one package,
/// whose first dependency is scoped `build-depends`.
#[test]
fn test_parse_debian_control_source_only() {
    let control = concat!(
        "Source: my-package\n",
        "Maintainer: Test User <test@debian.org>\n",
        "Build-Depends: debhelper (>= 13)",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 1);

    let source_pkg = &packages[0];
    assert_eq!(source_pkg.name.as_deref(), Some("my-package"));

    // Build-Depends must have been picked up.
    assert!(!source_pkg.dependencies.is_empty());
    assert_eq!(
        source_pkg.dependencies[0].scope.as_deref(),
        Some("build-depends")
    );
}
2769
/// `Uploaders` entries are appended after the maintainer, one party each.
#[test]
fn test_parse_debian_control_with_uploaders() {
    let control = concat!(
        "Source: example\n",
        "Maintainer: Main Dev <main@debian.org>\n",
        "Uploaders: Alice <alice@example.com>, Bob <bob@example.com>\n",
        "\n",
        "Package: example\n",
        "Architecture: any\n",
        "Description: test package",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 1);

    // One maintainer followed by two uploaders, in that order.
    let parties = &packages[0].parties;
    assert_eq!(parties.len(), 3);
    assert_eq!(parties[0].role.as_deref(), Some("maintainer"));
    assert_eq!(parties[1].role.as_deref(), Some("uploader"));
    assert_eq!(parties[2].role.as_deref(), Some("uploader"));
}
2789
/// A `Vcs-Git` value with a trailing `-b <branch>` keeps only the URL part.
#[test]
fn test_parse_debian_control_vcs_git_with_branch() {
    let control = concat!(
        "Source: example\n",
        "Maintainer: Dev <dev@debian.org>\n",
        "Vcs-Git: https://salsa.debian.org/example.git -b main\n",
        "\n",
        "Package: example\n",
        "Architecture: any\n",
        "Description: test",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 1);

    // The branch suffix must not leak into the URL.
    let vcs_url = packages[0].vcs_url.as_deref();
    assert_eq!(vcs_url, Some("https://salsa.debian.org/example.git"));
}
2809
/// The `Multi-Arch` field is exposed under `multi_arch` in `extra_data`.
#[test]
fn test_parse_debian_control_multi_arch() {
    let control = concat!(
        "Source: example\n",
        "Maintainer: Dev <dev@debian.org>\n",
        "\n",
        "Package: libexample\n",
        "Architecture: any\n",
        "Multi-Arch: same\n",
        "Description: shared library",
    );

    let packages = parse_debian_control(control);
    assert_eq!(packages.len(), 1);

    let extra_data = packages[0].extra_data.as_ref().unwrap();
    let expected = serde_json::Value::from("same");
    assert_eq!(extra_data.get("multi_arch"), Some(&expected));
}
2829
2830    // ====== dpkg/status parsing ======
2831
/// Only the `install ok installed` record is returned; it is detected as an
/// Ubuntu package and carries `Installed-Size` as a number in `extra_data`.
#[test]
fn test_parse_dpkg_status_basic() {
    let status_db = concat!(
        "Package: base-files\n",
        "Status: install ok installed\n",
        "Priority: required\n",
        "Section: admin\n",
        "Installed-Size: 391\n",
        "Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>\n",
        "Architecture: amd64\n",
        "Version: 11ubuntu5.6\n",
        "Description: Debian base system miscellaneous files\n",
        "Homepage: https://tracker.debian.org/pkg/base-files\n",
        "\n",
        "Package: not-installed\n",
        "Status: deinstall ok config-files\n",
        "Architecture: amd64\n",
        "Version: 1.0\n",
        "Description: This should be skipped",
    );

    let packages = parse_dpkg_status(status_db);
    assert_eq!(packages.len(), 1);

    let base_files = &packages[0];
    assert_eq!(base_files.name.as_deref(), Some("base-files"));
    assert_eq!(base_files.version.as_deref(), Some("11ubuntu5.6"));
    assert_eq!(base_files.namespace.as_deref(), Some("ubuntu"));
    assert_eq!(
        base_files.datasource_id,
        Some(DatasourceId::DebianInstalledStatusDb)
    );

    // Installed-Size is stored as a JSON number.
    let extra_data = base_files.extra_data.as_ref().unwrap();
    let expected_size = serde_json::Value::from(391);
    assert_eq!(extra_data.get("installed_size"), Some(&expected_size));
}
2871
/// Two installed records are both returned, in file order.
#[test]
fn test_parse_dpkg_status_multiple_installed() {
    let status_db = concat!(
        "Package: libc6\n",
        "Status: install ok installed\n",
        "Architecture: amd64\n",
        "Version: 2.31-13+deb11u5\n",
        "Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>\n",
        "Description: GNU C Library\n",
        "\n",
        "Package: zlib1g\n",
        "Status: install ok installed\n",
        "Architecture: amd64\n",
        "Version: 1:1.2.11.dfsg-2+deb11u2\n",
        "Maintainer: Mark Brown <broonie@debian.org>\n",
        "Description: compression library",
    );

    let packages = parse_dpkg_status(status_db);

    assert_eq!(packages.len(), 2);
    assert_eq!(packages[0].name.as_deref(), Some("libc6"));
    assert_eq!(packages[1].name.as_deref(), Some("zlib1g"));
}
2894
/// `Depends` and `Recommends` entries are both collected, in that order, and
/// the `Recommends` entry is flagged optional.
#[test]
fn test_parse_dpkg_status_with_dependencies() {
    let status_db = concat!(
        "Package: curl\n",
        "Status: install ok installed\n",
        "Architecture: amd64\n",
        "Version: 7.74.0-1.3+deb11u7\n",
        "Maintainer: Alessandro Ghedini <ghedo@debian.org>\n",
        "Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)\n",
        "Recommends: ca-certificates\n",
        "Description: command line tool for transferring data with URL syntax",
    );

    let packages = parse_dpkg_status(status_db);
    assert_eq!(packages.len(), 1);

    // Two entries from Depends plus one from Recommends.
    let dependencies = &packages[0].dependencies;
    assert_eq!(dependencies.len(), 3);

    let libc = &dependencies[0];
    assert_eq!(libc.purl.as_deref(), Some("pkg:deb/debian/libc6"));
    assert_eq!(libc.scope.as_deref(), Some("depends"));
    assert_eq!(libc.extracted_requirement.as_deref(), Some(">= 2.17"));

    let recommended = &dependencies[2];
    assert_eq!(
        recommended.purl.as_deref(),
        Some("pkg:deb/debian/ca-certificates")
    );
    assert_eq!(recommended.scope.as_deref(), Some("recommends"));
    assert_eq!(recommended.is_optional, Some(true));
}
2927
/// A `Source: name (version)` field populates `source_packages`.
#[test]
fn test_parse_dpkg_status_with_source() {
    let status_db = concat!(
        "Package: libncurses6\n",
        "Status: install ok installed\n",
        "Architecture: amd64\n",
        "Source: ncurses (6.2+20201114-2+deb11u1)\n",
        "Version: 6.2+20201114-2+deb11u1\n",
        "Maintainer: Craig Small <csmall@debian.org>\n",
        "Description: shared libraries for terminal handling",
    );

    let packages = parse_dpkg_status(status_db);
    assert_eq!(packages.len(), 1);

    let source_packages = &packages[0].source_packages;
    assert!(!source_packages.is_empty());
    // Only the source package name is checked here, not the version part.
    assert!(source_packages[0].contains("ncurses"));
}
2945
/// Of four records with different `Status` values, only the
/// `install ok installed` one is returned.
#[test]
fn test_parse_dpkg_status_filters_not_installed() {
    let status_db = concat!(
        "Package: installed-pkg\n",
        "Status: install ok installed\n",
        "Version: 1.0\n",
        "Architecture: amd64\n",
        "Description: installed\n",
        "\n",
        "Package: half-installed\n",
        "Status: install ok half-installed\n",
        "Version: 2.0\n",
        "Architecture: amd64\n",
        "Description: half installed\n",
        "\n",
        "Package: deinstall-pkg\n",
        "Status: deinstall ok config-files\n",
        "Version: 3.0\n",
        "Architecture: amd64\n",
        "Description: deinstalled\n",
        "\n",
        "Package: purge-pkg\n",
        "Status: purge ok not-installed\n",
        "Version: 4.0\n",
        "Architecture: amd64\n",
        "Description: purged",
    );

    let packages = parse_dpkg_status(status_db);

    assert_eq!(packages.len(), 1);
    assert_eq!(packages[0].name.as_deref(), Some("installed-pkg"));
}
2977
/// Empty input yields no packages.
#[test]
fn test_parse_dpkg_status_empty() {
    let nothing = parse_dpkg_status("");
    assert!(nothing.is_empty());
}
2983
2984    // ====== is_match tests ======
2985
/// The control parser matches `control` only when it sits in a `debian/`
/// directory.
#[test]
fn test_debian_control_is_match() {
    for accepted in ["/path/to/debian/control", "debian/control"] {
        assert!(DebianControlParser::is_match(Path::new(accepted)));
    }

    for rejected in ["/path/to/control", "/path/to/debian/changelog"] {
        assert!(!DebianControlParser::is_match(Path::new(rejected)));
    }
}
2999
/// The installed-database parser matches `var/lib/dpkg/status` under any
/// root, but not `status.d` entries or the `available` file.
#[test]
fn test_debian_installed_is_match() {
    for accepted in ["/var/lib/dpkg/status", "some/root/var/lib/dpkg/status"] {
        assert!(DebianInstalledParser::is_match(Path::new(accepted)));
    }

    for rejected in [
        "/var/lib/dpkg/status.d/something",
        "/var/lib/dpkg/available",
    ] {
        assert!(!DebianInstalledParser::is_match(Path::new(rejected)));
    }
}
3015
3016    // ====== Edge cases ======
3017
/// Empty control content yields no packages.
#[test]
fn test_parse_debian_control_empty_input() {
    let nothing = parse_debian_control("");
    assert!(nothing.is_empty());
}
3023
/// Text that is not an RFC 822 paragraph must neither panic nor produce
/// packages.
#[test]
fn test_parse_debian_control_malformed_input() {
    let garbage = "this is not a valid control file\nwith random text";

    let packages = parse_debian_control(garbage);

    // Reaching this point means the parser did not panic; the result must be empty.
    assert!(packages.is_empty());
}
3031
/// An epoch prefix (`1:`) inside a version constraint is kept in the
/// extracted requirement.
#[test]
fn test_dependency_with_epoch_version() {
    let field = "zlib1g (>= 1:1.2.11)";
    let parsed = parse_dependency_field(field, "depends", true, false, Some("debian"));

    assert_eq!(parsed.len(), 1);
    assert_eq!(
        parsed[0].extracted_requirement.as_deref(),
        Some(">= 1:1.2.11")
    );
}
3048
/// `+` characters in a package name are percent-encoded in the purl.
#[test]
fn test_dependency_with_plus_in_name() {
    let field = "libstdc++6 (>= 10)";
    let parsed = parse_dependency_field(field, "depends", true, false, Some("debian"));
    assert_eq!(parsed.len(), 1);

    let purl = parsed[0].purl.as_ref().unwrap();
    assert!(purl.contains("libstdc%2B%2B6"));
}
3056
/// The dsc parser matches `*.dsc` file names and nothing else.
#[test]
fn test_dsc_parser_is_match() {
    for accepted in ["package.dsc", "adduser_3.118+deb11u1.dsc"] {
        assert!(DebianDscParser::is_match(&PathBuf::from(accepted)));
    }

    for rejected in ["control", "package.txt"] {
        assert!(!DebianDscParser::is_match(&PathBuf::from(rejected)));
    }
}
3066
/// End-to-end check of the `adduser` dsc fixture: identity, URLs, parties,
/// source package and build dependencies.
#[test]
fn test_dsc_parser_adduser() {
    let fixture = PathBuf::from("testdata/debian/dsc_files/adduser_3.118+deb11u1.dsc");
    let package = DebianDscParser::extract_first_package(&fixture);

    // Identity and purl.
    assert_eq!(package.package_type, Some(PACKAGE_TYPE));
    assert_eq!(package.namespace.as_deref(), Some("debian"));
    assert_eq!(package.name.as_deref(), Some("adduser"));
    assert_eq!(package.version.as_deref(), Some("3.118+deb11u1"));
    assert_eq!(
        package.purl.as_deref(),
        Some("pkg:deb/debian/adduser@3.118%2Bdeb11u1?arch=all")
    );

    // VCS metadata and datasource.
    assert_eq!(
        package.vcs_url.as_deref(),
        Some("https://salsa.debian.org/debian/adduser.git")
    );
    assert_eq!(
        package.code_view_url.as_deref(),
        Some("https://salsa.debian.org/debian/adduser")
    );
    assert_eq!(
        package.datasource_id,
        Some(DatasourceId::DebianSourceControlDsc)
    );

    // Maintainer first, then the uploader.
    assert_eq!(package.parties.len(), 2);

    let maintainer = &package.parties[0];
    assert_eq!(maintainer.role.as_deref(), Some("maintainer"));
    assert_eq!(maintainer.name.as_deref(), Some("Debian Adduser Developers"));
    assert_eq!(
        maintainer.email.as_deref(),
        Some("adduser@packages.debian.org")
    );
    assert_eq!(maintainer.r#type, None);

    let uploader = &package.parties[1];
    assert_eq!(uploader.role.as_deref(), Some("uploader"));
    assert_eq!(uploader.name.as_deref(), Some("Marc Haber"));
    assert_eq!(
        uploader.email.as_deref(),
        Some("mh+debian-packages@zugschlus.de")
    );
    assert_eq!(uploader.r#type, None);

    // The source package purl carries no version.
    assert_eq!(package.source_packages.len(), 1);
    assert_eq!(
        package.source_packages[0],
        String::from("pkg:deb/debian/adduser")
    );

    // At least two dependency purls mention po-debconf or debhelper.
    assert!(!package.dependencies.is_empty());
    let matching_build_deps = package
        .dependencies
        .iter()
        .filter_map(|dep| dep.purl.as_ref())
        .filter(|purl| purl.contains("po-debconf") || purl.contains("debhelper"))
        .count();
    assert!(matching_build_deps >= 2);
}
3129
/// The `zsh` dsc fixture yields the expected name, version and namespace,
/// and a purl that mentions both the name and the upstream version.
#[test]
fn test_dsc_parser_zsh() {
    let fixture = PathBuf::from("testdata/debian/dsc_files/zsh_5.7.1-1+deb10u1.dsc");
    let package = DebianDscParser::extract_first_package(&fixture);

    assert_eq!(package.name.as_deref(), Some("zsh"));
    assert_eq!(package.version.as_deref(), Some("5.7.1-1+deb10u1"));
    assert_eq!(package.namespace.as_deref(), Some("debian"));

    let purl = package.purl.as_ref().expect("purl should be generated");
    assert!(purl.contains("zsh"));
    assert!(purl.contains("5.7.1"));
}
3142
/// Inline dsc content yields name, version, namespace, one maintainer, one
/// build dependency and an `arch` qualifier in the purl.
#[test]
fn test_parse_dsc_content_basic() {
    // The `Files:` continuation line starts with a single space.
    let dsc = concat!(
        "Format: 3.0 (native)\n",
        "Source: testpkg\n",
        "Binary: testpkg\n",
        "Architecture: amd64\n",
        "Version: 1.0.0\n",
        "Maintainer: Test User <test@example.com>\n",
        "Standards-Version: 4.5.0\n",
        "Build-Depends: debhelper (>= 12)\n",
        "Files:\n",
        " abc123 1024 testpkg_1.0.0.tar.xz\n",
    );

    let package = parse_dsc_content(dsc);

    assert_eq!(package.name.as_deref(), Some("testpkg"));
    assert_eq!(package.version.as_deref(), Some("1.0.0"));
    assert_eq!(package.namespace.as_deref(), Some("debian"));

    assert_eq!(package.parties.len(), 1);
    let maintainer = &package.parties[0];
    assert_eq!(maintainer.name.as_deref(), Some("Test User"));
    assert_eq!(maintainer.email.as_deref(), Some("test@example.com"));

    assert_eq!(package.dependencies.len(), 1);

    let purl = package.purl.as_ref().unwrap();
    assert!(purl.contains("arch=amd64"));
}
3170
/// In dsc content, the maintainer comes first and each `Uploaders` entry
/// follows as its own party.
#[test]
fn test_parse_dsc_content_with_uploaders() {
    let dsc = concat!(
        "Source: mypkg\n",
        "Version: 2.0\n",
        "Architecture: all\n",
        "Maintainer: Main Dev <main@example.com>\n",
        "Uploaders: Dev One <dev1@example.com>, Dev Two <dev2@example.com>\n",
    );

    let package = parse_dsc_content(dsc);

    let parties = &package.parties;
    assert_eq!(parties.len(), 3);
    assert_eq!(parties[0].role.as_deref(), Some("maintainer"));
    assert_eq!(parties[1].role.as_deref(), Some("uploader"));
    assert_eq!(parties[2].role.as_deref(), Some("uploader"));
}
3186
/// The orig-tarball parser matches `*.orig.tar.*` names only.
#[test]
fn test_orig_tar_parser_is_match() {
    for accepted in ["package_1.0.orig.tar.gz", "abseil_0~20200923.3.orig.tar.xz"] {
        assert!(DebianOrigTarParser::is_match(&PathBuf::from(accepted)));
    }

    for rejected in ["package.debian.tar.gz", "control"] {
        assert!(!DebianOrigTarParser::is_match(&PathBuf::from(rejected)));
    }
}
3200
/// The debian-tarball parser matches `*.debian.tar.*` names only.
#[test]
fn test_debian_tar_parser_is_match() {
    for accepted in [
        "package_1.0-1.debian.tar.xz",
        "abseil_20220623.1-1.debian.tar.gz",
    ] {
        assert!(DebianDebianTarParser::is_match(&PathBuf::from(accepted)));
    }

    for rejected in ["package.orig.tar.gz", "control"] {
        assert!(!DebianDebianTarParser::is_match(&PathBuf::from(rejected)));
    }
}
3214
/// An `.orig.tar.gz` file name is split at the underscore into name and
/// version; the supplied datasource id is kept.
#[test]
fn test_parse_orig_tar_filename() {
    let tarball = "abseil_0~20200923.3.orig.tar.gz";
    let parsed = parse_source_tarball_filename(tarball, DatasourceId::DebianOriginalSourceTarball);

    assert_eq!(parsed.name.as_deref(), Some("abseil"));
    assert_eq!(parsed.version.as_deref(), Some("0~20200923.3"));
    assert_eq!(parsed.namespace.as_deref(), Some("debian"));
    assert_eq!(
        parsed.purl.as_deref(),
        Some("pkg:deb/debian/abseil@0~20200923.3")
    );
    assert_eq!(
        parsed.datasource_id,
        Some(DatasourceId::DebianOriginalSourceTarball)
    );
}
3233
/// A `.debian.tar.xz` file name yields name, version (with the Debian
/// revision) and a matching purl.
#[test]
fn test_parse_debian_tar_filename() {
    let tarball = "abseil_20220623.1-1.debian.tar.xz";
    let parsed = parse_source_tarball_filename(tarball, DatasourceId::DebianSourceMetadataTarball);

    assert_eq!(parsed.name.as_deref(), Some("abseil"));
    assert_eq!(parsed.version.as_deref(), Some("20220623.1-1"));
    assert_eq!(parsed.namespace.as_deref(), Some("debian"));
    assert_eq!(
        parsed.purl.as_deref(),
        Some("pkg:deb/debian/abseil@20220623.1-1")
    );
}
3248
/// A `name_version_arch.deb` file name is parsed into name and version; a
/// name without underscores yields neither.
#[test]
fn test_parse_deb_filename() {
    let well_formed = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
    assert_eq!(well_formed.name.as_deref(), Some("nginx"));
    assert_eq!(well_formed.version.as_deref(), Some("1.18.0-1"));

    let unparseable = parse_deb_filename("invalid.deb");
    assert!(unparseable.name.is_none());
    assert!(unparseable.version.is_none());
}
3259
/// The compression suffix (`gz`, `xz`, `bz2`) does not affect the version.
#[test]
fn test_parse_source_tarball_various_compressions() {
    let tarballs = [
        "test_1.0.orig.tar.gz",
        "test_1.0.orig.tar.xz",
        "test_1.0.orig.tar.bz2",
    ];

    for tarball in tarballs {
        let parsed =
            parse_source_tarball_filename(tarball, DatasourceId::DebianOriginalSourceTarball);
        assert_eq!(parsed.version.as_deref(), Some("1.0"));
    }
}
3279
/// A tarball name with no underscore separator yields neither name nor
/// version.
#[test]
fn test_parse_source_tarball_invalid_format() {
    let tarball = "invalid-no-underscore.tar.gz";
    let parsed = parse_source_tarball_filename(tarball, DatasourceId::DebianOriginalSourceTarball);

    assert!(parsed.name.is_none());
    assert!(parsed.version.is_none());
}
3289
/// The `.list` parser matches only `.list` files under `/var/lib/dpkg/info/`.
#[test]
fn test_list_parser_is_match() {
    for accepted in [
        "/var/lib/dpkg/info/bash.list",
        "/var/lib/dpkg/info/package:amd64.list",
    ] {
        assert!(DebianInstalledListParser::is_match(&PathBuf::from(accepted)));
    }

    for rejected in ["bash.list", "/var/lib/dpkg/info/bash.md5sums"] {
        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(rejected)));
    }
}
3305
/// The `.md5sums` parser matches only `.md5sums` files under
/// `/var/lib/dpkg/info/`.
#[test]
fn test_md5sums_parser_is_match() {
    for accepted in [
        "/var/lib/dpkg/info/bash.md5sums",
        "/var/lib/dpkg/info/package:amd64.md5sums",
    ] {
        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(accepted)));
    }

    for rejected in ["bash.md5sums", "/var/lib/dpkg/info/bash.list"] {
        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(rejected)));
    }
}
3321
/// A plain `.list` file yields references without checksums; the `/.` and
/// `/bin` entries of this fixture are not reported.
#[test]
fn test_parse_debian_file_list_plain_list() {
    let listing = concat!(
        "/.\n",
        "/bin\n",
        "/bin/bash\n",
        "/usr/bin/bashbug\n",
        "/usr/share/doc/bash/README\n",
    );

    let pkg = parse_debian_file_list(listing, "bash", DatasourceId::DebianInstalledFilesList);
    assert_eq!(pkg.name.as_deref(), Some("bash"));

    let references = &pkg.file_references;
    assert_eq!(references.len(), 3);
    assert_eq!(references[0].path, "/bin/bash");
    assert_eq!(references[0].md5, None);
    assert_eq!(references[1].path, "/usr/bin/bashbug");
    assert_eq!(references[2].path, "/usr/share/doc/bash/README");
}
3338
/// `md5sums`-style lines (`<hex>  <path>`) yield references that carry both
/// the relative path and the parsed digest.
#[test]
fn test_parse_debian_file_list_md5sums() {
    // Digest and path are separated by two spaces.
    let listing = concat!(
        "77506afebd3b7e19e937a678a185b62e  bin/bash\n",
        "1c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug\n",
        "f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz\n",
    );

    let pkg = parse_debian_file_list(listing, "bash", DatasourceId::DebianInstalledFilesList);
    assert_eq!(pkg.name.as_deref(), Some("bash"));

    let references = &pkg.file_references;
    assert_eq!(references.len(), 3);

    let bash = &references[0];
    assert_eq!(bash.path, "bin/bash");
    assert_eq!(
        bash.md5,
        Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
    );

    let bashbug = &references[1];
    assert_eq!(bashbug.path, "usr/bin/bashbug");
    assert_eq!(
        bashbug.md5,
        Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
    );
}
3359
/// A `name:arch` identifier is split: the name drops the suffix and the purl
/// gains an `arch` qualifier.
#[test]
fn test_parse_debian_file_list_with_arch() {
    let listing = "/usr/bin/foo\n/usr/lib/x86_64-linux-gnu/libfoo.so\n";

    let pkg = parse_debian_file_list(
        listing,
        "libfoo:amd64",
        DatasourceId::DebianInstalledFilesList,
    );

    assert_eq!(pkg.name.as_deref(), Some("libfoo"));
    let purl = pkg.purl.as_ref().expect("purl should be generated");
    assert!(purl.contains("arch=amd64"));
    assert_eq!(pkg.file_references.len(), 2);
}
3375
/// Comment lines, empty lines and whitespace-only lines produce no file
/// references.
#[test]
fn test_parse_debian_file_list_skips_comments_and_empty() {
    // The second-to-last line consists of exactly two spaces.
    let listing = concat!(
        "# This is a comment\n",
        "/bin/bash\n",
        "\n",
        "/usr/bin/bashbug\n",
        "  \n",
    );

    let pkg = parse_debian_file_list(listing, "bash", DatasourceId::DebianInstalledFilesList);
    assert_eq!(pkg.file_references.len(), 2);
}
3387
/// When the identifier is the literal `md5sums`, no package name is set but
/// the file reference is still collected.
#[test]
fn test_parse_debian_file_list_md5sums_only() {
    let listing = "abc123  usr/bin/tool\n";

    let pkg = parse_debian_file_list(listing, "md5sums", DatasourceId::DebianInstalledFilesList);

    assert!(pkg.name.is_none());
    assert_eq!(pkg.file_references.len(), 1);
}
3397
/// Of the entries `/.`, `/bin`, `/etc`, `/usr`, `/var` and `/bin/bash`, only
/// `/bin/bash` is reported.
#[test]
fn test_parse_debian_file_list_ignores_root_dirs() {
    let listing = concat!(
        "/.\n",
        "/bin\n",
        "/bin/bash\n",
        "/etc\n",
        "/usr\n",
        "/var\n",
    );

    let pkg = parse_debian_file_list(listing, "bash", DatasourceId::DebianInstalledFilesList);

    let references = &pkg.file_references;
    assert_eq!(references.len(), 1);
    assert_eq!(references[0].path, "/bin/bash");
}
3411
/// Paths the copyright parser must accept and reject.
#[test]
fn test_copyright_parser_is_match() {
    let accepted = [
        "/usr/share/doc/bash/copyright",
        "debian/copyright",
        "src/third_party/gperftools/dist/packages/deb/copyright",
        "ports/zlib/copyright",
        "/tmp/sample_copyright",
    ];
    for path in accepted {
        assert!(DebianCopyrightParser::is_match(&PathBuf::from(path)));
    }

    let rejected = ["copyright.txt", "/etc/copyright"];
    for path in rejected {
        assert!(!DebianCopyrightParser::is_match(&PathBuf::from(path)));
    }
}
3436
/// Each copyright path maps to the expected datasource id.
#[test]
fn test_detect_debian_copyright_datasource() {
    let expectations = [
        ("debian/copyright", DatasourceId::DebianCopyrightInSource),
        (
            "src/third_party/gperftools/dist/packages/deb/copyright",
            DatasourceId::DebianCopyrightStandalone,
        ),
        (
            "ports/zlib/copyright",
            DatasourceId::DebianCopyrightStandalone,
        ),
        (
            "/usr/share/doc/bash/copyright",
            DatasourceId::DebianCopyrightInPackage,
        ),
        ("stable_copyright", DatasourceId::DebianCopyrightStandalone),
    ];

    for (path, expected) in expectations {
        assert_eq!(
            detect_debian_copyright_datasource(&PathBuf::from(path)),
            expected
        );
    }
}
3462
/// Package names come from `/usr/share/doc/<name>/copyright` paths, not from
/// `debian/copyright`; the standalone variant takes the parent directory
/// name of `ports/zlib/copyright`.
#[test]
fn test_extract_package_name_from_path() {
    let installed_cases = [
        ("/usr/share/doc/bash/copyright", Some("bash")),
        ("/usr/share/doc/libseccomp2/copyright", Some("libseccomp2")),
        ("debian/copyright", None),
    ];
    for (path, expected) in installed_cases {
        let extracted = extract_package_name_from_path(&PathBuf::from(path));
        assert_eq!(extracted, expected.map(|name| name.to_string()));
    }

    let standalone = extract_standalone_package_name_from_path(
        &PathBuf::from("ports/zlib/copyright"),
        DatasourceId::DebianCopyrightStandalone,
    );
    assert_eq!(standalone, Some("zlib".to_string()));
}
3485
3486    #[test]
3487    fn test_parse_copyright_dep5_format() {
3488        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
3489Upstream-Name: libseccomp
3490Source: https://sourceforge.net/projects/libseccomp/
3491
3492Files: *
3493Copyright: 2012 Paul Moore <pmoore@redhat.com>
3494 2012 Ashley Lai <adlai@us.ibm.com>
3495License: LGPL-2.1
3496
3497License: LGPL-2.1
3498 This library is free software
3499";
3500        let pkg = parse_copyright_file(content, Some("libseccomp"));
3501        assert_eq!(pkg.name, Some("libseccomp".to_string()));
3502        assert_eq!(pkg.namespace, Some("debian".to_string()));
3503        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianCopyright));
3504        assert_eq!(
3505            pkg.extracted_license_statement,
3506            Some("LGPL-2.1".to_string())
3507        );
3508        assert!(pkg.parties.len() >= 2);
3509        assert_eq!(pkg.parties[0].role, Some("copyright-holder".to_string()));
3510        assert!(pkg.parties[0].name.as_ref().unwrap().contains("Paul Moore"));
3511    }
3512
3513    #[test]
3514    fn test_parse_copyright_primary_license_detection_from_bsdutils_fixture() {
3515        let path = PathBuf::from(
3516            "testdata/debian-fixtures/debian-slim-2021-04-07/usr/share/doc/bsdutils/copyright",
3517        );
3518        let pkg = DebianCopyrightParser::extract_first_package(&path);
3519
3520        assert_eq!(pkg.name, Some("bsdutils".to_string()));
3521        let extracted = pkg
3522            .extracted_license_statement
3523            .as_deref()
3524            .expect("license statement should exist");
3525        assert!(extracted.contains("GPL-2+"));
3526        assert!(!pkg.license_detections.is_empty());
3527
3528        let primary = &pkg.license_detections[0];
3529        assert_eq!(
3530            primary.matches[0].matched_text.as_deref(),
3531            Some("License: GPL-2+")
3532        );
3533        assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap());
3534        assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap());
3535    }
3536
3537    #[test]
3538    fn test_parse_copyright_emits_ordered_absolute_case_preserved_detections() {
3539        let path = PathBuf::from("testdata/debian/copyright/copyright");
3540        let pkg = DebianCopyrightParser::extract_first_package(&path);
3541
3542        assert_eq!(pkg.license_detections.len(), 1);
3543        assert_eq!(pkg.other_license_detections.len(), 4);
3544
3545        let primary = &pkg.license_detections[0];
3546        assert_eq!(
3547            primary.matches[0].matched_text.as_deref(),
3548            Some("License: LGPL-2.1")
3549        );
3550        assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap());
3551
3552        let ordered_lines: Vec<usize> = pkg
3553            .other_license_detections
3554            .iter()
3555            .map(|detection| detection.matches[0].start_line.get())
3556            .collect();
3557        assert_eq!(ordered_lines, vec![15, 19, 23, 25]);
3558
3559        let ordered_texts: Vec<&str> = pkg
3560            .other_license_detections
3561            .iter()
3562            .map(|detection| detection.matches[0].matched_text.as_deref().unwrap())
3563            .collect();
3564        assert_eq!(
3565            ordered_texts,
3566            vec![
3567                "License: LGPL-2.1",
3568                "License: LGPL-2.1",
3569                "License: LGPL-2.1",
3570                "License: LGPL-2.1",
3571            ]
3572        );
3573    }
3574
3575    #[test]
3576    fn test_parse_copyright_detects_bottom_standalone_license_paragraph() {
3577        let path = PathBuf::from(
3578            "testdata/debian-fixtures/debian-2019-11-15/main/c/clamav/stable_copyright",
3579        );
3580        let pkg = DebianCopyrightParser::extract_first_package(&path);
3581
3582        let zlib = pkg
3583            .other_license_detections
3584            .iter()
3585            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3586            .expect("at least one Zlib license paragraph should be detected");
3587        assert_eq!(
3588            zlib.matches[0].matched_text.as_deref(),
3589            Some("License: Zlib")
3590        );
3591
3592        let last_zlib = pkg
3593            .other_license_detections
3594            .iter()
3595            .rev()
3596            .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib"))
3597            .expect("bottom standalone Zlib license paragraph should be detected");
3598        assert_eq!(
3599            last_zlib.matches[0].start_line,
3600            LineNumber::new(732).unwrap()
3601        );
3602        assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap());
3603    }
3604
3605    #[test]
3606    fn test_parse_copyright_uses_header_paragraph_as_primary_when_files_star_is_blank() {
3607        let path =
3608            PathBuf::from("testdata/debian-fixtures/crafted_for_tests/test_license_nameless");
3609        let pkg = DebianCopyrightParser::extract_first_package(&path);
3610
3611        assert_eq!(pkg.license_detections.len(), 1);
3612        let primary = &pkg.license_detections[0];
3613        assert_eq!(
3614            primary.matches[0].matched_text.as_deref(),
3615            Some("License: LGPL-3+ or GPL-2+")
3616        );
3617        assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap());
3618        assert_eq!(primary.matches[0].end_line, LineNumber::new(8).unwrap());
3619
3620        assert!(pkg.other_license_detections.iter().any(|detection| {
3621            detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+")
3622        }));
3623    }
3624
3625    #[test]
3626    fn test_parse_copyright_prefers_files_star_primary_over_header_paragraph() {
3627        let content = "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: foo\nLicense: MIT\n\nFiles: *\nCopyright: 2024 Example\nLicense: GPL-2+\n";
3628        let pkg = parse_copyright_file(content, Some("foo"));
3629
3630        assert_eq!(pkg.license_detections.len(), 1);
3631        let primary = &pkg.license_detections[0];
3632        assert_eq!(
3633            primary.matches[0].matched_text.as_deref(),
3634            Some("License: GPL-2+")
3635        );
3636        assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap());
3637    }
3638
3639    #[test]
3640    fn test_finalize_copyright_paragraph_matches_rfc822_headers_and_license_line() {
3641        let raw_lines = vec![
3642            "Files: *".to_string(),
3643            "Copyright: 2024 Example Org".to_string(),
3644            "License: Apache-2.0".to_string(),
3645            " Licensed under the Apache License, Version 2.0.".to_string(),
3646        ];
3647
3648        let paragraph = finalize_copyright_paragraph(raw_lines.clone(), 10);
3649        let expected = rfc822::parse_rfc822_paragraphs(&raw_lines.join("\n"))
3650            .into_iter()
3651            .next()
3652            .expect("reference RFC822 paragraph should parse");
3653
3654        assert_eq!(paragraph.metadata.headers, expected.headers);
3655        assert_eq!(paragraph.metadata.body, expected.body);
3656        assert_eq!(
3657            paragraph.license_header_line,
3658            Some(("License: Apache-2.0".to_string(), 12))
3659        );
3660    }
3661
3662    #[test]
3663    fn test_parse_copyright_unstructured() {
3664        let content = "This package was debianized by John Doe.
3665
3666Upstream Authors:
3667    Jane Smith
3668
3669Copyright:
3670    2009 10gen
3671
3672License:
3673    SSPL
3674";
3675        let pkg = parse_copyright_file(content, Some("mongodb"));
3676        assert_eq!(pkg.name, Some("mongodb".to_string()));
3677        assert_eq!(pkg.extracted_license_statement, Some("SSPL".to_string()));
3678        assert!(!pkg.parties.is_empty());
3679    }
3680
3681    #[test]
3682    fn test_parse_copyright_holders() {
3683        let text = "2012 Paul Moore <pmoore@redhat.com>
36842012 Ashley Lai <adlai@us.ibm.com>
3685Copyright (C) 2015-2018 Example Corp";
3686        let holders = parse_copyright_holders(text);
3687        assert!(holders.len() >= 3);
3688        assert!(holders.iter().any(|h| h.contains("Paul Moore")));
3689        assert!(holders.iter().any(|h| h.contains("Example Corp")));
3690    }
3691
3692    #[test]
3693    fn test_parse_copyright_empty() {
3694        let content = "This is just some text without proper copyright info.";
3695        let pkg = parse_copyright_file(content, Some("test"));
3696        assert_eq!(pkg.name, Some("test".to_string()));
3697        assert!(pkg.parties.is_empty());
3698        assert!(pkg.extracted_license_statement.is_none());
3699    }
3700
3701    #[test]
3702    fn test_deb_parser_is_match() {
3703        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
3704        assert!(DebianDebParser::is_match(&PathBuf::from(
3705            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
3706        )));
3707        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
3708        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
3709    }
3710
3711    #[test]
3712    fn test_parse_deb_filename_with_arch() {
3713        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
3714        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
3715        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
3716        assert_eq!(pkg.namespace, Some("debian".to_string()));
3717        assert_eq!(
3718            pkg.purl,
3719            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
3720        );
3721        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
3722    }
3723
3724    #[test]
3725    fn test_parse_deb_filename_without_arch() {
3726        let pkg = parse_deb_filename("package_1.0-1_all.deb");
3727        assert_eq!(pkg.name, Some("package".to_string()));
3728        assert_eq!(pkg.version, Some("1.0-1".to_string()));
3729        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
3730    }
3731
3732    #[test]
3733    fn test_extract_deb_archive() {
3734        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
3735        if !test_path.exists() {
3736            return;
3737        }
3738
3739        let pkg = DebianDebParser::extract_first_package(&test_path);
3740
3741        assert_eq!(pkg.name, Some("adduser".to_string()));
3742        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3743        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
3744        assert!(pkg.description.is_some());
3745        assert!(!pkg.parties.is_empty());
3746
3747        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
3748        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
3749    }
3750
3751    #[test]
3752    fn test_extract_deb_archive_with_control_tar_xz() {
3753        let deb = create_synthetic_deb_with_control_tar_xz();
3754
3755        let pkg = DebianDebParser::extract_first_package(deb.path());
3756
3757        assert_eq!(pkg.name, Some("synthetic".to_string()));
3758        assert_eq!(pkg.version, Some("1.2.3".to_string()));
3759        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
3760        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
3761    }
3762
3763    #[test]
3764    fn test_extract_deb_archive_collects_embedded_copyright_metadata() {
3765        let deb = create_synthetic_deb_with_copyright();
3766
3767        let pkg = DebianDebParser::extract_first_package(deb.path());
3768
3769        assert_eq!(pkg.name, Some("synthetic".to_string()));
3770        assert_eq!(
3771            pkg.extracted_license_statement,
3772            Some("Apache-2.0".to_string())
3773        );
3774        assert!(pkg.parties.iter().any(|party| {
3775            party.role.as_deref() == Some("copyright-holder")
3776                && party.name.as_deref() == Some("Example Org")
3777        }));
3778    }
3779
3780    #[test]
3781    fn test_parse_deb_filename_simple() {
3782        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
3783        assert_eq!(pkg.name, Some("adduser".to_string()));
3784        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
3785        assert_eq!(pkg.namespace, Some("debian".to_string()));
3786    }
3787
3788    #[test]
3789    fn test_parse_deb_filename_invalid() {
3790        let pkg = parse_deb_filename("invalid.deb");
3791        assert!(pkg.name.is_none());
3792        assert!(pkg.version.is_none());
3793    }
3794
3795    #[test]
3796    fn test_distroless_parser() {
3797        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");
3798
3799        assert!(DebianDistrolessInstalledParser::is_match(&test_file));
3800
3801        if !test_file.exists() {
3802            eprintln!("Warning: Test file not found, skipping test");
3803            return;
3804        }
3805
3806        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);
3807
3808        assert_eq!(pkg.package_type, Some(PackageType::Deb));
3809        assert_eq!(
3810            pkg.datasource_id,
3811            Some(DatasourceId::DebianDistrolessInstalledDb)
3812        );
3813        assert_eq!(pkg.name, Some("base-files".to_string()));
3814        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
3815        assert_eq!(pkg.namespace, Some("debian".to_string()));
3816        assert!(pkg.purl.is_some());
3817        assert!(
3818            pkg.purl
3819                .as_ref()
3820                .unwrap()
3821                .contains("pkg:deb/debian/base-files")
3822        );
3823    }
3824}