// Source file: provenant/parsers/debian/control.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::models::{DatasourceId, PackageData, PackageType, Party};
5use crate::parser_warn as warn;
6use crate::parsers::rfc822::{self, Rfc822Metadata};
7use crate::parsers::utils::{
8    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
9};
10
11use super::utils::{
12    build_debian_purl, detect_namespace, parse_all_dependencies, parse_source_field,
13};
14use super::{PACKAGE_TYPE, default_package_data};
15use crate::parsers::PackageParser;
16
// ---------------------------------------------------------------------------
// DebianControlParser: debian/control files (source + binary paragraphs)
// ---------------------------------------------------------------------------
20
21pub struct DebianControlParser;
22
23impl PackageParser for DebianControlParser {
24    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
25
26    fn is_match(path: &Path) -> bool {
27        if let Some(name) = path.file_name()
28            && name == "control"
29            && let Some(parent) = path.parent()
30            && let Some(parent_name) = parent.file_name()
31        {
32            return parent_name == "debian";
33        }
34        false
35    }
36
37    fn extract_packages(path: &Path) -> Vec<PackageData> {
38        let content = match read_file_to_string(path, None) {
39            Ok(c) => c,
40            Err(e) => {
41                warn!("Failed to read debian/control at {:?}: {}", path, e);
42                return vec![default_package_data(DatasourceId::DebianControlInSource)];
43            }
44        };
45
46        let packages = parse_debian_control(&content);
47        if packages.is_empty() {
48            vec![default_package_data(DatasourceId::DebianControlInSource)]
49        } else {
50            packages
51        }
52    }
53}
54
// ---------------------------------------------------------------------------
// DebianInstalledParser: /var/lib/dpkg/status
// ---------------------------------------------------------------------------
58
59pub struct DebianInstalledParser;
60
61impl PackageParser for DebianInstalledParser {
62    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
63
64    fn is_match(path: &Path) -> bool {
65        let path_str = path.to_string_lossy();
66        path_str.ends_with("var/lib/dpkg/status")
67    }
68
69    fn extract_packages(path: &Path) -> Vec<PackageData> {
70        let content = match read_file_to_string(path, None) {
71            Ok(c) => c,
72            Err(e) => {
73                warn!("Failed to read dpkg/status at {:?}: {}", path, e);
74                return vec![default_package_data(DatasourceId::DebianInstalledStatusDb)];
75            }
76        };
77
78        let packages = parse_dpkg_status(&content);
79        if packages.is_empty() {
80            vec![default_package_data(DatasourceId::DebianInstalledStatusDb)]
81        } else {
82            packages
83        }
84    }
85}
86
87pub struct DebianDistrolessInstalledParser;
88
89impl PackageParser for DebianDistrolessInstalledParser {
90    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
91
92    fn is_match(path: &Path) -> bool {
93        let path_str = path.to_string_lossy();
94        path_str.contains("var/lib/dpkg/status.d/")
95    }
96
97    fn extract_packages(path: &Path) -> Vec<PackageData> {
98        let content = match read_file_to_string(path, None) {
99            Ok(c) => c,
100            Err(e) => {
101                warn!("Failed to read distroless status file at {:?}: {}", path, e);
102                return vec![default_package_data(
103                    DatasourceId::DebianDistrolessInstalledDb,
104                )];
105            }
106        };
107
108        vec![parse_distroless_status(&content)]
109    }
110}
111
112fn parse_distroless_status(content: &str) -> PackageData {
113    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
114
115    if paragraphs.is_empty() {
116        return default_package_data(DatasourceId::DebianDistrolessInstalledDb);
117    }
118
119    build_package_from_paragraph(
120        &paragraphs[0],
121        None,
122        DatasourceId::DebianDistrolessInstalledDb,
123    )
124    .unwrap_or_else(|| default_package_data(DatasourceId::DebianDistrolessInstalledDb))
125}
126
// ---------------------------------------------------------------------------
// Parsing logic
// ---------------------------------------------------------------------------
130
131/// Parses a debian/control file into PackageData entries.
132///
133/// A debian/control file has a Source paragraph followed by one or more Binary
134/// paragraphs. Source-level metadata (maintainer, homepage, VCS URLs) is merged
135/// into each binary package.
136fn parse_debian_control(content: &str) -> Vec<PackageData> {
137    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
138    if paragraphs.is_empty() {
139        return Vec::new();
140    }
141
142    let has_source = rfc822::get_header_first(&paragraphs[0].headers, "source").is_some();
143
144    let (source_paragraph, binary_start) = if has_source {
145        (Some(&paragraphs[0]), 1)
146    } else {
147        (None, 0)
148    };
149
150    let source_meta = source_paragraph.map(extract_source_meta);
151
152    let mut packages = Vec::new();
153    let mut count = 0usize;
154
155    for para in &paragraphs[binary_start..] {
156        count += 1;
157        if count > MAX_ITERATION_COUNT {
158            warn!("parse_debian_control: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
159            break;
160        }
161        if let Some(pkg) = build_package_from_paragraph(
162            para,
163            source_meta.as_ref(),
164            DatasourceId::DebianControlInSource,
165        ) {
166            packages.push(pkg);
167        }
168    }
169
170    if packages.is_empty()
171        && let Some(source_para) = source_paragraph
172        && let Some(pkg) = build_package_from_source_paragraph(source_para)
173    {
174        packages.push(pkg);
175    }
176
177    packages
178}
179
180/// Parses a dpkg/status file into PackageData entries.
181///
182/// Each paragraph represents an installed package. Only packages with
183/// `Status: install ok installed` are included.
184fn parse_dpkg_status(content: &str) -> Vec<PackageData> {
185    let paragraphs = rfc822::parse_rfc822_paragraphs(content);
186    let mut packages = Vec::new();
187    let mut count = 0usize;
188
189    for para in &paragraphs {
190        count += 1;
191        if count > MAX_ITERATION_COUNT {
192            warn!("parse_dpkg_status: exceeded MAX_ITERATION_COUNT paragraphs, stopping");
193            break;
194        }
195        let status = rfc822::get_header_first(&para.headers, "status");
196        if status.as_deref() != Some("install ok installed") {
197            continue;
198        }
199
200        if let Some(pkg) =
201            build_package_from_paragraph(para, None, DatasourceId::DebianInstalledStatusDb)
202        {
203            packages.push(pkg);
204        }
205    }
206
207    packages
208}
209
// ---------------------------------------------------------------------------
// Source paragraph metadata (shared across binary packages)
// ---------------------------------------------------------------------------
213
/// Metadata taken from a control file's source paragraph and shared with the
/// binary packages built from the same file.
pub(super) struct SourceMeta {
    /// Contacts read from the `maintainer`, `original-maintainer` and
    /// `uploaders` headers, in that order.
    parties: Vec<Party>,
    /// Value of the `homepage` header.
    homepage_url: Option<String>,
    /// First whitespace-separated token of the `vcs-git` header.
    vcs_url: Option<String>,
    /// Value of the `vcs-browser` header.
    code_view_url: Option<String>,
    /// Value of the `bugs` header.
    bug_tracking_url: Option<String>,
}
221
222fn extract_source_meta(paragraph: &Rfc822Metadata) -> SourceMeta {
223    let mut parties = Vec::new();
224
225    // Maintainer
226    if let Some(maintainer) = rfc822::get_header_first(&paragraph.headers, "maintainer") {
227        let (name, email) = split_name_email(&maintainer);
228        parties.push(Party {
229            r#type: Some("person".to_string()),
230            role: Some("maintainer".to_string()),
231            name,
232            email,
233            url: None,
234            organization: None,
235            organization_url: None,
236            timezone: None,
237        });
238    }
239
240    // Original-Maintainer
241    if let Some(orig_maintainer) =
242        rfc822::get_header_first(&paragraph.headers, "original-maintainer")
243    {
244        let (name, email) = split_name_email(&orig_maintainer);
245        parties.push(Party {
246            r#type: Some("person".to_string()),
247            role: Some("maintainer".to_string()),
248            name,
249            email,
250            url: None,
251            organization: None,
252            organization_url: None,
253            timezone: None,
254        });
255    }
256
257    // Uploaders (comma-separated)
258    if let Some(uploaders_str) = rfc822::get_header_first(&paragraph.headers, "uploaders") {
259        for uploader in uploaders_str.split(',') {
260            let trimmed = uploader.trim();
261            if !trimmed.is_empty() {
262                let (name, email) = split_name_email(trimmed);
263                parties.push(Party {
264                    r#type: Some("person".to_string()),
265                    role: Some("uploader".to_string()),
266                    name,
267                    email,
268                    url: None,
269                    organization: None,
270                    organization_url: None,
271                    timezone: None,
272                });
273            }
274        }
275    }
276
277    let homepage_url = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
278
279    let vcs_url = rfc822::get_header_first(&paragraph.headers, "vcs-git")
280        .map(|url| truncate_field(url.split_whitespace().next().unwrap_or(&url).to_string()));
281
282    let code_view_url =
283        rfc822::get_header_first(&paragraph.headers, "vcs-browser").map(truncate_field);
284
285    let bug_tracking_url = rfc822::get_header_first(&paragraph.headers, "bugs").map(truncate_field);
286
287    SourceMeta {
288        parties,
289        homepage_url,
290        vcs_url,
291        code_view_url,
292        bug_tracking_url,
293    }
294}
295
// ---------------------------------------------------------------------------
// Package building
// ---------------------------------------------------------------------------
299
300pub(super) fn build_package_from_paragraph(
301    paragraph: &Rfc822Metadata,
302    source_meta: Option<&SourceMeta>,
303    datasource_id: DatasourceId,
304) -> Option<PackageData> {
305    let name = rfc822::get_header_first(&paragraph.headers, "package").map(truncate_field)?;
306    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
307    let architecture =
308        rfc822::get_header_first(&paragraph.headers, "architecture").map(truncate_field);
309    let description =
310        rfc822::get_header_first(&paragraph.headers, "description").map(truncate_field);
311    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
312    let homepage = rfc822::get_header_first(&paragraph.headers, "homepage").map(truncate_field);
313    let source_field = rfc822::get_header_first(&paragraph.headers, "source");
314    let section = rfc822::get_header_first(&paragraph.headers, "section");
315    let installed_size = rfc822::get_header_first(&paragraph.headers, "installed-size");
316    let multi_arch = rfc822::get_header_first(&paragraph.headers, "multi-arch");
317
318    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
319
320    // Build parties: use source_meta parties if available, otherwise parse from paragraph
321    let parties = if let Some(meta) = source_meta {
322        meta.parties.clone()
323    } else {
324        let mut p = Vec::new();
325        if let Some(m) = &maintainer_str {
326            let (n, e) = split_name_email(m);
327            p.push(Party {
328                r#type: Some("person".to_string()),
329                role: Some("maintainer".to_string()),
330                name: n,
331                email: e,
332                url: None,
333                organization: None,
334                organization_url: None,
335                timezone: None,
336            });
337        }
338        p
339    };
340
341    // Resolve homepage: paragraph's own, or from source metadata
342    let homepage_url = homepage.or_else(|| source_meta.and_then(|m| m.homepage_url.clone()));
343    let vcs_url = source_meta.and_then(|m| m.vcs_url.clone());
344    let code_view_url = source_meta.and_then(|m| m.code_view_url.clone());
345    let bug_tracking_url = source_meta.and_then(|m| m.bug_tracking_url.clone());
346
347    // Build PURL
348    let purl = build_debian_purl(
349        &name,
350        version.as_deref(),
351        namespace.as_deref(),
352        architecture.as_deref(),
353    );
354
355    // Parse dependencies from all dependency fields
356    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
357
358    // Keywords from section
359    let keywords = section.into_iter().collect();
360
361    // Source packages
362    let source_packages = parse_source_field(source_field.as_deref(), namespace.as_deref());
363
364    // Extra data
365    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
366    if let Some(ma) = &multi_arch
367        && !ma.is_empty()
368    {
369        extra_data.insert(
370            "multi_arch".to_string(),
371            serde_json::Value::String(ma.clone()),
372        );
373    }
374    if let Some(size_str) = &installed_size
375        && let Ok(size) = size_str.parse::<u64>()
376    {
377        extra_data.insert(
378            "installed_size".to_string(),
379            serde_json::Value::Number(serde_json::Number::from(size)),
380        );
381    }
382
383    // Qualifiers for architecture
384    let qualifiers = architecture.as_ref().map(|arch| {
385        let mut q = HashMap::new();
386        q.insert("arch".to_string(), arch.clone());
387        q
388    });
389
390    Some(PackageData {
391        package_type: Some(PACKAGE_TYPE),
392        namespace: namespace.clone(),
393        name: Some(name),
394        version,
395        qualifiers,
396        subpath: None,
397        primary_language: None,
398        description,
399        release_date: None,
400        parties,
401        keywords,
402        homepage_url,
403        download_url: None,
404        size: None,
405        sha1: None,
406        md5: None,
407        sha256: None,
408        sha512: None,
409        bug_tracking_url,
410        code_view_url,
411        vcs_url,
412        copyright: None,
413        holder: None,
414        declared_license_expression: None,
415        declared_license_expression_spdx: None,
416        license_detections: Vec::new(),
417        other_license_expression: None,
418        other_license_expression_spdx: None,
419        other_license_detections: Vec::new(),
420        extracted_license_statement: None,
421        notice_text: None,
422        source_packages,
423        file_references: Vec::new(),
424        is_private: false,
425        is_virtual: false,
426        extra_data: if extra_data.is_empty() {
427            None
428        } else {
429            Some(extra_data)
430        },
431        dependencies,
432        repository_homepage_url: None,
433        repository_download_url: None,
434        api_data_url: None,
435        datasource_id: Some(datasource_id),
436        purl,
437    })
438}
439
440fn build_package_from_source_paragraph(paragraph: &Rfc822Metadata) -> Option<PackageData> {
441    let name = rfc822::get_header_first(&paragraph.headers, "source").map(truncate_field)?;
442    let version = rfc822::get_header_first(&paragraph.headers, "version").map(truncate_field);
443    let maintainer_str = rfc822::get_header_first(&paragraph.headers, "maintainer");
444
445    let namespace = detect_namespace(version.as_deref(), maintainer_str.as_deref());
446    let source_meta = extract_source_meta(paragraph);
447
448    let purl = build_debian_purl(&name, version.as_deref(), namespace.as_deref(), None);
449    let dependencies = parse_all_dependencies(&paragraph.headers, namespace.as_deref());
450
451    let section = rfc822::get_header_first(&paragraph.headers, "section");
452    let keywords = section.into_iter().collect();
453
454    Some(PackageData {
455        package_type: Some(PACKAGE_TYPE),
456        namespace: namespace.clone(),
457        name: Some(name),
458        version,
459        qualifiers: None,
460        subpath: None,
461        primary_language: None,
462        description: None,
463        release_date: None,
464        parties: source_meta.parties,
465        keywords,
466        homepage_url: source_meta.homepage_url,
467        download_url: None,
468        size: None,
469        sha1: None,
470        md5: None,
471        sha256: None,
472        sha512: None,
473        bug_tracking_url: source_meta.bug_tracking_url,
474        code_view_url: source_meta.code_view_url,
475        vcs_url: source_meta.vcs_url,
476        copyright: None,
477        holder: None,
478        declared_license_expression: None,
479        declared_license_expression_spdx: None,
480        license_detections: Vec::new(),
481        other_license_expression: None,
482        other_license_expression_spdx: None,
483        other_license_detections: Vec::new(),
484        extracted_license_statement: None,
485        notice_text: None,
486        source_packages: Vec::new(),
487        file_references: Vec::new(),
488        is_private: false,
489        is_virtual: false,
490        extra_data: None,
491        dependencies,
492        repository_homepage_url: None,
493        repository_download_url: None,
494        api_data_url: None,
495        datasource_id: Some(DatasourceId::DebianControlInSource),
496        purl,
497    })
498}
499
// ---------------------------------------------------------------------------
// Parser registration macros
// ---------------------------------------------------------------------------
503
// One registration per parser defined in this file. Each glob mirrors the
// path shape accepted by the corresponding `is_match` implementation.
//
// NOTE(review): the argument meanings are inferred from the values only —
// presumably (description, path globs, package type, primary language,
// documentation URL); confirm against the `register_parser!` definition.

// `DebianControlParser`
crate::register_parser!(
    "Debian source package control file (debian/control)",
    &["**/debian/control"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// `DebianInstalledParser`
crate::register_parser!(
    "Debian installed package database (dpkg status)",
    &["**/var/lib/dpkg/status"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);

// `DebianDistrolessInstalledParser`
crate::register_parser!(
    "Debian distroless package database (status.d)",
    &["**/var/lib/dpkg/status.d/*"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
527
/// Unit tests for the Debian control-format parsers.
///
/// Most tests feed inline control text to the parsing functions so that they
/// do not depend on files on disk; only `test_distroless_parser` reads a
/// fixture and skips itself when the fixture is missing.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use crate::models::PackageType;
    use std::path::Path;
    use std::path::PathBuf;

    // -- debian/control -----------------------------------------------------

    /// Source-level metadata must be propagated to the binary package.
    #[test]
    fn test_parse_debian_control_source_and_binary() {
        let content = "\
Source: curl
Section: web
Priority: optional
Maintainer: Alessandro Ghedini <ghedo@debian.org>
Homepage: https://curl.se/
Vcs-Browser: https://salsa.debian.org/debian/curl
Vcs-Git: https://salsa.debian.org/debian/curl.git
Build-Depends: debhelper (>= 12), libssl-dev

Package: curl
Architecture: amd64
Depends: libc6 (>= 2.17), libcurl4 (= ${binary:Version})
Description: command line tool for transferring data with URL syntax";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 1);

        let pkg = &packages[0];
        assert_eq!(pkg.name, Some("curl".to_string()));
        assert_eq!(pkg.package_type, Some(PackageType::Deb));
        assert_eq!(pkg.homepage_url, Some("https://curl.se/".to_string()));
        assert_eq!(
            pkg.vcs_url,
            Some("https://salsa.debian.org/debian/curl.git".to_string())
        );
        assert_eq!(
            pkg.code_view_url,
            Some("https://salsa.debian.org/debian/curl".to_string())
        );

        assert_eq!(pkg.parties.len(), 1);
        assert_eq!(pkg.parties[0].role, Some("maintainer".to_string()));
        assert_eq!(pkg.parties[0].name, Some("Alessandro Ghedini".to_string()));
        assert_eq!(pkg.parties[0].email, Some("ghedo@debian.org".to_string()));

        assert!(!pkg.dependencies.is_empty());
    }

    /// Every binary paragraph yields its own package and inherits the parties.
    #[test]
    fn test_parse_debian_control_multiple_binary() {
        let content = "\
Source: gzip
Maintainer: Debian Developer <dev@debian.org>

Package: gzip
Architecture: any
Depends: libc6 (>= 2.17)
Description: GNU file compression

Package: gzip-win32
Architecture: all
Description: gzip for Windows";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 2);
        assert_eq!(packages[0].name, Some("gzip".to_string()));
        assert_eq!(packages[1].name, Some("gzip-win32".to_string()));

        assert_eq!(packages[0].parties.len(), 1);
        assert_eq!(packages[1].parties.len(), 1);
    }

    /// A control file with only a source paragraph still yields one package.
    #[test]
    fn test_parse_debian_control_source_only() {
        let content = "\
Source: my-package
Maintainer: Test User <test@debian.org>
Build-Depends: debhelper (>= 13)";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name, Some("my-package".to_string()));
        assert!(!packages[0].dependencies.is_empty());
        assert_eq!(
            packages[0].dependencies[0].scope,
            Some("build-depends".to_string())
        );
    }

    /// Uploaders are appended after the maintainer with role `uploader`.
    #[test]
    fn test_parse_debian_control_with_uploaders() {
        let content = "\
Source: example
Maintainer: Main Dev <main@debian.org>
Uploaders: Alice <alice@example.com>, Bob <bob@example.com>

Package: example
Architecture: any
Description: test package";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].parties.len(), 3);
        assert_eq!(packages[0].parties[0].role, Some("maintainer".to_string()));
        assert_eq!(packages[0].parties[1].role, Some("uploader".to_string()));
        assert_eq!(packages[0].parties[2].role, Some("uploader".to_string()));
    }

    /// Options following the repository URL in `Vcs-Git` are discarded.
    #[test]
    fn test_parse_debian_control_vcs_git_with_branch() {
        let content = "\
Source: example
Maintainer: Dev <dev@debian.org>
Vcs-Git: https://salsa.debian.org/example.git -b main

Package: example
Architecture: any
Description: test";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 1);
        assert_eq!(
            packages[0].vcs_url,
            Some("https://salsa.debian.org/example.git".to_string())
        );
    }

    /// `Multi-Arch` is surfaced through `extra_data`.
    #[test]
    fn test_parse_debian_control_multi_arch() {
        let content = "\
Source: example
Maintainer: Dev <dev@debian.org>

Package: libexample
Architecture: any
Multi-Arch: same
Description: shared library";

        let packages = parse_debian_control(content);
        assert_eq!(packages.len(), 1);
        let extra = packages[0].extra_data.as_ref().unwrap();
        assert_eq!(
            extra.get("multi_arch"),
            Some(&serde_json::Value::String("same".to_string()))
        );
    }

    #[test]
    fn test_parse_debian_control_empty_input() {
        let packages = parse_debian_control("");
        assert!(packages.is_empty());
    }

    #[test]
    fn test_parse_debian_control_malformed_input() {
        let content = "this is not a valid control file\nwith random text";
        let packages = parse_debian_control(content);
        assert!(packages.is_empty());
    }

    // -- dpkg status --------------------------------------------------------

    /// Only the fully installed paragraph is returned, with its extra data.
    #[test]
    fn test_parse_dpkg_status_basic() {
        let content = "\
Package: base-files
Status: install ok installed
Priority: required
Section: admin
Installed-Size: 391
Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
Architecture: amd64
Version: 11ubuntu5.6
Description: Debian base system miscellaneous files
Homepage: https://tracker.debian.org/pkg/base-files

Package: not-installed
Status: deinstall ok config-files
Architecture: amd64
Version: 1.0
Description: This should be skipped";

        let packages = parse_dpkg_status(content);
        assert_eq!(packages.len(), 1);

        let pkg = &packages[0];
        assert_eq!(pkg.name, Some("base-files".to_string()));
        assert_eq!(pkg.version, Some("11ubuntu5.6".to_string()));
        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
        assert_eq!(
            pkg.datasource_id,
            Some(DatasourceId::DebianInstalledStatusDb)
        );

        let extra = pkg.extra_data.as_ref().unwrap();
        assert_eq!(
            extra.get("installed_size"),
            Some(&serde_json::Value::Number(serde_json::Number::from(391)))
        );
    }

    #[test]
    fn test_parse_dpkg_status_multiple_installed() {
        let content = "\
Package: libc6
Status: install ok installed
Architecture: amd64
Version: 2.31-13+deb11u5
Maintainer: GNU Libc Maintainers <debian-glibc@lists.debian.org>
Description: GNU C Library

Package: zlib1g
Status: install ok installed
Architecture: amd64
Version: 1:1.2.11.dfsg-2+deb11u2
Maintainer: Mark Brown <broonie@debian.org>
Description: compression library";

        let packages = parse_dpkg_status(content);
        assert_eq!(packages.len(), 2);
        assert_eq!(packages[0].name, Some("libc6".to_string()));
        assert_eq!(packages[1].name, Some("zlib1g".to_string()));
    }

    #[test]
    fn test_parse_dpkg_status_with_dependencies() {
        let content = "\
Package: curl
Status: install ok installed
Architecture: amd64
Version: 7.74.0-1.3+deb11u7
Maintainer: Alessandro Ghedini <ghedo@debian.org>
Depends: libc6 (>= 2.17), libcurl4 (= 7.74.0-1.3+deb11u7)
Recommends: ca-certificates
Description: command line tool for transferring data with URL syntax";

        let packages = parse_dpkg_status(content);
        assert_eq!(packages.len(), 1);

        let deps = &packages[0].dependencies;
        assert_eq!(deps.len(), 3);

        assert_eq!(deps[0].purl, Some("pkg:deb/debian/libc6".to_string()));
        assert_eq!(deps[0].scope, Some("depends".to_string()));
        assert_eq!(deps[0].extracted_requirement, Some(">= 2.17".to_string()));

        assert_eq!(
            deps[2].purl,
            Some("pkg:deb/debian/ca-certificates".to_string())
        );
        assert_eq!(deps[2].scope, Some("recommends".to_string()));
        assert_eq!(deps[2].is_optional, Some(true));
    }

    #[test]
    fn test_parse_dpkg_status_with_source() {
        let content = "\
Package: libncurses6
Status: install ok installed
Architecture: amd64
Source: ncurses (6.2+20201114-2+deb11u1)
Version: 6.2+20201114-2+deb11u1
Maintainer: Craig Small <csmall@debian.org>
Description: shared libraries for terminal handling";

        let packages = parse_dpkg_status(content);
        assert_eq!(packages.len(), 1);
        assert!(!packages[0].source_packages.is_empty());
        assert!(packages[0].source_packages[0].contains("ncurses"));
    }

    /// Any status other than `install ok installed` is filtered out.
    #[test]
    fn test_parse_dpkg_status_filters_not_installed() {
        let content = "\
Package: installed-pkg
Status: install ok installed
Version: 1.0
Architecture: amd64
Description: installed

Package: half-installed
Status: install ok half-installed
Version: 2.0
Architecture: amd64
Description: half installed

Package: deinstall-pkg
Status: deinstall ok config-files
Version: 3.0
Architecture: amd64
Description: deinstalled

Package: purge-pkg
Status: purge ok not-installed
Version: 4.0
Architecture: amd64
Description: purged";

        let packages = parse_dpkg_status(content);
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name, Some("installed-pkg".to_string()));
    }

    #[test]
    fn test_parse_dpkg_status_empty() {
        let packages = parse_dpkg_status("");
        assert!(packages.is_empty());
    }

    // -- path matching ------------------------------------------------------

    #[test]
    fn test_debian_control_is_match() {
        assert!(DebianControlParser::is_match(Path::new(
            "/path/to/debian/control"
        )));
        assert!(DebianControlParser::is_match(Path::new("debian/control")));
        assert!(!DebianControlParser::is_match(Path::new(
            "/path/to/control"
        )));
        assert!(!DebianControlParser::is_match(Path::new(
            "/path/to/debian/changelog"
        )));
    }

    #[test]
    fn test_debian_installed_is_match() {
        assert!(DebianInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/status"
        )));
        assert!(DebianInstalledParser::is_match(Path::new(
            "some/root/var/lib/dpkg/status"
        )));
        assert!(!DebianInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/status.d/something"
        )));
        assert!(!DebianInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/available"
        )));
    }

    /// Files directly inside `status.d` match; the plain status database and
    /// unrelated dpkg files do not.
    #[test]
    fn test_debian_distroless_is_match() {
        assert!(DebianDistrolessInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/status.d/base-files"
        )));
        assert!(DebianDistrolessInstalledParser::is_match(Path::new(
            "rootfs/var/lib/dpkg/status.d/libc6"
        )));
        assert!(!DebianDistrolessInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/status"
        )));
        assert!(!DebianDistrolessInstalledParser::is_match(Path::new(
            "/var/lib/dpkg/available"
        )));
    }

    // -- distroless status.d ------------------------------------------------

    /// Fixture-independent coverage of the distroless parsing path.
    #[test]
    fn test_parse_distroless_status_inline() {
        let content = "\
Package: base-files
Version: 11.1+deb11u8
Architecture: amd64
Maintainer: Santiago Vila <sanvila@debian.org>
Description: Debian base system miscellaneous files";

        let pkg = parse_distroless_status(content);
        assert_eq!(pkg.package_type, Some(PackageType::Deb));
        assert_eq!(
            pkg.datasource_id,
            Some(DatasourceId::DebianDistrolessInstalledDb)
        );
        assert_eq!(pkg.name, Some("base-files".to_string()));
        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
    }

    /// Only the first paragraph of a distroless status file is used.
    #[test]
    fn test_parse_distroless_status_uses_first_paragraph() {
        let content = "\
Package: first-pkg
Version: 1.0
Architecture: amd64
Description: first

Package: second-pkg
Version: 2.0
Architecture: amd64
Description: second";

        let pkg = parse_distroless_status(content);
        assert_eq!(pkg.name, Some("first-pkg".to_string()));
        assert_eq!(pkg.version, Some("1.0".to_string()));
    }

    /// End-to-end check against the on-disk fixture; skipped when it is absent.
    #[test]
    fn test_distroless_parser() {
        let test_file = PathBuf::from("testdata/debian/var/lib/dpkg/status.d/base-files");

        assert!(DebianDistrolessInstalledParser::is_match(&test_file));

        if !test_file.exists() {
            eprintln!("Warning: Test file not found, skipping test");
            return;
        }

        let pkg = DebianDistrolessInstalledParser::extract_first_package(&test_file);

        assert_eq!(pkg.package_type, Some(PackageType::Deb));
        assert_eq!(
            pkg.datasource_id,
            Some(DatasourceId::DebianDistrolessInstalledDb)
        );
        assert_eq!(pkg.name, Some("base-files".to_string()));
        assert_eq!(pkg.version, Some("11.1+deb11u8".to_string()));
        assert_eq!(pkg.namespace, Some("debian".to_string()));
        assert!(pkg.purl.is_some());
        assert!(
            pkg.purl
                .as_ref()
                .unwrap()
                .contains("pkg:deb/debian/base-files")
        );
    }
}