Skip to main content

provenant/parsers/debian/
file_list.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::path::Path;
5
6use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
9
10use super::utils::build_debian_purl;
11use super::{IGNORED_ROOT_DIRS, PACKAGE_TYPE, default_package_data, read_or_default};
12use crate::parsers::PackageParser;
13
14/// Parser for Debian installed file lists (*.list)
15pub struct DebianInstalledListParser;
16
17impl PackageParser for DebianInstalledListParser {
18    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
19
20    fn is_match(path: &Path) -> bool {
21        path.extension().and_then(|e| e.to_str()) == Some("list")
22            && path
23                .to_str()
24                .map(|p| p.contains("/var/lib/dpkg/info/"))
25                .unwrap_or(false)
26    }
27
28    fn extract_packages(path: &Path) -> Vec<PackageData> {
29        let filename = match path.file_stem().and_then(|s| s.to_str()) {
30            Some(f) => f,
31            None => {
32                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
33            }
34        };
35
36        let content = read_or_default!(path, ".list file", DatasourceId::DebianInstalledFilesList);
37
38        vec![parse_debian_file_list(
39            &content,
40            filename,
41            DatasourceId::DebianInstalledFilesList,
42        )]
43    }
44}
45
// Registers a parser entry labelled "Debian installed files list" for paths
// matching `**/var/lib/dpkg/info/*.list`.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here; presumably they are the
// description, path patterns, package type, primary language (empty) and
// documentation URL - confirm against the macro definition.
crate::register_parser!(
    "Debian installed files list",
    &["**/var/lib/dpkg/info/*.list"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
53
54/// Parser for Debian installed MD5 checksum files (*.md5sums)
55pub struct DebianInstalledMd5sumsParser;
56
57impl PackageParser for DebianInstalledMd5sumsParser {
58    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
59
60    fn is_match(path: &Path) -> bool {
61        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
62            && path
63                .to_str()
64                .map(|p| p.contains("/var/lib/dpkg/info/"))
65                .unwrap_or(false)
66    }
67
68    fn extract_packages(path: &Path) -> Vec<PackageData> {
69        let filename = match path.file_stem().and_then(|s| s.to_str()) {
70            Some(f) => f,
71            None => {
72                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
73            }
74        };
75
76        let content = read_or_default!(path, ".md5sums file", DatasourceId::DebianInstalledMd5Sums);
77
78        vec![parse_debian_file_list(
79            &content,
80            filename,
81            DatasourceId::DebianInstalledMd5Sums,
82        )]
83    }
84}
85
// Registers a parser entry labelled "Debian installed package md5sums" for
// paths matching `**/var/lib/dpkg/info/*.md5sums`.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here; presumably they are the
// description, path patterns, package type, primary language (empty) and
// documentation URL - confirm against the macro definition.
crate::register_parser!(
    "Debian installed package md5sums",
    &["**/var/lib/dpkg/info/*.md5sums"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
93
94pub(crate) fn parse_file_entries(content: &str, log_label: &str) -> Vec<FileReference> {
95    let mut file_references = Vec::new();
96    let mut count = 0usize;
97
98    for line in content.lines() {
99        count += 1;
100        if count > MAX_ITERATION_COUNT {
101            warn!("{log_label}: exceeded MAX_ITERATION_COUNT lines, stopping");
102            break;
103        }
104        let line = line.trim();
105        if line.is_empty() || line.starts_with('#') {
106            continue;
107        }
108
109        let (md5sum, path): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
110            (
111                Md5Digest::from_hex(line[..idx].trim()).ok(),
112                line[idx + 2..].trim(),
113            )
114        } else if let Some((hash, p)) = line.split_once(' ') {
115            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
116        } else {
117            (None, line)
118        };
119
120        if IGNORED_ROOT_DIRS.contains(&path) {
121            continue;
122        }
123
124        file_references.push(FileReference {
125            path: path.to_string(),
126            size: None,
127            sha1: None,
128            md5: md5sum,
129            sha256: None,
130            sha512: None,
131            extra_data: None,
132        });
133    }
134
135    file_references
136}
137
138fn parse_debian_file_list(
139    content: &str,
140    filename: &str,
141    datasource_id: DatasourceId,
142) -> PackageData {
143    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
144        (
145            Some(truncate_field(pkg.to_string())),
146            Some(arch.to_string()),
147        )
148    } else if filename == "md5sums" {
149        (None, None)
150    } else {
151        (Some(truncate_field(filename.to_string())), None)
152    };
153
154    let file_references = parse_file_entries(content, "parse_debian_file_list");
155
156    if file_references.is_empty() {
157        return default_package_data(datasource_id);
158    }
159
160    let namespace = Some("debian".to_string());
161    let mut package = PackageData {
162        datasource_id: Some(datasource_id),
163        package_type: Some(PACKAGE_TYPE),
164        namespace: namespace.clone(),
165        name: name.clone(),
166        file_references,
167        ..Default::default()
168    };
169
170    if let Some(n) = &name {
171        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
172    }
173
174    package
175}
176
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// Parses `content` as the installed-files-list data source for file stem `stem`.
    fn parse_as_list(content: &str, stem: &str) -> PackageData {
        parse_debian_file_list(content, stem, DatasourceId::DebianInstalledFilesList)
    }

    /// Collects the recorded paths of a package in order.
    fn recorded_paths(pkg: &PackageData) -> Vec<&str> {
        pkg.file_references
            .iter()
            .map(|reference| reference.path.as_str())
            .collect()
    }

    #[test]
    fn test_list_parser_is_match() {
        for accepted in [
            "/var/lib/dpkg/info/bash.list",
            "/var/lib/dpkg/info/package:amd64.list",
        ] {
            assert!(
                DebianInstalledListParser::is_match(&PathBuf::from(accepted)),
                "expected a match for {accepted}"
            );
        }

        for rejected in ["bash.list", "/var/lib/dpkg/info/bash.md5sums"] {
            assert!(
                !DebianInstalledListParser::is_match(&PathBuf::from(rejected)),
                "expected no match for {rejected}"
            );
        }
    }

    #[test]
    fn test_md5sums_parser_is_match() {
        for accepted in [
            "/var/lib/dpkg/info/bash.md5sums",
            "/var/lib/dpkg/info/package:amd64.md5sums",
        ] {
            assert!(
                DebianInstalledMd5sumsParser::is_match(&PathBuf::from(accepted)),
                "expected a match for {accepted}"
            );
        }

        for rejected in ["bash.md5sums", "/var/lib/dpkg/info/bash.list"] {
            assert!(
                !DebianInstalledMd5sumsParser::is_match(&PathBuf::from(rejected)),
                "expected no match for {rejected}"
            );
        }
    }

    #[test]
    fn test_parse_debian_file_list_plain_list() {
        let content = "/.\n/bin\n/bin/bash\n/usr/bin/bashbug\n/usr/share/doc/bash/README\n";
        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.name, Some("bash".to_string()));
        // `/.` and `/bin` are ignored root directories, leaving three entries.
        assert_eq!(pkg.file_references.len(), 3);
        assert_eq!(
            recorded_paths(&pkg),
            vec![
                "/bin/bash",
                "/usr/bin/bashbug",
                "/usr/share/doc/bash/README"
            ]
        );
        assert_eq!(pkg.file_references[0].md5, None);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums() {
        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash\n\
                       1c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug\n\
                       f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz\n";
        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.name, Some("bash".to_string()));
        assert_eq!(pkg.file_references.len(), 3);

        let expectations = [
            ("bin/bash", "77506afebd3b7e19e937a678a185b62e"),
            ("usr/bin/bashbug", "1c77d2031971b4e4c512ac952102cd85"),
        ];
        for (reference, (path, digest)) in pkg.file_references.iter().zip(expectations) {
            assert_eq!(reference.path, path);
            assert_eq!(reference.md5, Some(Md5Digest::from_hex(digest).unwrap()));
        }
    }

    #[test]
    fn test_parse_debian_file_list_with_arch() {
        let content = "/usr/bin/foo\n/usr/lib/x86_64-linux-gnu/libfoo.so\n";
        let pkg = parse_as_list(content, "libfoo:amd64");

        assert_eq!(pkg.name, Some("libfoo".to_string()));
        assert!(pkg.purl.is_some());
        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_skips_comments_and_empty() {
        // Comment line, blank line and whitespace-only line must all be skipped.
        let content = "# This is a comment\n/bin/bash\n\n/usr/bin/bashbug\n  \n";
        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums_only() {
        let content = "abc123  usr/bin/tool\n";
        let pkg = parse_as_list(content, "md5sums");

        // The bare `md5sums` stem carries no package name.
        assert_eq!(pkg.name, None);
        assert_eq!(pkg.file_references.len(), 1);
    }

    #[test]
    fn test_parse_debian_file_list_ignores_root_dirs() {
        let content = "/.\n/bin\n/bin/bash\n/etc\n/usr\n/var\n";
        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
    }
}