Skip to main content

provenant/parsers/debian/
file_list.rs

1use std::path::Path;
2
3use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
4use crate::parser_warn as warn;
5use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
6
7use super::utils::build_debian_purl;
8use super::{IGNORED_ROOT_DIRS, PACKAGE_TYPE, default_package_data, read_or_default};
9use crate::parsers::PackageParser;
10
11/// Parser for Debian installed file lists (*.list)
12pub struct DebianInstalledListParser;
13
14impl PackageParser for DebianInstalledListParser {
15    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
16
17    fn is_match(path: &Path) -> bool {
18        path.extension().and_then(|e| e.to_str()) == Some("list")
19            && path
20                .to_str()
21                .map(|p| p.contains("/var/lib/dpkg/info/"))
22                .unwrap_or(false)
23    }
24
25    fn extract_packages(path: &Path) -> Vec<PackageData> {
26        let filename = match path.file_stem().and_then(|s| s.to_str()) {
27            Some(f) => f,
28            None => {
29                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
30            }
31        };
32
33        let content = read_or_default!(path, ".list file", DatasourceId::DebianInstalledFilesList);
34
35        vec![parse_debian_file_list(
36            &content,
37            filename,
38            DatasourceId::DebianInstalledFilesList,
39        )]
40    }
41}
42
43crate::register_parser!(
44    "Debian installed files list",
45    &["**/var/lib/dpkg/info/*.list"],
46    "deb",
47    "",
48    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
49);
50
51/// Parser for Debian installed MD5 checksum files (*.md5sums)
52pub struct DebianInstalledMd5sumsParser;
53
54impl PackageParser for DebianInstalledMd5sumsParser {
55    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
56
57    fn is_match(path: &Path) -> bool {
58        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
59            && path
60                .to_str()
61                .map(|p| p.contains("/var/lib/dpkg/info/"))
62                .unwrap_or(false)
63    }
64
65    fn extract_packages(path: &Path) -> Vec<PackageData> {
66        let filename = match path.file_stem().and_then(|s| s.to_str()) {
67            Some(f) => f,
68            None => {
69                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
70            }
71        };
72
73        let content = read_or_default!(path, ".md5sums file", DatasourceId::DebianInstalledMd5Sums);
74
75        vec![parse_debian_file_list(
76            &content,
77            filename,
78            DatasourceId::DebianInstalledMd5Sums,
79        )]
80    }
81}
82
83crate::register_parser!(
84    "Debian installed package md5sums",
85    &["**/var/lib/dpkg/info/*.md5sums"],
86    "deb",
87    "",
88    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
89);
90
91pub(crate) fn parse_file_entries(content: &str, log_label: &str) -> Vec<FileReference> {
92    let mut file_references = Vec::new();
93    let mut count = 0usize;
94
95    for line in content.lines() {
96        count += 1;
97        if count > MAX_ITERATION_COUNT {
98            warn!("{log_label}: exceeded MAX_ITERATION_COUNT lines, stopping");
99            break;
100        }
101        let line = line.trim();
102        if line.is_empty() || line.starts_with('#') {
103            continue;
104        }
105
106        let (md5sum, path): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
107            (
108                Md5Digest::from_hex(line[..idx].trim()).ok(),
109                line[idx + 2..].trim(),
110            )
111        } else if let Some((hash, p)) = line.split_once(' ') {
112            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
113        } else {
114            (None, line)
115        };
116
117        if IGNORED_ROOT_DIRS.contains(&path) {
118            continue;
119        }
120
121        file_references.push(FileReference {
122            path: path.to_string(),
123            size: None,
124            sha1: None,
125            md5: md5sum,
126            sha256: None,
127            sha512: None,
128            extra_data: None,
129        });
130    }
131
132    file_references
133}
134
135fn parse_debian_file_list(
136    content: &str,
137    filename: &str,
138    datasource_id: DatasourceId,
139) -> PackageData {
140    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
141        (
142            Some(truncate_field(pkg.to_string())),
143            Some(arch.to_string()),
144        )
145    } else if filename == "md5sums" {
146        (None, None)
147    } else {
148        (Some(truncate_field(filename.to_string())), None)
149    };
150
151    let file_references = parse_file_entries(content, "parse_debian_file_list");
152
153    if file_references.is_empty() {
154        return default_package_data(datasource_id);
155    }
156
157    let namespace = Some("debian".to_string());
158    let mut package = PackageData {
159        datasource_id: Some(datasource_id),
160        package_type: Some(PACKAGE_TYPE),
161        namespace: namespace.clone(),
162        name: name.clone(),
163        file_references,
164        ..Default::default()
165    };
166
167    if let Some(n) = &name {
168        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
169    }
170
171    package
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// Parses `content` as an installed-files list for the given file stem.
    fn parse_as_list(content: &str, filename: &str) -> PackageData {
        parse_debian_file_list(content, filename, DatasourceId::DebianInstalledFilesList)
    }

    /// Collects the paths of all file references of `pkg`, in order.
    fn paths_of(pkg: &PackageData) -> Vec<&str> {
        pkg.file_references
            .iter()
            .map(|reference| reference.path.as_str())
            .collect()
    }

    #[test]
    fn test_list_parser_is_match() {
        let accepted = [
            "/var/lib/dpkg/info/bash.list",
            "/var/lib/dpkg/info/package:amd64.list",
        ];
        let rejected = ["bash.list", "/var/lib/dpkg/info/bash.md5sums"];

        for candidate in accepted {
            assert!(DebianInstalledListParser::is_match(&PathBuf::from(
                candidate
            )));
        }
        for candidate in rejected {
            assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
                candidate
            )));
        }
    }

    #[test]
    fn test_md5sums_parser_is_match() {
        let accepted = [
            "/var/lib/dpkg/info/bash.md5sums",
            "/var/lib/dpkg/info/package:amd64.md5sums",
        ];
        let rejected = ["bash.md5sums", "/var/lib/dpkg/info/bash.list"];

        for candidate in accepted {
            assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
                candidate
            )));
        }
        for candidate in rejected {
            assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
                candidate
            )));
        }
    }

    #[test]
    fn test_parse_debian_file_list_plain_list() {
        let content = "/.\n/bin\n/bin/bash\n/usr/bin/bashbug\n/usr/share/doc/bash/README\n";

        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.name.as_deref(), Some("bash"));
        assert_eq!(
            paths_of(&pkg),
            [
                "/bin/bash",
                "/usr/bin/bashbug",
                "/usr/share/doc/bash/README"
            ]
        );
        assert_eq!(pkg.file_references[0].md5, None);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums() {
        let content = concat!(
            "77506afebd3b7e19e937a678a185b62e  bin/bash\n",
            "1c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug\n",
            "f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz\n",
        );

        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.name.as_deref(), Some("bash"));
        assert_eq!(pkg.file_references.len(), 3);

        let first = &pkg.file_references[0];
        assert_eq!(first.path, "bin/bash");
        assert_eq!(
            first.md5,
            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
        );

        let second = &pkg.file_references[1];
        assert_eq!(second.path, "usr/bin/bashbug");
        assert_eq!(
            second.md5,
            Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
        );
    }

    #[test]
    fn test_parse_debian_file_list_with_arch() {
        let content = "/usr/bin/foo\n/usr/lib/x86_64-linux-gnu/libfoo.so\n";

        let pkg = parse_as_list(content, "libfoo:amd64");

        assert_eq!(pkg.name.as_deref(), Some("libfoo"));
        let purl = pkg.purl.as_ref().expect("purl should be built");
        assert!(purl.contains("arch=amd64"));
        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_skips_comments_and_empty() {
        // Includes a comment, an empty line and a whitespace-only line.
        let content = "# This is a comment\n/bin/bash\n\n/usr/bin/bashbug\n  \n";

        let pkg = parse_as_list(content, "bash");

        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums_only() {
        let content = "abc123  usr/bin/tool\n";

        let pkg = parse_as_list(content, "md5sums");

        assert_eq!(pkg.name, None);
        assert_eq!(pkg.file_references.len(), 1);
    }

    #[test]
    fn test_parse_debian_file_list_ignores_root_dirs() {
        let content = "/.\n/bin\n/bin/bash\n/etc\n/usr\n/var\n";

        let pkg = parse_as_list(content, "bash");

        assert_eq!(paths_of(&pkg), ["/bin/bash"]);
    }
}