Skip to main content

provenant/parsers/debian/
file_list.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::path::Path;
5
6use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
9
10use super::super::metadata::ParserMetadata;
11use super::utils::build_debian_purl;
12use super::{IGNORED_ROOT_DIRS, PACKAGE_TYPE, default_package_data, read_or_default};
13use crate::parsers::PackageParser;
14
15/// Parser for Debian installed file lists (*.list)
16pub struct DebianInstalledListParser;
17
18impl PackageParser for DebianInstalledListParser {
19    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
20
21    fn metadata() -> Vec<ParserMetadata> {
22        vec![ParserMetadata {
23            description: "Debian installed files list",
24            file_patterns: &["**/var/lib/dpkg/info/*.list"],
25            package_type: "deb",
26            primary_language: "",
27            documentation_url: Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
28        }]
29    }
30
31    fn is_match(path: &Path) -> bool {
32        path.extension().and_then(|e| e.to_str()) == Some("list")
33            && path
34                .to_str()
35                .map(|p| p.contains("/var/lib/dpkg/info/"))
36                .unwrap_or(false)
37    }
38
39    fn extract_packages(path: &Path) -> Vec<PackageData> {
40        let filename = match path.file_stem().and_then(|s| s.to_str()) {
41            Some(f) => f,
42            None => {
43                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
44            }
45        };
46
47        let content = read_or_default!(path, ".list file", DatasourceId::DebianInstalledFilesList);
48
49        vec![parse_debian_file_list(
50            &content,
51            filename,
52            DatasourceId::DebianInstalledFilesList,
53        )]
54    }
55}
56
57/// Parser for Debian installed MD5 checksum files (*.md5sums)
58pub struct DebianInstalledMd5sumsParser;
59
60impl PackageParser for DebianInstalledMd5sumsParser {
61    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
62
63    fn metadata() -> Vec<ParserMetadata> {
64        vec![ParserMetadata {
65            description: "Debian installed package md5sums",
66            file_patterns: &["**/var/lib/dpkg/info/*.md5sums"],
67            package_type: "deb",
68            primary_language: "",
69            documentation_url: Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
70        }]
71    }
72
73    fn is_match(path: &Path) -> bool {
74        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
75            && path
76                .to_str()
77                .map(|p| p.contains("/var/lib/dpkg/info/"))
78                .unwrap_or(false)
79    }
80
81    fn extract_packages(path: &Path) -> Vec<PackageData> {
82        let filename = match path.file_stem().and_then(|s| s.to_str()) {
83            Some(f) => f,
84            None => {
85                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
86            }
87        };
88
89        let content = read_or_default!(path, ".md5sums file", DatasourceId::DebianInstalledMd5Sums);
90
91        vec![parse_debian_file_list(
92            &content,
93            filename,
94            DatasourceId::DebianInstalledMd5Sums,
95        )]
96    }
97}
98
99pub(crate) fn parse_file_entries(content: &str, log_label: &str) -> Vec<FileReference> {
100    let mut file_references = Vec::new();
101    let mut count = 0usize;
102
103    for line in content.lines() {
104        count += 1;
105        if count > MAX_ITERATION_COUNT {
106            warn!("{log_label}: exceeded MAX_ITERATION_COUNT lines, stopping");
107            break;
108        }
109        let line = line.trim();
110        if line.is_empty() || line.starts_with('#') {
111            continue;
112        }
113
114        let (md5sum, path): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
115            (
116                Md5Digest::from_hex(line[..idx].trim()).ok(),
117                line[idx + 2..].trim(),
118            )
119        } else if let Some((hash, p)) = line.split_once(' ') {
120            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
121        } else {
122            (None, line)
123        };
124
125        if IGNORED_ROOT_DIRS.contains(&path) {
126            continue;
127        }
128
129        file_references.push(FileReference {
130            path: path.to_string(),
131            size: None,
132            sha1: None,
133            md5: md5sum,
134            sha256: None,
135            sha512: None,
136            extra_data: None,
137        });
138    }
139
140    file_references
141}
142
143fn parse_debian_file_list(
144    content: &str,
145    filename: &str,
146    datasource_id: DatasourceId,
147) -> PackageData {
148    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
149        (
150            Some(truncate_field(pkg.to_string())),
151            Some(arch.to_string()),
152        )
153    } else if filename == "md5sums" {
154        (None, None)
155    } else {
156        (Some(truncate_field(filename.to_string())), None)
157    };
158
159    let file_references = parse_file_entries(content, "parse_debian_file_list");
160
161    if file_references.is_empty() {
162        return default_package_data(datasource_id);
163    }
164
165    let namespace = Some("debian".to_string());
166    let mut package = PackageData {
167        datasource_id: Some(datasource_id),
168        package_type: Some(PACKAGE_TYPE),
169        namespace: namespace.clone(),
170        name: name.clone(),
171        file_references,
172        ..Default::default()
173    };
174
175    if let Some(n) = &name {
176        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
177    }
178
179    package
180}
181
#[cfg(test)]
mod tests {
    //! Unit tests for the dpkg `.list` / `.md5sums` parsers.
    //!
    //! Fixtures use explicit `\n` escapes so that whitespace-only lines survive
    //! editors and formatters that strip trailing whitespace.

    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    #[test]
    fn test_list_parser_is_match() {
        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.list"
        )));
        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/package:amd64.list"
        )));
        // Right extension, wrong directory.
        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
            "bash.list"
        )));
        // Right directory, wrong extension.
        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.md5sums"
        )));
    }

    #[test]
    fn test_md5sums_parser_is_match() {
        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.md5sums"
        )));
        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/package:amd64.md5sums"
        )));
        // Right extension, wrong directory.
        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "bash.md5sums"
        )));
        // Right directory, wrong extension.
        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.list"
        )));
    }

    #[test]
    fn test_parse_debian_file_list_plain_list() {
        let content = "/.\n/bin\n/bin/bash\n/usr/bin/bashbug\n/usr/share/doc/bash/README\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.name, Some("bash".to_string()));
        assert_eq!(pkg.file_references.len(), 3);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
        assert_eq!(pkg.file_references[0].md5, None);
        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
    }

    #[test]
    fn test_parse_debian_file_list_md5sums() {
        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash\n\
                       1c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug\n\
                       f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz\n";
        // Checksum content is parsed under the md5sums datasource, as the md5sums parser does.
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledMd5Sums);
        assert!(matches!(
            pkg.datasource_id,
            Some(DatasourceId::DebianInstalledMd5Sums)
        ));
        assert_eq!(pkg.name, Some("bash".to_string()));
        assert_eq!(pkg.file_references.len(), 3);
        assert_eq!(pkg.file_references[0].path, "bin/bash");
        assert_eq!(
            pkg.file_references[0].md5,
            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
        );
        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
        assert_eq!(
            pkg.file_references[1].md5,
            Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
        );
        assert_eq!(
            pkg.file_references[2].path,
            "usr/share/doc/bash/COMPAT.gz"
        );
    }

    #[test]
    fn test_parse_debian_file_list_with_arch() {
        let content = "/usr/bin/foo\n/usr/lib/x86_64-linux-gnu/libfoo.so\n";
        let pkg = parse_debian_file_list(
            content,
            "libfoo:amd64",
            DatasourceId::DebianInstalledFilesList,
        );
        assert_eq!(pkg.name, Some("libfoo".to_string()));
        assert!(pkg.purl.is_some());
        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_skips_comments_and_empty() {
        // Contains a comment, an empty line and a whitespace-only line ("  ").
        let content = "# This is a comment\n/bin/bash\n\n/usr/bin/bashbug\n  \n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums_only() {
        let content = "abc123  usr/bin/tool\n";
        // A bare `md5sums` stem carries no package name.
        let pkg = parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledMd5Sums);
        assert!(matches!(
            pkg.datasource_id,
            Some(DatasourceId::DebianInstalledMd5Sums)
        ));
        assert_eq!(pkg.name, None);
        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "usr/bin/tool");
    }

    #[test]
    fn test_parse_debian_file_list_ignores_root_dirs() {
        let content = "/.\n/bin\n/bin/bash\n/etc\n/usr\n/var\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
    }
}