Skip to main content

provenant/parsers/debian/
file_list.rs

1use std::path::Path;
2
3use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
4use crate::parser_warn as warn;
5use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
6
7use super::utils::build_debian_purl;
8use super::{IGNORED_ROOT_DIRS, PACKAGE_TYPE, default_package_data};
9use crate::parsers::PackageParser;
10
11/// Parser for Debian installed file lists (*.list)
12pub struct DebianInstalledListParser;
13
14impl PackageParser for DebianInstalledListParser {
15    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
16
17    fn is_match(path: &Path) -> bool {
18        path.extension().and_then(|e| e.to_str()) == Some("list")
19            && path
20                .to_str()
21                .map(|p| p.contains("/var/lib/dpkg/info/"))
22                .unwrap_or(false)
23    }
24
25    fn extract_packages(path: &Path) -> Vec<PackageData> {
26        let filename = match path.file_stem().and_then(|s| s.to_str()) {
27            Some(f) => f,
28            None => {
29                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
30            }
31        };
32
33        let content = match read_file_to_string(path, None) {
34            Ok(c) => c,
35            Err(e) => {
36                warn!("Failed to read .list file {:?}: {}", path, e);
37                return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
38            }
39        };
40
41        vec![parse_debian_file_list(
42            &content,
43            filename,
44            DatasourceId::DebianInstalledFilesList,
45        )]
46    }
47}
48
// Registers metadata for the `.list` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, an empty string whose meaning is not visible here, documentation
// URL) — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian installed files list",
    &["**/var/lib/dpkg/info/*.list"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
56
57/// Parser for Debian installed MD5 checksum files (*.md5sums)
58pub struct DebianInstalledMd5sumsParser;
59
60impl PackageParser for DebianInstalledMd5sumsParser {
61    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
62
63    fn is_match(path: &Path) -> bool {
64        path.extension().and_then(|e| e.to_str()) == Some("md5sums")
65            && path
66                .to_str()
67                .map(|p| p.contains("/var/lib/dpkg/info/"))
68                .unwrap_or(false)
69    }
70
71    fn extract_packages(path: &Path) -> Vec<PackageData> {
72        let filename = match path.file_stem().and_then(|s| s.to_str()) {
73            Some(f) => f,
74            None => {
75                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
76            }
77        };
78
79        let content = match read_file_to_string(path, None) {
80            Ok(c) => c,
81            Err(e) => {
82                warn!("Failed to read .md5sums file {:?}: {}", path, e);
83                return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
84            }
85        };
86
87        vec![parse_debian_file_list(
88            &content,
89            filename,
90            DatasourceId::DebianInstalledMd5Sums,
91        )]
92    }
93}
94
// Registers metadata for the `.md5sums` parser.
// NOTE(review): the arguments look like (description, path globs, package
// type, an empty string whose meaning is not visible here, documentation
// URL) — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Debian installed package md5sums",
    &["**/var/lib/dpkg/info/*.md5sums"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
);
102
103fn parse_debian_file_list(
104    content: &str,
105    filename: &str,
106    datasource_id: DatasourceId,
107) -> PackageData {
108    let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
109        (
110            Some(truncate_field(pkg.to_string())),
111            Some(arch.to_string()),
112        )
113    } else if filename == "md5sums" {
114        (None, None)
115    } else {
116        (Some(truncate_field(filename.to_string())), None)
117    };
118
119    let mut file_references = Vec::new();
120    let mut count = 0usize;
121
122    for line in content.lines() {
123        count += 1;
124        if count > MAX_ITERATION_COUNT {
125            warn!("parse_debian_file_list: exceeded MAX_ITERATION_COUNT lines, stopping");
126            break;
127        }
128        let line = line.trim();
129        if line.is_empty() || line.starts_with('#') {
130            continue;
131        }
132
133        let (md5sum, path) = if let Some((hash, p)) = line.split_once(' ') {
134            (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
135        } else {
136            (None, line)
137        };
138
139        if IGNORED_ROOT_DIRS.contains(&path) {
140            continue;
141        }
142
143        file_references.push(FileReference {
144            path: path.to_string(),
145            size: None,
146            sha1: None,
147            md5: md5sum,
148            sha256: None,
149            sha512: None,
150            extra_data: None,
151        });
152    }
153
154    if file_references.is_empty() {
155        return default_package_data(datasource_id);
156    }
157
158    let namespace = Some("debian".to_string());
159    let mut package = PackageData {
160        datasource_id: Some(datasource_id),
161        package_type: Some(PACKAGE_TYPE),
162        namespace: namespace.clone(),
163        name: name.clone(),
164        file_references,
165        ..Default::default()
166    };
167
168    if let Some(n) = &name {
169        package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
170    }
171
172    package
173}
174
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// `.list` files match only inside the dpkg info directory.
    #[test]
    fn test_list_parser_is_match() {
        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.list"
        )));
        assert!(DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/package:amd64.list"
        )));
        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
            "bash.list"
        )));
        assert!(!DebianInstalledListParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.md5sums"
        )));
    }

    /// `.md5sums` files match only inside the dpkg info directory.
    #[test]
    fn test_md5sums_parser_is_match() {
        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.md5sums"
        )));
        assert!(DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/package:amd64.md5sums"
        )));
        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "bash.md5sums"
        )));
        assert!(!DebianInstalledMd5sumsParser::is_match(&PathBuf::from(
            "/var/lib/dpkg/info/bash.list"
        )));
    }

    /// Plain path lists produce references without digests.
    #[test]
    fn test_parse_debian_file_list_plain_list() {
        let content = "/.\n\
                       /bin\n\
                       /bin/bash\n\
                       /usr/bin/bashbug\n\
                       /usr/share/doc/bash/README\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.name, Some("bash".to_string()));
        assert_eq!(pkg.file_references.len(), 3);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
        assert_eq!(pkg.file_references[0].md5, None);
        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
        assert_eq!(pkg.file_references[2].path, "/usr/share/doc/bash/README");
    }

    /// `<md5>  <path>` lines yield both the digest and the path for every entry.
    #[test]
    fn test_parse_debian_file_list_md5sums() {
        let content = "77506afebd3b7e19e937a678a185b62e  bin/bash\n\
                       1c77d2031971b4e4c512ac952102cd85  usr/bin/bashbug\n\
                       f55e3a16959b0bb8915cb5f219521c80  usr/share/doc/bash/COMPAT.gz\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledMd5Sums);
        assert_eq!(pkg.name, Some("bash".to_string()));
        assert_eq!(pkg.file_references.len(), 3);
        assert_eq!(pkg.file_references[0].path, "bin/bash");
        assert_eq!(
            pkg.file_references[0].md5,
            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
        );
        assert_eq!(pkg.file_references[1].path, "usr/bin/bashbug");
        assert_eq!(
            pkg.file_references[1].md5,
            Some(Md5Digest::from_hex("1c77d2031971b4e4c512ac952102cd85").unwrap())
        );
        assert_eq!(pkg.file_references[2].path, "usr/share/doc/bash/COMPAT.gz");
        assert_eq!(
            pkg.file_references[2].md5,
            Some(Md5Digest::from_hex("f55e3a16959b0bb8915cb5f219521c80").unwrap())
        );
    }

    /// Spaces inside the path portion of a checksum line are preserved.
    #[test]
    fn test_parse_debian_file_list_md5sums_path_with_spaces() {
        let content = "77506afebd3b7e19e937a678a185b62e  usr/share/doc/My File.txt\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledMd5Sums);
        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "usr/share/doc/My File.txt");
        assert_eq!(
            pkg.file_references[0].md5,
            Some(Md5Digest::from_hex("77506afebd3b7e19e937a678a185b62e").unwrap())
        );
    }

    /// A `name:arch` stem is split into the name and a purl arch qualifier.
    #[test]
    fn test_parse_debian_file_list_with_arch() {
        let content = "/usr/bin/foo\n\
                       /usr/lib/x86_64-linux-gnu/libfoo.so\n";
        let pkg = parse_debian_file_list(
            content,
            "libfoo:amd64",
            DatasourceId::DebianInstalledFilesList,
        );
        assert_eq!(pkg.name, Some("libfoo".to_string()));
        assert!(pkg.purl.is_some());
        assert!(pkg.purl.as_ref().unwrap().contains("arch=amd64"));
        assert_eq!(pkg.file_references.len(), 2);
    }

    /// Comment, empty, and whitespace-only lines are skipped.
    #[test]
    fn test_parse_debian_file_list_skips_comments_and_empty() {
        // Written with escapes so the whitespace-only line survives editors
        // that strip trailing spaces.
        let content = "# This is a comment\n/bin/bash\n\n/usr/bin/bashbug\n  \n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.file_references.len(), 2);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
        assert_eq!(pkg.file_references[1].path, "/usr/bin/bashbug");
    }

    /// The bare `md5sums` stem yields no package name, and a hash that is not
    /// a valid MD5 leaves the digest unset while keeping the path.
    #[test]
    fn test_parse_debian_file_list_md5sums_only() {
        let content = "abc123  usr/bin/tool\n";
        let pkg = parse_debian_file_list(content, "md5sums", DatasourceId::DebianInstalledMd5Sums);
        assert_eq!(pkg.name, None);
        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "usr/bin/tool");
        assert_eq!(pkg.file_references[0].md5, None);
    }

    /// Well-known root directories are not reported as file references.
    #[test]
    fn test_parse_debian_file_list_ignores_root_dirs() {
        let content = "/.\n\
                       /bin\n\
                       /bin/bash\n\
                       /etc\n\
                       /usr\n\
                       /var\n";
        let pkg = parse_debian_file_list(content, "bash", DatasourceId::DebianInstalledFilesList);
        assert_eq!(pkg.file_references.len(), 1);
        assert_eq!(pkg.file_references[0].path, "/bin/bash");
    }
}