provenant/parsers/debian/
file_list.rs1use std::path::Path;
5
6use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
9
10use super::super::metadata::ParserMetadata;
11use super::utils::build_debian_purl;
12use super::{IGNORED_ROOT_DIRS, PACKAGE_TYPE, default_package_data, read_or_default};
13use crate::parsers::PackageParser;
14
15pub struct DebianInstalledListParser;
17
18impl PackageParser for DebianInstalledListParser {
19 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
20
21 fn metadata() -> Vec<ParserMetadata> {
22 vec![ParserMetadata {
23 description: "Debian installed files list",
24 file_patterns: &["**/var/lib/dpkg/info/*.list"],
25 package_type: "deb",
26 primary_language: "",
27 documentation_url: Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
28 }]
29 }
30
31 fn is_match(path: &Path) -> bool {
32 path.extension().and_then(|e| e.to_str()) == Some("list")
33 && path
34 .to_str()
35 .map(|p| p.contains("/var/lib/dpkg/info/"))
36 .unwrap_or(false)
37 }
38
39 fn extract_packages(path: &Path) -> Vec<PackageData> {
40 let filename = match path.file_stem().and_then(|s| s.to_str()) {
41 Some(f) => f,
42 None => {
43 return vec![default_package_data(DatasourceId::DebianInstalledFilesList)];
44 }
45 };
46
47 let content = read_or_default!(path, ".list file", DatasourceId::DebianInstalledFilesList);
48
49 vec![parse_debian_file_list(
50 &content,
51 filename,
52 DatasourceId::DebianInstalledFilesList,
53 )]
54 }
55}
56
57pub struct DebianInstalledMd5sumsParser;
59
60impl PackageParser for DebianInstalledMd5sumsParser {
61 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
62
63 fn metadata() -> Vec<ParserMetadata> {
64 vec![ParserMetadata {
65 description: "Debian installed package md5sums",
66 file_patterns: &["**/var/lib/dpkg/info/*.md5sums"],
67 package_type: "deb",
68 primary_language: "",
69 documentation_url: Some("https://www.debian.org/doc/debian-policy/ch-files.html"),
70 }]
71 }
72
73 fn is_match(path: &Path) -> bool {
74 path.extension().and_then(|e| e.to_str()) == Some("md5sums")
75 && path
76 .to_str()
77 .map(|p| p.contains("/var/lib/dpkg/info/"))
78 .unwrap_or(false)
79 }
80
81 fn extract_packages(path: &Path) -> Vec<PackageData> {
82 let filename = match path.file_stem().and_then(|s| s.to_str()) {
83 Some(f) => f,
84 None => {
85 return vec![default_package_data(DatasourceId::DebianInstalledMd5Sums)];
86 }
87 };
88
89 let content = read_or_default!(path, ".md5sums file", DatasourceId::DebianInstalledMd5Sums);
90
91 vec![parse_debian_file_list(
92 &content,
93 filename,
94 DatasourceId::DebianInstalledMd5Sums,
95 )]
96 }
97}
98
99pub(crate) fn parse_file_entries(content: &str, log_label: &str) -> Vec<FileReference> {
100 let mut file_references = Vec::new();
101 let mut count = 0usize;
102
103 for line in content.lines() {
104 count += 1;
105 if count > MAX_ITERATION_COUNT {
106 warn!("{log_label}: exceeded MAX_ITERATION_COUNT lines, stopping");
107 break;
108 }
109 let line = line.trim();
110 if line.is_empty() || line.starts_with('#') {
111 continue;
112 }
113
114 let (md5sum, path): (Option<Md5Digest>, &str) = if let Some(idx) = line.find(" ") {
115 (
116 Md5Digest::from_hex(line[..idx].trim()).ok(),
117 line[idx + 2..].trim(),
118 )
119 } else if let Some((hash, p)) = line.split_once(' ') {
120 (Md5Digest::from_hex(hash.trim()).ok(), p.trim())
121 } else {
122 (None, line)
123 };
124
125 if IGNORED_ROOT_DIRS.contains(&path) {
126 continue;
127 }
128
129 file_references.push(FileReference {
130 path: path.to_string(),
131 size: None,
132 sha1: None,
133 md5: md5sum,
134 sha256: None,
135 sha512: None,
136 extra_data: None,
137 });
138 }
139
140 file_references
141}
142
143fn parse_debian_file_list(
144 content: &str,
145 filename: &str,
146 datasource_id: DatasourceId,
147) -> PackageData {
148 let (name, arch_qualifier) = if let Some((pkg, arch)) = filename.split_once(':') {
149 (
150 Some(truncate_field(pkg.to_string())),
151 Some(arch.to_string()),
152 )
153 } else if filename == "md5sums" {
154 (None, None)
155 } else {
156 (Some(truncate_field(filename.to_string())), None)
157 };
158
159 let file_references = parse_file_entries(content, "parse_debian_file_list");
160
161 if file_references.is_empty() {
162 return default_package_data(datasource_id);
163 }
164
165 let namespace = Some("debian".to_string());
166 let mut package = PackageData {
167 datasource_id: Some(datasource_id),
168 package_type: Some(PACKAGE_TYPE),
169 namespace: namespace.clone(),
170 name: name.clone(),
171 file_references,
172 ..Default::default()
173 };
174
175 if let Some(n) = &name {
176 package.purl = build_debian_purl(n, None, namespace.as_deref(), arch_qualifier.as_deref());
177 }
178
179 package
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// Parses `content` as the installed-files datasource for the given stem.
    fn parse_as_list(content: &str, stem: &str) -> PackageData {
        parse_debian_file_list(content, stem, DatasourceId::DebianInstalledFilesList)
    }

    /// Builds the expected digest from its hex form.
    fn digest(hex: &str) -> Md5Digest {
        Md5Digest::from_hex(hex).unwrap()
    }

    #[test]
    fn test_list_parser_is_match() {
        let accepted = [
            "/var/lib/dpkg/info/bash.list",
            "/var/lib/dpkg/info/package:amd64.list",
        ];
        for candidate in accepted {
            assert!(
                DebianInstalledListParser::is_match(&PathBuf::from(candidate)),
                "{candidate}"
            );
        }

        let rejected = ["bash.list", "/var/lib/dpkg/info/bash.md5sums"];
        for candidate in rejected {
            assert!(
                !DebianInstalledListParser::is_match(&PathBuf::from(candidate)),
                "{candidate}"
            );
        }
    }

    #[test]
    fn test_md5sums_parser_is_match() {
        let accepted = [
            "/var/lib/dpkg/info/bash.md5sums",
            "/var/lib/dpkg/info/package:amd64.md5sums",
        ];
        for candidate in accepted {
            assert!(
                DebianInstalledMd5sumsParser::is_match(&PathBuf::from(candidate)),
                "{candidate}"
            );
        }

        let rejected = ["bash.md5sums", "/var/lib/dpkg/info/bash.list"];
        for candidate in rejected {
            assert!(
                !DebianInstalledMd5sumsParser::is_match(&PathBuf::from(candidate)),
                "{candidate}"
            );
        }
    }

    #[test]
    fn test_parse_debian_file_list_plain_list() {
        let listing = "/.\n/bin\n/bin/bash\n/usr/bin/bashbug\n/usr/share/doc/bash/README\n";
        let parsed = parse_as_list(listing, "bash");

        assert_eq!(parsed.name, Some("bash".to_string()));
        // "/." and "/bin" are ignored root directories, leaving three files.
        let refs = &parsed.file_references;
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].path, "/bin/bash");
        assert_eq!(refs[0].md5, None);
        assert_eq!(refs[1].path, "/usr/bin/bashbug");
        assert_eq!(refs[2].path, "/usr/share/doc/bash/README");
    }

    #[test]
    fn test_parse_debian_file_list_md5sums() {
        let listing = "77506afebd3b7e19e937a678a185b62e bin/bash\n\
                       1c77d2031971b4e4c512ac952102cd85 usr/bin/bashbug\n\
                       f55e3a16959b0bb8915cb5f219521c80 usr/share/doc/bash/COMPAT.gz\n";
        let parsed = parse_as_list(listing, "bash");

        assert_eq!(parsed.name, Some("bash".to_string()));
        let refs = &parsed.file_references;
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].path, "bin/bash");
        assert_eq!(
            refs[0].md5,
            Some(digest("77506afebd3b7e19e937a678a185b62e"))
        );
        assert_eq!(refs[1].path, "usr/bin/bashbug");
        assert_eq!(
            refs[1].md5,
            Some(digest("1c77d2031971b4e4c512ac952102cd85"))
        );
    }

    #[test]
    fn test_parse_debian_file_list_with_arch() {
        let listing = "/usr/bin/foo\n/usr/lib/x86_64-linux-gnu/libfoo.so\n";
        let parsed = parse_as_list(listing, "libfoo:amd64");

        assert_eq!(parsed.name, Some("libfoo".to_string()));
        assert!(parsed.purl.is_some());
        let purl = parsed.purl.as_ref().unwrap();
        assert!(purl.contains("arch=amd64"));
        assert_eq!(parsed.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_skips_comments_and_empty() {
        let listing = "# This is a comment\n/bin/bash\n\n/usr/bin/bashbug\n\n";
        let parsed = parse_as_list(listing, "bash");

        assert_eq!(parsed.file_references.len(), 2);
    }

    #[test]
    fn test_parse_debian_file_list_md5sums_only() {
        let listing = "abc123 usr/bin/tool\n";
        let parsed = parse_as_list(listing, "md5sums");

        // A bare "md5sums" stem carries no package name.
        assert_eq!(parsed.name, None);
        assert_eq!(parsed.file_references.len(), 1);
    }

    #[test]
    fn test_parse_debian_file_list_ignores_root_dirs() {
        let listing = "/.\n/bin\n/bin/bash\n/etc\n/usr\n/var\n";
        let parsed = parse_as_list(listing, "bash");

        let refs = &parsed.file_references;
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].path, "/bin/bash");
    }
}