Skip to main content

provenant/parsers/
rpm_db.rs

1//! Parser for RPM database files.
2//!
3//! Extracts installed package metadata from the RPM database maintained by the
4//! system package manager, typically located in /var/lib/rpm/.
5//!
6//! # Supported Formats
7//! - /var/lib/rpm/Packages (Berkeley DB format or SQLite - raw database file)
8//! - Other RPM database index files
9//!
10//! # Key Features
11//! - Installed package metadata extraction from system RPM database
12//! - Database format detection (BDB vs SQLite)
13//! - Multi-version package support
14//! - Package URL (purl) generation with architecture namespace
15//!
16//! # Implementation Notes
17//! - Direct parsing of RPM database files (not via rpm CLI)
18//! - Database location detection (/var/lib/rpm/Packages or variants)
19//! - Graceful error handling for unreadable or corrupted databases
20//! - Returns package data for each installed package entry
21
22use std::path::Path;
23
24use log::warn;
25
26use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType};
27
28use super::PackageParser;
29use super::rpm_parser::infer_rpm_namespace;
30
31const PACKAGE_TYPE: PackageType = PackageType::Rpm;
32
33fn default_package_data(datasource_id: DatasourceId) -> PackageData {
34    PackageData {
35        package_type: Some(PACKAGE_TYPE),
36        datasource_id: Some(datasource_id),
37        ..Default::default()
38    }
39}
40
41pub struct RpmBdbDatabaseParser;
42
43impl PackageParser for RpmBdbDatabaseParser {
44    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
45
46    fn is_match(path: &Path) -> bool {
47        if cfg!(target_os = "windows") {
48            return false;
49        }
50
51        let path_str = path.to_string_lossy();
52        (path_str.ends_with("/Packages") || path_str.contains("/var/lib/rpm/Packages"))
53            && !path_str.ends_with(".db")
54    }
55
56    fn extract_packages(path: &Path) -> Vec<PackageData> {
57        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseBdb) {
58            Ok(pkgs) if !pkgs.is_empty() => pkgs,
59            Ok(_) => vec![default_package_data(DatasourceId::RpmInstalledDatabaseBdb)],
60            Err(e) => {
61                warn!("Failed to parse RPM BDB database {:?}: {}", path, e);
62                vec![default_package_data(DatasourceId::RpmInstalledDatabaseBdb)]
63            }
64        }
65    }
66}
67
68pub struct RpmNdbDatabaseParser;
69
70impl PackageParser for RpmNdbDatabaseParser {
71    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73    fn is_match(path: &Path) -> bool {
74        if cfg!(target_os = "windows") {
75            return false;
76        }
77
78        let path_str = path.to_string_lossy();
79        path_str.ends_with("/Packages.db") || path_str.contains("usr/lib/sysimage/rpm/Packages.db")
80    }
81
82    fn extract_packages(path: &Path) -> Vec<PackageData> {
83        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseNdb) {
84            Ok(pkgs) if !pkgs.is_empty() => pkgs,
85            Ok(_) => vec![default_package_data(DatasourceId::RpmInstalledDatabaseNdb)],
86            Err(e) => {
87                warn!("Failed to parse RPM NDB database {:?}: {}", path, e);
88                vec![default_package_data(DatasourceId::RpmInstalledDatabaseNdb)]
89            }
90        }
91    }
92}
93
94pub struct RpmSqliteDatabaseParser;
95
96impl PackageParser for RpmSqliteDatabaseParser {
97    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
98
99    fn is_match(path: &Path) -> bool {
100        if cfg!(target_os = "windows") {
101            return false;
102        }
103
104        let path_str = path.to_string_lossy();
105        path_str.ends_with("/rpmdb.sqlite") || path_str.contains("rpm/rpmdb.sqlite")
106    }
107
108    fn extract_packages(path: &Path) -> Vec<PackageData> {
109        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseSqlite) {
110            Ok(pkgs) if !pkgs.is_empty() => pkgs,
111            Ok(_) => vec![default_package_data(
112                DatasourceId::RpmInstalledDatabaseSqlite,
113            )],
114            Err(e) => {
115                warn!("Failed to parse RPM SQLite database {:?}: {}", path, e);
116                vec![default_package_data(
117                    DatasourceId::RpmInstalledDatabaseSqlite,
118                )]
119            }
120        }
121    }
122}
123
124fn parse_rpm_database(
125    path: &Path,
126    datasource_id: DatasourceId,
127) -> Result<Vec<PackageData>, String> {
128    #[cfg(unix)]
129    match rpmdb::read_packages(path.to_path_buf()) {
130        Ok(packages) => Ok(packages
131            .into_iter()
132            .map(|pkg| {
133                let name = if pkg.name.is_empty() {
134                    None
135                } else {
136                    Some(pkg.name.clone())
137                };
138
139                let version = build_evr_version(pkg.epoch, &pkg.version, &pkg.release);
140
141                let namespace = infer_rpm_namespace(
142                    None,
143                    (!pkg.vendor.is_empty()).then_some(pkg.vendor.as_str()),
144                    Some(pkg.release.as_str()),
145                    None,
146                );
147
148                let architecture = if pkg.arch.is_empty() {
149                    None
150                } else {
151                    Some(pkg.arch.clone())
152                };
153
154                let dependencies = pkg
155                    .requires
156                    .iter()
157                    .filter(|r| {
158                        !r.is_empty() && !r.starts_with("rpmlib(") && !r.starts_with("config(")
159                    })
160                    .map(|require| {
161                        use packageurl::PackageUrl;
162                        let purl = PackageUrl::new(PACKAGE_TYPE.as_str(), require)
163                            .ok()
164                            .map(|p| p.to_string());
165
166                        Dependency {
167                            purl,
168                            extracted_requirement: None,
169                            scope: Some("requires".to_string()),
170                            is_runtime: Some(true),
171                            is_optional: Some(false),
172                            is_pinned: Some(false),
173                            is_direct: Some(true),
174                            resolved_package: None,
175                            extra_data: None,
176                        }
177                    })
178                    .collect();
179
180                let extracted_license_statement = if pkg.license.is_empty() {
181                    None
182                } else {
183                    Some(pkg.license)
184                };
185
186                let purl = name.as_ref().and_then(|n| {
187                    use packageurl::PackageUrl;
188                    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), n).ok()?;
189
190                    if let Some(ns) = &namespace {
191                        purl.with_namespace(ns).ok()?;
192                    }
193
194                    if let Some(ver) = &version {
195                        purl.with_version(ver).ok()?;
196                    }
197
198                    if let Some(arch) = &architecture {
199                        purl.add_qualifier("arch", arch).ok()?;
200                    }
201
202                    Some(purl.to_string())
203                });
204
205                PackageData {
206                    datasource_id: Some(datasource_id),
207                    package_type: Some(PACKAGE_TYPE),
208                    namespace,
209                    name,
210                    version,
211                    qualifiers: architecture.as_ref().map(|arch| {
212                        let mut q = std::collections::HashMap::new();
213                        q.insert("arch".to_string(), arch.clone());
214                        q
215                    }),
216                    subpath: None,
217                    primary_language: None,
218                    description: None,
219                    release_date: None,
220                    parties: Vec::new(),
221                    keywords: Vec::new(),
222                    homepage_url: None,
223                    download_url: None,
224                    size: if pkg.size > 0 {
225                        Some(pkg.size as u64)
226                    } else {
227                        None
228                    },
229                    sha1: None,
230                    md5: None,
231                    sha256: None,
232                    sha512: None,
233                    bug_tracking_url: None,
234                    code_view_url: None,
235                    vcs_url: None,
236                    copyright: None,
237                    holder: None,
238                    declared_license_expression: None,
239                    declared_license_expression_spdx: None,
240                    license_detections: Vec::new(),
241                    other_license_expression: None,
242                    other_license_expression_spdx: None,
243                    other_license_detections: Vec::new(),
244                    extracted_license_statement,
245                    notice_text: None,
246                    source_packages: if pkg.source_rpm.is_empty() {
247                        Vec::new()
248                    } else {
249                        vec![pkg.source_rpm]
250                    },
251                    file_references: build_file_references(
252                        &pkg.base_names,
253                        &pkg.dir_indexes,
254                        &pkg.dir_names,
255                    ),
256                    is_private: false,
257                    is_virtual: false,
258                    extra_data: None,
259                    dependencies,
260                    repository_homepage_url: None,
261                    repository_download_url: None,
262                    api_data_url: None,
263                    purl,
264                }
265            })
266            .collect()),
267        Err(e) => Err(format!("Failed to read RPM database: {:?}", e)),
268    }
269
270    #[cfg(not(unix))]
271    {
272        let _ = (path, datasource_id);
273        Err(format!(
274            "RPM database parsing is only supported on Unix targets (current target: {})",
275            std::env::consts::OS
276        ))
277    }
278}
279
/// Renders an RPM version string of the form `[epoch:]version[-release]`.
///
/// Returns `None` when `version` is empty. The `epoch:` prefix is emitted only for a
/// strictly positive `epoch`, and the `-release` suffix only for a non-empty `release`.
fn build_evr_version(epoch: i32, version: &str, release: &str) -> Option<String> {
    if version.is_empty() {
        return None;
    }

    let has_epoch = epoch > 0;
    let has_release = !release.is_empty();

    let evr = match (has_epoch, has_release) {
        (true, true) => format!("{epoch}:{version}-{release}"),
        (true, false) => format!("{epoch}:{version}"),
        (false, true) => format!("{version}-{release}"),
        (false, false) => version.to_owned(),
    };

    Some(evr)
}
300
301fn build_file_references(
302    base_names: &[String],
303    dir_indexes: &[i32],
304    dir_names: &[String],
305) -> Vec<FileReference> {
306    if base_names.is_empty() || dir_names.is_empty() {
307        return Vec::new();
308    }
309
310    base_names
311        .iter()
312        .zip(dir_indexes.iter())
313        .filter_map(|(basename, &dir_idx)| {
314            let dirname = dir_names.get(dir_idx as usize)?;
315            let path = format!("{}{}", dirname, basename);
316            if path.is_empty() || path == "/" {
317                return None;
318            }
319            Some(FileReference {
320                path,
321                size: None,
322                sha1: None,
323                md5: None,
324                sha256: None,
325                sha512: None,
326                extra_data: None,
327            })
328        })
329        .collect()
330}
331
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// Fixture database used by the SQLite extraction tests.
    #[cfg(unix)]
    const SQLITE_FIXTURE: &str = "testdata/rpm/rpmdb.sqlite";

    // --- path matching -----------------------------------------------------------

    #[test]
    fn test_bdb_parser_is_match() {
        for accepted in ["/var/lib/rpm/Packages", "rootfs/var/lib/rpm/Packages"] {
            assert!(RpmBdbDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("/var/lib/rpm/Packages.db");
        assert!(!RpmBdbDatabaseParser::is_match(&rejected));
    }

    #[test]
    fn test_ndb_parser_is_match() {
        for accepted in [
            "usr/lib/sysimage/rpm/Packages.db",
            "/rootfs/usr/lib/sysimage/rpm/Packages.db",
        ] {
            assert!(RpmNdbDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("usr/lib/rpm/Packages");
        assert!(!RpmNdbDatabaseParser::is_match(&rejected));
    }

    #[test]
    fn test_sqlite_parser_is_match() {
        for accepted in [
            "var/lib/rpm/rpmdb.sqlite",
            "/rootfs/var/lib/rpm/rpmdb.sqlite",
        ] {
            assert!(RpmSqliteDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("/var/lib/rpm/Packages");
        assert!(!RpmSqliteDatabaseParser::is_match(&rejected));
    }

    // --- version rendering -------------------------------------------------------

    #[test]
    fn test_build_evr_version_full() {
        let expected = Some("2:1.0.0-1.el7".to_string());
        assert_eq!(build_evr_version(2, "1.0.0", "1.el7"), expected);
    }

    #[test]
    fn test_build_evr_version_no_epoch() {
        let expected = Some("1.0.0-1.el7".to_string());
        assert_eq!(build_evr_version(0, "1.0.0", "1.el7"), expected);
    }

    #[test]
    fn test_build_evr_version_no_release() {
        let expected = Some("1.0.0".to_string());
        assert_eq!(build_evr_version(0, "1.0.0", ""), expected);
    }

    #[test]
    fn test_build_evr_version_empty() {
        assert_eq!(build_evr_version(0, "", ""), None);
    }

    // --- database extraction (requires the Unix-only reader) ---------------------

    #[cfg(unix)]
    #[test]
    fn test_parse_rpm_database_sqlite() {
        let fixture = PathBuf::from(SQLITE_FIXTURE);
        let first = RpmSqliteDatabaseParser::extract_first_package(&fixture);

        assert_eq!(first.package_type, Some(PackageType::Rpm));
        assert_eq!(
            first.datasource_id,
            Some(DatasourceId::RpmInstalledDatabaseSqlite)
        );
        assert!(first.name.is_some());
    }

    #[cfg(unix)]
    #[test]
    fn test_parse_rpm_database_sqlite_preserves_release_in_version() {
        let fixture = PathBuf::from(SQLITE_FIXTURE);
        let first = RpmSqliteDatabaseParser::extract_first_package(&fixture);

        // The release is appended after a dash, so a complete version contains one.
        assert!(matches!(first.version.as_deref(), Some(v) if v.contains('-')));
    }

    // --- file references ---------------------------------------------------------

    #[test]
    fn test_build_file_references_skips_invalid_entries() {
        let base_names = ["valid".to_string(), "".to_string(), "ignored".to_string()];
        let dir_indexes = [0, 0, -1];
        let dir_names = ["/usr/bin/".to_string()];

        let file_refs = build_file_references(&base_names, &dir_indexes, &dir_names);
        let paths: Vec<&str> = file_refs.iter().map(|r| r.path.as_str()).collect();

        // The entry with index -1 is dropped; the empty base name keeps its directory.
        assert_eq!(paths, ["/usr/bin/valid", "/usr/bin/"]);
    }
}
445
446#[cfg(unix)]
447crate::register_parser!(
448    "RPM installed package database",
449    &[
450        "**/var/lib/rpm/Packages",
451        "**/var/lib/rpm/Packages.db",
452        "**/var/lib/rpm/rpmdb.sqlite"
453    ],
454    "rpm",
455    "",
456    Some("https://rpm.org/"),
457);