Skip to main content

provenant/parsers/
rpm_db.rs

1//! Parser for RPM database files.
2//!
3//! Extracts installed package metadata from the RPM database maintained by the
4//! system package manager, typically located in /var/lib/rpm/.
5//!
6//! # Supported Formats
7//! - /var/lib/rpm/Packages (Berkeley DB format or SQLite - raw database file)
8//! - Other RPM database index files
9//!
10//! # Key Features
11//! - Installed package metadata extraction from system RPM database
12//! - Database format detection (BDB vs SQLite)
13//! - Multi-version package support
14//! - Package URL (purl) generation with architecture namespace
15//!
16//! # Implementation Notes
17//! - Direct parsing of RPM database files (not via rpm CLI)
18//! - Database location detection (/var/lib/rpm/Packages or variants)
19//! - Graceful error handling for unreadable or corrupted databases
20//! - Returns package data for each installed package entry
21
22use std::path::Path;
23
24use log::warn;
25
26use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType};
27
28use super::PackageParser;
29use super::rpm_parser::infer_rpm_namespace;
30
31const PACKAGE_TYPE: PackageType = PackageType::Rpm;
32
33fn default_package_data(datasource_id: DatasourceId) -> PackageData {
34    PackageData {
35        package_type: Some(PACKAGE_TYPE),
36        datasource_id: Some(datasource_id),
37        ..Default::default()
38    }
39}
40
41pub struct RpmBdbDatabaseParser;
42
43impl PackageParser for RpmBdbDatabaseParser {
44    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
45
46    fn is_match(path: &Path) -> bool {
47        let path_str = path.to_string_lossy();
48        (path_str.ends_with("/Packages") || path_str.contains("/var/lib/rpm/Packages"))
49            && !path_str.ends_with(".db")
50    }
51
52    fn extract_packages(path: &Path) -> Vec<PackageData> {
53        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseBdb) {
54            Ok(pkgs) if !pkgs.is_empty() => pkgs,
55            Ok(_) => vec![default_package_data(DatasourceId::RpmInstalledDatabaseBdb)],
56            Err(e) => {
57                warn!("Failed to parse RPM BDB database {:?}: {}", path, e);
58                vec![default_package_data(DatasourceId::RpmInstalledDatabaseBdb)]
59            }
60        }
61    }
62}
63
64pub struct RpmNdbDatabaseParser;
65
66impl PackageParser for RpmNdbDatabaseParser {
67    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
68
69    fn is_match(path: &Path) -> bool {
70        let path_str = path.to_string_lossy();
71        path_str.ends_with("/Packages.db") || path_str.contains("usr/lib/sysimage/rpm/Packages.db")
72    }
73
74    fn extract_packages(path: &Path) -> Vec<PackageData> {
75        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseNdb) {
76            Ok(pkgs) if !pkgs.is_empty() => pkgs,
77            Ok(_) => vec![default_package_data(DatasourceId::RpmInstalledDatabaseNdb)],
78            Err(e) => {
79                warn!("Failed to parse RPM NDB database {:?}: {}", path, e);
80                vec![default_package_data(DatasourceId::RpmInstalledDatabaseNdb)]
81            }
82        }
83    }
84}
85
86pub struct RpmSqliteDatabaseParser;
87
88impl PackageParser for RpmSqliteDatabaseParser {
89    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
90
91    fn is_match(path: &Path) -> bool {
92        let path_str = path.to_string_lossy();
93        path_str.ends_with("/rpmdb.sqlite") || path_str.contains("rpm/rpmdb.sqlite")
94    }
95
96    fn extract_packages(path: &Path) -> Vec<PackageData> {
97        match parse_rpm_database(path, DatasourceId::RpmInstalledDatabaseSqlite) {
98            Ok(pkgs) if !pkgs.is_empty() => pkgs,
99            Ok(_) => vec![default_package_data(
100                DatasourceId::RpmInstalledDatabaseSqlite,
101            )],
102            Err(e) => {
103                warn!("Failed to parse RPM SQLite database {:?}: {}", path, e);
104                vec![default_package_data(
105                    DatasourceId::RpmInstalledDatabaseSqlite,
106                )]
107            }
108        }
109    }
110}
111
112fn parse_rpm_database(
113    path: &Path,
114    datasource_id: DatasourceId,
115) -> Result<Vec<PackageData>, String> {
116    match rpmdb::read_packages(path.to_path_buf()) {
117        Ok(packages) => Ok(packages
118            .into_iter()
119            .map(|pkg| {
120                let name = if pkg.name.is_empty() {
121                    None
122                } else {
123                    Some(pkg.name.clone())
124                };
125
126                let version = build_evr_version(pkg.epoch, &pkg.version, &pkg.release);
127
128                let namespace = infer_rpm_namespace(
129                    None,
130                    (!pkg.vendor.is_empty()).then_some(pkg.vendor.as_str()),
131                    Some(pkg.release.as_str()),
132                    None,
133                );
134
135                let architecture = if pkg.arch.is_empty() {
136                    None
137                } else {
138                    Some(pkg.arch.clone())
139                };
140
141                let dependencies = pkg
142                    .requires
143                    .iter()
144                    .filter(|r| {
145                        !r.is_empty() && !r.starts_with("rpmlib(") && !r.starts_with("config(")
146                    })
147                    .map(|require| {
148                        use packageurl::PackageUrl;
149                        let purl = PackageUrl::new(PACKAGE_TYPE.as_str(), require)
150                            .ok()
151                            .map(|p| p.to_string());
152
153                        Dependency {
154                            purl,
155                            extracted_requirement: None,
156                            scope: Some("requires".to_string()),
157                            is_runtime: Some(true),
158                            is_optional: Some(false),
159                            is_pinned: Some(false),
160                            is_direct: Some(true),
161                            resolved_package: None,
162                            extra_data: None,
163                        }
164                    })
165                    .collect();
166
167                let extracted_license_statement = if pkg.license.is_empty() {
168                    None
169                } else {
170                    Some(pkg.license)
171                };
172
173                let purl = name.as_ref().and_then(|n| {
174                    use packageurl::PackageUrl;
175                    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), n).ok()?;
176
177                    if let Some(ns) = &namespace {
178                        purl.with_namespace(ns).ok()?;
179                    }
180
181                    if let Some(ver) = &version {
182                        purl.with_version(ver).ok()?;
183                    }
184
185                    if let Some(arch) = &architecture {
186                        purl.add_qualifier("arch", arch).ok()?;
187                    }
188
189                    Some(purl.to_string())
190                });
191
192                PackageData {
193                    datasource_id: Some(datasource_id),
194                    package_type: Some(PACKAGE_TYPE),
195                    namespace,
196                    name,
197                    version,
198                    qualifiers: architecture.as_ref().map(|arch| {
199                        let mut q = std::collections::HashMap::new();
200                        q.insert("arch".to_string(), arch.clone());
201                        q
202                    }),
203                    subpath: None,
204                    primary_language: None,
205                    description: None,
206                    release_date: None,
207                    parties: Vec::new(),
208                    keywords: Vec::new(),
209                    homepage_url: None,
210                    download_url: None,
211                    size: if pkg.size > 0 {
212                        Some(pkg.size as u64)
213                    } else {
214                        None
215                    },
216                    sha1: None,
217                    md5: None,
218                    sha256: None,
219                    sha512: None,
220                    bug_tracking_url: None,
221                    code_view_url: None,
222                    vcs_url: None,
223                    copyright: None,
224                    holder: None,
225                    declared_license_expression: None,
226                    declared_license_expression_spdx: None,
227                    license_detections: Vec::new(),
228                    other_license_expression: None,
229                    other_license_expression_spdx: None,
230                    other_license_detections: Vec::new(),
231                    extracted_license_statement,
232                    notice_text: None,
233                    source_packages: if pkg.source_rpm.is_empty() {
234                        Vec::new()
235                    } else {
236                        vec![pkg.source_rpm]
237                    },
238                    file_references: build_file_references(
239                        &pkg.base_names,
240                        &pkg.dir_indexes,
241                        &pkg.dir_names,
242                    ),
243                    is_private: false,
244                    is_virtual: false,
245                    extra_data: None,
246                    dependencies,
247                    repository_homepage_url: None,
248                    repository_download_url: None,
249                    api_data_url: None,
250                    purl,
251                }
252            })
253            .collect()),
254        Err(e) => Err(format!("Failed to read RPM database: {:?}", e)),
255    }
256}
257
/// Formats an RPM version string as `[epoch:]version[-release]`.
///
/// - Returns `None` when `version` is empty, regardless of the other parts.
/// - The `epoch:` prefix is emitted only for epochs greater than zero.
/// - The `-release` suffix is emitted only when `release` is non-empty.
fn build_evr_version(epoch: i32, version: &str, release: &str) -> Option<String> {
    if version.is_empty() {
        return None;
    }

    let epoch_prefix = if epoch > 0 {
        format!("{}:", epoch)
    } else {
        String::new()
    };

    let evr = if release.is_empty() {
        format!("{}{}", epoch_prefix, version)
    } else {
        format!("{}{}-{}", epoch_prefix, version, release)
    };

    Some(evr)
}
278
279fn build_file_references(
280    base_names: &[String],
281    dir_indexes: &[i32],
282    dir_names: &[String],
283) -> Vec<FileReference> {
284    if base_names.is_empty() || dir_names.is_empty() {
285        return Vec::new();
286    }
287
288    base_names
289        .iter()
290        .zip(dir_indexes.iter())
291        .filter_map(|(basename, &dir_idx)| {
292            let dirname = dir_names.get(dir_idx as usize)?;
293            let path = format!("{}{}", dirname, basename);
294            if path.is_empty() || path == "/" {
295                return None;
296            }
297            Some(FileReference {
298                path,
299                size: None,
300                sha1: None,
301                md5: None,
302                sha256: None,
303                sha512: None,
304                extra_data: None,
305            })
306        })
307        .collect()
308}
309
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// SQLite fixture shared by the database-backed tests.
    const SQLITE_FIXTURE: &str = "testdata/rpm/rpmdb.sqlite";

    // --- Path matching -----------------------------------------------------

    #[test]
    fn test_bdb_parser_is_match() {
        for accepted in ["/var/lib/rpm/Packages", "rootfs/var/lib/rpm/Packages"] {
            assert!(RpmBdbDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("/var/lib/rpm/Packages.db");
        assert!(!RpmBdbDatabaseParser::is_match(&rejected));
    }

    #[test]
    fn test_ndb_parser_is_match() {
        for accepted in [
            "usr/lib/sysimage/rpm/Packages.db",
            "/rootfs/usr/lib/sysimage/rpm/Packages.db",
        ] {
            assert!(RpmNdbDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("usr/lib/rpm/Packages");
        assert!(!RpmNdbDatabaseParser::is_match(&rejected));
    }

    #[test]
    fn test_sqlite_parser_is_match() {
        for accepted in [
            "var/lib/rpm/rpmdb.sqlite",
            "/rootfs/var/lib/rpm/rpmdb.sqlite",
        ] {
            assert!(RpmSqliteDatabaseParser::is_match(&PathBuf::from(accepted)));
        }

        let rejected = PathBuf::from("/var/lib/rpm/Packages");
        assert!(!RpmSqliteDatabaseParser::is_match(&rejected));
    }

    // --- EVR formatting ----------------------------------------------------

    #[test]
    fn test_build_evr_version_full() {
        let evr = build_evr_version(2, "1.0.0", "1.el7");
        assert_eq!(evr, Some("2:1.0.0-1.el7".to_string()));
    }

    #[test]
    fn test_build_evr_version_no_epoch() {
        let evr = build_evr_version(0, "1.0.0", "1.el7");
        assert_eq!(evr, Some("1.0.0-1.el7".to_string()));
    }

    #[test]
    fn test_build_evr_version_no_release() {
        let evr = build_evr_version(0, "1.0.0", "");
        assert_eq!(evr, Some("1.0.0".to_string()));
    }

    #[test]
    fn test_build_evr_version_empty() {
        let evr = build_evr_version(0, "", "");
        assert_eq!(evr, None);
    }

    // --- Database-backed extraction ----------------------------------------

    #[test]
    fn test_parse_rpm_database_sqlite() {
        let fixture = PathBuf::from(SQLITE_FIXTURE);

        let first = RpmSqliteDatabaseParser::extract_first_package(&fixture);

        assert_eq!(first.package_type, Some(PackageType::Rpm));
        assert_eq!(
            first.datasource_id,
            Some(DatasourceId::RpmInstalledDatabaseSqlite)
        );
        assert!(first.name.is_some());
    }

    #[test]
    fn test_parse_rpm_database_sqlite_preserves_release_in_version() {
        let fixture = PathBuf::from(SQLITE_FIXTURE);

        let first = RpmSqliteDatabaseParser::extract_first_package(&fixture);

        // A `-` in the version shows the release part was appended.
        assert!(
            first
                .version
                .as_ref()
                .is_some_and(|version| version.contains('-'))
        );
    }

    // --- File references ---------------------------------------------------

    #[test]
    fn test_build_file_references_skips_invalid_entries() {
        let base_names = vec!["valid".to_string(), "".to_string(), "ignored".to_string()];
        let dir_indexes = [0, 0, -1];
        let dir_names = vec!["/usr/bin/".to_string()];

        let file_refs = build_file_references(&base_names, &dir_indexes, &dir_names);

        // The entry with index -1 is dropped; the empty basename keeps the
        // bare directory path.
        let paths: Vec<&str> = file_refs.iter().map(|r| r.path.as_str()).collect();
        assert_eq!(paths, ["/usr/bin/valid", "/usr/bin/"]);
    }
}
421
422crate::register_parser!(
423    "RPM installed package database",
424    &[
425        "**/var/lib/rpm/Packages",
426        "**/var/lib/rpm/Packages.db",
427        "**/var/lib/rpm/rpmdb.sqlite"
428    ],
429    "rpm",
430    "",
431    Some("https://rpm.org/"),
432);