Skip to main content

provenant/parsers/
rpm_parser.rs

//! Parser for RPM package archives.
//!
//! Extracts package metadata and dependencies from RPM package files
//! by reading the embedded header metadata.
//!
//! # Supported Formats
//! - *.rpm and *.srpm (binary and source RPM package archives)
//! - Files with any other name whose first four bytes are the RPM magic number
//!
//! # Key Features
//! - Metadata extraction from RPM headers (name, version, release, architecture)
//! - Dependency extraction from the package's `requires` entries
//! - License, vendor, packager and distribution information parsing
//! - Package URL (purl) generation for the archive and for each dependency
//! - Graceful handling of malformed or corrupted RPM files
//!
//! # Implementation Notes
//! - Uses `rpm` crate for low-level RPM format parsing
//! - Namespace is inferred from distribution, vendor, dist URL, release tag or filename
//! - RPM architecture is captured as the `arch` qualifier
//! - Direct dependency tracking (all requires are direct)
//! - Error handling with `warn!()` logs on parse failures
21
22use std::fs::File;
23use std::io::{BufReader, Read};
24use std::path::Path;
25
26use log::warn;
27use rpm::{IndexTag, Package, PackageMetadata, RPM_MAGIC};
28
29use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
30
31use super::PackageParser;
32
33const PACKAGE_TYPE: PackageType = PackageType::Rpm;
34
35fn default_package_data() -> PackageData {
36    PackageData {
37        package_type: Some(PACKAGE_TYPE),
38        datasource_id: Some(DatasourceId::RpmArchive),
39        ..Default::default()
40    }
41}
42
/// Reports whether `lower` mentions Red Hat's Universal Base Image as a
/// standalone token: `ubi` optionally followed by digits (`ubi`, `ubi8`, ...).
///
/// `lower` must already be lowercased. Matching whole tokens instead of the
/// bare substring keeps unrelated words such as "Kubic" or "Rubicon" from
/// being classified as RHEL.
fn mentions_ubi(lower: &str) -> bool {
    lower
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter_map(|token| token.strip_prefix("ubi"))
        .any(|suffix| suffix.chars().all(|c| c.is_ascii_digit()))
}

/// Infers the purl namespace (distribution family) of an RPM package.
///
/// `distribution`, `vendor` and `dist_url` are inspected in that order; the
/// first one containing a known marker decides the result. Matching is
/// case-insensitive. When none of them matches, well-known dist tags in
/// `release` (`.fc`, `.el`, `mdv`/`mnb`, `suse`) are used as a fallback.
///
/// Returns `None` when no marker is recognised.
pub(crate) fn infer_rpm_namespace(
    distribution: Option<&str>,
    vendor: Option<&str>,
    release: Option<&str>,
    dist_url: Option<&str>,
) -> Option<String> {
    for candidate in [distribution, vendor, dist_url].iter().copied().flatten() {
        let lower = candidate.to_ascii_lowercase();
        let has = |needle: &str| lower.contains(needle);

        // Order matters: "opensuse" must be tested before the broader "suse".
        let namespace = if has("fedora") || has("koji") {
            "fedora"
        } else if has("centos") {
            "centos"
        } else if has("red hat") || has("redhat") || mentions_ubi(&lower) {
            "rhel"
        } else if has("opensuse") {
            "opensuse"
        } else if has("suse") {
            "suse"
        } else if has("mandriva") {
            // Also covers "openmandriva".
            "openmandriva"
        } else if has("mariner") {
            "mariner"
        } else {
            continue;
        };
        return Some(namespace.to_string());
    }

    // Fall back to the dist tag embedded in the release string.
    let lower = release?.to_ascii_lowercase();
    let namespace = if lower.contains(".fc") {
        "fedora"
    } else if lower.contains(".el") {
        "rhel"
    } else if lower.contains("mdv") || lower.contains("mnb") {
        "openmandriva"
    } else if lower.contains("suse") {
        "suse"
    } else {
        return None;
    };
    Some(namespace.to_string())
}
92
93fn rpm_header_string(metadata: &PackageMetadata, tag: IndexTag) -> Option<String> {
94    metadata
95        .header
96        .get_entry_data_as_string(tag)
97        .ok()
98        .and_then(|value| {
99            let trimmed = value.trim();
100            if trimmed.is_empty() || trimmed == "(none)" {
101                None
102            } else {
103                Some(trimmed.to_string())
104            }
105        })
106}
107
108fn rpm_header_string_array(metadata: &PackageMetadata, tag: IndexTag) -> Option<Vec<String>> {
109    metadata
110        .header
111        .get_entry_data_as_string_array(tag)
112        .ok()
113        .map(|items| {
114            items
115                .iter()
116                .map(|item| item.trim().to_string())
117                .filter(|item| !item.is_empty() && item != "(none)")
118                .collect::<Vec<_>>()
119        })
120        .filter(|items| !items.is_empty())
121}
122
123fn infer_vcs_url(metadata: &PackageMetadata, source_urls: &[String]) -> Option<String> {
124    if let Ok(vcs) = metadata.get_vcs()
125        && !vcs.trim().is_empty()
126    {
127        return Some(vcs.to_string());
128    }
129
130    source_urls
131        .iter()
132        .find(|url| url.starts_with("git+") || url.contains("src.fedoraproject.org"))
133        .cloned()
134}
135
/// Assembles the purl qualifier map for an RPM package.
///
/// * `arch` is set when `architecture` is present and non-empty.
/// * `source` is set to `"true"` when `is_source` is `true`.
///
/// Returns `None` when neither qualifier applies.
fn build_rpm_qualifiers(
    architecture: Option<&str>,
    is_source: bool,
) -> Option<std::collections::HashMap<String, String>> {
    let arch_entry = architecture
        .filter(|arch| !arch.is_empty())
        .map(|arch| ("arch".to_string(), arch.to_string()));
    let source_entry = if is_source {
        Some(("source".to_string(), "true".to_string()))
    } else {
        None
    };

    let qualifiers: std::collections::HashMap<String, String> =
        arch_entry.into_iter().chain(source_entry).collect();

    if qualifiers.is_empty() {
        None
    } else {
        Some(qualifiers)
    }
}
152
/// Stateless parser for RPM package archives (`.rpm` / `.srpm`).
///
/// All functionality is exposed through its `PackageParser` implementation.
pub struct RpmParser;
155
156impl PackageParser for RpmParser {
157    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
158
159    fn is_match(path: &Path) -> bool {
160        if let Some(ext) = path.extension().and_then(|e| e.to_str())
161            && matches!(ext, "rpm" | "srpm")
162        {
163            return true;
164        }
165
166        let mut file = match File::open(path) {
167            Ok(file) => file,
168            Err(_) => return false,
169        };
170        let mut magic = [0_u8; 4];
171        file.read_exact(&mut magic).is_ok() && magic == RPM_MAGIC
172    }
173
174    fn extract_packages(path: &Path) -> Vec<PackageData> {
175        let file = match File::open(path) {
176            Ok(f) => f,
177            Err(e) => {
178                warn!("Failed to open RPM file {:?}: {}", path, e);
179                return vec![default_package_data()];
180            }
181        };
182
183        let mut reader = BufReader::new(file);
184        let pkg = match Package::parse(&mut reader) {
185            Ok(p) => p,
186            Err(e) => {
187                warn!("Failed to parse RPM file {:?}: {}", path, e);
188                return vec![default_package_data()];
189            }
190        };
191
192        vec![parse_rpm_package(&pkg, path)]
193    }
194}
195
/// Guesses the distribution namespace from markers in the file name of `path`.
///
/// The file name is lowercased and checked against the rules below in order;
/// the first rule with a matching marker wins. Returns `None` when the path
/// has no UTF-8 file name or no marker is found.
fn infer_rpm_namespace_from_filename(path: &Path) -> Option<String> {
    // (markers, namespace) — "opensuse" must precede the broader "suse".
    const RULES: [(&[&str], &str); 5] = [
        (&[".fc"], "fedora"),
        (&[".el"], "rhel"),
        (&["mdv", "mnb"], "openmandriva"),
        (&["opensuse"], "opensuse"),
        (&["suse"], "suse"),
    ];

    let filename = path.file_name()?.to_str()?.to_ascii_lowercase();

    RULES
        .iter()
        .find(|(markers, _)| markers.iter().any(|marker| filename.contains(*marker)))
        .map(|(_, namespace)| (*namespace).to_string())
}
217
218fn parse_rpm_package(pkg: &Package, path: &Path) -> PackageData {
219    let metadata = &pkg.metadata;
220
221    let name = metadata.get_name().ok().map(|s| s.to_string());
222    let version = build_evr_version(metadata);
223    let description = metadata.get_description().ok().map(|s| s.to_string());
224    let homepage_url = metadata.get_url().ok().map(|s| s.to_string());
225    let architecture = metadata.get_arch().ok().map(|s| s.to_string());
226    let path_str = path.to_string_lossy();
227    let is_source = metadata.is_source_package()
228        || path_str.ends_with(".src.rpm")
229        || path_str.ends_with(".srpm");
230    let distribution = rpm_header_string(metadata, IndexTag::RPMTAG_DISTRIBUTION);
231    let dist_url = rpm_header_string(metadata, IndexTag::RPMTAG_DISTURL);
232    let bug_tracking_url = rpm_header_string(metadata, IndexTag::RPMTAG_BUGURL);
233    let source_urls =
234        rpm_header_string_array(metadata, IndexTag::RPMTAG_SOURCE).unwrap_or_default();
235    let source_rpm = metadata
236        .get_source_rpm()
237        .ok()
238        .filter(|value| !value.is_empty())
239        .map(|value| value.to_string());
240    let namespace = infer_rpm_namespace(
241        distribution.as_deref(),
242        metadata.get_vendor().ok(),
243        metadata.get_release().ok(),
244        dist_url.as_deref(),
245    )
246    .or_else(|| infer_rpm_namespace_from_filename(path));
247
248    let mut parties = Vec::new();
249
250    if let Ok(vendor) = metadata.get_vendor()
251        && !vendor.is_empty()
252    {
253        parties.push(Party {
254            r#type: Some("organization".to_string()),
255            role: Some("vendor".to_string()),
256            name: Some(vendor.to_string()),
257            email: None,
258            url: None,
259            organization: None,
260            organization_url: None,
261            timezone: None,
262        });
263    }
264
265    if let Some(distribution_name) = distribution.as_ref() {
266        parties.push(Party {
267            r#type: Some("organization".to_string()),
268            role: Some("distributor".to_string()),
269            name: Some(distribution_name.clone()),
270            email: None,
271            url: None,
272            organization: None,
273            organization_url: None,
274            timezone: None,
275        });
276    }
277
278    if let Ok(packager) = metadata.get_packager()
279        && !packager.is_empty()
280    {
281        let (name_opt, email_opt) = parse_packager(packager);
282        parties.push(Party {
283            r#type: Some("person".to_string()),
284            role: Some("packager".to_string()),
285            name: name_opt,
286            email: email_opt,
287            url: None,
288            organization: None,
289            organization_url: None,
290            timezone: None,
291        });
292    }
293
294    let extracted_license_statement = metadata.get_license().ok().map(|s| s.to_string());
295
296    let dependencies = extract_rpm_dependencies(pkg, namespace.as_deref());
297
298    let qualifiers = build_rpm_qualifiers(architecture.as_deref(), is_source);
299
300    let mut keywords = Vec::new();
301    if let Ok(group) = metadata.get_group()
302        && !group.is_empty()
303    {
304        keywords.push(group.to_string());
305    }
306
307    let mut extra_data = std::collections::HashMap::new();
308    if let Some(distribution) = distribution.clone() {
309        extra_data.insert(
310            "distribution".to_string(),
311            serde_json::Value::String(distribution),
312        );
313    }
314    if let Some(dist_url) = dist_url.clone() {
315        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
316    }
317    if let Ok(build_host) = metadata.get_build_host()
318        && !build_host.is_empty()
319    {
320        extra_data.insert(
321            "build_host".to_string(),
322            serde_json::Value::String(build_host.to_string()),
323        );
324    }
325    if let Ok(build_time) = metadata.get_build_time() {
326        extra_data.insert(
327            "build_time".to_string(),
328            serde_json::Value::Number(serde_json::Number::from(build_time)),
329        );
330    }
331    if !source_urls.is_empty() {
332        extra_data.insert(
333            "source_urls".to_string(),
334            serde_json::Value::Array(
335                source_urls
336                    .iter()
337                    .cloned()
338                    .map(serde_json::Value::String)
339                    .collect(),
340            ),
341        );
342    }
343    let vcs_url = infer_vcs_url(metadata, &source_urls);
344
345    PackageData {
346        datasource_id: Some(DatasourceId::RpmArchive),
347        package_type: Some(PACKAGE_TYPE),
348        namespace: namespace.clone(),
349        name: name.clone(),
350        version: version.clone(),
351        qualifiers,
352        description,
353        homepage_url,
354        size: metadata.get_installed_size().ok(),
355        parties,
356        keywords,
357        bug_tracking_url,
358        extracted_license_statement,
359        dependencies,
360        source_packages: source_rpm.into_iter().collect(),
361        vcs_url,
362        extra_data: (!extra_data.is_empty()).then_some(extra_data),
363        purl: name.as_ref().and_then(|n| {
364            build_rpm_purl(
365                n,
366                version.as_deref(),
367                namespace.as_deref(),
368                architecture.as_deref(),
369                is_source,
370            )
371        }),
372        ..Default::default()
373    }
374}
375
376fn extract_rpm_dependencies(pkg: &Package, namespace: Option<&str>) -> Vec<Dependency> {
377    let mut dependencies = Vec::new();
378
379    if let Ok(requires) = pkg.metadata.get_requires() {
380        for rpm_dep in requires {
381            let purl = build_rpm_purl(
382                &rpm_dep.name,
383                if rpm_dep.version.is_empty() {
384                    None
385                } else {
386                    Some(&rpm_dep.version)
387                },
388                namespace,
389                None,
390                false,
391            );
392
393            let extracted_requirement = if !rpm_dep.version.is_empty() {
394                Some(format_rpm_requirement(&rpm_dep))
395            } else {
396                None
397            };
398
399            dependencies.push(Dependency {
400                purl,
401                extracted_requirement,
402                scope: Some("install".to_string()),
403                is_runtime: Some(true),
404                is_optional: Some(false),
405                is_direct: Some(true),
406                resolved_package: None,
407                extra_data: None,
408                is_pinned: Some(!rpm_dep.version.is_empty()),
409            });
410        }
411    }
412
413    dependencies
414}
415
416fn format_rpm_requirement(dep: &rpm::Dependency) -> String {
417    use rpm::DependencyFlags;
418
419    if dep.version.is_empty() {
420        return dep.name.clone();
421    }
422
423    let operator = if dep.flags.contains(DependencyFlags::EQUAL)
424        && dep.flags.contains(DependencyFlags::LESS)
425    {
426        "<="
427    } else if dep.flags.contains(DependencyFlags::EQUAL)
428        && dep.flags.contains(DependencyFlags::GREATER)
429    {
430        ">="
431    } else if dep.flags.contains(DependencyFlags::EQUAL) {
432        "="
433    } else if dep.flags.contains(DependencyFlags::LESS) {
434        "<"
435    } else if dep.flags.contains(DependencyFlags::GREATER) {
436        ">"
437    } else {
438        ""
439    };
440
441    if operator.is_empty() {
442        dep.name.clone()
443    } else {
444        format!("{} {} {}", dep.name, operator, dep.version)
445    }
446}
447
448fn build_evr_version(metadata: &PackageMetadata) -> Option<String> {
449    let version = metadata.get_version().ok()?;
450    let release = metadata.get_release().ok();
451
452    let mut evr = String::from(version);
453
454    if let Some(r) = release {
455        evr.push('-');
456        evr.push_str(r);
457    }
458
459    Some(evr)
460}
461
/// Splits an RPM packager string of the form `Name <email>` into
/// `(name, email)`.
///
/// * The email is the text between the first `<` and the first `>` that
///   follows it. The closing bracket is searched only after the opening one,
///   so a stray `>` earlier in the string cannot produce an invalid slice.
/// * Both parts are trimmed; a part that is empty after trimming is `None`.
/// * When there is no `<...>` pair, the whole input is returned as the name.
fn parse_packager(packager: &str) -> (Option<String>, Option<String>) {
    let non_empty = |text: &str| {
        let text = text.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    };

    let bracketed = packager.split_once('<').and_then(|(name, rest)| {
        rest.split_once('>').map(|(email, _)| (name, email))
    });

    match bracketed {
        Some((name, email)) => (non_empty(name), non_empty(email)),
        None => (Some(packager.to_string()), None),
    }
}
472
473fn build_rpm_purl(
474    name: &str,
475    version: Option<&str>,
476    namespace: Option<&str>,
477    architecture: Option<&str>,
478    is_source: bool,
479) -> Option<String> {
480    use packageurl::PackageUrl;
481
482    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
483
484    if let Some(ns) = namespace {
485        purl.with_namespace(ns).ok()?;
486    }
487
488    if let Some(ver) = version {
489        purl.with_version(ver).ok()?;
490    }
491
492    if let Some(arch) = architecture {
493        purl.add_qualifier("arch", arch).ok()?;
494    }
495
496    if is_source {
497        purl.add_qualifier("source", "true").ok()?;
498    }
499
500    Some(purl.to_string())
501}
502
/// Unit tests for the RPM archive parser.
///
/// Tests that rely on fixtures under `testdata/rpm/` return early when the
/// fixture is not available.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::NamedTempFile;

    #[test]
    fn test_rpm_parser_is_match() {
        assert!(RpmParser::is_match(&PathBuf::from("package.rpm")));
        assert!(RpmParser::is_match(&PathBuf::from("package.srpm")));
        assert!(RpmParser::is_match(&PathBuf::from(
            "test-1.0-1.el7.x86_64.rpm"
        )));
        assert!(!RpmParser::is_match(&PathBuf::from("package.deb")));
        assert!(!RpmParser::is_match(&PathBuf::from("package.tar.gz")));
    }

    #[test]
    fn test_rpm_parser_matches_hash_named_source_rpm_by_magic() {
        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
        if !source_fixture.exists() {
            return;
        }

        // The temp file has no RPM extension, so only the magic bytes can match.
        let temp_file = NamedTempFile::new().unwrap();
        fs::copy(&source_fixture, temp_file.path()).unwrap();

        assert!(RpmParser::is_match(temp_file.path()));
    }

    #[test]
    fn test_build_evr_version_simple() {
        let package = rpm::PackageBuilder::new("demo", "1.0", "MIT", "noarch", "Demo package")
            .release("1")
            .build()
            .unwrap();

        assert_eq!(
            build_evr_version(&package.metadata).as_deref(),
            Some("1.0-1")
        );
    }

    #[test]
    fn test_build_evr_version_with_epoch() {
        let package = rpm::PackageBuilder::new("demo", "1.0", "MIT", "noarch", "Demo package")
            .epoch(2)
            .release("1")
            .build()
            .unwrap();

        // Only the version-release part is pinned here; it must be intact
        // whether or not an epoch is set on the package.
        let evr = build_evr_version(&package.metadata).expect("version should be present");
        assert!(evr.ends_with("1.0-1"));
    }

    #[test]
    fn test_parse_packager() {
        let (name, email) = parse_packager("John Doe <john@example.com>");
        assert_eq!(name, Some("John Doe".to_string()));
        assert_eq!(email, Some("john@example.com".to_string()));

        let (name2, email2) = parse_packager("Plain Name");
        assert_eq!(name2, Some("Plain Name".to_string()));
        assert_eq!(email2, None);
    }

    #[test]
    fn test_build_rpm_purl() {
        let purl = build_rpm_purl(
            "bash",
            Some("4.4.19-1.el7"),
            Some("fedora"),
            Some("x86_64"),
            false,
        );
        assert!(purl.is_some());
        let purl_str = purl.unwrap();
        assert!(purl_str.contains("pkg:rpm/fedora/bash"));
        assert!(purl_str.contains("4.4.19-1.el7"));
        assert!(purl_str.contains("arch=x86_64"));
    }

    #[test]
    fn test_parse_real_rpm() {
        let test_file = PathBuf::from("testdata/rpm/Eterm-0.9.3-5mdv2007.0.rpm");
        if !test_file.exists() {
            eprintln!("Warning: Test file not found, skipping test");
            return;
        }

        let pkg = RpmParser::extract_first_package(&test_file);

        assert_eq!(pkg.package_type, Some(PackageType::Rpm));

        if pkg.name.is_some() {
            assert_eq!(pkg.name, Some("Eterm".to_string()));
            assert!(pkg.version.is_some());
        }
    }

    #[test]
    fn test_build_rpm_purl_no_namespace() {
        let purl = build_rpm_purl("package", Some("1.0-1"), None, Some("x86_64"), false);
        assert!(purl.is_some());
        let purl_str = purl.unwrap();
        assert!(purl_str.starts_with("pkg:rpm/package@"));
        assert!(purl_str.contains("arch=x86_64"));
    }

    #[test]
    fn test_rpm_dependency_extraction() {
        use rpm::{Dependency as RpmDependency, DependencyFlags};

        let rpm_dep = RpmDependency {
            name: "libc.so.6".to_string(),
            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
            version: "2.2.5".to_string(),
        };

        let formatted = format_rpm_requirement(&rpm_dep);
        assert_eq!(formatted, "libc.so.6 >= 2.2.5");

        let rpm_dep_no_version = RpmDependency {
            name: "bash".to_string(),
            flags: DependencyFlags::ANY,
            version: String::new(),
        };

        let formatted_no_ver = format_rpm_requirement(&rpm_dep_no_version);
        assert_eq!(formatted_no_ver, "bash");
    }

    #[test]
    fn test_parse_packager_with_parentheses() {
        let (name, email) = parse_packager("John Doe (Company) <john@example.com>");
        assert_eq!(name, Some("John Doe (Company)".to_string()));
        assert_eq!(email, Some("john@example.com".to_string()));
    }

    #[test]
    fn test_parse_packager_email_only() {
        let (name, email) = parse_packager("<noreply@example.com>");
        assert!(name.is_none() || name == Some(String::new()));
        assert_eq!(email, Some("noreply@example.com".to_string()));
    }

    #[test]
    fn test_rpm_fping_package() {
        let test_file = PathBuf::from("testdata/rpm/fping-2.4b2-10.fc12.x86_64.rpm");
        if !test_file.exists() {
            return;
        }

        let pkg = RpmParser::extract_first_package(&test_file);
        if pkg.name.is_some() {
            assert_eq!(pkg.name, Some("fping".to_string()));
            assert!(pkg.version.is_some());
        }
    }

    #[test]
    fn test_rpm_archive_extracts_additional_metadata_fields() {
        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
        if !test_file.exists() {
            return;
        }

        let pkg = RpmParser::extract_first_package(&test_file);

        assert_eq!(pkg.name.as_deref(), Some("setup"));
        assert_eq!(
            pkg.qualifiers
                .as_ref()
                .and_then(|q| q.get("arch"))
                .map(String::as_str),
            Some("noarch")
        );
        assert!(!pkg.keywords.is_empty());
        assert!(pkg.size.is_some());
        assert!(
            pkg.parties
                .iter()
                .any(|party| party.role.as_deref() == Some("packager"))
        );
        assert!(
            pkg.qualifiers
                .as_ref()
                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
        );
    }

    #[test]
    fn test_source_rpm_sets_source_qualifier() {
        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
        if !test_file.exists() {
            return;
        }

        let pkg = RpmParser::extract_first_package(&test_file);

        assert!(
            pkg.qualifiers
                .as_ref()
                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
        );
        assert!(
            pkg.purl
                .as_ref()
                .is_some_and(|purl| purl.contains("source=true"))
        );
    }

    #[test]
    fn test_rpm_archive_extracts_vcs_and_source_metadata() {
        let package = rpm::PackageBuilder::new(
            "thunar-sendto-clamtk",
            "0.08",
            "GPL-2.0-or-later",
            "noarch",
            "Simple virus scanning extension for Thunar",
        )
        .release("2.fc40")
        .vendor("Fedora Project")
        .packager("Fedora Release Engineering <releng@fedoraproject.org>")
        .group("Applications/System")
        .vcs("git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e")
        .build()
        .unwrap();

        let temp_file = NamedTempFile::new().unwrap();
        package.write_file(temp_file.path()).unwrap();

        let pkg = RpmParser::extract_first_package(temp_file.path());

        assert_eq!(pkg.namespace.as_deref(), Some("fedora"));
        assert_eq!(
            pkg.vcs_url.as_deref(),
            Some(
                "git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e",
            )
        );
        assert!(
            pkg.extra_data
                .as_ref()
                .is_some_and(|extra| extra.contains_key("build_time"))
        );
        assert!(!pkg.keywords.is_empty());
    }
}
740
// Registers the RPM archive parser through the crate-level `register_parser!`
// macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, an empty string whose purpose is not visible here,
// documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "RPM package archive",
    &["**/*.rpm", "**/*.srpm"],
    "rpm",
    "",
    Some("https://rpm.org/"),
);