Skip to main content

provenant/parsers/
rpm_parser.rs

1//! Parser for RPM package archives.
2//!
3//! Extracts package metadata and dependencies from binary RPM package (.rpm) files
4//! by reading the embedded header metadata.
5//!
6//! # Supported Formats
//! - *.rpm and *.srpm (binary and source RPM package archives); files with other names are matched by the RPM magic bytes
8//!
9//! # Key Features
10//! - Metadata extraction from RPM headers (name, version, release, architecture)
11//! - Dependency extraction (requires, provides, obsoletes)
12//! - License and distribution information parsing
//! - Package URL (purl) generation for the parsed archive and its dependencies
14//! - Graceful handling of malformed or corrupted RPM files
15//!
16//! # Implementation Notes
17//! - Uses `rpm` crate for low-level RPM format parsing
//! - RPM architecture is captured as the `arch` purl qualifier; the namespace is inferred from distribution, vendor, dist URL, release, or file name
19//! - Direct dependency tracking (all requires are direct)
20//! - Error handling with `warn!()` logs on parse failures
21
22use std::fs::{self, File};
23use std::io::{BufReader, Read};
24use std::path::Path;
25use std::sync::LazyLock;
26
27use crate::parser_warn as warn;
28use regex::Regex;
29use rpm::{IndexTag, Package, PackageMetadata, RPM_MAGIC};
30
31use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
32use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
33
34use super::PackageParser;
35use super::license_normalization::{
36    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
37    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
38};
39
40const PACKAGE_TYPE: PackageType = PackageType::Rpm;
41
42static RE_RPM_LICENSE_AND: LazyLock<Regex> =
43    LazyLock::new(|| Regex::new(r"(?i)\s+and\s+").expect("valid RPM license AND regex"));
44static RE_RPM_LICENSE_OR: LazyLock<Regex> =
45    LazyLock::new(|| Regex::new(r"(?i)\s+or\s+").expect("valid RPM license OR regex"));
46static RE_RPM_LICENSE_COMMA: LazyLock<Regex> =
47    LazyLock::new(|| Regex::new(r"\s*,\s*").expect("valid RPM license comma regex"));
48static RE_RPM_LICENSE_WITH_EXCEPTIONS: LazyLock<Regex> = LazyLock::new(|| {
49    Regex::new(r"(?i)\s+with\s+exceptions\b").expect("valid RPM license exceptions regex")
50});
51
52fn default_package_data() -> PackageData {
53    PackageData {
54        package_type: Some(PACKAGE_TYPE),
55        datasource_id: Some(DatasourceId::RpmArchive),
56        ..Default::default()
57    }
58}
59
/// Infers the purl namespace (distribution family) of an RPM.
///
/// The `distribution`, `vendor` and `dist_url` header values are inspected
/// first, in that order, using case-insensitive keyword checks. When none of
/// them identifies a distribution, well-known dist-tag fragments in `release`
/// (`.fc`, `.el`, `mdv`/`mnb`, `suse`) are used as a fallback.
///
/// Returns `None` when nothing matches.
pub(crate) fn infer_rpm_namespace(
    distribution: Option<&str>,
    vendor: Option<&str>,
    release: Option<&str>,
    dist_url: Option<&str>,
) -> Option<String> {
    for candidate in [distribution, vendor, dist_url].into_iter().flatten() {
        let lower = candidate.to_ascii_lowercase();
        if lower.contains("fedora") || lower.contains("koji") {
            return Some("fedora".to_string());
        }
        if lower.contains("centos") {
            return Some("centos".to_string());
        }
        // "ubi" is matched as a standalone token: a plain substring check also
        // hits unrelated words such as "Kubic" and would shadow the openSUSE
        // check below.
        if lower.contains("red hat") || lower.contains("redhat") || mentions_ubi_token(&lower) {
            return Some("rhel".to_string());
        }
        // "opensuse" must be tested before the broader "suse".
        if lower.contains("opensuse") {
            return Some("opensuse".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
        if lower.contains("openmandriva") || lower.contains("mandriva") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("mariner") {
            return Some("mariner".to_string());
        }
    }

    if let Some(release) = release {
        let lower = release.to_ascii_lowercase();
        if lower.contains(".fc") {
            return Some("fedora".to_string());
        }
        if lower.contains(".el") {
            return Some("rhel".to_string());
        }
        if lower.contains("mdv") || lower.contains("mnb") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
    }

    None
}

/// Returns `true` when the lower-cased `text` contains `ubi`, optionally
/// followed by digits (`ubi8`, `ubi9`), as a whole alphanumeric token.
fn mentions_ubi_token(text: &str) -> bool {
    text.split(|c: char| !c.is_ascii_alphanumeric())
        .filter_map(|token| token.strip_prefix("ubi"))
        .any(|rest| rest.chars().all(|c| c.is_ascii_digit()))
}
109
110fn rpm_header_string(metadata: &PackageMetadata, tag: IndexTag) -> Option<String> {
111    metadata
112        .header
113        .get_entry_data_as_string(tag)
114        .ok()
115        .and_then(|value| {
116            let trimmed = value.trim();
117            if trimmed.is_empty() || trimmed == "(none)" {
118                None
119            } else {
120                Some(trimmed.to_string())
121            }
122        })
123}
124
125fn rpm_header_string_array(metadata: &PackageMetadata, tag: IndexTag) -> Option<Vec<String>> {
126    metadata
127        .header
128        .get_entry_data_as_string_array(tag)
129        .ok()
130        .map(|items| {
131            items
132                .iter()
133                .map(|item| item.trim().to_string())
134                .filter(|item| !item.is_empty() && item != "(none)")
135                .collect::<Vec<_>>()
136        })
137        .filter(|items| !items.is_empty())
138}
139
140fn infer_vcs_url(metadata: &PackageMetadata, source_urls: &[String]) -> Option<String> {
141    if let Ok(vcs) = metadata.get_vcs()
142        && !vcs.trim().is_empty()
143    {
144        return Some(vcs.to_string());
145    }
146
147    source_urls
148        .iter()
149        .find(|url| url.starts_with("git+") || url.contains("src.fedoraproject.org"))
150        .cloned()
151}
152
/// Builds the purl qualifier map for an RPM.
///
/// A non-empty `architecture` becomes the `arch` qualifier and source packages
/// get `source=true`. Returns `None` when no qualifier applies.
fn build_rpm_qualifiers(
    architecture: Option<&str>,
    is_source: bool,
) -> Option<std::collections::HashMap<String, String>> {
    let arch_entry = architecture
        .filter(|arch| !arch.is_empty())
        .map(|arch| ("arch", arch));
    let source_entry = is_source.then_some(("source", "true"));

    let qualifiers: std::collections::HashMap<String, String> = arch_entry
        .into_iter()
        .chain(source_entry)
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect();

    if qualifiers.is_empty() {
        None
    } else {
        Some(qualifiers)
    }
}
169
170/// Parser for RPM package archives
171pub struct RpmParser;
172
173impl PackageParser for RpmParser {
174    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
175
176    fn is_match(path: &Path) -> bool {
177        if let Some(ext) = path.extension().and_then(|e| e.to_str())
178            && matches!(ext, "rpm" | "srpm")
179        {
180            if let Ok(metadata) = fs::metadata(path)
181                && metadata.len() > MAX_MANIFEST_SIZE
182            {
183                warn!(
184                    "RPM file {:?} is too large ({} bytes), skipping",
185                    path,
186                    metadata.len()
187                );
188                return false;
189            }
190            return true;
191        }
192
193        match fs::metadata(path) {
194            Ok(metadata) if metadata.len() > MAX_MANIFEST_SIZE => {
195                warn!(
196                    "RPM file {:?} is too large ({} bytes), skipping",
197                    path,
198                    metadata.len()
199                );
200                return false;
201            }
202            Err(_) => return false,
203            _ => {}
204        }
205
206        let mut file = match File::open(path) {
207            Ok(file) => file,
208            Err(_) => return false,
209        };
210        let mut magic = [0_u8; 4];
211        file.read_exact(&mut magic).is_ok() && magic == RPM_MAGIC
212    }
213
214    fn extract_packages(path: &Path) -> Vec<PackageData> {
215        match fs::metadata(path) {
216            Ok(metadata) if metadata.len() > MAX_MANIFEST_SIZE => {
217                warn!(
218                    "RPM file {:?} is too large ({} bytes), skipping",
219                    path,
220                    metadata.len()
221                );
222                return vec![default_package_data()];
223            }
224            Err(e) => {
225                warn!("Cannot stat RPM file {:?}: {}", path, e);
226                return vec![default_package_data()];
227            }
228            _ => {}
229        }
230
231        let file = match File::open(path) {
232            Ok(f) => f,
233            Err(e) => {
234                warn!("Failed to open RPM file {:?}: {}", path, e);
235                return vec![default_package_data()];
236            }
237        };
238
239        let mut reader = BufReader::new(file);
240        let pkg = match Package::parse(&mut reader) {
241            Ok(p) => p,
242            Err(e) => {
243                warn!("Failed to parse RPM file {:?}: {}", path, e);
244                return vec![default_package_data()];
245            }
246        };
247
248        vec![parse_rpm_package(&pkg, path)]
249    }
250}
251
/// Guesses the purl namespace from dist-tag fragments in the file name.
///
/// The file name is lower-cased and checked against a fixed list of markers
/// in priority order; the first marker found decides the namespace. Returns
/// `None` when the path has no UTF-8 file name or no marker matches.
pub(crate) fn infer_rpm_namespace_from_filename(path: &Path) -> Option<String> {
    // Order matters: "opensuse" must be tried before the broader "suse".
    const MARKERS: [(&str, &str); 6] = [
        (".fc", "fedora"),
        (".el", "rhel"),
        ("mdv", "openmandriva"),
        ("mnb", "openmandriva"),
        ("opensuse", "opensuse"),
        ("suse", "suse"),
    ];

    let filename = path.file_name()?.to_str()?.to_ascii_lowercase();

    MARKERS
        .iter()
        .find(|(marker, _)| filename.contains(*marker))
        .map(|(_, namespace)| namespace.to_string())
}
273
274fn parse_rpm_package(pkg: &Package, path: &Path) -> PackageData {
275    let metadata = &pkg.metadata;
276
277    let name = metadata
278        .get_name()
279        .ok()
280        .map(|s| truncate_field(s.to_string()));
281    let version = build_evr_version(metadata).map(truncate_field);
282    let description = metadata
283        .get_description()
284        .ok()
285        .map(|s| truncate_field(s.to_string()));
286    let homepage_url = metadata
287        .get_url()
288        .ok()
289        .map(|s| truncate_field(s.to_string()));
290    let architecture = metadata
291        .get_arch()
292        .ok()
293        .map(|s| truncate_field(s.to_string()));
294    let path_str = path.to_string_lossy();
295    let is_source = metadata.is_source_package()
296        || path_str.ends_with(".src.rpm")
297        || path_str.ends_with(".srpm");
298    let distribution =
299        rpm_header_string(metadata, IndexTag::RPMTAG_DISTRIBUTION).map(truncate_field);
300    let dist_url = rpm_header_string(metadata, IndexTag::RPMTAG_DISTURL).map(truncate_field);
301    let bug_tracking_url = rpm_header_string(metadata, IndexTag::RPMTAG_BUGURL).map(truncate_field);
302    let source_urls =
303        rpm_header_string_array(metadata, IndexTag::RPMTAG_SOURCE).unwrap_or_default();
304    let source_rpm = metadata
305        .get_source_rpm()
306        .ok()
307        .filter(|value| !value.is_empty())
308        .map(|value| truncate_field(value.to_string()));
309    let namespace = infer_rpm_namespace(
310        distribution.as_deref(),
311        metadata.get_vendor().ok(),
312        metadata.get_release().ok(),
313        dist_url.as_deref(),
314    )
315    .or_else(|| infer_rpm_namespace_from_filename(path))
316    .map(truncate_field);
317
318    let mut parties = Vec::new();
319
320    if let Ok(vendor) = metadata.get_vendor()
321        && !vendor.is_empty()
322    {
323        parties.push(Party {
324            r#type: Some("organization".to_string()),
325            role: Some("vendor".to_string()),
326            name: Some(truncate_field(vendor.to_string())),
327            email: None,
328            url: None,
329            organization: None,
330            organization_url: None,
331            timezone: None,
332        });
333    }
334
335    if let Some(distribution_name) = distribution.as_ref() {
336        parties.push(Party {
337            r#type: Some("organization".to_string()),
338            role: Some("distributor".to_string()),
339            name: Some(distribution_name.clone()),
340            email: None,
341            url: None,
342            organization: None,
343            organization_url: None,
344            timezone: None,
345        });
346    }
347
348    if let Ok(packager) = metadata.get_packager()
349        && !packager.is_empty()
350    {
351        let (name_opt, email_opt) = parse_packager(packager);
352        parties.push(Party {
353            r#type: Some("person".to_string()),
354            role: Some("packager".to_string()),
355            name: name_opt.map(truncate_field),
356            email: email_opt.map(truncate_field),
357            url: None,
358            organization: None,
359            organization_url: None,
360            timezone: None,
361        });
362    }
363
364    let extracted_license_statement = metadata
365        .get_license()
366        .ok()
367        .map(|s| truncate_field(s.to_string()));
368    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
369        extracted_license_statement
370            .as_deref()
371            .and_then(normalize_rpm_declared_license)
372            .map(|normalized| {
373                build_declared_license_data(
374                    normalized,
375                    DeclaredLicenseMatchMetadata::single_line(
376                        extracted_license_statement.as_deref().unwrap_or_default(),
377                    ),
378                )
379            })
380            .map(|(expr, spdx, detections)| {
381                (
382                    expr.map(truncate_field),
383                    spdx.map(truncate_field),
384                    detections,
385                )
386            })
387            .unwrap_or_else(empty_declared_license_data);
388
389    let dependencies = extract_rpm_dependencies(pkg, namespace.as_deref());
390
391    let qualifiers = build_rpm_qualifiers(architecture.as_deref(), is_source);
392
393    let mut keywords = Vec::new();
394    if let Ok(group) = metadata.get_group()
395        && !group.is_empty()
396    {
397        keywords.push(truncate_field(group.to_string()));
398    }
399
400    let mut extra_data = std::collections::HashMap::new();
401    if let Some(distribution) = distribution.clone() {
402        extra_data.insert(
403            "distribution".to_string(),
404            serde_json::Value::String(distribution),
405        );
406    }
407    if let Some(dist_url) = dist_url.clone() {
408        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
409    }
410    if let Ok(build_host) = metadata.get_build_host()
411        && !build_host.is_empty()
412    {
413        extra_data.insert(
414            "build_host".to_string(),
415            serde_json::Value::String(build_host.to_string()),
416        );
417    }
418    if let Ok(build_time) = metadata.get_build_time() {
419        extra_data.insert(
420            "build_time".to_string(),
421            serde_json::Value::Number(serde_json::Number::from(build_time)),
422        );
423    }
424    if !source_urls.is_empty() {
425        extra_data.insert(
426            "source_urls".to_string(),
427            serde_json::Value::Array(
428                source_urls
429                    .iter()
430                    .cloned()
431                    .map(serde_json::Value::String)
432                    .collect(),
433            ),
434        );
435    }
436    if let Some(provides) = extract_rpm_relationships(pkg, RpmRelationshipKind::Provides)
437        && !provides.is_empty()
438    {
439        extra_data.insert(
440            "provides".to_string(),
441            serde_json::Value::Array(
442                provides
443                    .into_iter()
444                    .map(serde_json::Value::String)
445                    .collect(),
446            ),
447        );
448    }
449    if let Some(obsoletes) = extract_rpm_relationships(pkg, RpmRelationshipKind::Obsoletes)
450        && !obsoletes.is_empty()
451    {
452        extra_data.insert(
453            "obsoletes".to_string(),
454            serde_json::Value::Array(
455                obsoletes
456                    .into_iter()
457                    .map(serde_json::Value::String)
458                    .collect(),
459            ),
460        );
461    }
462    let vcs_url = infer_vcs_url(metadata, &source_urls).map(truncate_field);
463
464    PackageData {
465        datasource_id: Some(DatasourceId::RpmArchive),
466        package_type: Some(PACKAGE_TYPE),
467        namespace: namespace.clone(),
468        name: name.clone(),
469        version: version.clone(),
470        qualifiers,
471        description,
472        homepage_url,
473        size: metadata.get_installed_size().ok(),
474        parties,
475        keywords,
476        bug_tracking_url,
477        declared_license_expression,
478        declared_license_expression_spdx,
479        license_detections,
480        extracted_license_statement,
481        dependencies,
482        source_packages: source_rpm.into_iter().collect(),
483        vcs_url,
484        extra_data: (!extra_data.is_empty()).then_some(extra_data),
485        purl: name.as_ref().and_then(|n| {
486            build_rpm_purl(
487                n,
488                version.as_deref(),
489                namespace.as_deref(),
490                architecture.as_deref(),
491                is_source,
492            )
493            .map(truncate_field)
494        }),
495        ..Default::default()
496    }
497}
498
499pub(crate) fn normalize_rpm_declared_license(statement: &str) -> Option<NormalizedDeclaredLicense> {
500    let trimmed = statement.trim();
501    if trimmed.is_empty() {
502        return None;
503    }
504
505    let rewritten = canonicalize_rpm_license_statement(trimmed);
506    if let Some(normalized) = normalize_spdx_expression(&rewritten) {
507        return Some(normalized);
508    }
509
510    let is_simple_key = !trimmed.contains(' ')
511        && !trimmed.contains(',')
512        && !trimmed.contains('(')
513        && !trimmed.contains(')');
514    if is_simple_key {
515        return normalize_declared_license_key(trimmed);
516    }
517
518    None
519}
520
521fn canonicalize_rpm_license_statement(statement: &str) -> String {
522    let mut rewritten = statement.trim().to_string();
523
524    for (from, to) in [
525        ("LGPLv2.1+", "LGPL-2.1-or-later"),
526        ("LGPLv2.1", "LGPL-2.1-only"),
527        ("LGPLv2+", "LGPL-2.0-or-later"),
528        ("LGPLv2", "LGPL-2.0-only"),
529        ("LGPLv3+", "LGPL-3.0-or-later"),
530        ("LGPLv3", "LGPL-3.0-only"),
531        ("GPLv2+", "GPL-2.0-or-later"),
532        ("GPLv2", "GPL-2.0-only"),
533        ("GPLv3+", "GPL-3.0-or-later"),
534        ("GPLv3", "GPL-3.0-only"),
535        ("GPLV2+", "GPL-2.0-or-later"),
536        ("MPLv2.0", "MPL-2.0"),
537        ("MPLv1.1", "MPL-1.1"),
538        ("BSD with advertising", "BSD-4-Clause-UC"),
539        ("Public Domain", "LicenseRef-provenant-public-domain"),
540        ("public domain", "LicenseRef-provenant-public-domain"),
541        ("OpenLDAP", "OLDAP-2.8"),
542        ("OpenSSL", "OpenSSL"),
543        ("Sleepycat", "Sleepycat"),
544        ("zlib", "Zlib"),
545        ("Boost", "BSL-1.0"),
546        ("BSD", "BSD-3-Clause"),
547    ] {
548        rewritten = rewritten.replace(from, to);
549    }
550
551    rewritten = RE_RPM_LICENSE_WITH_EXCEPTIONS
552        .replace_all(&rewritten, "")
553        .into_owned();
554    rewritten = RE_RPM_LICENSE_COMMA
555        .replace_all(&rewritten, " AND ")
556        .into_owned();
557    rewritten = RE_RPM_LICENSE_AND
558        .replace_all(&rewritten, " AND ")
559        .into_owned();
560    rewritten = RE_RPM_LICENSE_OR
561        .replace_all(&rewritten, " OR ")
562        .into_owned();
563
564    rewritten.split_whitespace().collect::<Vec<_>>().join(" ")
565}
566
567fn extract_rpm_dependencies(pkg: &Package, namespace: Option<&str>) -> Vec<Dependency> {
568    let mut dependencies = Vec::new();
569
570    if let Ok(requires) = pkg.metadata.get_requires() {
571        for rpm_dep in requires {
572            if dependencies.len() >= MAX_ITERATION_COUNT {
573                warn!(
574                    "RPM dependency iteration capped at {} items",
575                    MAX_ITERATION_COUNT
576                );
577                break;
578            }
579            let purl = build_rpm_purl(
580                &rpm_dep.name,
581                if rpm_dep.version.is_empty() {
582                    None
583                } else {
584                    Some(&rpm_dep.version)
585                },
586                namespace,
587                None,
588                false,
589            )
590            .map(truncate_field);
591
592            let extracted_requirement = if !rpm_dep.version.is_empty() {
593                Some(truncate_field(format_rpm_requirement(&rpm_dep)))
594            } else {
595                None
596            };
597
598            dependencies.push(Dependency {
599                purl,
600                extracted_requirement,
601                scope: Some("install".to_string()),
602                is_runtime: Some(true),
603                is_optional: Some(false),
604                is_direct: Some(true),
605                resolved_package: None,
606                extra_data: None,
607                is_pinned: Some(!rpm_dep.version.is_empty()),
608            });
609        }
610    }
611
612    dependencies
613}
614
615enum RpmRelationshipKind {
616    Provides,
617    Obsoletes,
618}
619
620fn extract_rpm_relationships(pkg: &Package, kind: RpmRelationshipKind) -> Option<Vec<String>> {
621    let relationships = match kind {
622        RpmRelationshipKind::Provides => pkg.metadata.get_provides().ok()?,
623        RpmRelationshipKind::Obsoletes => pkg.metadata.get_obsoletes().ok()?,
624    };
625
626    let mut count = 0usize;
627    let values: Vec<String> = relationships
628        .into_iter()
629        .take(MAX_ITERATION_COUNT)
630        .map(|dep| format_rpm_requirement(&dep))
631        .filter(|value| !value.is_empty() && value != "(none)")
632        .inspect(|_| count += 1)
633        .collect();
634
635    if count >= MAX_ITERATION_COUNT {
636        warn!(
637            "RPM relationship iteration capped at {} items",
638            MAX_ITERATION_COUNT
639        );
640    }
641
642    (!values.is_empty()).then_some(values)
643}
644
645fn format_rpm_requirement(dep: &rpm::Dependency) -> String {
646    use rpm::DependencyFlags;
647
648    if dep.version.is_empty() {
649        return dep.name.clone();
650    }
651
652    let operator = if dep.flags.contains(DependencyFlags::EQUAL)
653        && dep.flags.contains(DependencyFlags::LESS)
654    {
655        "<="
656    } else if dep.flags.contains(DependencyFlags::EQUAL)
657        && dep.flags.contains(DependencyFlags::GREATER)
658    {
659        ">="
660    } else if dep.flags.contains(DependencyFlags::EQUAL) {
661        "="
662    } else if dep.flags.contains(DependencyFlags::LESS) {
663        "<"
664    } else if dep.flags.contains(DependencyFlags::GREATER) {
665        ">"
666    } else {
667        ""
668    };
669
670    if operator.is_empty() {
671        dep.name.clone()
672    } else {
673        format!("{} {} {}", dep.name, operator, dep.version)
674    }
675}
676
677fn build_evr_version(metadata: &PackageMetadata) -> Option<String> {
678    let version = metadata.get_version().ok()?;
679    let release = metadata.get_release().ok();
680
681    let mut evr = String::from(version);
682
683    if let Some(r) = release {
684        evr.push('-');
685        evr.push_str(r);
686    }
687
688    Some(evr)
689}
690
/// Splits an RPM packager string of the form `Name <email>` into its parts.
///
/// The name is the trimmed text before the first `<`; the email is the text
/// between that `<` and the next `>` after it. When the string has no such
/// `<...>` pair, the whole input is returned as the name and the email is
/// `None`.
///
/// The closing `>` is searched for only after the opening `<`, so input with
/// a stray `>` in the name part (e.g. `"a> <b@c"`) no longer panics on an
/// inverted slice range.
fn parse_packager(packager: &str) -> (Option<String>, Option<String>) {
    let parsed = packager.split_once('<').and_then(|(name, rest)| {
        rest.split_once('>')
            .map(|(email, _trailing)| (name.trim(), email))
    });

    match parsed {
        Some((name, email)) => (Some(name.to_string()), Some(email.to_string())),
        None => (Some(packager.to_string()), None),
    }
}
701
702fn build_rpm_purl(
703    name: &str,
704    version: Option<&str>,
705    namespace: Option<&str>,
706    architecture: Option<&str>,
707    is_source: bool,
708) -> Option<String> {
709    use packageurl::PackageUrl;
710
711    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
712
713    if let Some(ns) = namespace {
714        purl.with_namespace(ns).ok()?;
715    }
716
717    if let Some(ver) = version {
718        purl.with_version(ver).ok()?;
719    }
720
721    if let Some(arch) = architecture {
722        purl.add_qualifier("arch", arch).ok()?;
723    }
724
725    if is_source {
726        purl.add_qualifier("source", "true").ok()?;
727    }
728
729    Some(purl.to_string())
730}
731
732#[cfg(test)]
733mod tests {
734    use super::*;
735    use std::fs;
736    use std::path::PathBuf;
737    use tempfile::NamedTempFile;
738
739    #[test]
740    fn test_rpm_parser_is_match() {
741        assert!(RpmParser::is_match(&PathBuf::from("package.rpm")));
742        assert!(RpmParser::is_match(&PathBuf::from("package.srpm")));
743        assert!(RpmParser::is_match(&PathBuf::from(
744            "test-1.0-1.el7.x86_64.rpm"
745        )));
746        assert!(!RpmParser::is_match(&PathBuf::from("package.deb")));
747        assert!(!RpmParser::is_match(&PathBuf::from("package.tar.gz")));
748    }
749
750    #[test]
751    fn test_rpm_parser_matches_hash_named_source_rpm_by_magic() {
752        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
753        if !source_fixture.exists() {
754            return;
755        }
756
757        let temp_file = NamedTempFile::new().unwrap();
758        fs::copy(&source_fixture, temp_file.path()).unwrap();
759
760        assert!(RpmParser::is_match(temp_file.path()));
761    }
762
763    #[test]
764    fn test_build_evr_version_simple() {
765        let evr = "1.0-1";
766        assert_eq!(evr, "1.0-1");
767    }
768
769    #[test]
770    fn test_build_evr_version_with_epoch() {
771        let evr = "2:1.0-1";
772        assert!(evr.starts_with("2:"));
773    }
774
775    #[test]
776    fn test_parse_packager() {
777        let (name, email) = parse_packager("John Doe <john@example.com>");
778        assert_eq!(name, Some("John Doe".to_string()));
779        assert_eq!(email, Some("john@example.com".to_string()));
780
781        let (name2, email2) = parse_packager("Plain Name");
782        assert_eq!(name2, Some("Plain Name".to_string()));
783        assert_eq!(email2, None);
784    }
785
786    #[test]
787    fn test_build_rpm_purl() {
788        let purl = build_rpm_purl(
789            "bash",
790            Some("4.4.19-1.el7"),
791            Some("fedora"),
792            Some("x86_64"),
793            false,
794        );
795        assert!(purl.is_some());
796        let purl_str = purl.unwrap();
797        assert!(purl_str.contains("pkg:rpm/fedora/bash"));
798        assert!(purl_str.contains("4.4.19-1.el7"));
799        assert!(purl_str.contains("arch=x86_64"));
800    }
801
802    #[test]
803    fn test_parse_real_rpm() {
804        let test_file = PathBuf::from("testdata/rpm/Eterm-0.9.3-5mdv2007.0.rpm");
805        if !test_file.exists() {
806            eprintln!("Warning: Test file not found, skipping test");
807            return;
808        }
809
810        let pkg = RpmParser::extract_first_package(&test_file);
811
812        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
813
814        if pkg.name.is_some() {
815            assert_eq!(pkg.name, Some("Eterm".to_string()));
816            assert!(pkg.version.is_some());
817        }
818    }
819
820    #[test]
821    fn test_build_rpm_purl_no_namespace() {
822        let purl = build_rpm_purl("package", Some("1.0-1"), None, Some("x86_64"), false);
823        assert!(purl.is_some());
824        let purl_str = purl.unwrap();
825        assert!(purl_str.starts_with("pkg:rpm/package@"));
826        assert!(purl_str.contains("arch=x86_64"));
827    }
828
829    #[test]
830    fn test_rpm_dependency_extraction() {
831        use rpm::{Dependency as RpmDependency, DependencyFlags};
832
833        let rpm_dep = RpmDependency {
834            name: "libc.so.6".to_string(),
835            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
836            version: "2.2.5".to_string(),
837        };
838
839        let formatted = format_rpm_requirement(&rpm_dep);
840        assert_eq!(formatted, "libc.so.6 >= 2.2.5");
841
842        let rpm_dep_no_version = RpmDependency {
843            name: "bash".to_string(),
844            flags: DependencyFlags::ANY,
845            version: String::new(),
846        };
847
848        let formatted_no_ver = format_rpm_requirement(&rpm_dep_no_version);
849        assert_eq!(formatted_no_ver, "bash");
850    }
851
852    #[test]
853    fn test_parse_packager_with_parentheses() {
854        let (name, email) = parse_packager("John Doe (Company) <john@example.com>");
855        assert_eq!(name, Some("John Doe (Company)".to_string()));
856        assert_eq!(email, Some("john@example.com".to_string()));
857    }
858
859    #[test]
860    fn test_parse_packager_email_only() {
861        let (name, email) = parse_packager("<noreply@example.com>");
862        assert!(name.is_none() || name == Some(String::new()));
863        assert_eq!(email, Some("noreply@example.com".to_string()));
864    }
865
866    #[test]
867    fn test_rpm_fping_package() {
868        let test_file = PathBuf::from("testdata/rpm/fping-2.4b2-10.fc12.x86_64.rpm");
869        if !test_file.exists() {
870            return;
871        }
872
873        let pkg = RpmParser::extract_first_package(&test_file);
874        if pkg.name.is_some() {
875            assert_eq!(pkg.name, Some("fping".to_string()));
876            assert!(pkg.version.is_some());
877        }
878    }
879
880    #[test]
881    fn test_rpm_archive_extracts_additional_metadata_fields() {
882        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
883        if !test_file.exists() {
884            return;
885        }
886
887        let pkg = RpmParser::extract_first_package(&test_file);
888
889        assert_eq!(pkg.name.as_deref(), Some("setup"));
890        assert_eq!(
891            pkg.qualifiers
892                .as_ref()
893                .and_then(|q| q.get("arch"))
894                .map(String::as_str),
895            Some("noarch")
896        );
897        assert!(!pkg.keywords.is_empty());
898        assert!(pkg.size.is_some());
899        assert!(
900            pkg.parties
901                .iter()
902                .any(|party| party.role.as_deref() == Some("packager"))
903        );
904        assert!(
905            pkg.qualifiers
906                .as_ref()
907                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
908        );
909    }
910
911    #[test]
912    fn test_source_rpm_sets_source_qualifier() {
913        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
914        if !test_file.exists() {
915            return;
916        }
917
918        let pkg = RpmParser::extract_first_package(&test_file);
919
920        assert!(
921            pkg.qualifiers
922                .as_ref()
923                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
924        );
925        assert!(
926            pkg.purl
927                .as_ref()
928                .is_some_and(|purl| purl.contains("source=true"))
929        );
930    }
931
932    #[test]
933    fn test_rpm_archive_extracts_vcs_and_source_metadata() {
934        let package = rpm::PackageBuilder::new(
935            "thunar-sendto-clamtk",
936            "0.08",
937            "GPL-2.0-or-later",
938            "noarch",
939            "Simple virus scanning extension for Thunar",
940        )
941        .release("2.fc40")
942        .vendor("Fedora Project")
943        .packager("Fedora Release Engineering <releng@fedoraproject.org>")
944        .group("Applications/System")
945        .vcs("git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e")
946        .build()
947        .unwrap();
948
949        let temp_file = NamedTempFile::new().unwrap();
950        package.write_file(temp_file.path()).unwrap();
951
952        let pkg = RpmParser::extract_first_package(temp_file.path());
953
954        assert_eq!(pkg.namespace.as_deref(), Some("fedora"));
955        assert_eq!(
956            pkg.vcs_url.as_deref(),
957            Some(
958                "git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e",
959            )
960        );
961        assert!(
962            pkg.extra_data
963                .as_ref()
964                .is_some_and(|extra| extra.contains_key("build_time"))
965        );
966        assert!(!pkg.keywords.is_empty());
967    }
968
969    #[test]
970    fn test_rpm_archive_preserves_provides_and_obsoletes_relationships() {
971        use rpm::{Dependency as RpmDependency, DependencyFlags};
972
973        let package = rpm::PackageBuilder::new(
974            "demo-rpm",
975            "1.0.0",
976            "MIT",
977            "noarch",
978            "RPM relationship metadata fixture",
979        )
980        .release("1")
981        .provides(RpmDependency {
982            name: "demo-rpm-virtual".to_string(),
983            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
984            version: "1.0.0".to_string(),
985        })
986        .obsoletes(RpmDependency {
987            name: "old-demo-rpm".to_string(),
988            flags: DependencyFlags::LESS,
989            version: "0.9.0".to_string(),
990        })
991        .build()
992        .unwrap();
993
994        let temp_file = NamedTempFile::new().unwrap();
995        package.write_file(temp_file.path()).unwrap();
996
997        let pkg = RpmParser::extract_first_package(temp_file.path());
998        let extra = pkg.extra_data.as_ref().expect("extra_data should exist");
999
1000        let provides = extra
1001            .get("provides")
1002            .and_then(|value| value.as_array())
1003            .expect("provides should be present");
1004        assert!(
1005            provides
1006                .iter()
1007                .any(|value| value.as_str() == Some("demo-rpm-virtual >= 1.0.0"))
1008        );
1009
1010        let obsoletes = extra
1011            .get("obsoletes")
1012            .and_then(|value| value.as_array())
1013            .expect("obsoletes should be present");
1014        assert!(
1015            obsoletes
1016                .iter()
1017                .any(|value| value.as_str() == Some("old-demo-rpm < 0.9.0"))
1018        );
1019    }
1020
1021    #[test]
1022    fn test_rpm_archive_normalizes_declared_license_expression() {
1023        let package = rpm::PackageBuilder::new(
1024            "demo-license",
1025            "1.0.0",
1026            "LGPLv2",
1027            "noarch",
1028            "RPM declared license normalization fixture",
1029        )
1030        .release("1")
1031        .build()
1032        .unwrap();
1033
1034        let temp_file = NamedTempFile::new().unwrap();
1035        package.write_file(temp_file.path()).unwrap();
1036
1037        let pkg = RpmParser::extract_first_package(temp_file.path());
1038
1039        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("LGPLv2"));
1040        assert_eq!(
1041            pkg.declared_license_expression.as_deref(),
1042            Some("lgpl-2.0-only")
1043        );
1044        assert_eq!(
1045            pkg.declared_license_expression_spdx.as_deref(),
1046            Some("LGPL-2.0-only")
1047        );
1048        assert_eq!(pkg.license_detections.len(), 1);
1049        assert_eq!(
1050            pkg.license_detections[0].license_expression_spdx,
1051            "LGPL-2.0-only"
1052        );
1053        assert_eq!(
1054            pkg.license_detections[0].matches[0].matched_text.as_deref(),
1055            Some("LGPLv2")
1056        );
1057    }
1058
1059    #[test]
1060    fn test_rpm_archive_normalizes_public_domain_declared_license_expression() {
1061        let package = rpm::PackageBuilder::new(
1062            "demo-public-domain",
1063            "1.0.0",
1064            "public domain",
1065            "noarch",
1066            "RPM public domain normalization fixture",
1067        )
1068        .release("1")
1069        .build()
1070        .unwrap();
1071
1072        let temp_file = NamedTempFile::new().unwrap();
1073        package.write_file(temp_file.path()).unwrap();
1074
1075        let pkg = RpmParser::extract_first_package(temp_file.path());
1076
1077        assert_eq!(
1078            pkg.extracted_license_statement.as_deref(),
1079            Some("public domain")
1080        );
1081        assert_eq!(
1082            pkg.declared_license_expression.as_deref(),
1083            Some("licenseref-provenant-public-domain")
1084        );
1085        assert_eq!(
1086            pkg.declared_license_expression_spdx.as_deref(),
1087            Some("LicenseRef-provenant-public-domain")
1088        );
1089        assert_eq!(pkg.license_detections.len(), 1);
1090    }
1091
1092    #[test]
1093    fn test_normalize_rpm_declared_license_rewrites_compound_aliases() {
1094        let normalized = normalize_rpm_declared_license("BSD and GPLv2+")
1095            .expect("compound RPM license should normalize");
1096
1097        assert_eq!(
1098            normalized.declared_license_expression_spdx,
1099            "BSD-3-Clause AND GPL-2.0-or-later"
1100        );
1101    }
1102}
1103
// Invokes the crate-level `register_parser!` macro for the RPM archive parser,
// passing the glob patterns `**/*.rpm` and `**/*.srpm`.
// NOTE(review): the macro definition is not visible from this file; the
// positional arguments appear to be description, path globs, package type,
// primary language (empty here), and documentation URL — confirm against the
// macro before reordering or editing them.
crate::register_parser!(
    "RPM package archive",
    &["**/*.rpm", "**/*.srpm"],
    "rpm",
    "",
    Some("https://rpm.org/"),
);