Skip to main content

provenant/parsers/
rpm_parser.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM package archives.
5//!
6//! Extracts package metadata and dependencies from binary RPM package (.rpm) files
7//! by reading the embedded header metadata.
8//!
9//! # Supported Formats
//! - *.rpm and *.srpm files, plus files with other names that start with the RPM magic bytes
11//!
12//! # Key Features
13//! - Metadata extraction from RPM headers (name, version, release, architecture)
14//! - Dependency extraction (requires, provides, obsoletes)
15//! - License and distribution information parsing
//! - Package URL (purl) generation for the archive itself and for each of its requirements
17//! - Graceful handling of malformed or corrupted RPM files
18//!
19//! # Implementation Notes
20//! - Uses `rpm` crate for low-level RPM format parsing
//! - RPM architecture is captured as the `arch` qualifier; the namespace is inferred from the
//!   distribution, vendor, dist URL, release tag, or file name
22//! - Direct dependency tracking (all requires are direct)
23//! - Error handling with `warn!()` logs on parse failures
24
25use std::fs::{self, File};
26use std::io::{BufReader, Read};
27use std::path::Path;
28use std::sync::LazyLock;
29
30use crate::parser_warn as warn;
31use regex::Regex;
32use rpm::{IndexTag, PackageMetadata, RPM_MAGIC};
33
34use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
35use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
36
37use super::PackageParser;
38use super::license_normalization::{
39    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
40    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
41};
42
43const PACKAGE_TYPE: PackageType = PackageType::Rpm;
44const RPM_HEADER_PARSE_LIMIT_BYTES: u64 = MAX_MANIFEST_SIZE.saturating_add(1);
45
46static RE_RPM_LICENSE_AND: LazyLock<Regex> =
47    LazyLock::new(|| Regex::new(r"(?i)\s+and\s+").expect("valid RPM license AND regex"));
48static RE_RPM_LICENSE_OR: LazyLock<Regex> =
49    LazyLock::new(|| Regex::new(r"(?i)\s+or\s+").expect("valid RPM license OR regex"));
50static RE_RPM_LICENSE_COMMA: LazyLock<Regex> =
51    LazyLock::new(|| Regex::new(r"\s*,\s*").expect("valid RPM license comma regex"));
52static RE_RPM_LICENSE_WITH_EXCEPTIONS: LazyLock<Regex> = LazyLock::new(|| {
53    Regex::new(r"(?i)\s+with\s+exceptions\b").expect("valid RPM license exceptions regex")
54});
55
56fn default_package_data() -> PackageData {
57    PackageData {
58        package_type: Some(PACKAGE_TYPE),
59        datasource_id: Some(DatasourceId::RpmArchive),
60        ..Default::default()
61    }
62}
63
/// Guesses the purl namespace (distribution family) of an RPM.
///
/// `distribution`, `vendor` and `dist_url` are examined in that order; the
/// first of them that contains a known marker decides the result. Only when
/// none of them matches is the `release` tag consulted. All comparisons are
/// ASCII case-insensitive substring checks, and within one field the rules
/// are tried top to bottom, so earlier rules win.
///
/// Returns `None` when nothing matches.
pub(crate) fn infer_rpm_namespace(
    distribution: Option<&str>,
    vendor: Option<&str>,
    release: Option<&str>,
    dist_url: Option<&str>,
) -> Option<String> {
    // (markers, namespace) pairs applied to distribution / vendor / dist URL.
    const IDENTITY_RULES: &[(&[&str], &str)] = &[
        (&["fedora", "koji"], "fedora"),
        (&["centos"], "centos"),
        (&["red hat", "redhat", "ubi"], "rhel"),
        // "opensuse" must be tested before the shorter "suse".
        (&["opensuse"], "opensuse"),
        (&["suse"], "suse"),
        (&["openmandriva", "mandriva"], "openmandriva"),
        (&["mariner"], "mariner"),
    ];
    // (markers, namespace) pairs applied to the release tag.
    const RELEASE_RULES: &[(&[&str], &str)] = &[
        (&[".fc"], "fedora"),
        (&[".el"], "rhel"),
        (&["mdv", "mnb"], "openmandriva"),
        (&["suse"], "suse"),
    ];

    /// Returns the namespace of the first rule with a marker found in `text`.
    fn classify(text: &str, rules: &[(&[&str], &str)]) -> Option<String> {
        let haystack = text.to_ascii_lowercase();
        rules
            .iter()
            .find(|(markers, _)| markers.iter().any(|marker| haystack.contains(*marker)))
            .map(|(_, namespace)| (*namespace).to_string())
    }

    [distribution, vendor, dist_url]
        .iter()
        .flatten()
        .find_map(|field| classify(field, IDENTITY_RULES))
        .or_else(|| release.and_then(|tag| classify(tag, RELEASE_RULES)))
}
113
114fn rpm_header_string(metadata: &PackageMetadata, tag: IndexTag) -> Option<String> {
115    metadata
116        .header
117        .get_entry_data_as_string(tag)
118        .ok()
119        .and_then(|value| {
120            let trimmed = value.trim();
121            if trimmed.is_empty() || trimmed == "(none)" {
122                None
123            } else {
124                Some(trimmed.to_string())
125            }
126        })
127}
128
129fn rpm_header_string_array(metadata: &PackageMetadata, tag: IndexTag) -> Option<Vec<String>> {
130    metadata
131        .header
132        .get_entry_data_as_string_array(tag)
133        .ok()
134        .map(|items| {
135            items
136                .iter()
137                .map(|item| item.trim().to_string())
138                .filter(|item| !item.is_empty() && item != "(none)")
139                .collect::<Vec<_>>()
140        })
141        .filter(|items| !items.is_empty())
142}
143
144fn infer_vcs_url(metadata: &PackageMetadata, source_urls: &[String]) -> Option<String> {
145    if let Ok(vcs) = metadata.get_vcs()
146        && !vcs.trim().is_empty()
147    {
148        return Some(vcs.to_string());
149    }
150
151    source_urls
152        .iter()
153        .find(|url| url.starts_with("git+") || url.contains("src.fedoraproject.org"))
154        .cloned()
155}
156
/// Builds the purl qualifier map for a package.
///
/// A non-empty `architecture` becomes the `arch` qualifier and `is_source`
/// adds `source=true`. Returns `None` when neither applies, so callers never
/// see an empty map.
fn build_rpm_qualifiers(
    architecture: Option<&str>,
    is_source: bool,
) -> Option<std::collections::HashMap<String, String>> {
    let arch_entry = architecture
        .filter(|arch| !arch.is_empty())
        .map(|arch| ("arch", arch));
    let source_entry = if is_source {
        Some(("source", "true"))
    } else {
        None
    };

    let qualifiers: std::collections::HashMap<String, String> = [arch_entry, source_entry]
        .iter()
        .flatten()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

    if qualifiers.is_empty() {
        None
    } else {
        Some(qualifiers)
    }
}
173
/// Reports whether `path` ends in the `rpm` or `srpm` extension.
///
/// The comparison is case-sensitive, and a path without a UTF-8 extension
/// never matches.
pub(crate) fn is_rpm_archive_extension(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("rpm") | Some("srpm"))
}
179
180pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
181    if is_rpm_archive_extension(path) {
182        return true;
183    }
184
185    if fs::metadata(path).is_err() {
186        return false;
187    }
188
189    let mut file = match File::open(path) {
190        Ok(file) => file,
191        Err(_) => return false,
192    };
193    let mut magic = [0_u8; 4];
194    file.read_exact(&mut magic).is_ok() && magic == RPM_MAGIC
195}
196
197fn parse_rpm_metadata_only(path: &Path) -> Result<PackageMetadata, String> {
198    let file =
199        File::open(path).map_err(|e| format!("Failed to open RPM file {:?}: {}", path, e))?;
200    let limited_file = file.take(RPM_HEADER_PARSE_LIMIT_BYTES);
201    let mut reader = BufReader::new(limited_file);
202
203    PackageMetadata::parse(&mut reader)
204        .map_err(|e| format!("Failed to parse RPM file {:?}: {}", path, e))
205}
206
207pub(crate) fn extract_rpm_packages(path: &Path) -> Vec<PackageData> {
208    if let Err(e) = fs::metadata(path) {
209        warn!("Cannot stat RPM file {:?}: {}", path, e);
210        return vec![default_package_data()];
211    }
212
213    let metadata = match parse_rpm_metadata_only(path) {
214        Ok(metadata) => metadata,
215        Err(message) => {
216            warn!("{}", message);
217            return vec![default_package_data()];
218        }
219    };
220
221    vec![parse_rpm_package(&metadata, path)]
222}
223
224/// Parser for RPM package archives
225pub struct RpmParser;
226
227impl PackageParser for RpmParser {
228    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
229
230    fn is_match(path: &Path) -> bool {
231        path_looks_like_rpm_archive(path)
232    }
233
234    fn extract_packages(path: &Path) -> Vec<PackageData> {
235        extract_rpm_packages(path)
236    }
237}
238
/// Guesses the purl namespace from markers in the archive's file name.
///
/// The final path component is lower-cased (ASCII) and checked against the
/// rules below from top to bottom; the first rule with a matching substring
/// wins. Returns `None` when the path has no UTF-8 file name or no rule
/// matches.
pub(crate) fn infer_rpm_namespace_from_filename(path: &Path) -> Option<String> {
    const FILENAME_RULES: &[(&[&str], &str)] = &[
        (&[".fc"], "fedora"),
        (&[".el"], "rhel"),
        (&["mdv", "mnb"], "openmandriva"),
        // "opensuse" must be tested before the shorter "suse".
        (&["opensuse"], "opensuse"),
        (&["suse"], "suse"),
    ];

    let lowered = path.file_name()?.to_str()?.to_ascii_lowercase();

    FILENAME_RULES
        .iter()
        .find(|(markers, _)| markers.iter().any(|marker| lowered.contains(*marker)))
        .map(|(_, namespace)| (*namespace).to_string())
}
260
261fn parse_rpm_package(metadata: &PackageMetadata, path: &Path) -> PackageData {
262    let name = metadata
263        .get_name()
264        .ok()
265        .map(|s| truncate_field(s.to_string()));
266    let version = build_evr_version(metadata).map(truncate_field);
267    let description = metadata
268        .get_description()
269        .ok()
270        .map(|s| truncate_field(s.to_string()));
271    let homepage_url = metadata
272        .get_url()
273        .ok()
274        .map(|s| truncate_field(s.to_string()));
275    let architecture = metadata
276        .get_arch()
277        .ok()
278        .map(|s| truncate_field(s.to_string()));
279    let path_str = path.to_string_lossy();
280    let is_source = metadata.is_source_package()
281        || path_str.ends_with(".src.rpm")
282        || path_str.ends_with(".srpm");
283    let distribution =
284        rpm_header_string(metadata, IndexTag::RPMTAG_DISTRIBUTION).map(truncate_field);
285    let dist_url = rpm_header_string(metadata, IndexTag::RPMTAG_DISTURL).map(truncate_field);
286    let bug_tracking_url = rpm_header_string(metadata, IndexTag::RPMTAG_BUGURL).map(truncate_field);
287    let source_urls =
288        rpm_header_string_array(metadata, IndexTag::RPMTAG_SOURCE).unwrap_or_default();
289    let source_rpm = metadata
290        .get_source_rpm()
291        .ok()
292        .filter(|value| !value.is_empty())
293        .map(|value| truncate_field(value.to_string()));
294    let namespace = infer_rpm_namespace(
295        distribution.as_deref(),
296        metadata.get_vendor().ok(),
297        metadata.get_release().ok(),
298        dist_url.as_deref(),
299    )
300    .or_else(|| infer_rpm_namespace_from_filename(path))
301    .map(truncate_field);
302
303    let mut parties = Vec::new();
304
305    if let Ok(vendor) = metadata.get_vendor()
306        && !vendor.is_empty()
307    {
308        parties.push(Party {
309            r#type: Some("organization".to_string()),
310            role: Some("vendor".to_string()),
311            name: Some(truncate_field(vendor.to_string())),
312            email: None,
313            url: None,
314            organization: None,
315            organization_url: None,
316            timezone: None,
317        });
318    }
319
320    if let Some(distribution_name) = distribution.as_ref() {
321        parties.push(Party {
322            r#type: Some("organization".to_string()),
323            role: Some("distributor".to_string()),
324            name: Some(distribution_name.clone()),
325            email: None,
326            url: None,
327            organization: None,
328            organization_url: None,
329            timezone: None,
330        });
331    }
332
333    if let Ok(packager) = metadata.get_packager()
334        && !packager.is_empty()
335    {
336        let (name_opt, email_opt) = parse_packager(packager);
337        parties.push(Party {
338            r#type: Some("person".to_string()),
339            role: Some("packager".to_string()),
340            name: name_opt.map(truncate_field),
341            email: email_opt.map(truncate_field),
342            url: None,
343            organization: None,
344            organization_url: None,
345            timezone: None,
346        });
347    }
348
349    let extracted_license_statement = metadata
350        .get_license()
351        .ok()
352        .map(|s| truncate_field(s.to_string()));
353    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
354        extracted_license_statement
355            .as_deref()
356            .and_then(normalize_rpm_declared_license)
357            .map(|normalized| {
358                build_declared_license_data(
359                    normalized,
360                    DeclaredLicenseMatchMetadata::single_line(
361                        extracted_license_statement.as_deref().unwrap_or_default(),
362                    ),
363                )
364            })
365            .map(|(expr, spdx, detections)| {
366                (
367                    expr.map(truncate_field),
368                    spdx.map(truncate_field),
369                    detections,
370                )
371            })
372            .unwrap_or_else(empty_declared_license_data);
373
374    let dependencies = extract_rpm_dependencies(metadata, namespace.as_deref());
375
376    let qualifiers = build_rpm_qualifiers(architecture.as_deref(), is_source);
377
378    let mut keywords = Vec::new();
379    if let Ok(group) = metadata.get_group()
380        && !group.is_empty()
381    {
382        keywords.push(truncate_field(group.to_string()));
383    }
384
385    let mut extra_data = std::collections::HashMap::new();
386    if let Some(distribution) = distribution.clone() {
387        extra_data.insert(
388            "distribution".to_string(),
389            serde_json::Value::String(distribution),
390        );
391    }
392    if let Some(dist_url) = dist_url.clone() {
393        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
394    }
395    if let Ok(build_host) = metadata.get_build_host()
396        && !build_host.is_empty()
397    {
398        extra_data.insert(
399            "build_host".to_string(),
400            serde_json::Value::String(build_host.to_string()),
401        );
402    }
403    if let Ok(build_time) = metadata.get_build_time() {
404        extra_data.insert(
405            "build_time".to_string(),
406            serde_json::Value::Number(serde_json::Number::from(build_time)),
407        );
408    }
409    if !source_urls.is_empty() {
410        extra_data.insert(
411            "source_urls".to_string(),
412            serde_json::Value::Array(
413                source_urls
414                    .iter()
415                    .cloned()
416                    .map(serde_json::Value::String)
417                    .collect(),
418            ),
419        );
420    }
421    if let Some(provides) = extract_rpm_relationships(metadata, RpmRelationshipKind::Provides)
422        && !provides.is_empty()
423    {
424        extra_data.insert(
425            "provides".to_string(),
426            serde_json::Value::Array(
427                provides
428                    .into_iter()
429                    .map(serde_json::Value::String)
430                    .collect(),
431            ),
432        );
433    }
434    if let Some(obsoletes) = extract_rpm_relationships(metadata, RpmRelationshipKind::Obsoletes)
435        && !obsoletes.is_empty()
436    {
437        extra_data.insert(
438            "obsoletes".to_string(),
439            serde_json::Value::Array(
440                obsoletes
441                    .into_iter()
442                    .map(serde_json::Value::String)
443                    .collect(),
444            ),
445        );
446    }
447    let vcs_url = infer_vcs_url(metadata, &source_urls).map(truncate_field);
448
449    PackageData {
450        datasource_id: Some(DatasourceId::RpmArchive),
451        package_type: Some(PACKAGE_TYPE),
452        namespace: namespace.clone(),
453        name: name.clone(),
454        version: version.clone(),
455        qualifiers,
456        description,
457        homepage_url,
458        size: metadata.get_installed_size().ok(),
459        parties,
460        keywords,
461        bug_tracking_url,
462        declared_license_expression,
463        declared_license_expression_spdx,
464        license_detections,
465        extracted_license_statement,
466        dependencies,
467        source_packages: source_rpm.into_iter().collect(),
468        vcs_url,
469        extra_data: (!extra_data.is_empty()).then_some(extra_data),
470        purl: name.as_ref().and_then(|n| {
471            build_rpm_purl(
472                n,
473                version.as_deref(),
474                namespace.as_deref(),
475                architecture.as_deref(),
476                is_source,
477            )
478            .map(truncate_field)
479        }),
480        ..Default::default()
481    }
482}
483
484pub(crate) fn normalize_rpm_declared_license(statement: &str) -> Option<NormalizedDeclaredLicense> {
485    let trimmed = statement.trim();
486    if trimmed.is_empty() {
487        return None;
488    }
489
490    let rewritten = canonicalize_rpm_license_statement(trimmed);
491    if let Some(normalized) = normalize_spdx_expression(&rewritten) {
492        return Some(normalized);
493    }
494
495    let is_simple_key = !trimmed.contains(' ')
496        && !trimmed.contains(',')
497        && !trimmed.contains('(')
498        && !trimmed.contains(')');
499    if is_simple_key {
500        return normalize_declared_license_key(trimmed);
501    }
502
503    None
504}
505
506fn canonicalize_rpm_license_statement(statement: &str) -> String {
507    let mut rewritten = statement.trim().to_string();
508
509    for (from, to) in [
510        ("LGPLv2.1+", "LGPL-2.1-or-later"),
511        ("LGPLv2.1", "LGPL-2.1-only"),
512        ("LGPLv2+", "LGPL-2.0-or-later"),
513        ("LGPLv2", "LGPL-2.0-only"),
514        ("LGPLv3+", "LGPL-3.0-or-later"),
515        ("LGPLv3", "LGPL-3.0-only"),
516        ("GPLv2+", "GPL-2.0-or-later"),
517        ("GPLv2", "GPL-2.0-only"),
518        ("GPLv3+", "GPL-3.0-or-later"),
519        ("GPLv3", "GPL-3.0-only"),
520        ("GPLV2+", "GPL-2.0-or-later"),
521        ("MPLv2.0", "MPL-2.0"),
522        ("MPLv1.1", "MPL-1.1"),
523        ("BSD with advertising", "BSD-4-Clause-UC"),
524        ("Public Domain", "LicenseRef-provenant-public-domain"),
525        ("public domain", "LicenseRef-provenant-public-domain"),
526        ("OpenLDAP", "OLDAP-2.8"),
527        ("OpenSSL", "OpenSSL"),
528        ("Sleepycat", "Sleepycat"),
529        ("zlib", "Zlib"),
530        ("Boost", "BSL-1.0"),
531        ("BSD", "BSD-3-Clause"),
532    ] {
533        rewritten = rewritten.replace(from, to);
534    }
535
536    rewritten = RE_RPM_LICENSE_WITH_EXCEPTIONS
537        .replace_all(&rewritten, "")
538        .into_owned();
539    rewritten = RE_RPM_LICENSE_COMMA
540        .replace_all(&rewritten, " AND ")
541        .into_owned();
542    rewritten = RE_RPM_LICENSE_AND
543        .replace_all(&rewritten, " AND ")
544        .into_owned();
545    rewritten = RE_RPM_LICENSE_OR
546        .replace_all(&rewritten, " OR ")
547        .into_owned();
548
549    rewritten.split_whitespace().collect::<Vec<_>>().join(" ")
550}
551
552fn extract_rpm_dependencies(
553    metadata: &PackageMetadata,
554    namespace: Option<&str>,
555) -> Vec<Dependency> {
556    let mut dependencies = Vec::new();
557
558    if let Ok(requires) = metadata.get_requires() {
559        for rpm_dep in requires {
560            if dependencies.len() >= MAX_ITERATION_COUNT {
561                warn!(
562                    "RPM dependency iteration capped at {} items",
563                    MAX_ITERATION_COUNT
564                );
565                break;
566            }
567            let purl = build_rpm_purl(
568                &rpm_dep.name,
569                if rpm_dep.version.is_empty() {
570                    None
571                } else {
572                    Some(&rpm_dep.version)
573                },
574                namespace,
575                None,
576                false,
577            )
578            .map(truncate_field);
579
580            let extracted_requirement = if !rpm_dep.version.is_empty() {
581                Some(truncate_field(format_rpm_requirement(&rpm_dep)))
582            } else {
583                None
584            };
585
586            dependencies.push(Dependency {
587                purl,
588                extracted_requirement,
589                scope: Some("install".to_string()),
590                is_runtime: Some(true),
591                is_optional: Some(false),
592                is_direct: Some(true),
593                resolved_package: None,
594                extra_data: None,
595                is_pinned: Some(!rpm_dep.version.is_empty()),
596            });
597        }
598    }
599
600    dependencies
601}
602
603enum RpmRelationshipKind {
604    Provides,
605    Obsoletes,
606}
607
608fn extract_rpm_relationships(
609    metadata: &PackageMetadata,
610    kind: RpmRelationshipKind,
611) -> Option<Vec<String>> {
612    let relationships = match kind {
613        RpmRelationshipKind::Provides => metadata.get_provides().ok()?,
614        RpmRelationshipKind::Obsoletes => metadata.get_obsoletes().ok()?,
615    };
616
617    let mut count = 0usize;
618    let values: Vec<String> = relationships
619        .into_iter()
620        .take(MAX_ITERATION_COUNT)
621        .map(|dep| format_rpm_requirement(&dep))
622        .filter(|value| !value.is_empty() && value != "(none)")
623        .inspect(|_| count += 1)
624        .collect();
625
626    if count >= MAX_ITERATION_COUNT {
627        warn!(
628            "RPM relationship iteration capped at {} items",
629            MAX_ITERATION_COUNT
630        );
631    }
632
633    (!values.is_empty()).then_some(values)
634}
635
636fn format_rpm_requirement(dep: &rpm::Dependency) -> String {
637    use rpm::DependencyFlags;
638
639    if dep.version.is_empty() {
640        return dep.name.clone();
641    }
642
643    let operator = if dep.flags.contains(DependencyFlags::EQUAL)
644        && dep.flags.contains(DependencyFlags::LESS)
645    {
646        "<="
647    } else if dep.flags.contains(DependencyFlags::EQUAL)
648        && dep.flags.contains(DependencyFlags::GREATER)
649    {
650        ">="
651    } else if dep.flags.contains(DependencyFlags::EQUAL) {
652        "="
653    } else if dep.flags.contains(DependencyFlags::LESS) {
654        "<"
655    } else if dep.flags.contains(DependencyFlags::GREATER) {
656        ">"
657    } else {
658        ""
659    };
660
661    if operator.is_empty() {
662        dep.name.clone()
663    } else {
664        format!("{} {} {}", dep.name, operator, dep.version)
665    }
666}
667
668fn build_evr_version(metadata: &PackageMetadata) -> Option<String> {
669    let version = metadata.get_version().ok()?;
670    let release = metadata.get_release().ok();
671
672    let mut evr = String::from(version);
673
674    if let Some(r) = release {
675        evr.push('-');
676        evr.push_str(r);
677    }
678
679    Some(evr)
680}
681
/// Splits an RPM packager string of the form `Name <email>` into its parts.
///
/// The name is the trimmed text before the first `<`; the email is the text
/// between that `<` and the next `>` after it. When there is no such
/// `<...>` pair, the whole input is returned as the name and the email is
/// `None`.
///
/// The closing `>` is searched for only after the opening `<`, so a stray
/// `>` earlier in the string (for example `"a > b <c@d>"`) can no longer
/// produce a reversed slice range and panic.
fn parse_packager(packager: &str) -> (Option<String>, Option<String>) {
    let bracketed = packager.split_once('<').and_then(|(name, rest)| {
        rest.split_once('>').map(|(email, _trailing)| (name, email))
    });

    match bracketed {
        Some((name, email)) => (Some(name.trim().to_string()), Some(email.to_string())),
        None => (Some(packager.to_string()), None),
    }
}
692
693fn build_rpm_purl(
694    name: &str,
695    version: Option<&str>,
696    namespace: Option<&str>,
697    architecture: Option<&str>,
698    is_source: bool,
699) -> Option<String> {
700    use packageurl::PackageUrl;
701
702    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
703
704    if let Some(ns) = namespace {
705        purl.with_namespace(ns).ok()?;
706    }
707
708    if let Some(ver) = version {
709        purl.with_version(ver).ok()?;
710    }
711
712    if let Some(arch) = architecture {
713        purl.add_qualifier("arch", arch).ok()?;
714    }
715
716    if is_source {
717        purl.add_qualifier("source", "true").ok()?;
718    }
719
720    Some(purl.to_string())
721}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726    use std::fs;
727    use std::path::PathBuf;
728    use tempfile::NamedTempFile;
729
730    fn build_sparse_oversized_rpm(name: &str) -> PathBuf {
731        let package = rpm::PackageBuilder::new(name, "1.0", "MIT", "x86_64", "Demo RPM package")
732            .release("1")
733            .build()
734            .unwrap();
735
736        let temp_file = NamedTempFile::new().unwrap();
737        package.write_file(temp_file.path()).unwrap();
738        let oversized_len = MAX_MANIFEST_SIZE + 1_048_576;
739        fs::OpenOptions::new()
740            .write(true)
741            .open(temp_file.path())
742            .unwrap()
743            .set_len(oversized_len)
744            .unwrap();
745
746        temp_file.into_temp_path().keep().unwrap()
747    }
748
749    #[test]
750    fn test_rpm_parser_is_match() {
751        assert!(RpmParser::is_match(&PathBuf::from("package.rpm")));
752        assert!(RpmParser::is_match(&PathBuf::from("package.srpm")));
753        assert!(RpmParser::is_match(&PathBuf::from(
754            "test-1.0-1.el7.x86_64.rpm"
755        )));
756        assert!(!RpmParser::is_match(&PathBuf::from("package.deb")));
757        assert!(!RpmParser::is_match(&PathBuf::from("package.tar.gz")));
758    }
759
760    #[test]
761    fn test_rpm_parser_matches_hash_named_source_rpm_by_magic() {
762        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
763        if !source_fixture.exists() {
764            return;
765        }
766
767        let temp_file = NamedTempFile::new().unwrap();
768        fs::copy(&source_fixture, temp_file.path()).unwrap();
769
770        assert!(RpmParser::is_match(temp_file.path()));
771    }
772
773    #[test]
774    fn test_rpm_parser_matches_pack_named_rpm_by_magic() {
775        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
776        if !source_fixture.exists() {
777            return;
778        }
779
780        let temp_dir = tempfile::TempDir::new().unwrap();
781        let pack_path = temp_dir.path().join("setup-2.5.49-b1.src.pack");
782        fs::copy(&source_fixture, &pack_path).unwrap();
783
784        assert!(RpmParser::is_match(&pack_path));
785        assert!(path_looks_like_rpm_archive(&pack_path));
786    }
787
788    #[test]
789    fn test_build_evr_version_simple() {
790        let evr = "1.0-1";
791        assert_eq!(evr, "1.0-1");
792    }
793
794    #[test]
795    fn test_build_evr_version_with_epoch() {
796        let evr = "2:1.0-1";
797        assert!(evr.starts_with("2:"));
798    }
799
800    #[test]
801    fn test_parse_packager() {
802        let (name, email) = parse_packager("John Doe <john@example.com>");
803        assert_eq!(name, Some("John Doe".to_string()));
804        assert_eq!(email, Some("john@example.com".to_string()));
805
806        let (name2, email2) = parse_packager("Plain Name");
807        assert_eq!(name2, Some("Plain Name".to_string()));
808        assert_eq!(email2, None);
809    }
810
811    #[test]
812    fn test_build_rpm_purl() {
813        let purl = build_rpm_purl(
814            "bash",
815            Some("4.4.19-1.el7"),
816            Some("fedora"),
817            Some("x86_64"),
818            false,
819        );
820        assert!(purl.is_some());
821        let purl_str = purl.unwrap();
822        assert!(purl_str.contains("pkg:rpm/fedora/bash"));
823        assert!(purl_str.contains("4.4.19-1.el7"));
824        assert!(purl_str.contains("arch=x86_64"));
825    }
826
827    #[test]
828    fn test_parse_real_rpm() {
829        let test_file = PathBuf::from("testdata/rpm/Eterm-0.9.3-5mdv2007.0.rpm");
830        if !test_file.exists() {
831            eprintln!("Warning: Test file not found, skipping test");
832            return;
833        }
834
835        let pkg = RpmParser::extract_first_package(&test_file);
836
837        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
838
839        if pkg.name.is_some() {
840            assert_eq!(pkg.name, Some("Eterm".to_string()));
841            assert!(pkg.version.is_some());
842        }
843    }
844
845    #[test]
846    fn test_parse_oversized_rpm_from_headers_only() {
847        let test_file = build_sparse_oversized_rpm("oversized-demo");
848
849        assert!(RpmParser::is_match(&test_file));
850
851        let pkg = RpmParser::extract_first_package(&test_file);
852
853        assert_eq!(pkg.datasource_id, Some(DatasourceId::RpmArchive));
854        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
855        assert_eq!(pkg.name.as_deref(), Some("oversized-demo"));
856        assert_eq!(pkg.version.as_deref(), Some("1.0-1"));
857
858        fs::remove_file(test_file).unwrap();
859    }
860
861    #[test]
862    fn test_build_rpm_purl_no_namespace() {
863        let purl = build_rpm_purl("package", Some("1.0-1"), None, Some("x86_64"), false);
864        assert!(purl.is_some());
865        let purl_str = purl.unwrap();
866        assert!(purl_str.starts_with("pkg:rpm/package@"));
867        assert!(purl_str.contains("arch=x86_64"));
868    }
869
870    #[test]
871    fn test_rpm_dependency_extraction() {
872        use rpm::{Dependency as RpmDependency, DependencyFlags};
873
874        let rpm_dep = RpmDependency {
875            name: "libc.so.6".to_string(),
876            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
877            version: "2.2.5".to_string(),
878        };
879
880        let formatted = format_rpm_requirement(&rpm_dep);
881        assert_eq!(formatted, "libc.so.6 >= 2.2.5");
882
883        let rpm_dep_no_version = RpmDependency {
884            name: "bash".to_string(),
885            flags: DependencyFlags::ANY,
886            version: String::new(),
887        };
888
889        let formatted_no_ver = format_rpm_requirement(&rpm_dep_no_version);
890        assert_eq!(formatted_no_ver, "bash");
891    }
892
893    #[test]
894    fn test_parse_packager_with_parentheses() {
895        let (name, email) = parse_packager("John Doe (Company) <john@example.com>");
896        assert_eq!(name, Some("John Doe (Company)".to_string()));
897        assert_eq!(email, Some("john@example.com".to_string()));
898    }
899
900    #[test]
901    fn test_parse_packager_email_only() {
902        let (name, email) = parse_packager("<noreply@example.com>");
903        assert!(name.is_none() || name == Some(String::new()));
904        assert_eq!(email, Some("noreply@example.com".to_string()));
905    }
906
907    #[test]
908    fn test_rpm_fping_package() {
909        let test_file = PathBuf::from("testdata/rpm/fping-2.4b2-10.fc12.x86_64.rpm");
910        if !test_file.exists() {
911            return;
912        }
913
914        let pkg = RpmParser::extract_first_package(&test_file);
915        if pkg.name.is_some() {
916            assert_eq!(pkg.name, Some("fping".to_string()));
917            assert!(pkg.version.is_some());
918        }
919    }
920
921    #[test]
922    fn test_rpm_archive_extracts_additional_metadata_fields() {
923        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
924        if !test_file.exists() {
925            return;
926        }
927
928        let pkg = RpmParser::extract_first_package(&test_file);
929
930        assert_eq!(pkg.name.as_deref(), Some("setup"));
931        assert_eq!(
932            pkg.qualifiers
933                .as_ref()
934                .and_then(|q| q.get("arch"))
935                .map(String::as_str),
936            Some("noarch")
937        );
938        assert!(!pkg.keywords.is_empty());
939        assert!(pkg.size.is_some());
940        assert!(
941            pkg.parties
942                .iter()
943                .any(|party| party.role.as_deref() == Some("packager"))
944        );
945        assert!(
946            pkg.qualifiers
947                .as_ref()
948                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
949        );
950    }
951
952    #[test]
953    fn test_source_rpm_sets_source_qualifier() {
954        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
955        if !test_file.exists() {
956            return;
957        }
958
959        let pkg = RpmParser::extract_first_package(&test_file);
960
961        assert!(
962            pkg.qualifiers
963                .as_ref()
964                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
965        );
966        assert!(
967            pkg.purl
968                .as_ref()
969                .is_some_and(|purl| purl.contains("source=true"))
970        );
971    }
972
973    #[test]
974    fn test_rpm_archive_extracts_vcs_and_source_metadata() {
975        let package = rpm::PackageBuilder::new(
976            "thunar-sendto-clamtk",
977            "0.08",
978            "GPL-2.0-or-later",
979            "noarch",
980            "Simple virus scanning extension for Thunar",
981        )
982        .release("2.fc40")
983        .vendor("Fedora Project")
984        .packager("Fedora Release Engineering <releng@fedoraproject.org>")
985        .group("Applications/System")
986        .vcs("git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e")
987        .build()
988        .unwrap();
989
990        let temp_file = NamedTempFile::new().unwrap();
991        package.write_file(temp_file.path()).unwrap();
992
993        let pkg = RpmParser::extract_first_package(temp_file.path());
994
995        assert_eq!(pkg.namespace.as_deref(), Some("fedora"));
996        assert_eq!(
997            pkg.vcs_url.as_deref(),
998            Some(
999                "git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e",
1000            )
1001        );
1002        assert!(
1003            pkg.extra_data
1004                .as_ref()
1005                .is_some_and(|extra| extra.contains_key("build_time"))
1006        );
1007        assert!(!pkg.keywords.is_empty());
1008    }
1009
1010    #[test]
1011    fn test_rpm_archive_preserves_provides_and_obsoletes_relationships() {
1012        use rpm::{Dependency as RpmDependency, DependencyFlags};
1013
1014        let package = rpm::PackageBuilder::new(
1015            "demo-rpm",
1016            "1.0.0",
1017            "MIT",
1018            "noarch",
1019            "RPM relationship metadata fixture",
1020        )
1021        .release("1")
1022        .provides(RpmDependency {
1023            name: "demo-rpm-virtual".to_string(),
1024            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
1025            version: "1.0.0".to_string(),
1026        })
1027        .obsoletes(RpmDependency {
1028            name: "old-demo-rpm".to_string(),
1029            flags: DependencyFlags::LESS,
1030            version: "0.9.0".to_string(),
1031        })
1032        .build()
1033        .unwrap();
1034
1035        let temp_file = NamedTempFile::new().unwrap();
1036        package.write_file(temp_file.path()).unwrap();
1037
1038        let pkg = RpmParser::extract_first_package(temp_file.path());
1039        let extra = pkg.extra_data.as_ref().expect("extra_data should exist");
1040
1041        let provides = extra
1042            .get("provides")
1043            .and_then(|value| value.as_array())
1044            .expect("provides should be present");
1045        assert!(
1046            provides
1047                .iter()
1048                .any(|value| value.as_str() == Some("demo-rpm-virtual >= 1.0.0"))
1049        );
1050
1051        let obsoletes = extra
1052            .get("obsoletes")
1053            .and_then(|value| value.as_array())
1054            .expect("obsoletes should be present");
1055        assert!(
1056            obsoletes
1057                .iter()
1058                .any(|value| value.as_str() == Some("old-demo-rpm < 0.9.0"))
1059        );
1060    }
1061
1062    #[test]
1063    fn test_rpm_archive_normalizes_declared_license_expression() {
1064        let package = rpm::PackageBuilder::new(
1065            "demo-license",
1066            "1.0.0",
1067            "LGPLv2",
1068            "noarch",
1069            "RPM declared license normalization fixture",
1070        )
1071        .release("1")
1072        .build()
1073        .unwrap();
1074
1075        let temp_file = NamedTempFile::new().unwrap();
1076        package.write_file(temp_file.path()).unwrap();
1077
1078        let pkg = RpmParser::extract_first_package(temp_file.path());
1079
1080        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("LGPLv2"));
1081        assert_eq!(
1082            pkg.declared_license_expression.as_deref(),
1083            Some("lgpl-2.0-only")
1084        );
1085        assert_eq!(
1086            pkg.declared_license_expression_spdx.as_deref(),
1087            Some("LGPL-2.0-only")
1088        );
1089        assert_eq!(pkg.license_detections.len(), 1);
1090        assert_eq!(
1091            pkg.license_detections[0].license_expression_spdx,
1092            "LGPL-2.0-only"
1093        );
1094        assert_eq!(
1095            pkg.license_detections[0].matches[0].matched_text.as_deref(),
1096            Some("LGPLv2")
1097        );
1098    }
1099
1100    #[test]
1101    fn test_rpm_archive_normalizes_public_domain_declared_license_expression() {
1102        let package = rpm::PackageBuilder::new(
1103            "demo-public-domain",
1104            "1.0.0",
1105            "public domain",
1106            "noarch",
1107            "RPM public domain normalization fixture",
1108        )
1109        .release("1")
1110        .build()
1111        .unwrap();
1112
1113        let temp_file = NamedTempFile::new().unwrap();
1114        package.write_file(temp_file.path()).unwrap();
1115
1116        let pkg = RpmParser::extract_first_package(temp_file.path());
1117
1118        assert_eq!(
1119            pkg.extracted_license_statement.as_deref(),
1120            Some("public domain")
1121        );
1122        assert_eq!(
1123            pkg.declared_license_expression.as_deref(),
1124            Some("licenseref-provenant-public-domain")
1125        );
1126        assert_eq!(
1127            pkg.declared_license_expression_spdx.as_deref(),
1128            Some("LicenseRef-provenant-public-domain")
1129        );
1130        assert_eq!(pkg.license_detections.len(), 1);
1131    }
1132
1133    #[test]
1134    fn test_normalize_rpm_declared_license_rewrites_compound_aliases() {
1135        let normalized = normalize_rpm_declared_license("BSD and GPLv2+")
1136            .expect("compound RPM license should normalize");
1137
1138        assert_eq!(
1139            normalized.declared_license_expression_spdx,
1140            "BSD-3-Clause AND GPL-2.0-or-later"
1141        );
1142    }
1143}
1144
// Registers the RPM archive parser through the crate-level `register_parser!` macro.
// Both `.rpm` and `.srpm` file names are matched by the glob list below.
// NOTE(review): the macro is defined elsewhere; the arguments appear to be a
// description, path globs, a package type, an (empty) fourth field and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "RPM package archive",
    &["**/*.rpm", "**/*.srpm"],
    "rpm",
    "",
    Some("https://rpm.org/"),
);