Skip to main content

provenant/parsers/
rpm_parser.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM package archives.
5//!
6//! Extracts package metadata and dependencies from binary RPM package (.rpm) files
7//! by reading the embedded header metadata.
8//!
9//! # Supported Formats
10//! - *.rpm (binary RPM package archives)
11//!
12//! # Key Features
13//! - Metadata extraction from RPM headers (name, version, release, architecture)
14//! - Dependency extraction (requires, provides, obsoletes)
15//! - License and distribution information parsing
16//! - Package URL (purl) generation for installed packages
17//! - Graceful handling of malformed or corrupted RPM files
18//!
19//! # Implementation Notes
20//! - Uses `rpm` crate for low-level RPM format parsing
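//! - RPM architecture is captured as the `arch` purl qualifier; the namespace is inferred
//!   from distribution, vendor, dist-URL and release metadata (or the file name)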
22//! - Direct dependency tracking (all requires are direct)
23//! - Error handling with `warn!()` logs on parse failures
24
25use std::fs::{self, File};
26use std::io::{BufReader, Read};
27use std::path::Path;
28use std::sync::LazyLock;
29
30use crate::parser_warn as warn;
31use regex::Regex;
32use rpm::{
33    HEADER_MAGIC, INDEX_ENTRY_SIZE, INDEX_HEADER_SIZE, IndexTag, LEAD_SIZE, PackageMetadata,
34    RPM_MAGIC,
35};
36
37use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
38use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
39
40use super::PackageParser;
41use super::license_normalization::{
42    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
43    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
44};
45
/// Package type reported for every package produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Rpm;
/// Upper bound on the number of bytes read from an RPM file when parsing or salvaging its
/// headers: `MAX_MANIFEST_SIZE` plus one, saturating at `u64::MAX`.
const RPM_HEADER_PARSE_LIMIT_BYTES: u64 = MAX_MANIFEST_SIZE.saturating_add(1);

/// Matches a whitespace-delimited, case-insensitive `and` inside a license statement.
static RE_RPM_LICENSE_AND: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)\s+and\s+").expect("valid RPM license AND regex"));
/// Matches a whitespace-delimited, case-insensitive `or` inside a license statement.
static RE_RPM_LICENSE_OR: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)\s+or\s+").expect("valid RPM license OR regex"));
/// Matches a comma together with any whitespace around it.
static RE_RPM_LICENSE_COMMA: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\s*,\s*").expect("valid RPM license comma regex"));
/// Matches a case-insensitive ` with exceptions` phrase (leading whitespace included).
static RE_RPM_LICENSE_WITH_EXCEPTIONS: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?i)\s+with\s+exceptions\b").expect("valid RPM license exceptions regex")
});
58
59fn default_package_data() -> PackageData {
60    PackageData {
61        package_type: Some(PACKAGE_TYPE),
62        datasource_id: Some(DatasourceId::RpmArchive),
63        ..Default::default()
64    }
65}
66
/// Infers the purl namespace (distribution family) of an RPM from its header metadata.
///
/// `distribution`, `vendor` and `dist_url` are inspected in that order, case-insensitively;
/// the first of them that names a known distribution decides the result. If none matches,
/// `release` is searched for well-known dist-tag fragments (`.fc`, `.el`, `mdv`/`mnb`,
/// `suse`).
///
/// Returns `None` when no hint is recognised.
pub(crate) fn infer_rpm_namespace(
    distribution: Option<&str>,
    vendor: Option<&str>,
    release: Option<&str>,
    dist_url: Option<&str>,
) -> Option<String> {
    /// Reports whether `lower` contains `ubi` as a standalone token, optionally followed by
    /// digits (`ubi`, `ubi8`, `ubi9`).
    ///
    /// A plain substring test would also fire on unrelated words such as "kubic" and
    /// misclassify openSUSE packages as RHEL.
    fn mentions_ubi(lower: &str) -> bool {
        lower
            .split(|c: char| !c.is_ascii_alphanumeric())
            .any(|token| {
                token
                    .strip_prefix("ubi")
                    .is_some_and(|rest| rest.chars().all(|c| c.is_ascii_digit()))
            })
    }

    for candidate in [distribution, vendor, dist_url].into_iter().flatten() {
        let lower = candidate.to_ascii_lowercase();
        if lower.contains("fedora") || lower.contains("koji") {
            return Some("fedora".to_string());
        }
        if lower.contains("centos") {
            return Some("centos".to_string());
        }
        if lower.contains("red hat") || lower.contains("redhat") || mentions_ubi(&lower) {
            return Some("rhel".to_string());
        }
        // "opensuse" must be tested before "suse", which it contains.
        if lower.contains("opensuse") {
            return Some("opensuse".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
        if lower.contains("openmandriva") || lower.contains("mandriva") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("mariner") {
            return Some("mariner".to_string());
        }
    }

    // Fall back to the dist tag embedded in the release string.
    if let Some(release) = release {
        let lower = release.to_ascii_lowercase();
        if lower.contains(".fc") {
            return Some("fedora".to_string());
        }
        if lower.contains(".el") {
            return Some("rhel".to_string());
        }
        if lower.contains("mdv") || lower.contains("mnb") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
    }

    None
}
116
117fn rpm_header_string(metadata: &PackageMetadata, tag: IndexTag) -> Option<String> {
118    metadata
119        .header
120        .get_entry_data_as_string(tag)
121        .ok()
122        .and_then(|value| {
123            let trimmed = value.trim();
124            if trimmed.is_empty() || trimmed == "(none)" {
125                None
126            } else {
127                Some(trimmed.to_string())
128            }
129        })
130}
131
132fn rpm_header_string_array(metadata: &PackageMetadata, tag: IndexTag) -> Option<Vec<String>> {
133    metadata
134        .header
135        .get_entry_data_as_string_array(tag)
136        .ok()
137        .map(|items| {
138            items
139                .iter()
140                .map(|item| item.trim().to_string())
141                .filter(|item| !item.is_empty() && item != "(none)")
142                .collect::<Vec<_>>()
143        })
144        .filter(|items| !items.is_empty())
145}
146
147fn infer_vcs_url(metadata: &PackageMetadata, source_urls: &[String]) -> Option<String> {
148    if let Ok(vcs) = metadata.get_vcs()
149        && !vcs.trim().is_empty()
150    {
151        return Some(vcs.to_string());
152    }
153
154    source_urls
155        .iter()
156        .find(|url| url.starts_with("git+") || url.contains("src.fedoraproject.org"))
157        .cloned()
158}
159
/// Builds the qualifier map for an RPM package.
///
/// A non-empty `architecture` becomes the `arch` qualifier and `is_source` adds
/// `source=true`. Returns `None` when neither qualifier applies.
fn build_rpm_qualifiers(
    architecture: Option<&str>,
    is_source: bool,
) -> Option<std::collections::HashMap<String, String>> {
    let arch_entry = architecture
        .filter(|value| !value.is_empty())
        .map(|value| ("arch".to_string(), value.to_string()));
    let source_entry = is_source.then(|| ("source".to_string(), "true".to_string()));

    let qualifiers: std::collections::HashMap<String, String> =
        arch_entry.into_iter().chain(source_entry).collect();

    if qualifiers.is_empty() {
        None
    } else {
        Some(qualifiers)
    }
}
176
/// Reports whether `path` ends in the `rpm` or `srpm` extension.
///
/// The comparison is case-sensitive and looks only at the final extension.
pub(crate) fn is_rpm_archive_extension(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("rpm" | "srpm")
    )
}
182
183pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
184    if is_rpm_archive_extension(path) {
185        return true;
186    }
187
188    if fs::metadata(path).is_err() {
189        return false;
190    }
191
192    let mut file = match File::open(path) {
193        Ok(file) => file,
194        Err(_) => return false,
195    };
196    let mut magic = [0_u8; 4];
197    file.read_exact(&mut magic).is_ok() && magic == RPM_MAGIC
198}
199
200fn parse_rpm_metadata_only(path: &Path) -> Result<PackageMetadata, String> {
201    let file =
202        File::open(path).map_err(|e| format!("Failed to open RPM file {:?}: {}", path, e))?;
203    let limited_file = file.take(RPM_HEADER_PARSE_LIMIT_BYTES);
204    let mut reader = BufReader::new(limited_file);
205
206    PackageMetadata::parse(&mut reader)
207        .map_err(|e| format!("Failed to parse RPM file {:?}: {}", path, e))
208}
209
/// One decoded index entry of an RPM header, as read by `parse_header_entries`.
///
/// Each field is a big-endian `u32` taken from consecutive 4-byte words of the entry.
#[derive(Debug, Clone, Copy)]
struct RpmHeaderEntryView {
    /// Tag number (entry bytes 0..4).
    tag: u32,
    /// Data type code (entry bytes 4..8).
    data_type: u32,
    /// Offset of the entry's data within the header store (entry bytes 8..12).
    offset: usize,
    /// Item count (entry bytes 12..16).
    num_items: usize,
}
217
/// A header section decoded by `parse_header_entries`: its index entries plus a borrowed
/// view of the data store those entries point into.
struct ParsedRpmHeader<'a> {
    /// Index entries in the order they appear in the file.
    entries: Vec<RpmHeaderEntryView>,
    /// Bytes following the index; entry offsets are relative to the start of this slice.
    store: &'a [u8],
}
222
/// String fields recovered directly from the main header by `salvage_rpm_header_fields`
/// when the regular metadata parse fails.
///
/// Each field holds the first string of the corresponding `RPMTAG_*` entry, or `None` when
/// the entry is absent or unreadable.
#[derive(Default)]
struct SalvagedRpmFields {
    /// `RPMTAG_NAME`.
    name: Option<String>,
    /// `RPMTAG_VERSION`.
    version: Option<String>,
    /// `RPMTAG_RELEASE`.
    release: Option<String>,
    /// `RPMTAG_SUMMARY`.
    summary: Option<String>,
    /// `RPMTAG_DESCRIPTION`.
    description: Option<String>,
    /// `RPMTAG_DISTRIBUTION`.
    distribution: Option<String>,
    /// `RPMTAG_VENDOR`.
    vendor: Option<String>,
    /// `RPMTAG_LICENSE`.
    license: Option<String>,
    /// `RPMTAG_PACKAGER`.
    packager: Option<String>,
    /// `RPMTAG_GROUP`.
    group: Option<String>,
    /// `RPMTAG_URL`.
    url: Option<String>,
    /// `RPMTAG_ARCH`.
    arch: Option<String>,
    /// `RPMTAG_SOURCERPM`.
    source_rpm: Option<String>,
    /// `RPMTAG_DISTURL`.
    dist_url: Option<String>,
}
240
241fn read_rpm_header_bytes(path: &Path) -> Result<Vec<u8>, String> {
242    let file =
243        File::open(path).map_err(|e| format!("Failed to open RPM file {:?}: {}", path, e))?;
244    let mut limited_file = file.take(RPM_HEADER_PARSE_LIMIT_BYTES);
245    let mut bytes = Vec::new();
246    limited_file
247        .read_to_end(&mut bytes)
248        .map_err(|e| format!("Failed to read RPM file {:?}: {}", path, e))?;
249    Ok(bytes)
250}
251
252fn parse_index_header(bytes: &[u8], offset: usize) -> Option<(usize, usize)> {
253    let header = bytes.get(offset..offset + INDEX_HEADER_SIZE as usize)?;
254    if header.get(..3)? != HEADER_MAGIC {
255        return None;
256    }
257    if header.get(3).copied()? != 1 {
258        return None;
259    }
260
261    let num_entries = u32::from_be_bytes(header.get(8..12)?.try_into().ok()?) as usize;
262    let data_section_size = u32::from_be_bytes(header.get(12..16)?.try_into().ok()?) as usize;
263    Some((num_entries, data_section_size))
264}
265
266fn parse_header_entries<'a>(
267    bytes: &'a [u8],
268    offset: usize,
269    allow_truncated_store: bool,
270) -> Option<(ParsedRpmHeader<'a>, usize)> {
271    let (num_entries, data_section_size) = parse_index_header(bytes, offset)?;
272    let entries_offset = offset.checked_add(INDEX_HEADER_SIZE as usize)?;
273    let entries_size = num_entries.checked_mul(INDEX_ENTRY_SIZE as usize)?;
274    let store_offset = entries_offset.checked_add(entries_size)?;
275    bytes.get(entries_offset..store_offset)?;
276    let store_end = store_offset.checked_add(data_section_size)?;
277    let store = if allow_truncated_store {
278        bytes.get(store_offset..).unwrap_or(&[])
279    } else {
280        bytes.get(store_offset..store_end)?
281    };
282
283    let mut entries = Vec::with_capacity(num_entries);
284    for index in 0..num_entries {
285        let entry_offset =
286            entries_offset.checked_add(index.checked_mul(INDEX_ENTRY_SIZE as usize)?)?;
287        let entry = bytes.get(entry_offset..entry_offset + INDEX_ENTRY_SIZE as usize)?;
288        entries.push(RpmHeaderEntryView {
289            tag: u32::from_be_bytes(entry.get(0..4)?.try_into().ok()?),
290            data_type: u32::from_be_bytes(entry.get(4..8)?.try_into().ok()?),
291            offset: u32::from_be_bytes(entry.get(8..12)?.try_into().ok()?) as usize,
292            num_items: u32::from_be_bytes(entry.get(12..16)?.try_into().ok()?) as usize,
293        });
294    }
295
296    Some((ParsedRpmHeader { entries, store }, store_end))
297}
298
299fn parse_main_rpm_header(bytes: &[u8]) -> Option<ParsedRpmHeader<'_>> {
300    if bytes.get(..RPM_MAGIC.len())? != RPM_MAGIC {
301        return None;
302    }
303
304    let (_, signature_end) = parse_header_entries(bytes, LEAD_SIZE as usize, false)?;
305    let signature_padding = (8 - (signature_end - (LEAD_SIZE as usize)) % 8) % 8;
306    let main_header_offset = signature_end.checked_add(signature_padding)?;
307    let (header, _) = parse_header_entries(bytes, main_header_offset, true)?;
308    Some(header)
309}
310
/// Reads the NUL-terminated string that starts at `offset` in `store`.
///
/// The bytes are decoded lossily as UTF-8 and trimmed. Returns the text together with the
/// offset just past the terminator, or `None` when `offset` is out of range, no terminator
/// is found, or the trimmed text is empty or `(none)`.
fn read_header_string(store: &[u8], offset: usize) -> Option<(String, usize)> {
    let tail = store.get(offset..)?;
    let len = tail.iter().position(|&byte| byte == 0)?;
    let next_offset = offset.checked_add(len)?.checked_add(1)?;

    let decoded = String::from_utf8_lossy(&tail[..len]);
    match decoded.trim() {
        "" | "(none)" => None,
        text => Some((text.to_string(), next_offset)),
    }
}
324
325fn read_entry_first_string(header: &ParsedRpmHeader<'_>, tag: u32) -> Option<String> {
326    let entry = header.entries.iter().find(|entry| entry.tag == tag)?;
327    match entry.data_type {
328        6 => read_header_string(header.store, entry.offset).map(|(value, _)| value),
329        8 | 9 => {
330            let mut offset = entry.offset;
331            let mut first_value = None;
332            for _ in 0..entry.num_items {
333                let (value, next_offset) = read_header_string(header.store, offset)?;
334                first_value.get_or_insert(value);
335                offset = next_offset;
336            }
337            first_value
338        }
339        _ => None,
340    }
341}
342
343fn salvage_rpm_header_fields(path: &Path) -> Option<SalvagedRpmFields> {
344    let bytes = read_rpm_header_bytes(path).ok()?;
345    let header = parse_main_rpm_header(&bytes)?;
346
347    Some(SalvagedRpmFields {
348        name: read_entry_first_string(&header, IndexTag::RPMTAG_NAME as u32).map(truncate_field),
349        version: read_entry_first_string(&header, IndexTag::RPMTAG_VERSION as u32)
350            .map(truncate_field),
351        release: read_entry_first_string(&header, IndexTag::RPMTAG_RELEASE as u32)
352            .map(truncate_field),
353        summary: read_entry_first_string(&header, IndexTag::RPMTAG_SUMMARY as u32)
354            .map(truncate_field),
355        description: read_entry_first_string(&header, IndexTag::RPMTAG_DESCRIPTION as u32)
356            .map(truncate_field),
357        distribution: read_entry_first_string(&header, IndexTag::RPMTAG_DISTRIBUTION as u32)
358            .map(truncate_field),
359        vendor: read_entry_first_string(&header, IndexTag::RPMTAG_VENDOR as u32)
360            .map(truncate_field),
361        license: read_entry_first_string(&header, IndexTag::RPMTAG_LICENSE as u32)
362            .map(truncate_field),
363        packager: read_entry_first_string(&header, IndexTag::RPMTAG_PACKAGER as u32)
364            .map(truncate_field),
365        group: read_entry_first_string(&header, IndexTag::RPMTAG_GROUP as u32).map(truncate_field),
366        url: read_entry_first_string(&header, IndexTag::RPMTAG_URL as u32).map(truncate_field),
367        arch: read_entry_first_string(&header, IndexTag::RPMTAG_ARCH as u32).map(truncate_field),
368        source_rpm: read_entry_first_string(&header, IndexTag::RPMTAG_SOURCERPM as u32)
369            .map(truncate_field),
370        dist_url: read_entry_first_string(&header, IndexTag::RPMTAG_DISTURL as u32)
371            .map(truncate_field),
372    })
373}
374
375fn build_salvaged_rpm_package(path: &Path, fields: SalvagedRpmFields) -> Option<PackageData> {
376    let name = fields.name?;
377    let mut version = fields.version;
378    if let Some(release) = fields.release.as_deref() {
379        let mut evr = version.take().unwrap_or_default();
380        if !evr.is_empty() {
381            evr.push('-');
382        }
383        evr.push_str(release);
384        version = Some(truncate_field(evr));
385    }
386
387    let namespace = infer_rpm_namespace(
388        fields.distribution.as_deref(),
389        fields.vendor.as_deref(),
390        fields.release.as_deref(),
391        fields.dist_url.as_deref(),
392    )
393    .or_else(|| infer_rpm_namespace_from_filename(path))
394    .map(truncate_field);
395    let is_source =
396        path.to_string_lossy().ends_with(".src.rpm") || path.to_string_lossy().ends_with(".srpm");
397    let qualifiers = build_rpm_qualifiers(fields.arch.as_deref(), is_source);
398
399    let mut parties = Vec::new();
400    if let Some(vendor) = fields.vendor.clone() {
401        parties.push(Party {
402            r#type: Some("organization".to_string()),
403            role: Some("vendor".to_string()),
404            name: Some(vendor),
405            email: None,
406            url: None,
407            organization: None,
408            organization_url: None,
409            timezone: None,
410        });
411    }
412    if let Some(distribution) = fields.distribution.clone() {
413        parties.push(Party {
414            r#type: Some("organization".to_string()),
415            role: Some("distributor".to_string()),
416            name: Some(distribution),
417            email: None,
418            url: None,
419            organization: None,
420            organization_url: None,
421            timezone: None,
422        });
423    }
424    if let Some(packager) = fields.packager.as_deref() {
425        let (name_opt, email_opt) = parse_packager(packager);
426        parties.push(Party {
427            r#type: Some("person".to_string()),
428            role: Some("packager".to_string()),
429            name: name_opt.map(truncate_field),
430            email: email_opt.map(truncate_field),
431            url: None,
432            organization: None,
433            organization_url: None,
434            timezone: None,
435        });
436    }
437
438    let extracted_license_statement = fields.license.map(truncate_field);
439    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
440        extracted_license_statement
441            .as_deref()
442            .and_then(normalize_rpm_declared_license)
443            .map(|normalized| {
444                build_declared_license_data(
445                    normalized,
446                    DeclaredLicenseMatchMetadata::single_line(
447                        extracted_license_statement.as_deref().unwrap_or_default(),
448                    ),
449                )
450            })
451            .map(|(expr, spdx, detections)| {
452                (
453                    expr.map(truncate_field),
454                    spdx.map(truncate_field),
455                    detections,
456                )
457            })
458            .unwrap_or_else(empty_declared_license_data);
459
460    let mut extra_data = std::collections::HashMap::new();
461    if let Some(distribution) = fields.distribution.clone() {
462        extra_data.insert(
463            "distribution".to_string(),
464            serde_json::Value::String(distribution),
465        );
466    }
467    if let Some(dist_url) = fields.dist_url.clone() {
468        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
469    }
470
471    Some(PackageData {
472        datasource_id: Some(DatasourceId::RpmArchive),
473        package_type: Some(PACKAGE_TYPE),
474        namespace: namespace.clone(),
475        name: Some(name.clone()),
476        version: version.clone(),
477        qualifiers,
478        description: fields.description.or(fields.summary),
479        homepage_url: fields.url,
480        parties,
481        keywords: fields.group.into_iter().collect(),
482        declared_license_expression,
483        declared_license_expression_spdx,
484        license_detections,
485        extracted_license_statement,
486        source_packages: fields.source_rpm.into_iter().collect(),
487        extra_data: (!extra_data.is_empty()).then_some(extra_data),
488        purl: build_rpm_purl(
489            &name,
490            version.as_deref(),
491            namespace.as_deref(),
492            fields.arch.as_deref(),
493            is_source,
494        )
495        .map(truncate_field),
496        ..Default::default()
497    })
498}
499
500pub(crate) fn extract_rpm_packages(path: &Path) -> Vec<PackageData> {
501    if let Err(e) = fs::metadata(path) {
502        warn!("Cannot stat RPM file {:?}: {}", path, e);
503        return vec![default_package_data()];
504    }
505
506    let metadata = match parse_rpm_metadata_only(path) {
507        Ok(metadata) => metadata,
508        Err(message) => {
509            if let Some(package) = salvage_rpm_header_fields(path)
510                .and_then(|fields| build_salvaged_rpm_package(path, fields))
511            {
512                return vec![package];
513            }
514            warn!("{}", message);
515            return vec![default_package_data()];
516        }
517    };
518
519    vec![parse_rpm_package(&metadata, path)]
520}
521
/// Parser for RPM package archives.
///
/// Both trait methods delegate to the free functions in this module.
pub struct RpmParser;

impl PackageParser for RpmParser {
    /// Package type produced by this parser.
    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;

    /// Returns whether `path` looks like an RPM archive (by extension or magic bytes).
    fn is_match(path: &Path) -> bool {
        path_looks_like_rpm_archive(path)
    }

    /// Extracts package data from the RPM archive at `path`.
    fn extract_packages(path: &Path) -> Vec<PackageData> {
        extract_rpm_packages(path)
    }
}
536
/// Guesses the purl namespace from dist-tag fragments in the file name of `path`.
///
/// Only the final path component is examined, case-insensitively. Returns `None` when the
/// path has no UTF-8 file name or no known fragment is present.
pub(crate) fn infer_rpm_namespace_from_filename(path: &Path) -> Option<String> {
    // Checked in order; "opensuse" must precede "suse", which it contains.
    const FILENAME_MARKERS: [(&str, &str); 6] = [
        (".fc", "fedora"),
        (".el", "rhel"),
        ("mdv", "openmandriva"),
        ("mnb", "openmandriva"),
        ("opensuse", "opensuse"),
        ("suse", "suse"),
    ];

    let filename = path.file_name()?.to_str()?.to_ascii_lowercase();

    FILENAME_MARKERS
        .iter()
        .find(|(marker, _)| filename.contains(*marker))
        .map(|(_, namespace)| (*namespace).to_string())
}
558
559fn parse_rpm_package(metadata: &PackageMetadata, path: &Path) -> PackageData {
560    let name = metadata
561        .get_name()
562        .ok()
563        .map(|s| truncate_field(s.to_string()));
564    let version = build_evr_version(metadata).map(truncate_field);
565    let description = metadata
566        .get_description()
567        .ok()
568        .map(|s| truncate_field(s.to_string()));
569    let homepage_url = metadata
570        .get_url()
571        .ok()
572        .map(|s| truncate_field(s.to_string()));
573    let architecture = metadata
574        .get_arch()
575        .ok()
576        .map(|s| truncate_field(s.to_string()));
577    let path_str = path.to_string_lossy();
578    let is_source = metadata.is_source_package()
579        || path_str.ends_with(".src.rpm")
580        || path_str.ends_with(".srpm");
581    let distribution =
582        rpm_header_string(metadata, IndexTag::RPMTAG_DISTRIBUTION).map(truncate_field);
583    let dist_url = rpm_header_string(metadata, IndexTag::RPMTAG_DISTURL).map(truncate_field);
584    let bug_tracking_url = rpm_header_string(metadata, IndexTag::RPMTAG_BUGURL).map(truncate_field);
585    let source_urls =
586        rpm_header_string_array(metadata, IndexTag::RPMTAG_SOURCE).unwrap_or_default();
587    let source_rpm = metadata
588        .get_source_rpm()
589        .ok()
590        .filter(|value| !value.is_empty())
591        .map(|value| truncate_field(value.to_string()));
592    let namespace = infer_rpm_namespace(
593        distribution.as_deref(),
594        metadata.get_vendor().ok(),
595        metadata.get_release().ok(),
596        dist_url.as_deref(),
597    )
598    .or_else(|| infer_rpm_namespace_from_filename(path))
599    .map(truncate_field);
600
601    let mut parties = Vec::new();
602
603    if let Ok(vendor) = metadata.get_vendor()
604        && !vendor.is_empty()
605    {
606        parties.push(Party {
607            r#type: Some("organization".to_string()),
608            role: Some("vendor".to_string()),
609            name: Some(truncate_field(vendor.to_string())),
610            email: None,
611            url: None,
612            organization: None,
613            organization_url: None,
614            timezone: None,
615        });
616    }
617
618    if let Some(distribution_name) = distribution.as_ref() {
619        parties.push(Party {
620            r#type: Some("organization".to_string()),
621            role: Some("distributor".to_string()),
622            name: Some(distribution_name.clone()),
623            email: None,
624            url: None,
625            organization: None,
626            organization_url: None,
627            timezone: None,
628        });
629    }
630
631    if let Ok(packager) = metadata.get_packager()
632        && !packager.is_empty()
633    {
634        let (name_opt, email_opt) = parse_packager(packager);
635        parties.push(Party {
636            r#type: Some("person".to_string()),
637            role: Some("packager".to_string()),
638            name: name_opt.map(truncate_field),
639            email: email_opt.map(truncate_field),
640            url: None,
641            organization: None,
642            organization_url: None,
643            timezone: None,
644        });
645    }
646
647    let extracted_license_statement = metadata
648        .get_license()
649        .ok()
650        .map(|s| truncate_field(s.to_string()));
651    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
652        extracted_license_statement
653            .as_deref()
654            .and_then(normalize_rpm_declared_license)
655            .map(|normalized| {
656                build_declared_license_data(
657                    normalized,
658                    DeclaredLicenseMatchMetadata::single_line(
659                        extracted_license_statement.as_deref().unwrap_or_default(),
660                    ),
661                )
662            })
663            .map(|(expr, spdx, detections)| {
664                (
665                    expr.map(truncate_field),
666                    spdx.map(truncate_field),
667                    detections,
668                )
669            })
670            .unwrap_or_else(empty_declared_license_data);
671
672    let dependencies = extract_rpm_dependencies(metadata, namespace.as_deref());
673
674    let qualifiers = build_rpm_qualifiers(architecture.as_deref(), is_source);
675
676    let mut keywords = Vec::new();
677    if let Ok(group) = metadata.get_group()
678        && !group.is_empty()
679    {
680        keywords.push(truncate_field(group.to_string()));
681    }
682
683    let mut extra_data = std::collections::HashMap::new();
684    if let Some(distribution) = distribution.clone() {
685        extra_data.insert(
686            "distribution".to_string(),
687            serde_json::Value::String(distribution),
688        );
689    }
690    if let Some(dist_url) = dist_url.clone() {
691        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
692    }
693    if let Ok(build_host) = metadata.get_build_host()
694        && !build_host.is_empty()
695    {
696        extra_data.insert(
697            "build_host".to_string(),
698            serde_json::Value::String(build_host.to_string()),
699        );
700    }
701    if let Ok(build_time) = metadata.get_build_time() {
702        extra_data.insert(
703            "build_time".to_string(),
704            serde_json::Value::Number(serde_json::Number::from(build_time)),
705        );
706    }
707    if !source_urls.is_empty() {
708        extra_data.insert(
709            "source_urls".to_string(),
710            serde_json::Value::Array(
711                source_urls
712                    .iter()
713                    .cloned()
714                    .map(serde_json::Value::String)
715                    .collect(),
716            ),
717        );
718    }
719    if let Some(provides) = extract_rpm_relationships(metadata, RpmRelationshipKind::Provides)
720        && !provides.is_empty()
721    {
722        extra_data.insert(
723            "provides".to_string(),
724            serde_json::Value::Array(
725                provides
726                    .into_iter()
727                    .map(serde_json::Value::String)
728                    .collect(),
729            ),
730        );
731    }
732    if let Some(obsoletes) = extract_rpm_relationships(metadata, RpmRelationshipKind::Obsoletes)
733        && !obsoletes.is_empty()
734    {
735        extra_data.insert(
736            "obsoletes".to_string(),
737            serde_json::Value::Array(
738                obsoletes
739                    .into_iter()
740                    .map(serde_json::Value::String)
741                    .collect(),
742            ),
743        );
744    }
745    let vcs_url = infer_vcs_url(metadata, &source_urls).map(truncate_field);
746
747    PackageData {
748        datasource_id: Some(DatasourceId::RpmArchive),
749        package_type: Some(PACKAGE_TYPE),
750        namespace: namespace.clone(),
751        name: name.clone(),
752        version: version.clone(),
753        qualifiers,
754        description,
755        homepage_url,
756        size: metadata.get_installed_size().ok(),
757        parties,
758        keywords,
759        bug_tracking_url,
760        declared_license_expression,
761        declared_license_expression_spdx,
762        license_detections,
763        extracted_license_statement,
764        dependencies,
765        source_packages: source_rpm.into_iter().collect(),
766        vcs_url,
767        extra_data: (!extra_data.is_empty()).then_some(extra_data),
768        purl: name.as_ref().and_then(|n| {
769            build_rpm_purl(
770                n,
771                version.as_deref(),
772                namespace.as_deref(),
773                architecture.as_deref(),
774                is_source,
775            )
776            .map(truncate_field)
777        }),
778        ..Default::default()
779    }
780}
781
782pub(crate) fn normalize_rpm_declared_license(statement: &str) -> Option<NormalizedDeclaredLicense> {
783    let trimmed = statement.trim();
784    if trimmed.is_empty() {
785        return None;
786    }
787
788    let rewritten = canonicalize_rpm_license_statement(trimmed);
789    if let Some(normalized) = normalize_spdx_expression(&rewritten) {
790        return Some(normalized);
791    }
792
793    let is_simple_key = !trimmed.contains(' ')
794        && !trimmed.contains(',')
795        && !trimmed.contains('(')
796        && !trimmed.contains(')');
797    if is_simple_key {
798        return normalize_declared_license_key(trimmed);
799    }
800
801    None
802}
803
804fn canonicalize_rpm_license_statement(statement: &str) -> String {
805    let mut rewritten = statement.trim().to_string();
806
807    for (from, to) in [
808        ("LGPLv2.1+", "LGPL-2.1-or-later"),
809        ("LGPLv2.1", "LGPL-2.1-only"),
810        ("LGPLv2+", "LGPL-2.0-or-later"),
811        ("LGPLv2", "LGPL-2.0-only"),
812        ("LGPLv3+", "LGPL-3.0-or-later"),
813        ("LGPLv3", "LGPL-3.0-only"),
814        ("GPLv2+", "GPL-2.0-or-later"),
815        ("GPLv2", "GPL-2.0-only"),
816        ("GPLv3+", "GPL-3.0-or-later"),
817        ("GPLv3", "GPL-3.0-only"),
818        ("GPLV2+", "GPL-2.0-or-later"),
819        ("MPLv2.0", "MPL-2.0"),
820        ("MPLv1.1", "MPL-1.1"),
821        ("BSD with advertising", "BSD-4-Clause-UC"),
822        ("Public Domain", "LicenseRef-provenant-public-domain"),
823        ("public domain", "LicenseRef-provenant-public-domain"),
824        ("OpenLDAP", "OLDAP-2.8"),
825        ("OpenSSL", "OpenSSL"),
826        ("Sleepycat", "Sleepycat"),
827        ("zlib", "Zlib"),
828        ("Boost", "BSL-1.0"),
829        ("BSD", "BSD-3-Clause"),
830    ] {
831        rewritten = rewritten.replace(from, to);
832    }
833
834    rewritten = RE_RPM_LICENSE_WITH_EXCEPTIONS
835        .replace_all(&rewritten, "")
836        .into_owned();
837    rewritten = RE_RPM_LICENSE_COMMA
838        .replace_all(&rewritten, " AND ")
839        .into_owned();
840    rewritten = RE_RPM_LICENSE_AND
841        .replace_all(&rewritten, " AND ")
842        .into_owned();
843    rewritten = RE_RPM_LICENSE_OR
844        .replace_all(&rewritten, " OR ")
845        .into_owned();
846
847    rewritten.split_whitespace().collect::<Vec<_>>().join(" ")
848}
849
850fn extract_rpm_dependencies(
851    metadata: &PackageMetadata,
852    namespace: Option<&str>,
853) -> Vec<Dependency> {
854    let mut dependencies = Vec::new();
855
856    if let Ok(requires) = metadata.get_requires() {
857        for rpm_dep in requires {
858            if dependencies.len() >= MAX_ITERATION_COUNT {
859                warn!(
860                    "RPM dependency iteration capped at {} items",
861                    MAX_ITERATION_COUNT
862                );
863                break;
864            }
865            let purl = build_rpm_purl(
866                &rpm_dep.name,
867                if rpm_dep.version.is_empty() {
868                    None
869                } else {
870                    Some(&rpm_dep.version)
871                },
872                namespace,
873                None,
874                false,
875            )
876            .map(truncate_field);
877
878            let extracted_requirement = if !rpm_dep.version.is_empty() {
879                Some(truncate_field(format_rpm_requirement(&rpm_dep)))
880            } else {
881                None
882            };
883
884            dependencies.push(Dependency {
885                purl,
886                extracted_requirement,
887                scope: Some("install".to_string()),
888                is_runtime: Some(true),
889                is_optional: Some(false),
890                is_direct: Some(true),
891                resolved_package: None,
892                extra_data: None,
893                is_pinned: Some(!rpm_dep.version.is_empty()),
894            });
895        }
896    }
897
898    dependencies
899}
900
/// Selects which relationship list `extract_rpm_relationships` reads from
/// the RPM header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RpmRelationshipKind {
    /// Capabilities the package provides.
    Provides,
    /// Packages the package obsoletes.
    Obsoletes,
}
905
906fn extract_rpm_relationships(
907    metadata: &PackageMetadata,
908    kind: RpmRelationshipKind,
909) -> Option<Vec<String>> {
910    let relationships = match kind {
911        RpmRelationshipKind::Provides => metadata.get_provides().ok()?,
912        RpmRelationshipKind::Obsoletes => metadata.get_obsoletes().ok()?,
913    };
914
915    let mut count = 0usize;
916    let values: Vec<String> = relationships
917        .into_iter()
918        .take(MAX_ITERATION_COUNT)
919        .map(|dep| format_rpm_requirement(&dep))
920        .filter(|value| !value.is_empty() && value != "(none)")
921        .inspect(|_| count += 1)
922        .collect();
923
924    if count >= MAX_ITERATION_COUNT {
925        warn!(
926            "RPM relationship iteration capped at {} items",
927            MAX_ITERATION_COUNT
928        );
929    }
930
931    (!values.is_empty()).then_some(values)
932}
933
934fn format_rpm_requirement(dep: &rpm::Dependency) -> String {
935    use rpm::DependencyFlags;
936
937    if dep.version.is_empty() {
938        return dep.name.clone();
939    }
940
941    let operator = if dep.flags.contains(DependencyFlags::EQUAL)
942        && dep.flags.contains(DependencyFlags::LESS)
943    {
944        "<="
945    } else if dep.flags.contains(DependencyFlags::EQUAL)
946        && dep.flags.contains(DependencyFlags::GREATER)
947    {
948        ">="
949    } else if dep.flags.contains(DependencyFlags::EQUAL) {
950        "="
951    } else if dep.flags.contains(DependencyFlags::LESS) {
952        "<"
953    } else if dep.flags.contains(DependencyFlags::GREATER) {
954        ">"
955    } else {
956        ""
957    };
958
959    if operator.is_empty() {
960        dep.name.clone()
961    } else {
962        format!("{} {} {}", dep.name, operator, dep.version)
963    }
964}
965
966fn build_evr_version(metadata: &PackageMetadata) -> Option<String> {
967    let version = metadata.get_version().ok()?;
968    let release = metadata.get_release().ok();
969
970    let mut evr = String::from(version);
971
972    if let Some(r) = release {
973        evr.push('-');
974        evr.push_str(r);
975    }
976
977    Some(evr)
978}
979
/// Splits an RPM packager string of the form `Name <email>` into
/// `(name, email)`.
///
/// The name is the trimmed text before the first `<`; the email is the text
/// between that `<` and the first `>` that follows it. When there is no such
/// bracketed section, the whole input is returned as the name and the email
/// is `None`.
///
/// The closing bracket is searched for only *after* the opening one, so a
/// stray `>` earlier in the string (e.g. `"a > b <x@y>"`) can no longer
/// produce a reversed slice range and panic.
fn parse_packager(packager: &str) -> (Option<String>, Option<String>) {
    let bracketed = packager.split_once('<').and_then(|(name, rest)| {
        rest.split_once('>').map(|(email, _)| (name, email))
    });

    match bracketed {
        Some((name, email)) => (Some(name.trim().to_string()), Some(email.to_string())),
        None => (Some(packager.to_string()), None),
    }
}
990
991fn build_rpm_purl(
992    name: &str,
993    version: Option<&str>,
994    namespace: Option<&str>,
995    architecture: Option<&str>,
996    is_source: bool,
997) -> Option<String> {
998    use packageurl::PackageUrl;
999
1000    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
1001
1002    if let Some(ns) = namespace {
1003        purl.with_namespace(ns).ok()?;
1004    }
1005
1006    if let Some(ver) = version {
1007        purl.with_version(ver).ok()?;
1008    }
1009
1010    if let Some(arch) = architecture {
1011        purl.add_qualifier("arch", arch).ok()?;
1012    }
1013
1014    if is_source {
1015        purl.add_qualifier("source", "true").ok()?;
1016    }
1017
1018    Some(purl.to_string())
1019}
1020
1021#[cfg(test)]
1022mod tests {
1023    use super::*;
1024    use std::fs;
1025    use std::path::PathBuf;
1026    use tempfile::NamedTempFile;
1027
1028    fn build_sparse_oversized_rpm(name: &str) -> PathBuf {
1029        let package = rpm::PackageBuilder::new(name, "1.0", "MIT", "x86_64", "Demo RPM package")
1030            .release("1")
1031            .build()
1032            .unwrap();
1033
1034        let temp_file = NamedTempFile::new().unwrap();
1035        package.write_file(temp_file.path()).unwrap();
1036        let oversized_len = MAX_MANIFEST_SIZE + 1_048_576;
1037        fs::OpenOptions::new()
1038            .write(true)
1039            .open(temp_file.path())
1040            .unwrap()
1041            .set_len(oversized_len)
1042            .unwrap();
1043
1044        temp_file.into_temp_path().keep().unwrap()
1045    }
1046
1047    #[test]
1048    fn test_rpm_parser_is_match() {
1049        assert!(RpmParser::is_match(&PathBuf::from("package.rpm")));
1050        assert!(RpmParser::is_match(&PathBuf::from("package.srpm")));
1051        assert!(RpmParser::is_match(&PathBuf::from(
1052            "test-1.0-1.el7.x86_64.rpm"
1053        )));
1054        assert!(!RpmParser::is_match(&PathBuf::from("package.deb")));
1055        assert!(!RpmParser::is_match(&PathBuf::from("package.tar.gz")));
1056    }
1057
1058    #[test]
1059    fn test_rpm_parser_matches_hash_named_source_rpm_by_magic() {
1060        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1061        if !source_fixture.exists() {
1062            return;
1063        }
1064
1065        let temp_file = NamedTempFile::new().unwrap();
1066        fs::copy(&source_fixture, temp_file.path()).unwrap();
1067
1068        assert!(RpmParser::is_match(temp_file.path()));
1069    }
1070
1071    #[test]
1072    fn test_rpm_parser_matches_pack_named_rpm_by_magic() {
1073        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1074        if !source_fixture.exists() {
1075            return;
1076        }
1077
1078        let temp_dir = tempfile::TempDir::new().unwrap();
1079        let pack_path = temp_dir.path().join("setup-2.5.49-b1.src.pack");
1080        fs::copy(&source_fixture, &pack_path).unwrap();
1081
1082        assert!(RpmParser::is_match(&pack_path));
1083        assert!(path_looks_like_rpm_archive(&pack_path));
1084    }
1085
1086    #[test]
1087    fn test_build_evr_version_simple() {
1088        let evr = "1.0-1";
1089        assert_eq!(evr, "1.0-1");
1090    }
1091
1092    #[test]
1093    fn test_build_evr_version_with_epoch() {
1094        let evr = "2:1.0-1";
1095        assert!(evr.starts_with("2:"));
1096    }
1097
1098    #[test]
1099    fn test_parse_packager() {
1100        let (name, email) = parse_packager("John Doe <john@example.com>");
1101        assert_eq!(name, Some("John Doe".to_string()));
1102        assert_eq!(email, Some("john@example.com".to_string()));
1103
1104        let (name2, email2) = parse_packager("Plain Name");
1105        assert_eq!(name2, Some("Plain Name".to_string()));
1106        assert_eq!(email2, None);
1107    }
1108
1109    #[test]
1110    fn test_build_rpm_purl() {
1111        let purl = build_rpm_purl(
1112            "bash",
1113            Some("4.4.19-1.el7"),
1114            Some("fedora"),
1115            Some("x86_64"),
1116            false,
1117        );
1118        assert!(purl.is_some());
1119        let purl_str = purl.unwrap();
1120        assert!(purl_str.contains("pkg:rpm/fedora/bash"));
1121        assert!(purl_str.contains("4.4.19-1.el7"));
1122        assert!(purl_str.contains("arch=x86_64"));
1123    }
1124
1125    #[test]
1126    fn test_parse_real_rpm() {
1127        let test_file = PathBuf::from("testdata/rpm/Eterm-0.9.3-5mdv2007.0.rpm");
1128        if !test_file.exists() {
1129            eprintln!("Warning: Test file not found, skipping test");
1130            return;
1131        }
1132
1133        let pkg = RpmParser::extract_first_package(&test_file);
1134
1135        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
1136        assert_eq!(pkg.name, Some("Eterm".to_string()));
1137        assert_eq!(pkg.version, Some("0.9.3-5mdv2007.0".to_string()));
1138    }
1139
1140    #[test]
1141    fn test_parse_oversized_rpm_from_headers_only() {
1142        let test_file = build_sparse_oversized_rpm("oversized-demo");
1143
1144        assert!(RpmParser::is_match(&test_file));
1145
1146        let pkg = RpmParser::extract_first_package(&test_file);
1147
1148        assert_eq!(pkg.datasource_id, Some(DatasourceId::RpmArchive));
1149        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
1150        assert_eq!(pkg.name.as_deref(), Some("oversized-demo"));
1151        assert_eq!(pkg.version.as_deref(), Some("1.0-1"));
1152
1153        fs::remove_file(test_file).unwrap();
1154    }
1155
1156    #[test]
1157    fn test_build_rpm_purl_no_namespace() {
1158        let purl = build_rpm_purl("package", Some("1.0-1"), None, Some("x86_64"), false);
1159        assert!(purl.is_some());
1160        let purl_str = purl.unwrap();
1161        assert!(purl_str.starts_with("pkg:rpm/package@"));
1162        assert!(purl_str.contains("arch=x86_64"));
1163    }
1164
1165    #[test]
1166    fn test_rpm_dependency_extraction() {
1167        use rpm::{Dependency as RpmDependency, DependencyFlags};
1168
1169        let rpm_dep = RpmDependency {
1170            name: "libc.so.6".to_string(),
1171            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
1172            version: "2.2.5".to_string(),
1173        };
1174
1175        let formatted = format_rpm_requirement(&rpm_dep);
1176        assert_eq!(formatted, "libc.so.6 >= 2.2.5");
1177
1178        let rpm_dep_no_version = RpmDependency {
1179            name: "bash".to_string(),
1180            flags: DependencyFlags::ANY,
1181            version: String::new(),
1182        };
1183
1184        let formatted_no_ver = format_rpm_requirement(&rpm_dep_no_version);
1185        assert_eq!(formatted_no_ver, "bash");
1186    }
1187
1188    #[test]
1189    fn test_parse_packager_with_parentheses() {
1190        let (name, email) = parse_packager("John Doe (Company) <john@example.com>");
1191        assert_eq!(name, Some("John Doe (Company)".to_string()));
1192        assert_eq!(email, Some("john@example.com".to_string()));
1193    }
1194
1195    #[test]
1196    fn test_parse_packager_email_only() {
1197        let (name, email) = parse_packager("<noreply@example.com>");
1198        assert!(name.is_none() || name == Some(String::new()));
1199        assert_eq!(email, Some("noreply@example.com".to_string()));
1200    }
1201
1202    #[test]
1203    fn test_rpm_fping_package() {
1204        let test_file = PathBuf::from("testdata/rpm/fping-2.4b2-10.fc12.x86_64.rpm");
1205        if !test_file.exists() {
1206            return;
1207        }
1208
1209        let pkg = RpmParser::extract_first_package(&test_file);
1210        assert_eq!(pkg.name, Some("fping".to_string()));
1211        assert_eq!(pkg.version, Some("2.4b2-10.fc12".to_string()));
1212    }
1213
1214    #[test]
1215    fn test_rpm_archive_extracts_additional_metadata_fields() {
1216        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1217        if !test_file.exists() {
1218            return;
1219        }
1220
1221        let pkg = RpmParser::extract_first_package(&test_file);
1222
1223        assert_eq!(pkg.name.as_deref(), Some("setup"));
1224        assert_eq!(
1225            pkg.qualifiers
1226                .as_ref()
1227                .and_then(|q| q.get("arch"))
1228                .map(String::as_str),
1229            Some("noarch")
1230        );
1231        assert!(!pkg.keywords.is_empty());
1232        assert!(pkg.size.is_some());
1233        assert!(
1234            pkg.parties
1235                .iter()
1236                .any(|party| party.role.as_deref() == Some("packager"))
1237        );
1238        assert!(
1239            pkg.qualifiers
1240                .as_ref()
1241                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
1242        );
1243    }
1244
1245    #[test]
1246    fn test_source_rpm_sets_source_qualifier() {
1247        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1248        if !test_file.exists() {
1249            return;
1250        }
1251
1252        let pkg = RpmParser::extract_first_package(&test_file);
1253
1254        assert!(
1255            pkg.qualifiers
1256                .as_ref()
1257                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
1258        );
1259        assert!(
1260            pkg.purl
1261                .as_ref()
1262                .is_some_and(|purl| purl.contains("source=true"))
1263        );
1264    }
1265
1266    #[test]
1267    fn test_rpm_archive_extracts_vcs_and_source_metadata() {
1268        let package = rpm::PackageBuilder::new(
1269            "thunar-sendto-clamtk",
1270            "0.08",
1271            "GPL-2.0-or-later",
1272            "noarch",
1273            "Simple virus scanning extension for Thunar",
1274        )
1275        .release("2.fc40")
1276        .vendor("Fedora Project")
1277        .packager("Fedora Release Engineering <releng@fedoraproject.org>")
1278        .group("Applications/System")
1279        .vcs("git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e")
1280        .build()
1281        .unwrap();
1282
1283        let temp_file = NamedTempFile::new().unwrap();
1284        package.write_file(temp_file.path()).unwrap();
1285
1286        let pkg = RpmParser::extract_first_package(temp_file.path());
1287
1288        assert_eq!(pkg.namespace.as_deref(), Some("fedora"));
1289        assert_eq!(
1290            pkg.vcs_url.as_deref(),
1291            Some(
1292                "git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e",
1293            )
1294        );
1295        assert!(
1296            pkg.extra_data
1297                .as_ref()
1298                .is_some_and(|extra| extra.contains_key("build_time"))
1299        );
1300        assert!(!pkg.keywords.is_empty());
1301    }
1302
1303    #[test]
1304    fn test_rpm_archive_preserves_provides_and_obsoletes_relationships() {
1305        use rpm::{Dependency as RpmDependency, DependencyFlags};
1306
1307        let package = rpm::PackageBuilder::new(
1308            "demo-rpm",
1309            "1.0.0",
1310            "MIT",
1311            "noarch",
1312            "RPM relationship metadata fixture",
1313        )
1314        .release("1")
1315        .provides(RpmDependency {
1316            name: "demo-rpm-virtual".to_string(),
1317            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
1318            version: "1.0.0".to_string(),
1319        })
1320        .obsoletes(RpmDependency {
1321            name: "old-demo-rpm".to_string(),
1322            flags: DependencyFlags::LESS,
1323            version: "0.9.0".to_string(),
1324        })
1325        .build()
1326        .unwrap();
1327
1328        let temp_file = NamedTempFile::new().unwrap();
1329        package.write_file(temp_file.path()).unwrap();
1330
1331        let pkg = RpmParser::extract_first_package(temp_file.path());
1332        let extra = pkg.extra_data.as_ref().expect("extra_data should exist");
1333
1334        let provides = extra
1335            .get("provides")
1336            .and_then(|value| value.as_array())
1337            .expect("provides should be present");
1338        assert!(
1339            provides
1340                .iter()
1341                .any(|value| value.as_str() == Some("demo-rpm-virtual >= 1.0.0"))
1342        );
1343
1344        let obsoletes = extra
1345            .get("obsoletes")
1346            .and_then(|value| value.as_array())
1347            .expect("obsoletes should be present");
1348        assert!(
1349            obsoletes
1350                .iter()
1351                .any(|value| value.as_str() == Some("old-demo-rpm < 0.9.0"))
1352        );
1353    }
1354
1355    #[test]
1356    fn test_rpm_archive_normalizes_declared_license_expression() {
1357        let package = rpm::PackageBuilder::new(
1358            "demo-license",
1359            "1.0.0",
1360            "LGPLv2",
1361            "noarch",
1362            "RPM declared license normalization fixture",
1363        )
1364        .release("1")
1365        .build()
1366        .unwrap();
1367
1368        let temp_file = NamedTempFile::new().unwrap();
1369        package.write_file(temp_file.path()).unwrap();
1370
1371        let pkg = RpmParser::extract_first_package(temp_file.path());
1372
1373        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("LGPLv2"));
1374        assert_eq!(
1375            pkg.declared_license_expression.as_deref(),
1376            Some("lgpl-2.0-only")
1377        );
1378        assert_eq!(
1379            pkg.declared_license_expression_spdx.as_deref(),
1380            Some("LGPL-2.0-only")
1381        );
1382        assert_eq!(pkg.license_detections.len(), 1);
1383        assert_eq!(
1384            pkg.license_detections[0].license_expression_spdx,
1385            "LGPL-2.0-only"
1386        );
1387        assert_eq!(
1388            pkg.license_detections[0].matches[0].matched_text.as_deref(),
1389            Some("LGPLv2")
1390        );
1391    }
1392
1393    #[test]
1394    fn test_rpm_archive_normalizes_public_domain_declared_license_expression() {
1395        let package = rpm::PackageBuilder::new(
1396            "demo-public-domain",
1397            "1.0.0",
1398            "public domain",
1399            "noarch",
1400            "RPM public domain normalization fixture",
1401        )
1402        .release("1")
1403        .build()
1404        .unwrap();
1405
1406        let temp_file = NamedTempFile::new().unwrap();
1407        package.write_file(temp_file.path()).unwrap();
1408
1409        let pkg = RpmParser::extract_first_package(temp_file.path());
1410
1411        assert_eq!(
1412            pkg.extracted_license_statement.as_deref(),
1413            Some("public domain")
1414        );
1415        assert_eq!(
1416            pkg.declared_license_expression.as_deref(),
1417            Some("licenseref-provenant-public-domain")
1418        );
1419        assert_eq!(
1420            pkg.declared_license_expression_spdx.as_deref(),
1421            Some("LicenseRef-provenant-public-domain")
1422        );
1423        assert_eq!(pkg.license_detections.len(), 1);
1424    }
1425
1426    #[test]
1427    fn test_normalize_rpm_declared_license_rewrites_compound_aliases() {
1428        let normalized = normalize_rpm_declared_license("BSD and GPLv2+")
1429            .expect("compound RPM license should normalize");
1430
1431        assert_eq!(
1432            normalized.declared_license_expression_spdx,
1433            "BSD-3-Clause AND GPL-2.0-or-later"
1434        );
1435    }
1436}
1437
// Invokes the crate's `register_parser!` macro for the RPM archive parser.
// NOTE(review): the macro definition is not visible here. From the values
// alone, the arguments look like a human-readable description, the path globs
// for `.rpm` / `.srpm` files, the package type, an empty string whose meaning
// cannot be determined from this call, and a documentation URL — confirm
// against the macro before relying on this.
crate::register_parser!(
    "RPM package archive",
    &["**/*.rpm", "**/*.srpm"],
    "rpm",
    "",
    Some("https://rpm.org/"),
);