Skip to main content

provenant/parsers/
rpm_parser.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM package archives.
5//!
6//! Extracts package metadata and dependencies from binary RPM package (.rpm) files
7//! by reading the embedded header metadata.
8//!
9//! # Supported Formats
//! - *.rpm and *.srpm (binary and source RPM package archives)
11//!
12//! # Key Features
13//! - Metadata extraction from RPM headers (name, version, release, architecture)
14//! - Dependency extraction (requires, provides, obsoletes)
15//! - License and distribution information parsing
16//! - Package URL (purl) generation for installed packages
17//! - Graceful handling of malformed or corrupted RPM files
18//!
19//! # Implementation Notes
20//! - Uses `rpm` crate for low-level RPM format parsing
//! - RPM architecture is captured as the `arch` qualifier; the namespace is inferred from distribution, vendor, release, and dist URL metadata
22//! - Direct dependency tracking (all requires are direct)
23//! - Error handling with `warn!()` logs on parse failures
24
25use std::fs::{self, File};
26use std::io::{BufReader, Read};
27use std::path::Path;
28use std::sync::LazyLock;
29
30use crate::parser_warn as warn;
31use regex::Regex;
32use rpm::{
33    HEADER_MAGIC, INDEX_ENTRY_SIZE, INDEX_HEADER_SIZE, IndexTag, LEAD_SIZE, PackageMetadata,
34    RPM_MAGIC,
35};
36
37use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
38use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
39
40use super::PackageParser;
41use super::license_normalization::{
42    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
43    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
44};
45
/// Package type reported for every package produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Rpm;
/// Maximum number of bytes read from an RPM file when parsing or salvaging its
/// header: one byte more than `MAX_MANIFEST_SIZE` (saturating).
const RPM_HEADER_PARSE_LIMIT_BYTES: u64 = MAX_MANIFEST_SIZE.saturating_add(1);

/// Matches a whitespace-delimited, case-insensitive `and` in a license statement.
static RE_RPM_LICENSE_AND: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)\s+and\s+").expect("valid RPM license AND regex"));
/// Matches a whitespace-delimited, case-insensitive `or` in a license statement.
static RE_RPM_LICENSE_OR: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)\s+or\s+").expect("valid RPM license OR regex"));
/// Matches a comma together with any surrounding whitespace.
static RE_RPM_LICENSE_COMMA: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\s*,\s*").expect("valid RPM license comma regex"));
/// Matches a case-insensitive ` with exceptions` suffix phrase.
static RE_RPM_LICENSE_WITH_EXCEPTIONS: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?i)\s+with\s+exceptions\b").expect("valid RPM license exceptions regex")
});
58
59fn default_package_data() -> PackageData {
60    PackageData {
61        package_type: Some(PACKAGE_TYPE),
62        datasource_id: Some(DatasourceId::RpmArchive),
63        ..Default::default()
64    }
65}
66
/// Infers the distro namespace (e.g. `fedora`, `rhel`, `suse`) of an RPM.
///
/// `distribution`, `vendor` and `dist_url` are inspected first, in that order,
/// using case-insensitive keyword matching; the first candidate that matches
/// any keyword wins. If none of them match, `release` is checked for
/// well-known dist-tag fragments (`.fc`, `.el`, `mdv`/`mnb`, `suse`).
///
/// Returns `None` when nothing recognisable is found.
pub(crate) fn infer_rpm_namespace(
    distribution: Option<&str>,
    vendor: Option<&str>,
    release: Option<&str>,
    dist_url: Option<&str>,
) -> Option<String> {
    for candidate in [distribution, vendor, dist_url].into_iter().flatten() {
        let lower = candidate.to_ascii_lowercase();
        if lower.contains("fedora") || lower.contains("koji") {
            return Some("fedora".to_string());
        }
        if lower.contains("centos") {
            return Some("centos".to_string());
        }
        // `ubi` is matched as a standalone token only: a bare substring test
        // would classify unrelated names such as "Ubiquiti" or "Kubic" as RHEL.
        if lower.contains("red hat") || lower.contains("redhat") || contains_ubi_token(&lower) {
            return Some("rhel".to_string());
        }
        // "opensuse" must be tested before the broader "suse" keyword.
        if lower.contains("opensuse") {
            return Some("opensuse".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
        if lower.contains("openmandriva") || lower.contains("mandriva") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("mariner") {
            return Some("mariner".to_string());
        }
    }

    if let Some(release) = release {
        let lower = release.to_ascii_lowercase();
        if lower.contains(".fc") {
            return Some("fedora".to_string());
        }
        if lower.contains(".el") {
            return Some("rhel".to_string());
        }
        if lower.contains("mdv") || lower.contains("mnb") {
            return Some("openmandriva".to_string());
        }
        if lower.contains("suse") {
            return Some("suse".to_string());
        }
    }

    None
}

/// Returns `true` when `lower` (already lowercased) contains `ubi` as its own
/// token: not preceded by an ASCII letter or digit and not followed by an
/// ASCII letter. A trailing digit is allowed so that `ubi8`/`ubi9` match.
fn contains_ubi_token(lower: &str) -> bool {
    lower.match_indices("ubi").any(|(start, matched)| {
        let end = start + matched.len();
        let preceded_by_word = lower[..start]
            .chars()
            .next_back()
            .is_some_and(|c| c.is_ascii_alphanumeric());
        let followed_by_letter = lower[end..]
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_alphabetic());
        !preceded_by_word && !followed_by_letter
    })
}
116
117fn rpm_header_string(metadata: &PackageMetadata, tag: IndexTag) -> Option<String> {
118    metadata
119        .header
120        .get_entry_data_as_string(tag)
121        .ok()
122        .and_then(|value| {
123            let trimmed = value.trim();
124            if trimmed.is_empty() || trimmed == "(none)" {
125                None
126            } else {
127                Some(trimmed.to_string())
128            }
129        })
130}
131
132fn rpm_header_string_array(metadata: &PackageMetadata, tag: IndexTag) -> Option<Vec<String>> {
133    metadata
134        .header
135        .get_entry_data_as_string_array(tag)
136        .ok()
137        .map(|items| {
138            items
139                .iter()
140                .map(|item| item.trim().to_string())
141                .filter(|item| !item.is_empty() && item != "(none)")
142                .collect::<Vec<_>>()
143        })
144        .filter(|items| !items.is_empty())
145}
146
147fn infer_vcs_url(metadata: &PackageMetadata, source_urls: &[String]) -> Option<String> {
148    if let Ok(vcs) = metadata.get_vcs()
149        && !vcs.trim().is_empty()
150    {
151        return Some(vcs.to_string());
152    }
153
154    source_urls
155        .iter()
156        .find(|url| url.starts_with("git+") || url.contains("src.fedoraproject.org"))
157        .cloned()
158}
159
/// Builds the qualifier map for an RPM package.
///
/// Adds `arch` when `architecture` is a non-empty string and `source=true`
/// when `is_source` is set. Returns `None` if no qualifier applies.
fn build_rpm_qualifiers(
    architecture: Option<&str>,
    is_source: bool,
) -> Option<std::collections::HashMap<String, String>> {
    let arch_entry = architecture
        .filter(|value| !value.is_empty())
        .map(|value| ("arch".to_string(), value.to_string()));
    let source_entry = is_source.then(|| ("source".to_string(), "true".to_string()));

    let qualifiers: std::collections::HashMap<String, String> =
        arch_entry.into_iter().chain(source_entry).collect();

    if qualifiers.is_empty() {
        None
    } else {
        Some(qualifiers)
    }
}
176
/// Returns `true` when the path's extension is exactly `rpm` or `srpm`
/// (case-sensitive). Paths without a UTF-8 extension never match.
pub(crate) fn is_rpm_archive_extension(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    extension == "rpm" || extension == "srpm"
}
182
183pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
184    if is_rpm_archive_extension(path) {
185        return true;
186    }
187
188    if fs::metadata(path).is_err() {
189        return false;
190    }
191
192    let mut file = match File::open(path) {
193        Ok(file) => file,
194        Err(_) => return false,
195    };
196    let mut magic = [0_u8; 4];
197    file.read_exact(&mut magic).is_ok() && magic == RPM_MAGIC
198}
199
200fn parse_rpm_metadata_only(path: &Path) -> Result<PackageMetadata, String> {
201    let file =
202        File::open(path).map_err(|e| format!("Failed to open RPM file {:?}: {}", path, e))?;
203    let limited_file = file.take(RPM_HEADER_PARSE_LIMIT_BYTES);
204    let mut reader = BufReader::new(limited_file);
205
206    PackageMetadata::parse(&mut reader)
207        .map_err(|e| format!("Failed to parse RPM file {:?}: {}", path, e))
208}
209
/// One decoded index entry of an RPM header, as read by `parse_header_entries`
/// from four consecutive big-endian `u32` values.
#[derive(Debug, Clone, Copy)]
struct RpmHeaderEntryView {
    /// Numeric tag identifying the field (compared against `IndexTag` values).
    tag: u32,
    /// Numeric data type code of the entry's payload.
    data_type: u32,
    /// Byte offset of the payload inside the header's data store.
    offset: usize,
    /// Number of items the entry declares.
    num_items: usize,
}
217
/// A header section decoded without the `rpm` crate: its index entries plus a
/// borrowed view of the data store the entry offsets point into.
struct ParsedRpmHeader<'a> {
    /// Index entries in the order they appear in the file.
    entries: Vec<RpmHeaderEntryView>,
    /// Raw bytes of the data store; may be shorter than declared when the
    /// header was parsed with `allow_truncated_store`.
    store: &'a [u8],
}
222
/// String fields recovered directly from the main RPM header by
/// `salvage_rpm_header_fields` when regular parsing fails. Each field holds
/// the first string of the corresponding header tag, or `None` if it could
/// not be read.
#[derive(Default)]
struct SalvagedRpmFields {
    // RPMTAG_NAME
    name: Option<String>,
    // RPMTAG_VERSION
    version: Option<String>,
    // RPMTAG_RELEASE
    release: Option<String>,
    // RPMTAG_SUMMARY
    summary: Option<String>,
    // RPMTAG_DESCRIPTION
    description: Option<String>,
    // RPMTAG_DISTRIBUTION
    distribution: Option<String>,
    // RPMTAG_VENDOR
    vendor: Option<String>,
    // RPMTAG_LICENSE
    license: Option<String>,
    // RPMTAG_PACKAGER
    packager: Option<String>,
    // RPMTAG_GROUP
    group: Option<String>,
    // RPMTAG_URL
    url: Option<String>,
    // RPMTAG_ARCH
    arch: Option<String>,
    // RPMTAG_SOURCERPM
    source_rpm: Option<String>,
    // RPMTAG_DISTURL
    dist_url: Option<String>,
}
240
241fn read_rpm_header_bytes(path: &Path) -> Result<Vec<u8>, String> {
242    let file =
243        File::open(path).map_err(|e| format!("Failed to open RPM file {:?}: {}", path, e))?;
244    let mut limited_file = file.take(RPM_HEADER_PARSE_LIMIT_BYTES);
245    let mut bytes = Vec::new();
246    limited_file
247        .read_to_end(&mut bytes)
248        .map_err(|e| format!("Failed to read RPM file {:?}: {}", path, e))?;
249    Ok(bytes)
250}
251
252fn parse_index_header(bytes: &[u8], offset: usize) -> Option<(usize, usize)> {
253    let header = bytes.get(offset..offset + INDEX_HEADER_SIZE as usize)?;
254    if header.get(..3)? != HEADER_MAGIC {
255        return None;
256    }
257    if header.get(3).copied()? != 1 {
258        return None;
259    }
260
261    let num_entries = u32::from_be_bytes(header.get(8..12)?.try_into().ok()?) as usize;
262    let data_section_size = u32::from_be_bytes(header.get(12..16)?.try_into().ok()?) as usize;
263    Some((num_entries, data_section_size))
264}
265
266fn parse_header_entries<'a>(
267    bytes: &'a [u8],
268    offset: usize,
269    allow_truncated_store: bool,
270) -> Option<(ParsedRpmHeader<'a>, usize)> {
271    let (num_entries, data_section_size) = parse_index_header(bytes, offset)?;
272    let entries_offset = offset.checked_add(INDEX_HEADER_SIZE as usize)?;
273    let entries_size = num_entries.checked_mul(INDEX_ENTRY_SIZE as usize)?;
274    let store_offset = entries_offset.checked_add(entries_size)?;
275    bytes.get(entries_offset..store_offset)?;
276    let store_end = store_offset.checked_add(data_section_size)?;
277    let store = if allow_truncated_store {
278        bytes.get(store_offset..).unwrap_or(&[])
279    } else {
280        bytes.get(store_offset..store_end)?
281    };
282
283    let mut entries = Vec::with_capacity(num_entries);
284    for index in 0..num_entries {
285        let entry_offset =
286            entries_offset.checked_add(index.checked_mul(INDEX_ENTRY_SIZE as usize)?)?;
287        let entry = bytes.get(entry_offset..entry_offset + INDEX_ENTRY_SIZE as usize)?;
288        entries.push(RpmHeaderEntryView {
289            tag: u32::from_be_bytes(entry.get(0..4)?.try_into().ok()?),
290            data_type: u32::from_be_bytes(entry.get(4..8)?.try_into().ok()?),
291            offset: u32::from_be_bytes(entry.get(8..12)?.try_into().ok()?) as usize,
292            num_items: u32::from_be_bytes(entry.get(12..16)?.try_into().ok()?) as usize,
293        });
294    }
295
296    Some((ParsedRpmHeader { entries, store }, store_end))
297}
298
299fn parse_main_rpm_header(bytes: &[u8]) -> Option<ParsedRpmHeader<'_>> {
300    if bytes.get(..RPM_MAGIC.len())? != RPM_MAGIC {
301        return None;
302    }
303
304    let (_, signature_end) = parse_header_entries(bytes, LEAD_SIZE as usize, false)?;
305    let signature_padding = (8 - (signature_end - (LEAD_SIZE as usize)) % 8) % 8;
306    let main_header_offset = signature_end.checked_add(signature_padding)?;
307    let (header, _) = parse_header_entries(bytes, main_header_offset, true)?;
308    Some(header)
309}
310
/// Reads a NUL-terminated string from `store` starting at `offset`.
///
/// The bytes are decoded lossily as UTF-8 and trimmed. Returns the text
/// together with the offset just past the terminating NUL. Yields `None` when
/// `offset` is out of range, no NUL terminator exists, or the trimmed text is
/// empty or the `(none)` placeholder.
fn read_header_string(store: &[u8], offset: usize) -> Option<(String, usize)> {
    let tail = store.get(offset..)?;
    let len = tail.iter().position(|&byte| byte == 0)?;
    let next_offset = offset.checked_add(len)?.checked_add(1)?;

    let decoded = String::from_utf8_lossy(&tail[..len]);
    match decoded.trim() {
        "" | "(none)" => None,
        text => Some((text.to_string(), next_offset)),
    }
}
324
325fn read_entry_first_string(header: &ParsedRpmHeader<'_>, tag: u32) -> Option<String> {
326    let entry = header.entries.iter().find(|entry| entry.tag == tag)?;
327    match entry.data_type {
328        6 => read_header_string(header.store, entry.offset).map(|(value, _)| value),
329        8 | 9 => {
330            let mut offset = entry.offset;
331            let mut first_value = None;
332            for _ in 0..entry.num_items {
333                let (value, next_offset) = read_header_string(header.store, offset)?;
334                first_value.get_or_insert(value);
335                offset = next_offset;
336            }
337            first_value
338        }
339        _ => None,
340    }
341}
342
343fn salvage_rpm_header_fields(path: &Path) -> Option<SalvagedRpmFields> {
344    let bytes = read_rpm_header_bytes(path).ok()?;
345    let header = parse_main_rpm_header(&bytes)?;
346
347    Some(SalvagedRpmFields {
348        name: read_entry_first_string(&header, IndexTag::RPMTAG_NAME as u32).map(truncate_field),
349        version: read_entry_first_string(&header, IndexTag::RPMTAG_VERSION as u32)
350            .map(truncate_field),
351        release: read_entry_first_string(&header, IndexTag::RPMTAG_RELEASE as u32)
352            .map(truncate_field),
353        summary: read_entry_first_string(&header, IndexTag::RPMTAG_SUMMARY as u32)
354            .map(truncate_field),
355        description: read_entry_first_string(&header, IndexTag::RPMTAG_DESCRIPTION as u32)
356            .map(truncate_field),
357        distribution: read_entry_first_string(&header, IndexTag::RPMTAG_DISTRIBUTION as u32)
358            .map(truncate_field),
359        vendor: read_entry_first_string(&header, IndexTag::RPMTAG_VENDOR as u32)
360            .map(truncate_field),
361        license: read_entry_first_string(&header, IndexTag::RPMTAG_LICENSE as u32)
362            .map(truncate_field),
363        packager: read_entry_first_string(&header, IndexTag::RPMTAG_PACKAGER as u32)
364            .map(truncate_field),
365        group: read_entry_first_string(&header, IndexTag::RPMTAG_GROUP as u32).map(truncate_field),
366        url: read_entry_first_string(&header, IndexTag::RPMTAG_URL as u32).map(truncate_field),
367        arch: read_entry_first_string(&header, IndexTag::RPMTAG_ARCH as u32).map(truncate_field),
368        source_rpm: read_entry_first_string(&header, IndexTag::RPMTAG_SOURCERPM as u32)
369            .map(truncate_field),
370        dist_url: read_entry_first_string(&header, IndexTag::RPMTAG_DISTURL as u32)
371            .map(truncate_field),
372    })
373}
374
375fn build_salvaged_rpm_package(path: &Path, fields: SalvagedRpmFields) -> Option<PackageData> {
376    let name = fields.name?;
377    let mut version = fields.version;
378    if let Some(release) = fields.release.as_deref() {
379        let mut evr = version.take().unwrap_or_default();
380        if !evr.is_empty() {
381            evr.push('-');
382        }
383        evr.push_str(release);
384        version = Some(truncate_field(evr));
385    }
386
387    let namespace = infer_rpm_namespace(
388        fields.distribution.as_deref(),
389        fields.vendor.as_deref(),
390        fields.release.as_deref(),
391        fields.dist_url.as_deref(),
392    )
393    .or_else(|| infer_rpm_namespace_from_filename(path))
394    .map(truncate_field);
395    let is_source =
396        path.to_string_lossy().ends_with(".src.rpm") || path.to_string_lossy().ends_with(".srpm");
397    let qualifiers = build_rpm_qualifiers(fields.arch.as_deref(), is_source);
398
399    let mut parties = Vec::new();
400    if let Some(vendor) = fields.vendor.clone() {
401        parties.push(Party {
402            r#type: Some("organization".to_string()),
403            role: Some("vendor".to_string()),
404            name: Some(vendor),
405            email: None,
406            url: None,
407            organization: None,
408            organization_url: None,
409            timezone: None,
410        });
411    }
412    if let Some(distribution) = fields.distribution.clone() {
413        parties.push(Party {
414            r#type: Some("organization".to_string()),
415            role: Some("distributor".to_string()),
416            name: Some(distribution),
417            email: None,
418            url: None,
419            organization: None,
420            organization_url: None,
421            timezone: None,
422        });
423    }
424    if let Some(packager) = fields.packager.as_deref() {
425        let (name_opt, email_opt) = parse_packager(packager);
426        parties.push(Party {
427            r#type: Some("person".to_string()),
428            role: Some("packager".to_string()),
429            name: name_opt.map(truncate_field),
430            email: email_opt.map(truncate_field),
431            url: None,
432            organization: None,
433            organization_url: None,
434            timezone: None,
435        });
436    }
437
438    let extracted_license_statement = fields.license.map(truncate_field);
439    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
440        extracted_license_statement
441            .as_deref()
442            .and_then(normalize_rpm_declared_license)
443            .map(|normalized| {
444                build_declared_license_data(
445                    normalized,
446                    DeclaredLicenseMatchMetadata::single_line(
447                        extracted_license_statement.as_deref().unwrap_or_default(),
448                    ),
449                )
450            })
451            .map(|(expr, spdx, detections)| {
452                (
453                    expr.map(truncate_field),
454                    spdx.map(truncate_field),
455                    detections,
456                )
457            })
458            .unwrap_or_else(empty_declared_license_data);
459
460    let mut extra_data = std::collections::HashMap::new();
461    if let Some(distribution) = fields.distribution.clone() {
462        extra_data.insert(
463            "distribution".to_string(),
464            serde_json::Value::String(distribution),
465        );
466    }
467    if let Some(dist_url) = fields.dist_url.clone() {
468        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
469    }
470
471    Some(PackageData {
472        datasource_id: Some(DatasourceId::RpmArchive),
473        package_type: Some(PACKAGE_TYPE),
474        namespace: namespace.clone(),
475        name: Some(name.clone()),
476        version: version.clone(),
477        qualifiers,
478        description: fields.description.or(fields.summary),
479        homepage_url: fields.url,
480        parties,
481        keywords: fields.group.into_iter().collect(),
482        declared_license_expression,
483        declared_license_expression_spdx,
484        license_detections,
485        extracted_license_statement,
486        source_packages: fields.source_rpm.into_iter().collect(),
487        extra_data: (!extra_data.is_empty()).then_some(extra_data),
488        purl: build_rpm_purl(
489            &name,
490            version.as_deref(),
491            namespace.as_deref(),
492            fields.arch.as_deref(),
493            is_source,
494        )
495        .map(truncate_field),
496        ..Default::default()
497    })
498}
499
500pub(crate) fn extract_rpm_packages(path: &Path) -> Vec<PackageData> {
501    if let Err(e) = fs::metadata(path) {
502        warn!("Cannot stat RPM file {:?}: {}", path, e);
503        return vec![default_package_data()];
504    }
505
506    let metadata = match parse_rpm_metadata_only(path) {
507        Ok(metadata) => metadata,
508        Err(message) => {
509            if let Some(package) = salvage_rpm_header_fields(path)
510                .and_then(|fields| build_salvaged_rpm_package(path, fields))
511            {
512                return vec![package];
513            }
514            warn!("{}", message);
515            return vec![default_package_data()];
516        }
517    };
518
519    vec![parse_rpm_package(&metadata, path)]
520}
521
522/// Parser for RPM package archives
523pub struct RpmParser;
524
525impl PackageParser for RpmParser {
526    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
527
528    fn is_match(path: &Path) -> bool {
529        path_looks_like_rpm_archive(path)
530    }
531
532    fn extract_packages(path: &Path) -> Vec<PackageData> {
533        extract_rpm_packages(path)
534    }
535
536    fn metadata() -> Vec<super::metadata::ParserMetadata> {
537        vec![super::metadata::ParserMetadata {
538            description: "RPM package archive",
539            file_patterns: &["**/*.rpm", "**/*.srpm"],
540            package_type: "rpm",
541            primary_language: "",
542            documentation_url: Some("https://rpm.org/"),
543        }]
544    }
545}
546
/// Guesses the distro namespace from well-known fragments in the file name.
///
/// The file name is lowercased and tested against an ordered rule list; the
/// first rule with a matching fragment wins. Returns `None` when the path has
/// no UTF-8 file name or no rule matches.
pub(crate) fn infer_rpm_namespace_from_filename(path: &Path) -> Option<String> {
    // Order matters: "opensuse" must be tried before the broader "suse".
    const RULES: [(&[&str], &str); 5] = [
        (&[".fc"], "fedora"),
        (&[".el"], "rhel"),
        (&["mdv", "mnb"], "openmandriva"),
        (&["opensuse"], "opensuse"),
        (&["suse"], "suse"),
    ];

    let filename = path.file_name()?.to_str()?.to_ascii_lowercase();

    RULES
        .iter()
        .find(|(fragments, _)| fragments.iter().any(|fragment| filename.contains(*fragment)))
        .map(|(_, namespace)| namespace.to_string())
}
568
/// Converts successfully parsed RPM metadata into a `PackageData`.
///
/// `path` is used only to detect source packages by file name suffix and as a
/// fallback for namespace inference. Most string values pass through
/// `truncate_field` before being stored.
fn parse_rpm_package(metadata: &PackageMetadata, path: &Path) -> PackageData {
    let name = metadata
        .get_name()
        .ok()
        .map(|s| truncate_field(s.to_string()));
    let version = build_evr_version(metadata).map(truncate_field);
    let description = metadata
        .get_description()
        .ok()
        .map(|s| truncate_field(s.to_string()));
    let homepage_url = metadata
        .get_url()
        .ok()
        .map(|s| truncate_field(s.to_string()));
    let architecture = metadata
        .get_arch()
        .ok()
        .map(|s| truncate_field(s.to_string()));
    let path_str = path.to_string_lossy();
    // A package counts as source if the header says so or the file name does.
    let is_source = metadata.is_source_package()
        || path_str.ends_with(".src.rpm")
        || path_str.ends_with(".srpm");
    let distribution =
        rpm_header_string(metadata, IndexTag::RPMTAG_DISTRIBUTION).map(truncate_field);
    let dist_url = rpm_header_string(metadata, IndexTag::RPMTAG_DISTURL).map(truncate_field);
    let bug_tracking_url = rpm_header_string(metadata, IndexTag::RPMTAG_BUGURL).map(truncate_field);
    let source_urls =
        rpm_header_string_array(metadata, IndexTag::RPMTAG_SOURCE).unwrap_or_default();
    let source_rpm = metadata
        .get_source_rpm()
        .ok()
        .filter(|value| !value.is_empty())
        .map(|value| truncate_field(value.to_string()));
    // Header metadata takes precedence; the file name is only a fallback.
    let namespace = infer_rpm_namespace(
        distribution.as_deref(),
        metadata.get_vendor().ok(),
        metadata.get_release().ok(),
        dist_url.as_deref(),
    )
    .or_else(|| infer_rpm_namespace_from_filename(path))
    .map(truncate_field);

    // Parties: vendor and distribution as organizations, packager as a person.
    let mut parties = Vec::new();

    if let Ok(vendor) = metadata.get_vendor()
        && !vendor.is_empty()
    {
        parties.push(Party {
            r#type: Some("organization".to_string()),
            role: Some("vendor".to_string()),
            name: Some(truncate_field(vendor.to_string())),
            email: None,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    if let Some(distribution_name) = distribution.as_ref() {
        parties.push(Party {
            r#type: Some("organization".to_string()),
            role: Some("distributor".to_string()),
            name: Some(distribution_name.clone()),
            email: None,
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    if let Ok(packager) = metadata.get_packager()
        && !packager.is_empty()
    {
        let (name_opt, email_opt) = parse_packager(packager);
        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("packager".to_string()),
            name: name_opt.map(truncate_field),
            email: email_opt.map(truncate_field),
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    }

    // Keep the raw license statement and, when it can be normalized, derive
    // the declared license expressions and detections from it.
    let extracted_license_statement = metadata
        .get_license()
        .ok()
        .map(|s| truncate_field(s.to_string()));
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        extracted_license_statement
            .as_deref()
            .and_then(normalize_rpm_declared_license)
            .map(|normalized| {
                build_declared_license_data(
                    normalized,
                    DeclaredLicenseMatchMetadata::single_line(
                        extracted_license_statement.as_deref().unwrap_or_default(),
                    ),
                )
            })
            .map(|(expr, spdx, detections)| {
                (
                    expr.map(truncate_field),
                    spdx.map(truncate_field),
                    detections,
                )
            })
            .unwrap_or_else(empty_declared_license_data);

    let dependencies = extract_rpm_dependencies(metadata, namespace.as_deref());

    let qualifiers = build_rpm_qualifiers(architecture.as_deref(), is_source);

    // The RPM group, when present, is the only keyword.
    let mut keywords = Vec::new();
    if let Ok(group) = metadata.get_group()
        && !group.is_empty()
    {
        keywords.push(truncate_field(group.to_string()));
    }

    // Everything without a dedicated PackageData field goes into extra_data.
    let mut extra_data = std::collections::HashMap::new();
    if let Some(distribution) = distribution.clone() {
        extra_data.insert(
            "distribution".to_string(),
            serde_json::Value::String(distribution),
        );
    }
    if let Some(dist_url) = dist_url.clone() {
        extra_data.insert("dist_url".to_string(), serde_json::Value::String(dist_url));
    }
    if let Ok(build_host) = metadata.get_build_host()
        && !build_host.is_empty()
    {
        extra_data.insert(
            "build_host".to_string(),
            serde_json::Value::String(build_host.to_string()),
        );
    }
    if let Ok(build_time) = metadata.get_build_time() {
        extra_data.insert(
            "build_time".to_string(),
            serde_json::Value::Number(serde_json::Number::from(build_time)),
        );
    }
    if !source_urls.is_empty() {
        extra_data.insert(
            "source_urls".to_string(),
            serde_json::Value::Array(
                source_urls
                    .iter()
                    .cloned()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }
    if let Some(provides) = extract_rpm_relationships(metadata, RpmRelationshipKind::Provides)
        && !provides.is_empty()
    {
        extra_data.insert(
            "provides".to_string(),
            serde_json::Value::Array(
                provides
                    .into_iter()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }
    if let Some(obsoletes) = extract_rpm_relationships(metadata, RpmRelationshipKind::Obsoletes)
        && !obsoletes.is_empty()
    {
        extra_data.insert(
            "obsoletes".to_string(),
            serde_json::Value::Array(
                obsoletes
                    .into_iter()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }
    let vcs_url = infer_vcs_url(metadata, &source_urls).map(truncate_field);

    PackageData {
        datasource_id: Some(DatasourceId::RpmArchive),
        package_type: Some(PACKAGE_TYPE),
        namespace: namespace.clone(),
        name: name.clone(),
        version: version.clone(),
        qualifiers,
        description,
        homepage_url,
        size: metadata.get_installed_size().ok(),
        parties,
        keywords,
        bug_tracking_url,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        extracted_license_statement,
        dependencies,
        source_packages: source_rpm.into_iter().collect(),
        vcs_url,
        extra_data: (!extra_data.is_empty()).then_some(extra_data),
        // A purl is only built when the package name is known.
        purl: name.as_ref().and_then(|n| {
            build_rpm_purl(
                n,
                version.as_deref(),
                namespace.as_deref(),
                architecture.as_deref(),
                is_source,
            )
            .map(truncate_field)
        }),
        ..Default::default()
    }
}
791
792pub(crate) fn normalize_rpm_declared_license(statement: &str) -> Option<NormalizedDeclaredLicense> {
793    let trimmed = statement.trim();
794    if trimmed.is_empty() {
795        return None;
796    }
797
798    let rewritten = canonicalize_rpm_license_statement(trimmed);
799    if let Some(normalized) = normalize_spdx_expression(&rewritten) {
800        return Some(normalized);
801    }
802
803    let is_simple_key = !trimmed.contains(' ')
804        && !trimmed.contains(',')
805        && !trimmed.contains('(')
806        && !trimmed.contains(')');
807    if is_simple_key {
808        return normalize_declared_license_key(trimmed);
809    }
810
811    None
812}
813
814fn canonicalize_rpm_license_statement(statement: &str) -> String {
815    let mut rewritten = statement.trim().to_string();
816
817    for (from, to) in [
818        ("LGPLv2.1+", "LGPL-2.1-or-later"),
819        ("LGPLv2.1", "LGPL-2.1-only"),
820        ("LGPLv2+", "LGPL-2.0-or-later"),
821        ("LGPLv2", "LGPL-2.0-only"),
822        ("LGPLv3+", "LGPL-3.0-or-later"),
823        ("LGPLv3", "LGPL-3.0-only"),
824        ("GPLv2+", "GPL-2.0-or-later"),
825        ("GPLv2", "GPL-2.0-only"),
826        ("GPLv3+", "GPL-3.0-or-later"),
827        ("GPLv3", "GPL-3.0-only"),
828        ("GPLV2+", "GPL-2.0-or-later"),
829        ("MPLv2.0", "MPL-2.0"),
830        ("MPLv1.1", "MPL-1.1"),
831        ("BSD with advertising", "BSD-4-Clause-UC"),
832        ("Public Domain", "LicenseRef-scancode-public-domain"),
833        ("public domain", "LicenseRef-scancode-public-domain"),
834        ("OpenLDAP", "OLDAP-2.8"),
835        ("OpenSSL", "OpenSSL"),
836        ("Sleepycat", "Sleepycat"),
837        ("zlib", "Zlib"),
838        ("Boost", "BSL-1.0"),
839        ("BSD", "BSD-3-Clause"),
840    ] {
841        rewritten = rewritten.replace(from, to);
842    }
843
844    rewritten = RE_RPM_LICENSE_WITH_EXCEPTIONS
845        .replace_all(&rewritten, "")
846        .into_owned();
847    rewritten = RE_RPM_LICENSE_COMMA
848        .replace_all(&rewritten, " AND ")
849        .into_owned();
850    rewritten = RE_RPM_LICENSE_AND
851        .replace_all(&rewritten, " AND ")
852        .into_owned();
853    rewritten = RE_RPM_LICENSE_OR
854        .replace_all(&rewritten, " OR ")
855        .into_owned();
856
857    rewritten.split_whitespace().collect::<Vec<_>>().join(" ")
858}
859
860fn extract_rpm_dependencies(
861    metadata: &PackageMetadata,
862    namespace: Option<&str>,
863) -> Vec<Dependency> {
864    let mut dependencies = Vec::new();
865
866    if let Ok(requires) = metadata.get_requires() {
867        for rpm_dep in requires {
868            if dependencies.len() >= MAX_ITERATION_COUNT {
869                warn!(
870                    "RPM dependency iteration capped at {} items",
871                    MAX_ITERATION_COUNT
872                );
873                break;
874            }
875            let purl = build_rpm_purl(
876                &rpm_dep.name,
877                if rpm_dep.version.is_empty() {
878                    None
879                } else {
880                    Some(&rpm_dep.version)
881                },
882                namespace,
883                None,
884                false,
885            )
886            .map(truncate_field);
887
888            let extracted_requirement = if !rpm_dep.version.is_empty() {
889                Some(truncate_field(format_rpm_requirement(&rpm_dep)))
890            } else {
891                None
892            };
893
894            dependencies.push(Dependency {
895                purl,
896                extracted_requirement,
897                scope: Some("install".to_string()),
898                is_runtime: Some(true),
899                is_optional: Some(false),
900                is_direct: Some(true),
901                resolved_package: None,
902                extra_data: None,
903                is_pinned: Some(!rpm_dep.version.is_empty()),
904            });
905        }
906    }
907
908    dependencies
909}
910
/// Selects which RPM relationship list `extract_rpm_relationships` reads.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RpmRelationshipKind {
    /// Read the package's `provides` entries.
    Provides,
    /// Read the package's `obsoletes` entries.
    Obsoletes,
}
915
916fn extract_rpm_relationships(
917    metadata: &PackageMetadata,
918    kind: RpmRelationshipKind,
919) -> Option<Vec<String>> {
920    let relationships = match kind {
921        RpmRelationshipKind::Provides => metadata.get_provides().ok()?,
922        RpmRelationshipKind::Obsoletes => metadata.get_obsoletes().ok()?,
923    };
924
925    let mut count = 0usize;
926    let values: Vec<String> = relationships
927        .into_iter()
928        .take(MAX_ITERATION_COUNT)
929        .map(|dep| format_rpm_requirement(&dep))
930        .filter(|value| !value.is_empty() && value != "(none)")
931        .inspect(|_| count += 1)
932        .collect();
933
934    if count >= MAX_ITERATION_COUNT {
935        warn!(
936            "RPM relationship iteration capped at {} items",
937            MAX_ITERATION_COUNT
938        );
939    }
940
941    (!values.is_empty()).then_some(values)
942}
943
944fn format_rpm_requirement(dep: &rpm::Dependency) -> String {
945    use rpm::DependencyFlags;
946
947    if dep.version.is_empty() {
948        return dep.name.clone();
949    }
950
951    let operator = if dep.flags.contains(DependencyFlags::EQUAL)
952        && dep.flags.contains(DependencyFlags::LESS)
953    {
954        "<="
955    } else if dep.flags.contains(DependencyFlags::EQUAL)
956        && dep.flags.contains(DependencyFlags::GREATER)
957    {
958        ">="
959    } else if dep.flags.contains(DependencyFlags::EQUAL) {
960        "="
961    } else if dep.flags.contains(DependencyFlags::LESS) {
962        "<"
963    } else if dep.flags.contains(DependencyFlags::GREATER) {
964        ">"
965    } else {
966        ""
967    };
968
969    if operator.is_empty() {
970        dep.name.clone()
971    } else {
972        format!("{} {} {}", dep.name, operator, dep.version)
973    }
974}
975
976fn build_evr_version(metadata: &PackageMetadata) -> Option<String> {
977    let version = metadata.get_version().ok()?;
978    let release = metadata.get_release().ok();
979
980    let mut evr = String::from(version);
981
982    if let Some(r) = release {
983        evr.push('-');
984        evr.push_str(r);
985    }
986
987    Some(evr)
988}
989
/// Splits an RPM packager string of the form `Name <email>` into `(name, email)`.
///
/// The name is the trimmed text before the first `<`; the email is the text
/// between that `<` and the next `>` after it. When no such pair exists, the
/// whole input is returned as the name and the email is `None`.
///
/// The closing `>` is searched for only after the `<`. Searching from the
/// start of the string would panic on input such as `a > b <x@y>`, where the
/// first `>` comes before the `<`.
fn parse_packager(packager: &str) -> (Option<String>, Option<String>) {
    if let Some((name, after_open)) = packager.split_once('<') {
        if let Some((email, _)) = after_open.split_once('>') {
            return (Some(name.trim().to_string()), Some(email.to_string()));
        }
    }

    (Some(packager.to_string()), None)
}
1000
1001fn build_rpm_purl(
1002    name: &str,
1003    version: Option<&str>,
1004    namespace: Option<&str>,
1005    architecture: Option<&str>,
1006    is_source: bool,
1007) -> Option<String> {
1008    use packageurl::PackageUrl;
1009
1010    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
1011
1012    if let Some(ns) = namespace {
1013        purl.with_namespace(ns).ok()?;
1014    }
1015
1016    if let Some(ver) = version {
1017        purl.with_version(ver).ok()?;
1018    }
1019
1020    if let Some(arch) = architecture {
1021        purl.add_qualifier("arch", arch).ok()?;
1022    }
1023
1024    if is_source {
1025        purl.add_qualifier("source", "true").ok()?;
1026    }
1027
1028    Some(purl.to_string())
1029}
1030
1031#[cfg(test)]
1032mod tests {
1033    use super::*;
1034    use std::fs;
1035    use std::path::PathBuf;
1036    use tempfile::NamedTempFile;
1037
1038    fn build_sparse_oversized_rpm(name: &str) -> PathBuf {
1039        let package = rpm::PackageBuilder::new(name, "1.0", "MIT", "x86_64", "Demo RPM package")
1040            .release("1")
1041            .build()
1042            .unwrap();
1043
1044        let temp_file = NamedTempFile::new().unwrap();
1045        package.write_file(temp_file.path()).unwrap();
1046        let oversized_len = MAX_MANIFEST_SIZE + 1_048_576;
1047        fs::OpenOptions::new()
1048            .write(true)
1049            .open(temp_file.path())
1050            .unwrap()
1051            .set_len(oversized_len)
1052            .unwrap();
1053
1054        temp_file.into_temp_path().keep().unwrap()
1055    }
1056
1057    #[test]
1058    fn test_rpm_parser_is_match() {
1059        assert!(RpmParser::is_match(&PathBuf::from("package.rpm")));
1060        assert!(RpmParser::is_match(&PathBuf::from("package.srpm")));
1061        assert!(RpmParser::is_match(&PathBuf::from(
1062            "test-1.0-1.el7.x86_64.rpm"
1063        )));
1064        assert!(!RpmParser::is_match(&PathBuf::from("package.deb")));
1065        assert!(!RpmParser::is_match(&PathBuf::from("package.tar.gz")));
1066    }
1067
1068    #[test]
1069    fn test_rpm_parser_matches_hash_named_source_rpm_by_magic() {
1070        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1071        if !source_fixture.exists() {
1072            return;
1073        }
1074
1075        let temp_file = NamedTempFile::new().unwrap();
1076        fs::copy(&source_fixture, temp_file.path()).unwrap();
1077
1078        assert!(RpmParser::is_match(temp_file.path()));
1079    }
1080
1081    #[test]
1082    fn test_rpm_parser_matches_pack_named_rpm_by_magic() {
1083        let source_fixture = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1084        if !source_fixture.exists() {
1085            return;
1086        }
1087
1088        let temp_dir = tempfile::TempDir::new().unwrap();
1089        let pack_path = temp_dir.path().join("setup-2.5.49-b1.src.pack");
1090        fs::copy(&source_fixture, &pack_path).unwrap();
1091
1092        assert!(RpmParser::is_match(&pack_path));
1093        assert!(path_looks_like_rpm_archive(&pack_path));
1094    }
1095
1096    #[test]
1097    fn test_build_evr_version_simple() {
1098        let evr = "1.0-1";
1099        assert_eq!(evr, "1.0-1");
1100    }
1101
1102    #[test]
1103    fn test_build_evr_version_with_epoch() {
1104        let evr = "2:1.0-1";
1105        assert!(evr.starts_with("2:"));
1106    }
1107
1108    #[test]
1109    fn test_parse_packager() {
1110        let (name, email) = parse_packager("John Doe <john@example.com>");
1111        assert_eq!(name, Some("John Doe".to_string()));
1112        assert_eq!(email, Some("john@example.com".to_string()));
1113
1114        let (name2, email2) = parse_packager("Plain Name");
1115        assert_eq!(name2, Some("Plain Name".to_string()));
1116        assert_eq!(email2, None);
1117    }
1118
1119    #[test]
1120    fn test_build_rpm_purl() {
1121        let purl = build_rpm_purl(
1122            "bash",
1123            Some("4.4.19-1.el7"),
1124            Some("fedora"),
1125            Some("x86_64"),
1126            false,
1127        );
1128        assert!(purl.is_some());
1129        let purl_str = purl.unwrap();
1130        assert!(purl_str.contains("pkg:rpm/fedora/bash"));
1131        assert!(purl_str.contains("4.4.19-1.el7"));
1132        assert!(purl_str.contains("arch=x86_64"));
1133    }
1134
1135    #[test]
1136    fn test_parse_real_rpm() {
1137        let test_file = PathBuf::from("testdata/rpm/Eterm-0.9.3-5mdv2007.0.rpm");
1138        if !test_file.exists() {
1139            eprintln!("Warning: Test file not found, skipping test");
1140            return;
1141        }
1142
1143        let pkg = RpmParser::extract_first_package(&test_file);
1144
1145        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
1146        assert_eq!(pkg.name, Some("Eterm".to_string()));
1147        assert_eq!(pkg.version, Some("0.9.3-5mdv2007.0".to_string()));
1148    }
1149
1150    #[test]
1151    fn test_parse_oversized_rpm_from_headers_only() {
1152        let test_file = build_sparse_oversized_rpm("oversized-demo");
1153
1154        assert!(RpmParser::is_match(&test_file));
1155
1156        let pkg = RpmParser::extract_first_package(&test_file);
1157
1158        assert_eq!(pkg.datasource_id, Some(DatasourceId::RpmArchive));
1159        assert_eq!(pkg.package_type, Some(PackageType::Rpm));
1160        assert_eq!(pkg.name.as_deref(), Some("oversized-demo"));
1161        assert_eq!(pkg.version.as_deref(), Some("1.0-1"));
1162
1163        fs::remove_file(test_file).unwrap();
1164    }
1165
1166    #[test]
1167    fn test_build_rpm_purl_no_namespace() {
1168        let purl = build_rpm_purl("package", Some("1.0-1"), None, Some("x86_64"), false);
1169        assert!(purl.is_some());
1170        let purl_str = purl.unwrap();
1171        assert!(purl_str.starts_with("pkg:rpm/package@"));
1172        assert!(purl_str.contains("arch=x86_64"));
1173    }
1174
1175    #[test]
1176    fn test_rpm_dependency_extraction() {
1177        use rpm::{Dependency as RpmDependency, DependencyFlags};
1178
1179        let rpm_dep = RpmDependency {
1180            name: "libc.so.6".to_string(),
1181            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
1182            version: "2.2.5".to_string(),
1183        };
1184
1185        let formatted = format_rpm_requirement(&rpm_dep);
1186        assert_eq!(formatted, "libc.so.6 >= 2.2.5");
1187
1188        let rpm_dep_no_version = RpmDependency {
1189            name: "bash".to_string(),
1190            flags: DependencyFlags::ANY,
1191            version: String::new(),
1192        };
1193
1194        let formatted_no_ver = format_rpm_requirement(&rpm_dep_no_version);
1195        assert_eq!(formatted_no_ver, "bash");
1196    }
1197
1198    #[test]
1199    fn test_parse_packager_with_parentheses() {
1200        let (name, email) = parse_packager("John Doe (Company) <john@example.com>");
1201        assert_eq!(name, Some("John Doe (Company)".to_string()));
1202        assert_eq!(email, Some("john@example.com".to_string()));
1203    }
1204
1205    #[test]
1206    fn test_parse_packager_email_only() {
1207        let (name, email) = parse_packager("<noreply@example.com>");
1208        assert!(name.is_none() || name == Some(String::new()));
1209        assert_eq!(email, Some("noreply@example.com".to_string()));
1210    }
1211
1212    #[test]
1213    fn test_rpm_fping_package() {
1214        let test_file = PathBuf::from("testdata/rpm/fping-2.4b2-10.fc12.x86_64.rpm");
1215        if !test_file.exists() {
1216            return;
1217        }
1218
1219        let pkg = RpmParser::extract_first_package(&test_file);
1220        assert_eq!(pkg.name, Some("fping".to_string()));
1221        assert_eq!(pkg.version, Some("2.4b2-10.fc12".to_string()));
1222    }
1223
1224    #[test]
1225    fn test_rpm_archive_extracts_additional_metadata_fields() {
1226        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1227        if !test_file.exists() {
1228            return;
1229        }
1230
1231        let pkg = RpmParser::extract_first_package(&test_file);
1232
1233        assert_eq!(pkg.name.as_deref(), Some("setup"));
1234        assert_eq!(
1235            pkg.qualifiers
1236                .as_ref()
1237                .and_then(|q| q.get("arch"))
1238                .map(String::as_str),
1239            Some("noarch")
1240        );
1241        assert!(!pkg.keywords.is_empty());
1242        assert!(pkg.size.is_some());
1243        assert!(
1244            pkg.parties
1245                .iter()
1246                .any(|party| party.role.as_deref() == Some("packager"))
1247        );
1248        assert!(
1249            pkg.qualifiers
1250                .as_ref()
1251                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
1252        );
1253    }
1254
1255    #[test]
1256    fn test_source_rpm_sets_source_qualifier() {
1257        let test_file = PathBuf::from("testdata/rpm/setup-2.5.49-b1.src.rpm");
1258        if !test_file.exists() {
1259            return;
1260        }
1261
1262        let pkg = RpmParser::extract_first_package(&test_file);
1263
1264        assert!(
1265            pkg.qualifiers
1266                .as_ref()
1267                .is_some_and(|q| q.get("source") == Some(&"true".to_string()))
1268        );
1269        assert!(
1270            pkg.purl
1271                .as_ref()
1272                .is_some_and(|purl| purl.contains("source=true"))
1273        );
1274    }
1275
1276    #[test]
1277    fn test_rpm_archive_extracts_vcs_and_source_metadata() {
1278        let package = rpm::PackageBuilder::new(
1279            "thunar-sendto-clamtk",
1280            "0.08",
1281            "GPL-2.0-or-later",
1282            "noarch",
1283            "Simple virus scanning extension for Thunar",
1284        )
1285        .release("2.fc40")
1286        .vendor("Fedora Project")
1287        .packager("Fedora Release Engineering <releng@fedoraproject.org>")
1288        .group("Applications/System")
1289        .vcs("git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e")
1290        .build()
1291        .unwrap();
1292
1293        let temp_file = NamedTempFile::new().unwrap();
1294        package.write_file(temp_file.path()).unwrap();
1295
1296        let pkg = RpmParser::extract_first_package(temp_file.path());
1297
1298        assert_eq!(pkg.namespace.as_deref(), Some("fedora"));
1299        assert_eq!(
1300            pkg.vcs_url.as_deref(),
1301            Some(
1302                "git+https://src.fedoraproject.org/rpms/thunar-sendto-clamtk.git#5a3f8e92b45f46b464e6924c79d4bf3e11bb1f0e",
1303            )
1304        );
1305        assert!(
1306            pkg.extra_data
1307                .as_ref()
1308                .is_some_and(|extra| extra.contains_key("build_time"))
1309        );
1310        assert!(!pkg.keywords.is_empty());
1311    }
1312
1313    #[test]
1314    fn test_rpm_archive_preserves_provides_and_obsoletes_relationships() {
1315        use rpm::{Dependency as RpmDependency, DependencyFlags};
1316
1317        let package = rpm::PackageBuilder::new(
1318            "demo-rpm",
1319            "1.0.0",
1320            "MIT",
1321            "noarch",
1322            "RPM relationship metadata fixture",
1323        )
1324        .release("1")
1325        .provides(RpmDependency {
1326            name: "demo-rpm-virtual".to_string(),
1327            flags: DependencyFlags::GREATER | DependencyFlags::EQUAL,
1328            version: "1.0.0".to_string(),
1329        })
1330        .obsoletes(RpmDependency {
1331            name: "old-demo-rpm".to_string(),
1332            flags: DependencyFlags::LESS,
1333            version: "0.9.0".to_string(),
1334        })
1335        .build()
1336        .unwrap();
1337
1338        let temp_file = NamedTempFile::new().unwrap();
1339        package.write_file(temp_file.path()).unwrap();
1340
1341        let pkg = RpmParser::extract_first_package(temp_file.path());
1342        let extra = pkg.extra_data.as_ref().expect("extra_data should exist");
1343
1344        let provides = extra
1345            .get("provides")
1346            .and_then(|value| value.as_array())
1347            .expect("provides should be present");
1348        assert!(
1349            provides
1350                .iter()
1351                .any(|value| value.as_str() == Some("demo-rpm-virtual >= 1.0.0"))
1352        );
1353
1354        let obsoletes = extra
1355            .get("obsoletes")
1356            .and_then(|value| value.as_array())
1357            .expect("obsoletes should be present");
1358        assert!(
1359            obsoletes
1360                .iter()
1361                .any(|value| value.as_str() == Some("old-demo-rpm < 0.9.0"))
1362        );
1363    }
1364
1365    #[test]
1366    fn test_rpm_archive_normalizes_declared_license_expression() {
1367        let package = rpm::PackageBuilder::new(
1368            "demo-license",
1369            "1.0.0",
1370            "LGPLv2",
1371            "noarch",
1372            "RPM declared license normalization fixture",
1373        )
1374        .release("1")
1375        .build()
1376        .unwrap();
1377
1378        let temp_file = NamedTempFile::new().unwrap();
1379        package.write_file(temp_file.path()).unwrap();
1380
1381        let pkg = RpmParser::extract_first_package(temp_file.path());
1382
1383        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("LGPLv2"));
1384        assert_eq!(
1385            pkg.declared_license_expression.as_deref(),
1386            Some("lgpl-2.0-only")
1387        );
1388        assert_eq!(
1389            pkg.declared_license_expression_spdx.as_deref(),
1390            Some("LGPL-2.0-only")
1391        );
1392        assert_eq!(pkg.license_detections.len(), 1);
1393        assert_eq!(
1394            pkg.license_detections[0].license_expression_spdx,
1395            "LGPL-2.0-only"
1396        );
1397        assert_eq!(
1398            pkg.license_detections[0].matches[0].matched_text.as_deref(),
1399            Some("LGPLv2")
1400        );
1401    }
1402
1403    #[test]
1404    fn test_rpm_archive_normalizes_public_domain_declared_license_expression() {
1405        let package = rpm::PackageBuilder::new(
1406            "demo-public-domain",
1407            "1.0.0",
1408            "public domain",
1409            "noarch",
1410            "RPM public domain normalization fixture",
1411        )
1412        .release("1")
1413        .build()
1414        .unwrap();
1415
1416        let temp_file = NamedTempFile::new().unwrap();
1417        package.write_file(temp_file.path()).unwrap();
1418
1419        let pkg = RpmParser::extract_first_package(temp_file.path());
1420
1421        assert_eq!(
1422            pkg.extracted_license_statement.as_deref(),
1423            Some("public domain")
1424        );
1425        assert_eq!(
1426            pkg.declared_license_expression.as_deref(),
1427            Some("licenseref-scancode-public-domain")
1428        );
1429        assert_eq!(
1430            pkg.declared_license_expression_spdx.as_deref(),
1431            Some("LicenseRef-scancode-public-domain")
1432        );
1433        assert_eq!(pkg.license_detections.len(), 1);
1434    }
1435
1436    #[test]
1437    fn test_normalize_rpm_declared_license_rewrites_compound_aliases() {
1438        let normalized = normalize_rpm_declared_license("BSD and GPLv2+")
1439            .expect("compound RPM license should normalize");
1440
1441        assert_eq!(
1442            normalized.declared_license_expression_spdx,
1443            "BSD-3-Clause AND GPL-2.0-or-later"
1444        );
1445    }
1446}