Skip to main content

provenant/parsers/
rpm_specfile.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM .spec files.
5//!
6//! Extracts package metadata from RPM specfiles, which define how RPM packages
7//! are built. This is a beyond-parity implementation - the Python version is
8//! a complete stub with "TODO: implement me!!" comments.
9//!
10//! # Supported Formats
11//! - *.spec (RPM specfiles)
12//!
13//! # Key Features
14//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
15//! - Dependency extraction (BuildRequires, Requires, Provides)
16//! - %description section parsing
17//! - Basic macro expansion (%{name}, %{version}, %{release})
18//! - %define and %global macro definitions
19//! - Conditional macro handling (%{?dist})
20//! - Multi-line dependency lists (comma-separated)
21//! - Scoped Requires (Requires(post), Requires(preun), etc.)
22//!
23//! # Implementation Notes
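//! - Parses the preamble (everything before the first section marker such as
//!   %description, %prep or %build) plus the first %description section;
//!   all other sections are ignored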
25//! - Tags are case-insensitive per RPM spec format
26//! - Simple macro expansion for common patterns
27//! - BuildRequires dependencies have is_runtime=false, scope="build"
28//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
29//! - datasource_id is "rpm_specfile"
30
31use std::collections::HashMap;
32use std::path::Path;
33use std::sync::LazyLock;
34
35use crate::parser_warn as warn;
36use packageurl::PackageUrl;
37use regex::Regex;
38
39use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
40use crate::parsers::utils::{
41    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
42};
43
44use super::PackageParser;
45
46static RE_CONDITIONAL_MACRO: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(r"%\{\?[^}]+\}").expect("valid regex: %{?...} pattern is a compile-time constant")
48});
49
50const PACKAGE_TYPE: PackageType = PackageType::Rpm;
51
52/// Parser for RPM specfiles
53pub struct RpmSpecfileParser;
54
55impl PackageParser for RpmSpecfileParser {
56    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
57
58    fn is_match(path: &Path) -> bool {
59        path.extension()
60            .and_then(|e| e.to_str())
61            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
62    }
63
64    fn extract_packages(path: &Path) -> Vec<PackageData> {
65        let content = match read_file_to_string(path, None) {
66            Ok(c) => c,
67            Err(e) => {
68                warn!("Failed to read RPM specfile {:?}: {}", path, e);
69                return vec![PackageData {
70                    package_type: Some(PACKAGE_TYPE),
71                    datasource_id: Some(DatasourceId::RpmSpecfile),
72                    ..Default::default()
73                }];
74            }
75        };
76
77        vec![parse_specfile(&content)]
78    }
79
80    fn metadata() -> Vec<super::metadata::ParserMetadata> {
81        vec![super::metadata::ParserMetadata {
82            description: "RPM specfile",
83            file_patterns: &["**/*.spec"],
84            package_type: "rpm",
85            primary_language: "",
86            documentation_url: Some(
87                "https://rpm-software-management.github.io/rpm/manual/spec.html",
88            ),
89        }]
90    }
91}
92
93fn parse_specfile(content: &str) -> PackageData {
94    let mut tags: HashMap<String, String> = HashMap::new();
95    let mut macros: HashMap<String, String> = HashMap::new();
96    let mut build_requires: Vec<String> = Vec::new();
97    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
98    let mut provides: Vec<String> = Vec::new();
99    let mut description: Option<String> = None;
100
101    let lines: Vec<&str> = content.lines().collect();
102    let mut i = 0;
103    let mut iterations: usize = 0;
104
105    while i < lines.len() {
106        iterations += 1;
107        if iterations > MAX_ITERATION_COUNT {
108            warn!(
109                "RPM specfile preamble iteration limit ({}) exceeded",
110                MAX_ITERATION_COUNT
111            );
112            break;
113        }
114        let line = lines[i].trim();
115
116        // Stop at first section marker (%, but not %define/%global)
117        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
118            if is_conditional_preamble_directive(line) {
119                i += 1;
120                continue;
121            }
122            break;
123        }
124
125        // Skip empty lines and comments
126        if line.is_empty() || line.starts_with('#') {
127            i += 1;
128            continue;
129        }
130
131        // Parse %define and %global macros
132        if let Some(stripped) = line
133            .strip_prefix("%define")
134            .or(line.strip_prefix("%global"))
135        {
136            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
137            if parts.len() == 2 {
138                macros.insert(
139                    parts[0].to_string(),
140                    truncate_field(parts[1].trim().to_string()),
141                );
142            }
143            i += 1;
144            continue;
145        }
146
147        // Parse Tag: Value lines
148        if let Some(colon_pos) = line.find(':') {
149            let tag = line[..colon_pos].trim().to_lowercase();
150            let value = line[colon_pos + 1..].trim().to_string();
151
152            match tag.as_str() {
153                "buildrequires" => {
154                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
155                        let dep = dep.trim();
156                        if !dep.is_empty() {
157                            build_requires.push(dep.to_string());
158                        }
159                    }
160                }
161                t if t.starts_with("requires") => {
162                    // Parse Requires, Requires(post), Requires(preun), etc.
163                    let scope = if let Some(start) = t.find('(') {
164                        if let Some(end) = t.find(')') {
165                            Some(t[start + 1..end].to_string())
166                        } else {
167                            Some("runtime".to_string())
168                        }
169                    } else {
170                        Some("runtime".to_string())
171                    };
172
173                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
174                        let dep = dep.trim();
175                        if !dep.is_empty() {
176                            requires.push((dep.to_string(), scope.clone()));
177                        }
178                    }
179                }
180                "provides" => {
181                    for prov in value.split(',').take(MAX_ITERATION_COUNT) {
182                        let prov = prov.trim();
183                        if !prov.is_empty() {
184                            provides.push(prov.to_string());
185                        }
186                    }
187                }
188                _ => {
189                    tags.insert(tag, value);
190                }
191            }
192        }
193
194        i += 1;
195    }
196
197    // Now parse %description section if present
198    let mut desc_iterations: usize = 0;
199    while i < lines.len() {
200        desc_iterations += 1;
201        if desc_iterations > MAX_ITERATION_COUNT {
202            warn!(
203                "RPM specfile description search iteration limit ({}) exceeded",
204                MAX_ITERATION_COUNT
205            );
206            break;
207        }
208        let line = lines[i].trim();
209
210        if line.starts_with("%description") {
211            i += 1;
212            let mut desc_lines = Vec::new();
213
214            while i < lines.len() {
215                desc_iterations += 1;
216                if desc_iterations > MAX_ITERATION_COUNT {
217                    warn!(
218                        "RPM specfile description iteration limit ({}) exceeded",
219                        MAX_ITERATION_COUNT
220                    );
221                    break;
222                }
223                let desc_line = lines[i];
224                let trimmed = desc_line.trim();
225
226                // Stop at next section
227                if trimmed.starts_with('%') {
228                    break;
229                }
230
231                // Don't include empty lines at start
232                if !desc_lines.is_empty() || !trimmed.is_empty() {
233                    desc_lines.push(desc_line);
234                }
235
236                i += 1;
237            }
238
239            // Trim trailing empty lines
240            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
241                desc_lines.pop();
242            }
243
244            if !desc_lines.is_empty() {
245                description = Some(desc_lines.join("\n"));
246            }
247
248            break;
249        }
250
251        i += 1;
252    }
253
254    // Extract basic metadata from tags
255    let name = tags.get("name").cloned();
256    let version = tags.get("version").cloned();
257    let release = tags.get("release").cloned();
258
259    // Store name and version in macros for expansion
260    if let Some(ref n) = name {
261        macros.insert("name".to_string(), n.clone());
262    }
263    if let Some(ref v) = version {
264        macros.insert("version".to_string(), v.clone());
265    }
266    if let Some(ref r) = release {
267        macros.insert("release".to_string(), r.clone());
268    }
269
270    // Expand macros in all tag values
271    let mut expanded_tags: HashMap<String, String> = HashMap::new();
272    for (tag, value) in tags.iter() {
273        expanded_tags.insert(tag.clone(), truncate_field(expand_macros(value, &macros)));
274    }
275
276    // Get expanded values
277    let name = expanded_tags.get("name").cloned();
278    let version = expanded_tags.get("version").cloned();
279    let release = expanded_tags.get("release").cloned();
280    let summary = expanded_tags.get("summary").cloned();
281    let license = expanded_tags.get("license").cloned();
282    let url = expanded_tags.get("url").cloned();
283    let group = expanded_tags.get("group").cloned();
284    let epoch = expanded_tags.get("epoch").cloned();
285    let packager = expanded_tags.get("packager").cloned();
286
287    let download_url = expanded_tags
288        .get("source")
289        .or_else(|| expanded_tags.get("source0"))
290        .cloned()
291        .map(truncate_field);
292
293    // Create parties
294    let mut parties = Vec::new();
295    if let Some(pkg) = packager {
296        let (name_opt, email_opt) = split_name_email(&pkg);
297        parties.push(Party {
298            r#type: None,
299            role: Some("packager".to_string()),
300            name: name_opt,
301            email: email_opt,
302            url: None,
303            organization: None,
304            organization_url: None,
305            timezone: None,
306        });
307    }
308
309    // Create dependencies
310    let mut dependencies = Vec::new();
311
312    for dep_str in build_requires.into_iter().take(MAX_ITERATION_COUNT) {
313        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
314        let dep_name = extract_dep_name(&dep_str);
315        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
316
317        dependencies.push(Dependency {
318            purl,
319            extracted_requirement: Some(dep_str),
320            scope: Some("build".to_string()),
321            is_runtime: Some(false),
322            is_optional: Some(false),
323            is_direct: Some(true),
324            is_pinned: None,
325            resolved_package: None,
326            extra_data: None,
327        });
328    }
329
330    for (dep_str, scope) in requires.into_iter().take(MAX_ITERATION_COUNT) {
331        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
332        let dep_name = extract_dep_name(&dep_str);
333        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
334
335        dependencies.push(Dependency {
336            purl,
337            extracted_requirement: Some(dep_str),
338            scope,
339            is_runtime: Some(true),
340            is_optional: Some(false),
341            is_direct: Some(true),
342            is_pinned: None,
343            resolved_package: None,
344            extra_data: None,
345        });
346    }
347
348    // Build PURL
349    let purl = name
350        .as_ref()
351        .and_then(|n| build_rpm_purl(n, version.as_deref()))
352        .map(truncate_field);
353
354    // Build extra_data for non-standard fields
355    let mut extra_data = HashMap::new();
356    if let Some(r) = release {
357        extra_data.insert("release".to_string(), serde_json::Value::String(r));
358    }
359    if let Some(e) = epoch {
360        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
361    }
362    if let Some(g) = group {
363        extra_data.insert("group".to_string(), serde_json::Value::String(g));
364    }
365    if !provides.is_empty() {
366        let provides_json: Vec<serde_json::Value> = provides
367            .into_iter()
368            .take(MAX_ITERATION_COUNT)
369            .map(|prov| serde_json::Value::String(truncate_field(expand_macros(&prov, &macros))))
370            .collect();
371        extra_data.insert(
372            "provides".to_string(),
373            serde_json::Value::Array(provides_json),
374        );
375    }
376
377    let extra_data_opt = if extra_data.is_empty() {
378        None
379    } else {
380        Some(extra_data)
381    };
382
383    // Use %description if available, otherwise use Summary
384    let description_text = description.map(truncate_field).or(summary);
385
386    PackageData {
387        datasource_id: Some(DatasourceId::RpmSpecfile),
388        package_type: Some(PACKAGE_TYPE),
389        namespace: None, // RPM namespace is optional
390        name,
391        version,
392        description: description_text,
393        homepage_url: url,
394        download_url,
395        extracted_license_statement: license,
396        parties,
397        dependencies,
398        purl,
399        extra_data: extra_data_opt,
400        ..Default::default()
401    }
402}
403
/// Reports whether `line` begins with one of the conditional directives that
/// may appear inside the preamble (`%if`, `%ifarch`, `%else`, `%endif`, ...).
///
/// The check is a plain, case-sensitive prefix match on the line as given;
/// no trimming is performed here.
fn is_conditional_preamble_directive(line: &str) -> bool {
    const CONDITIONAL_DIRECTIVES: [&str; 8] = [
        "%if", "%ifarch", "%ifnarch", "%ifos", "%ifnos", "%elif", "%else", "%endif",
    ];

    for directive in CONDITIONAL_DIRECTIVES {
        if line.starts_with(directive) {
            return true;
        }
    }
    false
}
411
412/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
413fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
414    let mut result = s.to_string();
415
416    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
417
418    // Expand simple macros %{macro}
419    for (key, value) in macros {
420        let pattern = format!("%{{{}}}", key);
421        result = result.replace(&pattern, value);
422    }
423
424    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
425
426    result
427}
428
429/// Extracts the package name from a dependency string (removes version constraints)
430fn extract_dep_name(dep: &str) -> String {
431    let parts: Vec<&str> = dep.split(&['>', '<', '='][..]).map(|s| s.trim()).collect();
432
433    truncate_field(parts[0].to_string())
434}
435
436/// Builds a package URL for RPM packages.
437/// Returns `None` for file-path dependencies (e.g. `/bin/bash`) since they
438/// are not valid purl names and would produce broken `%2F`-encoded results.
439fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
440    if name.starts_with('/') {
441        return None;
442    }
443
444    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
445
446    if let Some(ver) = version {
447        purl.with_version(ver).ok()?;
448    }
449
450    Some(purl.to_string())
451}