Skip to main content

provenant/parsers/
rpm_specfile.rs

//! Parser for RPM .spec files.
//!
//! Extracts package metadata from RPM specfiles, which define how RPM packages
//! are built. This is a beyond-parity implementation: the Python version is
//! a complete stub with "TODO: implement me!!" comments.
//!
//! # Supported Formats
//! - *.spec (RPM specfiles)
//!
//! # Key Features
//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
//! - Dependency extraction (BuildRequires, Requires, Provides)
//! - %description section parsing
//! - Basic macro expansion (%{name}, %{version}, %{release})
//! - %define and %global macro definitions
//! - Conditional macro handling (%{?dist} and similar references are stripped)
//! - Multi-line dependency lists (repeated tag lines, each optionally comma-separated)
//! - Scoped Requires (Requires(post), Requires(preun), etc.)
//!
//! # Implementation Notes
//! - Parses only the preamble (before %prep, %build, etc. sections) plus the
//!   first %description section found after it
//! - Tags are case-insensitive per RPM spec format
//! - Simple macro expansion for common patterns
//! - BuildRequires dependencies have is_runtime=false, scope="build"
//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
//! - datasource_id is "rpm_specfile"
27
28use std::collections::HashMap;
29use std::path::Path;
30use std::sync::LazyLock;
31
32use crate::parser_warn as warn;
33use packageurl::PackageUrl;
34use regex::Regex;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
37use crate::parsers::utils::{
38    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
39};
40
41use super::PackageParser;
42
/// Matches conditional macro references of the form `%{?...}` (for example `%{?dist}`):
/// the literal `%{?`, one or more characters other than `}`, then a closing `}`.
/// Compiled lazily on first use.
static RE_CONDITIONAL_MACRO: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"%\{\?[^}]+\}").unwrap());
45
/// Package type attached to every `PackageData` produced by this parser and used as the
/// type component of the package URLs it builds.
const PACKAGE_TYPE: PackageType = PackageType::Rpm;
47
/// Parser for RPM specfiles (`*.spec`).
///
/// Stateless unit struct; all behaviour lives in its [`PackageParser`] implementation.
pub struct RpmSpecfileParser;
50
51impl PackageParser for RpmSpecfileParser {
52    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
53
54    fn is_match(path: &Path) -> bool {
55        path.extension()
56            .and_then(|e| e.to_str())
57            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
58    }
59
60    fn extract_packages(path: &Path) -> Vec<PackageData> {
61        let content = match read_file_to_string(path, None) {
62            Ok(c) => c,
63            Err(e) => {
64                warn!("Failed to read RPM specfile {:?}: {}", path, e);
65                return vec![PackageData {
66                    package_type: Some(PACKAGE_TYPE),
67                    datasource_id: Some(DatasourceId::RpmSpecfile),
68                    ..Default::default()
69                }];
70            }
71        };
72
73        vec![parse_specfile(&content)]
74    }
75}
76
77fn parse_specfile(content: &str) -> PackageData {
78    let mut tags: HashMap<String, String> = HashMap::new();
79    let mut macros: HashMap<String, String> = HashMap::new();
80    let mut build_requires: Vec<String> = Vec::new();
81    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
82    let mut provides: Vec<String> = Vec::new();
83    let mut description: Option<String> = None;
84
85    let lines: Vec<&str> = content.lines().collect();
86    let mut i = 0;
87    let mut iterations: usize = 0;
88
89    while i < lines.len() {
90        iterations += 1;
91        if iterations > MAX_ITERATION_COUNT {
92            warn!(
93                "RPM specfile preamble iteration limit ({}) exceeded",
94                MAX_ITERATION_COUNT
95            );
96            break;
97        }
98        let line = lines[i].trim();
99
100        // Stop at first section marker (%, but not %define/%global)
101        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
102            if is_conditional_preamble_directive(line) {
103                i += 1;
104                continue;
105            }
106            break;
107        }
108
109        // Skip empty lines and comments
110        if line.is_empty() || line.starts_with('#') {
111            i += 1;
112            continue;
113        }
114
115        // Parse %define and %global macros
116        if let Some(stripped) = line
117            .strip_prefix("%define")
118            .or(line.strip_prefix("%global"))
119        {
120            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
121            if parts.len() == 2 {
122                macros.insert(
123                    parts[0].to_string(),
124                    truncate_field(parts[1].trim().to_string()),
125                );
126            }
127            i += 1;
128            continue;
129        }
130
131        // Parse Tag: Value lines
132        if let Some(colon_pos) = line.find(':') {
133            let tag = line[..colon_pos].trim().to_lowercase();
134            let value = line[colon_pos + 1..].trim().to_string();
135
136            match tag.as_str() {
137                "buildrequires" => {
138                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
139                        let dep = dep.trim();
140                        if !dep.is_empty() {
141                            build_requires.push(dep.to_string());
142                        }
143                    }
144                }
145                t if t.starts_with("requires") => {
146                    // Parse Requires, Requires(post), Requires(preun), etc.
147                    let scope = if let Some(start) = t.find('(') {
148                        if let Some(end) = t.find(')') {
149                            Some(t[start + 1..end].to_string())
150                        } else {
151                            Some("runtime".to_string())
152                        }
153                    } else {
154                        Some("runtime".to_string())
155                    };
156
157                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
158                        let dep = dep.trim();
159                        if !dep.is_empty() {
160                            requires.push((dep.to_string(), scope.clone()));
161                        }
162                    }
163                }
164                "provides" => {
165                    for prov in value.split(',').take(MAX_ITERATION_COUNT) {
166                        let prov = prov.trim();
167                        if !prov.is_empty() {
168                            provides.push(prov.to_string());
169                        }
170                    }
171                }
172                _ => {
173                    tags.insert(tag, value);
174                }
175            }
176        }
177
178        i += 1;
179    }
180
181    // Now parse %description section if present
182    let mut desc_iterations: usize = 0;
183    while i < lines.len() {
184        desc_iterations += 1;
185        if desc_iterations > MAX_ITERATION_COUNT {
186            warn!(
187                "RPM specfile description search iteration limit ({}) exceeded",
188                MAX_ITERATION_COUNT
189            );
190            break;
191        }
192        let line = lines[i].trim();
193
194        if line.starts_with("%description") {
195            i += 1;
196            let mut desc_lines = Vec::new();
197
198            while i < lines.len() {
199                desc_iterations += 1;
200                if desc_iterations > MAX_ITERATION_COUNT {
201                    warn!(
202                        "RPM specfile description iteration limit ({}) exceeded",
203                        MAX_ITERATION_COUNT
204                    );
205                    break;
206                }
207                let desc_line = lines[i];
208                let trimmed = desc_line.trim();
209
210                // Stop at next section
211                if trimmed.starts_with('%') {
212                    break;
213                }
214
215                // Don't include empty lines at start
216                if !desc_lines.is_empty() || !trimmed.is_empty() {
217                    desc_lines.push(desc_line);
218                }
219
220                i += 1;
221            }
222
223            // Trim trailing empty lines
224            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
225                desc_lines.pop();
226            }
227
228            if !desc_lines.is_empty() {
229                description = Some(desc_lines.join("\n"));
230            }
231
232            break;
233        }
234
235        i += 1;
236    }
237
238    // Extract basic metadata from tags
239    let name = tags.get("name").cloned();
240    let version = tags.get("version").cloned();
241    let release = tags.get("release").cloned();
242
243    // Store name and version in macros for expansion
244    if let Some(ref n) = name {
245        macros.insert("name".to_string(), n.clone());
246    }
247    if let Some(ref v) = version {
248        macros.insert("version".to_string(), v.clone());
249    }
250    if let Some(ref r) = release {
251        macros.insert("release".to_string(), r.clone());
252    }
253
254    // Expand macros in all tag values
255    let mut expanded_tags: HashMap<String, String> = HashMap::new();
256    for (tag, value) in tags.iter() {
257        expanded_tags.insert(tag.clone(), truncate_field(expand_macros(value, &macros)));
258    }
259
260    // Get expanded values
261    let name = expanded_tags.get("name").cloned();
262    let version = expanded_tags.get("version").cloned();
263    let release = expanded_tags.get("release").cloned();
264    let summary = expanded_tags.get("summary").cloned();
265    let license = expanded_tags.get("license").cloned();
266    let url = expanded_tags.get("url").cloned();
267    let group = expanded_tags.get("group").cloned();
268    let epoch = expanded_tags.get("epoch").cloned();
269    let packager = expanded_tags.get("packager").cloned();
270
271    let download_url = expanded_tags
272        .get("source")
273        .or_else(|| expanded_tags.get("source0"))
274        .cloned()
275        .map(truncate_field);
276
277    // Create parties
278    let mut parties = Vec::new();
279    if let Some(pkg) = packager {
280        let (name_opt, email_opt) = split_name_email(&pkg);
281        parties.push(Party {
282            r#type: None,
283            role: Some("packager".to_string()),
284            name: name_opt,
285            email: email_opt,
286            url: None,
287            organization: None,
288            organization_url: None,
289            timezone: None,
290        });
291    }
292
293    // Create dependencies
294    let mut dependencies = Vec::new();
295
296    for dep_str in build_requires.into_iter().take(MAX_ITERATION_COUNT) {
297        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
298        let dep_name = extract_dep_name(&dep_str);
299        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
300
301        dependencies.push(Dependency {
302            purl,
303            extracted_requirement: Some(dep_str),
304            scope: Some("build".to_string()),
305            is_runtime: Some(false),
306            is_optional: Some(false),
307            is_direct: Some(true),
308            is_pinned: None,
309            resolved_package: None,
310            extra_data: None,
311        });
312    }
313
314    for (dep_str, scope) in requires.into_iter().take(MAX_ITERATION_COUNT) {
315        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
316        let dep_name = extract_dep_name(&dep_str);
317        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
318
319        dependencies.push(Dependency {
320            purl,
321            extracted_requirement: Some(dep_str),
322            scope,
323            is_runtime: Some(true),
324            is_optional: Some(false),
325            is_direct: Some(true),
326            is_pinned: None,
327            resolved_package: None,
328            extra_data: None,
329        });
330    }
331
332    // Build PURL
333    let purl = name
334        .as_ref()
335        .and_then(|n| build_rpm_purl(n, version.as_deref()))
336        .map(truncate_field);
337
338    // Build extra_data for non-standard fields
339    let mut extra_data = HashMap::new();
340    if let Some(r) = release {
341        extra_data.insert("release".to_string(), serde_json::Value::String(r));
342    }
343    if let Some(e) = epoch {
344        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
345    }
346    if let Some(g) = group {
347        extra_data.insert("group".to_string(), serde_json::Value::String(g));
348    }
349    if !provides.is_empty() {
350        let provides_json: Vec<serde_json::Value> = provides
351            .into_iter()
352            .take(MAX_ITERATION_COUNT)
353            .map(|prov| serde_json::Value::String(truncate_field(expand_macros(&prov, &macros))))
354            .collect();
355        extra_data.insert(
356            "provides".to_string(),
357            serde_json::Value::Array(provides_json),
358        );
359    }
360
361    let extra_data_opt = if extra_data.is_empty() {
362        None
363    } else {
364        Some(extra_data)
365    };
366
367    // Use %description if available, otherwise use Summary
368    let description_text = description.map(truncate_field).or(summary);
369
370    PackageData {
371        datasource_id: Some(DatasourceId::RpmSpecfile),
372        package_type: Some(PACKAGE_TYPE),
373        namespace: None, // RPM namespace is optional
374        name,
375        version,
376        description: description_text,
377        homepage_url: url,
378        download_url,
379        extracted_license_statement: license,
380        parties,
381        dependencies,
382        purl,
383        extra_data: extra_data_opt,
384        ..Default::default()
385    }
386}
387
/// Reports whether `line` begins with one of the conditional directives that may appear
/// inside the preamble (`%if`, `%ifarch`, `%ifnarch`, `%ifos`, `%ifnos`, `%elif`,
/// `%else`, `%endif`).
///
/// This is a plain prefix test: the caller is expected to pass an already-trimmed line,
/// and any text that merely starts with one of the directives also matches.
fn is_conditional_preamble_directive(line: &str) -> bool {
    const DIRECTIVES: [&str; 8] = [
        "%if", "%ifarch", "%ifnarch", "%ifos", "%ifnos", "%elif", "%else", "%endif",
    ];

    for directive in DIRECTIVES {
        if line.starts_with(directive) {
            return true;
        }
    }
    false
}
395
396/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
397fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
398    let mut result = s.to_string();
399
400    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
401
402    // Expand simple macros %{macro}
403    for (key, value) in macros {
404        let pattern = format!("%{{{}}}", key);
405        result = result.replace(&pattern, value);
406    }
407
408    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
409
410    result
411}
412
413/// Extracts the package name from a dependency string (removes version constraints)
414fn extract_dep_name(dep: &str) -> String {
415    let parts: Vec<&str> = dep.split(&['>', '<', '='][..]).map(|s| s.trim()).collect();
416
417    truncate_field(parts[0].to_string())
418}
419
420/// Builds a package URL for RPM packages
421fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
422    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
423
424    if let Some(ver) = version {
425        purl.with_version(ver).ok()?;
426    }
427
428    Some(purl.to_string())
429}
430
// Registers this parser's metadata with the crate. The argument meanings are defined by
// `register_parser!`, which is not visible in this file — presumably: human-readable
// description, path globs, package type, primary language (empty here) and a
// documentation URL. TODO confirm against the macro definition.
crate::register_parser!(
    "RPM specfile",
    &["**/*.spec"],
    "rpm",
    "",
    Some("https://rpm-software-management.github.io/rpm/manual/spec.html"),
);