Skip to main content

provenant/parsers/
rpm_specfile.rs

1//! Parser for RPM .spec files.
2//!
3//! Extracts package metadata from RPM specfiles, which define how RPM packages
4//! are built. This is a beyond-parity implementation - the Python version is
5//! a complete stub with "TODO: implement me!!" comments.
6//!
7//! # Supported Formats
8//! - *.spec (RPM specfiles)
9//!
10//! # Key Features
11//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
12//! - Dependency extraction (BuildRequires, Requires, Provides)
13//! - %description section parsing
14//! - Basic macro expansion (%{name}, %{version}, %{release})
15//! - %define and %global macro definitions
16//! - Conditional macro handling (%{?dist})
17//! - Multi-line dependency lists (comma-separated)
18//! - Scoped Requires (Requires(post), Requires(preun), etc.)
19//!
20//! # Implementation Notes
21//! - Parses only the preamble (before %prep, %build, etc. sections)
22//! - Tags are case-insensitive per RPM spec format
23//! - Simple macro expansion for common patterns
24//! - BuildRequires dependencies have is_runtime=false, scope="build"
25//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
26//! - datasource_id is "rpm_specfile"
27
28use std::collections::HashMap;
29use std::path::Path;
30use std::sync::LazyLock;
31
32use log::warn;
33use packageurl::PackageUrl;
34use regex::Regex;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
37use crate::parsers::utils::{read_file_to_string, split_name_email};
38
39use super::PackageParser;
40
/// Matches conditional macro references of the form `%{?...}` (for example
/// `%{?dist}`). Compiled lazily on first use and reused by `expand_macros`,
/// which replaces every match with an empty string.
41static RE_CONDITIONAL_MACRO: LazyLock<Regex> =
42    LazyLock::new(|| Regex::new(r"%\{\?[^}]+\}").unwrap());
43
/// Package type attached to every record produced by this parser and used as
/// the type component when building package URLs.
44const PACKAGE_TYPE: PackageType = PackageType::Rpm;
45
46/// Parser for RPM specfiles
47pub struct RpmSpecfileParser;
48
49impl PackageParser for RpmSpecfileParser {
50    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
51
52    fn is_match(path: &Path) -> bool {
53        path.extension()
54            .and_then(|e| e.to_str())
55            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        let content = match read_file_to_string(path) {
60            Ok(c) => c,
61            Err(e) => {
62                warn!("Failed to read RPM specfile {:?}: {}", path, e);
63                return vec![PackageData {
64                    package_type: Some(PACKAGE_TYPE),
65                    datasource_id: Some(DatasourceId::RpmSpecfile),
66                    ..Default::default()
67                }];
68            }
69        };
70
71        vec![parse_specfile(&content)]
72    }
73}
74
75fn parse_specfile(content: &str) -> PackageData {
76    let mut tags: HashMap<String, String> = HashMap::new();
77    let mut macros: HashMap<String, String> = HashMap::new();
78    let mut build_requires: Vec<String> = Vec::new();
79    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
80    let mut provides: Vec<String> = Vec::new();
81    let mut description: Option<String> = None;
82
83    let lines: Vec<&str> = content.lines().collect();
84    let mut i = 0;
85
86    // Parse preamble (everything before % sections)
87    while i < lines.len() {
88        let line = lines[i].trim();
89
90        // Stop at first section marker (%, but not %define/%global)
91        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
92            if is_conditional_preamble_directive(line) {
93                i += 1;
94                continue;
95            }
96            break;
97        }
98
99        // Skip empty lines and comments
100        if line.is_empty() || line.starts_with('#') {
101            i += 1;
102            continue;
103        }
104
105        // Parse %define and %global macros
106        if let Some(stripped) = line
107            .strip_prefix("%define")
108            .or(line.strip_prefix("%global"))
109        {
110            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
111            if parts.len() == 2 {
112                macros.insert(parts[0].to_string(), parts[1].trim().to_string());
113            }
114            i += 1;
115            continue;
116        }
117
118        // Parse Tag: Value lines
119        if let Some(colon_pos) = line.find(':') {
120            let tag = line[..colon_pos].trim().to_lowercase();
121            let value = line[colon_pos + 1..].trim().to_string();
122
123            match tag.as_str() {
124                "buildrequires" => {
125                    // BuildRequires can be comma-separated
126                    for dep in value.split(',') {
127                        let dep = dep.trim();
128                        if !dep.is_empty() {
129                            build_requires.push(dep.to_string());
130                        }
131                    }
132                }
133                t if t.starts_with("requires") => {
134                    // Parse Requires, Requires(post), Requires(preun), etc.
135                    let scope = if let Some(start) = t.find('(') {
136                        if let Some(end) = t.find(')') {
137                            Some(t[start + 1..end].to_string())
138                        } else {
139                            Some("runtime".to_string())
140                        }
141                    } else {
142                        Some("runtime".to_string())
143                    };
144
145                    for dep in value.split(',') {
146                        let dep = dep.trim();
147                        if !dep.is_empty() {
148                            requires.push((dep.to_string(), scope.clone()));
149                        }
150                    }
151                }
152                "provides" => {
153                    for prov in value.split(',') {
154                        let prov = prov.trim();
155                        if !prov.is_empty() {
156                            provides.push(prov.to_string());
157                        }
158                    }
159                }
160                _ => {
161                    tags.insert(tag, value);
162                }
163            }
164        }
165
166        i += 1;
167    }
168
169    // Now parse %description section if present
170    while i < lines.len() {
171        let line = lines[i].trim();
172
173        if line.starts_with("%description") {
174            i += 1;
175            let mut desc_lines = Vec::new();
176
177            // Collect lines until next % section
178            while i < lines.len() {
179                let desc_line = lines[i];
180                let trimmed = desc_line.trim();
181
182                // Stop at next section
183                if trimmed.starts_with('%') {
184                    break;
185                }
186
187                // Don't include empty lines at start
188                if !desc_lines.is_empty() || !trimmed.is_empty() {
189                    desc_lines.push(desc_line);
190                }
191
192                i += 1;
193            }
194
195            // Trim trailing empty lines
196            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
197                desc_lines.pop();
198            }
199
200            if !desc_lines.is_empty() {
201                description = Some(desc_lines.join("\n"));
202            }
203
204            break;
205        }
206
207        i += 1;
208    }
209
210    // Extract basic metadata from tags
211    let name = tags.get("name").cloned();
212    let version = tags.get("version").cloned();
213    let release = tags.get("release").cloned();
214
215    // Store name and version in macros for expansion
216    if let Some(ref n) = name {
217        macros.insert("name".to_string(), n.clone());
218    }
219    if let Some(ref v) = version {
220        macros.insert("version".to_string(), v.clone());
221    }
222    if let Some(ref r) = release {
223        macros.insert("release".to_string(), r.clone());
224    }
225
226    // Expand macros in all tag values
227    let mut expanded_tags: HashMap<String, String> = HashMap::new();
228    for (tag, value) in tags.iter() {
229        expanded_tags.insert(tag.clone(), expand_macros(value, &macros));
230    }
231
232    // Get expanded values
233    let name = expanded_tags.get("name").cloned();
234    let version = expanded_tags.get("version").cloned();
235    let release = expanded_tags.get("release").cloned();
236    let summary = expanded_tags.get("summary").cloned();
237    let license = expanded_tags.get("license").cloned();
238    let url = expanded_tags.get("url").cloned();
239    let group = expanded_tags.get("group").cloned();
240    let epoch = expanded_tags.get("epoch").cloned();
241    let packager = expanded_tags.get("packager").cloned();
242
243    let download_url = expanded_tags
244        .get("source")
245        .or_else(|| expanded_tags.get("source0"))
246        .cloned();
247
248    // Create parties
249    let mut parties = Vec::new();
250    if let Some(pkg) = packager {
251        let (name_opt, email_opt) = split_name_email(&pkg);
252        parties.push(Party {
253            r#type: None,
254            role: Some("packager".to_string()),
255            name: name_opt,
256            email: email_opt,
257            url: None,
258            organization: None,
259            organization_url: None,
260            timezone: None,
261        });
262    }
263
264    // Create dependencies
265    let mut dependencies = Vec::new();
266
267    for dep_str in build_requires {
268        let dep_str = expand_macros(&dep_str, &macros);
269        let dep_name = extract_dep_name(&dep_str);
270        let purl = build_rpm_purl(&dep_name, None);
271
272        dependencies.push(Dependency {
273            purl,
274            extracted_requirement: Some(dep_str),
275            scope: Some("build".to_string()),
276            is_runtime: Some(false),
277            is_optional: Some(false),
278            is_direct: Some(true),
279            is_pinned: None,
280            resolved_package: None,
281            extra_data: None,
282        });
283    }
284
285    for (dep_str, scope) in requires {
286        let dep_str = expand_macros(&dep_str, &macros);
287        let dep_name = extract_dep_name(&dep_str);
288        let purl = build_rpm_purl(&dep_name, None);
289
290        dependencies.push(Dependency {
291            purl,
292            extracted_requirement: Some(dep_str),
293            scope,
294            is_runtime: Some(true),
295            is_optional: Some(false),
296            is_direct: Some(true),
297            is_pinned: None,
298            resolved_package: None,
299            extra_data: None,
300        });
301    }
302
303    // Build PURL
304    let purl = name
305        .as_ref()
306        .and_then(|n| build_rpm_purl(n, version.as_deref()));
307
308    // Build extra_data for non-standard fields
309    let mut extra_data = HashMap::new();
310    if let Some(r) = release {
311        extra_data.insert("release".to_string(), serde_json::Value::String(r));
312    }
313    if let Some(e) = epoch {
314        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
315    }
316    if let Some(g) = group {
317        extra_data.insert("group".to_string(), serde_json::Value::String(g));
318    }
319    if !provides.is_empty() {
320        let provides_json: Vec<serde_json::Value> = provides
321            .into_iter()
322            .map(|prov| serde_json::Value::String(expand_macros(&prov, &macros)))
323            .collect();
324        extra_data.insert(
325            "provides".to_string(),
326            serde_json::Value::Array(provides_json),
327        );
328    }
329
330    let extra_data_opt = if extra_data.is_empty() {
331        None
332    } else {
333        Some(extra_data)
334    };
335
336    // Use %description if available, otherwise use Summary
337    let description_text = description.or(summary);
338
339    PackageData {
340        datasource_id: Some(DatasourceId::RpmSpecfile),
341        package_type: Some(PACKAGE_TYPE),
342        namespace: None, // RPM namespace is optional
343        name,
344        version,
345        description: description_text,
346        homepage_url: url,
347        download_url,
348        extracted_license_statement: license,
349        parties,
350        dependencies,
351        purl,
352        extra_data: extra_data_opt,
353        ..Default::default()
354    }
355}
356
/// Reports whether `line` begins with a conditional directive that may appear
/// inside the preamble (`%if` and its variants, `%elif`, `%else`, `%endif`).
///
/// The comparison is a plain prefix test on `line` as given; callers are
/// expected to pass an already-trimmed line.
fn is_conditional_preamble_directive(line: &str) -> bool {
    const CONDITIONAL_DIRECTIVES: [&str; 8] = [
        "%if", "%ifarch", "%ifnarch", "%ifos", "%ifnos", "%elif", "%else", "%endif",
    ];

    for prefix in &CONDITIONAL_DIRECTIVES {
        if line.starts_with(*prefix) {
            return true;
        }
    }
    false
}
364
365/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
366fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
367    let mut result = s.to_string();
368
369    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
370
371    // Expand simple macros %{macro}
372    for (key, value) in macros {
373        let pattern = format!("%{{{}}}", key);
374        result = result.replace(&pattern, value);
375    }
376
377    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
378
379    result
380}
381
/// Returns the package-name portion of a dependency string: everything before
/// the first `>`, `<` or `=`, with surrounding whitespace removed.
///
/// A string without any comparison operator is returned trimmed in full.
fn extract_dep_name(dep: &str) -> String {
    let name_end = dep
        .find(|c: char| matches!(c, '>' | '<' | '='))
        .unwrap_or(dep.len());

    dep[..name_end].trim().to_string()
}
389
390/// Builds a package URL for RPM packages
391fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
392    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
393
394    if let Some(ver) = version {
395        purl.with_version(ver).ok()?;
396    }
397
398    Some(purl.to_string())
399}
400
// Registers metadata for this parser with the crate-wide registry macro.
// NOTE(review): the macro definition is not visible here; the arguments are
// presumably a human-readable description, the path glob patterns, the package
// type, a primary-language field (left empty) and a documentation URL —
// confirm against `register_parser!`.
401crate::register_parser!(
402    "RPM specfile",
403    &["**/*.spec"],
404    "rpm",
405    "",
406    Some("https://rpm-software-management.github.io/rpm/manual/spec.html"),
407);