Skip to main content

provenant/parsers/
arch.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use log::warn;
5use packageurl::PackageUrl;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
9use crate::parsers::utils::{read_file_to_string, split_name_email};
10
11use super::PackageParser;
12
/// Package type stamped on every package produced by this module and used as the purl type.
const PACKAGE_TYPE: PackageType = PackageType::Alpm;
/// Namespace stamped on every package produced by this module and used as the purl namespace.
const PACKAGE_NAMESPACE: &str = "arch";
15
/// Parser for files named `.SRCINFO` or `.AURINFO`.
pub struct ArchSrcinfoParser;
/// Parser for files named `.PKGINFO`.
pub struct ArchPkginfoParser;
18
19impl PackageParser for ArchSrcinfoParser {
20    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
21
22    fn is_match(path: &Path) -> bool {
23        path.file_name()
24            .and_then(|name| name.to_str())
25            .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
26    }
27
28    fn extract_packages(path: &Path) -> Vec<PackageData> {
29        let content = match read_file_to_string(path) {
30            Ok(content) => content,
31            Err(e) => {
32                warn!("Failed to read Arch source metadata {:?}: {}", path, e);
33                return vec![default_package_data(srcinfo_datasource_id(path))];
34            }
35        };
36
37        parse_srcinfo_like(&content, srcinfo_datasource_id(path))
38    }
39}
40
41impl PackageParser for ArchPkginfoParser {
42    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
43
44    fn is_match(path: &Path) -> bool {
45        path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
46    }
47
48    fn extract_packages(path: &Path) -> Vec<PackageData> {
49        let content = match read_file_to_string(path) {
50            Ok(content) => content,
51            Err(e) => {
52                warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
53                return vec![default_package_data(DatasourceId::ArchPkginfo)];
54            }
55        };
56
57        vec![parse_pkginfo(&content)]
58    }
59}
60
61fn default_package_data(datasource_id: DatasourceId) -> PackageData {
62    PackageData {
63        package_type: Some(PACKAGE_TYPE),
64        namespace: Some(PACKAGE_NAMESPACE.to_string()),
65        datasource_id: Some(datasource_id),
66        ..Default::default()
67    }
68}
69
70fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
71    match path.file_name().and_then(|name| name.to_str()) {
72        Some(".AURINFO") => DatasourceId::ArchAurinfo,
73        _ => DatasourceId::ArchSrcinfo,
74    }
75}
76
/// Maps a field name to every value recorded for it, in the order the values were pushed.
type MultiMap = HashMap<String, Vec<String>>;
78
/// Collects `key = value` lines into a multi-map.
///
/// Each line is trimmed first. Blank lines, lines starting with `#`, lines
/// without `=`, and lines whose key is empty after trimming are skipped. The
/// line is split at the first `=` only, so values may contain `=`. Repeated
/// keys accumulate their values in file order.
fn parse_key_value_lines(content: &str) -> MultiMap {
    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| line.split_once('='))
        .map(|(raw_key, raw_value)| (raw_key.trim(), raw_value.trim()))
        .filter(|(key, _)| !key.is_empty())
        .fold(MultiMap::new(), |mut fields, (key, value)| {
            fields
                .entry(key.to_owned())
                .or_default()
                .push(value.to_owned());
            fields
        })
}
102
103fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
104    let mut pkgbase: MultiMap = HashMap::new();
105    let mut packages: Vec<MultiMap> = Vec::new();
106    let mut current_is_pkgbase = true;
107
108    for line in content.lines() {
109        let line = line.trim();
110        if line.is_empty() || line.starts_with('#') {
111            continue;
112        }
113
114        let Some((key, value)) = line.split_once('=') else {
115            continue;
116        };
117
118        let key = key.trim();
119        let value = value.trim();
120
121        if key == "pkgbase" {
122            pkgbase
123                .entry(key.to_string())
124                .or_default()
125                .push(value.to_string());
126            current_is_pkgbase = true;
127            continue;
128        }
129
130        if key == "pkgname" {
131            packages.push(HashMap::from([(key.to_string(), vec![value.to_string()])]));
132            current_is_pkgbase = false;
133            continue;
134        }
135
136        let target = if current_is_pkgbase {
137            &mut pkgbase
138        } else {
139            packages.last_mut().unwrap_or(&mut pkgbase)
140        };
141
142        target
143            .entry(key.to_string())
144            .or_default()
145            .push(value.to_string());
146    }
147
148    if packages.is_empty() {
149        packages.push(HashMap::new());
150    }
151
152    let results: Vec<_> = packages
153        .into_iter()
154        .filter_map(|package_section| {
155            let merged = merge_srcinfo_sections(&pkgbase, &package_section);
156            let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
157            pkg.name.is_some().then_some(pkg)
158        })
159        .collect();
160
161    if results.is_empty() {
162        vec![default_package_data(datasource_id)]
163    } else {
164        results
165    }
166}
167
168fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
169    let mut merged = pkgbase.clone();
170
171    for (key, values) in package {
172        if should_append_srcinfo_values(key) {
173            merged
174                .entry(key.clone())
175                .or_default()
176                .extend(values.clone());
177        } else {
178            merged.insert(key.clone(), values.clone());
179        }
180    }
181
182    if !merged.contains_key("pkgname")
183        && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
184    {
185        merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
186    }
187
188    merged
189}
190
191fn should_append_srcinfo_values(key: &str) -> bool {
192    matches!(
193        key,
194        "arch"
195            | "groups"
196            | "license"
197            | "noextract"
198            | "options"
199            | "backup"
200            | "validpgpkeys"
201            | "source"
202            | "depends"
203            | "makedepends"
204            | "checkdepends"
205            | "optdepends"
206            | "provides"
207            | "conflicts"
208            | "replaces"
209            | "md5sums"
210            | "sha1sums"
211            | "sha224sums"
212            | "sha256sums"
213            | "sha384sums"
214            | "sha512sums"
215            | "b2sums"
216            | "cksums"
217    ) || is_arch_variant_key(key)
218}
219
220fn is_arch_variant_key(key: &str) -> bool {
221    arch_variant_base(key).is_some()
222}
223
/// Resolves an architecture-suffixed key such as `depends_x86_64` to its base
/// field name (`depends`).
///
/// A key matches when it is `<base>_<suffix>` with a non-empty suffix and
/// `<base>` is one of the fields listed below. Returns `None` for every other
/// key, including the bare base names themselves.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const ARCH_SPECIFIC_FIELDS: &[&str] = &[
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    for &field in ARCH_SPECIFIC_FIELDS {
        let has_arch_suffix = key
            .strip_prefix(field)
            .and_then(|rest| rest.strip_prefix('_'))
            .is_some_and(|suffix| !suffix.is_empty());

        if has_arch_suffix {
            return Some(field);
        }
    }

    None
}
250
251fn parse_pkginfo(content: &str) -> PackageData {
252    let fields = parse_key_value_lines(content);
253    build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
254}
255
256fn build_package_from_arch_metadata(
257    fields: &MultiMap,
258    datasource_id: DatasourceId,
259    is_srcinfo_like: bool,
260) -> PackageData {
261    let name = get_first(fields, "pkgname");
262    let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
263    let version = if is_srcinfo_like {
264        build_srcinfo_version(fields)
265    } else {
266        get_first(fields, "pkgver")
267    };
268    let description = get_first(fields, "pkgdesc");
269    let homepage_url = get_first(fields, "url");
270    let extracted_license_statement = join_values(fields.get("license"));
271    let arch_values = get_all(fields, "arch");
272    let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
273
274    let mut package = default_package_data(datasource_id);
275    package.name = name.clone();
276    package.version = version.clone();
277    package.description = description;
278    package.homepage_url = homepage_url;
279    package.extracted_license_statement = extracted_license_statement;
280    package.primary_language = None;
281    package.purl = name
282        .as_deref()
283        .and_then(|name| build_alpm_purl(name, version.as_deref(), purl_arch));
284    package.source_packages = pkgbase
285        .as_deref()
286        .and_then(|base| build_alpm_purl(base, version.as_deref(), purl_arch))
287        .into_iter()
288        .collect();
289
290    if !is_srcinfo_like {
291        if let Some(packager) = get_first(fields, "packager") {
292            let (name, email) = split_name_email(&packager);
293            package.parties.push(Party {
294                r#type: Some("person".to_string()),
295                role: Some("packager".to_string()),
296                name,
297                email,
298                url: None,
299                organization: None,
300                organization_url: None,
301                timezone: None,
302            });
303        }
304        package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
305    }
306
307    package.dependencies = build_dependencies(fields);
308    package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
309    package
310}
311
312fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
313    let pkgver = get_first(fields, "pkgver")?;
314    let pkgrel = get_first(fields, "pkgrel");
315    let epoch = get_first(fields, "epoch");
316
317    let mut version = match pkgrel {
318        Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
319        None => pkgver,
320    };
321
322    if let Some(epoch) = epoch
323        && epoch != "0"
324    {
325        version = format!("{}:{}", epoch, version);
326    }
327
328    Some(version)
329}
330
331fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
332    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
333    purl.with_namespace(PACKAGE_NAMESPACE).ok()?;
334
335    if let Some(version) = version {
336        purl.with_version(version).ok()?;
337    }
338
339    if let Some(arch) = arch {
340        purl.add_qualifier("arch", arch).ok()?;
341    }
342
343    Some(purl.to_string())
344}
345
346fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
347    let mut dependencies = Vec::new();
348    let mut keys: Vec<_> = fields.keys().cloned().collect();
349    keys.sort();
350
351    for key in keys {
352        let Some((scope, is_runtime, is_optional)) = dependency_semantics(&key) else {
353            continue;
354        };
355
356        for value in get_all(fields, &key) {
357            if let Some(dep_name) = extract_arch_dependency_name(&value) {
358                dependencies.push(Dependency {
359                    purl: build_alpm_purl(&dep_name, None, None),
360                    extracted_requirement: Some(value.clone()),
361                    scope: Some(scope.to_string()),
362                    is_runtime: Some(is_runtime),
363                    is_optional: Some(is_optional),
364                    is_pinned: Some(false),
365                    is_direct: Some(true),
366                    resolved_package: None,
367                    extra_data: None,
368                });
369            }
370        }
371    }
372
373    dependencies
374}
375
376fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
377    let base = key;
378    let normalized = arch_variant_base(key).unwrap_or(key);
379
380    match normalized {
381        "depends" | "depend" => Some((base, true, false)),
382        "makedepends" | "makedepend" => Some((base, false, false)),
383        "checkdepends" | "checkdepend" => Some((base, false, false)),
384        "optdepends" | "optdepend" => Some((base, true, true)),
385        _ => None,
386    }
387}
388
/// Extracts the bare package name from a dependency value.
///
/// Everything from the first `:` onwards is discarded, then everything from
/// the first `<`, `>` or `=` onwards, and the remainder is trimmed. Returns
/// `None` when nothing is left.
fn extract_arch_dependency_name(value: &str) -> Option<String> {
    let spec = value.split_once(':').map_or(value, |(before, _)| before);

    let name = spec
        .split(|c: char| matches!(c, '<' | '>' | '='))
        .next()
        .unwrap_or(spec)
        .trim();

    if name.is_empty() {
        None
    } else {
        Some(name.to_owned())
    }
}
395
396fn build_extra_data(
397    fields: &MultiMap,
398    is_srcinfo_like: bool,
399    purl_arch: Option<&str>,
400) -> Option<HashMap<String, JsonValue>> {
401    let consumed: HashSet<&str> = HashSet::from([
402        "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
403        "size",
404    ]);
405
406    let mut extra = HashMap::new();
407
408    for (key, values) in fields {
409        if consumed.contains(key.as_str()) {
410            continue;
411        }
412
413        let value = if should_force_array_extra_value(key) {
414            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect())
415        } else if values.len() == 1 {
416            if key == "builddate" {
417                values[0]
418                    .parse::<u64>()
419                    .map(JsonValue::from)
420                    .unwrap_or_else(|_| JsonValue::String(values[0].clone()))
421            } else {
422                JsonValue::String(values[0].clone())
423            }
424        } else {
425            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect())
426        };
427        extra.insert(key.clone(), value);
428    }
429
430    if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
431        return None;
432    }
433
434    if !is_srcinfo_like
435        && purl_arch.is_some()
436        && !extra.contains_key("arch")
437        && let Some(arch) = purl_arch
438    {
439        extra.insert("arch".to_string(), JsonValue::String(arch.to_string()));
440    }
441
442    (!extra.is_empty()).then_some(extra)
443}
444
/// Returns a copy of the first value stored under `key`, or `None` when the
/// key is absent or has no values.
fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
    let values = fields.get(key)?;
    values.first().cloned()
}
448
/// Returns a copy of every value stored under `key`, or an empty vector when
/// the key is absent.
fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
    match fields.get(key) {
        Some(values) => values.clone(),
        None => Vec::new(),
    }
}
452
/// Joins the values with `" AND "`, or returns `None` when there are no values
/// (either no list at all or an empty one).
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|entries| !entries.is_empty())
        .map(|entries| entries.join(" AND "))
}
461
462fn should_force_array_extra_value(key: &str) -> bool {
463    matches!(
464        key,
465        "provides"
466            | "conflict"
467            | "conflicts"
468            | "replace"
469            | "replaces"
470            | "source"
471            | "arch"
472            | "license"
473            | "groups"
474            | "options"
475            | "backup"
476            | "validpgpkeys"
477            | "md5sums"
478            | "sha1sums"
479            | "sha224sums"
480            | "sha256sums"
481            | "sha384sums"
482            | "sha512sums"
483            | "b2sums"
484            | "cksums"
485    ) || is_arch_variant_key(key)
486}
487
// Registers this module's parsers with the crate-wide parser registry.
// NOTE(review): the macro definition is not visible here. The arguments appear
// to be a human-readable description, the file globs handled, the package type,
// an empty primary-language slot and a documentation URL; confirm against
// `register_parser!`.
crate::register_parser!(
    "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
    &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
    "alpm",
    "",
    Some("https://wiki.archlinux.org/title/.SRCINFO"),
);