Skip to main content

provenant/parsers/
arch.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10
11use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
12use crate::parsers::utils::{
13    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
14};
15
16use super::PackageParser;
17
18const PACKAGE_TYPE: PackageType = PackageType::Alpm;
19const PACKAGE_NAMESPACE: &str = "arch";
20
21pub struct ArchSrcinfoParser;
22pub struct ArchPkginfoParser;
23
24impl PackageParser for ArchSrcinfoParser {
25    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
26
27    fn is_match(path: &Path) -> bool {
28        path.file_name()
29            .and_then(|name| name.to_str())
30            .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
31    }
32
33    fn extract_packages(path: &Path) -> Vec<PackageData> {
34        let content = match read_file_to_string(path, None) {
35            Ok(content) => content,
36            Err(e) => {
37                warn!("Failed to read Arch source metadata {:?}: {}", path, e);
38                return vec![default_package_data(srcinfo_datasource_id(path))];
39            }
40        };
41
42        parse_srcinfo_like(&content, srcinfo_datasource_id(path))
43    }
44}
45
46impl PackageParser for ArchPkginfoParser {
47    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
48
49    fn is_match(path: &Path) -> bool {
50        path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
51    }
52
53    fn extract_packages(path: &Path) -> Vec<PackageData> {
54        let content = match read_file_to_string(path, None) {
55            Ok(content) => content,
56            Err(e) => {
57                warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
58                return vec![default_package_data(DatasourceId::ArchPkginfo)];
59            }
60        };
61
62        vec![parse_pkginfo(&content)]
63    }
64}
65
66fn default_package_data(datasource_id: DatasourceId) -> PackageData {
67    PackageData {
68        package_type: Some(PACKAGE_TYPE),
69        namespace: Some(PACKAGE_NAMESPACE.to_string()),
70        datasource_id: Some(datasource_id),
71        ..Default::default()
72    }
73}
74
75fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
76    match path.file_name().and_then(|name| name.to_str()) {
77        Some(".AURINFO") => DatasourceId::ArchAurinfo,
78        _ => DatasourceId::ArchSrcinfo,
79    }
80}
81
82type MultiMap = HashMap<String, Vec<String>>;
83
84fn parse_key_value_lines(content: &str) -> MultiMap {
85    let mut fields: MultiMap = HashMap::new();
86
87    for line in content.lines().take(MAX_ITERATION_COUNT) {
88        let line = line.trim();
89        if line.is_empty() || line.starts_with('#') {
90            continue;
91        }
92
93        if let Some((key, value)) = line.split_once('=') {
94            let key = key.trim();
95            let value = value.trim();
96            if !key.is_empty() {
97                fields
98                    .entry(key.to_string())
99                    .or_default()
100                    .push(truncate_field(value.to_string()));
101            }
102        }
103    }
104
105    fields
106}
107
108fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
109    let mut pkgbase: MultiMap = HashMap::new();
110    let mut packages: Vec<MultiMap> = Vec::new();
111    let mut current_is_pkgbase = true;
112
113    for line in content.lines().take(MAX_ITERATION_COUNT) {
114        let line = line.trim();
115        if line.is_empty() || line.starts_with('#') {
116            continue;
117        }
118
119        let Some((key, value)) = line.split_once('=') else {
120            continue;
121        };
122
123        let key = key.trim();
124        let value = value.trim();
125
126        if key == "pkgbase" {
127            pkgbase
128                .entry(key.to_string())
129                .or_default()
130                .push(truncate_field(value.to_string()));
131            current_is_pkgbase = true;
132            continue;
133        }
134
135        if key == "pkgname" {
136            packages.push(HashMap::from([(
137                key.to_string(),
138                vec![truncate_field(value.to_string())],
139            )]));
140            current_is_pkgbase = false;
141            continue;
142        }
143
144        let target = if current_is_pkgbase {
145            &mut pkgbase
146        } else {
147            packages.last_mut().unwrap_or(&mut pkgbase)
148        };
149
150        target
151            .entry(key.to_string())
152            .or_default()
153            .push(truncate_field(value.to_string()));
154    }
155
156    if packages.is_empty() {
157        packages.push(HashMap::new());
158    }
159
160    let results: Vec<_> = packages
161        .into_iter()
162        .filter_map(|package_section| {
163            let merged = merge_srcinfo_sections(&pkgbase, &package_section);
164            let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
165            pkg.name.is_some().then_some(pkg)
166        })
167        .collect();
168
169    if results.is_empty() {
170        vec![default_package_data(datasource_id)]
171    } else {
172        results
173    }
174}
175
176fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
177    let mut merged = pkgbase.clone();
178
179    for (key, values) in package {
180        if should_append_srcinfo_values(key) {
181            merged
182                .entry(key.clone())
183                .or_default()
184                .extend(values.clone());
185        } else {
186            merged.insert(key.clone(), values.clone());
187        }
188    }
189
190    if !merged.contains_key("pkgname")
191        && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
192    {
193        merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
194    }
195
196    merged
197}
198
199fn should_append_srcinfo_values(key: &str) -> bool {
200    matches!(
201        key,
202        "arch"
203            | "groups"
204            | "license"
205            | "noextract"
206            | "options"
207            | "backup"
208            | "validpgpkeys"
209            | "source"
210            | "depends"
211            | "makedepends"
212            | "checkdepends"
213            | "optdepends"
214            | "provides"
215            | "conflicts"
216            | "replaces"
217            | "md5sums"
218            | "sha1sums"
219            | "sha224sums"
220            | "sha256sums"
221            | "sha384sums"
222            | "sha512sums"
223            | "b2sums"
224            | "cksums"
225    ) || is_arch_variant_key(key)
226}
227
/// Reports whether `key` is an architecture-suffixed field such as
/// `depends_x86_64`.
fn is_arch_variant_key(key: &str) -> bool {
    arch_variant_base(key).is_some()
}

/// Returns the base field name of an architecture-suffixed key.
///
/// A key qualifies when it is one of the base names below, followed by `_`
/// and a non-empty suffix; e.g. `depends_x86_64` yields `Some("depends")`.
/// Bare base names, unknown prefixes and an empty suffix yield `None`.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const ARCH_SPECIFIC_BASES: &[&str] = &[
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    // No base name contains `_`, so the text before the first underscore is
    // the only possible base and the text after it is the suffix.
    let (prefix, suffix) = key.split_once('_')?;
    if suffix.is_empty() {
        return None;
    }

    ARCH_SPECIFIC_BASES
        .iter()
        .copied()
        .find(|base| *base == prefix)
}
258
259fn parse_pkginfo(content: &str) -> PackageData {
260    let fields = parse_key_value_lines(content);
261    build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
262}
263
264fn build_package_from_arch_metadata(
265    fields: &MultiMap,
266    datasource_id: DatasourceId,
267    is_srcinfo_like: bool,
268) -> PackageData {
269    let name = get_first(fields, "pkgname");
270    let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
271    let version = if is_srcinfo_like {
272        build_srcinfo_version(fields)
273    } else {
274        get_first(fields, "pkgver")
275    };
276    let description = get_first(fields, "pkgdesc");
277    let homepage_url = get_first(fields, "url");
278    let extracted_license_statement = join_values(fields.get("license"));
279    let arch_values = get_all(fields, "arch");
280    let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
281
282    let mut package = default_package_data(datasource_id);
283    package.name = name.map(truncate_field);
284    package.version = version.map(truncate_field);
285    package.description = description.map(truncate_field);
286    package.homepage_url = homepage_url.map(truncate_field);
287    package.extracted_license_statement = extracted_license_statement.map(truncate_field);
288    package.primary_language = None;
289    package.purl = package
290        .name
291        .as_deref()
292        .and_then(|name| build_alpm_purl(name, package.version.as_deref(), purl_arch));
293    package.source_packages = pkgbase
294        .and_then(|base| build_alpm_purl(&base, package.version.as_deref(), purl_arch))
295        .into_iter()
296        .collect();
297
298    if !is_srcinfo_like {
299        if let Some(packager) = get_first(fields, "packager") {
300            let (packager_name, packager_email) = split_name_email(&packager);
301            package.parties.push(Party {
302                r#type: Some("person".to_string()),
303                role: Some("packager".to_string()),
304                name: packager_name.map(truncate_field),
305                email: packager_email.map(truncate_field),
306                url: None,
307                organization: None,
308                organization_url: None,
309                timezone: None,
310            });
311        }
312        package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
313    }
314
315    package.dependencies = build_dependencies(fields);
316    package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
317    package
318}
319
320fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
321    let pkgver = get_first(fields, "pkgver")?;
322    let pkgrel = get_first(fields, "pkgrel");
323    let epoch = get_first(fields, "epoch");
324
325    let mut version = match pkgrel {
326        Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
327        None => pkgver,
328    };
329
330    if let Some(epoch) = epoch
331        && epoch != "0"
332    {
333        version = format!("{}:{}", epoch, version);
334    }
335
336    Some(version)
337}
338
339fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
340    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
341    purl.with_namespace(PACKAGE_NAMESPACE).ok()?;
342
343    if let Some(version) = version {
344        purl.with_version(version).ok()?;
345    }
346
347    if let Some(arch) = arch {
348        purl.add_qualifier("arch", arch).ok()?;
349    }
350
351    Some(purl.to_string())
352}
353
354fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
355    let mut dependencies = Vec::new();
356    let mut keys: Vec<_> = fields.keys().cloned().collect();
357    keys.sort();
358
359    for key in keys.iter().take(MAX_ITERATION_COUNT) {
360        let Some((scope, is_runtime, is_optional)) = dependency_semantics(key) else {
361            continue;
362        };
363
364        for value in get_all(fields, key) {
365            if let Some(dep_name) = extract_arch_dependency_name(&value) {
366                dependencies.push(Dependency {
367                    purl: build_alpm_purl(&dep_name, None, None),
368                    extracted_requirement: Some(truncate_field(value.clone())),
369                    scope: Some(scope.to_string()),
370                    is_runtime: Some(is_runtime),
371                    is_optional: Some(is_optional),
372                    is_pinned: Some(false),
373                    is_direct: Some(true),
374                    resolved_package: None,
375                    extra_data: None,
376                });
377            }
378        }
379    }
380
381    dependencies
382}
383
384fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
385    let base = key;
386    let normalized = arch_variant_base(key).unwrap_or(key);
387
388    match normalized {
389        "depends" | "depend" => Some((base, true, false)),
390        "makedepends" | "makedepend" => Some((base, false, false)),
391        "checkdepends" | "checkdepend" => Some((base, false, false)),
392        "optdepends" | "optdepend" => Some((base, true, true)),
393        _ => None,
394    }
395}
396
397fn extract_arch_dependency_name(value: &str) -> Option<String> {
398    let dep = value.split(':').next()?.trim();
399    let end = dep.find(['<', '>', '=']).unwrap_or(dep.len());
400    let name = dep[..end].trim();
401    (!name.is_empty()).then(|| truncate_field(name.to_string()))
402}
403
404fn build_extra_data(
405    fields: &MultiMap,
406    is_srcinfo_like: bool,
407    purl_arch: Option<&str>,
408) -> Option<HashMap<String, JsonValue>> {
409    let consumed: HashSet<&str> = HashSet::from([
410        "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
411        "size",
412    ]);
413
414    let mut extra = HashMap::new();
415
416    for (key, values) in fields.iter().take(MAX_ITERATION_COUNT) {
417        if consumed.contains(key.as_str()) {
418            continue;
419        }
420
421        let value = if should_force_array_extra_value(key) {
422            JsonValue::Array(
423                values
424                    .iter()
425                    .cloned()
426                    .map(|v| JsonValue::String(truncate_field(v)))
427                    .collect(),
428            )
429        } else if values.len() == 1 {
430            if key == "builddate" {
431                values[0]
432                    .parse::<u64>()
433                    .map(JsonValue::from)
434                    .unwrap_or_else(|_| JsonValue::String(truncate_field(values[0].clone())))
435            } else {
436                JsonValue::String(truncate_field(values[0].clone()))
437            }
438        } else {
439            JsonValue::Array(
440                values
441                    .iter()
442                    .cloned()
443                    .map(|v| JsonValue::String(truncate_field(v)))
444                    .collect(),
445            )
446        };
447        extra.insert(key.clone(), value);
448    }
449
450    if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
451        return None;
452    }
453
454    if !is_srcinfo_like
455        && purl_arch.is_some()
456        && !extra.contains_key("arch")
457        && let Some(arch) = purl_arch
458    {
459        extra.insert(
460            "arch".to_string(),
461            JsonValue::String(truncate_field(arch.to_string())),
462        );
463    }
464
465    (!extra.is_empty()).then_some(extra)
466}
467
468fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
469    fields.get(key).and_then(|values| values.first()).cloned()
470}
471
472fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
473    fields.get(key).cloned().unwrap_or_default()
474}
475
/// Joins all values with `" AND "`, returning `None` when there is no list
/// or the list is empty.
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|list| !list.is_empty())
        .map(|list| list.join(" AND "))
}
484
485fn should_force_array_extra_value(key: &str) -> bool {
486    matches!(
487        key,
488        "provides"
489            | "conflict"
490            | "conflicts"
491            | "replace"
492            | "replaces"
493            | "source"
494            | "arch"
495            | "license"
496            | "groups"
497            | "options"
498            | "backup"
499            | "validpgpkeys"
500            | "md5sums"
501            | "sha1sums"
502            | "sha224sums"
503            | "sha256sums"
504            | "sha384sums"
505            | "sha512sums"
506            | "b2sums"
507            | "cksums"
508    ) || is_arch_variant_key(key)
509}
510
// Invokes the crate-level `register_parser!` macro for this module.
// NOTE(review): the macro is defined elsewhere; the arguments appear to be,
// in order, a human-readable description, the file glob patterns handled
// here, the package type name, an empty string whose meaning is not visible
// from this file, and an optional documentation URL. Confirm against the
// macro definition.
511crate::register_parser!(
512    "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
513    &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
514    "alpm",
515    "",
516    Some("https://wiki.archlinux.org/title/.SRCINFO"),
517);