Skip to main content

provenant/parsers/
arch.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use packageurl::PackageUrl;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
9use crate::parsers::utils::{
10    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
11};
12
13use super::PackageParser;
14
/// Package type reported by both Arch parsers and used as the purl type.
15const PACKAGE_TYPE: PackageType = PackageType::Alpm;
/// Namespace stored on every produced package and used as the purl namespace.
16const PACKAGE_NAMESPACE: &str = "arch";
17
/// Parser for `.SRCINFO` and `.AURINFO` files (see its `PackageParser` impl).
18pub struct ArchSrcinfoParser;
/// Parser for `.PKGINFO` files (see its `PackageParser` impl).
19pub struct ArchPkginfoParser;
20
21impl PackageParser for ArchSrcinfoParser {
22    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
23
24    fn is_match(path: &Path) -> bool {
25        path.file_name()
26            .and_then(|name| name.to_str())
27            .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
28    }
29
30    fn extract_packages(path: &Path) -> Vec<PackageData> {
31        let content = match read_file_to_string(path, None) {
32            Ok(content) => content,
33            Err(e) => {
34                warn!("Failed to read Arch source metadata {:?}: {}", path, e);
35                return vec![default_package_data(srcinfo_datasource_id(path))];
36            }
37        };
38
39        parse_srcinfo_like(&content, srcinfo_datasource_id(path))
40    }
41}
42
43impl PackageParser for ArchPkginfoParser {
44    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
45
46    fn is_match(path: &Path) -> bool {
47        path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
48    }
49
50    fn extract_packages(path: &Path) -> Vec<PackageData> {
51        let content = match read_file_to_string(path, None) {
52            Ok(content) => content,
53            Err(e) => {
54                warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
55                return vec![default_package_data(DatasourceId::ArchPkginfo)];
56            }
57        };
58
59        vec![parse_pkginfo(&content)]
60    }
61}
62
63fn default_package_data(datasource_id: DatasourceId) -> PackageData {
64    PackageData {
65        package_type: Some(PACKAGE_TYPE),
66        namespace: Some(PACKAGE_NAMESPACE.to_string()),
67        datasource_id: Some(datasource_id),
68        ..Default::default()
69    }
70}
71
72fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
73    match path.file_name().and_then(|name| name.to_str()) {
74        Some(".AURINFO") => DatasourceId::ArchAurinfo,
75        _ => DatasourceId::ArchSrcinfo,
76    }
77}
78
/// Multi-valued field map: each key holds every value parsed for it, in input order.
79type MultiMap = HashMap<String, Vec<String>>;
80
81fn parse_key_value_lines(content: &str) -> MultiMap {
82    let mut fields: MultiMap = HashMap::new();
83
84    for line in content.lines().take(MAX_ITERATION_COUNT) {
85        let line = line.trim();
86        if line.is_empty() || line.starts_with('#') {
87            continue;
88        }
89
90        if let Some((key, value)) = line.split_once('=') {
91            let key = key.trim();
92            let value = value.trim();
93            if !key.is_empty() {
94                fields
95                    .entry(key.to_string())
96                    .or_default()
97                    .push(truncate_field(value.to_string()));
98            }
99        }
100    }
101
102    fields
103}
104
105fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
106    let mut pkgbase: MultiMap = HashMap::new();
107    let mut packages: Vec<MultiMap> = Vec::new();
108    let mut current_is_pkgbase = true;
109
110    for line in content.lines().take(MAX_ITERATION_COUNT) {
111        let line = line.trim();
112        if line.is_empty() || line.starts_with('#') {
113            continue;
114        }
115
116        let Some((key, value)) = line.split_once('=') else {
117            continue;
118        };
119
120        let key = key.trim();
121        let value = value.trim();
122
123        if key == "pkgbase" {
124            pkgbase
125                .entry(key.to_string())
126                .or_default()
127                .push(truncate_field(value.to_string()));
128            current_is_pkgbase = true;
129            continue;
130        }
131
132        if key == "pkgname" {
133            packages.push(HashMap::from([(
134                key.to_string(),
135                vec![truncate_field(value.to_string())],
136            )]));
137            current_is_pkgbase = false;
138            continue;
139        }
140
141        let target = if current_is_pkgbase {
142            &mut pkgbase
143        } else {
144            packages.last_mut().unwrap_or(&mut pkgbase)
145        };
146
147        target
148            .entry(key.to_string())
149            .or_default()
150            .push(truncate_field(value.to_string()));
151    }
152
153    if packages.is_empty() {
154        packages.push(HashMap::new());
155    }
156
157    let results: Vec<_> = packages
158        .into_iter()
159        .filter_map(|package_section| {
160            let merged = merge_srcinfo_sections(&pkgbase, &package_section);
161            let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
162            pkg.name.is_some().then_some(pkg)
163        })
164        .collect();
165
166    if results.is_empty() {
167        vec![default_package_data(datasource_id)]
168    } else {
169        results
170    }
171}
172
173fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
174    let mut merged = pkgbase.clone();
175
176    for (key, values) in package {
177        if should_append_srcinfo_values(key) {
178            merged
179                .entry(key.clone())
180                .or_default()
181                .extend(values.clone());
182        } else {
183            merged.insert(key.clone(), values.clone());
184        }
185    }
186
187    if !merged.contains_key("pkgname")
188        && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
189    {
190        merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
191    }
192
193    merged
194}
195
196fn should_append_srcinfo_values(key: &str) -> bool {
197    matches!(
198        key,
199        "arch"
200            | "groups"
201            | "license"
202            | "noextract"
203            | "options"
204            | "backup"
205            | "validpgpkeys"
206            | "source"
207            | "depends"
208            | "makedepends"
209            | "checkdepends"
210            | "optdepends"
211            | "provides"
212            | "conflicts"
213            | "replaces"
214            | "md5sums"
215            | "sha1sums"
216            | "sha224sums"
217            | "sha256sums"
218            | "sha384sums"
219            | "sha512sums"
220            | "b2sums"
221            | "cksums"
222    ) || is_arch_variant_key(key)
223}
224
225fn is_arch_variant_key(key: &str) -> bool {
226    arch_variant_base(key).is_some()
227}
228
/// Resolves an architecture-suffixed key such as `depends_x86_64` to its base
/// key (`depends`).
///
/// Returns `None` unless `key` is `<base>_<arch>` with a non-empty `<arch>`
/// and `<base>` is one of the keys listed below.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const ARCH_AWARE_KEYS: [&str; 16] = [
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    for base in ARCH_AWARE_KEYS {
        let Some(rest) = key.strip_prefix(base) else {
            continue;
        };

        match rest.strip_prefix('_') {
            Some(arch) if !arch.is_empty() => return Some(base),
            _ => {}
        }
    }

    None
}
255
256fn parse_pkginfo(content: &str) -> PackageData {
257    let fields = parse_key_value_lines(content);
258    build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
259}
260
261fn build_package_from_arch_metadata(
262    fields: &MultiMap,
263    datasource_id: DatasourceId,
264    is_srcinfo_like: bool,
265) -> PackageData {
266    let name = get_first(fields, "pkgname");
267    let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
268    let version = if is_srcinfo_like {
269        build_srcinfo_version(fields)
270    } else {
271        get_first(fields, "pkgver")
272    };
273    let description = get_first(fields, "pkgdesc");
274    let homepage_url = get_first(fields, "url");
275    let extracted_license_statement = join_values(fields.get("license"));
276    let arch_values = get_all(fields, "arch");
277    let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
278
279    let mut package = default_package_data(datasource_id);
280    package.name = name.map(truncate_field);
281    package.version = version.map(truncate_field);
282    package.description = description.map(truncate_field);
283    package.homepage_url = homepage_url.map(truncate_field);
284    package.extracted_license_statement = extracted_license_statement.map(truncate_field);
285    package.primary_language = None;
286    package.purl = package
287        .name
288        .as_deref()
289        .and_then(|name| build_alpm_purl(name, package.version.as_deref(), purl_arch));
290    package.source_packages = pkgbase
291        .and_then(|base| build_alpm_purl(&base, package.version.as_deref(), purl_arch))
292        .into_iter()
293        .collect();
294
295    if !is_srcinfo_like {
296        if let Some(packager) = get_first(fields, "packager") {
297            let (packager_name, packager_email) = split_name_email(&packager);
298            package.parties.push(Party {
299                r#type: Some("person".to_string()),
300                role: Some("packager".to_string()),
301                name: packager_name.map(truncate_field),
302                email: packager_email.map(truncate_field),
303                url: None,
304                organization: None,
305                organization_url: None,
306                timezone: None,
307            });
308        }
309        package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
310    }
311
312    package.dependencies = build_dependencies(fields);
313    package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
314    package
315}
316
317fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
318    let pkgver = get_first(fields, "pkgver")?;
319    let pkgrel = get_first(fields, "pkgrel");
320    let epoch = get_first(fields, "epoch");
321
322    let mut version = match pkgrel {
323        Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
324        None => pkgver,
325    };
326
327    if let Some(epoch) = epoch
328        && epoch != "0"
329    {
330        version = format!("{}:{}", epoch, version);
331    }
332
333    Some(version)
334}
335
336fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
337    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
338    purl.with_namespace(PACKAGE_NAMESPACE).ok()?;
339
340    if let Some(version) = version {
341        purl.with_version(version).ok()?;
342    }
343
344    if let Some(arch) = arch {
345        purl.add_qualifier("arch", arch).ok()?;
346    }
347
348    Some(purl.to_string())
349}
350
351fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
352    let mut dependencies = Vec::new();
353    let mut keys: Vec<_> = fields.keys().cloned().collect();
354    keys.sort();
355
356    for key in keys.iter().take(MAX_ITERATION_COUNT) {
357        let Some((scope, is_runtime, is_optional)) = dependency_semantics(key) else {
358            continue;
359        };
360
361        for value in get_all(fields, key) {
362            if let Some(dep_name) = extract_arch_dependency_name(&value) {
363                dependencies.push(Dependency {
364                    purl: build_alpm_purl(&dep_name, None, None),
365                    extracted_requirement: Some(truncate_field(value.clone())),
366                    scope: Some(scope.to_string()),
367                    is_runtime: Some(is_runtime),
368                    is_optional: Some(is_optional),
369                    is_pinned: Some(false),
370                    is_direct: Some(true),
371                    resolved_package: None,
372                    extra_data: None,
373                });
374            }
375        }
376    }
377
378    dependencies
379}
380
381fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
382    let base = key;
383    let normalized = arch_variant_base(key).unwrap_or(key);
384
385    match normalized {
386        "depends" | "depend" => Some((base, true, false)),
387        "makedepends" | "makedepend" => Some((base, false, false)),
388        "checkdepends" | "checkdepend" => Some((base, false, false)),
389        "optdepends" | "optdepend" => Some((base, true, true)),
390        _ => None,
391    }
392}
393
394fn extract_arch_dependency_name(value: &str) -> Option<String> {
395    let dep = value.split(':').next()?.trim();
396    let end = dep.find(['<', '>', '=']).unwrap_or(dep.len());
397    let name = dep[..end].trim();
398    (!name.is_empty()).then(|| truncate_field(name.to_string()))
399}
400
401fn build_extra_data(
402    fields: &MultiMap,
403    is_srcinfo_like: bool,
404    purl_arch: Option<&str>,
405) -> Option<HashMap<String, JsonValue>> {
406    let consumed: HashSet<&str> = HashSet::from([
407        "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
408        "size",
409    ]);
410
411    let mut extra = HashMap::new();
412
413    for (key, values) in fields.iter().take(MAX_ITERATION_COUNT) {
414        if consumed.contains(key.as_str()) {
415            continue;
416        }
417
418        let value = if should_force_array_extra_value(key) {
419            JsonValue::Array(
420                values
421                    .iter()
422                    .cloned()
423                    .map(|v| JsonValue::String(truncate_field(v)))
424                    .collect(),
425            )
426        } else if values.len() == 1 {
427            if key == "builddate" {
428                values[0]
429                    .parse::<u64>()
430                    .map(JsonValue::from)
431                    .unwrap_or_else(|_| JsonValue::String(truncate_field(values[0].clone())))
432            } else {
433                JsonValue::String(truncate_field(values[0].clone()))
434            }
435        } else {
436            JsonValue::Array(
437                values
438                    .iter()
439                    .cloned()
440                    .map(|v| JsonValue::String(truncate_field(v)))
441                    .collect(),
442            )
443        };
444        extra.insert(key.clone(), value);
445    }
446
447    if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
448        return None;
449    }
450
451    if !is_srcinfo_like
452        && purl_arch.is_some()
453        && !extra.contains_key("arch")
454        && let Some(arch) = purl_arch
455    {
456        extra.insert(
457            "arch".to_string(),
458            JsonValue::String(truncate_field(arch.to_string())),
459        );
460    }
461
462    (!extra.is_empty()).then_some(extra)
463}
464
465fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
466    fields.get(key).and_then(|values| values.first()).cloned()
467}
468
469fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
470    fields.get(key).cloned().unwrap_or_default()
471}
472
/// Joins the given values with `" AND "`; returns `None` when there is no
/// list or the list is empty.
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|items| !items.is_empty())
        .map(|items| items.join(" AND "))
}
481
482fn should_force_array_extra_value(key: &str) -> bool {
483    matches!(
484        key,
485        "provides"
486            | "conflict"
487            | "conflicts"
488            | "replace"
489            | "replaces"
490            | "source"
491            | "arch"
492            | "license"
493            | "groups"
494            | "options"
495            | "backup"
496            | "validpgpkeys"
497            | "md5sums"
498            | "sha1sums"
499            | "sha224sums"
500            | "sha256sums"
501            | "sha384sums"
502            | "sha512sums"
503            | "b2sums"
504            | "cksums"
505    ) || is_arch_variant_key(key)
506}
507
// Registers this module with the crate-wide parser registry: a description,
// the file-name globs for the three supported files, the "alpm" package type,
// and a documentation URL.
// NOTE(review): the meaning of the empty fourth argument is not visible in
// this file — confirm against the `register_parser!` definition.
508crate::register_parser!(
509    "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
510    &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
511    "alpm",
512    "",
513    Some("https://wiki.archlinux.org/title/.SRCINFO"),
514);