1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use log::warn;
5use packageurl::PackageUrl;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
9use crate::parsers::utils::{read_file_to_string, split_name_email};
10
11use super::PackageParser;
12
/// Package type assigned to every package produced in this file and used as the
/// type component of the package URLs built by `build_alpm_purl`.
const PACKAGE_TYPE: PackageType = PackageType::Alpm;
/// Namespace assigned to every package produced in this file and used as the
/// namespace component of the package URLs built by `build_alpm_purl`.
const PACKAGE_NAMESPACE: &str = "arch";

/// Parser for files named `.SRCINFO` or `.AURINFO`.
pub struct ArchSrcinfoParser;
/// Parser for files named `.PKGINFO`.
pub struct ArchPkginfoParser;
18
19impl PackageParser for ArchSrcinfoParser {
20 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
21
22 fn is_match(path: &Path) -> bool {
23 path.file_name()
24 .and_then(|name| name.to_str())
25 .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
26 }
27
28 fn extract_packages(path: &Path) -> Vec<PackageData> {
29 let content = match read_file_to_string(path) {
30 Ok(content) => content,
31 Err(e) => {
32 warn!("Failed to read Arch source metadata {:?}: {}", path, e);
33 return vec![default_package_data(srcinfo_datasource_id(path))];
34 }
35 };
36
37 parse_srcinfo_like(&content, srcinfo_datasource_id(path))
38 }
39}
40
41impl PackageParser for ArchPkginfoParser {
42 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
43
44 fn is_match(path: &Path) -> bool {
45 path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
46 }
47
48 fn extract_packages(path: &Path) -> Vec<PackageData> {
49 let content = match read_file_to_string(path) {
50 Ok(content) => content,
51 Err(e) => {
52 warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
53 return vec![default_package_data(DatasourceId::ArchPkginfo)];
54 }
55 };
56
57 vec![parse_pkginfo(&content)]
58 }
59}
60
61fn default_package_data(datasource_id: DatasourceId) -> PackageData {
62 PackageData {
63 package_type: Some(PACKAGE_TYPE),
64 namespace: Some(PACKAGE_NAMESPACE.to_string()),
65 datasource_id: Some(datasource_id),
66 ..Default::default()
67 }
68}
69
70fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
71 match path.file_name().and_then(|name| name.to_str()) {
72 Some(".AURINFO") => DatasourceId::ArchAurinfo,
73 _ => DatasourceId::ArchSrcinfo,
74 }
75}
76
/// Field name mapped to every value recorded for it, in the order encountered.
type MultiMap = HashMap<String, Vec<String>>;

/// Collects `key = value` lines into a [`MultiMap`].
///
/// Each line is trimmed; blank lines, lines starting with `#`, lines without
/// `=` and lines whose key is empty are skipped. The line is split at the first
/// `=`, and both key and value are trimmed. Repeated keys accumulate values.
fn parse_key_value_lines(content: &str) -> MultiMap {
    let mut fields = MultiMap::new();

    let entries = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| line.split_once('='))
        .map(|(raw_key, raw_value)| (raw_key.trim(), raw_value.trim()))
        .filter(|(key, _)| !key.is_empty());

    for (key, value) in entries {
        fields
            .entry(key.to_string())
            .or_default()
            .push(value.to_string());
    }

    fields
}
102
103fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
104 let mut pkgbase: MultiMap = HashMap::new();
105 let mut packages: Vec<MultiMap> = Vec::new();
106 let mut current_is_pkgbase = true;
107
108 for line in content.lines() {
109 let line = line.trim();
110 if line.is_empty() || line.starts_with('#') {
111 continue;
112 }
113
114 let Some((key, value)) = line.split_once('=') else {
115 continue;
116 };
117
118 let key = key.trim();
119 let value = value.trim();
120
121 if key == "pkgbase" {
122 pkgbase
123 .entry(key.to_string())
124 .or_default()
125 .push(value.to_string());
126 current_is_pkgbase = true;
127 continue;
128 }
129
130 if key == "pkgname" {
131 packages.push(HashMap::from([(key.to_string(), vec![value.to_string()])]));
132 current_is_pkgbase = false;
133 continue;
134 }
135
136 let target = if current_is_pkgbase {
137 &mut pkgbase
138 } else {
139 packages.last_mut().unwrap_or(&mut pkgbase)
140 };
141
142 target
143 .entry(key.to_string())
144 .or_default()
145 .push(value.to_string());
146 }
147
148 if packages.is_empty() {
149 packages.push(HashMap::new());
150 }
151
152 let results: Vec<_> = packages
153 .into_iter()
154 .filter_map(|package_section| {
155 let merged = merge_srcinfo_sections(&pkgbase, &package_section);
156 let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
157 pkg.name.is_some().then_some(pkg)
158 })
159 .collect();
160
161 if results.is_empty() {
162 vec![default_package_data(datasource_id)]
163 } else {
164 results
165 }
166}
167
168fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
169 let mut merged = pkgbase.clone();
170
171 for (key, values) in package {
172 if should_append_srcinfo_values(key) {
173 merged
174 .entry(key.clone())
175 .or_default()
176 .extend(values.clone());
177 } else {
178 merged.insert(key.clone(), values.clone());
179 }
180 }
181
182 if !merged.contains_key("pkgname")
183 && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
184 {
185 merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
186 }
187
188 merged
189}
190
191fn should_append_srcinfo_values(key: &str) -> bool {
192 matches!(
193 key,
194 "arch"
195 | "groups"
196 | "license"
197 | "noextract"
198 | "options"
199 | "backup"
200 | "validpgpkeys"
201 | "source"
202 | "depends"
203 | "makedepends"
204 | "checkdepends"
205 | "optdepends"
206 | "provides"
207 | "conflicts"
208 | "replaces"
209 | "md5sums"
210 | "sha1sums"
211 | "sha224sums"
212 | "sha256sums"
213 | "sha384sums"
214 | "sha512sums"
215 | "b2sums"
216 | "cksums"
217 ) || is_arch_variant_key(key)
218}
219
220fn is_arch_variant_key(key: &str) -> bool {
221 arch_variant_base(key).is_some()
222}
223
/// Returns the base field name when `key` is `<base>_<suffix>` with a non-empty
/// suffix and `<base>` is one of the listed field names; otherwise `None`.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const BASES: &[&str] = &[
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    for &base in BASES {
        let Some(rest) = key.strip_prefix(base) else {
            continue;
        };
        match rest.strip_prefix('_') {
            Some(suffix) if !suffix.is_empty() => return Some(base),
            _ => {}
        }
    }

    None
}
250
251fn parse_pkginfo(content: &str) -> PackageData {
252 let fields = parse_key_value_lines(content);
253 build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
254}
255
256fn build_package_from_arch_metadata(
257 fields: &MultiMap,
258 datasource_id: DatasourceId,
259 is_srcinfo_like: bool,
260) -> PackageData {
261 let name = get_first(fields, "pkgname");
262 let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
263 let version = if is_srcinfo_like {
264 build_srcinfo_version(fields)
265 } else {
266 get_first(fields, "pkgver")
267 };
268 let description = get_first(fields, "pkgdesc");
269 let homepage_url = get_first(fields, "url");
270 let extracted_license_statement = join_values(fields.get("license"));
271 let arch_values = get_all(fields, "arch");
272 let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
273
274 let mut package = default_package_data(datasource_id);
275 package.name = name.clone();
276 package.version = version.clone();
277 package.description = description;
278 package.homepage_url = homepage_url;
279 package.extracted_license_statement = extracted_license_statement;
280 package.primary_language = None;
281 package.purl = name
282 .as_deref()
283 .and_then(|name| build_alpm_purl(name, version.as_deref(), purl_arch));
284 package.source_packages = pkgbase
285 .as_deref()
286 .and_then(|base| build_alpm_purl(base, version.as_deref(), purl_arch))
287 .into_iter()
288 .collect();
289
290 if !is_srcinfo_like {
291 if let Some(packager) = get_first(fields, "packager") {
292 let (name, email) = split_name_email(&packager);
293 package.parties.push(Party {
294 r#type: Some("person".to_string()),
295 role: Some("packager".to_string()),
296 name,
297 email,
298 url: None,
299 organization: None,
300 organization_url: None,
301 timezone: None,
302 });
303 }
304 package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
305 }
306
307 package.dependencies = build_dependencies(fields);
308 package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
309 package
310}
311
312fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
313 let pkgver = get_first(fields, "pkgver")?;
314 let pkgrel = get_first(fields, "pkgrel");
315 let epoch = get_first(fields, "epoch");
316
317 let mut version = match pkgrel {
318 Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
319 None => pkgver,
320 };
321
322 if let Some(epoch) = epoch
323 && epoch != "0"
324 {
325 version = format!("{}:{}", epoch, version);
326 }
327
328 Some(version)
329}
330
331fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
332 let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
333 purl.with_namespace(PACKAGE_NAMESPACE).ok()?;
334
335 if let Some(version) = version {
336 purl.with_version(version).ok()?;
337 }
338
339 if let Some(arch) = arch {
340 purl.add_qualifier("arch", arch).ok()?;
341 }
342
343 Some(purl.to_string())
344}
345
346fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
347 let mut dependencies = Vec::new();
348 let mut keys: Vec<_> = fields.keys().cloned().collect();
349 keys.sort();
350
351 for key in keys {
352 let Some((scope, is_runtime, is_optional)) = dependency_semantics(&key) else {
353 continue;
354 };
355
356 for value in get_all(fields, &key) {
357 if let Some(dep_name) = extract_arch_dependency_name(&value) {
358 dependencies.push(Dependency {
359 purl: build_alpm_purl(&dep_name, None, None),
360 extracted_requirement: Some(value.clone()),
361 scope: Some(scope.to_string()),
362 is_runtime: Some(is_runtime),
363 is_optional: Some(is_optional),
364 is_pinned: Some(false),
365 is_direct: Some(true),
366 resolved_package: None,
367 extra_data: None,
368 });
369 }
370 }
371 }
372
373 dependencies
374}
375
376fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
377 let base = key;
378 let normalized = arch_variant_base(key).unwrap_or(key);
379
380 match normalized {
381 "depends" | "depend" => Some((base, true, false)),
382 "makedepends" | "makedepend" => Some((base, false, false)),
383 "checkdepends" | "checkdepend" => Some((base, false, false)),
384 "optdepends" | "optdepend" => Some((base, true, true)),
385 _ => None,
386 }
387}
388
/// Extracts the bare package name from a dependency entry.
///
/// Everything from the first `:` onward is discarded, then everything from the
/// first `<`, `>` or `=` onward. Returns `None` when nothing but whitespace
/// remains.
fn extract_arch_dependency_name(value: &str) -> Option<String> {
    let before_colon = match value.split_once(':') {
        Some((head, _)) => head,
        None => value,
    };

    let name = before_colon
        .trim()
        .split(['<', '>', '='])
        .next()
        .unwrap_or("")
        .trim();

    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
395
396fn build_extra_data(
397 fields: &MultiMap,
398 is_srcinfo_like: bool,
399 purl_arch: Option<&str>,
400) -> Option<HashMap<String, JsonValue>> {
401 let consumed: HashSet<&str> = HashSet::from([
402 "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
403 "size",
404 ]);
405
406 let mut extra = HashMap::new();
407
408 for (key, values) in fields {
409 if consumed.contains(key.as_str()) {
410 continue;
411 }
412
413 let value = if should_force_array_extra_value(key) {
414 JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect())
415 } else if values.len() == 1 {
416 if key == "builddate" {
417 values[0]
418 .parse::<u64>()
419 .map(JsonValue::from)
420 .unwrap_or_else(|_| JsonValue::String(values[0].clone()))
421 } else {
422 JsonValue::String(values[0].clone())
423 }
424 } else {
425 JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect())
426 };
427 extra.insert(key.clone(), value);
428 }
429
430 if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
431 return None;
432 }
433
434 if !is_srcinfo_like
435 && purl_arch.is_some()
436 && !extra.contains_key("arch")
437 && let Some(arch) = purl_arch
438 {
439 extra.insert("arch".to_string(), JsonValue::String(arch.to_string()));
440 }
441
442 (!extra.is_empty()).then_some(extra)
443}
444
445fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
446 fields.get(key).and_then(|values| values.first()).cloned()
447}
448
449fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
450 fields.get(key).cloned().unwrap_or_default()
451}
452
/// Joins the values with `" AND "`; returns `None` when there is no list or the
/// list is empty.
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|items| !items.is_empty())
        .map(|items| items.join(" AND "))
}
461
462fn should_force_array_extra_value(key: &str) -> bool {
463 matches!(
464 key,
465 "provides"
466 | "conflict"
467 | "conflicts"
468 | "replace"
469 | "replaces"
470 | "source"
471 | "arch"
472 | "license"
473 | "groups"
474 | "options"
475 | "backup"
476 | "validpgpkeys"
477 | "md5sums"
478 | "sha1sums"
479 | "sha224sums"
480 | "sha256sums"
481 | "sha384sums"
482 | "sha512sums"
483 | "b2sums"
484 | "cksums"
485 ) || is_arch_variant_key(key)
486}
487
// Invokes the crate-level `register_parser!` macro (defined outside this file)
// with a human-readable description, glob patterns for the three file names
// accepted by the `is_match` implementations in this file, the string "alpm",
// an empty string, and a documentation URL.
// NOTE(review): the meaning of each positional argument, in particular the
// empty fourth one, is defined by the macro — confirm against its definition.
crate::register_parser!(
    "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
    &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
    "alpm",
    "",
    Some("https://wiki.archlinux.org/title/.SRCINFO"),
);