1use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10
11use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
12use crate::parsers::utils::{
13 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
14};
15
16use super::PackageParser;
17use super::metadata::ParserMetadata;
18
/// Package type stamped on every `PackageData` produced in this file and used
/// as the purl type when building package URLs.
const PACKAGE_TYPE: PackageType = PackageType::Alpm;
/// Namespace recorded on every `PackageData` and used as the purl namespace.
const PACKAGE_NAMESPACE: &str = "arch";
21
/// Parser for files named `.SRCINFO` or `.AURINFO`.
pub struct ArchSrcinfoParser;
/// Parser for files named `.PKGINFO`.
pub struct ArchPkginfoParser;
24
25impl PackageParser for ArchSrcinfoParser {
26 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
27
28 fn metadata() -> Vec<ParserMetadata> {
29 vec![ParserMetadata {
30 description: "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
31 file_patterns: &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
32 package_type: "alpm",
33 primary_language: "",
34 documentation_url: Some("https://wiki.archlinux.org/title/.SRCINFO"),
35 }]
36 }
37
38 fn is_match(path: &Path) -> bool {
39 path.file_name()
40 .and_then(|name| name.to_str())
41 .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
42 }
43
44 fn extract_packages(path: &Path) -> Vec<PackageData> {
45 let content = match read_file_to_string(path, None) {
46 Ok(content) => content,
47 Err(e) => {
48 warn!("Failed to read Arch source metadata {:?}: {}", path, e);
49 return vec![default_package_data(srcinfo_datasource_id(path))];
50 }
51 };
52
53 parse_srcinfo_like(&content, srcinfo_datasource_id(path))
54 }
55}
56
57impl PackageParser for ArchPkginfoParser {
58 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
59
60 fn is_match(path: &Path) -> bool {
61 path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
62 }
63
64 fn extract_packages(path: &Path) -> Vec<PackageData> {
65 let content = match read_file_to_string(path, None) {
66 Ok(content) => content,
67 Err(e) => {
68 warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
69 return vec![default_package_data(DatasourceId::ArchPkginfo)];
70 }
71 };
72
73 vec![parse_pkginfo(&content)]
74 }
75}
76
77fn default_package_data(datasource_id: DatasourceId) -> PackageData {
78 PackageData {
79 package_type: Some(PACKAGE_TYPE),
80 namespace: Some(PACKAGE_NAMESPACE.to_string()),
81 datasource_id: Some(datasource_id),
82 ..Default::default()
83 }
84}
85
86fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
87 match path.file_name().and_then(|name| name.to_str()) {
88 Some(".AURINFO") => DatasourceId::ArchAurinfo,
89 _ => DatasourceId::ArchSrcinfo,
90 }
91}
92
/// Parsed `key = value` fields: each key maps to every value seen for it, in
/// the order the values were pushed.
type MultiMap = HashMap<String, Vec<String>>;
94
95fn parse_key_value_lines(content: &str) -> MultiMap {
96 let mut fields: MultiMap = HashMap::new();
97
98 for line in content.lines().take(MAX_ITERATION_COUNT) {
99 let line = line.trim();
100 if line.is_empty() || line.starts_with('#') {
101 continue;
102 }
103
104 if let Some((key, value)) = line.split_once('=') {
105 let key = key.trim();
106 let value = value.trim();
107 if !key.is_empty() {
108 fields
109 .entry(key.to_string())
110 .or_default()
111 .push(truncate_field(value.to_string()));
112 }
113 }
114 }
115
116 fields
117}
118
119fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
120 let mut pkgbase: MultiMap = HashMap::new();
121 let mut packages: Vec<MultiMap> = Vec::new();
122 let mut current_is_pkgbase = true;
123
124 for line in content.lines().take(MAX_ITERATION_COUNT) {
125 let line = line.trim();
126 if line.is_empty() || line.starts_with('#') {
127 continue;
128 }
129
130 let Some((key, value)) = line.split_once('=') else {
131 continue;
132 };
133
134 let key = key.trim();
135 let value = value.trim();
136
137 if key == "pkgbase" {
138 pkgbase
139 .entry(key.to_string())
140 .or_default()
141 .push(truncate_field(value.to_string()));
142 current_is_pkgbase = true;
143 continue;
144 }
145
146 if key == "pkgname" {
147 packages.push(HashMap::from([(
148 key.to_string(),
149 vec![truncate_field(value.to_string())],
150 )]));
151 current_is_pkgbase = false;
152 continue;
153 }
154
155 let target = if current_is_pkgbase {
156 &mut pkgbase
157 } else {
158 packages.last_mut().unwrap_or(&mut pkgbase)
159 };
160
161 target
162 .entry(key.to_string())
163 .or_default()
164 .push(truncate_field(value.to_string()));
165 }
166
167 if packages.is_empty() {
168 packages.push(HashMap::new());
169 }
170
171 let results: Vec<_> = packages
172 .into_iter()
173 .filter_map(|package_section| {
174 let merged = merge_srcinfo_sections(&pkgbase, &package_section);
175 let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
176 pkg.name.is_some().then_some(pkg)
177 })
178 .collect();
179
180 if results.is_empty() {
181 vec![default_package_data(datasource_id)]
182 } else {
183 results
184 }
185}
186
187fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
188 let mut merged = pkgbase.clone();
189
190 for (key, values) in package {
191 if should_append_srcinfo_values(key) {
192 merged
193 .entry(key.clone())
194 .or_default()
195 .extend(values.clone());
196 } else {
197 merged.insert(key.clone(), values.clone());
198 }
199 }
200
201 if !merged.contains_key("pkgname")
202 && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
203 {
204 merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
205 }
206
207 merged
208}
209
210fn should_append_srcinfo_values(key: &str) -> bool {
211 matches!(
212 key,
213 "arch"
214 | "groups"
215 | "license"
216 | "noextract"
217 | "options"
218 | "backup"
219 | "validpgpkeys"
220 | "source"
221 | "depends"
222 | "makedepends"
223 | "checkdepends"
224 | "optdepends"
225 | "provides"
226 | "conflicts"
227 | "replaces"
228 | "md5sums"
229 | "sha1sums"
230 | "sha224sums"
231 | "sha256sums"
232 | "sha384sums"
233 | "sha512sums"
234 | "b2sums"
235 | "cksums"
236 ) || is_arch_variant_key(key)
237}
238
/// Reports whether `key` is an architecture-specific variant of a known base
/// key, i.e. whether `arch_variant_base` finds a base for it.
fn is_arch_variant_key(key: &str) -> bool {
    arch_variant_base(key).is_some()
}
242
/// Returns the base key of an architecture-specific key such as
/// `depends_x86_64` (→ `depends`).
///
/// A key qualifies when it is one of the base names below, followed by `_`
/// and a non-empty suffix. Anything else yields `None`.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const ARCH_SPECIFIC_BASES: &[&str] = &[
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    // No base name contains `_`, so the first underscore is the only place
    // where a base can end and the architecture suffix can begin.
    let (prefix, arch) = key.split_once('_')?;
    if arch.is_empty() {
        return None;
    }

    ARCH_SPECIFIC_BASES
        .iter()
        .copied()
        .find(|base| *base == prefix)
}
269
270fn parse_pkginfo(content: &str) -> PackageData {
271 let fields = parse_key_value_lines(content);
272 build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
273}
274
275fn build_package_from_arch_metadata(
276 fields: &MultiMap,
277 datasource_id: DatasourceId,
278 is_srcinfo_like: bool,
279) -> PackageData {
280 let name = get_first(fields, "pkgname");
281 let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
282 let version = if is_srcinfo_like {
283 build_srcinfo_version(fields)
284 } else {
285 get_first(fields, "pkgver")
286 };
287 let description = get_first(fields, "pkgdesc");
288 let homepage_url = get_first(fields, "url");
289 let extracted_license_statement = join_values(fields.get("license"));
290 let arch_values = get_all(fields, "arch");
291 let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
292
293 let mut package = default_package_data(datasource_id);
294 package.name = name.map(truncate_field);
295 package.version = version.map(truncate_field);
296 package.description = description.map(truncate_field);
297 package.homepage_url = homepage_url.map(truncate_field);
298 package.extracted_license_statement = extracted_license_statement.map(truncate_field);
299 package.primary_language = None;
300 package.purl = package
301 .name
302 .as_deref()
303 .and_then(|name| build_alpm_purl(name, package.version.as_deref(), purl_arch));
304 package.source_packages = pkgbase
305 .and_then(|base| build_alpm_purl(&base, package.version.as_deref(), purl_arch))
306 .into_iter()
307 .collect();
308
309 if !is_srcinfo_like {
310 if let Some(packager) = get_first(fields, "packager") {
311 let (packager_name, packager_email) = split_name_email(&packager);
312 package.parties.push(Party {
313 r#type: Some("person".to_string()),
314 role: Some("packager".to_string()),
315 name: packager_name.map(truncate_field),
316 email: packager_email.map(truncate_field),
317 url: None,
318 organization: None,
319 organization_url: None,
320 timezone: None,
321 });
322 }
323 package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
324 }
325
326 package.dependencies = build_dependencies(fields);
327 package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
328 package
329}
330
331fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
332 let pkgver = get_first(fields, "pkgver")?;
333 let pkgrel = get_first(fields, "pkgrel");
334 let epoch = get_first(fields, "epoch");
335
336 let mut version = match pkgrel {
337 Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
338 None => pkgver,
339 };
340
341 if let Some(epoch) = epoch
342 && epoch != "0"
343 {
344 version = format!("{}:{}", epoch, version);
345 }
346
347 Some(version)
348}
349
/// Builds the package URL string for `name` using this module's package type
/// and namespace, optionally adding a version and an `arch` qualifier.
///
/// Returns `None` if constructing the purl or setting any of its components
/// fails.
fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
    purl.with_namespace(PACKAGE_NAMESPACE).ok()?;

    if let Some(version) = version {
        purl.with_version(version).ok()?;
    }

    if let Some(arch) = arch {
        purl.add_qualifier("arch", arch).ok()?;
    }

    Some(purl.to_string())
}
364
365fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
366 let mut dependencies = Vec::new();
367 let mut keys: Vec<_> = fields.keys().cloned().collect();
368 keys.sort();
369
370 for key in keys.iter().take(MAX_ITERATION_COUNT) {
371 let Some((scope, is_runtime, is_optional)) = dependency_semantics(key) else {
372 continue;
373 };
374
375 for value in get_all(fields, key) {
376 if let Some(dep_name) = extract_arch_dependency_name(&value) {
377 dependencies.push(Dependency {
378 purl: build_alpm_purl(&dep_name, None, None),
379 extracted_requirement: Some(truncate_field(value.clone())),
380 scope: Some(scope.to_string()),
381 is_runtime: Some(is_runtime),
382 is_optional: Some(is_optional),
383 is_pinned: Some(false),
384 is_direct: Some(true),
385 resolved_package: None,
386 extra_data: None,
387 });
388 }
389 }
390 }
391
392 dependencies
393}
394
395fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
396 let base = key;
397 let normalized = arch_variant_base(key).unwrap_or(key);
398
399 match normalized {
400 "depends" | "depend" => Some((base, true, false)),
401 "makedepends" | "makedepend" => Some((base, false, false)),
402 "checkdepends" | "checkdepend" => Some((base, false, false)),
403 "optdepends" | "optdepend" => Some((base, true, true)),
404 _ => None,
405 }
406}
407
408fn extract_arch_dependency_name(value: &str) -> Option<String> {
409 let dep = value.split(':').next()?.trim();
410 let end = dep.find(['<', '>', '=']).unwrap_or(dep.len());
411 let name = dep[..end].trim();
412 (!name.is_empty()).then(|| truncate_field(name.to_string()))
413}
414
415fn build_extra_data(
416 fields: &MultiMap,
417 is_srcinfo_like: bool,
418 purl_arch: Option<&str>,
419) -> Option<HashMap<String, JsonValue>> {
420 let consumed: HashSet<&str> = HashSet::from([
421 "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
422 "size",
423 ]);
424
425 let mut extra = HashMap::new();
426
427 for (key, values) in fields.iter().take(MAX_ITERATION_COUNT) {
428 if consumed.contains(key.as_str()) {
429 continue;
430 }
431
432 let value = if should_force_array_extra_value(key) {
433 JsonValue::Array(
434 values
435 .iter()
436 .cloned()
437 .map(|v| JsonValue::String(truncate_field(v)))
438 .collect(),
439 )
440 } else if values.len() == 1 {
441 if key == "builddate" {
442 values[0]
443 .parse::<u64>()
444 .map(JsonValue::from)
445 .unwrap_or_else(|_| JsonValue::String(truncate_field(values[0].clone())))
446 } else {
447 JsonValue::String(truncate_field(values[0].clone()))
448 }
449 } else {
450 JsonValue::Array(
451 values
452 .iter()
453 .cloned()
454 .map(|v| JsonValue::String(truncate_field(v)))
455 .collect(),
456 )
457 };
458 extra.insert(key.clone(), value);
459 }
460
461 if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
462 return None;
463 }
464
465 if !is_srcinfo_like
466 && purl_arch.is_some()
467 && !extra.contains_key("arch")
468 && let Some(arch) = purl_arch
469 {
470 extra.insert(
471 "arch".to_string(),
472 JsonValue::String(truncate_field(arch.to_string())),
473 );
474 }
475
476 (!extra.is_empty()).then_some(extra)
477}
478
479fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
480 fields.get(key).and_then(|values| values.first()).cloned()
481}
482
483fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
484 fields.get(key).cloned().unwrap_or_default()
485}
486
/// Joins the given values with `" AND "`.
///
/// Returns `None` when `values` is `None` or the list is empty.
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|items| !items.is_empty())
        .map(|items| items.join(" AND "))
}
495
496fn should_force_array_extra_value(key: &str) -> bool {
497 matches!(
498 key,
499 "provides"
500 | "conflict"
501 | "conflicts"
502 | "replace"
503 | "replaces"
504 | "source"
505 | "arch"
506 | "license"
507 | "groups"
508 | "options"
509 | "backup"
510 | "validpgpkeys"
511 | "md5sums"
512 | "sha1sums"
513 | "sha224sums"
514 | "sha256sums"
515 | "sha384sums"
516 | "sha512sums"
517 | "b2sums"
518 | "cksums"
519 ) || is_arch_variant_key(key)
520}