1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{read_file_to_string, truncate_field};
30use packageurl::PackageUrl;
31use std::path::Path;
32use std::str::FromStr;
33use url::Url;
34use yaml_serde::Value;
35
36use super::PackageParser;
37use super::license_normalization::{
38 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
39 normalize_spdx_expression,
40};
41
// Top-level keys looked up in the parsed `.ABOUT` mapping.

/// Key holding the declared package type; parsed into a `PackageType` when possible.
const FIELD_TYPE: &str = "type";
/// Preferred key for an explicit package URL string.
const FIELD_PURL: &str = "purl";
/// Alternate key for the package URL, consulted only when `purl` is absent or not a string.
const FIELD_PACKAGE_URL: &str = "package_url";
/// Key holding the package namespace.
const FIELD_NAMESPACE: &str = "namespace";
/// Key holding the package name (string, number, or boolean scalars are accepted).
const FIELD_NAME: &str = "name";
/// Key holding the package version (string, number, or boolean scalars are accepted).
const FIELD_VERSION: &str = "version";
/// Preferred key for the homepage URL.
const FIELD_HOME_URL: &str = "home_url";
/// Alternate key for the homepage URL, consulted only when `home_url` is absent or not a string.
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
/// Key holding the download URL; also used to infer a package identity.
const FIELD_DOWNLOAD_URL: &str = "download_url";
/// Key holding the copyright statement.
const FIELD_COPYRIGHT: &str = "copyright";
/// Key holding the license expression, kept as the extracted license statement.
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
/// Key holding the owner, reported as a party with the `owner` role.
const FIELD_OWNER: &str = "owner";
/// Key holding the path of the resource the `.ABOUT` file describes.
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
55
/// Package parser for AboutCode `.ABOUT` metadata files.
///
/// Carries no state; all behavior lives in its `PackageParser` implementation.
pub struct AboutFileParser;
61
/// Package identity guessed from a `download_url` when an `.ABOUT` file
/// declares neither a usable `type` nor a parseable package URL.
#[derive(Clone)]
struct InferredAboutIdentity {
    /// Package type implied by the download host.
    package_type: PackageType,
    /// Namespace implied by the URL, if any.
    namespace: Option<String>,
    /// Package name implied by the URL or carried over from the `.ABOUT` fields.
    name: Option<String>,
    /// Package version, when one is known.
    version: Option<String>,
}
69
70impl PackageParser for AboutFileParser {
71 const PACKAGE_TYPE: PackageType = PackageType::About;
72
73 fn extract_packages(path: &Path) -> Vec<PackageData> {
74 let yaml = match read_and_parse_yaml(path) {
75 Ok(yaml) => yaml,
76 Err(e) => {
77 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
78 return vec![default_package_data()];
79 }
80 };
81
82 let about_type = yaml
84 .get(FIELD_TYPE)
85 .and_then(|v| v.as_str())
86 .map(String::from);
87
88 let about_namespace = yaml
89 .get(FIELD_NAMESPACE)
90 .and_then(|v| v.as_str())
91 .map(|v| truncate_field(v.to_string()));
92
93 let purl_string = yaml
94 .get(FIELD_PURL)
95 .and_then(|v| v.as_str())
96 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
97 .map(|v| truncate_field(v.to_string()));
98
99 let (purl_type, purl_namespace, purl_name, purl_version) =
101 if let Some(ref purl_str) = purl_string {
102 match PackageUrl::from_str(purl_str) {
103 Ok(purl) => (
104 Some(truncate_field(purl.ty().to_string())),
105 purl.namespace().map(|v| truncate_field(v.to_string())),
106 Some(truncate_field(purl.name().to_string())),
107 purl.version().map(|v| truncate_field(v.to_string())),
108 ),
109 Err(e) => {
110 warn!("Failed to parse purl '{}': {}", purl_str, e);
111 (None, None, None, None)
112 }
113 }
114 } else {
115 (None, None, None, None)
116 };
117
118 let inferred = infer_about_from_download_url(
119 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
120 yaml.get(FIELD_NAME)
121 .and_then(yaml_value_to_string)
122 .as_deref(),
123 yaml.get(FIELD_VERSION)
124 .and_then(yaml_value_to_string)
125 .as_deref(),
126 );
127
128 let explicit_package_type = about_type
129 .clone()
130 .and_then(|s| s.parse::<crate::models::PackageType>().ok());
131 let parsed_purl_type = purl_type
132 .clone()
133 .and_then(|s| s.parse::<crate::models::PackageType>().ok());
134 let has_parsed_purl_identity = parsed_purl_type.is_some()
135 || purl_namespace.is_some()
136 || purl_name.is_some()
137 || purl_version.is_some();
138 let inferred_identity = if explicit_package_type.is_none() && !has_parsed_purl_identity {
139 inferred
140 } else {
141 None
142 };
143
144 let package_type = explicit_package_type
145 .or(parsed_purl_type)
146 .or_else(|| {
147 inferred_identity
148 .as_ref()
149 .map(|identity| identity.package_type)
150 })
151 .unwrap_or(Self::PACKAGE_TYPE);
152
153 let namespace = about_namespace
155 .clone()
156 .or(purl_namespace.clone())
157 .or_else(|| {
158 inferred_identity
159 .as_ref()
160 .and_then(|identity| identity.namespace.clone())
161 })
162 .map(truncate_field);
163
164 let name = yaml
166 .get(FIELD_NAME)
167 .and_then(yaml_value_to_string)
168 .or(purl_name.clone())
169 .or_else(|| {
170 inferred_identity
171 .as_ref()
172 .and_then(|identity| identity.name.clone())
173 })
174 .map(truncate_field);
175
176 let version = yaml
177 .get(FIELD_VERSION)
178 .and_then(yaml_value_to_string)
179 .or(purl_version.clone())
180 .or_else(|| {
181 inferred_identity
182 .as_ref()
183 .and_then(|identity| identity.version.clone())
184 })
185 .map(truncate_field);
186
187 let homepage_url = yaml
189 .get(FIELD_HOME_URL)
190 .and_then(|v| v.as_str())
191 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
192 .map(|v| truncate_field(v.to_string()));
193
194 let download_url = yaml
195 .get(FIELD_DOWNLOAD_URL)
196 .and_then(|v| v.as_str())
197 .map(|v| truncate_field(v.to_string()));
198
199 let copyright = yaml
200 .get(FIELD_COPYRIGHT)
201 .and_then(|v| v.as_str())
202 .map(|v| truncate_field(v.to_string()));
203
204 let extracted_license_statement = yaml
205 .get(FIELD_LICENSE_EXPRESSION)
206 .and_then(|v| v.as_str())
207 .map(|v| truncate_field(v.to_string()));
208 let file_references = extract_file_references(&yaml);
209 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
210 extracted_license_statement
211 .as_deref()
212 .and_then(normalize_spdx_expression)
213 .map(|normalized| {
214 build_declared_license_data(
215 normalized,
216 DeclaredLicenseMatchMetadata::single_line(
217 extracted_license_statement.as_deref().unwrap_or_default(),
218 ),
219 )
220 })
221 .unwrap_or_else(|| {
222 normalize_spdx_declared_license(extracted_license_statement.as_deref())
223 });
224
225 let vcs_url = yaml
226 .get(Value::String("vcs_url".to_string()))
227 .and_then(|v| v.as_str())
228 .map(|v| truncate_field(v.to_string()));
229
230 let extra_data = build_extra_data(&yaml);
231
232 let purl = purl_string
233 .or_else(|| {
234 let name = yaml
235 .get(FIELD_NAME)
236 .and_then(yaml_value_to_string)
237 .or(purl_name.clone())
238 .or_else(|| {
239 inferred_identity
240 .as_ref()
241 .and_then(|identity| identity.name.clone())
242 });
243 let version = yaml
244 .get(FIELD_VERSION)
245 .and_then(yaml_value_to_string)
246 .or(purl_version.clone())
247 .or_else(|| {
248 inferred_identity
249 .as_ref()
250 .and_then(|identity| identity.version.clone())
251 });
252 let namespace = about_namespace.clone().or_else(|| {
253 inferred_identity
254 .as_ref()
255 .and_then(|identity| identity.namespace.clone())
256 });
257 build_about_purl(
258 package_type,
259 namespace.as_deref(),
260 name.as_deref(),
261 version.as_deref(),
262 )
263 })
264 .map(truncate_field);
265
266 let parties = extract_owner_party(&yaml);
268
269 vec![PackageData {
271 package_type: Some(package_type),
272 namespace,
273 name,
274 version,
275 qualifiers: None,
276 subpath: None,
277 primary_language: None,
278 description: None,
279 release_date: None,
280 parties,
281 keywords: Vec::new(),
282 homepage_url,
283 download_url,
284 size: None,
285 sha1: None,
286 md5: None,
287 sha256: None,
288 sha512: None,
289 bug_tracking_url: None,
290 code_view_url: None,
291 vcs_url,
292 copyright,
293 holder: None,
294 declared_license_expression,
295 declared_license_expression_spdx,
296 license_detections,
297 other_license_expression: None,
298 other_license_expression_spdx: None,
299 other_license_detections: Vec::new(),
300 extracted_license_statement,
301 notice_text: None,
302 source_packages: Vec::new(),
303 file_references,
304 is_private: false,
305 is_virtual: false,
306 extra_data,
307 dependencies: Vec::new(),
308 repository_homepage_url: None,
309 repository_download_url: None,
310 api_data_url: None,
311 datasource_id: Some(DatasourceId::AboutFile),
312 purl,
313 }]
314 }
315
316 fn is_match(path: &Path) -> bool {
317 path.extension()
318 .and_then(|ext| ext.to_str())
319 .is_some_and(|ext| ext == "ABOUT")
320 }
321
322 fn metadata() -> Vec<super::metadata::ParserMetadata> {
323 vec![super::metadata::ParserMetadata {
324 description: "AboutCode .ABOUT metadata file",
325 file_patterns: &["**/*.ABOUT"],
326 package_type: "about",
327 primary_language: "",
328 documentation_url: Some(
329 "https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html",
330 ),
331 }]
332 }
333}
334
335fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
337 let content =
338 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
339
340 parse_yaml_mapping(&content)
341 .or_else(|yaml_error| parse_shallow_scalar_mapping(&content).ok_or(yaml_error))
342}
343
344fn parse_yaml_mapping(content: &str) -> Result<yaml_serde::Mapping, String> {
345 let value: Value =
346 yaml_serde::from_str(content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
347
348 match value {
349 Value::Mapping(map) => Ok(map),
350 _ => Err("Expected YAML mapping at root".to_string()),
351 }
352}
353
354fn parse_shallow_scalar_mapping(content: &str) -> Option<yaml_serde::Mapping> {
355 let mut map = yaml_serde::Mapping::new();
356 let mut saw_mapping_entry = false;
357
358 for line in content.lines() {
359 let trimmed = line.trim();
360 if trimmed.is_empty() || trimmed.starts_with('#') {
361 continue;
362 }
363 if line.starts_with(char::is_whitespace) {
364 return None;
365 }
366
367 let (raw_key, raw_value) = trimmed.split_once(':')?;
368 let key = raw_key.trim();
369 if key.is_empty()
370 || !key.chars().all(|character| {
371 character.is_ascii_alphanumeric() || matches!(character, '_' | '-')
372 })
373 {
374 return None;
375 }
376
377 let value = raw_value.trim();
378 if value.is_empty() {
379 return None;
380 }
381
382 saw_mapping_entry = true;
383 map.insert(
384 Value::String(key.to_string()),
385 Value::String(unquote_yaml_scalar(value)),
386 );
387 }
388
389 saw_mapping_entry.then_some(map)
390}
391
/// Removes one layer of matching surrounding quotes from `value`.
///
/// The text is unwrapped only when it both starts and ends with the same
/// quote character (`"` or `'`) and those are two distinct characters; a lone
/// quote or mismatched quotes are returned unchanged. No escape sequences are
/// interpreted.
fn unquote_yaml_scalar(value: &str) -> String {
    let unwrapped = ['"', '\'']
        .iter()
        // Stripping the prefix first guarantees a one-character input cannot
        // count its single quote as both opener and closer.
        .find_map(|&quote| value.strip_prefix(quote)?.strip_suffix(quote));

    unwrapped.unwrap_or(value).to_string()
}
406
407fn yaml_value_to_string(value: &Value) -> Option<String> {
409 match value {
410 Value::String(s) => Some(s.clone()),
411 Value::Number(n) => Some(n.to_string()),
412 Value::Bool(b) => Some(b.to_string()),
413 _ => None,
414 }
415}
416
417fn extract_owner_party(yaml: &yaml_serde::Mapping) -> Vec<Party> {
419 let owner = yaml
420 .get(Value::String(FIELD_OWNER.to_string()))
421 .map(|v| match v {
422 Value::String(s) => truncate_field(s.clone()),
423 _ => truncate_field(format!("{:?}", v)),
424 });
425
426 if let Some(owner_name) = owner {
427 if !owner_name.is_empty() {
428 vec![Party {
429 r#type: Some("person".to_string()),
430 role: Some("owner".to_string()),
431 name: Some(owner_name),
432 email: None,
433 url: None,
434 organization: None,
435 organization_url: None,
436 timezone: None,
437 }]
438 } else {
439 Vec::new()
440 }
441 } else {
442 Vec::new()
443 }
444}
445
446fn extract_file_references(yaml: &yaml_serde::Mapping) -> Vec<FileReference> {
448 let about_resource = yaml
449 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
450 .and_then(|v| v.as_str());
451 let license_file = yaml
452 .get(Value::String("license_file".to_string()))
453 .and_then(|v| v.as_str());
454 let notice_file = yaml
455 .get(Value::String("notice_file".to_string()))
456 .and_then(|v| v.as_str());
457
458 let mut refs = Vec::new();
459
460 if let Some(path) = about_resource {
461 refs.push(FileReference {
462 path: truncate_field(path.to_string()),
463 size: None,
464 sha1: None,
465 md5: None,
466 sha256: None,
467 sha512: None,
468 extra_data: None,
469 });
470 }
471
472 for path in [license_file, notice_file].into_iter().flatten() {
473 refs.push(FileReference {
474 path: truncate_field(path.to_string()),
475 size: None,
476 sha1: None,
477 md5: None,
478 sha256: None,
479 sha512: None,
480 extra_data: None,
481 });
482 }
483
484 refs
485}
486
487fn default_package_data() -> PackageData {
489 PackageData {
490 package_type: Some(PackageType::About),
491 datasource_id: Some(DatasourceId::AboutFile),
492 ..Default::default()
493 }
494}
495
496fn infer_about_from_download_url(
497 download_url: Option<&str>,
498 about_name: Option<&str>,
499 about_version: Option<&str>,
500) -> Option<InferredAboutIdentity> {
501 let url = Url::parse(download_url?).ok()?;
502 let host = url.host_str()?;
503
504 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
505 let name = about_name.map(str::to_string)?;
506 let version = about_version.map(str::to_string);
507 return Some(InferredAboutIdentity {
508 package_type: PackageType::Pypi,
509 namespace: None,
510 name: Some(name),
511 version,
512 });
513 }
514
515 if matches!(host, "raw.githubusercontent.com" | "github.com") {
516 let mut segments = url.path_segments()?;
517 let owner = segments.next()?.to_string();
518 let repo = segments.next()?.to_string();
519 return Some(InferredAboutIdentity {
520 package_type: PackageType::Github,
521 namespace: Some(owner),
522 name: Some(repo),
523 version: None,
524 });
525 }
526
527 None
528}
529
530fn build_about_purl(
531 package_type: PackageType,
532 namespace: Option<&str>,
533 name: Option<&str>,
534 version: Option<&str>,
535) -> Option<String> {
536 if package_type == PackageType::About {
537 return None;
538 }
539
540 let name = name?;
541 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
542 if let Some(namespace) = namespace {
543 purl.with_namespace(namespace).ok()?;
544 }
545 if let Some(version) = version {
546 purl.with_version(version).ok()?;
547 }
548 Some(purl.to_string())
549}
550
551fn build_extra_data(
552 yaml: &yaml_serde::Mapping,
553) -> Option<std::collections::HashMap<String, serde_json::Value>> {
554 let mut extra_data = std::collections::HashMap::new();
555 for key in ["license_file", "notice_file", "notes"] {
556 if let Some(value) = yaml.get(Value::String(key.to_string()))
557 && let Some(value) = yaml_value_to_string(value)
558 {
559 extra_data.insert(
560 key.to_string(),
561 serde_json::Value::String(truncate_field(value)),
562 );
563 }
564 }
565 (!extra_data.is_empty()).then_some(extra_data)
566}