1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{read_file_to_string, truncate_field};
30use packageurl::PackageUrl;
31use std::path::Path;
32use std::str::FromStr;
33use url::Url;
34use yaml_serde::Value;
35
36use super::PackageParser;
37use super::license_normalization::{
38 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
39 normalize_spdx_expression,
40};
41
// Top-level keys looked up in the parsed `.ABOUT` mapping.

/// Key holding the explicitly declared package type.
const FIELD_TYPE: &str = "type";
/// Primary key holding a package URL string.
const FIELD_PURL: &str = "purl";
/// Alternate key consulted when `purl` is absent or not a string.
const FIELD_PACKAGE_URL: &str = "package_url";
/// Key holding the package namespace.
const FIELD_NAMESPACE: &str = "namespace";
/// Key holding the package name.
const FIELD_NAME: &str = "name";
/// Key holding the package version.
const FIELD_VERSION: &str = "version";
/// Primary key holding the homepage URL.
const FIELD_HOME_URL: &str = "home_url";
/// Alternate key consulted when `home_url` is absent or not a string.
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
/// Key holding the download URL.
const FIELD_DOWNLOAD_URL: &str = "download_url";
/// Key holding the copyright statement.
const FIELD_COPYRIGHT: &str = "copyright";
/// Key holding the license expression text.
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
/// Key holding the owner, reported as a party.
const FIELD_OWNER: &str = "owner";
/// Key holding the path of the resource the file describes.
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
55
/// Parser for `.ABOUT` metadata files.
///
/// A stateless marker type: all behaviour lives in its `PackageParser`
/// implementation.
pub struct AboutFileParser;
61
62#[derive(Clone)]
63struct InferredAboutIdentity {
64 package_type: PackageType,
65 namespace: Option<String>,
66 name: Option<String>,
67 version: Option<String>,
68}
69
70impl PackageParser for AboutFileParser {
71 const PACKAGE_TYPE: PackageType = PackageType::About;
72
73 fn extract_packages(path: &Path) -> Vec<PackageData> {
74 let yaml = match read_and_parse_yaml(path) {
75 Ok(yaml) => yaml,
76 Err(e) => {
77 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
78 return vec![default_package_data()];
79 }
80 };
81
82 let about_type = yaml
84 .get(FIELD_TYPE)
85 .and_then(|v| v.as_str())
86 .map(String::from);
87
88 let about_namespace = yaml
89 .get(FIELD_NAMESPACE)
90 .and_then(|v| v.as_str())
91 .map(|v| truncate_field(v.to_string()));
92
93 let purl_string = yaml
94 .get(FIELD_PURL)
95 .and_then(|v| v.as_str())
96 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
97 .map(|v| truncate_field(v.to_string()));
98
99 let (purl_type, purl_namespace, purl_name, purl_version) =
101 if let Some(ref purl_str) = purl_string {
102 match PackageUrl::from_str(purl_str) {
103 Ok(purl) => (
104 Some(truncate_field(purl.ty().to_string())),
105 purl.namespace().map(|v| truncate_field(v.to_string())),
106 Some(truncate_field(purl.name().to_string())),
107 purl.version().map(|v| truncate_field(v.to_string())),
108 ),
109 Err(e) => {
110 warn!("Failed to parse purl '{}': {}", purl_str, e);
111 (None, None, None, None)
112 }
113 }
114 } else {
115 (None, None, None, None)
116 };
117
118 let inferred = infer_about_from_download_url(
119 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
120 yaml.get(FIELD_NAME)
121 .and_then(yaml_value_to_string)
122 .as_deref(),
123 yaml.get(FIELD_VERSION)
124 .and_then(yaml_value_to_string)
125 .as_deref(),
126 );
127
128 let package_type = about_type
129 .clone()
130 .or(purl_type)
131 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
132 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
133 .unwrap_or(Self::PACKAGE_TYPE);
134
135 let namespace = about_namespace
137 .clone()
138 .or(purl_namespace.clone())
139 .or_else(|| {
140 inferred
141 .as_ref()
142 .and_then(|identity| identity.namespace.clone())
143 })
144 .map(truncate_field);
145
146 let name = yaml
148 .get(FIELD_NAME)
149 .and_then(yaml_value_to_string)
150 .or(purl_name.clone())
151 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()))
152 .map(truncate_field);
153
154 let version = yaml
155 .get(FIELD_VERSION)
156 .and_then(yaml_value_to_string)
157 .or(purl_version.clone())
158 .or_else(|| {
159 inferred
160 .as_ref()
161 .and_then(|identity| identity.version.clone())
162 })
163 .map(truncate_field);
164
165 let homepage_url = yaml
167 .get(FIELD_HOME_URL)
168 .and_then(|v| v.as_str())
169 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
170 .map(|v| truncate_field(v.to_string()));
171
172 let download_url = yaml
173 .get(FIELD_DOWNLOAD_URL)
174 .and_then(|v| v.as_str())
175 .map(|v| truncate_field(v.to_string()));
176
177 let copyright = yaml
178 .get(FIELD_COPYRIGHT)
179 .and_then(|v| v.as_str())
180 .map(|v| truncate_field(v.to_string()));
181
182 let extracted_license_statement = yaml
183 .get(FIELD_LICENSE_EXPRESSION)
184 .and_then(|v| v.as_str())
185 .map(|v| truncate_field(v.to_string()));
186 let file_references = extract_file_references(&yaml);
187 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
188 extracted_license_statement
189 .as_deref()
190 .and_then(normalize_spdx_expression)
191 .map(|normalized| {
192 build_declared_license_data(
193 normalized,
194 DeclaredLicenseMatchMetadata::single_line(
195 extracted_license_statement.as_deref().unwrap_or_default(),
196 ),
197 )
198 })
199 .unwrap_or_else(|| {
200 normalize_spdx_declared_license(extracted_license_statement.as_deref())
201 });
202
203 let vcs_url = yaml
204 .get(Value::String("vcs_url".to_string()))
205 .and_then(|v| v.as_str())
206 .map(|v| truncate_field(v.to_string()));
207
208 let extra_data = build_extra_data(&yaml);
209
210 let purl = purl_string
211 .or_else(|| {
212 let name = yaml
213 .get(FIELD_NAME)
214 .and_then(yaml_value_to_string)
215 .or(purl_name.clone())
216 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
217 let version = yaml
218 .get(FIELD_VERSION)
219 .and_then(yaml_value_to_string)
220 .or(purl_version.clone())
221 .or_else(|| {
222 inferred
223 .as_ref()
224 .and_then(|identity| identity.version.clone())
225 });
226 let namespace = about_namespace.clone().or_else(|| {
227 inferred
228 .as_ref()
229 .and_then(|identity| identity.namespace.clone())
230 });
231 build_about_purl(
232 package_type,
233 namespace.as_deref(),
234 name.as_deref(),
235 version.as_deref(),
236 )
237 })
238 .map(truncate_field);
239
240 let parties = extract_owner_party(&yaml);
242
243 vec![PackageData {
245 package_type: Some(package_type),
246 namespace,
247 name,
248 version,
249 qualifiers: None,
250 subpath: None,
251 primary_language: None,
252 description: None,
253 release_date: None,
254 parties,
255 keywords: Vec::new(),
256 homepage_url,
257 download_url,
258 size: None,
259 sha1: None,
260 md5: None,
261 sha256: None,
262 sha512: None,
263 bug_tracking_url: None,
264 code_view_url: None,
265 vcs_url,
266 copyright,
267 holder: None,
268 declared_license_expression,
269 declared_license_expression_spdx,
270 license_detections,
271 other_license_expression: None,
272 other_license_expression_spdx: None,
273 other_license_detections: Vec::new(),
274 extracted_license_statement,
275 notice_text: None,
276 source_packages: Vec::new(),
277 file_references,
278 is_private: false,
279 is_virtual: false,
280 extra_data,
281 dependencies: Vec::new(),
282 repository_homepage_url: None,
283 repository_download_url: None,
284 api_data_url: None,
285 datasource_id: Some(DatasourceId::AboutFile),
286 purl,
287 }]
288 }
289
290 fn is_match(path: &Path) -> bool {
291 path.extension()
292 .and_then(|ext| ext.to_str())
293 .is_some_and(|ext| ext == "ABOUT")
294 }
295}
296
297fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
299 let content =
300 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
301
302 parse_yaml_mapping(&content)
303 .or_else(|yaml_error| parse_shallow_scalar_mapping(&content).ok_or(yaml_error))
304}
305
306fn parse_yaml_mapping(content: &str) -> Result<yaml_serde::Mapping, String> {
307 let value: Value =
308 yaml_serde::from_str(content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
309
310 match value {
311 Value::Mapping(map) => Ok(map),
312 _ => Err("Expected YAML mapping at root".to_string()),
313 }
314}
315
316fn parse_shallow_scalar_mapping(content: &str) -> Option<yaml_serde::Mapping> {
317 let mut map = yaml_serde::Mapping::new();
318 let mut saw_mapping_entry = false;
319
320 for line in content.lines() {
321 let trimmed = line.trim();
322 if trimmed.is_empty() || trimmed.starts_with('#') {
323 continue;
324 }
325 if line.starts_with(char::is_whitespace) {
326 return None;
327 }
328
329 let (raw_key, raw_value) = trimmed.split_once(':')?;
330 let key = raw_key.trim();
331 if key.is_empty()
332 || !key.chars().all(|character| {
333 character.is_ascii_alphanumeric() || matches!(character, '_' | '-')
334 })
335 {
336 return None;
337 }
338
339 let value = raw_value.trim();
340 if value.is_empty() {
341 return None;
342 }
343
344 saw_mapping_entry = true;
345 map.insert(
346 Value::String(key.to_string()),
347 Value::String(unquote_yaml_scalar(value)),
348 );
349 }
350
351 saw_mapping_entry.then_some(map)
352}
353
/// Removes one layer of matching surrounding quotes from `value`.
///
/// The value is unwrapped only when it both starts and ends with the same
/// quote character (`"` or `'`) and those are two distinct characters; in
/// every other case the input is returned unchanged. No escape sequences are
/// interpreted.
fn unquote_yaml_scalar(value: &str) -> String {
    // `strip_suffix` runs on what remains after the prefix was removed, so a
    // lone quote character never counts as both opener and closer.
    let strip_pair = |quote: char| value.strip_prefix(quote)?.strip_suffix(quote);

    strip_pair('"')
        .or_else(|| strip_pair('\''))
        .unwrap_or(value)
        .to_string()
}
368
369fn yaml_value_to_string(value: &Value) -> Option<String> {
371 match value {
372 Value::String(s) => Some(s.clone()),
373 Value::Number(n) => Some(n.to_string()),
374 Value::Bool(b) => Some(b.to_string()),
375 _ => None,
376 }
377}
378
379fn extract_owner_party(yaml: &yaml_serde::Mapping) -> Vec<Party> {
381 let owner = yaml
382 .get(Value::String(FIELD_OWNER.to_string()))
383 .map(|v| match v {
384 Value::String(s) => truncate_field(s.clone()),
385 _ => truncate_field(format!("{:?}", v)),
386 });
387
388 if let Some(owner_name) = owner {
389 if !owner_name.is_empty() {
390 vec![Party {
391 r#type: Some("person".to_string()),
392 role: Some("owner".to_string()),
393 name: Some(owner_name),
394 email: None,
395 url: None,
396 organization: None,
397 organization_url: None,
398 timezone: None,
399 }]
400 } else {
401 Vec::new()
402 }
403 } else {
404 Vec::new()
405 }
406}
407
408fn extract_file_references(yaml: &yaml_serde::Mapping) -> Vec<FileReference> {
410 let about_resource = yaml
411 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
412 .and_then(|v| v.as_str());
413 let license_file = yaml
414 .get(Value::String("license_file".to_string()))
415 .and_then(|v| v.as_str());
416 let notice_file = yaml
417 .get(Value::String("notice_file".to_string()))
418 .and_then(|v| v.as_str());
419
420 let mut refs = Vec::new();
421
422 if let Some(path) = about_resource {
423 refs.push(FileReference {
424 path: truncate_field(path.to_string()),
425 size: None,
426 sha1: None,
427 md5: None,
428 sha256: None,
429 sha512: None,
430 extra_data: None,
431 });
432 }
433
434 for path in [license_file, notice_file].into_iter().flatten() {
435 refs.push(FileReference {
436 path: truncate_field(path.to_string()),
437 size: None,
438 sha1: None,
439 md5: None,
440 sha256: None,
441 sha512: None,
442 extra_data: None,
443 });
444 }
445
446 refs
447}
448
449fn default_package_data() -> PackageData {
451 PackageData {
452 package_type: Some(PackageType::About),
453 datasource_id: Some(DatasourceId::AboutFile),
454 ..Default::default()
455 }
456}
457
458fn infer_about_from_download_url(
459 download_url: Option<&str>,
460 about_name: Option<&str>,
461 about_version: Option<&str>,
462) -> Option<InferredAboutIdentity> {
463 let url = Url::parse(download_url?).ok()?;
464 let host = url.host_str()?;
465
466 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
467 let name = about_name.map(str::to_string)?;
468 let version = about_version.map(str::to_string);
469 return Some(InferredAboutIdentity {
470 package_type: PackageType::Pypi,
471 namespace: None,
472 name: Some(name),
473 version,
474 });
475 }
476
477 if matches!(host, "raw.githubusercontent.com" | "github.com") {
478 let mut segments = url.path_segments()?;
479 let owner = segments.next()?.to_string();
480 let repo = segments.next()?.to_string();
481 return Some(InferredAboutIdentity {
482 package_type: PackageType::Github,
483 namespace: Some(owner),
484 name: Some(repo),
485 version: None,
486 });
487 }
488
489 None
490}
491
492fn build_about_purl(
493 package_type: PackageType,
494 namespace: Option<&str>,
495 name: Option<&str>,
496 version: Option<&str>,
497) -> Option<String> {
498 if package_type == PackageType::About {
499 return None;
500 }
501
502 let name = name?;
503 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
504 if let Some(namespace) = namespace {
505 purl.with_namespace(namespace).ok()?;
506 }
507 if let Some(version) = version {
508 purl.with_version(version).ok()?;
509 }
510 Some(purl.to_string())
511}
512
513fn build_extra_data(
514 yaml: &yaml_serde::Mapping,
515) -> Option<std::collections::HashMap<String, serde_json::Value>> {
516 let mut extra_data = std::collections::HashMap::new();
517 for key in ["license_file", "notice_file", "notes"] {
518 if let Some(value) = yaml.get(Value::String(key.to_string()))
519 && let Some(value) = yaml_value_to_string(value)
520 {
521 extra_data.insert(
522 key.to_string(),
523 serde_json::Value::String(truncate_field(value)),
524 );
525 }
526 }
527 (!extra_data.is_empty()).then_some(extra_data)
528}
529
530crate::register_parser!(
531 "AboutCode .ABOUT metadata file",
532 &["**/*.ABOUT"],
533 "about",
534 "",
535 Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
536);