1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use std::path::Path;
29use std::str::FromStr;
30use url::Url;
31use yaml_serde::Value;
32
33use super::PackageParser;
34use super::license_normalization::{
35 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
36 normalize_spdx_expression,
37};
38
// Keys looked up in the root YAML mapping of an `.ABOUT` file.

// Explicit package type; tried before the purl's type and before URL inference.
const FIELD_TYPE: &str = "type";
// Package URL; `purl` is read first and `package_url` is the fallback key.
const FIELD_PURL: &str = "purl";
const FIELD_PACKAGE_URL: &str = "package_url";
// Package identity fields declared directly in the file.
const FIELD_NAMESPACE: &str = "namespace";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// Homepage; `home_url` is read first and `homepage_url` is the fallback key.
const FIELD_HOME_URL: &str = "home_url";
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
// Download location; also used to infer a package identity from known hosts.
const FIELD_DOWNLOAD_URL: &str = "download_url";
const FIELD_COPYRIGHT: &str = "copyright";
// Stored as the extracted license statement and fed to license normalization.
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
// Reported as a party with the role "owner".
const FIELD_OWNER: &str = "owner";
// Path reported as the first file reference of the package.
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
52
/// Parser for AboutCode `.ABOUT` metadata files.
///
/// The type carries no state; all behavior lives in its `PackageParser`
/// implementation.
pub struct AboutFileParser;
58
/// Package identity derived from a download URL by
/// `infer_about_from_download_url`, used as the last fallback when the
/// `.ABOUT` file and its purl do not provide a value.
#[derive(Clone)]
struct InferredAboutIdentity {
    /// Package type implied by the download host.
    package_type: PackageType,
    /// Namespace implied by the URL, if any.
    namespace: Option<String>,
    /// Package name implied by the URL or carried over from the file.
    name: Option<String>,
    /// Package version carried over from the file, if any.
    version: Option<String>,
}
66
67impl PackageParser for AboutFileParser {
68 const PACKAGE_TYPE: PackageType = PackageType::About;
69
70 fn extract_packages(path: &Path) -> Vec<PackageData> {
71 let yaml = match read_and_parse_yaml(path) {
72 Ok(yaml) => yaml,
73 Err(e) => {
74 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
75 return vec![default_package_data()];
76 }
77 };
78
79 let about_type = yaml
81 .get(FIELD_TYPE)
82 .and_then(|v| v.as_str())
83 .map(String::from);
84
85 let about_namespace = yaml
86 .get(FIELD_NAMESPACE)
87 .and_then(|v| v.as_str())
88 .map(|v| truncate_field(v.to_string()));
89
90 let purl_string = yaml
91 .get(FIELD_PURL)
92 .and_then(|v| v.as_str())
93 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
94 .map(|v| truncate_field(v.to_string()));
95
96 let (purl_type, purl_namespace, purl_name, purl_version) =
98 if let Some(ref purl_str) = purl_string {
99 match PackageUrl::from_str(purl_str) {
100 Ok(purl) => (
101 Some(truncate_field(purl.ty().to_string())),
102 purl.namespace().map(|v| truncate_field(v.to_string())),
103 Some(truncate_field(purl.name().to_string())),
104 purl.version().map(|v| truncate_field(v.to_string())),
105 ),
106 Err(e) => {
107 warn!("Failed to parse purl '{}': {}", purl_str, e);
108 (None, None, None, None)
109 }
110 }
111 } else {
112 (None, None, None, None)
113 };
114
115 let inferred = infer_about_from_download_url(
116 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
117 yaml.get(FIELD_NAME)
118 .and_then(yaml_value_to_string)
119 .as_deref(),
120 yaml.get(FIELD_VERSION)
121 .and_then(yaml_value_to_string)
122 .as_deref(),
123 );
124
125 let package_type = about_type
126 .clone()
127 .or(purl_type)
128 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
129 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
130 .unwrap_or(Self::PACKAGE_TYPE);
131
132 let namespace = about_namespace
134 .clone()
135 .or(purl_namespace.clone())
136 .or_else(|| {
137 inferred
138 .as_ref()
139 .and_then(|identity| identity.namespace.clone())
140 })
141 .map(truncate_field);
142
143 let name = yaml
145 .get(FIELD_NAME)
146 .and_then(yaml_value_to_string)
147 .or(purl_name.clone())
148 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()))
149 .map(truncate_field);
150
151 let version = yaml
152 .get(FIELD_VERSION)
153 .and_then(yaml_value_to_string)
154 .or(purl_version.clone())
155 .or_else(|| {
156 inferred
157 .as_ref()
158 .and_then(|identity| identity.version.clone())
159 })
160 .map(truncate_field);
161
162 let homepage_url = yaml
164 .get(FIELD_HOME_URL)
165 .and_then(|v| v.as_str())
166 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
167 .map(|v| truncate_field(v.to_string()));
168
169 let download_url = yaml
170 .get(FIELD_DOWNLOAD_URL)
171 .and_then(|v| v.as_str())
172 .map(|v| truncate_field(v.to_string()));
173
174 let copyright = yaml
175 .get(FIELD_COPYRIGHT)
176 .and_then(|v| v.as_str())
177 .map(|v| truncate_field(v.to_string()));
178
179 let extracted_license_statement = yaml
180 .get(FIELD_LICENSE_EXPRESSION)
181 .and_then(|v| v.as_str())
182 .map(|v| truncate_field(v.to_string()));
183 let file_references = extract_file_references(&yaml);
184 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
185 extracted_license_statement
186 .as_deref()
187 .and_then(normalize_spdx_expression)
188 .map(|normalized| {
189 build_declared_license_data(
190 normalized,
191 DeclaredLicenseMatchMetadata::single_line(
192 extracted_license_statement.as_deref().unwrap_or_default(),
193 ),
194 )
195 })
196 .unwrap_or_else(|| {
197 normalize_spdx_declared_license(extracted_license_statement.as_deref())
198 });
199
200 let vcs_url = yaml
201 .get(Value::String("vcs_url".to_string()))
202 .and_then(|v| v.as_str())
203 .map(|v| truncate_field(v.to_string()));
204
205 let extra_data = build_extra_data(&yaml);
206
207 let purl = purl_string
208 .or_else(|| {
209 let name = yaml
210 .get(FIELD_NAME)
211 .and_then(yaml_value_to_string)
212 .or(purl_name.clone())
213 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
214 let version = yaml
215 .get(FIELD_VERSION)
216 .and_then(yaml_value_to_string)
217 .or(purl_version.clone())
218 .or_else(|| {
219 inferred
220 .as_ref()
221 .and_then(|identity| identity.version.clone())
222 });
223 let namespace = about_namespace.clone().or_else(|| {
224 inferred
225 .as_ref()
226 .and_then(|identity| identity.namespace.clone())
227 });
228 build_about_purl(
229 package_type,
230 namespace.as_deref(),
231 name.as_deref(),
232 version.as_deref(),
233 )
234 })
235 .map(truncate_field);
236
237 let parties = extract_owner_party(&yaml);
239
240 vec![PackageData {
242 package_type: Some(package_type),
243 namespace,
244 name,
245 version,
246 qualifiers: None,
247 subpath: None,
248 primary_language: None,
249 description: None,
250 release_date: None,
251 parties,
252 keywords: Vec::new(),
253 homepage_url,
254 download_url,
255 size: None,
256 sha1: None,
257 md5: None,
258 sha256: None,
259 sha512: None,
260 bug_tracking_url: None,
261 code_view_url: None,
262 vcs_url,
263 copyright,
264 holder: None,
265 declared_license_expression,
266 declared_license_expression_spdx,
267 license_detections,
268 other_license_expression: None,
269 other_license_expression_spdx: None,
270 other_license_detections: Vec::new(),
271 extracted_license_statement,
272 notice_text: None,
273 source_packages: Vec::new(),
274 file_references,
275 is_private: false,
276 is_virtual: false,
277 extra_data,
278 dependencies: Vec::new(),
279 repository_homepage_url: None,
280 repository_download_url: None,
281 api_data_url: None,
282 datasource_id: Some(DatasourceId::AboutFile),
283 purl,
284 }]
285 }
286
287 fn is_match(path: &Path) -> bool {
288 path.extension()
289 .and_then(|ext| ext.to_str())
290 .is_some_and(|ext| ext == "ABOUT")
291 }
292}
293
294fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
296 let content =
297 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
298
299 let value: Value =
300 yaml_serde::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
301
302 match value {
303 Value::Mapping(map) => Ok(map),
304 _ => Err("Expected YAML mapping at root".to_string()),
305 }
306}
307
308fn yaml_value_to_string(value: &Value) -> Option<String> {
310 match value {
311 Value::String(s) => Some(s.clone()),
312 Value::Number(n) => Some(n.to_string()),
313 Value::Bool(b) => Some(b.to_string()),
314 _ => None,
315 }
316}
317
318fn extract_owner_party(yaml: &yaml_serde::Mapping) -> Vec<Party> {
320 let owner = yaml
321 .get(Value::String(FIELD_OWNER.to_string()))
322 .map(|v| match v {
323 Value::String(s) => truncate_field(s.clone()),
324 _ => truncate_field(format!("{:?}", v)),
325 });
326
327 if let Some(owner_name) = owner {
328 if !owner_name.is_empty() {
329 vec![Party {
330 r#type: Some("person".to_string()),
331 role: Some("owner".to_string()),
332 name: Some(owner_name),
333 email: None,
334 url: None,
335 organization: None,
336 organization_url: None,
337 timezone: None,
338 }]
339 } else {
340 Vec::new()
341 }
342 } else {
343 Vec::new()
344 }
345}
346
347fn extract_file_references(yaml: &yaml_serde::Mapping) -> Vec<FileReference> {
349 let about_resource = yaml
350 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
351 .and_then(|v| v.as_str());
352 let license_file = yaml
353 .get(Value::String("license_file".to_string()))
354 .and_then(|v| v.as_str());
355 let notice_file = yaml
356 .get(Value::String("notice_file".to_string()))
357 .and_then(|v| v.as_str());
358
359 let mut refs = Vec::new();
360
361 if let Some(path) = about_resource {
362 refs.push(FileReference {
363 path: truncate_field(path.to_string()),
364 size: None,
365 sha1: None,
366 md5: None,
367 sha256: None,
368 sha512: None,
369 extra_data: None,
370 });
371 }
372
373 for path in [license_file, notice_file].into_iter().flatten() {
374 refs.push(FileReference {
375 path: truncate_field(path.to_string()),
376 size: None,
377 sha1: None,
378 md5: None,
379 sha256: None,
380 sha512: None,
381 extra_data: None,
382 });
383 }
384
385 refs
386}
387
388fn default_package_data() -> PackageData {
390 PackageData {
391 package_type: Some(PackageType::About),
392 datasource_id: Some(DatasourceId::AboutFile),
393 ..Default::default()
394 }
395}
396
397fn infer_about_from_download_url(
398 download_url: Option<&str>,
399 about_name: Option<&str>,
400 about_version: Option<&str>,
401) -> Option<InferredAboutIdentity> {
402 let url = Url::parse(download_url?).ok()?;
403 let host = url.host_str()?;
404
405 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
406 let name = about_name.map(str::to_string)?;
407 let version = about_version.map(str::to_string);
408 return Some(InferredAboutIdentity {
409 package_type: PackageType::Pypi,
410 namespace: None,
411 name: Some(name),
412 version,
413 });
414 }
415
416 if matches!(host, "raw.githubusercontent.com" | "github.com") {
417 let mut segments = url.path_segments()?;
418 let owner = segments.next()?.to_string();
419 let repo = segments.next()?.to_string();
420 return Some(InferredAboutIdentity {
421 package_type: PackageType::Github,
422 namespace: Some(owner),
423 name: Some(repo),
424 version: None,
425 });
426 }
427
428 None
429}
430
431fn build_about_purl(
432 package_type: PackageType,
433 namespace: Option<&str>,
434 name: Option<&str>,
435 version: Option<&str>,
436) -> Option<String> {
437 if package_type == PackageType::About {
438 return None;
439 }
440
441 let name = name?;
442 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
443 if let Some(namespace) = namespace {
444 purl.with_namespace(namespace).ok()?;
445 }
446 if let Some(version) = version {
447 purl.with_version(version).ok()?;
448 }
449 Some(purl.to_string())
450}
451
452fn build_extra_data(
453 yaml: &yaml_serde::Mapping,
454) -> Option<std::collections::HashMap<String, serde_json::Value>> {
455 let mut extra_data = std::collections::HashMap::new();
456 for key in ["license_file", "notice_file", "notes"] {
457 if let Some(value) = yaml.get(Value::String(key.to_string()))
458 && let Some(value) = yaml_value_to_string(value)
459 {
460 extra_data.insert(
461 key.to_string(),
462 serde_json::Value::String(truncate_field(value)),
463 );
464 }
465 }
466 (!extra_data.is_empty()).then_some(extra_data)
467}
468
// Hands this parser's descriptive metadata to the crate's `register_parser!`
// macro.
// NOTE(review): the arguments look like description, path patterns, package
// type, primary language (empty here) and documentation URL — confirm the
// order against the macro definition.
crate::register_parser!(
    "AboutCode .ABOUT metadata file",
    &["**/*.ABOUT"],
    "about",
    "",
    Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
);