1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{read_file_to_string, truncate_field};
30use packageurl::PackageUrl;
31use std::path::Path;
32use std::str::FromStr;
33use url::Url;
34use yaml_serde::Value;
35
36use super::PackageParser;
37use super::license_normalization::{
38 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
39 normalize_spdx_expression,
40};
41
// Top-level keys looked up in the parsed `.ABOUT` YAML mapping.

// Package identity keys.
const FIELD_TYPE: &str = "type";
// `purl` is consulted first; `package_url` is the fallback key for the same value.
const FIELD_PURL: &str = "purl";
const FIELD_PACKAGE_URL: &str = "package_url";
const FIELD_NAMESPACE: &str = "namespace";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// `home_url` is consulted first; `homepage_url` is the fallback key for the same value.
const FIELD_HOME_URL: &str = "home_url";
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
const FIELD_DOWNLOAD_URL: &str = "download_url";
// Legal and ownership keys.
const FIELD_COPYRIGHT: &str = "copyright";
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
const FIELD_OWNER: &str = "owner";
// Key whose value is recorded as a file reference of the package.
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
55
/// Parser for AboutCode `.ABOUT` metadata files.
///
/// This is a stateless marker type: all behaviour lives in its
/// `PackageParser` implementation. The common traits are derived so the
/// public type can be logged, copied and default-constructed like any other
/// zero-sized marker.
#[derive(Debug, Clone, Copy, Default)]
pub struct AboutFileParser;
61
62#[derive(Clone)]
63struct InferredAboutIdentity {
64 package_type: PackageType,
65 namespace: Option<String>,
66 name: Option<String>,
67 version: Option<String>,
68}
69
70impl PackageParser for AboutFileParser {
71 const PACKAGE_TYPE: PackageType = PackageType::About;
72
73 fn extract_packages(path: &Path) -> Vec<PackageData> {
74 let yaml = match read_and_parse_yaml(path) {
75 Ok(yaml) => yaml,
76 Err(e) => {
77 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
78 return vec![default_package_data()];
79 }
80 };
81
82 let about_type = yaml
84 .get(FIELD_TYPE)
85 .and_then(|v| v.as_str())
86 .map(String::from);
87
88 let about_namespace = yaml
89 .get(FIELD_NAMESPACE)
90 .and_then(|v| v.as_str())
91 .map(|v| truncate_field(v.to_string()));
92
93 let purl_string = yaml
94 .get(FIELD_PURL)
95 .and_then(|v| v.as_str())
96 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
97 .map(|v| truncate_field(v.to_string()));
98
99 let (purl_type, purl_namespace, purl_name, purl_version) =
101 if let Some(ref purl_str) = purl_string {
102 match PackageUrl::from_str(purl_str) {
103 Ok(purl) => (
104 Some(truncate_field(purl.ty().to_string())),
105 purl.namespace().map(|v| truncate_field(v.to_string())),
106 Some(truncate_field(purl.name().to_string())),
107 purl.version().map(|v| truncate_field(v.to_string())),
108 ),
109 Err(e) => {
110 warn!("Failed to parse purl '{}': {}", purl_str, e);
111 (None, None, None, None)
112 }
113 }
114 } else {
115 (None, None, None, None)
116 };
117
118 let inferred = infer_about_from_download_url(
119 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
120 yaml.get(FIELD_NAME)
121 .and_then(yaml_value_to_string)
122 .as_deref(),
123 yaml.get(FIELD_VERSION)
124 .and_then(yaml_value_to_string)
125 .as_deref(),
126 );
127
128 let package_type = about_type
129 .clone()
130 .or(purl_type)
131 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
132 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
133 .unwrap_or(Self::PACKAGE_TYPE);
134
135 let namespace = about_namespace
137 .clone()
138 .or(purl_namespace.clone())
139 .or_else(|| {
140 inferred
141 .as_ref()
142 .and_then(|identity| identity.namespace.clone())
143 })
144 .map(truncate_field);
145
146 let name = yaml
148 .get(FIELD_NAME)
149 .and_then(yaml_value_to_string)
150 .or(purl_name.clone())
151 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()))
152 .map(truncate_field);
153
154 let version = yaml
155 .get(FIELD_VERSION)
156 .and_then(yaml_value_to_string)
157 .or(purl_version.clone())
158 .or_else(|| {
159 inferred
160 .as_ref()
161 .and_then(|identity| identity.version.clone())
162 })
163 .map(truncate_field);
164
165 let homepage_url = yaml
167 .get(FIELD_HOME_URL)
168 .and_then(|v| v.as_str())
169 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
170 .map(|v| truncate_field(v.to_string()));
171
172 let download_url = yaml
173 .get(FIELD_DOWNLOAD_URL)
174 .and_then(|v| v.as_str())
175 .map(|v| truncate_field(v.to_string()));
176
177 let copyright = yaml
178 .get(FIELD_COPYRIGHT)
179 .and_then(|v| v.as_str())
180 .map(|v| truncate_field(v.to_string()));
181
182 let extracted_license_statement = yaml
183 .get(FIELD_LICENSE_EXPRESSION)
184 .and_then(|v| v.as_str())
185 .map(|v| truncate_field(v.to_string()));
186 let file_references = extract_file_references(&yaml);
187 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
188 extracted_license_statement
189 .as_deref()
190 .and_then(normalize_spdx_expression)
191 .map(|normalized| {
192 build_declared_license_data(
193 normalized,
194 DeclaredLicenseMatchMetadata::single_line(
195 extracted_license_statement.as_deref().unwrap_or_default(),
196 ),
197 )
198 })
199 .unwrap_or_else(|| {
200 normalize_spdx_declared_license(extracted_license_statement.as_deref())
201 });
202
203 let vcs_url = yaml
204 .get(Value::String("vcs_url".to_string()))
205 .and_then(|v| v.as_str())
206 .map(|v| truncate_field(v.to_string()));
207
208 let extra_data = build_extra_data(&yaml);
209
210 let purl = purl_string
211 .or_else(|| {
212 let name = yaml
213 .get(FIELD_NAME)
214 .and_then(yaml_value_to_string)
215 .or(purl_name.clone())
216 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
217 let version = yaml
218 .get(FIELD_VERSION)
219 .and_then(yaml_value_to_string)
220 .or(purl_version.clone())
221 .or_else(|| {
222 inferred
223 .as_ref()
224 .and_then(|identity| identity.version.clone())
225 });
226 let namespace = about_namespace.clone().or_else(|| {
227 inferred
228 .as_ref()
229 .and_then(|identity| identity.namespace.clone())
230 });
231 build_about_purl(
232 package_type,
233 namespace.as_deref(),
234 name.as_deref(),
235 version.as_deref(),
236 )
237 })
238 .map(truncate_field);
239
240 let parties = extract_owner_party(&yaml);
242
243 vec![PackageData {
245 package_type: Some(package_type),
246 namespace,
247 name,
248 version,
249 qualifiers: None,
250 subpath: None,
251 primary_language: None,
252 description: None,
253 release_date: None,
254 parties,
255 keywords: Vec::new(),
256 homepage_url,
257 download_url,
258 size: None,
259 sha1: None,
260 md5: None,
261 sha256: None,
262 sha512: None,
263 bug_tracking_url: None,
264 code_view_url: None,
265 vcs_url,
266 copyright,
267 holder: None,
268 declared_license_expression,
269 declared_license_expression_spdx,
270 license_detections,
271 other_license_expression: None,
272 other_license_expression_spdx: None,
273 other_license_detections: Vec::new(),
274 extracted_license_statement,
275 notice_text: None,
276 source_packages: Vec::new(),
277 file_references,
278 is_private: false,
279 is_virtual: false,
280 extra_data,
281 dependencies: Vec::new(),
282 repository_homepage_url: None,
283 repository_download_url: None,
284 api_data_url: None,
285 datasource_id: Some(DatasourceId::AboutFile),
286 purl,
287 }]
288 }
289
290 fn is_match(path: &Path) -> bool {
291 path.extension()
292 .and_then(|ext| ext.to_str())
293 .is_some_and(|ext| ext == "ABOUT")
294 }
295}
296
297fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
299 let content =
300 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
301
302 let value: Value =
303 yaml_serde::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
304
305 match value {
306 Value::Mapping(map) => Ok(map),
307 _ => Err("Expected YAML mapping at root".to_string()),
308 }
309}
310
311fn yaml_value_to_string(value: &Value) -> Option<String> {
313 match value {
314 Value::String(s) => Some(s.clone()),
315 Value::Number(n) => Some(n.to_string()),
316 Value::Bool(b) => Some(b.to_string()),
317 _ => None,
318 }
319}
320
321fn extract_owner_party(yaml: &yaml_serde::Mapping) -> Vec<Party> {
323 let owner = yaml
324 .get(Value::String(FIELD_OWNER.to_string()))
325 .map(|v| match v {
326 Value::String(s) => truncate_field(s.clone()),
327 _ => truncate_field(format!("{:?}", v)),
328 });
329
330 if let Some(owner_name) = owner {
331 if !owner_name.is_empty() {
332 vec![Party {
333 r#type: Some("person".to_string()),
334 role: Some("owner".to_string()),
335 name: Some(owner_name),
336 email: None,
337 url: None,
338 organization: None,
339 organization_url: None,
340 timezone: None,
341 }]
342 } else {
343 Vec::new()
344 }
345 } else {
346 Vec::new()
347 }
348}
349
350fn extract_file_references(yaml: &yaml_serde::Mapping) -> Vec<FileReference> {
352 let about_resource = yaml
353 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
354 .and_then(|v| v.as_str());
355 let license_file = yaml
356 .get(Value::String("license_file".to_string()))
357 .and_then(|v| v.as_str());
358 let notice_file = yaml
359 .get(Value::String("notice_file".to_string()))
360 .and_then(|v| v.as_str());
361
362 let mut refs = Vec::new();
363
364 if let Some(path) = about_resource {
365 refs.push(FileReference {
366 path: truncate_field(path.to_string()),
367 size: None,
368 sha1: None,
369 md5: None,
370 sha256: None,
371 sha512: None,
372 extra_data: None,
373 });
374 }
375
376 for path in [license_file, notice_file].into_iter().flatten() {
377 refs.push(FileReference {
378 path: truncate_field(path.to_string()),
379 size: None,
380 sha1: None,
381 md5: None,
382 sha256: None,
383 sha512: None,
384 extra_data: None,
385 });
386 }
387
388 refs
389}
390
391fn default_package_data() -> PackageData {
393 PackageData {
394 package_type: Some(PackageType::About),
395 datasource_id: Some(DatasourceId::AboutFile),
396 ..Default::default()
397 }
398}
399
400fn infer_about_from_download_url(
401 download_url: Option<&str>,
402 about_name: Option<&str>,
403 about_version: Option<&str>,
404) -> Option<InferredAboutIdentity> {
405 let url = Url::parse(download_url?).ok()?;
406 let host = url.host_str()?;
407
408 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
409 let name = about_name.map(str::to_string)?;
410 let version = about_version.map(str::to_string);
411 return Some(InferredAboutIdentity {
412 package_type: PackageType::Pypi,
413 namespace: None,
414 name: Some(name),
415 version,
416 });
417 }
418
419 if matches!(host, "raw.githubusercontent.com" | "github.com") {
420 let mut segments = url.path_segments()?;
421 let owner = segments.next()?.to_string();
422 let repo = segments.next()?.to_string();
423 return Some(InferredAboutIdentity {
424 package_type: PackageType::Github,
425 namespace: Some(owner),
426 name: Some(repo),
427 version: None,
428 });
429 }
430
431 None
432}
433
434fn build_about_purl(
435 package_type: PackageType,
436 namespace: Option<&str>,
437 name: Option<&str>,
438 version: Option<&str>,
439) -> Option<String> {
440 if package_type == PackageType::About {
441 return None;
442 }
443
444 let name = name?;
445 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
446 if let Some(namespace) = namespace {
447 purl.with_namespace(namespace).ok()?;
448 }
449 if let Some(version) = version {
450 purl.with_version(version).ok()?;
451 }
452 Some(purl.to_string())
453}
454
455fn build_extra_data(
456 yaml: &yaml_serde::Mapping,
457) -> Option<std::collections::HashMap<String, serde_json::Value>> {
458 let mut extra_data = std::collections::HashMap::new();
459 for key in ["license_file", "notice_file", "notes"] {
460 if let Some(value) = yaml.get(Value::String(key.to_string()))
461 && let Some(value) = yaml_value_to_string(value)
462 {
463 extra_data.insert(
464 key.to_string(),
465 serde_json::Value::String(truncate_field(value)),
466 );
467 }
468 }
469 (!extra_data.is_empty()).then_some(extra_data)
470}
471
// Registers metadata for this parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the argument meanings below are inferred from their values
// (description, path globs, package type, an empty string of unknown purpose,
// documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "AboutCode .ABOUT metadata file",
    &["**/*.ABOUT"],
    "about",
    "",
    Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
);