1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
25use log::warn;
26use packageurl::PackageUrl;
27use serde_yaml::Value;
28use std::fs;
29use std::path::Path;
30use std::str::FromStr;
31use url::Url;
32
33use super::PackageParser;
34
// Keys looked up in the root YAML mapping of an `.ABOUT` file.
const FIELD_TYPE: &str = "type";
// `purl` is consulted first; `package_url` is used as a fallback spelling.
const FIELD_PURL: &str = "purl";
const FIELD_PACKAGE_URL: &str = "package_url";
const FIELD_NAMESPACE: &str = "namespace";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// `home_url` is consulted first; `homepage_url` is used as a fallback spelling.
const FIELD_HOME_URL: &str = "home_url";
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
const FIELD_DOWNLOAD_URL: &str = "download_url";
const FIELD_COPYRIGHT: &str = "copyright";
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
const FIELD_OWNER: &str = "owner";
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
48
/// Parser for AboutCode `.ABOUT` metadata files.
///
/// This is a stateless marker type; the parsing logic lives in its
/// [`PackageParser`] implementation.
pub struct AboutFileParser;
54
/// Package identity guessed from the host of an ABOUT file's `download_url`.
///
/// Used only as a last-resort fallback when the ABOUT fields and the purl do
/// not supply a given component.
#[derive(Clone)]
struct InferredAboutIdentity {
    /// Package type implied by the download host.
    package_type: PackageType,
    /// Namespace implied by the URL (the first path segment for GitHub hosts).
    namespace: Option<String>,
    /// Package name: the ABOUT `name` for PyPI hosts, the second path segment for GitHub hosts.
    name: Option<String>,
    /// Package version: the ABOUT `version` for PyPI hosts, never set for GitHub hosts.
    version: Option<String>,
}
62
63impl PackageParser for AboutFileParser {
64 const PACKAGE_TYPE: PackageType = PackageType::About;
65
66 fn extract_packages(path: &Path) -> Vec<PackageData> {
67 let yaml = match read_and_parse_yaml(path) {
68 Ok(yaml) => yaml,
69 Err(e) => {
70 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
71 return vec![default_package_data()];
72 }
73 };
74
75 let about_type = yaml
77 .get(FIELD_TYPE)
78 .and_then(|v| v.as_str())
79 .map(String::from);
80
81 let about_namespace = yaml
82 .get(FIELD_NAMESPACE)
83 .and_then(|v| v.as_str())
84 .map(String::from);
85
86 let purl_string = yaml
87 .get(FIELD_PURL)
88 .and_then(|v| v.as_str())
89 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
90 .map(String::from);
91
92 let (purl_type, purl_namespace, purl_name, purl_version) =
94 if let Some(ref purl_str) = purl_string {
95 match PackageUrl::from_str(purl_str) {
96 Ok(purl) => (
97 Some(purl.ty().to_string()),
98 purl.namespace().map(String::from),
99 Some(purl.name().to_string()),
100 purl.version().map(String::from),
101 ),
102 Err(e) => {
103 warn!("Failed to parse purl '{}': {}", purl_str, e);
104 (None, None, None, None)
105 }
106 }
107 } else {
108 (None, None, None, None)
109 };
110
111 let inferred = infer_about_from_download_url(
112 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
113 yaml.get(FIELD_NAME)
114 .and_then(yaml_value_to_string)
115 .as_deref(),
116 yaml.get(FIELD_VERSION)
117 .and_then(yaml_value_to_string)
118 .as_deref(),
119 );
120
121 let package_type = about_type
122 .clone()
123 .or(purl_type)
124 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
125 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
126 .unwrap_or(Self::PACKAGE_TYPE);
127
128 let namespace = about_namespace
130 .clone()
131 .or(purl_namespace.clone())
132 .or_else(|| {
133 inferred
134 .as_ref()
135 .and_then(|identity| identity.namespace.clone())
136 });
137
138 let name = yaml
140 .get(FIELD_NAME)
141 .and_then(yaml_value_to_string)
142 .or(purl_name.clone())
143 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
144
145 let version = yaml
146 .get(FIELD_VERSION)
147 .and_then(yaml_value_to_string)
148 .or(purl_version.clone())
149 .or_else(|| {
150 inferred
151 .as_ref()
152 .and_then(|identity| identity.version.clone())
153 });
154
155 let homepage_url = yaml
157 .get(FIELD_HOME_URL)
158 .and_then(|v| v.as_str())
159 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
160 .map(String::from);
161
162 let download_url = yaml
163 .get(FIELD_DOWNLOAD_URL)
164 .and_then(|v| v.as_str())
165 .map(String::from);
166
167 let copyright = yaml
168 .get(FIELD_COPYRIGHT)
169 .and_then(|v| v.as_str())
170 .map(String::from);
171
172 let extracted_license_statement = yaml
173 .get(FIELD_LICENSE_EXPRESSION)
174 .and_then(|v| v.as_str())
175 .map(String::from);
176
177 let vcs_url = yaml
178 .get(Value::String("vcs_url".to_string()))
179 .and_then(|v| v.as_str())
180 .map(String::from);
181
182 let extra_data = build_extra_data(&yaml);
183
184 let purl = purl_string.or_else(|| {
185 let name = yaml
186 .get(FIELD_NAME)
187 .and_then(yaml_value_to_string)
188 .or(purl_name.clone())
189 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
190 let version = yaml
191 .get(FIELD_VERSION)
192 .and_then(yaml_value_to_string)
193 .or(purl_version.clone())
194 .or_else(|| {
195 inferred
196 .as_ref()
197 .and_then(|identity| identity.version.clone())
198 });
199 let namespace = about_namespace.clone().or_else(|| {
200 inferred
201 .as_ref()
202 .and_then(|identity| identity.namespace.clone())
203 });
204 build_about_purl(
205 package_type,
206 namespace.as_deref(),
207 name.as_deref(),
208 version.as_deref(),
209 )
210 });
211
212 let parties = extract_owner_party(&yaml);
214
215 let file_references = extract_file_references(&yaml);
217
218 vec![PackageData {
219 package_type: Some(package_type),
220 namespace,
221 name,
222 version,
223 qualifiers: None,
224 subpath: None,
225 primary_language: None,
226 description: None,
227 release_date: None,
228 parties,
229 keywords: Vec::new(),
230 homepage_url,
231 download_url,
232 size: None,
233 sha1: None,
234 md5: None,
235 sha256: None,
236 sha512: None,
237 bug_tracking_url: None,
238 code_view_url: None,
239 vcs_url,
240 copyright,
241 holder: None,
242 declared_license_expression: None,
243 declared_license_expression_spdx: None,
244 license_detections: Vec::new(),
245 other_license_expression: None,
246 other_license_expression_spdx: None,
247 other_license_detections: Vec::new(),
248 extracted_license_statement,
249 notice_text: None,
250 source_packages: Vec::new(),
251 file_references,
252 is_private: false,
253 is_virtual: false,
254 extra_data,
255 dependencies: Vec::new(),
256 repository_homepage_url: None,
257 repository_download_url: None,
258 api_data_url: None,
259 datasource_id: Some(DatasourceId::AboutFile),
260 purl,
261 }]
262 }
263
264 fn is_match(path: &Path) -> bool {
265 path.extension()
266 .and_then(|ext| ext.to_str())
267 .is_some_and(|ext| ext == "ABOUT")
268 }
269}
270
271fn read_and_parse_yaml(path: &Path) -> Result<serde_yaml::Mapping, String> {
273 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
274
275 let value: Value =
276 serde_yaml::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
277
278 match value {
279 Value::Mapping(map) => Ok(map),
280 _ => Err("Expected YAML mapping at root".to_string()),
281 }
282}
283
284fn yaml_value_to_string(value: &Value) -> Option<String> {
286 match value {
287 Value::String(s) => Some(s.clone()),
288 Value::Number(n) => Some(n.to_string()),
289 Value::Bool(b) => Some(b.to_string()),
290 _ => None,
291 }
292}
293
294fn extract_owner_party(yaml: &serde_yaml::Mapping) -> Vec<Party> {
296 let owner = yaml
297 .get(Value::String(FIELD_OWNER.to_string()))
298 .map(|v| match v {
299 Value::String(s) => s.clone(),
300 _ => {
301 format!("{:?}", v)
303 }
304 });
305
306 if let Some(owner_name) = owner {
307 if !owner_name.is_empty() {
308 vec![Party {
309 r#type: Some("person".to_string()),
310 role: Some("owner".to_string()),
311 name: Some(owner_name),
312 email: None,
313 url: None,
314 organization: None,
315 organization_url: None,
316 timezone: None,
317 }]
318 } else {
319 Vec::new()
320 }
321 } else {
322 Vec::new()
323 }
324}
325
326fn extract_file_references(yaml: &serde_yaml::Mapping) -> Vec<FileReference> {
328 let about_resource = yaml
329 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
330 .and_then(|v| v.as_str());
331 let license_file = yaml
332 .get(Value::String("license_file".to_string()))
333 .and_then(|v| v.as_str());
334 let notice_file = yaml
335 .get(Value::String("notice_file".to_string()))
336 .and_then(|v| v.as_str());
337
338 let mut refs = Vec::new();
339
340 if let Some(path) = about_resource {
341 refs.push(FileReference {
342 path: path.to_string(),
343 size: None,
344 sha1: None,
345 md5: None,
346 sha256: None,
347 sha512: None,
348 extra_data: None,
349 });
350 }
351
352 for path in [license_file, notice_file].into_iter().flatten() {
353 refs.push(FileReference {
354 path: path.to_string(),
355 size: None,
356 sha1: None,
357 md5: None,
358 sha256: None,
359 sha512: None,
360 extra_data: None,
361 });
362 }
363
364 refs
365}
366
367fn default_package_data() -> PackageData {
369 PackageData {
370 package_type: Some(PackageType::About),
371 datasource_id: Some(DatasourceId::AboutFile),
372 ..Default::default()
373 }
374}
375
376fn infer_about_from_download_url(
377 download_url: Option<&str>,
378 about_name: Option<&str>,
379 about_version: Option<&str>,
380) -> Option<InferredAboutIdentity> {
381 let url = Url::parse(download_url?).ok()?;
382 let host = url.host_str()?;
383
384 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
385 let name = about_name.map(str::to_string)?;
386 let version = about_version.map(str::to_string);
387 return Some(InferredAboutIdentity {
388 package_type: PackageType::Pypi,
389 namespace: None,
390 name: Some(name),
391 version,
392 });
393 }
394
395 if matches!(host, "raw.githubusercontent.com" | "github.com") {
396 let mut segments = url.path_segments()?;
397 let owner = segments.next()?.to_string();
398 let repo = segments.next()?.to_string();
399 return Some(InferredAboutIdentity {
400 package_type: PackageType::Github,
401 namespace: Some(owner),
402 name: Some(repo),
403 version: None,
404 });
405 }
406
407 None
408}
409
410fn build_about_purl(
411 package_type: PackageType,
412 namespace: Option<&str>,
413 name: Option<&str>,
414 version: Option<&str>,
415) -> Option<String> {
416 if package_type == PackageType::About {
417 return None;
418 }
419
420 let name = name?;
421 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
422 if let Some(namespace) = namespace {
423 purl.with_namespace(namespace).ok()?;
424 }
425 if let Some(version) = version {
426 purl.with_version(version).ok()?;
427 }
428 Some(purl.to_string())
429}
430
431fn build_extra_data(
432 yaml: &serde_yaml::Mapping,
433) -> Option<std::collections::HashMap<String, serde_json::Value>> {
434 let mut extra_data = std::collections::HashMap::new();
435 for key in ["license_file", "notice_file", "notes"] {
436 if let Some(value) = yaml.get(Value::String(key.to_string()))
437 && let Some(value) = yaml_value_to_string(value)
438 {
439 extra_data.insert(key.to_string(), serde_json::Value::String(value));
440 }
441 }
442 (!extra_data.is_empty()).then_some(extra_data)
443}
444
// Registers metadata for the `.ABOUT` parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the macro definition is not visible in this file. The
// arguments appear to be a description, path glob patterns, a package type
// name, a primary language (empty here) and a documentation URL — confirm
// against the macro definition.
crate::register_parser!(
    "AboutCode .ABOUT metadata file",
    &["**/*.ABOUT"],
    "about",
    "",
    Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
);