1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use packageurl::PackageUrl;
27use serde_yaml::Value;
28use std::fs;
29use std::path::Path;
30use std::str::FromStr;
31use url::Url;
32
33use super::PackageParser;
34use super::license_normalization::{
35 DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_spdx_declared_license,
36 normalize_spdx_expression,
37};
38
// Keys looked up in the top-level YAML mapping of a `.ABOUT` file.
// Two spellings exist for some concepts (`purl` / `package_url`,
// `home_url` / `homepage_url`); the parser tries the first and falls back
// to the second.
const FIELD_TYPE: &str = "type";
const FIELD_PURL: &str = "purl";
const FIELD_PACKAGE_URL: &str = "package_url";
const FIELD_NAMESPACE: &str = "namespace";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_HOME_URL: &str = "home_url";
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
const FIELD_DOWNLOAD_URL: &str = "download_url";
const FIELD_COPYRIGHT: &str = "copyright";
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
const FIELD_OWNER: &str = "owner";
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
52
/// Parser for AboutCode `.ABOUT` metadata files.
///
/// The type carries no state; all behaviour lives in its
/// [`PackageParser`] implementation.
pub struct AboutFileParser;
58
/// Package identity guessed from a `.ABOUT` file's download URL.
///
/// Produced by `infer_about_from_download_url` and used only as a fallback
/// when the `.ABOUT` file itself (or its purl) does not supply a value.
#[derive(Clone)]
struct InferredAboutIdentity {
    /// Package type implied by the download host (PyPI or GitHub).
    package_type: PackageType,
    /// Namespace implied by the URL; for GitHub URLs this is the first path segment.
    namespace: Option<String>,
    /// Package name; for GitHub URLs this is the second path segment.
    name: Option<String>,
    /// Package version, when one could be determined.
    version: Option<String>,
}
66
67impl PackageParser for AboutFileParser {
68 const PACKAGE_TYPE: PackageType = PackageType::About;
69
70 fn extract_packages(path: &Path) -> Vec<PackageData> {
71 let yaml = match read_and_parse_yaml(path) {
72 Ok(yaml) => yaml,
73 Err(e) => {
74 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
75 return vec![default_package_data()];
76 }
77 };
78
79 let about_type = yaml
81 .get(FIELD_TYPE)
82 .and_then(|v| v.as_str())
83 .map(String::from);
84
85 let about_namespace = yaml
86 .get(FIELD_NAMESPACE)
87 .and_then(|v| v.as_str())
88 .map(String::from);
89
90 let purl_string = yaml
91 .get(FIELD_PURL)
92 .and_then(|v| v.as_str())
93 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
94 .map(String::from);
95
96 let (purl_type, purl_namespace, purl_name, purl_version) =
98 if let Some(ref purl_str) = purl_string {
99 match PackageUrl::from_str(purl_str) {
100 Ok(purl) => (
101 Some(purl.ty().to_string()),
102 purl.namespace().map(String::from),
103 Some(purl.name().to_string()),
104 purl.version().map(String::from),
105 ),
106 Err(e) => {
107 warn!("Failed to parse purl '{}': {}", purl_str, e);
108 (None, None, None, None)
109 }
110 }
111 } else {
112 (None, None, None, None)
113 };
114
115 let inferred = infer_about_from_download_url(
116 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
117 yaml.get(FIELD_NAME)
118 .and_then(yaml_value_to_string)
119 .as_deref(),
120 yaml.get(FIELD_VERSION)
121 .and_then(yaml_value_to_string)
122 .as_deref(),
123 );
124
125 let package_type = about_type
126 .clone()
127 .or(purl_type)
128 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
129 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
130 .unwrap_or(Self::PACKAGE_TYPE);
131
132 let namespace = about_namespace
134 .clone()
135 .or(purl_namespace.clone())
136 .or_else(|| {
137 inferred
138 .as_ref()
139 .and_then(|identity| identity.namespace.clone())
140 });
141
142 let name = yaml
144 .get(FIELD_NAME)
145 .and_then(yaml_value_to_string)
146 .or(purl_name.clone())
147 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
148
149 let version = yaml
150 .get(FIELD_VERSION)
151 .and_then(yaml_value_to_string)
152 .or(purl_version.clone())
153 .or_else(|| {
154 inferred
155 .as_ref()
156 .and_then(|identity| identity.version.clone())
157 });
158
159 let homepage_url = yaml
161 .get(FIELD_HOME_URL)
162 .and_then(|v| v.as_str())
163 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
164 .map(String::from);
165
166 let download_url = yaml
167 .get(FIELD_DOWNLOAD_URL)
168 .and_then(|v| v.as_str())
169 .map(String::from);
170
171 let copyright = yaml
172 .get(FIELD_COPYRIGHT)
173 .and_then(|v| v.as_str())
174 .map(String::from);
175
176 let extracted_license_statement = yaml
177 .get(FIELD_LICENSE_EXPRESSION)
178 .and_then(|v| v.as_str())
179 .map(String::from);
180 let file_references = extract_file_references(&yaml);
181 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
182 extracted_license_statement
183 .as_deref()
184 .and_then(normalize_spdx_expression)
185 .map(|normalized| {
186 build_declared_license_data(
187 normalized,
188 DeclaredLicenseMatchMetadata::single_line(
189 extracted_license_statement.as_deref().unwrap_or_default(),
190 ),
191 )
192 })
193 .unwrap_or_else(|| {
194 normalize_spdx_declared_license(extracted_license_statement.as_deref())
195 });
196
197 let vcs_url = yaml
198 .get(Value::String("vcs_url".to_string()))
199 .and_then(|v| v.as_str())
200 .map(String::from);
201
202 let extra_data = build_extra_data(&yaml);
203
204 let purl = purl_string.or_else(|| {
205 let name = yaml
206 .get(FIELD_NAME)
207 .and_then(yaml_value_to_string)
208 .or(purl_name.clone())
209 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
210 let version = yaml
211 .get(FIELD_VERSION)
212 .and_then(yaml_value_to_string)
213 .or(purl_version.clone())
214 .or_else(|| {
215 inferred
216 .as_ref()
217 .and_then(|identity| identity.version.clone())
218 });
219 let namespace = about_namespace.clone().or_else(|| {
220 inferred
221 .as_ref()
222 .and_then(|identity| identity.namespace.clone())
223 });
224 build_about_purl(
225 package_type,
226 namespace.as_deref(),
227 name.as_deref(),
228 version.as_deref(),
229 )
230 });
231
232 let parties = extract_owner_party(&yaml);
234
235 vec![PackageData {
237 package_type: Some(package_type),
238 namespace,
239 name,
240 version,
241 qualifiers: None,
242 subpath: None,
243 primary_language: None,
244 description: None,
245 release_date: None,
246 parties,
247 keywords: Vec::new(),
248 homepage_url,
249 download_url,
250 size: None,
251 sha1: None,
252 md5: None,
253 sha256: None,
254 sha512: None,
255 bug_tracking_url: None,
256 code_view_url: None,
257 vcs_url,
258 copyright,
259 holder: None,
260 declared_license_expression,
261 declared_license_expression_spdx,
262 license_detections,
263 other_license_expression: None,
264 other_license_expression_spdx: None,
265 other_license_detections: Vec::new(),
266 extracted_license_statement,
267 notice_text: None,
268 source_packages: Vec::new(),
269 file_references,
270 is_private: false,
271 is_virtual: false,
272 extra_data,
273 dependencies: Vec::new(),
274 repository_homepage_url: None,
275 repository_download_url: None,
276 api_data_url: None,
277 datasource_id: Some(DatasourceId::AboutFile),
278 purl,
279 }]
280 }
281
282 fn is_match(path: &Path) -> bool {
283 path.extension()
284 .and_then(|ext| ext.to_str())
285 .is_some_and(|ext| ext == "ABOUT")
286 }
287}
288
289fn read_and_parse_yaml(path: &Path) -> Result<serde_yaml::Mapping, String> {
291 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
292
293 let value: Value =
294 serde_yaml::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
295
296 match value {
297 Value::Mapping(map) => Ok(map),
298 _ => Err("Expected YAML mapping at root".to_string()),
299 }
300}
301
302fn yaml_value_to_string(value: &Value) -> Option<String> {
304 match value {
305 Value::String(s) => Some(s.clone()),
306 Value::Number(n) => Some(n.to_string()),
307 Value::Bool(b) => Some(b.to_string()),
308 _ => None,
309 }
310}
311
312fn extract_owner_party(yaml: &serde_yaml::Mapping) -> Vec<Party> {
314 let owner = yaml
315 .get(Value::String(FIELD_OWNER.to_string()))
316 .map(|v| match v {
317 Value::String(s) => s.clone(),
318 _ => {
319 format!("{:?}", v)
321 }
322 });
323
324 if let Some(owner_name) = owner {
325 if !owner_name.is_empty() {
326 vec![Party {
327 r#type: Some("person".to_string()),
328 role: Some("owner".to_string()),
329 name: Some(owner_name),
330 email: None,
331 url: None,
332 organization: None,
333 organization_url: None,
334 timezone: None,
335 }]
336 } else {
337 Vec::new()
338 }
339 } else {
340 Vec::new()
341 }
342}
343
344fn extract_file_references(yaml: &serde_yaml::Mapping) -> Vec<FileReference> {
346 let about_resource = yaml
347 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
348 .and_then(|v| v.as_str());
349 let license_file = yaml
350 .get(Value::String("license_file".to_string()))
351 .and_then(|v| v.as_str());
352 let notice_file = yaml
353 .get(Value::String("notice_file".to_string()))
354 .and_then(|v| v.as_str());
355
356 let mut refs = Vec::new();
357
358 if let Some(path) = about_resource {
359 refs.push(FileReference {
360 path: path.to_string(),
361 size: None,
362 sha1: None,
363 md5: None,
364 sha256: None,
365 sha512: None,
366 extra_data: None,
367 });
368 }
369
370 for path in [license_file, notice_file].into_iter().flatten() {
371 refs.push(FileReference {
372 path: path.to_string(),
373 size: None,
374 sha1: None,
375 md5: None,
376 sha256: None,
377 sha512: None,
378 extra_data: None,
379 });
380 }
381
382 refs
383}
384
385fn default_package_data() -> PackageData {
387 PackageData {
388 package_type: Some(PackageType::About),
389 datasource_id: Some(DatasourceId::AboutFile),
390 ..Default::default()
391 }
392}
393
394fn infer_about_from_download_url(
395 download_url: Option<&str>,
396 about_name: Option<&str>,
397 about_version: Option<&str>,
398) -> Option<InferredAboutIdentity> {
399 let url = Url::parse(download_url?).ok()?;
400 let host = url.host_str()?;
401
402 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
403 let name = about_name.map(str::to_string)?;
404 let version = about_version.map(str::to_string);
405 return Some(InferredAboutIdentity {
406 package_type: PackageType::Pypi,
407 namespace: None,
408 name: Some(name),
409 version,
410 });
411 }
412
413 if matches!(host, "raw.githubusercontent.com" | "github.com") {
414 let mut segments = url.path_segments()?;
415 let owner = segments.next()?.to_string();
416 let repo = segments.next()?.to_string();
417 return Some(InferredAboutIdentity {
418 package_type: PackageType::Github,
419 namespace: Some(owner),
420 name: Some(repo),
421 version: None,
422 });
423 }
424
425 None
426}
427
428fn build_about_purl(
429 package_type: PackageType,
430 namespace: Option<&str>,
431 name: Option<&str>,
432 version: Option<&str>,
433) -> Option<String> {
434 if package_type == PackageType::About {
435 return None;
436 }
437
438 let name = name?;
439 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
440 if let Some(namespace) = namespace {
441 purl.with_namespace(namespace).ok()?;
442 }
443 if let Some(version) = version {
444 purl.with_version(version).ok()?;
445 }
446 Some(purl.to_string())
447}
448
449fn build_extra_data(
450 yaml: &serde_yaml::Mapping,
451) -> Option<std::collections::HashMap<String, serde_json::Value>> {
452 let mut extra_data = std::collections::HashMap::new();
453 for key in ["license_file", "notice_file", "notes"] {
454 if let Some(value) = yaml.get(Value::String(key.to_string()))
455 && let Some(value) = yaml_value_to_string(value)
456 {
457 extra_data.insert(key.to_string(), serde_json::Value::String(value));
458 }
459 }
460 (!extra_data.is_empty()).then_some(extra_data)
461}
462
// Invokes the crate-level `register_parser!` macro for the `.ABOUT` parser.
// NOTE(review): the positional arguments appear to be a description, the path
// globs, the package type, a string left empty here (meaning not visible in
// this file), and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "AboutCode .ABOUT metadata file",
    &["**/*.ABOUT"],
    "about",
    "",
    Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
);