1use crate::models::{DatasourceId, FileReference, PackageData, PackageType, Party};
25use log::warn;
26use packageurl::PackageUrl;
27use serde_yaml::Value;
28use std::fs;
29use std::path::Path;
30use std::str::FromStr;
31use url::Url;
32
33use super::PackageParser;
34use super::license_normalization::normalize_spdx_declared_license;
35
// Top-level keys read from the YAML mapping of an `.ABOUT` file.

// Package type as declared directly in the file.
const FIELD_TYPE: &str = "type";
// Package URL; `purl` is consulted first, `package_url` is the fallback key.
const FIELD_PURL: &str = "purl";
const FIELD_PACKAGE_URL: &str = "package_url";
// Package identity fields.
const FIELD_NAMESPACE: &str = "namespace";
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// Homepage; `home_url` is consulted first, `homepage_url` is the fallback key.
const FIELD_HOME_URL: &str = "home_url";
const FIELD_HOMEPAGE_URL: &str = "homepage_url";
// Download location; also used to infer a package identity when possible.
const FIELD_DOWNLOAD_URL: &str = "download_url";
const FIELD_COPYRIGHT: &str = "copyright";
// Declared license expression, kept verbatim and also normalized.
const FIELD_LICENSE_EXPRESSION: &str = "license_expression";
// Owner, reported as a party with the "owner" role.
const FIELD_OWNER: &str = "owner";
// Path of the resource described by the file, reported as a file reference.
const FIELD_ABOUT_RESOURCE: &str = "about_resource";
49
/// Parser for AboutCode `.ABOUT` metadata files.
///
/// The type carries no state; all behavior lives in its [`PackageParser`]
/// implementation.
pub struct AboutFileParser;
55
/// Package identity guessed from a download URL.
///
/// Produced by `infer_about_from_download_url` and used only as a fallback
/// for values that neither the `.ABOUT` fields nor a purl provide.
#[derive(Clone)]
struct InferredAboutIdentity {
    // Ecosystem implied by the URL host.
    package_type: PackageType,
    // Namespace derived from the URL, when one applies.
    namespace: Option<String>,
    // Package name derived from the URL or from the `.ABOUT` name.
    name: Option<String>,
    // Version, when the inference has one available.
    version: Option<String>,
}
63
64impl PackageParser for AboutFileParser {
65 const PACKAGE_TYPE: PackageType = PackageType::About;
66
67 fn extract_packages(path: &Path) -> Vec<PackageData> {
68 let yaml = match read_and_parse_yaml(path) {
69 Ok(yaml) => yaml,
70 Err(e) => {
71 warn!("Failed to read or parse .ABOUT file at {:?}: {}", path, e);
72 return vec![default_package_data()];
73 }
74 };
75
76 let about_type = yaml
78 .get(FIELD_TYPE)
79 .and_then(|v| v.as_str())
80 .map(String::from);
81
82 let about_namespace = yaml
83 .get(FIELD_NAMESPACE)
84 .and_then(|v| v.as_str())
85 .map(String::from);
86
87 let purl_string = yaml
88 .get(FIELD_PURL)
89 .and_then(|v| v.as_str())
90 .or_else(|| yaml.get(FIELD_PACKAGE_URL).and_then(|v| v.as_str()))
91 .map(String::from);
92
93 let (purl_type, purl_namespace, purl_name, purl_version) =
95 if let Some(ref purl_str) = purl_string {
96 match PackageUrl::from_str(purl_str) {
97 Ok(purl) => (
98 Some(purl.ty().to_string()),
99 purl.namespace().map(String::from),
100 Some(purl.name().to_string()),
101 purl.version().map(String::from),
102 ),
103 Err(e) => {
104 warn!("Failed to parse purl '{}': {}", purl_str, e);
105 (None, None, None, None)
106 }
107 }
108 } else {
109 (None, None, None, None)
110 };
111
112 let inferred = infer_about_from_download_url(
113 yaml.get(FIELD_DOWNLOAD_URL).and_then(|v| v.as_str()),
114 yaml.get(FIELD_NAME)
115 .and_then(yaml_value_to_string)
116 .as_deref(),
117 yaml.get(FIELD_VERSION)
118 .and_then(yaml_value_to_string)
119 .as_deref(),
120 );
121
122 let package_type = about_type
123 .clone()
124 .or(purl_type)
125 .and_then(|s| s.parse::<crate::models::PackageType>().ok())
126 .or_else(|| inferred.as_ref().map(|identity| identity.package_type))
127 .unwrap_or(Self::PACKAGE_TYPE);
128
129 let namespace = about_namespace
131 .clone()
132 .or(purl_namespace.clone())
133 .or_else(|| {
134 inferred
135 .as_ref()
136 .and_then(|identity| identity.namespace.clone())
137 });
138
139 let name = yaml
141 .get(FIELD_NAME)
142 .and_then(yaml_value_to_string)
143 .or(purl_name.clone())
144 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
145
146 let version = yaml
147 .get(FIELD_VERSION)
148 .and_then(yaml_value_to_string)
149 .or(purl_version.clone())
150 .or_else(|| {
151 inferred
152 .as_ref()
153 .and_then(|identity| identity.version.clone())
154 });
155
156 let homepage_url = yaml
158 .get(FIELD_HOME_URL)
159 .and_then(|v| v.as_str())
160 .or_else(|| yaml.get(FIELD_HOMEPAGE_URL).and_then(|v| v.as_str()))
161 .map(String::from);
162
163 let download_url = yaml
164 .get(FIELD_DOWNLOAD_URL)
165 .and_then(|v| v.as_str())
166 .map(String::from);
167
168 let copyright = yaml
169 .get(FIELD_COPYRIGHT)
170 .and_then(|v| v.as_str())
171 .map(String::from);
172
173 let extracted_license_statement = yaml
174 .get(FIELD_LICENSE_EXPRESSION)
175 .and_then(|v| v.as_str())
176 .map(String::from);
177 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
178 normalize_spdx_declared_license(extracted_license_statement.as_deref());
179
180 let vcs_url = yaml
181 .get(Value::String("vcs_url".to_string()))
182 .and_then(|v| v.as_str())
183 .map(String::from);
184
185 let extra_data = build_extra_data(&yaml);
186
187 let purl = purl_string.or_else(|| {
188 let name = yaml
189 .get(FIELD_NAME)
190 .and_then(yaml_value_to_string)
191 .or(purl_name.clone())
192 .or_else(|| inferred.as_ref().and_then(|identity| identity.name.clone()));
193 let version = yaml
194 .get(FIELD_VERSION)
195 .and_then(yaml_value_to_string)
196 .or(purl_version.clone())
197 .or_else(|| {
198 inferred
199 .as_ref()
200 .and_then(|identity| identity.version.clone())
201 });
202 let namespace = about_namespace.clone().or_else(|| {
203 inferred
204 .as_ref()
205 .and_then(|identity| identity.namespace.clone())
206 });
207 build_about_purl(
208 package_type,
209 namespace.as_deref(),
210 name.as_deref(),
211 version.as_deref(),
212 )
213 });
214
215 let parties = extract_owner_party(&yaml);
217
218 let file_references = extract_file_references(&yaml);
220
221 vec![PackageData {
222 package_type: Some(package_type),
223 namespace,
224 name,
225 version,
226 qualifiers: None,
227 subpath: None,
228 primary_language: None,
229 description: None,
230 release_date: None,
231 parties,
232 keywords: Vec::new(),
233 homepage_url,
234 download_url,
235 size: None,
236 sha1: None,
237 md5: None,
238 sha256: None,
239 sha512: None,
240 bug_tracking_url: None,
241 code_view_url: None,
242 vcs_url,
243 copyright,
244 holder: None,
245 declared_license_expression,
246 declared_license_expression_spdx,
247 license_detections,
248 other_license_expression: None,
249 other_license_expression_spdx: None,
250 other_license_detections: Vec::new(),
251 extracted_license_statement,
252 notice_text: None,
253 source_packages: Vec::new(),
254 file_references,
255 is_private: false,
256 is_virtual: false,
257 extra_data,
258 dependencies: Vec::new(),
259 repository_homepage_url: None,
260 repository_download_url: None,
261 api_data_url: None,
262 datasource_id: Some(DatasourceId::AboutFile),
263 purl,
264 }]
265 }
266
267 fn is_match(path: &Path) -> bool {
268 path.extension()
269 .and_then(|ext| ext.to_str())
270 .is_some_and(|ext| ext == "ABOUT")
271 }
272}
273
274fn read_and_parse_yaml(path: &Path) -> Result<serde_yaml::Mapping, String> {
276 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
277
278 let value: Value =
279 serde_yaml::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
280
281 match value {
282 Value::Mapping(map) => Ok(map),
283 _ => Err("Expected YAML mapping at root".to_string()),
284 }
285}
286
287fn yaml_value_to_string(value: &Value) -> Option<String> {
289 match value {
290 Value::String(s) => Some(s.clone()),
291 Value::Number(n) => Some(n.to_string()),
292 Value::Bool(b) => Some(b.to_string()),
293 _ => None,
294 }
295}
296
297fn extract_owner_party(yaml: &serde_yaml::Mapping) -> Vec<Party> {
299 let owner = yaml
300 .get(Value::String(FIELD_OWNER.to_string()))
301 .map(|v| match v {
302 Value::String(s) => s.clone(),
303 _ => {
304 format!("{:?}", v)
306 }
307 });
308
309 if let Some(owner_name) = owner {
310 if !owner_name.is_empty() {
311 vec![Party {
312 r#type: Some("person".to_string()),
313 role: Some("owner".to_string()),
314 name: Some(owner_name),
315 email: None,
316 url: None,
317 organization: None,
318 organization_url: None,
319 timezone: None,
320 }]
321 } else {
322 Vec::new()
323 }
324 } else {
325 Vec::new()
326 }
327}
328
329fn extract_file_references(yaml: &serde_yaml::Mapping) -> Vec<FileReference> {
331 let about_resource = yaml
332 .get(Value::String(FIELD_ABOUT_RESOURCE.to_string()))
333 .and_then(|v| v.as_str());
334 let license_file = yaml
335 .get(Value::String("license_file".to_string()))
336 .and_then(|v| v.as_str());
337 let notice_file = yaml
338 .get(Value::String("notice_file".to_string()))
339 .and_then(|v| v.as_str());
340
341 let mut refs = Vec::new();
342
343 if let Some(path) = about_resource {
344 refs.push(FileReference {
345 path: path.to_string(),
346 size: None,
347 sha1: None,
348 md5: None,
349 sha256: None,
350 sha512: None,
351 extra_data: None,
352 });
353 }
354
355 for path in [license_file, notice_file].into_iter().flatten() {
356 refs.push(FileReference {
357 path: path.to_string(),
358 size: None,
359 sha1: None,
360 md5: None,
361 sha256: None,
362 sha512: None,
363 extra_data: None,
364 });
365 }
366
367 refs
368}
369
370fn default_package_data() -> PackageData {
372 PackageData {
373 package_type: Some(PackageType::About),
374 datasource_id: Some(DatasourceId::AboutFile),
375 ..Default::default()
376 }
377}
378
379fn infer_about_from_download_url(
380 download_url: Option<&str>,
381 about_name: Option<&str>,
382 about_version: Option<&str>,
383) -> Option<InferredAboutIdentity> {
384 let url = Url::parse(download_url?).ok()?;
385 let host = url.host_str()?;
386
387 if matches!(host, "pypi.python.org" | "files.pythonhosted.org") {
388 let name = about_name.map(str::to_string)?;
389 let version = about_version.map(str::to_string);
390 return Some(InferredAboutIdentity {
391 package_type: PackageType::Pypi,
392 namespace: None,
393 name: Some(name),
394 version,
395 });
396 }
397
398 if matches!(host, "raw.githubusercontent.com" | "github.com") {
399 let mut segments = url.path_segments()?;
400 let owner = segments.next()?.to_string();
401 let repo = segments.next()?.to_string();
402 return Some(InferredAboutIdentity {
403 package_type: PackageType::Github,
404 namespace: Some(owner),
405 name: Some(repo),
406 version: None,
407 });
408 }
409
410 None
411}
412
413fn build_about_purl(
414 package_type: PackageType,
415 namespace: Option<&str>,
416 name: Option<&str>,
417 version: Option<&str>,
418) -> Option<String> {
419 if package_type == PackageType::About {
420 return None;
421 }
422
423 let name = name?;
424 let mut purl = PackageUrl::new(package_type.as_str(), name).ok()?;
425 if let Some(namespace) = namespace {
426 purl.with_namespace(namespace).ok()?;
427 }
428 if let Some(version) = version {
429 purl.with_version(version).ok()?;
430 }
431 Some(purl.to_string())
432}
433
434fn build_extra_data(
435 yaml: &serde_yaml::Mapping,
436) -> Option<std::collections::HashMap<String, serde_json::Value>> {
437 let mut extra_data = std::collections::HashMap::new();
438 for key in ["license_file", "notice_file", "notes"] {
439 if let Some(value) = yaml.get(Value::String(key.to_string()))
440 && let Some(value) = yaml_value_to_string(value)
441 {
442 extra_data.insert(key.to_string(), serde_json::Value::String(value));
443 }
444 }
445 (!extra_data.is_empty()).then_some(extra_data)
446}
447
// Registers this parser's metadata: a human-readable description, the glob
// pattern for matching files, the "about" identifier, and a link to the
// format specification.
// NOTE(review): the fourth argument is intentionally an empty string here;
// its meaning is defined by `register_parser!` — confirm against the macro.
crate::register_parser!(
    "AboutCode .ABOUT metadata file",
    &["**/*.ABOUT"],
    "about",
    "",
    Some("https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html"),
);