provenant/parsers/
citation.rs1use std::path::Path;
2
3use crate::models::{DatasourceId, PackageData, PackageType, Party};
4use crate::parser_warn as warn;
5
6use super::PackageParser;
7use super::license_normalization::normalize_spdx_declared_license;
8use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9
10pub struct CitationCffParser;
11
12impl PackageParser for CitationCffParser {
13 const PACKAGE_TYPE: PackageType = PackageType::Generic;
14
15 fn is_match(path: &Path) -> bool {
16 path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
17 }
18
19 fn extract_packages(path: &Path) -> Vec<PackageData> {
20 let content = match read_file_to_string(path, None) {
21 Ok(content) => content,
22 Err(error) => {
23 warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
24 return vec![default_package_data()];
25 }
26 };
27
28 let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
29 Ok(yaml) => yaml,
30 Err(error) => {
31 warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
32 return vec![default_package_data()];
33 }
34 };
35
36 vec![parse_citation_cff(&yaml)]
37 }
38}
39
40fn default_package_data() -> PackageData {
41 PackageData {
42 package_type: Some(CitationCffParser::PACKAGE_TYPE),
43 datasource_id: Some(DatasourceId::CitationCff),
44 ..Default::default()
45 }
46}
47
48fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
49 if yaml
50 .get("cff-version")
51 .and_then(yaml_serde::Value::as_str)
52 .is_none()
53 {
54 return default_package_data();
55 }
56
57 let mut package = default_package_data();
58 package.name = yaml
59 .get("title")
60 .and_then(yaml_serde::Value::as_str)
61 .map(|s| truncate_field(s.to_string()));
62 package.version = yaml
63 .get("version")
64 .and_then(yaml_serde::Value::as_str)
65 .map(|s| truncate_field(s.to_string()));
66 package.description = yaml
67 .get("abstract")
68 .and_then(yaml_serde::Value::as_str)
69 .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
70 .map(|s| truncate_field(s.to_string()));
71 package.homepage_url = yaml
72 .get("url")
73 .and_then(yaml_serde::Value::as_str)
74 .map(|s| truncate_field(s.to_string()));
75 package.vcs_url = yaml
76 .get("repository-code")
77 .and_then(yaml_serde::Value::as_str)
78 .map(|s| truncate_field(s.to_string()));
79 package.parties = extract_author_parties(yaml.get("authors"));
80
81 if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
82 let license = truncate_field(license.to_string());
83 package.extracted_license_statement = Some(license.clone());
84 let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
85 package.declared_license_expression = declared;
86 package.declared_license_expression_spdx = declared_spdx;
87 package.license_detections = detections;
88 }
89
90 package
91}
92
93fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
94 value
95 .and_then(yaml_serde::Value::as_sequence)
96 .into_iter()
97 .flatten()
98 .take(MAX_ITERATION_COUNT)
99 .filter_map(|entry| {
100 let name = entry
101 .get("name")
102 .and_then(yaml_serde::Value::as_str)
103 .map(|s| truncate_field(s.to_string()))
104 .or_else(|| {
105 let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
106 let family = entry
107 .get("family-names")
108 .and_then(yaml_serde::Value::as_str);
109 match (given, family) {
110 (Some(given), Some(family)) => {
111 Some(truncate_field(format!("{given} {family}")))
112 }
113 (Some(given), None) => Some(truncate_field(given.to_string())),
114 (None, Some(family)) => Some(truncate_field(family.to_string())),
115 (None, None) => None,
116 }
117 });
118 let email = entry
119 .get("email")
120 .and_then(yaml_serde::Value::as_str)
121 .map(|s| truncate_field(s.to_string()));
122 let url = entry
123 .get("orcid")
124 .and_then(yaml_serde::Value::as_str)
125 .map(|s| truncate_field(s.to_string()));
126
127 if name.is_none() && email.is_none() && url.is_none() {
128 return None;
129 }
130
131 Some(Party {
132 r#type: Some("person".to_string()),
133 role: Some("author".to_string()),
134 name,
135 email,
136 url,
137 organization: None,
138 organization_url: None,
139 timezone: None,
140 })
141 })
142 .collect()
143}
144
145crate::register_parser!(
146 "citation cff metadata",
147 &["**/CITATION.cff"],
148 "generic",
149 "Text",
150 Some("https://citation-file-format.github.io/"),
151);