provenant/parsers/
citation.rs1use std::path::Path;
5
6use crate::models::{DatasourceId, PackageData, PackageType, Party};
7use crate::parser_warn as warn;
8
9use super::PackageParser;
10use super::license_normalization::normalize_spdx_declared_license;
11use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13pub struct CitationCffParser;
14
15impl PackageParser for CitationCffParser {
16 const PACKAGE_TYPE: PackageType = PackageType::Generic;
17
18 fn is_match(path: &Path) -> bool {
19 path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
20 }
21
22 fn extract_packages(path: &Path) -> Vec<PackageData> {
23 let content = match read_file_to_string(path, None) {
24 Ok(content) => content,
25 Err(error) => {
26 warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
27 return vec![default_package_data()];
28 }
29 };
30
31 let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
32 Ok(yaml) => yaml,
33 Err(error) => {
34 warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
35 return vec![default_package_data()];
36 }
37 };
38
39 vec![parse_citation_cff(&yaml)]
40 }
41}
42
43fn default_package_data() -> PackageData {
44 PackageData {
45 package_type: Some(CitationCffParser::PACKAGE_TYPE),
46 datasource_id: Some(DatasourceId::CitationCff),
47 ..Default::default()
48 }
49}
50
51fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
52 if yaml
53 .get("cff-version")
54 .and_then(yaml_serde::Value::as_str)
55 .is_none()
56 {
57 return default_package_data();
58 }
59
60 let mut package = default_package_data();
61 package.name = yaml
62 .get("title")
63 .and_then(yaml_serde::Value::as_str)
64 .map(|s| truncate_field(s.to_string()));
65 package.version = yaml
66 .get("version")
67 .and_then(yaml_serde::Value::as_str)
68 .map(|s| truncate_field(s.to_string()));
69 package.description = yaml
70 .get("abstract")
71 .and_then(yaml_serde::Value::as_str)
72 .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
73 .map(|s| truncate_field(s.to_string()));
74 package.homepage_url = yaml
75 .get("url")
76 .and_then(yaml_serde::Value::as_str)
77 .map(|s| truncate_field(s.to_string()));
78 package.vcs_url = yaml
79 .get("repository-code")
80 .and_then(yaml_serde::Value::as_str)
81 .map(|s| truncate_field(s.to_string()));
82 package.parties = extract_author_parties(yaml.get("authors"));
83
84 if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
85 let license = truncate_field(license.to_string());
86 package.extracted_license_statement = Some(license.clone());
87 let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
88 package.declared_license_expression = declared;
89 package.declared_license_expression_spdx = declared_spdx;
90 package.license_detections = detections;
91 }
92
93 package
94}
95
96fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
97 value
98 .and_then(yaml_serde::Value::as_sequence)
99 .into_iter()
100 .flatten()
101 .take(MAX_ITERATION_COUNT)
102 .filter_map(|entry| {
103 let name = entry
104 .get("name")
105 .and_then(yaml_serde::Value::as_str)
106 .map(|s| truncate_field(s.to_string()))
107 .or_else(|| {
108 let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
109 let family = entry
110 .get("family-names")
111 .and_then(yaml_serde::Value::as_str);
112 match (given, family) {
113 (Some(given), Some(family)) => {
114 Some(truncate_field(format!("{given} {family}")))
115 }
116 (Some(given), None) => Some(truncate_field(given.to_string())),
117 (None, Some(family)) => Some(truncate_field(family.to_string())),
118 (None, None) => None,
119 }
120 });
121 let email = entry
122 .get("email")
123 .and_then(yaml_serde::Value::as_str)
124 .map(|s| truncate_field(s.to_string()));
125 let url = entry
126 .get("orcid")
127 .and_then(yaml_serde::Value::as_str)
128 .map(|s| truncate_field(s.to_string()));
129
130 if name.is_none() && email.is_none() && url.is_none() {
131 return None;
132 }
133
134 Some(Party {
135 r#type: Some("person".to_string()),
136 role: Some("author".to_string()),
137 name,
138 email,
139 url,
140 organization: None,
141 organization_url: None,
142 timezone: None,
143 })
144 })
145 .collect()
146}
147
// Registers metadata for the CITATION.cff parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the arguments appear to be a description, the matching path
// globs, the package type, the primary language and a documentation URL;
// confirm against the macro definition.
crate::register_parser!(
    "citation cff metadata",
    &["**/CITATION.cff"],
    "generic",
    "Text",
    Some("https://citation-file-format.github.io/"),
);