provenant/parsers/
citation.rs1use std::path::Path;
5
6use crate::models::{DatasourceId, PackageData, PackageType, Party};
7use crate::parser_warn as warn;
8
9use super::PackageParser;
10use super::license_normalization::normalize_spdx_declared_license;
11use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13pub struct CitationCffParser;
14
15impl PackageParser for CitationCffParser {
16 const PACKAGE_TYPE: PackageType = PackageType::Generic;
17
18 fn is_match(path: &Path) -> bool {
19 path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
20 }
21
22 fn extract_packages(path: &Path) -> Vec<PackageData> {
23 let content = match read_file_to_string(path, None) {
24 Ok(content) => content,
25 Err(error) => {
26 warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
27 return vec![default_package_data()];
28 }
29 };
30
31 let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
32 Ok(yaml) => yaml,
33 Err(error) => {
34 warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
35 return vec![default_package_data()];
36 }
37 };
38
39 vec![parse_citation_cff(&yaml)]
40 }
41
42 fn metadata() -> Vec<super::metadata::ParserMetadata> {
43 vec![super::metadata::ParserMetadata {
44 description: "citation cff metadata",
45 file_patterns: &["**/CITATION.cff"],
46 package_type: "generic",
47 primary_language: "Text",
48 documentation_url: Some("https://citation-file-format.github.io/"),
49 }]
50 }
51}
52
53fn default_package_data() -> PackageData {
54 PackageData {
55 package_type: Some(CitationCffParser::PACKAGE_TYPE),
56 datasource_id: Some(DatasourceId::CitationCff),
57 ..Default::default()
58 }
59}
60
61fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
62 if yaml
63 .get("cff-version")
64 .and_then(yaml_serde::Value::as_str)
65 .is_none()
66 {
67 return default_package_data();
68 }
69
70 let mut package = default_package_data();
71 package.name = yaml
72 .get("title")
73 .and_then(yaml_serde::Value::as_str)
74 .map(|s| truncate_field(s.to_string()));
75 package.version = yaml
76 .get("version")
77 .and_then(yaml_serde::Value::as_str)
78 .map(|s| truncate_field(s.to_string()));
79 package.description = yaml
80 .get("abstract")
81 .and_then(yaml_serde::Value::as_str)
82 .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
83 .map(|s| truncate_field(s.to_string()));
84 package.homepage_url = yaml
85 .get("url")
86 .and_then(yaml_serde::Value::as_str)
87 .map(|s| truncate_field(s.to_string()));
88 package.vcs_url = yaml
89 .get("repository-code")
90 .and_then(yaml_serde::Value::as_str)
91 .map(|s| truncate_field(s.to_string()));
92 package.parties = extract_author_parties(yaml.get("authors"));
93
94 if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
95 let license = truncate_field(license.to_string());
96 package.extracted_license_statement = Some(license.clone());
97 let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
98 package.declared_license_expression = declared;
99 package.declared_license_expression_spdx = declared_spdx;
100 package.license_detections = detections;
101 }
102
103 package
104}
105
106fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
107 value
108 .and_then(yaml_serde::Value::as_sequence)
109 .into_iter()
110 .flatten()
111 .take(MAX_ITERATION_COUNT)
112 .filter_map(|entry| {
113 let name = entry
114 .get("name")
115 .and_then(yaml_serde::Value::as_str)
116 .map(|s| truncate_field(s.to_string()))
117 .or_else(|| {
118 let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
119 let family = entry
120 .get("family-names")
121 .and_then(yaml_serde::Value::as_str);
122 match (given, family) {
123 (Some(given), Some(family)) => {
124 Some(truncate_field(format!("{given} {family}")))
125 }
126 (Some(given), None) => Some(truncate_field(given.to_string())),
127 (None, Some(family)) => Some(truncate_field(family.to_string())),
128 (None, None) => None,
129 }
130 });
131 let email = entry
132 .get("email")
133 .and_then(yaml_serde::Value::as_str)
134 .map(|s| truncate_field(s.to_string()));
135 let url = entry
136 .get("orcid")
137 .and_then(yaml_serde::Value::as_str)
138 .map(|s| truncate_field(s.to_string()));
139
140 if name.is_none() && email.is_none() && url.is_none() {
141 return None;
142 }
143
144 Some(Party {
145 r#type: Some("person".to_string()),
146 role: Some("author".to_string()),
147 name,
148 email,
149 url,
150 organization: None,
151 organization_url: None,
152 timezone: None,
153 })
154 })
155 .collect()
156}