Skip to main content

provenant/parsers/
citation.rs

1use std::path::Path;
2
3use crate::models::{DatasourceId, PackageData, PackageType, Party};
4use crate::parser_warn as warn;
5
6use super::PackageParser;
7use super::license_normalization::normalize_spdx_declared_license;
8use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9
10pub struct CitationCffParser;
11
12impl PackageParser for CitationCffParser {
13    const PACKAGE_TYPE: PackageType = PackageType::Generic;
14
15    fn is_match(path: &Path) -> bool {
16        path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
17    }
18
19    fn extract_packages(path: &Path) -> Vec<PackageData> {
20        let content = match read_file_to_string(path, None) {
21            Ok(content) => content,
22            Err(error) => {
23                warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
24                return vec![default_package_data()];
25            }
26        };
27
28        let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
29            Ok(yaml) => yaml,
30            Err(error) => {
31                warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
32                return vec![default_package_data()];
33            }
34        };
35
36        vec![parse_citation_cff(&yaml)]
37    }
38}
39
40fn default_package_data() -> PackageData {
41    PackageData {
42        package_type: Some(CitationCffParser::PACKAGE_TYPE),
43        datasource_id: Some(DatasourceId::CitationCff),
44        ..Default::default()
45    }
46}
47
48fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
49    if yaml
50        .get("cff-version")
51        .and_then(yaml_serde::Value::as_str)
52        .is_none()
53    {
54        return default_package_data();
55    }
56
57    let mut package = default_package_data();
58    package.name = yaml
59        .get("title")
60        .and_then(yaml_serde::Value::as_str)
61        .map(|s| truncate_field(s.to_string()));
62    package.version = yaml
63        .get("version")
64        .and_then(yaml_serde::Value::as_str)
65        .map(|s| truncate_field(s.to_string()));
66    package.description = yaml
67        .get("abstract")
68        .and_then(yaml_serde::Value::as_str)
69        .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
70        .map(|s| truncate_field(s.to_string()));
71    package.homepage_url = yaml
72        .get("url")
73        .and_then(yaml_serde::Value::as_str)
74        .map(|s| truncate_field(s.to_string()));
75    package.vcs_url = yaml
76        .get("repository-code")
77        .and_then(yaml_serde::Value::as_str)
78        .map(|s| truncate_field(s.to_string()));
79    package.parties = extract_author_parties(yaml.get("authors"));
80
81    if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
82        let license = truncate_field(license.to_string());
83        package.extracted_license_statement = Some(license.clone());
84        let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
85        package.declared_license_expression = declared;
86        package.declared_license_expression_spdx = declared_spdx;
87        package.license_detections = detections;
88    }
89
90    package
91}
92
93fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
94    value
95        .and_then(yaml_serde::Value::as_sequence)
96        .into_iter()
97        .flatten()
98        .take(MAX_ITERATION_COUNT)
99        .filter_map(|entry| {
100            let name = entry
101                .get("name")
102                .and_then(yaml_serde::Value::as_str)
103                .map(|s| truncate_field(s.to_string()))
104                .or_else(|| {
105                    let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
106                    let family = entry
107                        .get("family-names")
108                        .and_then(yaml_serde::Value::as_str);
109                    match (given, family) {
110                        (Some(given), Some(family)) => {
111                            Some(truncate_field(format!("{given} {family}")))
112                        }
113                        (Some(given), None) => Some(truncate_field(given.to_string())),
114                        (None, Some(family)) => Some(truncate_field(family.to_string())),
115                        (None, None) => None,
116                    }
117                });
118            let email = entry
119                .get("email")
120                .and_then(yaml_serde::Value::as_str)
121                .map(|s| truncate_field(s.to_string()));
122            let url = entry
123                .get("orcid")
124                .and_then(yaml_serde::Value::as_str)
125                .map(|s| truncate_field(s.to_string()));
126
127            if name.is_none() && email.is_none() && url.is_none() {
128                return None;
129            }
130
131            Some(Party {
132                r#type: Some("person".to_string()),
133                role: Some("author".to_string()),
134                name,
135                email,
136                url,
137                organization: None,
138                organization_url: None,
139                timezone: None,
140            })
141        })
142        .collect()
143}
144
// Registers the CITATION.cff parser with the crate-level parser registry macro.
// NOTE(review): the positional arguments look like (description, path globs,
// package type, primary language, documentation URL) — confirm against the
// definition of `register_parser!`, which is not visible from this file.
crate::register_parser!(
    "citation cff metadata",
    &["**/CITATION.cff"],
    "generic",
    "Text",
    Some("https://citation-file-format.github.io/"),
);