Skip to main content

provenant/parsers/
citation.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::path::Path;
5
6use crate::models::{DatasourceId, PackageData, PackageType, Party};
7use crate::parser_warn as warn;
8
9use super::PackageParser;
10use super::license_normalization::normalize_spdx_declared_license;
11use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13pub struct CitationCffParser;
14
15impl PackageParser for CitationCffParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Generic;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match read_file_to_string(path, None) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
27                return vec![default_package_data()];
28            }
29        };
30
31        let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
32            Ok(yaml) => yaml,
33            Err(error) => {
34                warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
35                return vec![default_package_data()];
36            }
37        };
38
39        vec![parse_citation_cff(&yaml)]
40    }
41}
42
43fn default_package_data() -> PackageData {
44    PackageData {
45        package_type: Some(CitationCffParser::PACKAGE_TYPE),
46        datasource_id: Some(DatasourceId::CitationCff),
47        ..Default::default()
48    }
49}
50
51fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
52    if yaml
53        .get("cff-version")
54        .and_then(yaml_serde::Value::as_str)
55        .is_none()
56    {
57        return default_package_data();
58    }
59
60    let mut package = default_package_data();
61    package.name = yaml
62        .get("title")
63        .and_then(yaml_serde::Value::as_str)
64        .map(|s| truncate_field(s.to_string()));
65    package.version = yaml
66        .get("version")
67        .and_then(yaml_serde::Value::as_str)
68        .map(|s| truncate_field(s.to_string()));
69    package.description = yaml
70        .get("abstract")
71        .and_then(yaml_serde::Value::as_str)
72        .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
73        .map(|s| truncate_field(s.to_string()));
74    package.homepage_url = yaml
75        .get("url")
76        .and_then(yaml_serde::Value::as_str)
77        .map(|s| truncate_field(s.to_string()));
78    package.vcs_url = yaml
79        .get("repository-code")
80        .and_then(yaml_serde::Value::as_str)
81        .map(|s| truncate_field(s.to_string()));
82    package.parties = extract_author_parties(yaml.get("authors"));
83
84    if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
85        let license = truncate_field(license.to_string());
86        package.extracted_license_statement = Some(license.clone());
87        let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
88        package.declared_license_expression = declared;
89        package.declared_license_expression_spdx = declared_spdx;
90        package.license_detections = detections;
91    }
92
93    package
94}
95
96fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
97    value
98        .and_then(yaml_serde::Value::as_sequence)
99        .into_iter()
100        .flatten()
101        .take(MAX_ITERATION_COUNT)
102        .filter_map(|entry| {
103            let name = entry
104                .get("name")
105                .and_then(yaml_serde::Value::as_str)
106                .map(|s| truncate_field(s.to_string()))
107                .or_else(|| {
108                    let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
109                    let family = entry
110                        .get("family-names")
111                        .and_then(yaml_serde::Value::as_str);
112                    match (given, family) {
113                        (Some(given), Some(family)) => {
114                            Some(truncate_field(format!("{given} {family}")))
115                        }
116                        (Some(given), None) => Some(truncate_field(given.to_string())),
117                        (None, Some(family)) => Some(truncate_field(family.to_string())),
118                        (None, None) => None,
119                    }
120                });
121            let email = entry
122                .get("email")
123                .and_then(yaml_serde::Value::as_str)
124                .map(|s| truncate_field(s.to_string()));
125            let url = entry
126                .get("orcid")
127                .and_then(yaml_serde::Value::as_str)
128                .map(|s| truncate_field(s.to_string()));
129
130            if name.is_none() && email.is_none() && url.is_none() {
131                return None;
132            }
133
134            Some(Party {
135                r#type: Some("person".to_string()),
136                role: Some("author".to_string()),
137                name,
138                email,
139                url,
140                organization: None,
141                organization_url: None,
142                timezone: None,
143            })
144        })
145        .collect()
146}
147
// Registers metadata for the CITATION.cff parser through the crate-level
// `register_parser!` macro. The glob below targets the same file name that
// `CitationCffParser::is_match` accepts.
// NOTE(review): the macro definition is not visible here; the arguments are
// presumably a description, path patterns, package type, primary language and a
// documentation URL — confirm against the macro before changing them.
148crate::register_parser!(
149    "citation cff metadata",
150    &["**/CITATION.cff"],
151    "generic",
152    "Text",
153    Some("https://citation-file-format.github.io/"),
154);