Skip to main content

provenant/parsers/
citation.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::path::Path;
5
6use crate::models::{DatasourceId, PackageData, PackageType, Party};
7use crate::parser_warn as warn;
8
9use super::PackageParser;
10use super::license_normalization::normalize_spdx_declared_license;
11use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13pub struct CitationCffParser;
14
15impl PackageParser for CitationCffParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Generic;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().and_then(|name| name.to_str()) == Some("CITATION.cff")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match read_file_to_string(path, None) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read CITATION.cff at {:?}: {}", path, error);
27                return vec![default_package_data()];
28            }
29        };
30
31        let yaml: yaml_serde::Value = match yaml_serde::from_str(&content) {
32            Ok(yaml) => yaml,
33            Err(error) => {
34                warn!("Failed to parse CITATION.cff at {:?}: {}", path, error);
35                return vec![default_package_data()];
36            }
37        };
38
39        vec![parse_citation_cff(&yaml)]
40    }
41
42    fn metadata() -> Vec<super::metadata::ParserMetadata> {
43        vec![super::metadata::ParserMetadata {
44            description: "citation cff metadata",
45            file_patterns: &["**/CITATION.cff"],
46            package_type: "generic",
47            primary_language: "Text",
48            documentation_url: Some("https://citation-file-format.github.io/"),
49        }]
50    }
51}
52
53fn default_package_data() -> PackageData {
54    PackageData {
55        package_type: Some(CitationCffParser::PACKAGE_TYPE),
56        datasource_id: Some(DatasourceId::CitationCff),
57        ..Default::default()
58    }
59}
60
61fn parse_citation_cff(yaml: &yaml_serde::Value) -> PackageData {
62    if yaml
63        .get("cff-version")
64        .and_then(yaml_serde::Value::as_str)
65        .is_none()
66    {
67        return default_package_data();
68    }
69
70    let mut package = default_package_data();
71    package.name = yaml
72        .get("title")
73        .and_then(yaml_serde::Value::as_str)
74        .map(|s| truncate_field(s.to_string()));
75    package.version = yaml
76        .get("version")
77        .and_then(yaml_serde::Value::as_str)
78        .map(|s| truncate_field(s.to_string()));
79    package.description = yaml
80        .get("abstract")
81        .and_then(yaml_serde::Value::as_str)
82        .or_else(|| yaml.get("message").and_then(yaml_serde::Value::as_str))
83        .map(|s| truncate_field(s.to_string()));
84    package.homepage_url = yaml
85        .get("url")
86        .and_then(yaml_serde::Value::as_str)
87        .map(|s| truncate_field(s.to_string()));
88    package.vcs_url = yaml
89        .get("repository-code")
90        .and_then(yaml_serde::Value::as_str)
91        .map(|s| truncate_field(s.to_string()));
92    package.parties = extract_author_parties(yaml.get("authors"));
93
94    if let Some(license) = yaml.get("license").and_then(yaml_serde::Value::as_str) {
95        let license = truncate_field(license.to_string());
96        package.extracted_license_statement = Some(license.clone());
97        let (declared, declared_spdx, detections) = normalize_spdx_declared_license(Some(&license));
98        package.declared_license_expression = declared;
99        package.declared_license_expression_spdx = declared_spdx;
100        package.license_detections = detections;
101    }
102
103    package
104}
105
106fn extract_author_parties(value: Option<&yaml_serde::Value>) -> Vec<Party> {
107    value
108        .and_then(yaml_serde::Value::as_sequence)
109        .into_iter()
110        .flatten()
111        .take(MAX_ITERATION_COUNT)
112        .filter_map(|entry| {
113            let name = entry
114                .get("name")
115                .and_then(yaml_serde::Value::as_str)
116                .map(|s| truncate_field(s.to_string()))
117                .or_else(|| {
118                    let given = entry.get("given-names").and_then(yaml_serde::Value::as_str);
119                    let family = entry
120                        .get("family-names")
121                        .and_then(yaml_serde::Value::as_str);
122                    match (given, family) {
123                        (Some(given), Some(family)) => {
124                            Some(truncate_field(format!("{given} {family}")))
125                        }
126                        (Some(given), None) => Some(truncate_field(given.to_string())),
127                        (None, Some(family)) => Some(truncate_field(family.to_string())),
128                        (None, None) => None,
129                    }
130                });
131            let email = entry
132                .get("email")
133                .and_then(yaml_serde::Value::as_str)
134                .map(|s| truncate_field(s.to_string()));
135            let url = entry
136                .get("orcid")
137                .and_then(yaml_serde::Value::as_str)
138                .map(|s| truncate_field(s.to_string()));
139
140            if name.is_none() && email.is_none() && url.is_none() {
141                return None;
142            }
143
144            Some(Party {
145                r#type: Some("person".to_string()),
146                role: Some("author".to_string()),
147                name,
148                email,
149                url,
150                organization: None,
151                organization_url: None,
152                timezone: None,
153            })
154        })
155        .collect()
156}