Skip to main content

commonmeta/
data.rs

//! Core Commonmeta data model.
//!
//! `Data` mirrors the commonmeta v1.0 JSON schema
//! (`resources/commonmeta_v1.0.json`) directly: field names and nesting
//! match the schema 1:1, in the same way that `commonmeta-py`'s `Metadata`
//! class *is* the v1.0 shape rather than an internal model translated to
//! and from it.
//!
//! A few sub-structs carry fields beyond what the schema defines, because
//! writers for other formats need them for round-tripping (e.g.
//! `Reference.unstructured`/`.asserted_by`, `Dates.collected`/`.valid`/
//! `.other`/`.copyrighted`). These extra fields are omitted from the
//! serialized output whenever they are empty, and they don't affect schema
//! validation, since the schema's nested item definitions don't set
//! `additionalProperties: false`.
14
15use serde::{Deserialize, Deserializer, Serialize};
16
/// Predicate for serde's `skip_serializing_if`: `true` when the value is zero.
///
/// The argument is a reference because serde passes the field by reference
/// to `skip_serializing_if` functions.
fn is_zero_i64(n: &i64) -> bool {
    matches!(*n, 0)
}
20
21/// The native Commonmeta record, shaped like the v1.0 JSON schema.
22#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
23pub struct Data {
24    pub id: String,
25    #[serde(rename = "type")]
26    pub type_: String,
27
28    #[serde(default, skip_serializing_if = "Vec::is_empty")]
29    pub additional_descriptions: Vec<Description>,
30    #[serde(default, skip_serializing_if = "Vec::is_empty")]
31    pub additional_titles: Vec<Title>,
32    #[serde(default, skip_serializing_if = "String::is_empty")]
33    pub additional_type: String,
34    #[serde(default, skip_serializing_if = "Vec::is_empty")]
35    pub archive_locations: Vec<String>,
36    #[serde(default, skip_serializing_if = "Vec::is_empty")]
37    pub citations: Vec<Citation>,
38    #[serde(default, skip_serializing_if = "Container::is_empty")]
39    pub container: Container,
40    #[serde(default, skip_serializing_if = "String::is_empty")]
41    pub content: String,
42    #[serde(default, skip_serializing_if = "Vec::is_empty")]
43    pub contributors: Vec<Contributor>,
44    #[serde(default, skip_serializing_if = "String::is_empty")]
45    pub date_published: String,
46    #[serde(default, skip_serializing_if = "String::is_empty")]
47    pub date_updated: String,
48    #[serde(default, skip_serializing_if = "Dates::is_empty")]
49    pub dates: Dates,
50    #[serde(default, skip_serializing_if = "String::is_empty")]
51    pub description: String,
52    #[serde(default, skip_serializing_if = "Vec::is_empty")]
53    pub files: Vec<File>,
54    #[serde(default, skip_serializing_if = "Vec::is_empty")]
55    pub funding_references: Vec<FundingReference>,
56    #[serde(default, skip_serializing_if = "Vec::is_empty")]
57    pub geo_locations: Vec<GeoLocation>,
58    #[serde(default, skip_serializing_if = "Vec::is_empty")]
59    pub identifiers: Vec<Identifier>,
60    #[serde(default, skip_serializing_if = "String::is_empty")]
61    pub image: String,
62    #[serde(default, skip_serializing_if = "String::is_empty")]
63    pub language: String,
64    #[serde(default, skip_serializing_if = "License::is_empty")]
65    pub license: License,
66    #[serde(default, skip_serializing_if = "String::is_empty")]
67    pub provider: String,
68    #[serde(default, skip_serializing_if = "Publisher::is_empty")]
69    pub publisher: Publisher,
70    #[serde(default, skip_serializing_if = "Vec::is_empty")]
71    pub references: Vec<Reference>,
72    #[serde(default, skip_serializing_if = "Vec::is_empty")]
73    pub relations: Vec<Relation>,
74    #[serde(default, skip_serializing_if = "String::is_empty")]
75    pub schema_version: String,
76    #[serde(default, skip_serializing_if = "Vec::is_empty")]
77    pub subjects: Vec<Subject>,
78    #[serde(default, skip_serializing_if = "String::is_empty")]
79    pub title: String,
80    #[serde(default, skip_serializing_if = "String::is_empty")]
81    pub url: String,
82    #[serde(default, skip_serializing_if = "String::is_empty")]
83    pub version: String,
84}
85
86#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
87pub struct Affiliation {
88    #[serde(default, skip_serializing_if = "String::is_empty")]
89    pub id: String,
90    #[serde(default, skip_serializing_if = "String::is_empty")]
91    pub name: String,
92    #[serde(default, skip_serializing_if = "String::is_empty")]
93    pub asserted_by: String,
94}
95
96#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
97pub struct Citation {
98    #[serde(default, skip_serializing_if = "String::is_empty")]
99    pub key: String,
100    #[serde(default, skip_serializing_if = "String::is_empty")]
101    pub id: String,
102    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
103    pub type_: String,
104    #[serde(default, skip_serializing_if = "String::is_empty")]
105    pub citation: String,
106}
107
108#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
109pub struct Container {
110    #[serde(default, skip_serializing_if = "String::is_empty")]
111    pub identifier: String,
112    #[serde(default, skip_serializing_if = "String::is_empty")]
113    pub identifier_type: String,
114    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
115    pub type_: String,
116    #[serde(default, skip_serializing_if = "String::is_empty")]
117    pub title: String,
118    #[serde(default, skip_serializing_if = "String::is_empty")]
119    pub description: String,
120    #[serde(default, skip_serializing_if = "String::is_empty")]
121    pub language: String,
122    #[serde(default, skip_serializing_if = "Option::is_none")]
123    pub license: Option<License>,
124    #[serde(default, skip_serializing_if = "String::is_empty")]
125    pub platform: String,
126    #[serde(default, skip_serializing_if = "String::is_empty")]
127    pub image: String,
128    #[serde(default, skip_serializing_if = "String::is_empty")]
129    pub first_page: String,
130    #[serde(default, skip_serializing_if = "String::is_empty")]
131    pub last_page: String,
132    #[serde(default, skip_serializing_if = "String::is_empty")]
133    pub volume: String,
134    #[serde(default, skip_serializing_if = "String::is_empty")]
135    pub issue: String,
136}
137
138impl Container {
139    pub fn is_empty(&self) -> bool {
140        self.identifier.is_empty()
141            && self.identifier_type.is_empty()
142            && self.type_.is_empty()
143            && self.title.is_empty()
144            && self.description.is_empty()
145            && self.language.is_empty()
146            && self.license.is_none()
147            && self.platform.is_empty()
148            && self.image.is_empty()
149            && self.first_page.is_empty()
150            && self.last_page.is_empty()
151            && self.volume.is_empty()
152            && self.issue.is_empty()
153    }
154}
155
156/// `type_` is "Person" or "Organization"; exactly one of `person`/
157/// `organization` is set accordingly.
158#[derive(Debug, Clone, Default, PartialEq, Serialize)]
159pub struct Contributor {
160    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
161    pub type_: String,
162    #[serde(default, skip_serializing_if = "Option::is_none")]
163    pub person: Option<Person>,
164    #[serde(default, skip_serializing_if = "Option::is_none")]
165    pub organization: Option<Organization>,
166    #[serde(default, skip_serializing_if = "Vec::is_empty", alias = "contributor_roles")]
167    pub roles: Vec<String>,
168}
169
170impl Contributor {
171    pub fn person(person: Person, roles: Vec<String>) -> Self {
172        Contributor {
173            type_: "Person".to_string(),
174            person: Some(person),
175            organization: None,
176            roles,
177        }
178    }
179
180    pub fn organization(organization: Organization, roles: Vec<String>) -> Self {
181        Contributor {
182            type_: "Organization".to_string(),
183            person: None,
184            organization: Some(organization),
185            roles,
186        }
187    }
188
189    pub fn given_name(&self) -> &str {
190        self.person.as_ref().map_or("", |p| p.given_name.as_str())
191    }
192
193    pub fn family_name(&self) -> &str {
194        self.person.as_ref().map_or("", |p| p.family_name.as_str())
195    }
196
197    /// The person's ORCID, or the organization's name when this contributor
198    /// has no person (i.e. a display name regardless of contributor type).
199    pub fn name(&self) -> String {
200        if let Some(p) = &self.person {
201            format!("{} {}", p.given_name, p.family_name)
202                .trim()
203                .to_string()
204        } else {
205            self.organization
206                .as_ref()
207                .map(|o| o.name.clone())
208                .unwrap_or_default()
209        }
210    }
211
212    pub fn id(&self) -> &str {
213        self.person
214            .as_ref()
215            .map(|p| p.id.as_str())
216            .or_else(|| self.organization.as_ref().map(|o| o.id.as_str()))
217            .unwrap_or("")
218    }
219
220    pub fn affiliations(&self) -> &[Affiliation] {
221        self.person.as_ref().map_or(&[], |p| p.affiliations.as_slice())
222    }
223}
224
225#[derive(Debug, Clone, Default, Deserialize)]
226struct ContributorInput {
227    #[serde(default)]
228    id: String,
229    #[serde(rename = "type", default)]
230    type_: String,
231    #[serde(default)]
232    name: String,
233    #[serde(default, alias = "givenName", alias = "given_name")]
234    given_name: String,
235    #[serde(default, alias = "familyName", alias = "family_name")]
236    family_name: String,
237    #[serde(default)]
238    affiliations: Vec<Affiliation>,
239    #[serde(default, alias = "roles", alias = "contributor_roles")]
240    roles: Vec<String>,
241    #[serde(default)]
242    person: Option<Person>,
243    #[serde(default)]
244    organization: Option<Organization>,
245}
246
247/// Accepts both the v1.0 `{type, person: {...}, organization: {...}, roles}`
248/// shape and a flat legacy shape (`id`/`name`/`given_name`/`family_name`/
249/// `affiliations`/`contributor_roles` directly on the contributor object).
250impl<'de> Deserialize<'de> for Contributor {
251    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
252    where
253        D: Deserializer<'de>,
254    {
255        let input = ContributorInput::deserialize(deserializer)?;
256
257        let person = input.person.or_else(|| {
258            if input.given_name.is_empty() && input.family_name.is_empty() && input.id.is_empty()
259            {
260                None
261            } else {
262                Some(Person {
263                    id: input.id.clone(),
264                    given_name: input.given_name,
265                    family_name: input.family_name,
266                    affiliations: input.affiliations.clone(),
267                    asserted_by: String::new(),
268                })
269            }
270        });
271
272        let organization = input.organization.or_else(|| {
273            if person.is_some() || input.name.is_empty() {
274                None
275            } else {
276                Some(Organization {
277                    id: input.id.clone(),
278                    name: input.name,
279                    asserted_by: String::new(),
280                })
281            }
282        });
283
284        let type_ = if !input.type_.is_empty() {
285            input.type_
286        } else if person.is_some() {
287            "Person".to_string()
288        } else if organization.is_some() {
289            "Organization".to_string()
290        } else {
291            String::new()
292        };
293
294        Ok(Contributor {
295            type_,
296            person,
297            organization,
298            roles: input.roles,
299        })
300    }
301}
302
303#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
304pub struct Person {
305    #[serde(default, skip_serializing_if = "String::is_empty")]
306    pub id: String,
307    #[serde(default, skip_serializing_if = "String::is_empty")]
308    pub given_name: String,
309    #[serde(default, skip_serializing_if = "String::is_empty")]
310    pub family_name: String,
311    #[serde(default, skip_serializing_if = "Vec::is_empty")]
312    pub affiliations: Vec<Affiliation>,
313    #[serde(default, skip_serializing_if = "String::is_empty")]
314    pub asserted_by: String,
315}
316
317#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
318pub struct Organization {
319    #[serde(default, skip_serializing_if = "String::is_empty")]
320    pub id: String,
321    #[serde(default, skip_serializing_if = "String::is_empty")]
322    pub name: String,
323    #[serde(default, skip_serializing_if = "String::is_empty")]
324    pub asserted_by: String,
325}
326
327/// "Other dates" beyond `date_published`/`date_updated`. All fields are
328/// ISO 8601 date strings. `collected`/`valid`/`other`/`copyrighted` are
329/// carried for DataCite/InvenioRDM round-tripping and aren't part of the
330/// v1.0 schema's `dates` definition.
331#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
332pub struct Dates {
333    #[serde(default, skip_serializing_if = "String::is_empty")]
334    pub created: String,
335    #[serde(default, skip_serializing_if = "String::is_empty")]
336    pub submitted: String,
337    #[serde(default, skip_serializing_if = "String::is_empty")]
338    pub accepted: String,
339    #[serde(default, skip_serializing_if = "String::is_empty")]
340    pub accessed: String,
341    #[serde(default, skip_serializing_if = "String::is_empty")]
342    pub available: String,
343    #[serde(default, skip_serializing_if = "String::is_empty")]
344    pub withdrawn: String,
345    #[serde(default, skip_serializing_if = "String::is_empty")]
346    pub collected: String,
347    #[serde(default, skip_serializing_if = "String::is_empty")]
348    pub valid: String,
349    #[serde(default, skip_serializing_if = "String::is_empty")]
350    pub copyrighted: String,
351    #[serde(default, skip_serializing_if = "String::is_empty")]
352    pub other: String,
353}
354
355impl Dates {
356    pub fn is_empty(&self) -> bool {
357        self.created.is_empty()
358            && self.submitted.is_empty()
359            && self.accepted.is_empty()
360            && self.accessed.is_empty()
361            && self.available.is_empty()
362            && self.withdrawn.is_empty()
363            && self.collected.is_empty()
364            && self.valid.is_empty()
365            && self.copyrighted.is_empty()
366            && self.other.is_empty()
367    }
368}
369
370#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
371pub struct Description {
372    pub description: String,
373    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
374    pub type_: String,
375    #[serde(default, skip_serializing_if = "String::is_empty")]
376    pub language: String,
377}
378
379#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
380pub struct File {
381    #[serde(default, skip_serializing_if = "String::is_empty")]
382    pub bucket: String,
383    #[serde(default, skip_serializing_if = "String::is_empty")]
384    pub key: String,
385    #[serde(default, skip_serializing_if = "String::is_empty")]
386    pub checksum: String,
387    pub url: String,
388    #[serde(default, skip_serializing_if = "is_zero_i64")]
389    pub size: i64,
390    #[serde(default, skip_serializing_if = "String::is_empty")]
391    pub mime_type: String,
392}
393
394#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
395pub struct FundingReference {
396    #[serde(default, skip_serializing_if = "String::is_empty")]
397    pub funder_id: String,
398    #[serde(default, skip_serializing_if = "String::is_empty")]
399    pub funder_name: String,
400    #[serde(default, skip_serializing_if = "String::is_empty")]
401    pub award_id: String,
402    #[serde(default, skip_serializing_if = "String::is_empty")]
403    pub award_title: String,
404    #[serde(default, skip_serializing_if = "String::is_empty")]
405    pub award_number: String,
406    #[serde(default, skip_serializing_if = "String::is_empty")]
407    pub asserted_by: String,
408}
409
410/// Flattened to match the v1.0 schema's `geo_locations` shape directly
411/// (no nested point/box objects).
412#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
413pub struct GeoLocation {
414    #[serde(default, skip_serializing_if = "String::is_empty")]
415    pub geo_location_place: String,
416    #[serde(default, skip_serializing_if = "Option::is_none")]
417    pub geo_location_point_longitude: Option<f64>,
418    #[serde(default, skip_serializing_if = "Option::is_none")]
419    pub geo_location_point_latitude: Option<f64>,
420    #[serde(default, skip_serializing_if = "Option::is_none")]
421    pub geo_location_box_west_longitude: Option<f64>,
422    #[serde(default, skip_serializing_if = "Option::is_none")]
423    pub geo_location_box_east_longitude: Option<f64>,
424    #[serde(default, skip_serializing_if = "Option::is_none")]
425    pub geo_location_box_south_latitude: Option<f64>,
426    #[serde(default, skip_serializing_if = "Option::is_none")]
427    pub geo_location_box_north_latitude: Option<f64>,
428    #[serde(default, skip_serializing_if = "String::is_empty")]
429    pub geo_location_polygon: String,
430}
431
432#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
433pub struct Identifier {
434    pub identifier: String,
435    pub identifier_type: String,
436    #[serde(default, skip_serializing_if = "String::is_empty")]
437    pub asserted_by: String,
438}
439
440#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
441pub struct License {
442    #[serde(default, skip_serializing_if = "String::is_empty")]
443    pub id: String,
444    #[serde(default, skip_serializing_if = "String::is_empty")]
445    pub title: String,
446    #[serde(default, skip_serializing_if = "String::is_empty")]
447    pub url: String,
448    #[serde(default, skip_serializing_if = "String::is_empty")]
449    pub asserted_by: String,
450}
451
452impl License {
453    pub fn is_empty(&self) -> bool {
454        self.id.is_empty()
455            && self.title.is_empty()
456            && self.url.is_empty()
457            && self.asserted_by.is_empty()
458    }
459}
460
461#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
462pub struct Publisher {
463    #[serde(default, skip_serializing_if = "String::is_empty")]
464    pub id: String,
465    #[serde(default, skip_serializing_if = "String::is_empty")]
466    pub name: String,
467    #[serde(default, skip_serializing_if = "String::is_empty")]
468    pub asserted_by: String,
469}
470
471impl Publisher {
472    pub fn is_empty(&self) -> bool {
473        self.id.is_empty() && self.name.is_empty() && self.asserted_by.is_empty()
474    }
475}
476
477/// `publisher`/`publication_year`/`volume`/`issue`/`first_page`/`last_page`/
478/// `unstructured`/`asserted_by` ride along for internal use (e.g. the
479/// crossref_xml and InvenioRDM writers); only `key`/`id`/`type_`/
480/// `reference` are part of the v1.0 schema's `references` definition.
481#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
482pub struct Reference {
483    #[serde(default, skip_serializing_if = "String::is_empty")]
484    pub key: String,
485    #[serde(default, skip_serializing_if = "String::is_empty")]
486    pub id: String,
487    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
488    pub type_: String,
489    #[serde(default, skip_serializing_if = "String::is_empty")]
490    pub reference: String,
491    #[serde(default, skip_serializing_if = "String::is_empty")]
492    pub title: String,
493    #[serde(default, skip_serializing_if = "String::is_empty")]
494    pub publisher: String,
495    #[serde(default, skip_serializing_if = "String::is_empty")]
496    pub publication_year: String,
497    #[serde(default, skip_serializing_if = "String::is_empty")]
498    pub volume: String,
499    #[serde(default, skip_serializing_if = "String::is_empty")]
500    pub issue: String,
501    #[serde(default, skip_serializing_if = "String::is_empty")]
502    pub first_page: String,
503    #[serde(default, skip_serializing_if = "String::is_empty")]
504    pub last_page: String,
505    #[serde(default, skip_serializing_if = "String::is_empty")]
506    pub unstructured: String,
507    #[serde(default, skip_serializing_if = "String::is_empty")]
508    pub asserted_by: String,
509}
510
511#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
512pub struct Relation {
513    pub id: String,
514    #[serde(rename = "type")]
515    pub type_: String,
516    #[serde(default, skip_serializing_if = "String::is_empty")]
517    pub asserted_by: String,
518}
519
520#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
521pub struct Subject {
522    #[serde(default, skip_serializing_if = "String::is_empty")]
523    pub id: String,
524    pub subject: String,
525    #[serde(default, skip_serializing_if = "String::is_empty")]
526    pub language: String,
527}
528
529#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
530pub struct Title {
531    #[serde(default, skip_serializing_if = "String::is_empty")]
532    pub title: String,
533    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
534    pub type_: String,
535    #[serde(default, skip_serializing_if = "String::is_empty")]
536    pub language: String,
537}