1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
4use crate::author_utils::{
5 cleanup_author, infer_contributor_type, normalize_contributor_roles, parse_affiliation_value,
6 split_person_name,
7};
8use crate::constants as C;
9use crate::data::{
10 Citation, Container, Contributor, Data, Description, File, FundingReference, Identifier,
11 Organization, Person, Publisher, Reference, Relation, Subject,
12};
13use crate::doi_utils::normalize_doi;
14use crate::error::{Error, Result};
15use crate::utils::{
16 get_language, issn_as_url, normalize_id, normalize_orcid, normalize_ror, normalize_url,
17 sanitize,
18};
19
20#[derive(Deserialize, Default)]
23struct Content {
24 id: Option<Value>,
26 #[serde(default)]
28 doi: String,
29 #[serde(default)]
31 conceptdoi: String,
32 #[serde(default)]
33 parent: Parent,
34 #[serde(default)]
35 pids: Pids,
36 links: Option<ContentLinks>,
37 #[serde(default)]
39 updated: String,
40 metadata: MetadataJSON,
41 #[serde(rename = "custom_fields", default)]
42 custom_fields: CustomFields,
43 #[serde(default)]
45 files: Option<Value>,
46}
47
48#[derive(Deserialize, Default)]
49struct ContentLinks {
50 #[serde(rename = "self_html", default)]
51 self_html: String,
52}
53
54#[derive(Deserialize, Default)]
55struct Parent {
56 #[serde(default)]
57 #[allow(dead_code)]
58 id: String,
59 #[serde(default)]
60 communities: Communities,
61}
62
63#[derive(Deserialize, Default)]
64struct Pids {
65 #[serde(default)]
66 doi: Doi,
67}
68
69#[derive(Deserialize, Default)]
70struct Doi {
71 #[serde(default)]
72 identifier: String,
73}
74
75#[derive(Deserialize, Default)]
76struct MetadataJSON {
77 #[serde(rename = "resource_type", default)]
78 resource_type: ResourceType,
79 #[serde(default)]
80 creators: Vec<Creator>,
81 #[serde(default)]
82 contributors: Vec<Creator>,
83 #[serde(default)]
84 funding: Vec<Funding>,
85 #[serde(default)]
86 grants: Vec<Grant>,
87 #[serde(default)]
89 dates: Vec<DateJSON>,
90 #[serde(default)]
91 description: String,
92 #[serde(default)]
93 notes: String,
94 #[serde(default)]
95 identifiers: Vec<InvenioIdentifier>,
96 #[serde(default)]
97 keywords: Vec<Value>,
98 #[serde(default)]
99 language: String,
100 #[serde(default)]
101 languages: Vec<Language>,
102 license: Option<OldLicense>,
104 #[serde(default)]
105 publisher: String,
106 #[serde(rename = "publication_date", default)]
107 publication_date: String,
108 #[serde(default)]
109 references: Vec<InvenioReference>,
110 #[serde(rename = "related_identifiers", default)]
111 related_identifiers: Vec<RelatedIdentifier>,
112 #[serde(default)]
113 rights: Vec<Right>,
114 #[serde(default)]
115 subjects: Vec<Subject_>,
116 #[serde(default)]
117 title: String,
118 #[serde(default)]
119 version: String,
120}
121
122#[derive(Deserialize, Default)]
123struct ResourceType {
124 #[serde(default)]
125 id: String,
126 #[serde(default)]
127 subtype: String,
128 #[serde(rename = "type", default)]
129 type_: String,
130}
131
132#[derive(Deserialize, Default)]
133struct Creator {
134 #[serde(rename = "person_or_org", default)]
135 person_or_org: PersonOrOrg,
136 #[serde(default)]
137 affiliations: Vec<InvenioAffiliation>,
138 role: Option<ContributorRole>,
140 #[serde(default)]
142 name: String,
143 #[serde(default)]
144 orcid: String,
145 #[serde(default)]
146 affiliation: String,
147}
148
149#[derive(Deserialize, Default)]
150struct ContributorRole {
151 #[serde(default)]
152 id: String,
153}
154
155#[derive(Deserialize, Default)]
156struct PersonOrOrg {
157 #[serde(rename = "type", default)]
158 type_: String,
159 #[serde(default)]
160 name: String,
161 #[serde(rename = "given_name", default)]
162 given_name: String,
163 #[serde(rename = "family_name", default)]
164 family_name: String,
165 #[serde(default)]
166 identifiers: Vec<InvenioIdentifier>,
167}
168
169#[derive(Deserialize, Default)]
170struct InvenioAffiliation {
171 #[serde(default)]
172 id: String,
173 #[serde(default)]
174 name: String,
175}
176
177#[derive(Deserialize, Default, Clone)]
178struct InvenioIdentifier {
179 #[serde(default)]
180 identifier: String,
181 #[serde(default)]
182 scheme: String,
183}
184
185#[derive(Deserialize, Default)]
186struct Funding {
187 #[serde(default)]
188 funder: Funder,
189 #[serde(default)]
190 award: Award,
191}
192
193#[derive(Deserialize, Default)]
194struct Funder {
195 #[serde(default)]
196 id: String,
197 #[serde(default)]
198 name: String,
199}
200
201#[derive(Deserialize, Default)]
202struct Award {
203 #[serde(default)]
204 #[allow(dead_code)]
205 id: String,
206 #[serde(default)]
207 number: String,
208 title: Option<AwardTitle>,
209 #[serde(default)]
210 identifiers: Vec<InvenioIdentifier>,
211}
212
213#[derive(Deserialize, Default)]
214struct AwardTitle {
215 #[serde(default)]
216 en: String,
217}
218
219#[derive(Deserialize, Default)]
220struct Grant {
221 #[serde(default)]
222 code: String,
223 #[serde(default)]
224 funder: LegacyFunder,
225 #[serde(default)]
226 title: String,
227 #[serde(default)]
228 url: String,
229}
230
231#[derive(Deserialize, Default)]
232struct LegacyFunder {
233 #[serde(default)]
234 doi: String,
235 #[serde(default)]
236 name: String,
237}
238
239#[derive(Deserialize, Default)]
240struct DateJSON {
241 #[serde(default)]
242 date: String,
243 #[serde(rename = "type")]
245 type_: Option<Value>,
246}
247
248#[derive(Deserialize, Default)]
249struct Language {
250 #[serde(default)]
251 id: String,
252}
253
254#[derive(Deserialize, Default)]
255struct OldLicense {
256 #[serde(default)]
257 id: String,
258}
259
260#[derive(Deserialize, Default)]
261struct InvenioReference {
262 #[serde(default)]
263 reference: String,
264 #[serde(default)]
265 scheme: String,
266 #[serde(default)]
267 identifier: String,
268}
269
270#[derive(Deserialize, Default)]
271struct RelatedIdentifier {
272 #[serde(default)]
273 identifier: String,
274 #[serde(default)]
275 scheme: String,
276 #[serde(rename = "relation_type", default)]
277 relation_type: RelationType,
278 #[serde(default)]
280 relation: String,
281}
282
283#[derive(Deserialize, Default)]
284struct RelationType {
285 #[serde(default)]
286 id: String,
287}
288
289#[derive(Deserialize, Default)]
290struct Right {
291 #[serde(default)]
292 id: String,
293 #[serde(default)]
294 #[allow(dead_code)]
295 props: RightProps,
296}
297
298#[derive(Deserialize, Default)]
299struct RightProps {
300 #[allow(dead_code)]
301 #[serde(default)]
302 url: String,
303}
304
305#[derive(Deserialize, Default)]
306struct Subject_ {
307 #[serde(default)]
308 #[allow(dead_code)]
309 id: String,
310 #[serde(default)]
311 subject: String,
312 #[serde(default)]
313 scheme: String,
314}
315
316#[derive(Deserialize, Default)]
317struct Communities {
318 #[serde(default)]
319 default: String,
320 #[serde(default)]
321 entries: Vec<Community>,
322}
323
324#[derive(Deserialize, Default)]
325struct Community {
326 #[serde(default)]
327 #[allow(dead_code)]
328 id: String,
329 #[serde(default)]
330 slug: String,
331 #[serde(default)]
332 #[allow(dead_code)]
333 metadata: CommunityMetadata,
334}
335
336#[derive(Deserialize, Default)]
337struct CommunityMetadata {
338 #[serde(rename = "type", default)]
339 #[allow(dead_code)]
340 type_: CommunityType,
341}
342
343#[derive(Deserialize, Default)]
344struct CommunityType {
345 #[serde(default)]
346 #[allow(dead_code)]
347 id: String,
348}
349
350#[derive(Deserialize, Default)]
351struct CustomFields {
352 #[serde(rename = "journal:journal", default)]
353 journal: Journal,
354 #[serde(rename = "rs:content_html", default)]
355 content_html: String,
356 #[serde(rename = "rs:image", default)]
357 feature_image: String,
358 #[serde(rename = "rs:generator", default)]
359 generator: String,
360 #[serde(rename = "rs:citations", default)]
361 citations: Vec<InvenioReference>,
362}
363
364#[derive(Deserialize, Default)]
365struct Journal {
366 #[serde(default)]
367 title: String,
368 #[serde(default)]
369 issn: String,
370 #[serde(default)]
371 volume: String,
372 #[serde(default)]
373 issue: String,
374 #[serde(default)]
375 pages: String,
376}
377
378#[derive(Deserialize, Default)]
379struct ContentFile {
380 #[serde(default)]
381 bucket: String,
382 #[serde(default)]
383 key: String,
384 #[serde(default)]
385 checksum: String,
386 links: Option<FileLinks>,
387 #[serde(default)]
388 size: i64,
389 #[serde(rename = "type", default)]
390 type_: String,
391}
392
393#[derive(Deserialize, Default)]
394struct FileLinks {
395 #[serde(rename = "self", default)]
396 self_: String,
397}
398
399fn invenio_to_cm_type(id: &str) -> &'static str {
402 C::inveniordm_to_cm(id)
403}
404
405fn is_valid_relation_type(t: &str) -> bool {
406 C::COMMONMETA_RELATION_TYPES.contains(&t)
407}
408
/// Maps an InvenioRDM relation type id to its commonmeta relation name.
///
/// The lookup is exact and case-sensitive. Returns the empty string for ids
/// without a mapping.
fn invenio_to_cm_relation(id: &str) -> &'static str {
    /// `(InvenioRDM id, commonmeta name)` pairs.
    const RELATIONS: &[(&str, &str)] = &[
        ("iscitedby", "IsCitedBy"),
        ("cites", "Cites"),
        ("issupplementto", "IsSupplementTo"),
        ("issupplementedby", "IsSupplementedBy"),
        ("iscontinuedby", "IsContinuedBy"),
        ("continues", "Continues"),
        ("isnewversionof", "IsNewVersionOf"),
        // Both spellings are accepted for the same relation.
        ("ispreviousversion", "IsPreviousVersionOf"),
        ("ispreviousversionof", "IsPreviousVersionOf"),
        ("ispartof", "IsPartOf"),
        ("haspart", "HasPart"),
        ("isreferencedby", "IsReferencedBy"),
        ("references", "References"),
        ("isdocumentedby", "IsDocumentedBy"),
        ("documents", "Documents"),
        ("iscompiledby", "IsCompiledBy"),
        ("compiles", "Compiles"),
        ("isvariantformof", "IsVariantFormOf"),
        ("isoriginalformof", "IsOriginalFormOf"),
        ("isidenticalto", "IsIdenticalTo"),
        ("istranslationof", "IsTranslationOf"),
        // The review relations are renamed, not just re-cased.
        ("isreviewedby", "HasReview"),
        ("reviews", "IsReviewOf"),
        ("ispreprintof", "IsPreprintOf"),
        ("haspreprint", "HasPreprint"),
        ("isderivedfrom", "IsDerivedFrom"),
        ("issourceof", "IsSourceOf"),
        ("describes", "Describes"),
        ("isdescribedby", "IsDescribedBy"),
        ("ismetadatafor", "IsMetadataFor"),
        ("hasmetadata", "HasMetadata"),
        ("isannotatedby", "IsAnnotatedBy"),
        ("annotates", "Annotates"),
        ("iscorrectedby", "IsCorrectedBy"),
        ("corrects", "Corrects"),
    ];

    RELATIONS
        .iter()
        .find(|entry| entry.0 == id)
        .map(|entry| entry.1)
        .unwrap_or("")
}
449
/// Reports whether a relation type id denotes an outgoing reference
/// (`cites` or `references`); such relations become references, not relations.
fn is_reference_relation(id: &str) -> bool {
    id == "cites" || id == "references"
}
453
/// Reports whether `doi` is registered under one of the Rogue Scholar DOI prefixes.
///
/// `doi` may be a bare DOI (`10.59350/abc`) or a resolver URL
/// (`https://doi.org/10.59350/abc`). The registrant prefix — the text from the
/// first `10.` up to the next `/` — must equal a known prefix exactly, so a
/// longer prefix such as `10.593501` or a known prefix that merely appears
/// inside the DOI suffix does not match.
fn is_rogue_scholar_doi(doi: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "10.13003", "10.53731", "10.54900", "10.59347", "10.59348", "10.59349", "10.59350",
        "10.63485", "10.63517", "10.64000", "10.64395", "10.65527",
    ];

    // Everything before the first "10." is a scheme or resolver host.
    let Some(start) = doi.find("10.") else {
        return false;
    };
    let prefix = doi[start..].split('/').next().unwrap_or_default();
    PREFIXES.contains(&prefix)
}
462
463fn get_contributor(v: &Creator, default_role: &str) -> Contributor {
466 if !v.name.is_empty()
468 && v.person_or_org.name.is_empty()
469 && v.person_or_org.family_name.is_empty()
470 {
471 return get_zenodo_contributor(v, default_role);
472 }
473
474 let raw_type = match v.person_or_org.type_.as_str() {
475 "personal" => "Person",
476 "organizational" => "Organization",
477 _ => "",
478 }
479 .to_string();
480
481 let mut id = String::new();
482 for ni in &v.person_or_org.identifiers {
483 match ni.scheme.as_str() {
484 "orcid" => {
485 id = normalize_orcid(&ni.identifier);
486 break;
487 }
488 "ror" | "ROR" => {
489 id = normalize_ror(&ni.identifier);
490 break;
491 }
492 _ => {}
493 }
494 }
495
496 let name = cleanup_author(Some(&v.person_or_org.name)).unwrap_or(v.person_or_org.name.clone());
497 let mut given_name = v.person_or_org.given_name.clone();
498 let mut family_name = v.person_or_org.family_name.clone();
499
500 let mut type_ = infer_contributor_type(
501 &raw_type,
502 &id,
503 &given_name,
504 &family_name,
505 &name,
506 None,
507 );
508
509 if type_.is_empty() {
510 type_ = "Organization".to_string();
511 }
512
513 let mut name_out = name.clone();
515 if type_ == "Person" && !name_out.is_empty() && given_name.is_empty() && family_name.is_empty() {
516 let (given, family, remainder) = split_person_name(&name_out);
517 if !given.is_empty() || !family.is_empty() {
518 given_name = given;
519 family_name = family;
520 name_out = String::new();
521 } else {
522 name_out = remainder;
523 }
524 }
525
526 let affiliations = v
527 .affiliations
528 .iter()
529 .filter_map(|a| {
530 let value = serde_json::json!({"id": a.id, "name": a.name});
531 parse_affiliation_value(&value)
532 })
533 .collect();
534
535 let roles = normalize_contributor_roles(&[default_role.to_string()], default_role);
536
537 if type_ == "Person" {
538 Contributor::person(
539 Person { id, given_name, family_name, affiliations, asserted_by: String::new() },
540 roles,
541 )
542 } else {
543 Contributor::organization(
544 Organization { id, name: name_out, asserted_by: String::new() },
545 roles,
546 )
547 }
548}
549
550fn get_zenodo_contributor(v: &Creator, default_role: &str) -> Contributor {
551 let mut id = String::new();
552
553 if !v.orcid.is_empty() {
554 id = normalize_orcid(&v.orcid);
555 }
556
557 let cleaned_name = cleanup_author(Some(&v.name)).unwrap_or(v.name.clone());
558 let (given_name, family_name, name) = split_person_name(&cleaned_name);
559
560 let mut type_ = infer_contributor_type("", &id, &given_name, &family_name, &cleaned_name, None);
561 if type_.is_empty() {
562 type_ = "Organization".to_string();
563 }
564
565 let mut family_name_out = family_name;
566 let mut name_out = name;
567 if type_ == "Person" && family_name_out.is_empty() && !name_out.is_empty() {
568 family_name_out = name_out.clone();
569 name_out = String::new();
570 }
571
572 let affiliations = if !v.affiliation.is_empty() {
573 parse_affiliation_value(&Value::String(v.affiliation.clone()))
574 .into_iter()
575 .collect()
576 } else {
577 vec![]
578 };
579
580 let roles = normalize_contributor_roles(&[default_role.to_string()], default_role);
581
582 if type_ == "Person" {
583 Contributor::person(
584 Person { id, given_name, family_name: family_name_out, affiliations, asserted_by: String::new() },
585 roles,
586 )
587 } else {
588 Contributor::organization(
589 Organization { id, name: name_out, asserted_by: String::new() },
590 roles,
591 )
592 }
593}
594
595fn normalize_reference_id(scheme: &str, identifier: &str) -> String {
598 if identifier.is_empty() {
599 return String::new();
600 }
601 match scheme {
602 "doi" => normalize_doi(identifier),
603 "url" => normalize_url(identifier, true, false).unwrap_or_default(),
604 _ => normalize_id(identifier),
605 }
606}
607
608fn normalize_relation_id(scheme: &str, identifier: &str) -> String {
611 if identifier.is_empty() {
612 return String::new();
613 }
614 match scheme {
615 "doi" => normalize_doi(identifier),
616 _ => normalize_url(identifier, true, false).unwrap_or_else(|| normalize_id(identifier)),
617 }
618}
619
/// Splits a page string into `(first_page, last_page)`.
///
/// Separators are tried in the order `--`, `-`, en dash, em dash; the first
/// separator kind that occurs splits the string at its first occurrence, and
/// both halves are trimmed. Without a separator the whole trimmed string is
/// the first page and the last page is empty.
fn parse_pages_range(pages: &str) -> (String, String) {
    let pages = pages.trim();

    ["--", "-", "–", "—"]
        .into_iter()
        .find_map(|sep| pages.split_once(sep))
        .map(|(first, last)| (first.trim().to_string(), last.trim().to_string()))
        .unwrap_or_else(|| (pages.to_string(), String::new()))
}
636
637fn map_relation_type(raw: &str) -> String {
640 let mapped = invenio_to_cm_relation(raw);
641 if !mapped.is_empty() {
642 return mapped.to_string();
643 }
644 if raw.is_empty() {
646 return String::new();
647 }
648 let mut chars = raw.chars();
649 match chars.next() {
650 None => String::new(),
651 Some(c) => c.to_uppercase().to_string() + chars.as_str(),
652 }
653}
654
655fn from_content(content: Content) -> Data {
658 let mut data = Data {
659 id: if !content.doi.is_empty() {
661 normalize_doi(&content.doi)
662 } else {
663 normalize_doi(&content.pids.doi.identifier)
664 },
665 ..Data::default()
666 };
667
668 let rt = &content.metadata.resource_type;
670 let type_id = if !rt.type_.is_empty() {
671 &rt.type_
672 } else if !rt.id.is_empty() {
673 &rt.id
674 } else {
675 &rt.subtype
676 };
677 let cm_type = invenio_to_cm_type(type_id);
678 data.type_ = if cm_type.is_empty() {
679 "Other".to_string()
680 } else {
681 cm_type.to_string()
682 };
683
684 let self_html = content
686 .links
687 .as_ref()
688 .map(|l| l.self_html.as_str())
689 .unwrap_or("");
690 let host = url::Url::parse(self_html)
691 .ok()
692 .and_then(|u| u.host_str().map(|s| s.to_string()))
693 .unwrap_or_default();
694 let is_zenodo = host == "zenodo.org";
695 let is_rogue_scholar = is_rogue_scholar_doi(&data.id);
696
697 if is_rogue_scholar {
701 if let Some(url_id) = content
702 .metadata
703 .identifiers
704 .iter()
705 .find(|i| i.scheme == "url")
706 {
707 data.url = normalize_url(&url_id.identifier, true, false).unwrap_or_default();
708 }
709 } else if !self_html.is_empty() {
710 data.url = normalize_url(self_html, true, false).unwrap_or_default();
711 }
712
713 if is_zenodo {
715 let container_type = if data.type_ == "Dataset" {
716 "DataRepository"
717 } else {
718 "Repository"
719 };
720 data.container = Container {
721 identifier: "https://www.re3data.org/repository/r3d100010468".to_string(),
722 identifier_type: "URL".to_string(),
723 type_: container_type.to_string(),
724 title: "Zenodo".to_string(),
725 ..Default::default()
726 };
727 data.publisher = Publisher {
728 name: "Zenodo".to_string(),
729 ..Default::default()
730 };
731 } else if is_rogue_scholar {
732 let slug = content
733 .parent
734 .communities
735 .entries
736 .first()
737 .map(|e| e.slug.as_str())
738 .unwrap_or("");
739 let issn = &content.custom_fields.journal.issn;
740 let (identifier, identifier_type) = if !issn.is_empty() {
741 (issn.clone(), "ISSN".to_string())
742 } else if !slug.is_empty() {
743 (
744 format!("https://rogue-scholar.org/communities/{}", slug),
745 "URL".to_string(),
746 )
747 } else {
748 (String::new(), String::new())
749 };
750 let (first_page, last_page) = parse_pages_range(&content.custom_fields.journal.pages);
751 data.container = Container {
752 type_: "Blog".to_string(),
753 title: content.custom_fields.journal.title.clone(),
754 identifier,
755 identifier_type,
756 platform: content.custom_fields.generator.clone(),
757 volume: content.custom_fields.journal.volume.clone(),
758 issue: content.custom_fields.journal.issue.clone(),
759 first_page,
760 last_page,
761 ..Default::default()
762 };
763 data.publisher = Publisher {
764 name: "Front Matter".to_string(),
765 ..Default::default()
766 };
767 } else if !content.custom_fields.journal.title.is_empty()
768 || !content.custom_fields.journal.issn.is_empty()
769 {
770 let issn = &content.custom_fields.journal.issn;
771 let (identifier, identifier_type) = if !issn.is_empty() {
772 (issn.clone(), "ISSN".to_string())
773 } else {
774 (String::new(), String::new())
775 };
776 let (first_page, last_page) = parse_pages_range(&content.custom_fields.journal.pages);
777 data.container = Container {
778 type_: "Periodical".to_string(),
779 title: content.custom_fields.journal.title.clone(),
780 identifier,
781 identifier_type,
782 platform: content.custom_fields.generator.clone(),
783 volume: content.custom_fields.journal.volume.clone(),
784 issue: content.custom_fields.journal.issue.clone(),
785 first_page,
786 last_page,
787 ..Default::default()
788 };
789 }
790
791 if data.publisher.name.is_empty() && !content.metadata.publisher.is_empty() {
793 data.publisher = Publisher {
794 name: content.metadata.publisher.clone(),
795 ..Default::default()
796 };
797 }
798
799 if data.type_ == "Article" && data.publisher.name == "Front Matter" {
801 data.type_ = "BlogPost".to_string();
802 }
803
804 for v in &content.metadata.creators {
806 let contributor = get_contributor(v, "Author");
807 let already = data
808 .contributors
809 .iter()
810 .any(|e| !e.id().is_empty() && e.id() == contributor.id());
811 if !already {
812 data.contributors.push(contributor);
813 }
814 }
815 for v in &content.metadata.contributors {
817 let role = v
818 .role
819 .as_ref()
820 .map(|r| {
821 let mut s = r.id.clone();
822 if let Some(first) = s.get_mut(..1) {
823 first.make_ascii_uppercase();
824 }
825 s
826 })
827 .filter(|s| !s.is_empty())
828 .unwrap_or_else(|| "Other".to_string());
829 let contributor = get_contributor(v, &role);
830 let already = data
831 .contributors
832 .iter()
833 .any(|e| !e.id().is_empty() && e.id() == contributor.id());
834 if !already {
835 data.contributors.push(contributor);
836 }
837 }
838
839 for d in &content.metadata.dates {
841 let t = date_type_str(&d.type_);
842 match t.as_str() {
843 "issued" => data.date_published = d.date.clone(),
844 "updated" => data.date_updated = d.date.clone(),
845 _ => {}
846 }
847 }
848 if data.date_published.is_empty() && !content.metadata.publication_date.is_empty() {
849 data.date_published = content.metadata.publication_date.clone();
850 }
851 if data.date_updated.is_empty() && !content.updated.is_empty() {
853 data.date_updated = strip_milliseconds(&content.updated);
854 }
855
856 if !content.metadata.description.is_empty() {
858 data.description = sanitize(&content.metadata.description);
859 }
860 if !content.metadata.notes.is_empty() {
861 data.additional_descriptions.push(Description {
862 description: sanitize(&content.metadata.notes),
863 type_: "Other".to_string(),
864 ..Default::default()
865 });
866 }
867
868 if !content.custom_fields.feature_image.is_empty() {
870 data.image = content.custom_fields.feature_image.clone();
871 }
872
873 if let Some(files_val) = &content.files
875 && let Ok(files_enabled) = serde_json::from_value::<FilesEnabled>(files_val.clone())
876 && files_enabled.enabled
877 && let Ok(entries) = serde_json::from_value::<FilesWithEntries>(files_val.clone())
878 {
879 for f in entries.entries.values() {
880 if let Ok(cf) = serde_json::from_value::<ContentFile>(f.clone()) {
881 let url = cf
882 .links
883 .as_ref()
884 .map(|l| l.self_.clone())
885 .unwrap_or_default();
886 if !url.is_empty() {
887 let mime_type = if !cf.type_.is_empty() {
888 format!("application/{}", cf.type_)
889 } else {
890 String::new()
891 };
892 data.files.push(File {
893 bucket: cf.bucket,
894 key: cf.key,
895 checksum: cf.checksum,
896 url,
897 size: cf.size,
898 mime_type,
899 });
900 }
901 }
902 }
903 }
904
905 if !content.metadata.funding.is_empty() {
907 for v in &content.metadata.funding {
908 let funder_id = normalize_ror(&v.funder.id);
909 let award_number = v.award.number.clone();
910 let award_title = v
911 .award
912 .title
913 .as_ref()
914 .map(|t| t.en.clone())
915 .unwrap_or_default();
916 let raw_award_uri = v
918 .award
919 .identifiers
920 .first()
921 .map(|i| i.identifier.as_str())
922 .unwrap_or("");
923 let award_uri = if !raw_award_uri.is_empty() {
924 let doi = normalize_doi(raw_award_uri);
925 if !doi.is_empty() {
926 doi
927 } else {
928 normalize_url(raw_award_uri, true, false).unwrap_or_default()
929 }
930 } else {
931 String::new()
932 };
933 data.funding_references.push(FundingReference {
934 funder_id,
935 funder_name: v.funder.name.clone(),
936 award_number,
937 award_title,
938 award_id: award_uri,
939 ..Default::default()
940 });
941 }
942 } else if !content.metadata.grants.is_empty() {
943 for v in &content.metadata.grants {
944 let funder_id = normalize_doi(&v.funder.doi);
945 let award_uri = normalize_url(&v.url, true, false).unwrap_or_default();
946 data.funding_references.push(FundingReference {
947 funder_id,
948 funder_name: v.funder.name.clone(),
949 award_number: v.code.clone(),
950 award_title: v.title.clone(),
951 award_id: award_uri,
952 ..Default::default()
953 });
954 }
955 }
956
957 if !data.id.is_empty() {
959 data.identifiers.push(Identifier {
960 identifier: data.id.clone(),
961 identifier_type: "DOI".to_string(),
962 });
963 }
964 for v in &content.metadata.identifiers {
965 if v.scheme == "url" {
966 continue;
968 }
969 let identifier_type = match v.scheme.as_str() {
970 "doi" => "DOI",
971 "uuid" => "UUID",
972 "guid" => "GUID",
973 _ => continue,
974 };
975 data.identifiers.push(Identifier {
976 identifier: v.identifier.clone(),
977 identifier_type: identifier_type.to_string(),
978 });
979 }
980 if let Some(id_val) = &content.id
982 && let Some(s) = id_val.as_str()
983 && !s.is_empty()
984 {
985 data.identifiers.push(Identifier {
986 identifier: s.to_string(),
987 identifier_type: "RID".to_string(),
988 });
989 }
990
991 if !content.metadata.language.is_empty() {
993 data.language = get_language(&content.metadata.language, "iso639-1");
994 } else if !content.metadata.languages.is_empty() {
995 data.language = get_language(&content.metadata.languages[0].id, "iso639-1");
996 }
997
998 if !content.metadata.rights.is_empty() {
1000 data.license = crate::spdx::from_id(&content.metadata.rights[0].id);
1001 } else if let Some(lic) = &content.metadata.license
1002 && !lic.id.is_empty()
1003 {
1004 data.license = crate::spdx::from_id(&lic.id);
1005 }
1006
1007 data.provider = if is_rogue_scholar {
1009 "Crossref".to_string()
1010 } else {
1011 "DataCite".to_string()
1012 };
1013
1014 for v in &content.metadata.subjects {
1016 if v.id.contains("openalex.org") {
1017 let id_part = v.id.rsplit('/').next().unwrap_or("");
1018 if let Some((id, subject)) = crate::vocabularies::lookup_openalex_subject(id_part) {
1019 let subj = Subject { id, subject, ..Default::default() };
1020 if !data.subjects.contains(&subj) {
1021 data.subjects.push(subj);
1022 }
1023 }
1024 } else {
1025 let s = subject_string(v);
1026 if s.is_empty() {
1027 continue;
1028 }
1029 let subj = Subject { subject: s, ..Default::default() };
1030 if !data.subjects.contains(&subj) {
1031 data.subjects.push(subj);
1032 }
1033 }
1034 }
1035 for kw in &content.metadata.keywords {
1036 let s = match kw {
1037 Value::String(s) => s.clone(),
1038 _ => continue,
1039 };
1040 if s.is_empty() {
1041 continue;
1042 }
1043 let subj = Subject { subject: s, ..Default::default() };
1044 if !data.subjects.contains(&subj) {
1045 data.subjects.push(subj);
1046 }
1047 }
1048
1049 if !content.metadata.references.is_empty() {
1051 for v in &content.metadata.references {
1052 let id = normalize_reference_id(&v.scheme, &v.identifier);
1053 data.references.push(Reference {
1054 id,
1055 unstructured: v.reference.clone(),
1056 ..Default::default()
1057 });
1058 }
1059 } else {
1060 for v in &content.metadata.related_identifiers {
1061 let relation_id = relation_type_id(v);
1062 if is_reference_relation(&relation_id) {
1063 let id = normalize_relation_id(&v.scheme, &v.identifier);
1064 if !id.is_empty() {
1065 data.references.push(Reference {
1066 id,
1067 ..Default::default()
1068 });
1069 }
1070 }
1071 }
1072 }
1073
1074 for v in &content.custom_fields.citations {
1076 let id = normalize_reference_id(&v.scheme, &v.identifier);
1077 data.citations.push(Citation {
1078 id,
1079 citation: v.reference.clone(),
1080 ..Default::default()
1081 });
1082 }
1083
1084 for v in &content.metadata.related_identifiers {
1086 let relation_id = relation_type_id(v);
1087 if is_reference_relation(&relation_id) {
1088 continue;
1089 }
1090 let id = normalize_relation_id(&v.scheme, &v.identifier);
1091 if id.is_empty() {
1092 continue;
1093 }
1094 let type_ = map_relation_type(&relation_id);
1095 if !type_.is_empty() && is_valid_relation_type(&type_) {
1096 let rel = Relation { id, type_, ..Default::default() };
1097 if !data.relations.contains(&rel) {
1098 data.relations.push(rel);
1099 }
1100 }
1101 }
1102
1103 if !content.conceptdoi.is_empty() {
1105 let id = normalize_doi(&content.conceptdoi);
1106 if !id.is_empty() {
1107 data.relations.push(Relation {
1108 id,
1109 type_: "IsVersionOf".to_string(),
1110 ..Default::default()
1111 });
1112 }
1113 } else if data.id.contains("10.59350") && !content.parent.communities.default.is_empty() {
1114 let parent_id = &content.parent.communities.default;
1115 let id = normalize_doi(&format!("10.59350/{}", parent_id));
1116 if !id.is_empty() {
1117 data.relations.push(Relation {
1118 id,
1119 type_: "IsVersionOf".to_string(),
1120 ..Default::default()
1121 });
1122 }
1123 }
1124
1125 if is_rogue_scholar && !content.custom_fields.journal.issn.is_empty() {
1127 let issn_url = issn_as_url(&content.custom_fields.journal.issn);
1128 let rel = Relation {
1129 id: issn_url,
1130 type_: "IsPartOf".to_string(),
1131 ..Default::default()
1132 };
1133 if !data.relations.contains(&rel) {
1134 data.relations.push(rel);
1135 }
1136 }
1137
1138 if !content.metadata.title.is_empty() {
1140 data.title = sanitize(&content.metadata.title);
1141 }
1142
1143 data.version = content.metadata.version.clone();
1145
1146 if !content.custom_fields.content_html.is_empty() {
1148 data.content = content.custom_fields.content_html.clone();
1149 }
1150
1151 data
1152}
1153
1154fn date_type_str(type_: &Option<Value>) -> String {
1157 match type_ {
1158 Some(Value::Object(m)) => m
1159 .get("id")
1160 .and_then(|v| v.as_str())
1161 .unwrap_or("")
1162 .to_string(),
1163 Some(Value::String(s)) => s.clone(),
1164 _ => String::new(),
1165 }
1166}
1167
/// Removes the fractional-seconds part from a timestamp.
///
/// The first `.` and the run of ASCII digits directly after it are dropped;
/// whatever follows (for example a timezone offset) is kept. Strings without
/// a `.` are returned unchanged.
fn strip_milliseconds(ts: &str) -> String {
    match ts.split_once('.') {
        Some((head, tail)) => {
            let suffix = tail.trim_start_matches(|c: char| c.is_ascii_digit());
            format!("{head}{suffix}")
        }
        None => ts.to_string(),
    }
}
1180
1181fn relation_type_id(v: &RelatedIdentifier) -> String {
1182 if !v.relation_type.id.is_empty() {
1183 v.relation_type.id.clone()
1184 } else {
1185 v.relation.to_lowercase()
1186 }
1187}
1188
1189fn subject_string(v: &Subject_) -> String {
1190 if v.subject.is_empty() {
1191 return String::new();
1192 }
1193 match v.scheme.as_str() {
1194 "FOS" => format!("FOS: {}", v.subject),
1195 "Domains" => format!("Domain: {}", v.subject),
1196 "Fields" => format!("Field: {}", v.subject),
1197 "Subfields" => format!("Subfield: {}", v.subject),
1198 "Topics" => format!("Topic: {}", v.subject),
1199 _ => v.subject.clone(),
1200 }
1201}
1202
1203#[derive(Deserialize, Default)]
1205struct FilesEnabled {
1206 #[serde(default)]
1207 enabled: bool,
1208}
1209
1210#[derive(Deserialize, Default)]
1211struct FilesWithEntries {
1212 #[serde(default)]
1213 entries: std::collections::HashMap<String, Value>,
1214}
1215
1216#[derive(Serialize, Default)]
1221struct OutInveniordm {
1222 pids: OutPids,
1223 access: OutAccess,
1224 files: OutFiles,
1225 metadata: OutMetadata,
1226 #[serde(
1227 rename = "custom_fields",
1228 skip_serializing_if = "OutCustomFields::is_empty"
1229 )]
1230 custom_fields: OutCustomFields,
1231}
1232
1233#[derive(Serialize, Default)]
1234struct OutPids {
1235 #[serde(rename = "doi")]
1236 doi: OutDoi,
1237}
1238
1239#[derive(Serialize, Default)]
1240struct OutDoi {
1241 identifier: String,
1242 provider: String,
1243}
1244
1245#[derive(Serialize, Default)]
1246struct OutAccess {
1247 record: String,
1248 files: String,
1249}
1250
1251#[derive(Serialize, Default)]
1252struct OutFiles {
1253 enabled: bool,
1254}
1255
/// The `metadata` object of the payload.
///
/// `resource_type`, `creators`, `title` and `publication_date` are always
/// serialized; every other field is omitted from the JSON when empty.
#[derive(Serialize, Default)]
struct OutMetadata {
    resource_type: OutResourceType,
    creators: Vec<OutCreator>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    contributors: Vec<OutContributor>,
    title: String,
    publication_date: String,
    #[serde(skip_serializing_if = "String::is_empty")]
    publisher: String,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    identifiers: Vec<OutIdentifier>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    dates: Vec<OutDate>,
    #[serde(skip_serializing_if = "String::is_empty")]
    description: String,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    funding: Vec<OutFunding>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    languages: Vec<OutLanguage>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    subjects: Vec<OutSubject>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    rights: Vec<OutRight>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    references: Vec<OutReference>,
    #[serde(rename = "related_identifiers", skip_serializing_if = "Vec::is_empty")]
    related_identifiers: Vec<OutRelatedIdentifier>,
    #[serde(skip_serializing_if = "String::is_empty")]
    version: String,
}
1287
/// Resource type reference; `id` is the value returned by `cm_to_invenio_type`.
#[derive(Serialize, Default)]
struct OutResourceType {
    id: String,
}
1292
/// One entry of `metadata.creators`.
#[derive(Serialize, Default)]
struct OutCreator {
    /// The person or organization credited as creator.
    person_or_org: OutPersonOrOrg,
    /// Affiliations of the creator; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    affiliations: Vec<OutAffiliation>,
}
1299
/// Person or organization shared by creators and contributors.
///
/// Only `type` is always serialized; the name parts and identifiers are
/// omitted when empty.
#[derive(Serialize, Default)]
struct OutPersonOrOrg {
    /// Serialized as `type`; `convert` writes "personal" or "organizational".
    #[serde(rename = "type")]
    type_: String,
    /// Full name; `convert` fills it for organizations and the "No author" placeholder.
    #[serde(skip_serializing_if = "String::is_empty")]
    name: String,
    #[serde(rename = "given_name", skip_serializing_if = "String::is_empty")]
    given_name: String,
    #[serde(rename = "family_name", skip_serializing_if = "String::is_empty")]
    family_name: String,
    /// Identifiers of the person/organization; `convert` only emits ORCID entries.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    identifiers: Vec<OutIdentifier>,
}
1313
/// One entry of `metadata.contributors`: like a creator, plus a role.
#[derive(Serialize, Default)]
struct OutContributor {
    person_or_org: OutPersonOrOrg,
    /// Contributor role id, as returned by `cm_to_invenio_contributor_role`.
    role: OutTypeId,
    /// Affiliations of the contributor; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    affiliations: Vec<OutAffiliation>,
}
1321
/// Affiliation of a creator or contributor; each field is omitted when empty.
#[derive(Serialize, Default)]
struct OutAffiliation {
    /// Affiliation id; `convert` fills it with the result of `validate_ror`.
    #[serde(skip_serializing_if = "String::is_empty")]
    id: String,
    /// Display name of the affiliation.
    #[serde(skip_serializing_if = "String::is_empty")]
    name: String,
}
1329
/// An identifier value together with its scheme (e.g. "orcid", "url", "doi").
#[derive(Serialize, Default)]
struct OutIdentifier {
    identifier: String,
    scheme: String,
}
1335
/// One entry of `metadata.dates`: a date string and its date type.
#[derive(Serialize, Default)]
struct OutDate {
    date: String,
    /// Serialized as `type`; the id is a date type such as "created" or "issued".
    #[serde(rename = "type")]
    type_: OutTypeId,
}
1342
/// Generic `{ "id": ... }` wrapper used for roles, date types and relation types.
#[derive(Serialize, Default)]
struct OutTypeId {
    id: String,
}
1347
/// One entry of `metadata.funding`: a funder and, optionally, an award.
#[derive(Serialize, Default)]
struct OutFunding {
    funder: OutFunder,
    /// Omitted from the JSON when the award has no number, title or identifiers.
    #[serde(skip_serializing_if = "OutAward::is_empty")]
    award: OutAward,
}
1354
/// Funder of a funding entry.
#[derive(Serialize, Default)]
struct OutFunder {
    /// Funder id; `convert` only fills it with a validated ROR id. Omitted when empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    id: String,
    /// Funder name; always serialized.
    name: String,
}
1361
/// Award of a funding entry; every field is omitted from the JSON when empty.
#[derive(Serialize, Default)]
struct OutAward {
    #[serde(skip_serializing_if = "String::is_empty")]
    number: String,
    #[serde(skip_serializing_if = "OutAwardTitle::is_empty")]
    title: OutAwardTitle,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    identifiers: Vec<OutIdentifier>,
}
1371
1372impl OutAward {
1373 fn is_empty(&self) -> bool {
1374 self.number.is_empty() && self.title.is_empty() && self.identifiers.is_empty()
1375 }
1376}
1377
/// Award title keyed by language; only the `en` entry is supported.
#[derive(Serialize, Default)]
struct OutAwardTitle {
    /// English title; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    en: String,
}
1383
1384impl OutAwardTitle {
1385 fn is_empty(&self) -> bool {
1386 self.en.is_empty()
1387 }
1388}
1389
/// Language reference; `convert` fills `id` from `get_language(.., "iso639-3")`.
#[derive(Serialize, Default)]
struct OutLanguage {
    id: String,
}
1394
/// One entry of `metadata.subjects`.
#[derive(Serialize, Default)]
struct OutSubject {
    /// Free-text subject; always serialized.
    subject: String,
    /// Subject id; omitted when empty (`convert` leaves it empty).
    #[serde(skip_serializing_if = "String::is_empty")]
    id: String,
    /// Subject scheme; omitted when empty (`convert` leaves it empty).
    #[serde(skip_serializing_if = "String::is_empty")]
    scheme: String,
}
1403
/// License reference; `convert` fills `id` with the lowercased license id.
#[derive(Serialize, Default)]
struct OutRight {
    id: String,
}
1408
/// One entry of `metadata.references`.
#[derive(Serialize, Default)]
struct OutReference {
    /// Human-readable reference text; always serialized.
    reference: String,
    /// Scheme of `identifier`; omitted when empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    scheme: String,
    /// Identifier of the referenced work; omitted when empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    identifier: String,
}
1417
/// One entry of `metadata.related_identifiers`.
#[derive(Serialize, Default)]
struct OutRelatedIdentifier {
    identifier: String,
    scheme: String,
    /// Relation type id, as returned by `cm_to_invenio_relation`.
    relation_type: OutTypeId,
}
1424
/// The `custom_fields` object; each entry is omitted from the JSON when empty.
#[derive(Serialize, Default)]
struct OutCustomFields {
    /// Journal/container details, serialized under `journal:journal`.
    #[serde(
        rename = "journal:journal",
        skip_serializing_if = "OutJournal::is_empty"
    )]
    journal: OutJournal,
    /// Serialized under `rs:content_html`; `convert` copies `data.content` here.
    #[serde(rename = "rs:content_html", skip_serializing_if = "String::is_empty")]
    content_html: String,
    /// Serialized under `rs:image`; `convert` copies `data.image` here.
    #[serde(rename = "rs:image", skip_serializing_if = "String::is_empty")]
    feature_image: String,
    /// Serialized under `feed:generator`; `convert` copies the container platform here.
    #[serde(rename = "feed:generator", skip_serializing_if = "String::is_empty")]
    generator: String,
}
1439
1440impl OutCustomFields {
1441 fn is_empty(&self) -> bool {
1442 self.journal.is_empty()
1443 && self.content_html.is_empty()
1444 && self.feature_image.is_empty()
1445 && self.generator.is_empty()
1446 }
1447}
1448
/// The `journal:journal` custom field; every entry is omitted when empty.
#[derive(Serialize, Default)]
struct OutJournal {
    #[serde(skip_serializing_if = "String::is_empty")]
    title: String,
    #[serde(skip_serializing_if = "String::is_empty")]
    issn: String,
    #[serde(skip_serializing_if = "String::is_empty")]
    volume: String,
    #[serde(skip_serializing_if = "String::is_empty")]
    issue: String,
    /// Page range, as built by `container_pages` ("first-last" or just "first").
    #[serde(skip_serializing_if = "String::is_empty")]
    pages: String,
}
1462
1463impl OutJournal {
1464 fn is_empty(&self) -> bool {
1465 self.title.is_empty()
1466 && self.issn.is_empty()
1467 && self.volume.is_empty()
1468 && self.issue.is_empty()
1469 && self.pages.is_empty()
1470 }
1471}
1472
/// Maps a commonmeta resource type to an InvenioRDM resource type id.
///
/// Thin wrapper around `C::cm_to_inveniordm`; the mapping table itself lives
/// in the constants module.
fn cm_to_invenio_type(cm: &str) -> &'static str {
    C::cm_to_inveniordm(cm)
}
1478
/// Maps a commonmeta identifier type (e.g. "DOI", "arXiv") to the InvenioRDM
/// identifier scheme (e.g. "doi", "arxiv").
///
/// The lookup is case-sensitive. Returns an empty string for unknown types;
/// callers treat that as "no usable scheme".
fn cm_to_invenio_identifier(cm: &str) -> &'static str {
    /// (commonmeta identifier type, InvenioRDM scheme) pairs.
    const SCHEMES: &[(&str, &str)] = &[
        ("Ark", "ark"),
        ("arXiv", "arxiv"),
        ("Bibcode", "ads"),
        ("CrossrefFunderID", "crossreffunderid"),
        ("DOI", "doi"),
        ("EAN13", "ean13"),
        ("EISSN", "eissn"),
        ("GRID", "grid"),
        ("Handle", "handle"),
        ("IGSN", "igsn"),
        ("ISBN", "isbn"),
        ("ISNI", "isni"),
        ("ISSN", "issn"),
        ("ISTC", "istc"),
        ("LISSN", "lissn"),
        ("LSID", "lsid"),
        ("PMID", "pmid"),
        ("PURL", "purl"),
        ("UPC", "upc"),
        ("URL", "url"),
        ("URN", "urn"),
        ("W3ID", "w3id"),
        ("GUID", "guid"),
        ("UUID", "uuid"),
        ("Other", "other"),
    ];

    SCHEMES
        .iter()
        .find(|(kind, _)| *kind == cm)
        .map_or("", |(_, scheme)| *scheme)
}
1509
1510fn cm_to_invenio_contributor_role(cm: &str) -> &'static str {
1511 let r = C::cm_to_inveniordm_role(cm);
1512 if r == "other" { "" } else { r }
1513}
1514
/// Maps a commonmeta relation type to an InvenioRDM relation type id.
///
/// The lookup is case-sensitive. Returns an empty string for unknown relation
/// types; callers treat that as "relation cannot be expressed" and skip it.
///
/// Mappings that are not a plain lowercasing of the input:
/// - `IsPreviousVersion` is accepted as an alias of `IsPreviousVersionOf`;
/// - `IsReviewOf` becomes `reviews` and `HasReview` becomes `isreviewedby`;
/// - `IsPreprintOf` becomes `ispreviousversionof`.
fn cm_to_invenio_relation(cm: &str) -> &'static str {
    match cm {
        "IsCitedBy" => "iscitedby",
        "Cites" => "cites",
        "IsSupplementTo" => "issupplementto",
        "IsSupplementedBy" => "issupplementedby",
        "IsContinuedBy" => "iscontinuedby",
        "Continues" => "continues",
        "IsNewVersionOf" => "isnewversionof",
        // The InvenioRDM id ends in "of" (counterpart of "isnewversionof");
        // the id without the suffix is not part of the relation vocabulary.
        "IsPreviousVersion" | "IsPreviousVersionOf" => "ispreviousversionof",
        "IsPartOf" => "ispartof",
        "HasPart" => "haspart",
        "IsReferencedBy" => "isreferencedby",
        "References" => "references",
        "IsDocumentedBy" => "isdocumentedby",
        "Documents" => "documents",
        "IsCompiledBy" => "iscompiledby",
        "Compiles" => "compiles",
        "IsVariantFormOf" => "isvariantformof",
        "IsOriginalFormOf" => "isoriginalformof",
        "IsIdenticalTo" => "isidenticalto",
        "IsReviewOf" => "reviews",
        "HasReview" => "isreviewedby",
        "IsDerivedFrom" => "isderivedfrom",
        "IsSourceOf" => "issourceof",
        "Describes" => "describes",
        "IsDescribedBy" => "isdescribedby",
        "IsMetadataFor" => "ismetadatafor",
        "HasMetadata" => "hasmetadata",
        "IsAnnotatedBy" => "isannotatedby",
        "Annotates" => "annotates",
        "IsCorrectedBy" => "iscorrectedby",
        "Corrects" => "corrects",
        "IsVersionOf" => "isversionof",
        "HasVersion" => "hasversion",
        "IsTranslationOf" => "istranslationof",
        "IsPreprintOf" => "ispreviousversionof",
        "HasPreprint" => "haspreprint",
        _ => "",
    }
}
1556
/// Converts a commonmeta [`Data`] record into the InvenioRDM payload
/// ([`OutInveniordm`]) that `write` and `write_all` serialize.
///
/// The conversion never fails: values that cannot be mapped (unknown
/// identifier schemes, unknown relation types, invalid ORCID/ROR ids, ...) are
/// dropped, and the placeholders "No title" / "No author" stand in for a
/// missing title or missing authors.
fn convert(data: &Data) -> OutInveniordm {
    use crate::doi_utils::validate_doi;
    use crate::utils::{get_language, validate_id, validate_orcid, validate_ror};

    let mut out = OutInveniordm::default();

    // DOI: prefer a DOI listed in `data.identifiers`, fall back to `data.id`;
    // an empty identifier is written when neither validates.
    let doi = doi_from_identifiers(data)
        .or_else(|| validate_doi(&data.id))
        .unwrap_or_default();
    // NOTE(review): the provider is derived from `data.id` even when the DOI
    // above came from `data.identifiers` — confirm the two cannot diverge.
    let provider = if is_rogue_scholar_doi(&data.id) {
        "crossref"
    } else {
        "external"
    };
    out.pids.doi = OutDoi {
        identifier: doi,
        provider: provider.to_string(),
    };

    // Access and files: always a public, metadata-only record.
    out.access = OutAccess {
        record: "public".to_string(),
        files: "public".to_string(),
    };
    out.files = OutFiles { enabled: false };

    // Resource type.
    out.metadata.resource_type = OutResourceType {
        id: cm_to_invenio_type(&data.type_).to_string(),
    };

    // Title, with a placeholder when the record has none.
    out.metadata.title = if !data.title.is_empty() {
        data.title.clone()
    } else {
        "No title".to_string()
    };

    // Publication date: first non-empty of published / available / created,
    // passed through `parse_date`.
    out.metadata.publication_date = if !data.date_published.is_empty() {
        parse_date(&data.date_published)
    } else if !data.dates.available.is_empty() {
        parse_date(&data.dates.available)
    } else if !data.dates.created.is_empty() {
        parse_date(&data.dates.created)
    } else {
        String::new()
    };

    // Creators: every contributor holding the "Author" role; a single
    // "No author" organization when there is none.
    if data
        .contributors
        .iter()
        .any(|c| c.roles.contains(&"Author".to_string()))
    {
        for v in &data.contributors {
            if !v.roles.contains(&"Author".to_string()) {
                continue;
            }
            // Only ids that validate as ORCID are emitted as identifiers.
            let mut identifiers = vec![];
            if !v.id().is_empty()
                && let Some(orcid) = validate_orcid(v.id())
            {
                identifiers.push(OutIdentifier {
                    identifier: orcid,
                    scheme: "orcid".to_string(),
                });
            }

            // Affiliations: the id is kept only when it validates as ROR.
            // Entries repeating an already-seen non-empty id are skipped;
            // entries without an id are never considered duplicates.
            let mut affiliations = vec![];
            for a in v.affiliations() {
                let aff_id = validate_ror(&a.id).unwrap_or_default();
                let aff = OutAffiliation {
                    id: aff_id,
                    name: a.name.clone(),
                };
                let duplicate = affiliations
                    .iter()
                    .any(|e: &OutAffiliation| !e.id.is_empty() && e.id == aff.id);
                if !duplicate {
                    affiliations.push(aff);
                }
            }

            // Anything that is not a "Person" is written as organizational.
            let ptype = match v.type_.as_str() {
                "Person" => "personal",
                "Organization" => "organizational",
                _ => "organizational",
            };

            // NOTE(review): `name` is only filled for type "Organization",
            // although other non-person types are also written as
            // "organizational" above — confirm that is intended.
            out.metadata.creators.push(OutCreator {
                person_or_org: OutPersonOrOrg {
                    type_: ptype.to_string(),
                    name: if v.type_ == "Organization" { v.name() } else { String::new() },
                    given_name: v.given_name().to_string(),
                    family_name: v.family_name().to_string(),
                    identifiers,
                },
                affiliations,
            });
        }
    } else {
        out.metadata.creators.push(OutCreator {
            person_or_org: OutPersonOrOrg {
                type_: "organizational".to_string(),
                name: "No author".to_string(),
                ..Default::default()
            },
            affiliations: vec![],
        });
    }

    // Contributors: one entry per contributor, using the first role that is
    // not "Author" and that maps to a non-empty InvenioRDM role id.
    for v in &data.contributors {
        for role in &v.roles {
            if role == "Author" {
                continue;
            }
            let role_id = cm_to_invenio_contributor_role(role);
            if role_id.is_empty() {
                continue;
            }

            let mut identifiers = vec![];
            if !v.id().is_empty()
                && let Some(orcid) = validate_orcid(v.id())
            {
                identifiers.push(OutIdentifier {
                    identifier: orcid,
                    scheme: "orcid".to_string(),
                });
            }

            // NOTE(review): unlike creators, affiliations are only collected
            // for persons and are not de-duplicated — confirm the difference
            // is intended.
            let mut affiliations = vec![];
            if v.type_ == "Person" {
                for a in v.affiliations() {
                    let aff_id = validate_ror(&a.id).unwrap_or_default();
                    affiliations.push(OutAffiliation {
                        id: aff_id,
                        name: a.name.clone(),
                    });
                }
            }

            let ptype = match v.type_.as_str() {
                "Person" => "personal",
                "Organization" => "organizational",
                _ => "organizational",
            };

            out.metadata.contributors.push(OutContributor {
                person_or_org: OutPersonOrOrg {
                    type_: ptype.to_string(),
                    name: if v.type_ == "Organization" { v.name() } else { String::new() },
                    given_name: v.given_name().to_string(),
                    family_name: v.family_name().to_string(),
                    identifiers,
                },
                role: OutTypeId {
                    id: role_id.to_string(),
                },
                affiliations,
            });
            // Only the first usable role is exported for each contributor.
            break;
        }
    }

    // Publisher.
    out.metadata.publisher = data.publisher.name.clone();

    // Container details go into the `journal:journal` custom field. The title
    // is only copied for journal-like container types; the other fields are
    // copied whenever they are set.
    let container_type = data.container.type_.as_str();
    if !data.container.title.is_empty()
        && matches!(container_type, "Journal" | "Periodical" | "Blog")
    {
        out.custom_fields.journal.title = data.container.title.clone();
    }
    if !data.container.platform.is_empty() {
        out.custom_fields.generator = data.container.platform.clone();
    }
    if !data.container.volume.is_empty() {
        out.custom_fields.journal.volume = data.container.volume.clone();
    }
    if !data.container.issue.is_empty() {
        out.custom_fields.journal.issue = data.container.issue.clone();
    }
    if !data.container.first_page.is_empty() {
        out.custom_fields.journal.pages = container_pages(&data.container);
    }
    if !data.container.identifier.is_empty() && data.container.identifier_type == "ISSN" {
        out.custom_fields.journal.issn = data.container.identifier.clone();
    }

    // Full-text content and feature image.
    out.custom_fields.content_html = data.content.clone();
    out.custom_fields.feature_image = data.image.clone();

    // Identifiers: skip unknown schemes and the DOI that equals the record id
    // (the record's own DOI is already carried by `pids.doi`).
    for v in &data.identifiers {
        let scheme = cm_to_invenio_identifier(&v.identifier_type);
        if scheme.is_empty() {
            continue;
        }
        if v.identifier_type == "DOI"
            && normalize_id_for_doi(&v.identifier) == normalize_id_for_doi(&data.id)
        {
            continue;
        }
        out.metadata.identifiers.push(OutIdentifier {
            identifier: v.identifier.clone(),
            scheme: scheme.to_string(),
        });
    }
    // The record URL is always added as a "url" identifier.
    if !data.url.is_empty() {
        out.metadata.identifiers.push(OutIdentifier {
            identifier: data.url.clone(),
            scheme: "url".to_string(),
        });
    }

    // Dates: (InvenioRDM date type id, source value) pairs; empty values are
    // skipped. The accessed date is exported with the generic type "other".
    let date_fields: &[(&str, &str)] = &[
        ("created", &data.dates.created),
        ("submitted", &data.dates.submitted),
        ("accepted", &data.dates.accepted),
        ("issued", &data.date_published),
        ("updated", &data.date_updated),
        ("other", &data.dates.accessed),
        ("available", &data.dates.available),
        ("copyrighted", &data.dates.copyrighted),
        ("collected", &data.dates.collected),
        ("valid", &data.dates.valid),
        ("withdrawn", &data.dates.withdrawn),
        ("other", &data.dates.other),
    ];
    for (id, date) in date_fields {
        if !date.is_empty() {
            out.metadata.dates.push(OutDate {
                date: date.to_string(),
                type_: OutTypeId { id: id.to_string() },
            });
        }
    }

    // Description.
    if !data.description.is_empty() {
        out.metadata.description = data.description.clone();
    }

    // Funding: the funder id is only kept when it validates as ROR. Ids under
    // the `https://doi.org/10.13039/` prefix are dropped without validation
    // (presumably Crossref Funder ID DOIs — confirm).
    for v in &data.funding_references {
        let ror_id = if v.funder_id.starts_with("https://doi.org/10.13039/") {
            String::new()
        } else {
            let (validated_id, funder_id_type) = validate_id(&v.funder_id);
            if funder_id_type == "ROR" {
                validate_ror(&validated_id).unwrap_or_default()
            } else {
                String::new()
            }
        };

        let funder = OutFunder {
            id: ror_id,
            name: v.funder_name.clone(),
        };

        // An award is only built when a number, title or id is present; the
        // award id is exported when it validates to a known scheme.
        let award =
            if !v.award_number.is_empty() || !v.award_title.is_empty() || !v.award_id.is_empty() {
                let mut identifiers = vec![];
                if !v.award_id.is_empty() {
                    let (award_id_val, award_id_type) = validate_id(&v.award_id);
                    let scheme = cm_to_invenio_identifier(award_id_type);
                    if !award_id_val.is_empty() && !scheme.is_empty() {
                        identifiers.push(OutIdentifier {
                            identifier: award_id_val,
                            scheme: scheme.to_string(),
                        });
                    }
                }
                OutAward {
                    number: v.award_number.clone(),
                    title: OutAwardTitle {
                        en: v.award_title.clone(),
                    },
                    identifiers,
                }
            } else {
                OutAward::default()
            };

        out.metadata.funding.push(OutFunding { funder, award });
    }

    // Language: exported as the "iso639-3" form returned by `get_language`.
    if !data.language.is_empty() {
        let lang3 = get_language(&data.language, "iso639-3");
        if !lang3.is_empty() {
            out.metadata.languages.push(OutLanguage { id: lang3 });
        }
    }

    // Subjects: free-text only, without id or scheme.
    for v in &data.subjects {
        out.metadata.subjects.push(OutSubject {
            subject: v.subject.clone(),
            ..Default::default()
        });
    }

    // Rights: lowercased license id, derived from the license URL when the id
    // itself is missing.
    let right_id = if !data.license.id.is_empty() {
        data.license.id.to_lowercase()
    } else if !data.license.url.is_empty() {
        crate::spdx::from_url(&data.license.url).id.to_lowercase()
    } else {
        String::new()
    };
    if !right_id.is_empty() {
        out.metadata.rights.push(OutRight { id: right_id });
    }

    // References: use the unstructured citation when present (with the
    // reference id removed from the text, since it is exported separately);
    // otherwise build a minimal citation from the `reference` text and year.
    for v in &data.references {
        let (ref_id, ref_id_type) = validate_id(&v.id);
        let scheme = cm_to_invenio_identifier(ref_id_type).to_string();
        let unstructured = if v.unstructured.is_empty() {
            let mut u = if !v.reference.is_empty() {
                v.reference.clone()
            } else {
                "Unknown title".to_string()
            };
            if !v.publication_year.is_empty() {
                u.push_str(&format!(" ({}).", v.publication_year));
            }
            u
        } else {
            let mut u = v.unstructured.clone();
            if !v.id.is_empty() {
                u = u.replace(&v.id, "");
            }
            u.trim_end().to_string()
        };
        out.metadata.references.push(OutReference {
            reference: unstructured,
            scheme,
            identifier: ref_id,
        });
    }

    // Related identifiers: "IsPartOf" relations are not exported here; all
    // others need a valid id, a known scheme and a known relation type.
    for v in &data.relations {
        if v.type_ == "IsPartOf" {
            continue;
        }
        let (rel_id, id_type) = validate_id(&v.id);
        let scheme = cm_to_invenio_identifier(id_type);
        let relation_type = cm_to_invenio_relation(&v.type_);
        if !rel_id.is_empty() && !scheme.is_empty() && !relation_type.is_empty() {
            out.metadata.related_identifiers.push(OutRelatedIdentifier {
                identifier: rel_id,
                scheme: scheme.to_string(),
                relation_type: OutTypeId {
                    id: relation_type.to_string(),
                },
            });
        }
    }

    // Version.
    out.metadata.version = data.version.clone();

    out
}
1942
/// Reduces a timestamp to its leading `YYYY-MM-DD` part.
///
/// Returns the first ten bytes of `d` when it is at least that long, and `d`
/// unchanged when it is shorter (e.g. a bare year). Input whose tenth byte
/// falls inside a multi-byte character cannot be a plain ISO date; it is
/// returned unchanged instead of panicking on the slice.
fn parse_date(d: &str) -> String {
    // `str::get` yields `None` both for short input and for a cut that would
    // split a character, so neither case can panic.
    d.get(..10).unwrap_or(d).to_string()
}
1951
1952fn container_pages(c: &crate::data::Container) -> String {
1953 if !c.first_page.is_empty() && !c.last_page.is_empty() {
1954 format!("{}-{}", c.first_page, c.last_page)
1955 } else {
1956 c.first_page.clone()
1957 }
1958}
1959
/// Normalizes an id for DOI comparison: lowercases it and strips a leading
/// `https://doi.org/` or `http://doi.org/` resolver prefix.
///
/// Lowercasing happens first so that the prefix is recognised regardless of
/// its case (URL scheme and host are case-insensitive), which keeps the
/// result consistent with the case-insensitive comparison it is used for.
fn normalize_id_for_doi(id: &str) -> String {
    let lowered = id.to_lowercase();
    lowered
        .trim_start_matches("https://doi.org/")
        .trim_start_matches("http://doi.org/")
        .to_string()
}
1966
1967fn doi_from_identifiers(data: &Data) -> Option<String> {
1968 data.identifiers
1969 .iter()
1970 .find(|id| id.identifier_type == "DOI" && !id.identifier.is_empty())
1971 .and_then(|id| crate::doi_utils::validate_doi(&id.identifier))
1972}
1973
1974pub fn read_json(input: &str) -> Result<Data> {
1977 let content: Content = serde_json::from_str(input).map_err(|e| Error::Parse(e.to_string()))?;
1978 Ok(from_content(content))
1979}
1980
1981pub fn write(data: &Data) -> Result<Vec<u8>> {
1982 let payload = convert(data);
1983 serde_json::to_vec(&payload).map_err(|e| Error::Parse(e.to_string()))
1984}
1985
1986pub fn write_all(list: &[Data]) -> Result<Vec<u8>> {
1987 let payloads: Vec<OutInveniordm> = list.iter().map(convert).collect();
1988 serde_json::to_vec_pretty(&payloads).map_err(|e| Error::Parse(e.to_string()))
1989}
1990
/// Unit tests for reading and writing InvenioRDM JSON.
#[cfg(test)]
mod tests {
    use super::*;

    /// The `journal:journal` custom field is mapped onto the container,
    /// including the split of `pages` into first and last page.
    #[test]
    fn test_read_json_maps_journal_container_details() {
        let json = r#"{
            "doi": "10.5555/example",
            "parent": {},
            "pids": {},
            "metadata": {
                "resource_type": {"id": "publication-article"},
                "title": "Example"
            },
            "custom_fields": {
                "journal:journal": {
                    "title": "Journal of Examples",
                    "issn": "1234-5678",
                    "volume": "12",
                    "issue": "3",
                    "pages": "100-110"
                }
            }
        }"#;

        let data = read_json(json).unwrap();
        assert_eq!(data.container.title, "Journal of Examples");
        assert_eq!(data.container.identifier, "1234-5678");
        assert_eq!(data.container.identifier_type, "ISSN");
        assert_eq!(data.container.volume, "12");
        assert_eq!(data.container.issue, "3");
        assert_eq!(data.container.first_page, "100");
        assert_eq!(data.container.last_page, "110");
    }

    /// A DOI listed in `identifiers` wins over a non-DOI `id` when writing
    /// `pids.doi.identifier`.
    #[test]
    fn test_write_prefers_doi_identifier_over_id() {
        let data = Data {
            id: "https://example.org/not-a-doi".to_string(),
            identifiers: vec![Identifier {
                identifier: "https://doi.org/10.5555/identifier-doi".to_string(),
                identifier_type: "DOI".to_string(),
            }],
            title: "Example".to_string(),
            ..Data::default()
        };

        let out = write(&data).unwrap();
        let json: serde_json::Value = serde_json::from_slice(&out).unwrap();
        assert_eq!(json["pids"]["doi"]["identifier"], "10.5555/identifier-doi");
    }
}
2043
2044pub fn fetch(url: &str) -> Result<Data> {
2046 let parsed = url::Url::parse(url).map_err(|e| Error::Parse(e.to_string()))?;
2047 let host = parsed
2048 .host_str()
2049 .ok_or_else(|| Error::Parse("missing host in URL".to_string()))?;
2050 let record_id = parsed
2051 .path_segments()
2052 .and_then(|mut segs| segs.find(|s| !s.is_empty() && *s != "records" && *s != "api"))
2053 .ok_or_else(|| Error::Parse("cannot extract record ID from URL".to_string()))?
2054 .to_string();
2055
2056 let api_url = format!("https://{}/api/records/{}", host, record_id);
2057 let client = build_client()?;
2058 let json = client
2059 .get(&api_url)
2060 .send()
2061 .map_err(|e| Error::Http(e.to_string()))?
2062 .error_for_status()
2063 .map_err(|e| Error::Http(e.to_string()))?
2064 .text()
2065 .map_err(|e| Error::Http(e.to_string()))?;
2066 read_json(&json)
2067}
2068
/// Outcome of pushing one record to an InvenioRDM instance (see `upsert`).
///
/// Empty string fields and a missing `message` are omitted when the result is
/// serialized; `id` and `status` are always present.
#[derive(Debug, Default, Clone, Serialize)]
pub struct PushResult {
    /// The commonmeta id of the record (`data.id`).
    pub id: String,
    /// The validated DOI of the record; empty when the id is not a DOI.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub doi: String,
    /// The InvenioRDM record id, once a draft was created or an existing
    /// record was found and updated.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub record_id: String,
    /// "published" on success, otherwise a `failed...` code naming the step
    /// that failed (e.g. "failed_missing_doi", "failed_search",
    /// "failed_create_draft", "failed_publish").
    pub status: String,
    /// `created` timestamp as reported by the API.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub created: String,
    /// `updated` timestamp as reported by the API.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub updated: String,
    /// Error message of the failed step, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
2097
2098fn build_client() -> Result<reqwest::blocking::Client> {
2099 reqwest::blocking::Client::builder()
2100 .user_agent(format!(
2101 "commonmeta-rs/{} (https://github.com/front-matter/commonmeta-rs; mailto:info@front-matter.de)",
2102 env!("CARGO_PKG_VERSION")
2103 ))
2104 .build()
2105 .map_err(|e| Error::Http(e.to_string()))
2106}
2107
2108fn search_by_doi(
2110 doi: &str,
2111 host: &str,
2112 client: &reqwest::blocking::Client,
2113) -> Result<Option<String>> {
2114 let escaped = crate::doi_utils::escape_doi(doi);
2115 let url = format!("https://{}/api/records?q=doi:{}", host, escaped);
2116 let body: Value = client
2117 .get(&url)
2118 .header("Content-Type", "application/json")
2119 .send()
2120 .map_err(|e| Error::Http(e.to_string()))?
2121 .json()
2122 .map_err(|e| Error::Http(e.to_string()))?;
2123
2124 let total = body
2125 .get("hits")
2126 .and_then(|h| h.get("total"))
2127 .and_then(Value::as_i64)
2128 .unwrap_or(0);
2129 if total == 0 {
2130 return Ok(None);
2131 }
2132 Ok(body
2133 .get("hits")
2134 .and_then(|h| h.get("hits"))
2135 .and_then(|hits| hits.get(0))
2136 .and_then(|first| first.get("id"))
2137 .and_then(Value::as_str)
2138 .map(|s| s.to_string()))
2139}
2140
2141fn create_draft_record(
2142 body: &[u8],
2143 host: &str,
2144 token: &str,
2145 client: &reqwest::blocking::Client,
2146) -> Result<(String, String, String)> {
2147 let url = format!("https://{}/api/records", host);
2148 let resp = client
2149 .post(&url)
2150 .header("Content-Type", "application/json")
2151 .header("Authorization", format!("Bearer {}", token))
2152 .body(body.to_vec())
2153 .send()
2154 .map_err(|e| Error::Http(e.to_string()))?;
2155
2156 let status = resp.status().as_u16();
2157 let text = resp.text().map_err(|e| Error::Http(e.to_string()))?;
2158 if status == 429 {
2159 return Err(Error::Http("rate limited".to_string()));
2160 }
2161 if status != 201 {
2162 return Err(Error::Http(format!(
2163 "failed to create draft record: {}",
2164 text
2165 )));
2166 }
2167 let v: Value = serde_json::from_str(&text).map_err(|e| Error::Parse(e.to_string()))?;
2168 Ok((
2169 v.get("id")
2170 .and_then(Value::as_str)
2171 .unwrap_or_default()
2172 .to_string(),
2173 v.get("created")
2174 .and_then(Value::as_str)
2175 .unwrap_or_default()
2176 .to_string(),
2177 v.get("updated")
2178 .and_then(Value::as_str)
2179 .unwrap_or_default()
2180 .to_string(),
2181 ))
2182}
2183
2184fn edit_published_record(
2185 record_id: &str,
2186 host: &str,
2187 token: &str,
2188 client: &reqwest::blocking::Client,
2189) -> Result<()> {
2190 let url = format!("https://{}/api/records/{}/draft", host, record_id);
2191 client
2192 .post(&url)
2193 .header("Content-Type", "application/json")
2194 .header("Authorization", format!("Bearer {}", token))
2195 .send()
2196 .map_err(|e| Error::Http(e.to_string()))?;
2197 Ok(())
2198}
2199
2200fn update_draft_record(
2201 record_id: &str,
2202 body: &[u8],
2203 host: &str,
2204 token: &str,
2205 client: &reqwest::blocking::Client,
2206) -> Result<()> {
2207 let url = format!("https://{}/api/records/{}/draft", host, record_id);
2208 client
2209 .put(&url)
2210 .header("Content-Type", "application/json")
2211 .header("Authorization", format!("Bearer {}", token))
2212 .body(body.to_vec())
2213 .send()
2214 .map_err(|e| Error::Http(e.to_string()))?;
2215 Ok(())
2216}
2217
2218fn publish_draft_record(
2219 record_id: &str,
2220 host: &str,
2221 token: &str,
2222 client: &reqwest::blocking::Client,
2223) -> Result<(String, String)> {
2224 let url = format!(
2225 "https://{}/api/records/{}/draft/actions/publish",
2226 host, record_id
2227 );
2228 let resp = client
2229 .post(&url)
2230 .header("Content-Type", "application/json")
2231 .header("Authorization", format!("Bearer {}", token))
2232 .send()
2233 .map_err(|e| Error::Http(e.to_string()))?;
2234
2235 let status = resp.status().as_u16();
2236 let text = resp.text().map_err(|e| Error::Http(e.to_string()))?;
2237 if status != 202 {
2238 return Err(Error::Http(format!(
2239 "failed to publish draft record: {}",
2240 text
2241 )));
2242 }
2243 let v: Value = serde_json::from_str(&text).map_err(|e| Error::Parse(e.to_string()))?;
2244 Ok((
2245 v.get("created")
2246 .and_then(Value::as_str)
2247 .unwrap_or_default()
2248 .to_string(),
2249 v.get("updated")
2250 .and_then(Value::as_str)
2251 .unwrap_or_default()
2252 .to_string(),
2253 ))
2254}
2255
2256pub fn upsert(data: &Data, host: &str, token: &str) -> PushResult {
2263 let mut result = PushResult {
2264 id: data.id.clone(),
2265 ..Default::default()
2266 };
2267
2268 let doi = match crate::doi_utils::validate_doi(&data.id) {
2269 Some(d) => d,
2270 None => {
2271 result.status = "failed_missing_doi".to_string();
2272 return result;
2273 }
2274 };
2275 result.doi = doi.clone();
2276
2277 let client = match build_client() {
2278 Ok(c) => c,
2279 Err(e) => {
2280 result.status = "failed".to_string();
2281 result.message = Some(e.to_string());
2282 return result;
2283 }
2284 };
2285
2286 let body = match write(data) {
2287 Ok(b) => b,
2288 Err(e) => {
2289 result.status = "failed".to_string();
2290 result.message = Some(e.to_string());
2291 return result;
2292 }
2293 };
2294
2295 let existing = match search_by_doi(&doi, host, &client) {
2296 Ok(id) => id,
2297 Err(e) => {
2298 result.status = "failed_search".to_string();
2299 result.message = Some(e.to_string());
2300 return result;
2301 }
2302 };
2303
2304 let record_id = match existing {
2305 None => match create_draft_record(&body, host, token, &client) {
2306 Ok((id, created, updated)) => {
2307 result.created = created;
2308 result.updated = updated;
2309 id
2310 }
2311 Err(e) => {
2312 result.status = "failed_create_draft".to_string();
2313 result.message = Some(e.to_string());
2314 return result;
2315 }
2316 },
2317 Some(id) => {
2318 if let Err(e) = edit_published_record(&id, host, token, &client) {
2319 result.status = "failed_edit_published".to_string();
2320 result.message = Some(e.to_string());
2321 return result;
2322 }
2323 if let Err(e) = update_draft_record(&id, &body, host, token, &client) {
2324 result.status = "failed_update_draft".to_string();
2325 result.message = Some(e.to_string());
2326 return result;
2327 }
2328 id
2329 }
2330 };
2331 result.record_id = record_id.clone();
2332
2333 match publish_draft_record(&record_id, host, token, &client) {
2334 Ok((created, updated)) => {
2335 if !created.is_empty() {
2336 result.created = created;
2337 }
2338 result.updated = updated;
2339 result.status = "published".to_string();
2340 }
2341 Err(e) => {
2342 result.status = "failed_publish".to_string();
2343 result.message = Some(e.to_string());
2344 }
2345 }
2346
2347 result
2348}
2349
2350pub fn upsert_all(list: &[Data], host: &str, token: &str) -> Vec<PushResult> {
2352 list.iter().map(|data| upsert(data, host, token)).collect()
2353}
2354
/// Unit tests for the push helpers. They only cover paths that return before
/// any HTTP request is made.
#[cfg(test)]
mod push_tests {
    use super::*;

    /// An id that is not a DOI is rejected before any request is sent.
    #[test]
    fn test_upsert_rejects_missing_doi() {
        let data = Data {
            id: "https://example.com/not-a-doi".to_string(),
            ..Data::default()
        };
        let result = upsert(&data, "example.invenio.host", "fake-token");
        assert_eq!(result.status, "failed_missing_doi");
        assert!(result.record_id.is_empty());
    }

    /// An empty id is rejected the same way.
    #[test]
    fn test_upsert_rejects_empty_id() {
        let data = Data::default();
        let result = upsert(&data, "example.invenio.host", "fake-token");
        assert_eq!(result.status, "failed_missing_doi");
    }

    /// An empty input list yields an empty result list.
    #[test]
    fn test_upsert_all_empty_list() {
        let results = upsert_all(&[], "example.invenio.host", "fake-token");
        assert!(results.is_empty());
    }

    /// Empty optional fields are left out of the serialized result.
    #[test]
    fn test_push_result_serialization_omits_empty_fields() {
        let result = PushResult {
            id: "https://doi.org/10.1/a".to_string(),
            status: "failed_missing_doi".to_string(),
            ..Default::default()
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"id\""));
        assert!(json.contains("\"status\""));
        assert!(!json.contains("\"doi\""));
        assert!(!json.contains("\"record_id\""));
        assert!(!json.contains("\"message\""));
    }
}