Skip to main content

commonmeta/formats/
inveniordm.rs

1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
4use crate::author_utils::{
5    cleanup_author, infer_contributor_type, normalize_contributor_roles, parse_affiliation_value,
6    split_person_name,
7};
8use crate::constants as C;
9use crate::data::{
10    Citation, Container, Contributor, Data, Description, File, FundingReference, Identifier,
11    Organization, Person, Publisher, Reference, Relation, Subject,
12};
13use crate::doi_utils::normalize_doi;
14use crate::error::{Error, Result};
15use crate::utils::{
16    get_language, issn_as_url, normalize_id, normalize_orcid, normalize_ror, normalize_url,
17    sanitize,
18};
19
20// ── API response structs ───────────────────────────────────────────────────────
21
22#[derive(Deserialize, Default)]
23struct Content {
24    // id can be string or int (Zenodo uses int)
25    id: Option<Value>,
26    // top-level doi field (Zenodo compatibility)
27    #[serde(default)]
28    doi: String,
29    // Zenodo conceptdoi for IsVersionOf relation
30    #[serde(default)]
31    conceptdoi: String,
32    #[serde(default)]
33    parent: Parent,
34    #[serde(default)]
35    pids: Pids,
36    links: Option<ContentLinks>,
37    // ISO 8601 updated timestamp
38    #[serde(default)]
39    updated: String,
40    metadata: MetadataJSON,
41    #[serde(rename = "custom_fields", default)]
42    custom_fields: CustomFields,
43    // top-level files list (new InvenioRDM format)
44    #[serde(default)]
45    files: Option<Value>,
46}
47
48#[derive(Deserialize, Default)]
49struct ContentLinks {
50    #[serde(rename = "self_html", default)]
51    self_html: String,
52}
53
54#[derive(Deserialize, Default)]
55struct Parent {
56    #[serde(default)]
57    #[allow(dead_code)]
58    id: String,
59    #[serde(default)]
60    communities: Communities,
61}
62
63#[derive(Deserialize, Default)]
64struct Pids {
65    #[serde(default)]
66    doi: Doi,
67}
68
69#[derive(Deserialize, Default)]
70struct Doi {
71    #[serde(default)]
72    identifier: String,
73}
74
75#[derive(Deserialize, Default)]
76struct MetadataJSON {
77    #[serde(rename = "resource_type", default)]
78    resource_type: ResourceType,
79    #[serde(default)]
80    creators: Vec<Creator>,
81    #[serde(default)]
82    contributors: Vec<Creator>,
83    #[serde(default)]
84    funding: Vec<Funding>,
85    #[serde(default)]
86    grants: Vec<Grant>,
87    // dates type field can be struct or plain string — use raw Value
88    #[serde(default)]
89    dates: Vec<DateJSON>,
90    #[serde(default)]
91    description: String,
92    #[serde(default)]
93    notes: String,
94    #[serde(default)]
95    identifiers: Vec<InvenioIdentifier>,
96    #[serde(default)]
97    keywords: Vec<Value>,
98    #[serde(default)]
99    language: String,
100    #[serde(default)]
101    languages: Vec<Language>,
102    // old license field (Zenodo)
103    license: Option<OldLicense>,
104    #[serde(default)]
105    publisher: String,
106    #[serde(rename = "publication_date", default)]
107    publication_date: String,
108    #[serde(default)]
109    references: Vec<InvenioReference>,
110    #[serde(rename = "related_identifiers", default)]
111    related_identifiers: Vec<RelatedIdentifier>,
112    #[serde(default)]
113    rights: Vec<Right>,
114    #[serde(default)]
115    subjects: Vec<Subject_>,
116    #[serde(default)]
117    title: String,
118    #[serde(default)]
119    version: String,
120}
121
122#[derive(Deserialize, Default)]
123struct ResourceType {
124    #[serde(default)]
125    id: String,
126    #[serde(default)]
127    subtype: String,
128    #[serde(rename = "type", default)]
129    type_: String,
130}
131
132#[derive(Deserialize, Default)]
133struct Creator {
134    #[serde(rename = "person_or_org", default)]
135    person_or_org: PersonOrOrg,
136    #[serde(default)]
137    affiliations: Vec<InvenioAffiliation>,
138    // contributor role (metadata.contributors only)
139    role: Option<ContributorRole>,
140    // Zenodo legacy fields
141    #[serde(default)]
142    name: String,
143    #[serde(default)]
144    orcid: String,
145    #[serde(default)]
146    affiliation: String,
147}
148
149#[derive(Deserialize, Default)]
150struct ContributorRole {
151    #[serde(default)]
152    id: String,
153}
154
155#[derive(Deserialize, Default)]
156struct PersonOrOrg {
157    #[serde(rename = "type", default)]
158    type_: String,
159    #[serde(default)]
160    name: String,
161    #[serde(rename = "given_name", default)]
162    given_name: String,
163    #[serde(rename = "family_name", default)]
164    family_name: String,
165    #[serde(default)]
166    identifiers: Vec<InvenioIdentifier>,
167}
168
169#[derive(Deserialize, Default)]
170struct InvenioAffiliation {
171    #[serde(default)]
172    id: String,
173    #[serde(default)]
174    name: String,
175}
176
177#[derive(Deserialize, Default, Clone)]
178struct InvenioIdentifier {
179    #[serde(default)]
180    identifier: String,
181    #[serde(default)]
182    scheme: String,
183}
184
185#[derive(Deserialize, Default)]
186struct Funding {
187    #[serde(default)]
188    funder: Funder,
189    #[serde(default)]
190    award: Award,
191}
192
193#[derive(Deserialize, Default)]
194struct Funder {
195    #[serde(default)]
196    id: String,
197    #[serde(default)]
198    name: String,
199}
200
201#[derive(Deserialize, Default)]
202struct Award {
203    #[serde(default)]
204    #[allow(dead_code)]
205    id: String,
206    #[serde(default)]
207    number: String,
208    title: Option<AwardTitle>,
209    #[serde(default)]
210    identifiers: Vec<InvenioIdentifier>,
211}
212
213#[derive(Deserialize, Default)]
214struct AwardTitle {
215    #[serde(default)]
216    en: String,
217}
218
219#[derive(Deserialize, Default)]
220struct Grant {
221    #[serde(default)]
222    code: String,
223    #[serde(default)]
224    funder: LegacyFunder,
225    #[serde(default)]
226    title: String,
227    #[serde(default)]
228    url: String,
229}
230
231#[derive(Deserialize, Default)]
232struct LegacyFunder {
233    #[serde(default)]
234    doi: String,
235    #[serde(default)]
236    name: String,
237}
238
239#[derive(Deserialize, Default)]
240struct DateJSON {
241    #[serde(default)]
242    date: String,
243    // type can be {"id": "..."} or a plain string
244    #[serde(rename = "type")]
245    type_: Option<Value>,
246}
247
248#[derive(Deserialize, Default)]
249struct Language {
250    #[serde(default)]
251    id: String,
252}
253
254#[derive(Deserialize, Default)]
255struct OldLicense {
256    #[serde(default)]
257    id: String,
258}
259
260#[derive(Deserialize, Default)]
261struct InvenioReference {
262    #[serde(default)]
263    reference: String,
264    #[serde(default)]
265    scheme: String,
266    #[serde(default)]
267    identifier: String,
268}
269
270#[derive(Deserialize, Default)]
271struct RelatedIdentifier {
272    #[serde(default)]
273    identifier: String,
274    #[serde(default)]
275    scheme: String,
276    #[serde(rename = "relation_type", default)]
277    relation_type: RelationType,
278    // Zenodo legacy: plain string relation type
279    #[serde(default)]
280    relation: String,
281}
282
283#[derive(Deserialize, Default)]
284struct RelationType {
285    #[serde(default)]
286    id: String,
287}
288
289#[derive(Deserialize, Default)]
290struct Right {
291    #[serde(default)]
292    id: String,
293    #[serde(default)]
294    #[allow(dead_code)]
295    props: RightProps,
296}
297
298#[derive(Deserialize, Default)]
299struct RightProps {
300    #[allow(dead_code)]
301    #[serde(default)]
302    url: String,
303}
304
305#[derive(Deserialize, Default)]
306struct Subject_ {
307    #[serde(default)]
308    #[allow(dead_code)]
309    id: String,
310    #[serde(default)]
311    subject: String,
312    #[serde(default)]
313    scheme: String,
314}
315
316#[derive(Deserialize, Default)]
317struct Communities {
318    #[serde(default)]
319    default: String,
320    #[serde(default)]
321    entries: Vec<Community>,
322}
323
324#[derive(Deserialize, Default)]
325struct Community {
326    #[serde(default)]
327    #[allow(dead_code)]
328    id: String,
329    #[serde(default)]
330    slug: String,
331    #[serde(default)]
332    #[allow(dead_code)]
333    metadata: CommunityMetadata,
334}
335
336#[derive(Deserialize, Default)]
337struct CommunityMetadata {
338    #[serde(rename = "type", default)]
339    #[allow(dead_code)]
340    type_: CommunityType,
341}
342
343#[derive(Deserialize, Default)]
344struct CommunityType {
345    #[serde(default)]
346    #[allow(dead_code)]
347    id: String,
348}
349
350#[derive(Deserialize, Default)]
351struct CustomFields {
352    #[serde(rename = "journal:journal", default)]
353    journal: Journal,
354    #[serde(rename = "rs:content_html", default)]
355    content_html: String,
356    #[serde(rename = "rs:image", default)]
357    feature_image: String,
358    #[serde(rename = "rs:generator", default)]
359    generator: String,
360    #[serde(rename = "rs:citations", default)]
361    citations: Vec<InvenioReference>,
362}
363
364#[derive(Deserialize, Default)]
365struct Journal {
366    #[serde(default)]
367    title: String,
368    #[serde(default)]
369    issn: String,
370    #[serde(default)]
371    volume: String,
372    #[serde(default)]
373    issue: String,
374    #[serde(default)]
375    pages: String,
376}
377
378#[derive(Deserialize, Default)]
379struct ContentFile {
380    #[serde(default)]
381    bucket: String,
382    #[serde(default)]
383    key: String,
384    #[serde(default)]
385    checksum: String,
386    links: Option<FileLinks>,
387    #[serde(default)]
388    size: i64,
389    #[serde(rename = "type", default)]
390    type_: String,
391}
392
393#[derive(Deserialize, Default)]
394struct FileLinks {
395    #[serde(rename = "self", default)]
396    self_: String,
397}
398
399// ── Type mappings ─────────────────────────────────────────────────────────────
400
401fn invenio_to_cm_type(id: &str) -> &'static str {
402    C::inveniordm_to_cm(id)
403}
404
405fn is_valid_relation_type(t: &str) -> bool {
406    C::COMMONMETA_RELATION_TYPES.contains(&t)
407}
408
/// Translates a lowercase InvenioRDM `relation_type.id` into the CamelCase
/// Commonmeta relation type.
///
/// The lookup is exact and case-sensitive. Identifiers that are not in the
/// table yield an empty string.
fn invenio_to_cm_relation(id: &str) -> &'static str {
    // (InvenioRDM id, Commonmeta type) pairs.
    const RELATIONS: &[(&str, &str)] = &[
        ("iscitedby", "IsCitedBy"),
        ("cites", "Cites"),
        ("issupplementto", "IsSupplementTo"),
        ("issupplementedby", "IsSupplementedBy"),
        ("iscontinuedby", "IsContinuedBy"),
        ("continues", "Continues"),
        ("isnewversionof", "IsNewVersionOf"),
        // Both spellings map to the same Commonmeta type.
        ("ispreviousversion", "IsPreviousVersionOf"),
        ("ispreviousversionof", "IsPreviousVersionOf"),
        ("ispartof", "IsPartOf"),
        ("haspart", "HasPart"),
        ("isreferencedby", "IsReferencedBy"),
        ("references", "References"),
        ("isdocumentedby", "IsDocumentedBy"),
        ("documents", "Documents"),
        ("iscompiledby", "IsCompiledBy"),
        ("compiles", "Compiles"),
        ("isvariantformof", "IsVariantFormOf"),
        ("isoriginalformof", "IsOriginalFormOf"),
        ("isidenticalto", "IsIdenticalTo"),
        ("istranslationof", "IsTranslationOf"),
        ("isreviewedby", "HasReview"),
        ("reviews", "IsReviewOf"),
        ("ispreprintof", "IsPreprintOf"),
        ("haspreprint", "HasPreprint"),
        ("isderivedfrom", "IsDerivedFrom"),
        ("issourceof", "IsSourceOf"),
        ("describes", "Describes"),
        ("isdescribedby", "IsDescribedBy"),
        ("ismetadatafor", "IsMetadataFor"),
        ("hasmetadata", "HasMetadata"),
        ("isannotatedby", "IsAnnotatedBy"),
        ("annotates", "Annotates"),
        ("iscorrectedby", "IsCorrectedBy"),
        ("corrects", "Corrects"),
    ];

    RELATIONS
        .iter()
        .find(|&&(invenio_id, _)| invenio_id == id)
        .map_or("", |&(_, cm_type)| cm_type)
}
449
/// Returns `true` when the relation id is one of the two citation-style
/// relations, `cites` or `references` (exact, lowercase match).
fn is_reference_relation(id: &str) -> bool {
    id == "cites" || id == "references"
}
453
/// Reports whether `doi` uses one of the Rogue Scholar DOI prefixes
/// (Crossref-registered).
///
/// `doi` may be a bare DOI (`10.59350/abc`), a `doi:`-prefixed DOI or a
/// resolver URL (`https://doi.org/10.59350/abc`). The registrant prefix is
/// taken to be the first `/`-, `:`- or whitespace-delimited segment that
/// starts with `10.`, and it must equal a known prefix exactly. A plain
/// substring test would also accept longer prefixes such as `10.593501` and
/// DOIs of other registrants whose suffix merely contains the digits.
fn is_rogue_scholar_doi(doi: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "10.13003", "10.53731", "10.54900", "10.59347", "10.59348", "10.59349", "10.59350",
        "10.63485", "10.63517", "10.64000", "10.64395", "10.65527",
    ];

    doi.split(|c: char| c == '/' || c == ':' || c.is_whitespace())
        .find(|segment| segment.starts_with("10."))
        .is_some_and(|registrant| PREFIXES.contains(&registrant))
}
462
463// ── Contributor conversion ────────────────────────────────────────────────────
464
465fn get_contributor(v: &Creator, default_role: &str) -> Contributor {
466    // Detect Zenodo legacy format: name directly on creator, no person_or_org
467    if !v.name.is_empty()
468        && v.person_or_org.name.is_empty()
469        && v.person_or_org.family_name.is_empty()
470    {
471        return get_zenodo_contributor(v, default_role);
472    }
473
474    let raw_type = match v.person_or_org.type_.as_str() {
475        "personal" => "Person",
476        "organizational" => "Organization",
477        _ => "",
478    }
479    .to_string();
480
481    let mut id = String::new();
482    for ni in &v.person_or_org.identifiers {
483        match ni.scheme.as_str() {
484            "orcid" => {
485                id = normalize_orcid(&ni.identifier);
486                break;
487            }
488            "ror" | "ROR" => {
489                id = normalize_ror(&ni.identifier);
490                break;
491            }
492            _ => {}
493        }
494    }
495
496    let name = cleanup_author(Some(&v.person_or_org.name)).unwrap_or(v.person_or_org.name.clone());
497    let mut given_name = v.person_or_org.given_name.clone();
498    let mut family_name = v.person_or_org.family_name.clone();
499
500    let mut type_ = infer_contributor_type(
501        &raw_type,
502        &id,
503        &given_name,
504        &family_name,
505        &name,
506        None,
507    );
508
509    if type_.is_empty() {
510        type_ = "Organization".to_string();
511    }
512
513    // Split "Family, Given" for Person type when only name is provided
514    let mut name_out = name.clone();
515    if type_ == "Person" && !name_out.is_empty() && given_name.is_empty() && family_name.is_empty() {
516        let (given, family, remainder) = split_person_name(&name_out);
517        if !given.is_empty() || !family.is_empty() {
518            given_name = given;
519            family_name = family;
520            name_out = String::new();
521        } else {
522            name_out = remainder;
523        }
524    }
525
526    let affiliations = v
527        .affiliations
528        .iter()
529        .filter_map(|a| {
530            let value = serde_json::json!({"id": a.id, "name": a.name});
531            parse_affiliation_value(&value)
532        })
533        .collect();
534
535    let roles = normalize_contributor_roles(&[default_role.to_string()], default_role);
536
537    if type_ == "Person" {
538        Contributor::person(
539            Person { id, given_name, family_name, affiliations, asserted_by: String::new() },
540            roles,
541        )
542    } else {
543        Contributor::organization(
544            Organization { id, name: name_out, asserted_by: String::new() },
545            roles,
546        )
547    }
548}
549
550fn get_zenodo_contributor(v: &Creator, default_role: &str) -> Contributor {
551    let mut id = String::new();
552
553    if !v.orcid.is_empty() {
554        id = normalize_orcid(&v.orcid);
555    }
556
557    let cleaned_name = cleanup_author(Some(&v.name)).unwrap_or(v.name.clone());
558    let (given_name, family_name, name) = split_person_name(&cleaned_name);
559
560    let mut type_ = infer_contributor_type("", &id, &given_name, &family_name, &cleaned_name, None);
561    if type_.is_empty() {
562        type_ = "Organization".to_string();
563    }
564
565    let mut family_name_out = family_name;
566    let mut name_out = name;
567    if type_ == "Person" && family_name_out.is_empty() && !name_out.is_empty() {
568        family_name_out = name_out.clone();
569        name_out = String::new();
570    }
571
572    let affiliations = if !v.affiliation.is_empty() {
573        parse_affiliation_value(&Value::String(v.affiliation.clone()))
574            .into_iter()
575            .collect()
576    } else {
577        vec![]
578    };
579
580    let roles = normalize_contributor_roles(&[default_role.to_string()], default_role);
581
582    if type_ == "Person" {
583        Contributor::person(
584            Person { id, given_name, family_name: family_name_out, affiliations, asserted_by: String::new() },
585            roles,
586        )
587    } else {
588        Contributor::organization(
589            Organization { id, name: name_out, asserted_by: String::new() },
590            roles,
591        )
592    }
593}
594
595// ── Reference helpers ─────────────────────────────────────────────────────────
596
597fn normalize_reference_id(scheme: &str, identifier: &str) -> String {
598    if identifier.is_empty() {
599        return String::new();
600    }
601    match scheme {
602        "doi" => normalize_doi(identifier),
603        "url" => normalize_url(identifier, true, false).unwrap_or_default(),
604        _ => normalize_id(identifier),
605    }
606}
607
608// ── Relation helpers ──────────────────────────────────────────────────────────
609
610fn normalize_relation_id(scheme: &str, identifier: &str) -> String {
611    if identifier.is_empty() {
612        return String::new();
613    }
614    match scheme {
615        "doi" => normalize_doi(identifier),
616        _ => normalize_url(identifier, true, false).unwrap_or_else(|| normalize_id(identifier)),
617    }
618}
619
/// Splits a page string into `(first_page, last_page)`.
///
/// Surrounding whitespace is ignored. The separators `--`, `-`, `–` and `—`
/// are tried in that order, and the first one present in the string splits it
/// at its first occurrence; both halves are trimmed. Without a separator the
/// whole trimmed string is the first page and the last page is empty. Empty
/// or blank input yields two empty strings.
fn parse_pages_range(pages: &str) -> (String, String) {
    // Order matters: "--" has to be tried before the single hyphen.
    const SEPARATORS: [&str; 4] = ["--", "-", "–", "—"];

    let range = pages.trim();
    SEPARATORS
        .iter()
        .find_map(|sep| range.split_once(*sep))
        .map_or_else(
            || (range.to_string(), String::new()),
            |(first, last)| (first.trim().to_string(), last.trim().to_string()),
        )
}
636
637/// Map InvenioRDM relation_type.id to Commonmeta type.
638/// Falls back to capitalizing the first letter (Python behaviour).
639fn map_relation_type(raw: &str) -> String {
640    let mapped = invenio_to_cm_relation(raw);
641    if !mapped.is_empty() {
642        return mapped.to_string();
643    }
644    // Fallback: capitalize first letter (handles already-CamelCase values)
645    if raw.is_empty() {
646        return String::new();
647    }
648    let mut chars = raw.chars();
649    match chars.next() {
650        None => String::new(),
651        Some(c) => c.to_uppercase().to_string() + chars.as_str(),
652    }
653}
654
655// ── Core conversion ───────────────────────────────────────────────────────────
656
657fn from_content(content: Content) -> Data {
658    let mut data = Data {
659        // ID
660        id: if !content.doi.is_empty() {
661            normalize_doi(&content.doi)
662        } else {
663            normalize_doi(&content.pids.doi.identifier)
664        },
665        ..Data::default()
666    };
667
668    // Type: Python prefers resource_type.type then resource_type.id
669    let rt = &content.metadata.resource_type;
670    let type_id = if !rt.type_.is_empty() {
671        &rt.type_
672    } else if !rt.id.is_empty() {
673        &rt.id
674    } else {
675        &rt.subtype
676    };
677    let cm_type = invenio_to_cm_type(type_id);
678    data.type_ = if cm_type.is_empty() {
679        "Other".to_string()
680    } else {
681        cm_type.to_string()
682    };
683
684    // Detect host from links.self_html for Zenodo-specific handling
685    let self_html = content
686        .links
687        .as_ref()
688        .map(|l| l.self_html.as_str())
689        .unwrap_or("");
690    let host = url::Url::parse(self_html)
691        .ok()
692        .and_then(|u| u.host_str().map(|s| s.to_string()))
693        .unwrap_or_default();
694    let is_zenodo = host == "zenodo.org";
695    let is_rogue_scholar = is_rogue_scholar_doi(&data.id);
696
697    // URL
698    // For Rogue Scholar: URL from metadata.identifiers with scheme "url"
699    // Otherwise: links.self_html
700    if is_rogue_scholar {
701        if let Some(url_id) = content
702            .metadata
703            .identifiers
704            .iter()
705            .find(|i| i.scheme == "url")
706        {
707            data.url = normalize_url(&url_id.identifier, true, false).unwrap_or_default();
708        }
709    } else if !self_html.is_empty() {
710        data.url = normalize_url(self_html, true, false).unwrap_or_default();
711    }
712
713    // Container
714    if is_zenodo {
715        let container_type = if data.type_ == "Dataset" {
716            "DataRepository"
717        } else {
718            "Repository"
719        };
720        data.container = Container {
721            identifier: "https://www.re3data.org/repository/r3d100010468".to_string(),
722            identifier_type: "URL".to_string(),
723            type_: container_type.to_string(),
724            title: "Zenodo".to_string(),
725            ..Default::default()
726        };
727        data.publisher = Publisher {
728            name: "Zenodo".to_string(),
729            ..Default::default()
730        };
731    } else if is_rogue_scholar {
732        let slug = content
733            .parent
734            .communities
735            .entries
736            .first()
737            .map(|e| e.slug.as_str())
738            .unwrap_or("");
739        let issn = &content.custom_fields.journal.issn;
740        let (identifier, identifier_type) = if !issn.is_empty() {
741            (issn.clone(), "ISSN".to_string())
742        } else if !slug.is_empty() {
743            (
744                format!("https://rogue-scholar.org/communities/{}", slug),
745                "URL".to_string(),
746            )
747        } else {
748            (String::new(), String::new())
749        };
750        let (first_page, last_page) = parse_pages_range(&content.custom_fields.journal.pages);
751        data.container = Container {
752            type_: "Blog".to_string(),
753            title: content.custom_fields.journal.title.clone(),
754            identifier,
755            identifier_type,
756            platform: content.custom_fields.generator.clone(),
757            volume: content.custom_fields.journal.volume.clone(),
758            issue: content.custom_fields.journal.issue.clone(),
759            first_page,
760            last_page,
761            ..Default::default()
762        };
763        data.publisher = Publisher {
764            name: "Front Matter".to_string(),
765            ..Default::default()
766        };
767    } else if !content.custom_fields.journal.title.is_empty()
768        || !content.custom_fields.journal.issn.is_empty()
769    {
770        let issn = &content.custom_fields.journal.issn;
771        let (identifier, identifier_type) = if !issn.is_empty() {
772            (issn.clone(), "ISSN".to_string())
773        } else {
774            (String::new(), String::new())
775        };
776        let (first_page, last_page) = parse_pages_range(&content.custom_fields.journal.pages);
777        data.container = Container {
778            type_: "Periodical".to_string(),
779            title: content.custom_fields.journal.title.clone(),
780            identifier,
781            identifier_type,
782            platform: content.custom_fields.generator.clone(),
783            volume: content.custom_fields.journal.volume.clone(),
784            issue: content.custom_fields.journal.issue.clone(),
785            first_page,
786            last_page,
787            ..Default::default()
788        };
789    }
790
791    // Publisher (from metadata, if not already set by Zenodo/Rogue Scholar logic)
792    if data.publisher.name.is_empty() && !content.metadata.publisher.is_empty() {
793        data.publisher = Publisher {
794            name: content.metadata.publisher.clone(),
795            ..Default::default()
796        };
797    }
798
799    // BlogPost override: Article → BlogPost when publisher is "Front Matter"
800    if data.type_ == "Article" && data.publisher.name == "Front Matter" {
801        data.type_ = "BlogPost".to_string();
802    }
803
804    // Contributors from metadata.creators (all get "Author" role)
805    for v in &content.metadata.creators {
806        let contributor = get_contributor(v, "Author");
807        let already = data
808            .contributors
809            .iter()
810            .any(|e| !e.id().is_empty() && e.id() == contributor.id());
811        if !already {
812            data.contributors.push(contributor);
813        }
814    }
815    // Contributors from metadata.contributors (with role from role.id)
816    for v in &content.metadata.contributors {
817        let role = v
818            .role
819            .as_ref()
820            .map(|r| {
821                let mut s = r.id.clone();
822                if let Some(first) = s.get_mut(..1) {
823                    first.make_ascii_uppercase();
824                }
825                s
826            })
827            .filter(|s| !s.is_empty())
828            .unwrap_or_else(|| "Other".to_string());
829        let contributor = get_contributor(v, &role);
830        let already = data
831            .contributors
832            .iter()
833            .any(|e| !e.id().is_empty() && e.id() == contributor.id());
834        if !already {
835            data.contributors.push(contributor);
836        }
837    }
838
839    // Dates: Python handles "issued" → published and "updated" → updated only
840    for d in &content.metadata.dates {
841        let t = date_type_str(&d.type_);
842        match t.as_str() {
843            "issued" => data.date_published = d.date.clone(),
844            "updated" => data.date_updated = d.date.clone(),
845            _ => {}
846        }
847    }
848    if data.date_published.is_empty() && !content.metadata.publication_date.is_empty() {
849        data.date_published = content.metadata.publication_date.clone();
850    }
851    // Fallback for updated: top-level meta.updated (strip milliseconds)
852    if data.date_updated.is_empty() && !content.updated.is_empty() {
853        data.date_updated = strip_milliseconds(&content.updated);
854    }
855
856    // Descriptions: description (Abstract) + notes (Other)
857    if !content.metadata.description.is_empty() {
858        data.description = sanitize(&content.metadata.description);
859    }
860    if !content.metadata.notes.is_empty() {
861        data.additional_descriptions.push(Description {
862            description: sanitize(&content.metadata.notes),
863            type_: "Other".to_string(),
864            ..Default::default()
865        });
866    }
867
868    // Feature image
869    if !content.custom_fields.feature_image.is_empty() {
870        data.image = content.custom_fields.feature_image.clone();
871    }
872
873    // Files from top-level `files` list
874    if let Some(files_val) = &content.files
875        && let Ok(files_enabled) = serde_json::from_value::<FilesEnabled>(files_val.clone())
876        && files_enabled.enabled
877        && let Ok(entries) = serde_json::from_value::<FilesWithEntries>(files_val.clone())
878    {
879        for f in entries.entries.values() {
880            if let Ok(cf) = serde_json::from_value::<ContentFile>(f.clone()) {
881                let url = cf
882                    .links
883                    .as_ref()
884                    .map(|l| l.self_.clone())
885                    .unwrap_or_default();
886                if !url.is_empty() {
887                    let mime_type = if !cf.type_.is_empty() {
888                        format!("application/{}", cf.type_)
889                    } else {
890                        String::new()
891                    };
892                    data.files.push(File {
893                        bucket: cf.bucket,
894                        key: cf.key,
895                        checksum: cf.checksum,
896                        url,
897                        size: cf.size,
898                        mime_type,
899                    });
900                }
901            }
902        }
903    }
904
905    // Funding references
906    if !content.metadata.funding.is_empty() {
907        for v in &content.metadata.funding {
908            let funder_id = normalize_ror(&v.funder.id);
909            let award_number = v.award.number.clone();
910            let award_title = v
911                .award
912                .title
913                .as_ref()
914                .map(|t| t.en.clone())
915                .unwrap_or_default();
916            // award URI: from award.identifiers[0], normalize DOI if possible
917            let raw_award_uri = v
918                .award
919                .identifiers
920                .first()
921                .map(|i| i.identifier.as_str())
922                .unwrap_or("");
923            let award_uri = if !raw_award_uri.is_empty() {
924                let doi = normalize_doi(raw_award_uri);
925                if !doi.is_empty() {
926                    doi
927                } else {
928                    normalize_url(raw_award_uri, true, false).unwrap_or_default()
929                }
930            } else {
931                String::new()
932            };
933            data.funding_references.push(FundingReference {
934                funder_id,
935                funder_name: v.funder.name.clone(),
936                award_number,
937                award_title,
938                award_id: award_uri,
939                ..Default::default()
940            });
941        }
942    } else if !content.metadata.grants.is_empty() {
943        for v in &content.metadata.grants {
944            let funder_id = normalize_doi(&v.funder.doi);
945            let award_uri = normalize_url(&v.url, true, false).unwrap_or_default();
946            data.funding_references.push(FundingReference {
947                funder_id,
948                funder_name: v.funder.name.clone(),
949                award_number: v.code.clone(),
950                award_title: v.title.clone(),
951                award_id: award_uri,
952                ..Default::default()
953            });
954        }
955    }
956
957    // Identifiers: DOI first, then only doi/uuid/guid schemes from metadata
958    if !data.id.is_empty() {
959        data.identifiers.push(Identifier {
960            identifier: data.id.clone(),
961            identifier_type: "DOI".to_string(),
962        });
963    }
964    for v in &content.metadata.identifiers {
965        if v.scheme == "url" {
966            // URL goes into data.url, not identifiers
967            continue;
968        }
969        let identifier_type = match v.scheme.as_str() {
970            "doi" => "DOI",
971            "uuid" => "UUID",
972            "guid" => "GUID",
973            _ => continue,
974        };
975        data.identifiers.push(Identifier {
976            identifier: v.identifier.clone(),
977            identifier_type: identifier_type.to_string(),
978        });
979    }
980    // RID from record id field
981    if let Some(id_val) = &content.id
982        && let Some(s) = id_val.as_str()
983        && !s.is_empty()
984    {
985        data.identifiers.push(Identifier {
986            identifier: s.to_string(),
987            identifier_type: "RID".to_string(),
988        });
989    }
990
991    // Language: metadata.language first, then metadata.languages[0].id
992    if !content.metadata.language.is_empty() {
993        data.language = get_language(&content.metadata.language, "iso639-1");
994    } else if !content.metadata.languages.is_empty() {
995        data.language = get_language(&content.metadata.languages[0].id, "iso639-1");
996    }
997
998    // License from rights list (new) or legacy license field
999    if !content.metadata.rights.is_empty() {
1000        data.license = crate::spdx::from_id(&content.metadata.rights[0].id);
1001    } else if let Some(lic) = &content.metadata.license
1002        && !lic.id.is_empty()
1003    {
1004        data.license = crate::spdx::from_id(&lic.id);
1005    }
1006
1007    // Provider
1008    data.provider = if is_rogue_scholar {
1009        "Crossref".to_string()
1010    } else {
1011        "DataCite".to_string()
1012    };
1013
1014    // Subjects: metadata.subjects + metadata.keywords merged
1015    for v in &content.metadata.subjects {
1016        if v.id.contains("openalex.org") {
1017            let id_part = v.id.rsplit('/').next().unwrap_or("");
1018            if let Some((id, subject)) = crate::vocabularies::lookup_openalex_subject(id_part) {
1019                let subj = Subject { id, subject, ..Default::default() };
1020                if !data.subjects.contains(&subj) {
1021                    data.subjects.push(subj);
1022                }
1023            }
1024        } else {
1025            let s = subject_string(v);
1026            if s.is_empty() {
1027                continue;
1028            }
1029            let subj = Subject { subject: s, ..Default::default() };
1030            if !data.subjects.contains(&subj) {
1031                data.subjects.push(subj);
1032            }
1033        }
1034    }
1035    for kw in &content.metadata.keywords {
1036        let s = match kw {
1037            Value::String(s) => s.clone(),
1038            _ => continue,
1039        };
1040        if s.is_empty() {
1041            continue;
1042        }
1043        let subj = Subject { subject: s, ..Default::default() };
1044        if !data.subjects.contains(&subj) {
1045            data.subjects.push(subj);
1046        }
1047    }
1048
1049    // References from metadata.references; fall back to related_identifiers if empty
1050    if !content.metadata.references.is_empty() {
1051        for v in &content.metadata.references {
1052            let id = normalize_reference_id(&v.scheme, &v.identifier);
1053            data.references.push(Reference {
1054                id,
1055                unstructured: v.reference.clone(),
1056                ..Default::default()
1057            });
1058        }
1059    } else {
1060        for v in &content.metadata.related_identifiers {
1061            let relation_id = relation_type_id(v);
1062            if is_reference_relation(&relation_id) {
1063                let id = normalize_relation_id(&v.scheme, &v.identifier);
1064                if !id.is_empty() {
1065                    data.references.push(Reference {
1066                        id,
1067                        ..Default::default()
1068                    });
1069                }
1070            }
1071        }
1072    }
1073
1074    // Citations (works that cite this resource) from custom_fields.rs:citations
1075    for v in &content.custom_fields.citations {
1076        let id = normalize_reference_id(&v.scheme, &v.identifier);
1077        data.citations.push(Citation {
1078            id,
1079            citation: v.reference.clone(),
1080            ..Default::default()
1081        });
1082    }
1083
1084    // Relations from related_identifiers (excluding references)
1085    for v in &content.metadata.related_identifiers {
1086        let relation_id = relation_type_id(v);
1087        if is_reference_relation(&relation_id) {
1088            continue;
1089        }
1090        let id = normalize_relation_id(&v.scheme, &v.identifier);
1091        if id.is_empty() {
1092            continue;
1093        }
1094        let type_ = map_relation_type(&relation_id);
1095        if !type_.is_empty() && is_valid_relation_type(&type_) {
1096            let rel = Relation { id, type_, ..Default::default() };
1097            if !data.relations.contains(&rel) {
1098                data.relations.push(rel);
1099            }
1100        }
1101    }
1102
1103    // IsVersionOf relations
1104    if !content.conceptdoi.is_empty() {
1105        let id = normalize_doi(&content.conceptdoi);
1106        if !id.is_empty() {
1107            data.relations.push(Relation {
1108                id,
1109                type_: "IsVersionOf".to_string(),
1110                ..Default::default()
1111            });
1112        }
1113    } else if data.id.contains("10.59350") && !content.parent.communities.default.is_empty() {
1114        let parent_id = &content.parent.communities.default;
1115        let id = normalize_doi(&format!("10.59350/{}", parent_id));
1116        if !id.is_empty() {
1117            data.relations.push(Relation {
1118                id,
1119                type_: "IsVersionOf".to_string(),
1120                ..Default::default()
1121            });
1122        }
1123    }
1124
1125    // ISSN IsPartOf relation for Rogue Scholar
1126    if is_rogue_scholar && !content.custom_fields.journal.issn.is_empty() {
1127        let issn_url = issn_as_url(&content.custom_fields.journal.issn);
1128        let rel = Relation {
1129            id: issn_url,
1130            type_: "IsPartOf".to_string(),
1131            ..Default::default()
1132        };
1133        if !data.relations.contains(&rel) {
1134            data.relations.push(rel);
1135        }
1136    }
1137
1138    // Title
1139    if !content.metadata.title.is_empty() {
1140        data.title = sanitize(&content.metadata.title);
1141    }
1142
1143    // Version
1144    data.version = content.metadata.version.clone();
1145
1146    // Full-text HTML
1147    if !content.custom_fields.content_html.is_empty() {
1148        data.content = content.custom_fields.content_html.clone();
1149    }
1150
1151    data
1152}
1153
1154// ── Small helpers ─────────────────────────────────────────────────────────────
1155
1156fn date_type_str(type_: &Option<Value>) -> String {
1157    match type_ {
1158        Some(Value::Object(m)) => m
1159            .get("id")
1160            .and_then(|v| v.as_str())
1161            .unwrap_or("")
1162            .to_string(),
1163        Some(Value::String(s)) => s.clone(),
1164        _ => String::new(),
1165    }
1166}
1167
/// Removes the fractional-seconds part of a timestamp.
///
/// Everything from the first `.` through the run of ASCII digits that follows
/// it is dropped, e.g.
/// `"2024-01-15T12:34:56.789012+00:00"` → `"2024-01-15T12:34:56+00:00"`.
/// Input without a `.` is returned unchanged.
fn strip_milliseconds(ts: &str) -> String {
    let Some((before_dot, after_dot)) = ts.split_once('.') else {
        return ts.to_string();
    };
    // Skip the fraction digits; whatever follows (offset, "Z", ...) is kept.
    let suffix = after_dot.trim_start_matches(|c: char| c.is_ascii_digit());
    format!("{before_dot}{suffix}")
}
1180
1181fn relation_type_id(v: &RelatedIdentifier) -> String {
1182    if !v.relation_type.id.is_empty() {
1183        v.relation_type.id.clone()
1184    } else {
1185        v.relation.to_lowercase()
1186    }
1187}
1188
1189fn subject_string(v: &Subject_) -> String {
1190    if v.subject.is_empty() {
1191        return String::new();
1192    }
1193    match v.scheme.as_str() {
1194        "FOS" => format!("FOS: {}", v.subject),
1195        "Domains" => format!("Domain: {}", v.subject),
1196        "Fields" => format!("Field: {}", v.subject),
1197        "Subfields" => format!("Subfield: {}", v.subject),
1198        "Topics" => format!("Topic: {}", v.subject),
1199        _ => v.subject.clone(),
1200    }
1201}
1202
1203// Minimal structs for parsing the `files` field
1204#[derive(Deserialize, Default)]
1205struct FilesEnabled {
1206    #[serde(default)]
1207    enabled: bool,
1208}
1209
1210#[derive(Deserialize, Default)]
1211struct FilesWithEntries {
1212    #[serde(default)]
1213    entries: std::collections::HashMap<String, Value>,
1214}
1215
1216// ── Writer ────────────────────────────────────────────────────────────────────
1217
1218// ── Output structs ────────────────────────────────────────────────────────────
1219
1220#[derive(Serialize, Default)]
1221struct OutInveniordm {
1222    pids: OutPids,
1223    access: OutAccess,
1224    files: OutFiles,
1225    metadata: OutMetadata,
1226    #[serde(
1227        rename = "custom_fields",
1228        skip_serializing_if = "OutCustomFields::is_empty"
1229    )]
1230    custom_fields: OutCustomFields,
1231}
1232
1233#[derive(Serialize, Default)]
1234struct OutPids {
1235    #[serde(rename = "doi")]
1236    doi: OutDoi,
1237}
1238
1239#[derive(Serialize, Default)]
1240struct OutDoi {
1241    identifier: String,
1242    provider: String,
1243}
1244
1245#[derive(Serialize, Default)]
1246struct OutAccess {
1247    record: String,
1248    files: String,
1249}
1250
1251#[derive(Serialize, Default)]
1252struct OutFiles {
1253    enabled: bool,
1254}
1255
1256#[derive(Serialize, Default)]
1257struct OutMetadata {
1258    resource_type: OutResourceType,
1259    creators: Vec<OutCreator>,
1260    #[serde(skip_serializing_if = "Vec::is_empty")]
1261    contributors: Vec<OutContributor>,
1262    title: String,
1263    publication_date: String,
1264    #[serde(skip_serializing_if = "String::is_empty")]
1265    publisher: String,
1266    #[serde(skip_serializing_if = "Vec::is_empty")]
1267    identifiers: Vec<OutIdentifier>,
1268    #[serde(skip_serializing_if = "Vec::is_empty")]
1269    dates: Vec<OutDate>,
1270    #[serde(skip_serializing_if = "String::is_empty")]
1271    description: String,
1272    #[serde(skip_serializing_if = "Vec::is_empty")]
1273    funding: Vec<OutFunding>,
1274    #[serde(skip_serializing_if = "Vec::is_empty")]
1275    languages: Vec<OutLanguage>,
1276    #[serde(skip_serializing_if = "Vec::is_empty")]
1277    subjects: Vec<OutSubject>,
1278    #[serde(skip_serializing_if = "Vec::is_empty")]
1279    rights: Vec<OutRight>,
1280    #[serde(skip_serializing_if = "Vec::is_empty")]
1281    references: Vec<OutReference>,
1282    #[serde(rename = "related_identifiers", skip_serializing_if = "Vec::is_empty")]
1283    related_identifiers: Vec<OutRelatedIdentifier>,
1284    #[serde(skip_serializing_if = "String::is_empty")]
1285    version: String,
1286}
1287
1288#[derive(Serialize, Default)]
1289struct OutResourceType {
1290    id: String,
1291}
1292
1293#[derive(Serialize, Default)]
1294struct OutCreator {
1295    person_or_org: OutPersonOrOrg,
1296    #[serde(skip_serializing_if = "Vec::is_empty")]
1297    affiliations: Vec<OutAffiliation>,
1298}
1299
1300#[derive(Serialize, Default)]
1301struct OutPersonOrOrg {
1302    #[serde(rename = "type")]
1303    type_: String,
1304    #[serde(skip_serializing_if = "String::is_empty")]
1305    name: String,
1306    #[serde(rename = "given_name", skip_serializing_if = "String::is_empty")]
1307    given_name: String,
1308    #[serde(rename = "family_name", skip_serializing_if = "String::is_empty")]
1309    family_name: String,
1310    #[serde(skip_serializing_if = "Vec::is_empty")]
1311    identifiers: Vec<OutIdentifier>,
1312}
1313
1314#[derive(Serialize, Default)]
1315struct OutContributor {
1316    person_or_org: OutPersonOrOrg,
1317    role: OutTypeId,
1318    #[serde(skip_serializing_if = "Vec::is_empty")]
1319    affiliations: Vec<OutAffiliation>,
1320}
1321
1322#[derive(Serialize, Default)]
1323struct OutAffiliation {
1324    #[serde(skip_serializing_if = "String::is_empty")]
1325    id: String,
1326    #[serde(skip_serializing_if = "String::is_empty")]
1327    name: String,
1328}
1329
1330#[derive(Serialize, Default)]
1331struct OutIdentifier {
1332    identifier: String,
1333    scheme: String,
1334}
1335
1336#[derive(Serialize, Default)]
1337struct OutDate {
1338    date: String,
1339    #[serde(rename = "type")]
1340    type_: OutTypeId,
1341}
1342
1343#[derive(Serialize, Default)]
1344struct OutTypeId {
1345    id: String,
1346}
1347
1348#[derive(Serialize, Default)]
1349struct OutFunding {
1350    funder: OutFunder,
1351    #[serde(skip_serializing_if = "OutAward::is_empty")]
1352    award: OutAward,
1353}
1354
1355#[derive(Serialize, Default)]
1356struct OutFunder {
1357    #[serde(skip_serializing_if = "String::is_empty")]
1358    id: String,
1359    name: String,
1360}
1361
1362#[derive(Serialize, Default)]
1363struct OutAward {
1364    #[serde(skip_serializing_if = "String::is_empty")]
1365    number: String,
1366    #[serde(skip_serializing_if = "OutAwardTitle::is_empty")]
1367    title: OutAwardTitle,
1368    #[serde(skip_serializing_if = "Vec::is_empty")]
1369    identifiers: Vec<OutIdentifier>,
1370}
1371
1372impl OutAward {
1373    fn is_empty(&self) -> bool {
1374        self.number.is_empty() && self.title.is_empty() && self.identifiers.is_empty()
1375    }
1376}
1377
1378#[derive(Serialize, Default)]
1379struct OutAwardTitle {
1380    #[serde(skip_serializing_if = "String::is_empty")]
1381    en: String,
1382}
1383
1384impl OutAwardTitle {
1385    fn is_empty(&self) -> bool {
1386        self.en.is_empty()
1387    }
1388}
1389
1390#[derive(Serialize, Default)]
1391struct OutLanguage {
1392    id: String,
1393}
1394
1395#[derive(Serialize, Default)]
1396struct OutSubject {
1397    subject: String,
1398    #[serde(skip_serializing_if = "String::is_empty")]
1399    id: String,
1400    #[serde(skip_serializing_if = "String::is_empty")]
1401    scheme: String,
1402}
1403
1404#[derive(Serialize, Default)]
1405struct OutRight {
1406    id: String,
1407}
1408
1409#[derive(Serialize, Default)]
1410struct OutReference {
1411    reference: String,
1412    #[serde(skip_serializing_if = "String::is_empty")]
1413    scheme: String,
1414    #[serde(skip_serializing_if = "String::is_empty")]
1415    identifier: String,
1416}
1417
1418#[derive(Serialize, Default)]
1419struct OutRelatedIdentifier {
1420    identifier: String,
1421    scheme: String,
1422    relation_type: OutTypeId,
1423}
1424
1425#[derive(Serialize, Default)]
1426struct OutCustomFields {
1427    #[serde(
1428        rename = "journal:journal",
1429        skip_serializing_if = "OutJournal::is_empty"
1430    )]
1431    journal: OutJournal,
1432    #[serde(rename = "rs:content_html", skip_serializing_if = "String::is_empty")]
1433    content_html: String,
1434    #[serde(rename = "rs:image", skip_serializing_if = "String::is_empty")]
1435    feature_image: String,
1436    #[serde(rename = "feed:generator", skip_serializing_if = "String::is_empty")]
1437    generator: String,
1438}
1439
1440impl OutCustomFields {
1441    fn is_empty(&self) -> bool {
1442        self.journal.is_empty()
1443            && self.content_html.is_empty()
1444            && self.feature_image.is_empty()
1445            && self.generator.is_empty()
1446    }
1447}
1448
1449#[derive(Serialize, Default)]
1450struct OutJournal {
1451    #[serde(skip_serializing_if = "String::is_empty")]
1452    title: String,
1453    #[serde(skip_serializing_if = "String::is_empty")]
1454    issn: String,
1455    #[serde(skip_serializing_if = "String::is_empty")]
1456    volume: String,
1457    #[serde(skip_serializing_if = "String::is_empty")]
1458    issue: String,
1459    #[serde(skip_serializing_if = "String::is_empty")]
1460    pages: String,
1461}
1462
1463impl OutJournal {
1464    fn is_empty(&self) -> bool {
1465        self.title.is_empty()
1466            && self.issn.is_empty()
1467            && self.volume.is_empty()
1468            && self.issue.is_empty()
1469            && self.pages.is_empty()
1470    }
1471}
1472
1473// ── Writer type mappings ──────────────────────────────────────────────────────
1474
1475fn cm_to_invenio_type(cm: &str) -> &'static str {
1476    C::cm_to_inveniordm(cm)
1477}
1478
/// Maps a commonmeta identifier type to the InvenioRDM identifier scheme.
///
/// The lookup is case-sensitive; an unknown type yields `""`.
fn cm_to_invenio_identifier(cm: &str) -> &'static str {
    // (commonmeta identifier type, InvenioRDM scheme)
    const SCHEMES: &[(&str, &str)] = &[
        ("Ark", "ark"),
        ("arXiv", "arxiv"),
        ("Bibcode", "ads"),
        ("CrossrefFunderID", "crossreffunderid"),
        ("DOI", "doi"),
        ("EAN13", "ean13"),
        ("EISSN", "eissn"),
        ("GRID", "grid"),
        ("Handle", "handle"),
        ("IGSN", "igsn"),
        ("ISBN", "isbn"),
        ("ISNI", "isni"),
        ("ISSN", "issn"),
        ("ISTC", "istc"),
        ("LISSN", "lissn"),
        ("LSID", "lsid"),
        ("PMID", "pmid"),
        ("PURL", "purl"),
        ("UPC", "upc"),
        ("URL", "url"),
        ("URN", "urn"),
        ("W3ID", "w3id"),
        ("GUID", "guid"),
        ("UUID", "uuid"),
        ("Other", "other"),
    ];
    SCHEMES
        .iter()
        .find(|&&(name, _)| name == cm)
        .map_or("", |&(_, scheme)| scheme)
}
1509
1510fn cm_to_invenio_contributor_role(cm: &str) -> &'static str {
1511    let r = C::cm_to_inveniordm_role(cm);
1512    if r == "other" { "" } else { r }
1513}
1514
/// Maps a commonmeta relation type to the InvenioRDM relation type id.
///
/// The lookup is case-sensitive; a relation type without a mapping yields
/// `""`, which callers use to skip the relation.
fn cm_to_invenio_relation(cm: &str) -> &'static str {
    match cm {
        "IsCitedBy" => "iscitedby",
        "Cites" => "cites",
        "IsSupplementTo" => "issupplementto",
        "IsSupplementedBy" => "issupplementedby",
        "IsContinuedBy" => "iscontinuedby",
        "Continues" => "continues",
        "IsNewVersionOf" => "isnewversionof",
        // The InvenioRDM vocabulary id ends in "of" (lowercased DataCite
        // `IsPreviousVersionOf`); the shorter commonmeta spelling is accepted
        // as an alias.
        "IsPreviousVersion" | "IsPreviousVersionOf" => "ispreviousversionof",
        "IsPartOf" => "ispartof",
        "HasPart" => "haspart",
        "IsReferencedBy" => "isreferencedby",
        "References" => "references",
        "IsDocumentedBy" => "isdocumentedby",
        "Documents" => "documents",
        "IsCompiledBy" => "iscompiledby",
        "Compiles" => "compiles",
        "IsVariantFormOf" => "isvariantformof",
        "IsOriginalFormOf" => "isoriginalformof",
        "IsIdenticalTo" => "isidenticalto",
        "IsReviewOf" => "reviews",
        "HasReview" => "isreviewedby",
        "IsDerivedFrom" => "isderivedfrom",
        "IsSourceOf" => "issourceof",
        "Describes" => "describes",
        "IsDescribedBy" => "isdescribedby",
        "IsMetadataFor" => "ismetadatafor",
        "HasMetadata" => "hasmetadata",
        "IsAnnotatedBy" => "isannotatedby",
        "Annotates" => "annotates",
        "IsCorrectedBy" => "iscorrectedby",
        "Corrects" => "corrects",
        "IsVersionOf" => "isversionof",
        "HasVersion" => "hasversion",
        "IsTranslationOf" => "istranslationof",
        // A preprint is written as a previous version of the published work.
        "IsPreprintOf" => "ispreviousversionof",
        // NOTE(review): "haspreprint" does not look like a stock InvenioRDM
        // relation type; confirm the target instance defines it.
        "HasPreprint" => "haspreprint",
        _ => "",
    }
}
1556
1557// ── Core conversion ───────────────────────────────────────────────────────────
1558
1559fn convert(data: &Data) -> OutInveniordm {
1560    use crate::doi_utils::validate_doi;
1561    use crate::utils::{get_language, validate_id, validate_orcid, validate_ror};
1562
1563    let mut out = OutInveniordm::default();
1564
1565    // DOI
1566    let doi = doi_from_identifiers(data)
1567        .or_else(|| validate_doi(&data.id))
1568        .unwrap_or_default();
1569    let provider = if is_rogue_scholar_doi(&data.id) {
1570        "crossref"
1571    } else {
1572        "external"
1573    };
1574    out.pids.doi = OutDoi {
1575        identifier: doi,
1576        provider: provider.to_string(),
1577    };
1578
1579    // Access
1580    out.access = OutAccess {
1581        record: "public".to_string(),
1582        files: "public".to_string(),
1583    };
1584    out.files = OutFiles { enabled: false };
1585
1586    // Resource type
1587    out.metadata.resource_type = OutResourceType {
1588        id: cm_to_invenio_type(&data.type_).to_string(),
1589    };
1590
1591    // Title
1592    out.metadata.title = if !data.title.is_empty() {
1593        data.title.clone()
1594    } else {
1595        "No title".to_string()
1596    };
1597
1598    // Publication date
1599    out.metadata.publication_date = if !data.date_published.is_empty() {
1600        parse_date(&data.date_published)
1601    } else if !data.dates.available.is_empty() {
1602        parse_date(&data.dates.available)
1603    } else if !data.dates.created.is_empty() {
1604        parse_date(&data.dates.created)
1605    } else {
1606        String::new()
1607    };
1608
1609    // Creators (contributors with "Author" role)
1610    if data
1611        .contributors
1612        .iter()
1613        .any(|c| c.roles.contains(&"Author".to_string()))
1614    {
1615        for v in &data.contributors {
1616            if !v.roles.contains(&"Author".to_string()) {
1617                continue;
1618            }
1619            let mut identifiers = vec![];
1620            if !v.id().is_empty()
1621                && let Some(orcid) = validate_orcid(v.id())
1622            {
1623                identifiers.push(OutIdentifier {
1624                    identifier: orcid,
1625                    scheme: "orcid".to_string(),
1626                });
1627            }
1628
1629            let mut affiliations = vec![];
1630            for a in v.affiliations() {
1631                let aff_id = validate_ror(&a.id).unwrap_or_default();
1632                let aff = OutAffiliation {
1633                    id: aff_id,
1634                    name: a.name.clone(),
1635                };
1636                let duplicate = affiliations
1637                    .iter()
1638                    .any(|e: &OutAffiliation| !e.id.is_empty() && e.id == aff.id);
1639                if !duplicate {
1640                    affiliations.push(aff);
1641                }
1642            }
1643
1644            // type_: "Person"→"personal", "Organization"→"organizational"
1645            let ptype = match v.type_.as_str() {
1646                "Person" => "personal",
1647                "Organization" => "organizational",
1648                _ => "organizational",
1649            };
1650
1651            out.metadata.creators.push(OutCreator {
1652                person_or_org: OutPersonOrOrg {
1653                    type_: ptype.to_string(),
1654                    name: if v.type_ == "Organization" { v.name() } else { String::new() },
1655                    given_name: v.given_name().to_string(),
1656                    family_name: v.family_name().to_string(),
1657                    identifiers,
1658                },
1659                affiliations,
1660            });
1661        }
1662    } else {
1663        // Placeholder when no authors
1664        out.metadata.creators.push(OutCreator {
1665            person_or_org: OutPersonOrOrg {
1666                type_: "organizational".to_string(),
1667                name: "No author".to_string(),
1668                ..Default::default()
1669            },
1670            affiliations: vec![],
1671        });
1672    }
1673
1674    // Contributors (non-Author roles)
1675    for v in &data.contributors {
1676        for role in &v.roles {
1677            if role == "Author" {
1678                continue;
1679            }
1680            let role_id = cm_to_invenio_contributor_role(role);
1681            if role_id.is_empty() {
1682                continue;
1683            }
1684
1685            let mut identifiers = vec![];
1686            if !v.id().is_empty()
1687                && let Some(orcid) = validate_orcid(v.id())
1688            {
1689                identifiers.push(OutIdentifier {
1690                    identifier: orcid,
1691                    scheme: "orcid".to_string(),
1692                });
1693            }
1694
1695            let mut affiliations = vec![];
1696            if v.type_ == "Person" {
1697                for a in v.affiliations() {
1698                    let aff_id = validate_ror(&a.id).unwrap_or_default();
1699                    affiliations.push(OutAffiliation {
1700                        id: aff_id,
1701                        name: a.name.clone(),
1702                    });
1703                }
1704            }
1705
1706            let ptype = match v.type_.as_str() {
1707                "Person" => "personal",
1708                "Organization" => "organizational",
1709                _ => "organizational",
1710            };
1711
1712            out.metadata.contributors.push(OutContributor {
1713                person_or_org: OutPersonOrOrg {
1714                    type_: ptype.to_string(),
1715                    name: if v.type_ == "Organization" { v.name() } else { String::new() },
1716                    given_name: v.given_name().to_string(),
1717                    family_name: v.family_name().to_string(),
1718                    identifiers,
1719                },
1720                role: OutTypeId {
1721                    id: role_id.to_string(),
1722                },
1723                affiliations,
1724            });
1725            break; // use first non-Author role only
1726        }
1727    }
1728
1729    // Publisher
1730    out.metadata.publisher = data.publisher.name.clone();
1731
1732    // Container → custom_fields.journal:journal
1733    // Only include journal title when container type is Journal, Periodical, or Blog
1734    let container_type = data.container.type_.as_str();
1735    if !data.container.title.is_empty()
1736        && matches!(container_type, "Journal" | "Periodical" | "Blog")
1737    {
1738        out.custom_fields.journal.title = data.container.title.clone();
1739    }
1740    if !data.container.platform.is_empty() {
1741        out.custom_fields.generator = data.container.platform.clone();
1742    }
1743    if !data.container.volume.is_empty() {
1744        out.custom_fields.journal.volume = data.container.volume.clone();
1745    }
1746    if !data.container.issue.is_empty() {
1747        out.custom_fields.journal.issue = data.container.issue.clone();
1748    }
1749    if !data.container.first_page.is_empty() {
1750        out.custom_fields.journal.pages = container_pages(&data.container);
1751    }
1752    if !data.container.identifier.is_empty() && data.container.identifier_type == "ISSN" {
1753        out.custom_fields.journal.issn = data.container.identifier.clone();
1754    }
1755
1756    // Optional custom fields
1757    out.custom_fields.content_html = data.content.clone();
1758    out.custom_fields.feature_image = data.image.clone();
1759
1760    // Identifiers: skip the primary DOI, add URL separately
1761    for v in &data.identifiers {
1762        let scheme = cm_to_invenio_identifier(&v.identifier_type);
1763        if scheme.is_empty() {
1764            continue;
1765        }
1766        // skip the record's own DOI
1767        if v.identifier_type == "DOI"
1768            && normalize_id_for_doi(&v.identifier) == normalize_id_for_doi(&data.id)
1769        {
1770            continue;
1771        }
1772        out.metadata.identifiers.push(OutIdentifier {
1773            identifier: v.identifier.clone(),
1774            scheme: scheme.to_string(),
1775        });
1776    }
1777    // Add URL as identifier
1778    if !data.url.is_empty() {
1779        out.metadata.identifiers.push(OutIdentifier {
1780            identifier: data.url.clone(),
1781            scheme: "url".to_string(),
1782        });
1783    }
1784
1785    // Dates: iterate over Date fields
1786    let date_fields: &[(&str, &str)] = &[
1787        ("created", &data.dates.created),
1788        ("submitted", &data.dates.submitted),
1789        ("accepted", &data.dates.accepted),
1790        ("issued", &data.date_published), // "published" → "issued"
1791        ("updated", &data.date_updated),
1792        ("other", &data.dates.accessed), // "accessed" → "other"
1793        ("available", &data.dates.available),
1794        ("copyrighted", &data.dates.copyrighted),
1795        ("collected", &data.dates.collected),
1796        ("valid", &data.dates.valid),
1797        ("withdrawn", &data.dates.withdrawn),
1798        ("other", &data.dates.other),
1799    ];
1800    for (id, date) in date_fields {
1801        if !date.is_empty() {
1802            out.metadata.dates.push(OutDate {
1803                date: date.to_string(),
1804                type_: OutTypeId { id: id.to_string() },
1805            });
1806        }
1807    }
1808
1809    // Description
1810    if !data.description.is_empty() {
1811        out.metadata.description = data.description.clone();
1812    }
1813
1814    // Funding references
1815    for v in &data.funding_references {
1816        let ror_id = if v.funder_id.starts_with("https://doi.org/10.13039/") {
1817            // Crossref Funder IDs are not ROR IDs; no conversion available
1818            String::new()
1819        } else {
1820            let (validated_id, funder_id_type) = validate_id(&v.funder_id);
1821            if funder_id_type == "ROR" {
1822                validate_ror(&validated_id).unwrap_or_default()
1823            } else {
1824                String::new()
1825            }
1826        };
1827
1828        let funder = OutFunder {
1829            id: ror_id,
1830            name: v.funder_name.clone(),
1831        };
1832
1833        let award =
1834            if !v.award_number.is_empty() || !v.award_title.is_empty() || !v.award_id.is_empty() {
1835                let mut identifiers = vec![];
1836                if !v.award_id.is_empty() {
1837                    let (award_id_val, award_id_type) = validate_id(&v.award_id);
1838                    let scheme = cm_to_invenio_identifier(award_id_type);
1839                    if !award_id_val.is_empty() && !scheme.is_empty() {
1840                        identifiers.push(OutIdentifier {
1841                            identifier: award_id_val,
1842                            scheme: scheme.to_string(),
1843                        });
1844                    }
1845                }
1846                OutAward {
1847                    number: v.award_number.clone(),
1848                    title: OutAwardTitle {
1849                        en: v.award_title.clone(),
1850                    },
1851                    identifiers,
1852                }
1853            } else {
1854                OutAward::default()
1855            };
1856
1857        out.metadata.funding.push(OutFunding { funder, award });
1858    }
1859
1860    // Language
1861    if !data.language.is_empty() {
1862        let lang3 = get_language(&data.language, "iso639-3");
1863        if !lang3.is_empty() {
1864            out.metadata.languages.push(OutLanguage { id: lang3 });
1865        }
1866    }
1867
1868    // Subjects
1869    for v in &data.subjects {
1870        out.metadata.subjects.push(OutSubject {
1871            subject: v.subject.clone(),
1872            ..Default::default()
1873        });
1874    }
1875
1876    // License
1877    let right_id = if !data.license.id.is_empty() {
1878        data.license.id.to_lowercase()
1879    } else if !data.license.url.is_empty() {
1880        crate::spdx::from_url(&data.license.url).id.to_lowercase()
1881    } else {
1882        String::new()
1883    };
1884    if !right_id.is_empty() {
1885        out.metadata.rights.push(OutRight { id: right_id });
1886    }
1887
1888    // References
1889    for v in &data.references {
1890        let (ref_id, ref_id_type) = validate_id(&v.id);
1891        let scheme = cm_to_invenio_identifier(ref_id_type).to_string();
1892        let unstructured = if v.unstructured.is_empty() {
1893            // Build from reference + year
1894            let mut u = if !v.reference.is_empty() {
1895                v.reference.clone()
1896            } else {
1897                "Unknown title".to_string()
1898            };
1899            if !v.publication_year.is_empty() {
1900                u.push_str(&format!(" ({}).", v.publication_year));
1901            }
1902            u
1903        } else {
1904            let mut u = v.unstructured.clone();
1905            // Remove duplicate ID from unstructured text
1906            if !v.id.is_empty() {
1907                u = u.replace(&v.id, "");
1908            }
1909            u.trim_end().to_string()
1910        };
1911        out.metadata.references.push(OutReference {
1912            reference: unstructured,
1913            scheme,
1914            identifier: ref_id,
1915        });
1916    }
1917
1918    // Relations (exclude IsPartOf — captured in container/communities)
1919    for v in &data.relations {
1920        if v.type_ == "IsPartOf" {
1921            continue;
1922        }
1923        let (rel_id, id_type) = validate_id(&v.id);
1924        let scheme = cm_to_invenio_identifier(id_type);
1925        let relation_type = cm_to_invenio_relation(&v.type_);
1926        if !rel_id.is_empty() && !scheme.is_empty() && !relation_type.is_empty() {
1927            out.metadata.related_identifiers.push(OutRelatedIdentifier {
1928                identifier: rel_id,
1929                scheme: scheme.to_string(),
1930                relation_type: OutTypeId {
1931                    id: relation_type.to_string(),
1932                },
1933            });
1934        }
1935    }
1936
1937    // Version
1938    out.metadata.version = data.version.clone();
1939
1940    out
1941}
1942
/// Returns the date portion of a timestamp: the first 10 bytes of `d`
/// (`YYYY-MM-DD` for an ISO 8601 value).
///
/// Inputs shorter than 10 bytes are returned unchanged. Inputs whose 10th
/// byte falls inside a multi-byte UTF-8 character are also returned
/// unchanged instead of panicking, since they cannot be an ASCII ISO date.
fn parse_date(d: &str) -> String {
    // `str::get` yields `None` both for out-of-range slices and for slices
    // that would split a character, so no input can panic here.
    d.get(..10).unwrap_or(d).to_string()
}
1951
1952fn container_pages(c: &crate::data::Container) -> String {
1953    if !c.first_page.is_empty() && !c.last_page.is_empty() {
1954        format!("{}-{}", c.first_page, c.last_page)
1955    } else {
1956        c.first_page.clone()
1957    }
1958}
1959
/// Reduces an identifier to a lowercase form suitable for DOI comparison.
///
/// Every leading `https://doi.org/` is removed first, then every leading
/// `http://doi.org/`; prefix matching is case-sensitive and happens before
/// the result is lowercased.
fn normalize_id_for_doi(id: &str) -> String {
    let mut rest = id;
    while let Some(tail) = rest.strip_prefix("https://doi.org/") {
        rest = tail;
    }
    while let Some(tail) = rest.strip_prefix("http://doi.org/") {
        rest = tail;
    }
    rest.to_lowercase()
}
1966
1967fn doi_from_identifiers(data: &Data) -> Option<String> {
1968    data.identifiers
1969        .iter()
1970        .find(|id| id.identifier_type == "DOI" && !id.identifier.is_empty())
1971        .and_then(|id| crate::doi_utils::validate_doi(&id.identifier))
1972}
1973
1974// ── Public API ────────────────────────────────────────────────────────────────
1975
1976pub fn read_json(input: &str) -> Result<Data> {
1977    let content: Content = serde_json::from_str(input).map_err(|e| Error::Parse(e.to_string()))?;
1978    Ok(from_content(content))
1979}
1980
1981pub fn write(data: &Data) -> Result<Vec<u8>> {
1982    let payload = convert(data);
1983    serde_json::to_vec(&payload).map_err(|e| Error::Parse(e.to_string()))
1984}
1985
1986pub fn write_all(list: &[Data]) -> Result<Vec<u8>> {
1987    let payloads: Vec<OutInveniordm> = list.iter().map(convert).collect();
1988    serde_json::to_vec_pretty(&payloads).map_err(|e| Error::Parse(e.to_string()))
1989}
1990
// Unit tests for the JSON reader and writer; neither test touches the network.
#[cfg(test)]
mod tests {
    use super::*;

    // Reading: the `journal:journal` custom field must populate
    // `Data.container`, including the split of `pages` into first/last page.
    #[test]
    fn test_read_json_maps_journal_container_details() {
        let json = r#"{
                    "doi": "10.5555/example",
                    "parent": {},
                    "pids": {},
                    "metadata": {
                        "resource_type": {"id": "publication-article"},
                        "title": "Example"
                    },
                    "custom_fields": {
                        "journal:journal": {
                            "title": "Journal of Examples",
                            "issn": "1234-5678",
                            "volume": "12",
                            "issue": "3",
                            "pages": "100-110"
                        }
                    }
                }"#;

        let data = read_json(json).unwrap();
        assert_eq!(data.container.title, "Journal of Examples");
        assert_eq!(data.container.identifier, "1234-5678");
        assert_eq!(data.container.identifier_type, "ISSN");
        assert_eq!(data.container.volume, "12");
        assert_eq!(data.container.issue, "3");
        // "100-110" is expected to be split on the dash.
        assert_eq!(data.container.first_page, "100");
        assert_eq!(data.container.last_page, "110");
    }

    // Writing: when `id` is not a DOI but `identifiers` holds one, the
    // serialized `pids.doi.identifier` must be the bare DOI from `identifiers`.
    #[test]
    fn test_write_prefers_doi_identifier_over_id() {
        let data = Data {
            id: "https://example.org/not-a-doi".to_string(),
            identifiers: vec![Identifier {
                identifier: "https://doi.org/10.5555/identifier-doi".to_string(),
                identifier_type: "DOI".to_string(),
            }],
            title: "Example".to_string(),
            ..Data::default()
        };

        let out = write(&data).unwrap();
        let json: serde_json::Value = serde_json::from_slice(&out).unwrap();
        // The resolver prefix is stripped in the output.
        assert_eq!(json["pids"]["doi"]["identifier"], "10.5555/identifier-doi");
    }
}
2043
2044/// Fetch an InvenioRDM record by URL (e.g. `https://rogue-scholar.org/records/7zrtf-jkc81`).
2045pub fn fetch(url: &str) -> Result<Data> {
2046    let parsed = url::Url::parse(url).map_err(|e| Error::Parse(e.to_string()))?;
2047    let host = parsed
2048        .host_str()
2049        .ok_or_else(|| Error::Parse("missing host in URL".to_string()))?;
2050    let record_id = parsed
2051        .path_segments()
2052        .and_then(|mut segs| segs.find(|s| !s.is_empty() && *s != "records" && *s != "api"))
2053        .ok_or_else(|| Error::Parse("cannot extract record ID from URL".to_string()))?
2054        .to_string();
2055
2056    let api_url = format!("https://{}/api/records/{}", host, record_id);
2057    let client = build_client()?;
2058    let json = client
2059        .get(&api_url)
2060        .send()
2061        .map_err(|e| Error::Http(e.to_string()))?
2062        .error_for_status()
2063        .map_err(|e| Error::Http(e.to_string()))?
2064        .text()
2065        .map_err(|e| Error::Http(e.to_string()))?;
2066    read_json(&json)
2067}
2068
2069// ── Push / registration (create, update, publish) ─────────────────────────────
2070//
2071// This is a deliberately scoped port of Go's `inveniordm.UpsertAll`: it
2072// creates-or-updates a draft record and publishes it. It does not implement
2073// community auto-association (subject/blog community lookup) or the Rogue
2074// Scholar legacy-record callback, both of which depend on Go-only
2075// infrastructure (embedded vocabulary files, the `roguescholar` package)
2076// that has no equivalent in commonmeta-rs.
2077
/// The outcome of pushing a single record to InvenioRDM.
///
/// Serializes to JSON; empty string fields (other than `id` and `status`)
/// and a `None` message are omitted from the output.
#[derive(Debug, Default, Clone, Serialize)]
pub struct PushResult {
    /// The commonmeta `Data.id` (typically a DOI URL).
    pub id: String,
    /// The validated DOI derived from `id`; empty when validation failed.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub doi: String,
    /// The InvenioRDM record ID, once known.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub record_id: String,
    /// "published", "draft", or a "failed_*" status.
    ///
    /// `upsert` sets "published" on success; on failure it sets "failed" or
    /// a step-specific value such as "failed_missing_doi", "failed_search",
    /// "failed_create_draft", "failed_edit_published", "failed_update_draft"
    /// or "failed_publish".
    // NOTE(review): "draft" is not assigned by `upsert` in this file — confirm
    // whether another caller produces it.
    pub status: String,
    /// The `created` value copied from the InvenioRDM response, if any.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub created: String,
    /// The `updated` value copied from the InvenioRDM response, if any.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub updated: String,
    /// Error text describing why the push failed, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
2097
2098fn build_client() -> Result<reqwest::blocking::Client> {
2099    reqwest::blocking::Client::builder()
2100        .user_agent(format!(
2101            "commonmeta-rs/{} (https://github.com/front-matter/commonmeta-rs; mailto:info@front-matter.de)",
2102            env!("CARGO_PKG_VERSION")
2103        ))
2104        .build()
2105        .map_err(|e| Error::Http(e.to_string()))
2106}
2107
2108/// Search InvenioRDM for an existing record by DOI. Returns the record ID if found.
2109fn search_by_doi(
2110    doi: &str,
2111    host: &str,
2112    client: &reqwest::blocking::Client,
2113) -> Result<Option<String>> {
2114    let escaped = crate::doi_utils::escape_doi(doi);
2115    let url = format!("https://{}/api/records?q=doi:{}", host, escaped);
2116    let body: Value = client
2117        .get(&url)
2118        .header("Content-Type", "application/json")
2119        .send()
2120        .map_err(|e| Error::Http(e.to_string()))?
2121        .json()
2122        .map_err(|e| Error::Http(e.to_string()))?;
2123
2124    let total = body
2125        .get("hits")
2126        .and_then(|h| h.get("total"))
2127        .and_then(Value::as_i64)
2128        .unwrap_or(0);
2129    if total == 0 {
2130        return Ok(None);
2131    }
2132    Ok(body
2133        .get("hits")
2134        .and_then(|h| h.get("hits"))
2135        .and_then(|hits| hits.get(0))
2136        .and_then(|first| first.get("id"))
2137        .and_then(Value::as_str)
2138        .map(|s| s.to_string()))
2139}
2140
2141fn create_draft_record(
2142    body: &[u8],
2143    host: &str,
2144    token: &str,
2145    client: &reqwest::blocking::Client,
2146) -> Result<(String, String, String)> {
2147    let url = format!("https://{}/api/records", host);
2148    let resp = client
2149        .post(&url)
2150        .header("Content-Type", "application/json")
2151        .header("Authorization", format!("Bearer {}", token))
2152        .body(body.to_vec())
2153        .send()
2154        .map_err(|e| Error::Http(e.to_string()))?;
2155
2156    let status = resp.status().as_u16();
2157    let text = resp.text().map_err(|e| Error::Http(e.to_string()))?;
2158    if status == 429 {
2159        return Err(Error::Http("rate limited".to_string()));
2160    }
2161    if status != 201 {
2162        return Err(Error::Http(format!(
2163            "failed to create draft record: {}",
2164            text
2165        )));
2166    }
2167    let v: Value = serde_json::from_str(&text).map_err(|e| Error::Parse(e.to_string()))?;
2168    Ok((
2169        v.get("id")
2170            .and_then(Value::as_str)
2171            .unwrap_or_default()
2172            .to_string(),
2173        v.get("created")
2174            .and_then(Value::as_str)
2175            .unwrap_or_default()
2176            .to_string(),
2177        v.get("updated")
2178            .and_then(Value::as_str)
2179            .unwrap_or_default()
2180            .to_string(),
2181    ))
2182}
2183
2184fn edit_published_record(
2185    record_id: &str,
2186    host: &str,
2187    token: &str,
2188    client: &reqwest::blocking::Client,
2189) -> Result<()> {
2190    let url = format!("https://{}/api/records/{}/draft", host, record_id);
2191    client
2192        .post(&url)
2193        .header("Content-Type", "application/json")
2194        .header("Authorization", format!("Bearer {}", token))
2195        .send()
2196        .map_err(|e| Error::Http(e.to_string()))?;
2197    Ok(())
2198}
2199
2200fn update_draft_record(
2201    record_id: &str,
2202    body: &[u8],
2203    host: &str,
2204    token: &str,
2205    client: &reqwest::blocking::Client,
2206) -> Result<()> {
2207    let url = format!("https://{}/api/records/{}/draft", host, record_id);
2208    client
2209        .put(&url)
2210        .header("Content-Type", "application/json")
2211        .header("Authorization", format!("Bearer {}", token))
2212        .body(body.to_vec())
2213        .send()
2214        .map_err(|e| Error::Http(e.to_string()))?;
2215    Ok(())
2216}
2217
2218fn publish_draft_record(
2219    record_id: &str,
2220    host: &str,
2221    token: &str,
2222    client: &reqwest::blocking::Client,
2223) -> Result<(String, String)> {
2224    let url = format!(
2225        "https://{}/api/records/{}/draft/actions/publish",
2226        host, record_id
2227    );
2228    let resp = client
2229        .post(&url)
2230        .header("Content-Type", "application/json")
2231        .header("Authorization", format!("Bearer {}", token))
2232        .send()
2233        .map_err(|e| Error::Http(e.to_string()))?;
2234
2235    let status = resp.status().as_u16();
2236    let text = resp.text().map_err(|e| Error::Http(e.to_string()))?;
2237    if status != 202 {
2238        return Err(Error::Http(format!(
2239            "failed to publish draft record: {}",
2240            text
2241        )));
2242    }
2243    let v: Value = serde_json::from_str(&text).map_err(|e| Error::Parse(e.to_string()))?;
2244    Ok((
2245        v.get("created")
2246            .and_then(Value::as_str)
2247            .unwrap_or_default()
2248            .to_string(),
2249        v.get("updated")
2250            .and_then(Value::as_str)
2251            .unwrap_or_default()
2252            .to_string(),
2253    ))
2254}
2255
2256/// Create-or-update, then publish, a single record in InvenioRDM.
2257///
2258/// If a record with the same DOI already exists (checked via the InvenioRDM
2259/// search API), its published version is reopened as a draft and updated;
2260/// otherwise a new draft is created. Either way, the draft is published
2261/// before returning.
2262pub fn upsert(data: &Data, host: &str, token: &str) -> PushResult {
2263    let mut result = PushResult {
2264        id: data.id.clone(),
2265        ..Default::default()
2266    };
2267
2268    let doi = match crate::doi_utils::validate_doi(&data.id) {
2269        Some(d) => d,
2270        None => {
2271            result.status = "failed_missing_doi".to_string();
2272            return result;
2273        }
2274    };
2275    result.doi = doi.clone();
2276
2277    let client = match build_client() {
2278        Ok(c) => c,
2279        Err(e) => {
2280            result.status = "failed".to_string();
2281            result.message = Some(e.to_string());
2282            return result;
2283        }
2284    };
2285
2286    let body = match write(data) {
2287        Ok(b) => b,
2288        Err(e) => {
2289            result.status = "failed".to_string();
2290            result.message = Some(e.to_string());
2291            return result;
2292        }
2293    };
2294
2295    let existing = match search_by_doi(&doi, host, &client) {
2296        Ok(id) => id,
2297        Err(e) => {
2298            result.status = "failed_search".to_string();
2299            result.message = Some(e.to_string());
2300            return result;
2301        }
2302    };
2303
2304    let record_id = match existing {
2305        None => match create_draft_record(&body, host, token, &client) {
2306            Ok((id, created, updated)) => {
2307                result.created = created;
2308                result.updated = updated;
2309                id
2310            }
2311            Err(e) => {
2312                result.status = "failed_create_draft".to_string();
2313                result.message = Some(e.to_string());
2314                return result;
2315            }
2316        },
2317        Some(id) => {
2318            if let Err(e) = edit_published_record(&id, host, token, &client) {
2319                result.status = "failed_edit_published".to_string();
2320                result.message = Some(e.to_string());
2321                return result;
2322            }
2323            if let Err(e) = update_draft_record(&id, &body, host, token, &client) {
2324                result.status = "failed_update_draft".to_string();
2325                result.message = Some(e.to_string());
2326                return result;
2327            }
2328            id
2329        }
2330    };
2331    result.record_id = record_id.clone();
2332
2333    match publish_draft_record(&record_id, host, token, &client) {
2334        Ok((created, updated)) => {
2335            if !created.is_empty() {
2336                result.created = created;
2337            }
2338            result.updated = updated;
2339            result.status = "published".to_string();
2340        }
2341        Err(e) => {
2342            result.status = "failed_publish".to_string();
2343            result.message = Some(e.to_string());
2344        }
2345    }
2346
2347    result
2348}
2349
2350/// Create-or-update, then publish, a list of records in InvenioRDM.
2351pub fn upsert_all(list: &[Data], host: &str, token: &str) -> Vec<PushResult> {
2352    list.iter().map(|data| upsert(data, host, token)).collect()
2353}
2354
// Tests for the push API that need no server: `upsert` returns
// `failed_missing_doi` before it builds a client or sends any request.
#[cfg(test)]
mod push_tests {
    use super::*;

    // An `id` that is a URL but not a DOI is rejected up front.
    #[test]
    fn test_upsert_rejects_missing_doi() {
        let data = Data {
            id: "https://example.com/not-a-doi".to_string(),
            ..Data::default()
        };
        let result = upsert(&data, "example.invenio.host", "fake-token");
        assert_eq!(result.status, "failed_missing_doi");
        assert!(result.record_id.is_empty());
    }

    // A default `Data` (empty `id`) is rejected the same way.
    #[test]
    fn test_upsert_rejects_empty_id() {
        let data = Data::default();
        let result = upsert(&data, "example.invenio.host", "fake-token");
        assert_eq!(result.status, "failed_missing_doi");
    }

    // An empty input list yields an empty result list.
    #[test]
    fn test_upsert_all_empty_list() {
        let results = upsert_all(&[], "example.invenio.host", "fake-token");
        assert!(results.is_empty());
    }

    // `skip_serializing_if` must drop empty optional fields, while `id` and
    // `status` are always present.
    #[test]
    fn test_push_result_serialization_omits_empty_fields() {
        let result = PushResult {
            id: "https://doi.org/10.1/a".to_string(),
            status: "failed_missing_doi".to_string(),
            ..Default::default()
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"id\""));
        assert!(json.contains("\"status\""));
        assert!(!json.contains("\"doi\""));
        assert!(!json.contains("\"record_id\""));
        assert!(!json.contains("\"message\""));
    }
}