Skip to main content

citum_engine/api/
refs_input.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Refs input resolution type for interactive APIs.
7
8use crate::reference::{Bibliography, Reference};
9use serde::{Deserialize, Serialize};
10
11/// A refs input that can be resolved locally or by an external resolver.
12///
13/// This union type allows callers to supply reference data by local file path,
14/// inline YAML, inline JSON, or inline BibLaTeX. Enables citum-server and
15/// bindings to accept references from files (e.g., via pipe transport from
16/// LaTeX or Emacs).
17///
18/// Supported tagged-object shapes over JSON/RPC:
19///
20/// ```json
21/// {"kind": "path",     "value": "/abs/path/refs.yaml"}
22/// {"kind": "path",     "value": "/abs/path/refs.bib"}  // .bib detected by extension
23/// {"kind": "yaml",     "value": "references:\n  - id: …"}
24/// {"kind": "json",     "value": {"id": { … }}}
25/// {"kind": "biblatex", "value": "@book{key, title={…}, …}"}
26/// ```
27#[derive(Debug, Clone)]
28pub enum RefsInput {
29    /// Local filesystem path to a refs file.
30    ///
31    /// YAML/JSON/CBOR extensions are loaded as native Citum refs; `.bib`
32    /// extensions are parsed as BibLaTeX.
33    Path(String),
34    /// Inline YAML refs string.
35    Yaml(String),
36    /// Inline JSON map of reference objects.
37    Json(serde_json::Value),
38    /// Inline BibLaTeX (`.bib`) content.
39    Biblatex(String),
40}
41
42impl<'de> Deserialize<'de> for RefsInput {
43    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
44    where
45        D: serde::Deserializer<'de>,
46    {
47        // Deserialize to a generic Value first so we can inspect the shape.
48        let v = serde_json::Value::deserialize(deserializer)?;
49
50        if let Some(object) = v.as_object() {
51            // If the object has a string "kind" and a "value" field it is a
52            // tagged-union wrapper — validate the kind rather than silently
53            // treating an unrecognised kind as a legacy bare-map, which would
54            // produce a confusing downstream parse error.
55            let kind_str = object.get("kind").and_then(|k| k.as_str());
56            let has_value = object.contains_key("value");
57            match (kind_str, has_value) {
58                (Some(k), true) => {
59                    if !matches!(k, "path" | "yaml" | "json" | "biblatex") {
60                        return Err(serde::de::Error::unknown_variant(
61                            k,
62                            &["path", "yaml", "json", "biblatex"],
63                        ));
64                    }
65                    // Recognised kind — fall through to dispatch below.
66                }
67                // No string kind, or no value field: legacy bare refs map.
68                _ => return Ok(RefsInput::Json(v)),
69            }
70        } else {
71            return Err(serde::de::Error::custom(
72                "refs input must be a tagged object or legacy refs object",
73            ));
74        }
75
76        // Tagged union: {"kind": "path"|"yaml"|"json"|"biblatex", "value": ...}
77        let kind = v
78            .get("kind")
79            .and_then(|k| k.as_str())
80            .ok_or_else(|| serde::de::Error::custom("refs input must have a 'kind' field"))?;
81
82        let value = v
83            .get("value")
84            .ok_or_else(|| serde::de::Error::missing_field("value"))?;
85
86        match kind {
87            "path" | "yaml" | "biblatex" => {
88                let s = value
89                    .as_str()
90                    .ok_or_else(|| {
91                        serde::de::Error::custom(
92                            "'value' must be a string for path/yaml/biblatex refs",
93                        )
94                    })?
95                    .to_string();
96                match kind {
97                    "path" => Ok(RefsInput::Path(s)),
98                    "yaml" => Ok(RefsInput::Yaml(s)),
99                    _ => Ok(RefsInput::Biblatex(s)),
100                }
101            }
102            "json" => Ok(RefsInput::Json(value.clone())),
103            k => Err(serde::de::Error::unknown_variant(
104                k,
105                &["path", "yaml", "json", "biblatex"],
106            )),
107        }
108    }
109}
110
111impl Serialize for RefsInput {
112    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
113    where
114        S: serde::Serializer,
115    {
116        use serde::ser::SerializeMap;
117        let mut map = serializer.serialize_map(Some(2))?;
118        match self {
119            RefsInput::Path(s) => {
120                map.serialize_entry("kind", "path")?;
121                map.serialize_entry("value", s)?;
122            }
123            RefsInput::Yaml(s) => {
124                map.serialize_entry("kind", "yaml")?;
125                map.serialize_entry("value", s)?;
126            }
127            RefsInput::Json(v) => {
128                map.serialize_entry("kind", "json")?;
129                map.serialize_entry("value", v)?;
130            }
131            RefsInput::Biblatex(s) => {
132                map.serialize_entry("kind", "biblatex")?;
133                map.serialize_entry("value", s)?;
134            }
135        }
136        map.end()
137    }
138}
139
#[cfg(feature = "schema")]
impl schemars::JsonSchema for RefsInput {
    /// Name under which this type is registered in generated schemas.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("RefsInput")
    }

    /// Describe the three accepted wire shapes as a `oneOf`:
    ///
    /// 1. a string-valued tagged object (`path`, `yaml`, `biblatex`);
    /// 2. a `json`-tagged object whose value maps ids to references;
    /// 3. a legacy bare map of ids to references.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        let per_reference = generator.subschema_for::<Reference>();

        // The id-to-reference map appears both as the `json` payload and as
        // the legacy bare form, so build it once and splice it in twice.
        let refs_map = schemars::json_schema!({
            "type": "object",
            "additionalProperties": per_reference
        });

        schemars::json_schema!({
            "oneOf": [
                {
                    "type": "object",
                    "required": ["kind", "value"],
                    "properties": {
                        "kind": {
                            "type": "string",
                            "enum": ["path", "yaml", "biblatex"]
                        },
                        "value": {
                            "type": "string"
                        }
                    },
                    "additionalProperties": false
                },
                {
                    "type": "object",
                    "required": ["kind", "value"],
                    "properties": {
                        "kind": {
                            "type": "string",
                            "const": "json"
                        },
                        "value": refs_map
                    },
                    "additionalProperties": false
                },
                refs_map
            ]
        })
    }
}
188
189impl RefsInput {
190    /// Resolve refs input locally from Path, Yaml, Json, or Biblatex variants.
191    ///
192    /// For `Path` inputs, `.bib` files are parsed as BibLaTeX; all other
193    /// extensions are parsed as native Citum YAML/JSON/CBOR.
194    ///
195    /// # Errors
196    ///
197    /// Returns error for refs input filesystem or parse failures.
198    pub fn resolve_local(&self) -> Result<Bibliography, crate::api::FormatDocumentError> {
199        match self {
200            RefsInput::Path(path) => {
201                let p = std::path::Path::new(path);
202                if p.extension()
203                    .is_some_and(|ext| ext.eq_ignore_ascii_case("bib"))
204                {
205                    let input = citum_refs::formats::biblatex::load_biblatex(p).map_err(|e| {
206                        crate::api::FormatDocumentError::RefsInputParse(format!(
207                            "Failed to parse BibLaTeX refs from '{}': {}",
208                            path, e
209                        ))
210                    })?;
211                    return Ok(bibliography_from_references(input.references));
212                }
213                let bytes = std::fs::read(path).map_err(|e| {
214                    crate::api::FormatDocumentError::RefsInputPath(format!(
215                        "Failed to read refs input from '{}': {}",
216                        path, e
217                    ))
218                })?;
219                let yaml_str = String::from_utf8_lossy(&bytes);
220                parse_yaml_bibliography(&yaml_str).map_err(|e| {
221                    crate::api::FormatDocumentError::RefsInputParse(format!(
222                        "Failed to parse refs input from '{}': {}",
223                        path, e
224                    ))
225                })
226            }
227            RefsInput::Yaml(yaml_str) => parse_yaml_bibliography(yaml_str).map_err(|e| {
228                crate::api::FormatDocumentError::RefsInputParse(format!(
229                    "Failed to parse inline YAML refs input: {}",
230                    e
231                ))
232            }),
233            RefsInput::Json(json_val) => serde_json::from_value::<Bibliography>(json_val.clone())
234                .map_err(|e| {
235                    crate::api::FormatDocumentError::RefsInputParse(format!(
236                        "Failed to parse JSON refs input: {}",
237                        e
238                    ))
239                }),
240            RefsInput::Biblatex(src) => {
241                let input =
242                    citum_refs::formats::biblatex::parse_biblatex_str(src).map_err(|e| {
243                        crate::api::FormatDocumentError::RefsInputParse(format!(
244                            "Failed to parse inline BibLaTeX refs input: {}",
245                            e
246                        ))
247                    })?;
248                Ok(bibliography_from_references(input.references))
249            }
250        }
251    }
252}
253
254fn parse_yaml_bibliography(yaml_str: &str) -> Result<Bibliography, String> {
255    let native_err = match serde_yaml::from_str::<citum_schema::InputBibliography>(yaml_str) {
256        Ok(input) => return Ok(bibliography_from_references(input.references)),
257        Err(e) => e,
258    };
259
260    if let Ok(bibliography) = serde_yaml::from_str::<Bibliography>(yaml_str) {
261        return Ok(bibliography);
262    }
263
264    if let Ok(references) = serde_yaml::from_str::<Vec<Reference>>(yaml_str) {
265        return Ok(bibliography_from_references(references));
266    }
267
268    Err(format!(
269        "tried native `references:` bibliography, flat id-to-reference map, and reference sequence: {native_err}"
270    ))
271}
272
273fn bibliography_from_references(references: Vec<Reference>) -> Bibliography {
274    references
275        .into_iter()
276        .filter_map(|reference| {
277            let id = reference.id()?.to_string();
278            Some((id, reference))
279        })
280        .collect()
281}
282
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    reason = "test code uses assertions and panic"
)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Flat id-to-reference map, as YAML.
    const FLAT_MAP_YAML: &str = "test_ref:\n  id: test_ref\n  class: monograph\n  type: book\n  title: Test\n  issued: '2024'\n";

    /// Native bibliography with an `info` header and a `references:` list.
    const NATIVE_YAML: &str = "info:\n  title: Test Bibliography\nreferences:\n  - id: test_ref\n    class: monograph\n    type: book\n    title: Test\n    issued: '2024'\n";

    /// Single-entry BibLaTeX source shared by the inline BibLaTeX tests.
    const HAWKING_BIB: &str = "@book{hawking1988, title = {A Brief History of Time}, author = {Hawking, Stephen}, date = {1988}}";

    /// Write `content` into `file`, flush it, and hand the file back.
    fn filled(mut file: NamedTempFile, content: &str) -> NamedTempFile {
        file.write_all(content.as_bytes())
            .expect("Failed to write temp file");
        file.flush().expect("Failed to flush temp file");
        file
    }

    /// Build a `Path` input that points at `file`.
    fn path_input(file: &NamedTempFile) -> RefsInput {
        RefsInput::Path(file.path().to_string_lossy().to_string())
    }

    #[test]
    fn refs_input_yaml_resolves_locally() {
        let outcome = RefsInput::Yaml(FLAT_MAP_YAML.to_string()).resolve_local();
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().contains_key("test_ref"));
    }

    #[test]
    fn refs_input_path_reads_native_input_bibliography() {
        let file = filled(
            NamedTempFile::new().expect("Failed to create temp file"),
            NATIVE_YAML,
        );

        let bibliography = path_input(&file)
            .resolve_local()
            .expect("native bibliography should parse");
        assert!(bibliography.contains_key("test_ref"));
    }

    #[test]
    fn refs_input_yaml_reads_native_input_bibliography() {
        let bibliography = RefsInput::Yaml(NATIVE_YAML.to_string())
            .resolve_local()
            .expect("native bibliography should parse");
        assert!(bibliography.contains_key("test_ref"));
    }

    #[test]
    fn refs_input_json_resolves_locally() {
        let refs = serde_json::json!({
            "test_ref": {
                "id": "test_ref",
                "class": "monograph",
                "type": "book",
                "title": "Test",
                "issued": "2024"
            }
        });
        let outcome = RefsInput::Json(refs).resolve_local();
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().contains_key("test_ref"));
    }

    #[test]
    fn refs_input_path_reads_and_parses() {
        let file = filled(
            NamedTempFile::new().expect("Failed to create temp file"),
            FLAT_MAP_YAML,
        );

        let outcome = path_input(&file).resolve_local();
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().contains_key("test_ref"));
    }

    #[test]
    fn refs_input_path_missing_returns_error() {
        let outcome = RefsInput::Path("/nonexistent/path/refs.yaml".to_string()).resolve_local();
        let Err(crate::api::FormatDocumentError::RefsInputPath(msg)) = outcome else {
            panic!("Expected RefsInputPath error");
        };
        assert!(msg.contains("Failed to read"));
    }

    #[test]
    fn refs_input_invalid_yaml_returns_parse_error() {
        let outcome = RefsInput::Yaml("{ invalid yaml: [".to_string()).resolve_local();
        let Err(crate::api::FormatDocumentError::RefsInputParse(msg)) = outcome else {
            panic!("Expected RefsInputParse error");
        };
        assert!(msg.contains("Failed to parse"));
    }

    #[test]
    fn refs_input_deserialize_tagged_path() {
        let wire = r#"{"kind":"path","value":"/tmp/bib.yaml"}"#;
        let parsed: RefsInput = serde_json::from_str(wire).expect("deserialize");
        let RefsInput::Path(location) = parsed else {
            panic!("Expected Path variant");
        };
        assert_eq!(location, "/tmp/bib.yaml");
    }

    #[test]
    fn refs_input_deserialize_tagged_json() {
        let wire = r#"{"kind":"json","value":{"key":"value"}}"#;
        let parsed: RefsInput = serde_json::from_str(wire).expect("deserialize");
        let RefsInput::Json(payload) = parsed else {
            panic!("Expected Json variant");
        };
        assert_eq!(payload.get("key").and_then(|k| k.as_str()), Some("value"));
    }

    #[test]
    fn refs_input_deserialize_bare_object_as_json() {
        let wire = r#"{"test_ref":{"id":"test_ref","class":"monograph","type":"book","title":"Test","issued":"2024"}}"#;
        let parsed: RefsInput = serde_json::from_str(wire).expect("deserialize");
        let RefsInput::Json(payload) = parsed else {
            panic!("Expected Json variant");
        };
        assert!(payload.get("test_ref").is_some());
    }

    #[test]
    fn refs_input_deserialize_legacy_kind_ref_id_as_json() {
        // A legacy map may contain a reference whose id is literally `kind`;
        // it must not be mistaken for the tagged wrapper.
        let wire = r#"{"kind":{"id":"kind","class":"monograph","type":"book","title":"Kind","issued":"2024"}}"#;
        let parsed: RefsInput = serde_json::from_str(wire).expect("deserialize");
        let RefsInput::Json(payload) = parsed else {
            panic!("Expected Json variant");
        };
        assert!(payload.get("kind").is_some());
    }

    #[test]
    fn refs_input_serialize_path() {
        let wire =
            serde_json::to_string(&RefsInput::Path("/tmp/bib.yaml".to_string())).expect("serialize");
        assert!(wire.contains("\"kind\":\"path\""));
        assert!(wire.contains("\"/tmp/bib.yaml\""));
    }

    #[test]
    fn refs_input_deserialize_tagged_biblatex() {
        let wire = format!(
            r#"{{"kind":"biblatex","value":{}}}"#,
            serde_json::to_string(HAWKING_BIB).unwrap()
        );
        let parsed: RefsInput = serde_json::from_str(&wire).expect("deserialize biblatex");
        let RefsInput::Biblatex(source) = parsed else {
            panic!("Expected Biblatex variant");
        };
        assert!(source.contains("hawking1988"));
    }

    #[test]
    fn refs_input_biblatex_resolves_locally() {
        let bibliography = RefsInput::Biblatex(HAWKING_BIB.to_string())
            .resolve_local()
            .expect("biblatex should parse");
        assert!(bibliography.contains_key("hawking1988"));
    }

    #[test]
    fn refs_input_path_bib_extension_parses_biblatex() {
        let bib_content = "@article{doe2024, title = {Test Article}, author = {Doe, Jane}, journaltitle = {Journal of Tests}, date = {2024}}";
        let file = filled(
            tempfile::Builder::new()
                .suffix(".bib")
                .tempfile()
                .expect("Failed to create temp .bib file"),
            bib_content,
        );

        let bibliography = path_input(&file)
            .resolve_local()
            .expect(".bib path should parse");
        assert!(bibliography.contains_key("doe2024"));
    }

    #[test]
    fn refs_input_serialize_biblatex() {
        let wire = serde_json::to_string(&RefsInput::Biblatex(
            "@book{key, title = {T}}".to_string(),
        ))
        .expect("serialize");
        assert!(wire.contains("\"kind\":\"biblatex\""));
        assert!(wire.contains("@book{key"));
    }

    #[test]
    fn refs_input_deserialize_unknown_kind_returns_error() {
        let outcome = serde_json::from_str::<RefsInput>(r#"{"kind":"csl-json","value":"..."}"#);
        assert!(outcome.is_err());
        let msg = outcome.unwrap_err().to_string();
        assert!(
            msg.contains("csl-json"),
            "error should name the unknown variant: {msg}"
        );
    }
}