Skip to main content

citum_engine/api/
document.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Document-level batch formatting API.
7
8use crate::api::AnnotationStyle;
9use crate::error::ProcessorError;
10use crate::processor::Processor;
11use crate::reference::{Bibliography, Citation};
12use crate::render::djot::Djot;
13use crate::render::format::OutputFormat;
14use crate::render::html::Html;
15use crate::render::latex::Latex;
16use crate::render::markdown::Markdown;
17use crate::render::plain::PlainText;
18use crate::render::typst::Typst;
19use citum_schema::Style;
20use citum_schema::locale::{GeneralTerm, TermForm};
21use citum_schema::reference::{
22    ClassExtension, CollectionType, ContributorRole as ReferenceRole, MonographComponentType,
23    MonographType, ReferenceClass, SerialComponentType,
24};
25use citum_schema::template::ContributorRole as TemplateRole;
26
27use serde::{Deserialize, Serialize};
28use std::collections::HashMap;
29
30use super::{
31    BibliographyEntry, CitationOccurrence, DocumentOptions, EntryMetadata, FormattedBibliography,
32    FormattedCitation, OutputFormatKind, RefsInput, StyleInput, Warning, WarningLevel,
33};
34
/// A request to format a complete document's citations and bibliography.
///
/// The struct derives `Serialize`/`Deserialize`, so it can be carried across
/// an adapter boundary as-is.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentRequest {
    /// The style to use (may be resolved locally or by an adapter).
    pub style: StyleInput,
    /// Optional partial-style overlay (YAML or JSON) merged over the resolved base
    /// style for this request only.
    ///
    /// Accepts any subset of the style YAML schema — e.g. just `options.contributors`
    /// to change `and`/et-al behaviour, or a full citation spec. Uses the same
    /// null-aware, typed-merge semantics as `extends` inheritance: supplied fields
    /// win over base style fields; an explicit `~` (null) value clears an inherited
    /// field. The base style is never mutated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_overrides: Option<String>,
    /// Optional locale override as a BCP 47 language tag (e.g. `en-US`).
    /// When omitted or set to en-US the engine uses its built-in en-US locale;
    /// other locales emit a warning and fall back to en-US until adapter-side
    /// locale resolution is wired through.
    ///
    /// An empty tag is treated like an omitted one, and the `en-US` comparison
    /// is ASCII case-insensitive.
    pub locale: Option<String>,
    /// Output format (plain, html, djot, latex, typst, markdown). Defaults to plain
    /// when omitted from the request.
    #[serde(default)]
    pub output_format: OutputFormatKind,
    /// Reference input as a local path, inline YAML, inline JSON, or legacy bare map.
    pub refs: RefsInput,
    /// Ordered citations as they appear in the document.
    pub citations: Vec<CitationOccurrence>,
    /// Optional document-level configuration.
    pub document_options: Option<DocumentOptions>,
}
66
/// The result of formatting a document.
///
/// Returned by the `format_document*` entry points on success.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentResult {
    /// Formatted citations in document order.
    pub formatted_citations: Vec<FormattedCitation>,
    /// Formatted bibliography.
    pub bibliography: FormattedBibliography,
    /// Non-fatal warnings encountered during processing (for example
    /// `locale_fallback`, `missing_ref`, `unknown_reference_class`, and
    /// `unknown_enum_variant`).
    pub warnings: Vec<Warning>,
}
77
/// Errors that can occur during document formatting.
///
/// Every variant except `Processing` carries a human-readable message; the
/// `Display` impl prefixes that message with a short category label.
#[derive(Debug)]
pub enum FormatDocumentError {
    /// The style ID or URI requires a resolver chain not available in the engine.
    UnresolvedInput(String),
    /// Failed to parse the style YAML. Also returned when a `style_overrides`
    /// overlay cannot be parsed.
    StyleParse(String),
    /// Failed to read or locate the style file.
    StylePath(String),
    /// Failed to read a local refs input path.
    RefsInputPath(String),
    /// Failed to parse refs input data.
    RefsInputParse(String),
    /// The processor encountered an error during rendering.
    Processing(ProcessorError),
    /// Style inheritance (`extends`) could not be resolved.
    StyleResolution(String),
}
96
97impl std::fmt::Display for FormatDocumentError {
98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99        match self {
100            Self::UnresolvedInput(msg) => write!(f, "Unresolved style input: {}", msg),
101            Self::StyleParse(msg) => write!(f, "Style parse error: {}", msg),
102            Self::StylePath(msg) => write!(f, "Style path error: {}", msg),
103            Self::RefsInputPath(msg) => write!(f, "Refs input path error: {}", msg),
104            Self::RefsInputParse(msg) => write!(f, "Refs input parse error: {}", msg),
105            Self::Processing(err) => write!(f, "Processing error: {}", err),
106            Self::StyleResolution(msg) => write!(f, "Style resolution error: {}", msg),
107        }
108    }
109}
110
// Marker impl: every `Error` method keeps its default, so `source()` is not
// overridden here and a wrapped `ProcessorError` is only reachable through the
// `Processing` variant itself or the `Display` text.
impl std::error::Error for FormatDocumentError {}
112
113impl From<ProcessorError> for FormatDocumentError {
114    fn from(err: ProcessorError) -> Self {
115        Self::Processing(err)
116    }
117}
118
119/// Parse a partial-style overlay (YAML or JSON) and merge it over `style` in place.
120///
121/// Called internally by `format_document_with_style`; also available to surface crates
122/// (e.g. `citum-server`) that pre-resolve the style before handing it to the processor.
123///
124/// Uses the same null-aware, typed-merge semantics as `extends` inheritance.
125/// Calls `apply_scoped_options` after the merge so that overlay fields that affect
126/// scoped options (label_wrap, date_position, repeated_author_rendering, etc.) take
127/// effect in the same way they do during normal style resolution.
128///
129/// # Errors
130///
131/// Returns `FormatDocumentError::StyleParse` if the overlay cannot be parsed.
132pub fn apply_style_overrides(
133    style: &mut Style,
134    overlay_src: &str,
135) -> Result<(), FormatDocumentError> {
136    let overlay = Style::from_yaml_bytes(overlay_src.as_bytes()).map_err(|e| {
137        FormatDocumentError::StyleParse(format!("Failed to parse style_overrides: {e}"))
138    })?;
139    style.apply_overlay(&overlay);
140    style.apply_scoped_options();
141    Ok(())
142}
143
144/// Format a complete document's citations and bibliography (convenience wrapper).
145///
146/// This function resolves the style locally using `StyleInput::resolve_local`.
147/// For styles requiring a resolver chain (Id or Uri), use `format_document_with_style`
148/// after pre-resolving.
149///
150/// # Errors
151///
152/// Returns an error if the style cannot be resolved, parsed, or if rendering fails.
153pub fn format_document(
154    request: FormatDocumentRequest,
155) -> Result<FormatDocumentResult, FormatDocumentError> {
156    let style = request.style.resolve_local()?;
157    format_document_with_style(style, request)
158}
159
160/// Format a document, resolving the style through an injected resolver.
161///
162/// `Yaml` is parsed inline; `Id`, `Uri`, and `Path` are delegated to
163/// `resolver.resolve_style`. This lets WASM/FFI callers supply their own
164/// resolver chain without pre-resolving the style themselves.
165///
166/// # Errors
167///
168/// Returns an error if the resolver fails, the style cannot be parsed, or
169/// if rendering fails.
170pub fn format_document_with_resolver(
171    request: FormatDocumentRequest,
172    resolver: &citum_schema::StyleResolver,
173) -> Result<FormatDocumentResult, FormatDocumentError> {
174    let style = match &request.style {
175        StyleInput::Yaml(_) => request.style.resolve_local()?,
176        StyleInput::Id(value) | StyleInput::Uri(value) | StyleInput::Path(value) => resolver
177            .resolve_style(value)
178            .map_err(|e| FormatDocumentError::UnresolvedInput(e.to_string()))?,
179    };
180    // Fully resolve any `extends` chain via the injected resolver, then clear
181    // `extends` so the processor's later `into_resolved()` call needs no
182    // resolver. Mirrors `citum-server`'s `load_style`.
183    let mut resolved = style
184        .try_into_resolved_with(Some(resolver))
185        .map_err(|e| FormatDocumentError::StyleResolution(e.to_string()))?;
186    resolved.extends = None;
187    format_document_with_style(resolved, request)
188}
189
190/// Format a document using an already-resolved style.
191///
192/// This is the primary entry point for adapters (citum-server, citum-bindings)
193/// that have a resolver chain and can pre-resolve style IDs and URIs.
194///
195/// # Errors
196///
197/// Returns an error if rendering fails.
198#[allow(
199    clippy::too_many_lines,
200    reason = "match arms grow one-to-one with format variants"
201)]
202pub fn format_document_with_style(
203    style: Style,
204    request: FormatDocumentRequest,
205) -> Result<FormatDocumentResult, FormatDocumentError> {
206    let mut warnings = Vec::new();
207
208    // Apply per-request style overrides (merge over the resolved base style).
209    let mut style = style;
210    if let Some(src) = &request.style_overrides {
211        apply_style_overrides(&mut style, src)?;
212    }
213
214    // Locale: the engine has no resolver chain for non-en-US locales.
215    // Adapters with a citum_store dep can pre-resolve and call
216    // Processor::with_locale directly; for now, emit a warning when a
217    // non-en-US tag is requested and fall back to en-US.
218    if let Some(tag) = &request.locale
219        && !tag.is_empty()
220        && !tag.eq_ignore_ascii_case("en-us")
221    {
222        warnings.push(Warning {
223            level: WarningLevel::Warning,
224            code: "locale_fallback".to_string(),
225            citation_id: None,
226            ref_id: None,
227            message: format!(
228                "Requested locale '{tag}' could not be loaded by the engine; falling back to en-US. Adapter-side locale resolution is not yet wired through."
229            ),
230        });
231    }
232
233    let bibliography = request.refs.resolve_local()?;
234    let mut processor = Processor::new(style, bibliography);
235    warnings.extend(unknown_reference_class_warnings(&processor.bibliography));
236    warnings.extend(unknown_enum_warnings(&processor));
237
238    if let Some(opts) = &request.document_options {
239        // Rebuild the processor with the document-level integral-name override
240        // before applying scalar field mutations (show_semantics etc.) so that
241        // those mutations are not lost when the processor is reconstructed.
242        if let Some(new_proc) = processor
243            .processor_with_document_integral_name_override(opts.integral_name_memory.as_ref())
244        {
245            processor = new_proc;
246        }
247        if let Some(show_semantics) = opts.show_semantics {
248            processor.show_semantics = show_semantics;
249        }
250        if let Some(inject_ast) = opts.inject_ast_indices {
251            processor.set_inject_ast_indices(inject_ast);
252        }
253        if let Some(abbr_map) = opts.abbreviation_map.clone() {
254            processor.abbreviation_map = Some(abbr_map);
255        }
256    }
257
258    // Convert citations, recording missing-ref warnings and dropping items
259    // whose reference IDs are absent from the bibliography. Citations with no
260    // surviving items are kept as empty placeholders so the output preserves
261    // input order and length.
262    let mut citations: Vec<Citation> = Vec::new();
263    for occ in request.citations {
264        let mut citation: Citation = occ.into();
265        citation.items.retain(|item| {
266            if processor.bibliography.contains_key(&item.id) {
267                true
268            } else {
269                warnings.push(Warning {
270                    level: WarningLevel::Warning,
271                    code: "missing_ref".to_string(),
272                    citation_id: citation.id.clone(),
273                    ref_id: Some(item.id.clone()),
274                    message: format!("Reference '{}' not found in bibliography", item.id),
275                });
276                false
277            }
278        });
279        citations.push(citation);
280    }
281
282    // Annotate integral-name First/Subsequent state from the processor's
283    // effective config (no document structure available; all citations share
284    // document scope). Safe no-op when no memory config is present.
285    processor.annotate_flat_integral_name_states(&mut citations);
286
287    // Process citations
288    let formatted_citations = match request.output_format {
289        OutputFormatKind::Plain => format_by_kind::<PlainText>(&processor, &citations)?,
290        OutputFormatKind::Html => format_by_kind::<Html>(&processor, &citations)?,
291        OutputFormatKind::Djot => format_by_kind::<Djot>(&processor, &citations)?,
292        OutputFormatKind::Latex => format_by_kind::<Latex>(&processor, &citations)?,
293        OutputFormatKind::Typst => format_by_kind::<Typst>(&processor, &citations)?,
294        OutputFormatKind::Markdown => format_by_kind::<Markdown>(&processor, &citations)?,
295    };
296
297    // Process bibliography
298    let bibliography = match request.output_format {
299        OutputFormatKind::Plain => format_bibliography::<PlainText>(
300            &processor,
301            request.output_format,
302            request.document_options.as_ref(),
303        )?,
304        OutputFormatKind::Html => format_bibliography::<Html>(
305            &processor,
306            request.output_format,
307            request.document_options.as_ref(),
308        )?,
309        OutputFormatKind::Djot => format_bibliography::<Djot>(
310            &processor,
311            request.output_format,
312            request.document_options.as_ref(),
313        )?,
314        OutputFormatKind::Latex => format_bibliography::<Latex>(
315            &processor,
316            request.output_format,
317            request.document_options.as_ref(),
318        )?,
319        OutputFormatKind::Typst => format_bibliography::<Typst>(
320            &processor,
321            request.output_format,
322            request.document_options.as_ref(),
323        )?,
324        OutputFormatKind::Markdown => format_bibliography::<Markdown>(
325            &processor,
326            request.output_format,
327            request.document_options.as_ref(),
328        )?,
329    };
330
331    Ok(FormatDocumentResult {
332        formatted_citations,
333        bibliography,
334        warnings,
335    })
336}
337
338/// Scan the bibliography for unknown reference classes and return compatibility warnings.
339pub fn unknown_reference_class_warnings(bibliography: &Bibliography) -> Vec<Warning> {
340    bibliography
341        .iter()
342        .filter_map(|(ref_id, reference)| {
343            let ReferenceClass::Unknown(class) = reference.class() else {
344                return None;
345            };
346            Some(Warning {
347                level: WarningLevel::Warning,
348                code: "unknown_reference_class".to_string(),
349                citation_id: None,
350                ref_id: Some(ref_id.clone()),
351                message: format!(
352                    "Reference '{ref_id}' uses unknown class '{class}'; rendering will use only fields this engine understands."
353                ),
354            })
355        })
356        .collect()
357}
358
359/// Scan the style and bibliography for unknown enum variants and term keys.
360///
361/// Returns a list of structured compatibility warnings for encounter of
362/// unknown variants that were captured via the tolerant-enum mechanism.
363pub fn unknown_enum_warnings(processor: &Processor) -> Vec<Warning> {
364    let mut warnings = Vec::new();
365
366    // 1. Scan bibliography
367    for (ref_id, reference) in &processor.bibliography {
368        match reference.extension() {
369            ClassExtension::Monograph(r) => {
370                if let MonographType::Unknown(s) = &r.r#type {
371                    warnings.push(Warning {
372                        level: WarningLevel::Warning,
373                        code: "unknown_enum_variant".to_string(),
374                        citation_id: None,
375                        ref_id: Some(ref_id.clone()),
376                        message: format!("Reference '{ref_id}' uses unknown monograph type '{s}'; rendering will use default monograph formatting."),
377                    });
378                }
379            }
380            ClassExtension::Collection(r) => {
381                if let CollectionType::Unknown(s) = &r.r#type {
382                    warnings.push(Warning {
383                        level: WarningLevel::Warning,
384                        code: "unknown_enum_variant".to_string(),
385                        citation_id: None,
386                        ref_id: Some(ref_id.clone()),
387                        message: format!("Reference '{ref_id}' uses unknown collection type '{s}'; rendering will use default collection formatting."),
388                    });
389                }
390            }
391            ClassExtension::CollectionComponent(r) => {
392                if let MonographComponentType::Unknown(s) = &r.r#type {
393                    warnings.push(Warning {
394                        level: WarningLevel::Warning,
395                        code: "unknown_enum_variant".to_string(),
396                        citation_id: None,
397                        ref_id: Some(ref_id.clone()),
398                        message: format!("Reference '{ref_id}' uses unknown monograph component type '{s}'; rendering will use default chapter formatting."),
399                    });
400                }
401            }
402            ClassExtension::SerialComponent(r) => {
403                if let SerialComponentType::Unknown(s) = &r.r#type {
404                    warnings.push(Warning {
405                        level: WarningLevel::Warning,
406                        code: "unknown_enum_variant".to_string(),
407                        citation_id: None,
408                        ref_id: Some(ref_id.clone()),
409                        message: format!("Reference '{ref_id}' uses unknown serial component type '{s}'; rendering will use default article formatting."),
410                    });
411                }
412            }
413            _ => {}
414        }
415
416        for contributor in reference.all_contributor_entries() {
417            if let ReferenceRole::Unknown(s) = &contributor.role {
418                warnings.push(Warning {
419                    level: WarningLevel::Warning,
420                    code: "unknown_enum_variant".to_string(),
421                    citation_id: None,
422                    ref_id: Some(ref_id.clone()),
423                    message: format!("Reference '{ref_id}' uses unknown contributor role '{s}'; this role may be ignored during rendering."),
424                });
425            }
426        }
427    }
428
429    // 2. Scan Style
430    if let Some(templates) = &processor.style.templates {
431        for (name, template) in templates {
432            scan_template_for_unknowns(template, &format!("template '{name}'"), &mut warnings);
433        }
434    }
435    if let Some(citation) = &processor.style.citation
436        && let Some(template) = &citation.template
437    {
438        scan_template_for_unknowns(template, "citation layout", &mut warnings);
439    }
440    if let Some(bib) = &processor.style.bibliography
441        && let Some(template) = &bib.template
442    {
443        scan_template_for_unknowns(template, "bibliography layout", &mut warnings);
444    }
445
446    warnings
447}
448
449fn scan_template_for_unknowns(
450    components: &[citum_schema::template::TemplateComponent],
451    location: &str,
452    warnings: &mut Vec<Warning>,
453) {
454    use citum_schema::template::TemplateComponent;
455    for component in components {
456        match component {
457            TemplateComponent::Term(t) => {
458                if let GeneralTerm::Unknown(s) = &t.term {
459                    warnings.push(Warning {
460                        level: WarningLevel::Warning,
461                        code: "unknown_enum_variant".to_string(),
462                        citation_id: None,
463                        ref_id: None,
464                        message: format!("Style {location} uses unknown locale term key '{s}'; this term may render as empty."),
465                    });
466                }
467                if let Some(TermForm::Unknown(s)) = &t.form {
468                    warnings.push(Warning {
469                        level: WarningLevel::Warning,
470                        code: "unknown_enum_variant".to_string(),
471                        citation_id: None,
472                        ref_id: None,
473                        message: format!("Style {location} uses unknown term form '{s}'; falling back to long form."),
474                    });
475                }
476            }
477            TemplateComponent::Contributor(c) => {
478                if let TemplateRole::Unknown(s) = &c.contributor {
479                    warnings.push(Warning {
480                        level: WarningLevel::Warning,
481                        code: "unknown_enum_variant".to_string(),
482                        citation_id: None,
483                        ref_id: None,
484                        message: format!("Style {location} uses unknown contributor role '{s}'; this role may be ignored."),
485                    });
486                }
487            }
488            TemplateComponent::Date(d) => {
489                if let citum_schema::template::DateForm::Unknown(s) = &d.form {
490                    warnings.push(Warning {
491                        level: WarningLevel::Warning,
492                        code: "unknown_enum_variant".to_string(),
493                        citation_id: None,
494                        ref_id: None,
495                        message: format!("Style {location} uses unknown date form '{s}'; falling back to year only."),
496                    });
497                }
498            }
499            TemplateComponent::Group(g) => {
500                scan_template_for_unknowns(&g.group, location, warnings);
501            }
502            _ => {}
503        }
504    }
505}
506
507/// Process citations and return formatted text.
508pub(crate) fn format_by_kind<F>(
509    processor: &Processor,
510    citations: &[Citation],
511) -> Result<Vec<FormattedCitation>, FormatDocumentError>
512where
513    F: OutputFormat<Output = String>,
514{
515    let texts = processor.process_citations_with_format::<F>(citations)?;
516
517    let formatted = citations
518        .iter()
519        .zip(texts.iter())
520        .map(|(citation, text)| {
521            let ref_ids = citation.items.iter().map(|item| item.id.clone()).collect();
522            FormattedCitation {
523                id: citation.id.clone().unwrap_or_default(),
524                text: text.clone(),
525                ref_ids,
526            }
527        })
528        .collect();
529
530    Ok(formatted)
531}
532
533/// Format the bibliography by output kind.
534pub(crate) fn format_bibliography<F>(
535    processor: &Processor,
536    format_kind: OutputFormatKind,
537    doc_opts: Option<&DocumentOptions>,
538) -> Result<FormattedBibliography, FormatDocumentError>
539where
540    F: OutputFormat<Output = String>,
541{
542    // Extract annotation map and style if present
543    let (annotations, annotation_style) = if let Some(opts) = doc_opts {
544        if let Some(anns) = &opts.annotations {
545            let style = opts.annotation_format.as_ref().map(|fmt| AnnotationStyle {
546                format: fmt.clone(),
547            });
548            (anns.clone(), style)
549        } else {
550            (HashMap::new(), None)
551        }
552    } else {
553        (HashMap::new(), None)
554    };
555
556    // Render bibliography as string
557    let content = if annotations.is_empty() {
558        processor
559            .render_bibliography_with_format_and_annotations::<F>(None, annotation_style.as_ref())
560    } else {
561        processor.render_bibliography_with_format_and_annotations::<F>(
562            Some(&annotations),
563            annotation_style.as_ref(),
564        )
565    };
566
567    // Extract per-entry text in the requested output format and capture metadata.
568    let proc_entries = processor.process_references_with_format::<F>().bibliography;
569    let entries = proc_entries
570        .into_iter()
571        .map(|entry| {
572            let entry_anns = if annotations.is_empty() {
573                None
574            } else {
575                Some(&annotations)
576            };
577            let text = crate::render::bibliography::refs_to_string_with_format::<F>(
578                vec![entry.clone()],
579                entry_anns,
580                annotation_style.as_ref(),
581            );
582            let metadata = EntryMetadata {
583                author: entry.metadata.author.unwrap_or_default(),
584                year: entry.metadata.year.unwrap_or_default(),
585                title: entry.metadata.title.unwrap_or_default(),
586            };
587            BibliographyEntry {
588                id: entry.id,
589                text,
590                metadata,
591            }
592        })
593        .collect();
594
595    Ok(FormattedBibliography {
596        format: format_kind,
597        content,
598        entries,
599    })
600}
601
602#[cfg(test)]
603#[allow(
604    clippy::unwrap_used,
605    clippy::expect_used,
606    clippy::panic,
607    clippy::indexing_slicing,
608    reason = "test code uses assertions and panic"
609)]
610mod tests {
611    use super::*;
612    use crate::api::CitationOccurrenceItem;
613    use crate::{
614        Config, ContributorForm, ContributorRole, DateForm, Processing, Rendering,
615        TemplateComponent, TemplateContributor, TemplateDate, TemplateDateVariable,
616        WrapPunctuation,
617    };
618    use citum_schema::options::{AndOptions, ContributorConfig};
619    use citum_schema::reference::{EdtfString, InputReference, Monograph, MonographType, Title};
620    use citum_schema::template::{TemplateTitle, TitleType};
621    use citum_schema::{BibliographySpec, CitationSpec, StyleInfo};
622
623    fn make_test_style() -> Style {
624        Style {
625            info: StyleInfo {
626                title: Some("Test Style".to_string()),
627                id: Some("test".into()),
628                ..Default::default()
629            },
630            options: Some(Config {
631                processing: Some(Processing::AuthorDate),
632                ..Default::default()
633            }),
634            citation: Some(CitationSpec {
635                template: Some(vec![
636                    TemplateComponent::Contributor(TemplateContributor {
637                        contributor: ContributorRole::Author,
638                        form: ContributorForm::Short,
639                        rendering: Rendering::default(),
640                        ..Default::default()
641                    }),
642                    TemplateComponent::Date(TemplateDate {
643                        date: TemplateDateVariable::Issued,
644                        form: DateForm::Year,
645                        rendering: Rendering::default(),
646                        ..Default::default()
647                    }),
648                ]),
649                wrap: Some(WrapPunctuation::Parentheses.into()),
650                ..Default::default()
651            }),
652            ..Default::default()
653        }
654    }
655
656    fn make_test_bibliography() -> RefsInput {
657        let mut refs = Bibliography::new();
658        refs.insert(
659            "smith2020".to_string(),
660            InputReference::Monograph(Box::new(Monograph {
661                id: Some("smith2020".into()),
662                r#type: MonographType::Book,
663                title: Some(Title::Single("Sample Work".to_string())),
664                issued: EdtfString("2020".to_string()),
665                ..Default::default()
666            })),
667        );
668        RefsInput::Json(serde_json::to_value(refs).unwrap())
669    }
670
671    fn make_markup_bibliography() -> RefsInput {
672        let mut refs = Bibliography::new();
673        refs.insert(
674            "art1".to_string(),
675            InputReference::Monograph(Box::new(Monograph {
676                id: Some("art1".into()),
677                r#type: MonographType::Book,
678                title: Some(Title::Single(
679                    "_Homo sapiens_ and *modern* world".to_string(),
680                )),
681                issued: EdtfString("2023".to_string()),
682                ..Default::default()
683            })),
684        );
685        RefsInput::Json(serde_json::to_value(refs).unwrap())
686    }
687
688    #[test]
689    fn format_document_with_style_empty_citations() {
690        let style = make_test_style();
691        let refs = make_test_bibliography();
692        let request = FormatDocumentRequest {
693            style: StyleInput::Yaml("dummy".to_string()),
694            style_overrides: None,
695            locale: None,
696            output_format: OutputFormatKind::Plain,
697            refs,
698            citations: vec![],
699            document_options: None,
700        };
701
702        let result = format_document_with_style(style, request);
703        assert!(result.is_ok());
704        let res = result.unwrap();
705        assert_eq!(res.formatted_citations.len(), 0);
706    }
707
708    #[test]
709    fn format_document_html_bibliography_entries_preserve_inline_markup() {
710        let mut style = make_test_style();
711        style.bibliography = Some(BibliographySpec {
712            template: Some(vec![TemplateComponent::Title(TemplateTitle {
713                title: TitleType::Primary,
714                ..Default::default()
715            })]),
716            ..Default::default()
717        });
718
719        let request = FormatDocumentRequest {
720            style: StyleInput::Yaml("dummy".to_string()),
721            style_overrides: None,
722            locale: None,
723            output_format: OutputFormatKind::Html,
724            refs: make_markup_bibliography(),
725            citations: vec![],
726            document_options: None,
727        };
728
729        let result = format_document_with_style(style, request).expect("should render");
730
731        assert_eq!(
732            result.bibliography.entries[0].text, result.bibliography.content,
733            "single-entry bibliography should mirror the full bibliography payload"
734        );
735        assert!(
736            result.bibliography.entries[0].text.contains(
737                "<span class=\"citum-title\"><em>Homo sapiens</em> and <b>modern</b> world</span>"
738            ),
739            "per-entry HTML should preserve inline markup for Djot-bearing titles"
740        );
741    }
742
743    #[test]
744    fn format_document_missing_ref_warning() {
745        let style = make_test_style();
746        let refs = make_test_bibliography();
747
748        let citation_occ = CitationOccurrence {
749            id: "cite1".to_string(),
750            items: vec![CitationOccurrenceItem {
751                id: "unknown_ref".to_string(),
752                locator: None,
753                prefix: None,
754                suffix: None,
755                integral_name_state: None,
756                org_abbreviation_state: None,
757            }],
758            mode: None,
759            note_number: None,
760            suppress_author: None,
761            grouped: None,
762            prefix: None,
763            suffix: None,
764            sentence_start: None,
765        };
766
767        let request = FormatDocumentRequest {
768            style: StyleInput::Yaml("dummy".to_string()),
769            style_overrides: None,
770            locale: None,
771            output_format: OutputFormatKind::Plain,
772            refs,
773            citations: vec![citation_occ],
774            document_options: None,
775        };
776
777        let result = format_document_with_style(style, request);
778        assert!(result.is_ok());
779        let res = result.unwrap();
780        assert!(res.warnings.iter().any(|w| w.code == "missing_ref"));
781    }
782
783    #[test]
784    fn format_document_unknown_reference_class_warning() {
785        let style = make_test_style();
786        let mut refs = Bibliography::new();
787        let unknown_ref: InputReference = serde_json::from_str(
788            r#"{
789                "class": "dance-performance",
790                "id": "pina2011",
791                "title": "Pina",
792                "issued": "2011",
793                "venue": "Berlin"
794            }"#,
795        )
796        .expect("unknown class should parse through the compatibility path");
797        refs.insert("pina2011".to_string(), unknown_ref);
798
799        let citation_occ = CitationOccurrence {
800            id: "cite1".to_string(),
801            items: vec![CitationOccurrenceItem {
802                id: "pina2011".to_string(),
803                locator: None,
804                prefix: None,
805                suffix: None,
806                integral_name_state: None,
807                org_abbreviation_state: None,
808            }],
809            mode: None,
810            note_number: None,
811            suppress_author: None,
812            grouped: None,
813            prefix: None,
814            suffix: None,
815            sentence_start: None,
816        };
817
818        let request = FormatDocumentRequest {
819            style: StyleInput::Yaml("dummy".to_string()),
820            style_overrides: None,
821            locale: None,
822            output_format: OutputFormatKind::Plain,
823            refs: RefsInput::Json(serde_json::to_value(refs).unwrap()),
824            citations: vec![citation_occ],
825            document_options: None,
826        };
827
828        let result = format_document_with_style(style, request).unwrap();
829        let warning = result
830            .warnings
831            .iter()
832            .find(|w| w.code == "unknown_reference_class")
833            .expect("unknown class warning should be emitted");
834        assert_eq!(warning.ref_id.as_deref(), Some("pina2011"));
835        assert!(warning.message.contains("dance-performance"));
836    }
837
838    #[test]
839    fn format_document_yaml_style_input() {
840        let style = make_test_style();
841        let yaml_style = serde_yaml::to_string(&style).expect("serialize test style");
842
843        let mut refs = Bibliography::new();
844        refs.insert(
845            "test2024".to_string(),
846            InputReference::Monograph(Box::new(Monograph {
847                id: Some("test2024".into()),
848                r#type: MonographType::Book,
849                title: Some(Title::Single("Test Work".to_string())),
850                issued: EdtfString("2024".to_string()),
851                ..Default::default()
852            })),
853        );
854
855        let citation_occ = CitationOccurrence {
856            id: "c1".to_string(),
857            items: vec![CitationOccurrenceItem {
858                id: "test2024".to_string(),
859                locator: None,
860                prefix: None,
861                suffix: None,
862                integral_name_state: None,
863                org_abbreviation_state: None,
864            }],
865            mode: None,
866            note_number: None,
867            suppress_author: None,
868            grouped: None,
869            prefix: None,
870            suffix: None,
871            sentence_start: None,
872        };
873
874        let request = FormatDocumentRequest {
875            style: StyleInput::Yaml(yaml_style),
876            style_overrides: None,
877            locale: None,
878            output_format: OutputFormatKind::Plain,
879            refs: RefsInput::Json(serde_json::to_value(refs).unwrap()),
880            citations: vec![citation_occ],
881            document_options: None,
882        };
883
884        let result = format_document(request);
885        assert!(result.is_ok());
886        let res = result.unwrap();
887        assert_eq!(res.formatted_citations.len(), 1);
888        assert!(!res.formatted_citations[0].text.is_empty());
889    }
890
891    #[test]
892    fn format_document_uri_input_unresolved() {
893        let request = FormatDocumentRequest {
894            style: StyleInput::Uri("https://example.com/style.yaml".to_string()),
895            style_overrides: None,
896            locale: None,
897            output_format: OutputFormatKind::Plain,
898            refs: RefsInput::Json(serde_json::Value::Object(Default::default())),
899            citations: vec![],
900            document_options: None,
901        };
902
903        let result = format_document(request);
904        match result {
905            Err(FormatDocumentError::UnresolvedInput(_)) => {
906                // Expected
907            }
908            _ => panic!("Expected UnresolvedInput error"),
909        }
910    }
911
912    /// A minimal resolver that returns a fixed style for any ID.
913    struct MockResolver(Style);
914
915    impl citum_resolver_api::StyleResolver for MockResolver {
916        type Style = Style;
917        type Locale = citum_schema::locale::Locale;
918
919        fn resolve_style(&self, _uri: &str) -> Result<Style, citum_schema::ResolverError> {
920            Ok(self.0.clone())
921        }
922
923        fn resolve_locale(
924            &self,
925            id: &str,
926        ) -> Result<citum_schema::locale::Locale, citum_schema::ResolverError> {
927            Err(citum_schema::ResolverError::LocaleNotFound(
928                std::borrow::Cow::Owned(id.to_string()),
929            ))
930        }
931    }
932
933    #[test]
934    fn format_document_with_resolver_injects_style_for_id_input() {
935        let style = make_test_style();
936        let resolver = MockResolver(style);
937        let refs = make_test_bibliography();
938
939        let citation_occ = CitationOccurrence {
940            id: "c1".to_string(),
941            items: vec![CitationOccurrenceItem {
942                id: "smith2020".to_string(),
943                locator: None,
944                prefix: None,
945                suffix: None,
946                integral_name_state: None,
947                org_abbreviation_state: None,
948            }],
949            mode: None,
950            note_number: None,
951            suppress_author: None,
952            grouped: None,
953            prefix: None,
954            suffix: None,
955            sentence_start: None,
956        };
957
958        let request = FormatDocumentRequest {
959            style: StyleInput::Id("any-id".to_string()),
960            style_overrides: None,
961            locale: None,
962            output_format: OutputFormatKind::Plain,
963            refs,
964            citations: vec![citation_occ],
965            document_options: None,
966        };
967
968        // Without a resolver, the same Id input must be rejected.
969        match format_document(request.clone()) {
970            Err(FormatDocumentError::UnresolvedInput(_)) => {}
971            other => panic!("expected UnresolvedInput without resolver, got: {other:?}"),
972        }
973
974        // With the injected resolver it must succeed.
975        let result = format_document_with_resolver(request, &resolver);
976        assert!(result.is_ok(), "expected Ok, got: {:?}", result.err());
977        let res = result.unwrap();
978        assert_eq!(res.formatted_citations.len(), 1);
979        assert!(
980            !res.formatted_citations[0].text.is_empty(),
981            "formatted citation text should not be empty"
982        );
983    }
984
985    /// Build an author-date style whose citation template renders contributor short form.
986    fn make_two_author_style() -> Style {
987        Style {
988            info: StyleInfo {
989                title: Some("Override Test Style".to_string()),
990                id: Some("override-test".into()),
991                ..Default::default()
992            },
993            options: Some(Config {
994                processing: Some(Processing::AuthorDate),
995                // Explicitly set `and: text` so the override to `symbol` is observable
996                // in rendered output without relying on any default connector.
997                contributors: Some(ContributorConfig {
998                    and: Some(AndOptions::Text),
999                    ..Default::default()
1000                }),
1001                ..Default::default()
1002            }),
1003            citation: Some(CitationSpec {
1004                template: Some(vec![
1005                    TemplateComponent::Contributor(TemplateContributor {
1006                        contributor: ContributorRole::Author,
1007                        form: ContributorForm::Short,
1008                        rendering: Rendering::default(),
1009                        ..Default::default()
1010                    }),
1011                    TemplateComponent::Date(TemplateDate {
1012                        date: TemplateDateVariable::Issued,
1013                        form: DateForm::Year,
1014                        rendering: Rendering {
1015                            prefix: Some(", ".to_string()),
1016                            ..Default::default()
1017                        },
1018                        ..Default::default()
1019                    }),
1020                ]),
1021                wrap: Some(WrapPunctuation::Parentheses.into()),
1022                ..Default::default()
1023            }),
1024            ..Default::default()
1025        }
1026    }
1027
1028    /// Build a refs input with a two-author book so the "and" connector is exercised.
1029    ///
1030    /// Uses inline YAML (the reliably tested deserialization path) rather than
1031    /// round-tripping through `serde_json::to_value` which may not preserve the
1032    /// contributor tagged-enum layout the engine expects.
1033    fn make_two_author_refs() -> RefsInput {
1034        RefsInput::Yaml(
1035            r#"duo2024:
1036  class: monograph
1037  id: duo2024
1038  type: book
1039  title: Duo Work
1040  issued: "2024"
1041  author:
1042    - family: Smith
1043      given: Alice
1044    - family: Jones
1045      given: Bob
1046"#
1047            .to_string(),
1048        )
1049    }
1050
1051    /// Helper: produce a single-item citation occurrence for a given ref id.
1052    fn cite(ref_id: &str) -> CitationOccurrence {
1053        CitationOccurrence {
1054            id: "c1".to_string(),
1055            items: vec![CitationOccurrenceItem {
1056                id: ref_id.to_string(),
1057                locator: None,
1058                prefix: None,
1059                suffix: None,
1060                integral_name_state: None,
1061                org_abbreviation_state: None,
1062            }],
1063            mode: None,
1064            note_number: None,
1065            suppress_author: None,
1066            grouped: None,
1067            prefix: None,
1068            suffix: None,
1069            sentence_start: None,
1070        }
1071    }
1072
1073    #[test]
1074    fn style_overrides_and_symbol_changes_rendered_output() {
1075        let base_style = make_two_author_style();
1076        let refs = make_two_author_refs();
1077
1078        // given: base style produces a citation containing "and"
1079        let request_base = FormatDocumentRequest {
1080            style: StyleInput::Yaml("dummy".to_string()),
1081            style_overrides: None,
1082            locale: None,
1083            output_format: OutputFormatKind::Plain,
1084            refs: refs.clone(),
1085            citations: vec![cite("duo2024")],
1086            document_options: None,
1087        };
1088        let result_base = format_document_with_style(base_style.clone(), request_base).unwrap();
1089        let text_base = &result_base.formatted_citations[0].text;
1090        assert!(
1091            text_base.contains("and"),
1092            "base style should use text 'and' connector, got: {text_base:?}"
1093        );
1094
1095        // when: style_overrides switches connector to symbol "&"
1096        let request_override = FormatDocumentRequest {
1097            style: StyleInput::Yaml("dummy".to_string()),
1098            style_overrides: Some("options:\n  contributors:\n    and: symbol\n".to_string()),
1099            locale: None,
1100            output_format: OutputFormatKind::Plain,
1101            refs,
1102            citations: vec![cite("duo2024")],
1103            document_options: None,
1104        };
1105        let result_override =
1106            format_document_with_style(base_style.clone(), request_override).unwrap();
1107        let text_override = &result_override.formatted_citations[0].text;
1108        assert!(
1109            text_override.contains('&'),
1110            "overridden style should use '&' connector, got: {text_override:?}"
1111        );
1112
1113        // then: base style struct is untouched — still has Text, not Symbol
1114        let base_and = base_style
1115            .options
1116            .as_ref()
1117            .and_then(|o| o.contributors.as_ref())
1118            .and_then(|c| c.and.as_ref());
1119        assert!(
1120            matches!(base_and, Some(&AndOptions::Text)),
1121            "base style must not be mutated; expected And::Text, got: {base_and:?}"
1122        );
1123    }
1124
1125    #[test]
1126    fn style_overrides_invalid_yaml_returns_parse_error() {
1127        let style = make_test_style();
1128        let refs = make_test_bibliography();
1129
1130        let request = FormatDocumentRequest {
1131            style: StyleInput::Yaml("dummy".to_string()),
1132            style_overrides: Some("{ unclosed yaml: [".to_string()),
1133            locale: None,
1134            output_format: OutputFormatKind::Plain,
1135            refs,
1136            citations: vec![],
1137            document_options: None,
1138        };
1139
1140        match format_document_with_style(style, request) {
1141            Err(FormatDocumentError::StyleParse(msg)) => {
1142                assert!(
1143                    msg.contains("style_overrides"),
1144                    "error message should mention style_overrides, got: {msg}"
1145                );
1146            }
1147            other => panic!("expected StyleParse error, got: {other:?}"),
1148        }
1149    }
1150
1151    #[test]
1152    fn apply_style_overrides_merges_option_field() {
1153        let mut style = make_test_style();
1154        apply_style_overrides(&mut style, "options:\n  contributors:\n    and: symbol\n")
1155            .expect("apply_style_overrides should succeed");
1156
1157        let and_option = style
1158            .options
1159            .as_ref()
1160            .and_then(|o| o.contributors.as_ref())
1161            .and_then(|c| c.and.as_ref());
1162        assert!(
1163            matches!(and_option, Some(&AndOptions::Symbol)),
1164            "expected And::Symbol after override, got: {and_option:?}"
1165        );
1166    }
1167
1168    // --- integral_name_memory wiring ---
1169
1170    /// Build a style that has integral-name memory configured with scope=Document,
1171    /// contexts=BodyAndNotes, subsequent_form=Short, and an integral sub-template
1172    /// that renders the author in Long (given + family) form.
1173    fn make_integral_name_style() -> Style {
1174        use citum_schema::options::{
1175            IntegralNameContexts, IntegralNameMemoryConfig, IntegralNameScope, SubsequentNameForm,
1176        };
1177        Style {
1178            info: StyleInfo {
1179                title: Some("Integral Name Memory Test".to_string()),
1180                id: Some("integral-name-memory-test".into()),
1181                ..Default::default()
1182            },
1183            options: Some(Config {
1184                processing: Some(Processing::AuthorDate),
1185                integral_name_memory: Some(IntegralNameMemoryConfig {
1186                    scope: Some(IntegralNameScope::Document),
1187                    contexts: Some(IntegralNameContexts::BodyAndNotes),
1188                    subsequent_form: Some(SubsequentNameForm::Short),
1189                    ..Default::default()
1190                }),
1191                ..Default::default()
1192            }),
1193            citation: Some(CitationSpec {
1194                integral: Some(Box::new(CitationSpec {
1195                    template: Some(vec![TemplateComponent::Contributor(TemplateContributor {
1196                        contributor: ContributorRole::Author,
1197                        form: ContributorForm::Long,
1198                        rendering: Rendering::default(),
1199                        ..Default::default()
1200                    })]),
1201                    ..Default::default()
1202                })),
1203                template: Some(vec![
1204                    TemplateComponent::Contributor(TemplateContributor {
1205                        contributor: ContributorRole::Author,
1206                        form: ContributorForm::Short,
1207                        rendering: Rendering::default(),
1208                        ..Default::default()
1209                    }),
1210                    TemplateComponent::Date(TemplateDate {
1211                        date: TemplateDateVariable::Issued,
1212                        form: DateForm::Year,
1213                        rendering: Rendering::default(),
1214                        ..Default::default()
1215                    }),
1216                ]),
1217                wrap: Some(WrapPunctuation::Parentheses.into()),
1218                ..Default::default()
1219            }),
1220            ..Default::default()
1221        }
1222    }
1223
1224    fn make_smith_refs() -> RefsInput {
1225        RefsInput::Yaml(
1226            r#"smith2020:
1227  class: monograph
1228  id: smith2020
1229  type: book
1230  title: Smith Book
1231  issued: "2020"
1232  author:
1233    - family: Smith
1234      given: John
1235"#
1236            .to_string(),
1237        )
1238    }
1239
1240    fn make_integral_occ(id: &str, ref_id: &str) -> CitationOccurrence {
1241        CitationOccurrence {
1242            id: id.to_string(),
1243            items: vec![CitationOccurrenceItem {
1244                id: ref_id.to_string(),
1245                locator: None,
1246                prefix: None,
1247                suffix: None,
1248                integral_name_state: None,
1249                org_abbreviation_state: None,
1250            }],
1251            mode: Some(citum_schema::data::citation::CitationMode::Integral),
1252            note_number: None,
1253            suppress_author: None,
1254            grouped: None,
1255            prefix: None,
1256            suffix: None,
1257            sentence_start: None,
1258        }
1259    }
1260
1261    #[test]
1262    fn document_options_integral_name_memory_first_full_then_short() {
1263        use crate::processor::document::DocumentIntegralNameOverride;
1264
1265        let style = make_integral_name_style();
1266        let refs = make_smith_refs();
1267
1268        let request = FormatDocumentRequest {
1269            style: StyleInput::Yaml("dummy".to_string()),
1270            style_overrides: None,
1271            locale: None,
1272            output_format: OutputFormatKind::Plain,
1273            refs,
1274            citations: vec![
1275                make_integral_occ("c1", "smith2020"),
1276                make_integral_occ("c2", "smith2020"),
1277            ],
1278            document_options: Some(DocumentOptions {
1279                integral_name_memory: Some(DocumentIntegralNameOverride {
1280                    enabled: Some(true),
1281                    ..Default::default()
1282                }),
1283                ..Default::default()
1284            }),
1285        };
1286
1287        let result = format_document_with_style(style, request).expect("should render");
1288
1289        assert!(
1290            !result
1291                .warnings
1292                .iter()
1293                .any(|w| w.code == "integral_name_memory_not_applied"),
1294            "stale warning must not appear: {:?}",
1295            result.warnings
1296        );
1297        assert_eq!(
1298            result.formatted_citations[0].text, "John Smith",
1299            "first integral cite should render full name form"
1300        );
1301        assert_eq!(
1302            result.formatted_citations[1].text, "Smith",
1303            "second integral cite of same author should render short form"
1304        );
1305    }
1306
1307    #[test]
1308    fn document_options_integral_name_memory_disabled_keeps_full_form() {
1309        use crate::processor::document::DocumentIntegralNameOverride;
1310
1311        let style = make_integral_name_style();
1312        let refs = make_smith_refs();
1313
1314        let request = FormatDocumentRequest {
1315            style: StyleInput::Yaml("dummy".to_string()),
1316            style_overrides: None,
1317            locale: None,
1318            output_format: OutputFormatKind::Plain,
1319            refs,
1320            citations: vec![
1321                make_integral_occ("c1", "smith2020"),
1322                make_integral_occ("c2", "smith2020"),
1323            ],
1324            document_options: Some(DocumentOptions {
1325                integral_name_memory: Some(DocumentIntegralNameOverride {
1326                    enabled: Some(false),
1327                    ..Default::default()
1328                }),
1329                ..Default::default()
1330            }),
1331        };
1332
1333        let result = format_document_with_style(style, request).expect("should render");
1334
1335        // With memory disabled both occurrences should render the natural integral
1336        // template form (Long = "John Smith") without any subsequent rewrite.
1337        assert_eq!(
1338            result.formatted_citations[0].text, "John Smith",
1339            "first integral cite: {}",
1340            result.formatted_citations[0].text
1341        );
1342        assert_eq!(
1343            result.formatted_citations[1].text, "John Smith",
1344            "second integral cite should also be full when memory is disabled"
1345        );
1346    }
1347
1348    #[test]
1349    fn style_native_integral_name_memory_applied_without_document_override() {
1350        // Style has integral_name_memory in its own options; no document_options
1351        // override is supplied. The flat API must still annotate First/Subsequent.
1352        let style = make_integral_name_style();
1353        let refs = make_smith_refs();
1354
1355        let request = FormatDocumentRequest {
1356            style: StyleInput::Yaml("dummy".to_string()),
1357            style_overrides: None,
1358            locale: None,
1359            output_format: OutputFormatKind::Plain,
1360            refs,
1361            citations: vec![
1362                make_integral_occ("c1", "smith2020"),
1363                make_integral_occ("c2", "smith2020"),
1364            ],
1365            document_options: None,
1366        };
1367
1368        let result = format_document_with_style(style, request).expect("should render");
1369
1370        assert_eq!(
1371            result.formatted_citations[0].text, "John Smith",
1372            "first integral cite should render full name form"
1373        );
1374        assert_eq!(
1375            result.formatted_citations[1].text, "Smith",
1376            "second integral cite should render short form from style-native config"
1377        );
1378    }
1379}