Skip to main content

citum_engine/api/
document.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Document-level batch formatting API.
7
8use crate::api::AnnotationStyle;
9use crate::error::ProcessorError;
10use crate::processor::Processor;
11use crate::reference::{Bibliography, Citation};
12use crate::render::djot::Djot;
13use crate::render::format::OutputFormat;
14use crate::render::html::Html;
15use crate::render::latex::Latex;
16use crate::render::markdown::Markdown;
17use crate::render::plain::PlainText;
18use crate::render::typst::Typst;
19use citum_schema::Style;
20use citum_schema::locale::{GeneralTerm, TermForm};
21use citum_schema::reference::{
22    ClassExtension, CollectionType, ContributorRole as ReferenceRole, MonographComponentType,
23    MonographType, ReferenceClass, SerialComponentType,
24};
25use citum_schema::template::ContributorRole as TemplateRole;
26
27use serde::{Deserialize, Serialize};
28use std::collections::HashMap;
29
30use super::{
31    BibliographyEntry, CitationOccurrence, DocumentOptions, EntryMetadata, FormattedBibliography,
32    FormattedBibliographyBlock, FormattedCitation, OutputFormatKind, RefsInput, StyleInput,
33    Warning, WarningLevel,
34};
35
/// A request to format a complete document's citations and bibliography.
///
/// Consumed by [`format_document`], [`format_document_with_resolver`], and
/// [`format_document_with_style`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentRequest {
    /// The style to use (may be resolved locally or by an adapter).
    pub style: StyleInput,
    /// Optional partial-style overlay (YAML or JSON) merged over the resolved base
    /// style for this request only.
    ///
    /// Accepts any subset of the style YAML schema — e.g. just `options.contributors`
    /// to change `and`/et-al behaviour, or a full citation spec. Uses the same
    /// null-aware, typed-merge semantics as `extends` inheritance: supplied fields
    /// win over base style fields; an explicit `~` (null) value clears an inherited
    /// field. The base style is never mutated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_overrides: Option<String>,
    /// Optional locale override as a BCP 47 language tag (e.g. `en-US`).
    /// When omitted, empty, or set to en-US (compared ASCII case-insensitively)
    /// the engine uses its built-in en-US locale; other locales emit a
    /// `locale_fallback` warning and fall back to en-US until adapter-side
    /// locale resolution is wired through.
    pub locale: Option<String>,
    /// Output format (plain, html, djot, latex, typst, markdown). Defaults to
    /// plain when omitted from the request.
    #[serde(default)]
    pub output_format: OutputFormatKind,
    /// Reference input as a local path, inline YAML, inline JSON, or legacy bare map.
    pub refs: RefsInput,
    /// Ordered citations as they appear in the document.
    ///
    /// Citation items whose reference ID is absent from `refs` are dropped and
    /// reported with a `missing_ref` warning; the citation itself is kept, even
    /// when no items survive, so output order and length match the input.
    pub citations: Vec<CitationOccurrence>,
    /// Ordered sectional bibliography blocks to render after citations.
    #[serde(default)]
    pub bibliography_blocks: Vec<super::BibliographyBlockRequest>,
    /// Optional document-level configuration.
    pub document_options: Option<DocumentOptions>,
    /// Reference IDs to include in the bibliography without emitting an in-text citation.
    ///
    /// Nocite entries appear in `bibliography.entries` (and match `CitedStatus::Visible`
    /// selectors for grouped / block bibliographies) but produce no `formatted_citations`
    /// entry. This matches standard citeproc / Pandoc `nocite` semantics.
    ///
    /// IDs absent from `refs` are ignored and trigger a `nocite_missing_ref` warning.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub nocite: Vec<String>,
}
79
/// The result of formatting a document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentResult {
    /// Formatted citations in document order.
    ///
    /// Citations whose items were all dropped for missing references are still
    /// represented here as placeholders so positions line up with the request.
    pub formatted_citations: Vec<FormattedCitation>,
    /// Formatted bibliography.
    pub bibliography: FormattedBibliography,
    /// Rendered bibliography blocks, in request order. Empty when the request
    /// listed no `bibliography_blocks`.
    pub bibliography_blocks: Vec<FormattedBibliographyBlock>,
    /// Non-fatal warnings encountered during processing.
    pub warnings: Vec<Warning>,
}
92
/// Errors that can occur during document formatting.
///
/// Every variant except [`FormatDocumentError::Processing`] carries a
/// human-readable detail string; `Processing` wraps the engine's own error.
#[derive(Debug)]
pub enum FormatDocumentError {
    /// The style ID or URI requires a resolver chain not available in the engine.
    /// Also used when an injected resolver fails to resolve the style.
    UnresolvedInput(String),
    /// Failed to parse the style YAML. Also reported when a request's
    /// `style_overrides` overlay cannot be parsed.
    StyleParse(String),
    /// Failed to read or locate the style file.
    StylePath(String),
    /// Failed to read a local refs input path.
    RefsInputPath(String),
    /// Failed to parse refs input data.
    RefsInputParse(String),
    /// The processor encountered an error during rendering.
    Processing(ProcessorError),
    /// Style inheritance (`extends`) could not be resolved.
    StyleResolution(String),
}
111
112impl std::fmt::Display for FormatDocumentError {
113    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
114        match self {
115            Self::UnresolvedInput(msg) => write!(f, "Unresolved style input: {}", msg),
116            Self::StyleParse(msg) => write!(f, "Style parse error: {}", msg),
117            Self::StylePath(msg) => write!(f, "Style path error: {}", msg),
118            Self::RefsInputPath(msg) => write!(f, "Refs input path error: {}", msg),
119            Self::RefsInputParse(msg) => write!(f, "Refs input parse error: {}", msg),
120            Self::Processing(err) => write!(f, "Processing error: {}", err),
121            Self::StyleResolution(msg) => write!(f, "Style resolution error: {}", msg),
122        }
123    }
124}
125
// Marker impl only: `source()` is not overridden, so callers see the wrapped
// processor error solely through the `Display` text of the `Processing` variant.
impl std::error::Error for FormatDocumentError {}
127
128impl From<ProcessorError> for FormatDocumentError {
129    fn from(err: ProcessorError) -> Self {
130        Self::Processing(err)
131    }
132}
133
134/// Parse a partial-style overlay (YAML or JSON) and merge it over `style` in place.
135///
136/// Called internally by `format_document_with_style`; also available to surface crates
137/// (e.g. `citum-server`) that pre-resolve the style before handing it to the processor.
138///
139/// Uses the same null-aware, typed-merge semantics as `extends` inheritance.
140/// Calls `apply_scoped_options` after the merge so that overlay fields that affect
141/// scoped options (label_wrap, date_position, repeated_author_rendering, etc.) take
142/// effect in the same way they do during normal style resolution.
143///
144/// # Errors
145///
146/// Returns `FormatDocumentError::StyleParse` if the overlay cannot be parsed.
147pub fn apply_style_overrides(
148    style: &mut Style,
149    overlay_src: &str,
150) -> Result<(), FormatDocumentError> {
151    let overlay = Style::from_yaml_bytes(overlay_src.as_bytes()).map_err(|e| {
152        FormatDocumentError::StyleParse(format!("Failed to parse style_overrides: {e}"))
153    })?;
154    style.apply_overlay(&overlay);
155    style.apply_scoped_options();
156    Ok(())
157}
158
159/// Format a complete document's citations and bibliography (convenience wrapper).
160///
161/// This function resolves the style locally using `StyleInput::resolve_local`.
162/// For styles requiring a resolver chain (Id or Uri), use `format_document_with_style`
163/// after pre-resolving.
164///
165/// # Errors
166///
167/// Returns an error if the style cannot be resolved, parsed, or if rendering fails.
168pub fn format_document(
169    request: FormatDocumentRequest,
170) -> Result<FormatDocumentResult, FormatDocumentError> {
171    let style = request.style.resolve_local()?;
172    format_document_with_style(style, request)
173}
174
175/// Format a document, resolving the style through an injected resolver.
176///
177/// `Yaml` is parsed inline; `Id`, `Uri`, and `Path` are delegated to
178/// `resolver.resolve_style`. This lets WASM/FFI callers supply their own
179/// resolver chain without pre-resolving the style themselves.
180///
181/// # Errors
182///
183/// Returns an error if the resolver fails, the style cannot be parsed, or
184/// if rendering fails.
185pub fn format_document_with_resolver(
186    request: FormatDocumentRequest,
187    resolver: &citum_schema::StyleResolver,
188) -> Result<FormatDocumentResult, FormatDocumentError> {
189    let style = match &request.style {
190        StyleInput::Yaml(_) => request.style.resolve_local()?,
191        StyleInput::Id(value) | StyleInput::Uri(value) | StyleInput::Path(value) => resolver
192            .resolve_style(value)
193            .map_err(|e| FormatDocumentError::UnresolvedInput(e.to_string()))?,
194    };
195    // Fully resolve any `extends` chain via the injected resolver, then clear
196    // `extends` so the processor's later `into_resolved()` call needs no
197    // resolver. Mirrors `citum-server`'s `load_style`.
198    let mut resolved = style
199        .try_into_resolved_with(Some(resolver))
200        .map_err(|e| FormatDocumentError::StyleResolution(e.to_string()))?;
201    resolved.extends = None;
202    format_document_with_style(resolved, request)
203}
204
205/// Format a document using an already-resolved style.
206///
207/// This is the primary entry point for adapters (citum-server, citum-bindings)
208/// that have a resolver chain and can pre-resolve style IDs and URIs.
209///
210/// # Errors
211///
212/// Returns an error if rendering fails.
213#[allow(
214    clippy::too_many_lines,
215    reason = "match arms grow one-to-one with format variants"
216)]
217pub fn format_document_with_style(
218    style: Style,
219    request: FormatDocumentRequest,
220) -> Result<FormatDocumentResult, FormatDocumentError> {
221    let mut warnings = Vec::new();
222
223    // Apply per-request style overrides (merge over the resolved base style).
224    let mut style = style;
225    if let Some(src) = &request.style_overrides {
226        apply_style_overrides(&mut style, src)?;
227    }
228
229    // Locale: the engine has no resolver chain for non-en-US locales.
230    // Adapters with a citum_store dep can pre-resolve and call
231    // Processor::with_locale directly; for now, emit a warning when a
232    // non-en-US tag is requested and fall back to en-US.
233    if let Some(tag) = &request.locale
234        && !tag.is_empty()
235        && !tag.eq_ignore_ascii_case("en-us")
236    {
237        warnings.push(Warning {
238            level: WarningLevel::Warning,
239            code: "locale_fallback".to_string(),
240            citation_id: None,
241            ref_id: None,
242            message: format!(
243                "Requested locale '{tag}' could not be loaded by the engine; falling back to en-US. Adapter-side locale resolution is not yet wired through."
244            ),
245        });
246    }
247
248    let bibliography = request.refs.resolve_local()?;
249    let mut processor = Processor::new(style, bibliography);
250    warnings.extend(unknown_reference_class_warnings(&processor.bibliography));
251    warnings.extend(unknown_enum_warnings(&processor));
252
253    if let Some(opts) = &request.document_options {
254        // Rebuild the processor with the document-level integral-name override
255        // before applying scalar field mutations (show_semantics etc.) so that
256        // those mutations are not lost when the processor is reconstructed.
257        if let Some(new_proc) = processor
258            .processor_with_document_integral_name_override(opts.integral_name_memory.as_ref())
259        {
260            processor = new_proc;
261        }
262        if let Some(show_semantics) = opts.show_semantics {
263            processor.show_semantics = show_semantics;
264        }
265        if let Some(inject_ast) = opts.inject_ast_indices {
266            processor.set_inject_ast_indices(inject_ast);
267        }
268        if let Some(abbr_map) = opts.abbreviation_map.clone() {
269            processor.abbreviation_map = Some(abbr_map);
270        }
271    }
272
273    // Convert citations, recording missing-ref warnings and dropping items
274    // whose reference IDs are absent from the bibliography. Citations with no
275    // surviving items are kept as empty placeholders so the output preserves
276    // input order and length.
277    let mut citations: Vec<Citation> = Vec::new();
278    for occ in request.citations {
279        let mut citation: Citation = occ.into();
280        citation.items.retain(|item| {
281            if processor.bibliography.contains_key(&item.id) {
282                true
283            } else {
284                warnings.push(Warning {
285                    level: WarningLevel::Warning,
286                    code: "missing_ref".to_string(),
287                    citation_id: citation.id.clone(),
288                    ref_id: Some(item.id.clone()),
289                    message: format!("Reference '{}' not found in bibliography", item.id),
290                });
291                false
292            }
293        });
294        citations.push(citation);
295    }
296
297    // Annotate integral-name First/Subsequent state from the processor's
298    // effective config (no document structure available; all citations share
299    // document scope). Safe no-op when no memory config is present.
300    processor.annotate_flat_integral_name_states(&mut citations);
301
302    // Process citations
303    let formatted_citations = match request.output_format {
304        OutputFormatKind::Plain => format_by_kind::<PlainText>(&processor, &citations)?,
305        OutputFormatKind::Html => format_by_kind::<Html>(&processor, &citations)?,
306        OutputFormatKind::Djot => format_by_kind::<Djot>(&processor, &citations)?,
307        OutputFormatKind::Latex => format_by_kind::<Latex>(&processor, &citations)?,
308        OutputFormatKind::Typst => format_by_kind::<Typst>(&processor, &citations)?,
309        OutputFormatKind::Markdown => format_by_kind::<Markdown>(&processor, &citations)?,
310    };
311
312    // Register nocite IDs: validate against bibliography, warn on missing, then add
313    // to cited_ids so they appear in bibliography.entries but produce no citation text.
314    let nocite_ids: Vec<String> = request
315        .nocite
316        .iter()
317        .filter_map(|id| {
318            if processor.bibliography.contains_key(id) {
319                Some(id.clone())
320            } else {
321                warnings.push(Warning {
322                    level: WarningLevel::Warning,
323                    code: "nocite_missing_ref".to_string(),
324                    citation_id: None,
325                    ref_id: Some(id.clone()),
326                    message: format!("Nocite reference '{id}' not found in bibliography"),
327                });
328                None
329            }
330        })
331        .collect();
332    processor.register_nocite_ids(nocite_ids);
333
334    // Process bibliography
335    let bibliography = match request.output_format {
336        OutputFormatKind::Plain => format_bibliography::<PlainText>(
337            &processor,
338            request.output_format,
339            request.document_options.as_ref(),
340        )?,
341        OutputFormatKind::Html => format_bibliography::<Html>(
342            &processor,
343            request.output_format,
344            request.document_options.as_ref(),
345        )?,
346        OutputFormatKind::Djot => format_bibliography::<Djot>(
347            &processor,
348            request.output_format,
349            request.document_options.as_ref(),
350        )?,
351        OutputFormatKind::Latex => format_bibliography::<Latex>(
352            &processor,
353            request.output_format,
354            request.document_options.as_ref(),
355        )?,
356        OutputFormatKind::Typst => format_bibliography::<Typst>(
357            &processor,
358            request.output_format,
359            request.document_options.as_ref(),
360        )?,
361        OutputFormatKind::Markdown => format_bibliography::<Markdown>(
362            &processor,
363            request.output_format,
364            request.document_options.as_ref(),
365        )?,
366    };
367
368    // Process bibliography blocks
369    let bibliography_blocks = match request.output_format {
370        OutputFormatKind::Plain => format_bibliography_blocks::<PlainText>(
371            &processor,
372            &request.bibliography_blocks,
373            request.document_options.as_ref(),
374        )?,
375        OutputFormatKind::Html => format_bibliography_blocks::<Html>(
376            &processor,
377            &request.bibliography_blocks,
378            request.document_options.as_ref(),
379        )?,
380        OutputFormatKind::Djot => format_bibliography_blocks::<Djot>(
381            &processor,
382            &request.bibliography_blocks,
383            request.document_options.as_ref(),
384        )?,
385        OutputFormatKind::Latex => format_bibliography_blocks::<Latex>(
386            &processor,
387            &request.bibliography_blocks,
388            request.document_options.as_ref(),
389        )?,
390        OutputFormatKind::Typst => format_bibliography_blocks::<Typst>(
391            &processor,
392            &request.bibliography_blocks,
393            request.document_options.as_ref(),
394        )?,
395        OutputFormatKind::Markdown => format_bibliography_blocks::<Markdown>(
396            &processor,
397            &request.bibliography_blocks,
398            request.document_options.as_ref(),
399        )?,
400    };
401
402    Ok(FormatDocumentResult {
403        formatted_citations,
404        bibliography,
405        bibliography_blocks,
406        warnings,
407    })
408}
409
410/// Scan the bibliography for unknown reference classes and return compatibility warnings.
411pub fn unknown_reference_class_warnings(bibliography: &Bibliography) -> Vec<Warning> {
412    bibliography
413        .iter()
414        .filter_map(|(ref_id, reference)| {
415            let ReferenceClass::Unknown(class) = reference.class() else {
416                return None;
417            };
418            Some(Warning {
419                level: WarningLevel::Warning,
420                code: "unknown_reference_class".to_string(),
421                citation_id: None,
422                ref_id: Some(ref_id.clone()),
423                message: format!(
424                    "Reference '{ref_id}' uses unknown class '{class}'; rendering will use only fields this engine understands."
425                ),
426            })
427        })
428        .collect()
429}
430
431/// Scan the style and bibliography for unknown enum variants and term keys.
432///
433/// Returns a list of structured compatibility warnings for encounter of
434/// unknown variants that were captured via the tolerant-enum mechanism.
435pub fn unknown_enum_warnings(processor: &Processor) -> Vec<Warning> {
436    let mut warnings = Vec::new();
437
438    // 1. Scan bibliography
439    for (ref_id, reference) in &processor.bibliography {
440        match reference.extension() {
441            ClassExtension::Monograph(r) => {
442                if let MonographType::Unknown(s) = &r.r#type {
443                    warnings.push(Warning {
444                        level: WarningLevel::Warning,
445                        code: "unknown_enum_variant".to_string(),
446                        citation_id: None,
447                        ref_id: Some(ref_id.clone()),
448                        message: format!("Reference '{ref_id}' uses unknown monograph type '{s}'; rendering will use default monograph formatting."),
449                    });
450                }
451            }
452            ClassExtension::Collection(r) => {
453                if let CollectionType::Unknown(s) = &r.r#type {
454                    warnings.push(Warning {
455                        level: WarningLevel::Warning,
456                        code: "unknown_enum_variant".to_string(),
457                        citation_id: None,
458                        ref_id: Some(ref_id.clone()),
459                        message: format!("Reference '{ref_id}' uses unknown collection type '{s}'; rendering will use default collection formatting."),
460                    });
461                }
462            }
463            ClassExtension::CollectionComponent(r) => {
464                if let MonographComponentType::Unknown(s) = &r.r#type {
465                    warnings.push(Warning {
466                        level: WarningLevel::Warning,
467                        code: "unknown_enum_variant".to_string(),
468                        citation_id: None,
469                        ref_id: Some(ref_id.clone()),
470                        message: format!("Reference '{ref_id}' uses unknown monograph component type '{s}'; rendering will use default chapter formatting."),
471                    });
472                }
473            }
474            ClassExtension::SerialComponent(r) => {
475                if let SerialComponentType::Unknown(s) = &r.r#type {
476                    warnings.push(Warning {
477                        level: WarningLevel::Warning,
478                        code: "unknown_enum_variant".to_string(),
479                        citation_id: None,
480                        ref_id: Some(ref_id.clone()),
481                        message: format!("Reference '{ref_id}' uses unknown serial component type '{s}'; rendering will use default article formatting."),
482                    });
483                }
484            }
485            _ => {}
486        }
487
488        for contributor in reference.all_contributor_entries() {
489            if let ReferenceRole::Unknown(s) = &contributor.role {
490                warnings.push(Warning {
491                    level: WarningLevel::Warning,
492                    code: "unknown_enum_variant".to_string(),
493                    citation_id: None,
494                    ref_id: Some(ref_id.clone()),
495                    message: format!("Reference '{ref_id}' uses unknown contributor role '{s}'; this role may be ignored during rendering."),
496                });
497            }
498        }
499    }
500
501    // 2. Scan Style
502    if let Some(templates) = &processor.style.templates {
503        for (name, template) in templates {
504            scan_template_for_unknowns(template, &format!("template '{name}'"), &mut warnings);
505        }
506    }
507    if let Some(citation) = &processor.style.citation
508        && let Some(template) = &citation.template
509    {
510        scan_template_for_unknowns(template, "citation layout", &mut warnings);
511    }
512    if let Some(bib) = &processor.style.bibliography
513        && let Some(template) = &bib.template
514    {
515        scan_template_for_unknowns(template, "bibliography layout", &mut warnings);
516    }
517
518    warnings
519}
520
521fn scan_template_for_unknowns(
522    components: &[citum_schema::template::TemplateComponent],
523    location: &str,
524    warnings: &mut Vec<Warning>,
525) {
526    use citum_schema::template::TemplateComponent;
527    for component in components {
528        match component {
529            TemplateComponent::Term(t) => {
530                if let GeneralTerm::Unknown(s) = &t.term {
531                    warnings.push(Warning {
532                        level: WarningLevel::Warning,
533                        code: "unknown_enum_variant".to_string(),
534                        citation_id: None,
535                        ref_id: None,
536                        message: format!("Style {location} uses unknown locale term key '{s}'; this term may render as empty."),
537                    });
538                }
539                if let Some(TermForm::Unknown(s)) = &t.form {
540                    warnings.push(Warning {
541                        level: WarningLevel::Warning,
542                        code: "unknown_enum_variant".to_string(),
543                        citation_id: None,
544                        ref_id: None,
545                        message: format!("Style {location} uses unknown term form '{s}'; falling back to long form."),
546                    });
547                }
548            }
549            TemplateComponent::Contributor(c) => {
550                if let TemplateRole::Unknown(s) = &c.contributor {
551                    warnings.push(Warning {
552                        level: WarningLevel::Warning,
553                        code: "unknown_enum_variant".to_string(),
554                        citation_id: None,
555                        ref_id: None,
556                        message: format!("Style {location} uses unknown contributor role '{s}'; this role may be ignored."),
557                    });
558                }
559            }
560            TemplateComponent::Date(d) => {
561                if let citum_schema::template::DateForm::Unknown(s) = &d.form {
562                    warnings.push(Warning {
563                        level: WarningLevel::Warning,
564                        code: "unknown_enum_variant".to_string(),
565                        citation_id: None,
566                        ref_id: None,
567                        message: format!("Style {location} uses unknown date form '{s}'; falling back to year only."),
568                    });
569                }
570            }
571            TemplateComponent::Group(g) => {
572                scan_template_for_unknowns(&g.group, location, warnings);
573            }
574            _ => {}
575        }
576    }
577}
578
579/// Process citations and return formatted text.
580pub(crate) fn format_by_kind<F>(
581    processor: &Processor,
582    citations: &[Citation],
583) -> Result<Vec<FormattedCitation>, FormatDocumentError>
584where
585    F: OutputFormat<Output = String>,
586{
587    let texts = processor.process_citations_with_format::<F>(citations)?;
588
589    let formatted = citations
590        .iter()
591        .zip(texts.iter())
592        .map(|(citation, text)| {
593            let ref_ids = citation.items.iter().map(|item| item.id.clone()).collect();
594            FormattedCitation {
595                id: citation.id.clone().unwrap_or_default(),
596                text: text.clone(),
597                ref_ids,
598            }
599        })
600        .collect();
601
602    Ok(formatted)
603}
604
605/// Format the bibliography by output kind, restricted to the document's cited set.
606///
607/// Only references that appear in `processor.cited_ids` — either via an in-text
608/// citation or via a `nocite` registration — are included in the output. Delegates
609/// to [`Processor::render_document_bibliography`], the unified facade that ensures
610/// both `content` and `entries` are computed from the same cited subset so
611/// subsequent-author substitution stays consistent.
612pub(crate) fn format_bibliography<F>(
613    processor: &Processor,
614    format_kind: OutputFormatKind,
615    doc_opts: Option<&DocumentOptions>,
616) -> Result<FormattedBibliography, FormatDocumentError>
617where
618    F: OutputFormat<Output = String>,
619{
620    let (annotations, annotation_style) = annotation_options(doc_opts);
621    let doc_bib = processor.render_document_bibliography::<F>(
622        true,
623        if annotations.is_empty() {
624            None
625        } else {
626            Some(&annotations)
627        },
628        annotation_style.as_ref(),
629    );
630    let entries = doc_bib
631        .entries
632        .into_iter()
633        .map(|entry| {
634            proc_entry_to_bibliography_entry::<F>(
635                entry,
636                if annotations.is_empty() {
637                    None
638                } else {
639                    Some(&annotations)
640                },
641                annotation_style.as_ref(),
642            )
643        })
644        .collect();
645    Ok(FormattedBibliography {
646        format: format_kind,
647        content: doc_bib.content,
648        entries,
649    })
650}
651
652/// Format ordered sectional bibliography blocks.
653///
654/// Threads a single `assigned` dedup set through all blocks so each reference
655/// appears in only one block. Renders entries with annotations if configured.
656pub(crate) fn format_bibliography_blocks<F>(
657    processor: &Processor,
658    requests: &[super::BibliographyBlockRequest],
659    doc_opts: Option<&DocumentOptions>,
660) -> Result<Vec<super::FormattedBibliographyBlock>, FormatDocumentError>
661where
662    F: OutputFormat<Output = String>,
663{
664    if requests.is_empty() {
665        return Ok(Vec::new());
666    }
667
668    let (annotations, annotation_style) = annotation_options(doc_opts);
669    let groups: Vec<_> = requests.iter().map(|r| r.group.clone()).collect();
670    let rendered = processor.render_document_bibliography_blocks::<F>(
671        &groups,
672        if annotations.is_empty() {
673            None
674        } else {
675            Some(&annotations)
676        },
677        annotation_style.as_ref(),
678    );
679
680    Ok(requests
681        .iter()
682        .zip(rendered)
683        .map(|(req, rg)| super::FormattedBibliographyBlock {
684            id: req.id.clone(),
685            heading: rg.heading,
686            content: rg.body,
687            entries: rg
688                .entries
689                .into_iter()
690                .map(|entry| {
691                    proc_entry_to_bibliography_entry::<F>(
692                        entry,
693                        if annotations.is_empty() {
694                            None
695                        } else {
696                            Some(&annotations)
697                        },
698                        annotation_style.as_ref(),
699                    )
700                })
701                .collect(),
702        })
703        .collect())
704}
705
706/// Extract annotation map and style from document options.
707fn annotation_options(
708    doc_opts: Option<&DocumentOptions>,
709) -> (HashMap<String, String>, Option<AnnotationStyle>) {
710    if let Some(opts) = doc_opts
711        && let Some(anns) = &opts.annotations
712    {
713        let style = opts.annotation_format.as_ref().map(|fmt| AnnotationStyle {
714            format: fmt.clone(),
715        });
716        return (anns.clone(), style);
717    }
718    (HashMap::new(), None)
719}
720
721/// Convert a processor entry to a bibliography entry with annotations.
722fn proc_entry_to_bibliography_entry<F>(
723    entry: crate::render::ProcEntry,
724    annotations: Option<&HashMap<String, String>>,
725    annotation_style: Option<&AnnotationStyle>,
726) -> BibliographyEntry
727where
728    F: OutputFormat<Output = String>,
729{
730    let text = crate::render::bibliography::refs_to_string_slice_with_format::<F>(
731        std::slice::from_ref(&entry),
732        annotations,
733        annotation_style,
734    );
735    let metadata = EntryMetadata {
736        author: entry.metadata.author.unwrap_or_default(),
737        year: entry.metadata.year.unwrap_or_default(),
738        title: entry.metadata.title.unwrap_or_default(),
739    };
740    BibliographyEntry {
741        id: entry.id,
742        text,
743        metadata,
744    }
745}
746
747#[cfg(test)]
748#[allow(
749    clippy::unwrap_used,
750    clippy::expect_used,
751    clippy::panic,
752    clippy::indexing_slicing,
753    reason = "test code uses assertions and panic"
754)]
755mod tests {
756    use super::*;
757    use crate::api::CitationOccurrenceItem;
758    use crate::{
759        Config, ContributorForm, ContributorRole, DateForm, Processing, Rendering,
760        TemplateComponent, TemplateContributor, TemplateDate, TemplateDateVariable,
761        WrapPunctuation,
762    };
763    use citum_schema::options::{AndOptions, ContributorConfig};
764    use citum_schema::reference::{EdtfString, InputReference, Monograph, MonographType, Title};
765    use citum_schema::template::{TemplateTitle, TitleType};
766    use citum_schema::{BibliographySpec, CitationSpec, StyleInfo};
767
768    fn make_test_style() -> Style {
769        Style {
770            info: StyleInfo {
771                title: Some("Test Style".to_string()),
772                id: Some("test".into()),
773                ..Default::default()
774            },
775            options: Some(Config {
776                processing: Some(Processing::AuthorDate),
777                ..Default::default()
778            }),
779            citation: Some(CitationSpec {
780                template: Some(vec![
781                    TemplateComponent::Contributor(TemplateContributor {
782                        contributor: ContributorRole::Author,
783                        form: ContributorForm::Short,
784                        rendering: Rendering::default(),
785                        ..Default::default()
786                    }),
787                    TemplateComponent::Date(TemplateDate {
788                        date: TemplateDateVariable::Issued,
789                        form: DateForm::Year,
790                        rendering: Rendering::default(),
791                        ..Default::default()
792                    }),
793                ]),
794                wrap: Some(WrapPunctuation::Parentheses.into()),
795                ..Default::default()
796            }),
797            ..Default::default()
798        }
799    }
800
801    fn make_test_bibliography() -> RefsInput {
802        let mut refs = Bibliography::new();
803        refs.insert(
804            "smith2020".to_string(),
805            InputReference::Monograph(Box::new(Monograph {
806                id: Some("smith2020".into()),
807                r#type: MonographType::Book,
808                title: Some(Title::Single("Sample Work".to_string())),
809                issued: EdtfString("2020".to_string()),
810                ..Default::default()
811            })),
812        );
813        RefsInput::Json(serde_json::to_value(refs).unwrap())
814    }
815
816    fn make_markup_bibliography() -> RefsInput {
817        let mut refs = Bibliography::new();
818        refs.insert(
819            "art1".to_string(),
820            InputReference::Monograph(Box::new(Monograph {
821                id: Some("art1".into()),
822                r#type: MonographType::Book,
823                title: Some(Title::Single(
824                    "_Homo sapiens_ and *modern* world".to_string(),
825                )),
826                issued: EdtfString("2023".to_string()),
827                ..Default::default()
828            })),
829        );
830        RefsInput::Json(serde_json::to_value(refs).unwrap())
831    }
832
833    #[test]
834    fn format_document_with_style_empty_citations() {
835        let style = make_test_style();
836        let refs = make_test_bibliography();
837        let request = FormatDocumentRequest {
838            style: StyleInput::Yaml("dummy".to_string()),
839            style_overrides: None,
840            locale: None,
841            output_format: OutputFormatKind::Plain,
842            refs,
843            citations: vec![],
844            bibliography_blocks: Vec::new(),
845            document_options: None,
846            nocite: vec![],
847        };
848
849        let result = format_document_with_style(style, request);
850        assert!(result.is_ok());
851        let res = result.unwrap();
852        assert_eq!(res.formatted_citations.len(), 0);
853    }
854
855    #[test]
856    fn format_document_html_bibliography_entries_preserve_inline_markup() {
857        let mut style = make_test_style();
858        style.bibliography = Some(BibliographySpec {
859            template: Some(vec![TemplateComponent::Title(TemplateTitle {
860                title: TitleType::Primary,
861                ..Default::default()
862            })]),
863            ..Default::default()
864        });
865
866        let request = FormatDocumentRequest {
867            style: StyleInput::Yaml("dummy".to_string()),
868            style_overrides: None,
869            locale: None,
870            output_format: OutputFormatKind::Html,
871            refs: make_markup_bibliography(),
872            citations: vec![],
873            bibliography_blocks: Vec::new(),
874            document_options: None,
875            // Use nocite to include art1 in the bibliography without an in-text citation;
876            // the test is validating bibliography HTML rendering, not citation rendering.
877            nocite: vec!["art1".to_string()],
878        };
879
880        let result = format_document_with_style(style, request).expect("should render");
881
882        assert_eq!(
883            result.bibliography.entries[0].text, result.bibliography.content,
884            "single-entry bibliography should mirror the full bibliography payload"
885        );
886        assert!(
887            result.bibliography.entries[0].text.contains(
888                "<span class=\"citum-title\"><em>Homo sapiens</em> and <b>modern</b> world</span>"
889            ),
890            "per-entry HTML should preserve inline markup for Djot-bearing titles"
891        );
892    }
893
894    #[test]
895    fn format_document_missing_ref_warning() {
896        let style = make_test_style();
897        let refs = make_test_bibliography();
898
899        let citation_occ = CitationOccurrence {
900            id: "cite1".to_string(),
901            items: vec![CitationOccurrenceItem {
902                id: "unknown_ref".to_string(),
903                locator: None,
904                prefix: None,
905                suffix: None,
906                integral_name_state: None,
907                org_abbreviation_state: None,
908            }],
909            mode: None,
910            note_number: None,
911            suppress_author: None,
912            grouped: None,
913            prefix: None,
914            suffix: None,
915            sentence_start: None,
916        };
917
918        let request = FormatDocumentRequest {
919            style: StyleInput::Yaml("dummy".to_string()),
920            style_overrides: None,
921            locale: None,
922            output_format: OutputFormatKind::Plain,
923            refs,
924            citations: vec![citation_occ],
925            bibliography_blocks: Vec::new(),
926            document_options: None,
927            nocite: vec![],
928        };
929
930        let result = format_document_with_style(style, request);
931        assert!(result.is_ok());
932        let res = result.unwrap();
933        assert!(res.warnings.iter().any(|w| w.code == "missing_ref"));
934    }
935
936    #[test]
937    fn format_document_unknown_reference_class_warning() {
938        let style = make_test_style();
939        let mut refs = Bibliography::new();
940        let unknown_ref: InputReference = serde_json::from_str(
941            r#"{
942                "class": "dance-performance",
943                "id": "pina2011",
944                "title": "Pina",
945                "issued": "2011",
946                "venue": "Berlin"
947            }"#,
948        )
949        .expect("unknown class should parse through the compatibility path");
950        refs.insert("pina2011".to_string(), unknown_ref);
951
952        let citation_occ = CitationOccurrence {
953            id: "cite1".to_string(),
954            items: vec![CitationOccurrenceItem {
955                id: "pina2011".to_string(),
956                locator: None,
957                prefix: None,
958                suffix: None,
959                integral_name_state: None,
960                org_abbreviation_state: None,
961            }],
962            mode: None,
963            note_number: None,
964            suppress_author: None,
965            grouped: None,
966            prefix: None,
967            suffix: None,
968            sentence_start: None,
969        };
970
971        let request = FormatDocumentRequest {
972            style: StyleInput::Yaml("dummy".to_string()),
973            style_overrides: None,
974            locale: None,
975            output_format: OutputFormatKind::Plain,
976            refs: RefsInput::Json(serde_json::to_value(refs).unwrap()),
977            citations: vec![citation_occ],
978            bibliography_blocks: Vec::new(),
979            document_options: None,
980            nocite: vec![],
981        };
982
983        let result = format_document_with_style(style, request).unwrap();
984        let warning = result
985            .warnings
986            .iter()
987            .find(|w| w.code == "unknown_reference_class")
988            .expect("unknown class warning should be emitted");
989        assert_eq!(warning.ref_id.as_deref(), Some("pina2011"));
990        assert!(warning.message.contains("dance-performance"));
991    }
992
993    #[test]
994    fn format_document_yaml_style_input() {
995        let style = make_test_style();
996        let yaml_style = serde_yaml::to_string(&style).expect("serialize test style");
997
998        let mut refs = Bibliography::new();
999        refs.insert(
1000            "test2024".to_string(),
1001            InputReference::Monograph(Box::new(Monograph {
1002                id: Some("test2024".into()),
1003                r#type: MonographType::Book,
1004                title: Some(Title::Single("Test Work".to_string())),
1005                issued: EdtfString("2024".to_string()),
1006                ..Default::default()
1007            })),
1008        );
1009
1010        let citation_occ = CitationOccurrence {
1011            id: "c1".to_string(),
1012            items: vec![CitationOccurrenceItem {
1013                id: "test2024".to_string(),
1014                locator: None,
1015                prefix: None,
1016                suffix: None,
1017                integral_name_state: None,
1018                org_abbreviation_state: None,
1019            }],
1020            mode: None,
1021            note_number: None,
1022            suppress_author: None,
1023            grouped: None,
1024            prefix: None,
1025            suffix: None,
1026            sentence_start: None,
1027        };
1028
1029        let request = FormatDocumentRequest {
1030            style: StyleInput::Yaml(yaml_style),
1031            style_overrides: None,
1032            locale: None,
1033            output_format: OutputFormatKind::Plain,
1034            refs: RefsInput::Json(serde_json::to_value(refs).unwrap()),
1035            citations: vec![citation_occ],
1036            bibliography_blocks: Vec::new(),
1037            document_options: None,
1038            nocite: vec![],
1039        };
1040
1041        let result = format_document(request);
1042        assert!(result.is_ok());
1043        let res = result.unwrap();
1044        assert_eq!(res.formatted_citations.len(), 1);
1045        assert!(!res.formatted_citations[0].text.is_empty());
1046    }
1047
1048    #[test]
1049    fn format_document_uri_input_unresolved() {
1050        let request = FormatDocumentRequest {
1051            style: StyleInput::Uri("https://example.com/style.yaml".to_string()),
1052            style_overrides: None,
1053            locale: None,
1054            output_format: OutputFormatKind::Plain,
1055            refs: RefsInput::Json(serde_json::Value::Object(Default::default())),
1056            citations: vec![],
1057            bibliography_blocks: Vec::new(),
1058            document_options: None,
1059            nocite: vec![],
1060        };
1061
1062        let result = format_document(request);
1063        match result {
1064            Err(FormatDocumentError::UnresolvedInput(_)) => {
1065                // Expected
1066            }
1067            _ => panic!("Expected UnresolvedInput error"),
1068        }
1069    }
1070
1071    /// A minimal resolver that returns a fixed style for any ID.
1072    struct MockResolver(Style);
1073
1074    impl citum_resolver_api::StyleResolver for MockResolver {
1075        type Style = Style;
1076        type Locale = citum_schema::locale::Locale;
1077
1078        fn resolve_style(&self, _uri: &str) -> Result<Style, citum_schema::ResolverError> {
1079            Ok(self.0.clone())
1080        }
1081
1082        fn resolve_locale(
1083            &self,
1084            id: &str,
1085        ) -> Result<citum_schema::locale::Locale, citum_schema::ResolverError> {
1086            Err(citum_schema::ResolverError::LocaleNotFound(
1087                std::borrow::Cow::Owned(id.to_string()),
1088            ))
1089        }
1090    }
1091
1092    #[test]
1093    fn format_document_with_resolver_injects_style_for_id_input() {
1094        let style = make_test_style();
1095        let resolver = MockResolver(style);
1096        let refs = make_test_bibliography();
1097
1098        let citation_occ = CitationOccurrence {
1099            id: "c1".to_string(),
1100            items: vec![CitationOccurrenceItem {
1101                id: "smith2020".to_string(),
1102                locator: None,
1103                prefix: None,
1104                suffix: None,
1105                integral_name_state: None,
1106                org_abbreviation_state: None,
1107            }],
1108            mode: None,
1109            note_number: None,
1110            suppress_author: None,
1111            grouped: None,
1112            prefix: None,
1113            suffix: None,
1114            sentence_start: None,
1115        };
1116
1117        let request = FormatDocumentRequest {
1118            style: StyleInput::Id("any-id".to_string()),
1119            style_overrides: None,
1120            locale: None,
1121            output_format: OutputFormatKind::Plain,
1122            refs,
1123            citations: vec![citation_occ],
1124            bibliography_blocks: Vec::new(),
1125            document_options: None,
1126            nocite: vec![],
1127        };
1128
1129        // Without a resolver, the same Id input must be rejected.
1130        match format_document(request.clone()) {
1131            Err(FormatDocumentError::UnresolvedInput(_)) => {}
1132            other => panic!("expected UnresolvedInput without resolver, got: {other:?}"),
1133        }
1134
1135        // With the injected resolver it must succeed.
1136        let result = format_document_with_resolver(request, &resolver);
1137        assert!(result.is_ok(), "expected Ok, got: {:?}", result.err());
1138        let res = result.unwrap();
1139        assert_eq!(res.formatted_citations.len(), 1);
1140        assert!(
1141            !res.formatted_citations[0].text.is_empty(),
1142            "formatted citation text should not be empty"
1143        );
1144    }
1145
1146    /// Build an author-date style whose citation template renders contributor short form.
1147    fn make_two_author_style() -> Style {
1148        Style {
1149            info: StyleInfo {
1150                title: Some("Override Test Style".to_string()),
1151                id: Some("override-test".into()),
1152                ..Default::default()
1153            },
1154            options: Some(Config {
1155                processing: Some(Processing::AuthorDate),
1156                // Explicitly set `and: text` so the override to `symbol` is observable
1157                // in rendered output without relying on any default connector.
1158                contributors: Some(ContributorConfig {
1159                    and: Some(AndOptions::Text),
1160                    ..Default::default()
1161                }),
1162                ..Default::default()
1163            }),
1164            citation: Some(CitationSpec {
1165                template: Some(vec![
1166                    TemplateComponent::Contributor(TemplateContributor {
1167                        contributor: ContributorRole::Author,
1168                        form: ContributorForm::Short,
1169                        rendering: Rendering::default(),
1170                        ..Default::default()
1171                    }),
1172                    TemplateComponent::Date(TemplateDate {
1173                        date: TemplateDateVariable::Issued,
1174                        form: DateForm::Year,
1175                        rendering: Rendering {
1176                            prefix: Some(", ".to_string()),
1177                            ..Default::default()
1178                        },
1179                        ..Default::default()
1180                    }),
1181                ]),
1182                wrap: Some(WrapPunctuation::Parentheses.into()),
1183                ..Default::default()
1184            }),
1185            ..Default::default()
1186        }
1187    }
1188
1189    /// Build a refs input with a two-author book so the "and" connector is exercised.
1190    ///
1191    /// Uses inline YAML (the reliably tested deserialization path) rather than
1192    /// round-tripping through `serde_json::to_value` which may not preserve the
1193    /// contributor tagged-enum layout the engine expects.
1194    fn make_two_author_refs() -> RefsInput {
1195        RefsInput::Yaml(
1196            r#"duo2024:
1197  class: monograph
1198  id: duo2024
1199  type: book
1200  title: Duo Work
1201  issued: "2024"
1202  author:
1203    - family: Smith
1204      given: Alice
1205    - family: Jones
1206      given: Bob
1207"#
1208            .to_string(),
1209        )
1210    }
1211
1212    /// Helper: produce a single-item citation occurrence for a given ref id.
1213    fn cite(ref_id: &str) -> CitationOccurrence {
1214        CitationOccurrence {
1215            id: "c1".to_string(),
1216            items: vec![CitationOccurrenceItem {
1217                id: ref_id.to_string(),
1218                locator: None,
1219                prefix: None,
1220                suffix: None,
1221                integral_name_state: None,
1222                org_abbreviation_state: None,
1223            }],
1224            mode: None,
1225            note_number: None,
1226            suppress_author: None,
1227            grouped: None,
1228            prefix: None,
1229            suffix: None,
1230            sentence_start: None,
1231        }
1232    }
1233
1234    #[test]
1235    fn style_overrides_and_symbol_changes_rendered_output() {
1236        let base_style = make_two_author_style();
1237        let refs = make_two_author_refs();
1238
1239        // given: base style produces a citation containing "and"
1240        let request_base = FormatDocumentRequest {
1241            style: StyleInput::Yaml("dummy".to_string()),
1242            style_overrides: None,
1243            locale: None,
1244            output_format: OutputFormatKind::Plain,
1245            refs: refs.clone(),
1246            citations: vec![cite("duo2024")],
1247            bibliography_blocks: Vec::new(),
1248            document_options: None,
1249            nocite: vec![],
1250        };
1251        let result_base = format_document_with_style(base_style.clone(), request_base).unwrap();
1252        let text_base = &result_base.formatted_citations[0].text;
1253        assert!(
1254            text_base.contains("and"),
1255            "base style should use text 'and' connector, got: {text_base:?}"
1256        );
1257
1258        // when: style_overrides switches connector to symbol "&"
1259        let request_override = FormatDocumentRequest {
1260            style: StyleInput::Yaml("dummy".to_string()),
1261            style_overrides: Some("options:\n  contributors:\n    and: symbol\n".to_string()),
1262            locale: None,
1263            output_format: OutputFormatKind::Plain,
1264            refs,
1265            citations: vec![cite("duo2024")],
1266            bibliography_blocks: Vec::new(),
1267            document_options: None,
1268            nocite: vec![],
1269        };
1270        let result_override =
1271            format_document_with_style(base_style.clone(), request_override).unwrap();
1272        let text_override = &result_override.formatted_citations[0].text;
1273        assert!(
1274            text_override.contains('&'),
1275            "overridden style should use '&' connector, got: {text_override:?}"
1276        );
1277
1278        // then: base style struct is untouched — still has Text, not Symbol
1279        let base_and = base_style
1280            .options
1281            .as_ref()
1282            .and_then(|o| o.contributors.as_ref())
1283            .and_then(|c| c.and.as_ref());
1284        assert!(
1285            matches!(base_and, Some(&AndOptions::Text)),
1286            "base style must not be mutated; expected And::Text, got: {base_and:?}"
1287        );
1288    }
1289
1290    #[test]
1291    fn style_overrides_invalid_yaml_returns_parse_error() {
1292        let style = make_test_style();
1293        let refs = make_test_bibliography();
1294
1295        let request = FormatDocumentRequest {
1296            style: StyleInput::Yaml("dummy".to_string()),
1297            style_overrides: Some("{ unclosed yaml: [".to_string()),
1298            locale: None,
1299            output_format: OutputFormatKind::Plain,
1300            refs,
1301            citations: vec![],
1302            bibliography_blocks: Vec::new(),
1303            document_options: None,
1304            nocite: vec![],
1305        };
1306
1307        match format_document_with_style(style, request) {
1308            Err(FormatDocumentError::StyleParse(msg)) => {
1309                assert!(
1310                    msg.contains("style_overrides"),
1311                    "error message should mention style_overrides, got: {msg}"
1312                );
1313            }
1314            other => panic!("expected StyleParse error, got: {other:?}"),
1315        }
1316    }
1317
1318    #[test]
1319    fn apply_style_overrides_merges_option_field() {
1320        let mut style = make_test_style();
1321        apply_style_overrides(&mut style, "options:\n  contributors:\n    and: symbol\n")
1322            .expect("apply_style_overrides should succeed");
1323
1324        let and_option = style
1325            .options
1326            .as_ref()
1327            .and_then(|o| o.contributors.as_ref())
1328            .and_then(|c| c.and.as_ref());
1329        assert!(
1330            matches!(and_option, Some(&AndOptions::Symbol)),
1331            "expected And::Symbol after override, got: {and_option:?}"
1332        );
1333    }
1334
1335    // --- integral_name_memory wiring ---
1336
1337    /// Build a style that has integral-name memory configured with scope=Document,
1338    /// contexts=BodyAndNotes, subsequent_form=Short, and an integral sub-template
1339    /// that renders the author in Long (given + family) form.
1340    fn make_integral_name_style() -> Style {
1341        use citum_schema::options::{
1342            IntegralNameContexts, IntegralNameMemoryConfig, IntegralNameScope, SubsequentNameForm,
1343        };
1344        Style {
1345            info: StyleInfo {
1346                title: Some("Integral Name Memory Test".to_string()),
1347                id: Some("integral-name-memory-test".into()),
1348                ..Default::default()
1349            },
1350            options: Some(Config {
1351                processing: Some(Processing::AuthorDate),
1352                integral_name_memory: Some(IntegralNameMemoryConfig {
1353                    scope: Some(IntegralNameScope::Document),
1354                    contexts: Some(IntegralNameContexts::BodyAndNotes),
1355                    subsequent_form: Some(SubsequentNameForm::Short),
1356                    ..Default::default()
1357                }),
1358                ..Default::default()
1359            }),
1360            citation: Some(CitationSpec {
1361                integral: Some(Box::new(CitationSpec {
1362                    template: Some(vec![TemplateComponent::Contributor(TemplateContributor {
1363                        contributor: ContributorRole::Author,
1364                        form: ContributorForm::Long,
1365                        rendering: Rendering::default(),
1366                        ..Default::default()
1367                    })]),
1368                    ..Default::default()
1369                })),
1370                template: Some(vec![
1371                    TemplateComponent::Contributor(TemplateContributor {
1372                        contributor: ContributorRole::Author,
1373                        form: ContributorForm::Short,
1374                        rendering: Rendering::default(),
1375                        ..Default::default()
1376                    }),
1377                    TemplateComponent::Date(TemplateDate {
1378                        date: TemplateDateVariable::Issued,
1379                        form: DateForm::Year,
1380                        rendering: Rendering::default(),
1381                        ..Default::default()
1382                    }),
1383                ]),
1384                wrap: Some(WrapPunctuation::Parentheses.into()),
1385                ..Default::default()
1386            }),
1387            ..Default::default()
1388        }
1389    }
1390
1391    fn make_smith_refs() -> RefsInput {
1392        RefsInput::Yaml(
1393            r#"smith2020:
1394  class: monograph
1395  id: smith2020
1396  type: book
1397  title: Smith Book
1398  issued: "2020"
1399  author:
1400    - family: Smith
1401      given: John
1402"#
1403            .to_string(),
1404        )
1405    }
1406
1407    fn make_integral_occ(id: &str, ref_id: &str) -> CitationOccurrence {
1408        CitationOccurrence {
1409            id: id.to_string(),
1410            items: vec![CitationOccurrenceItem {
1411                id: ref_id.to_string(),
1412                locator: None,
1413                prefix: None,
1414                suffix: None,
1415                integral_name_state: None,
1416                org_abbreviation_state: None,
1417            }],
1418            mode: Some(citum_schema::data::citation::CitationMode::Integral),
1419            note_number: None,
1420            suppress_author: None,
1421            grouped: None,
1422            prefix: None,
1423            suffix: None,
1424            sentence_start: None,
1425        }
1426    }
1427
1428    #[test]
1429    fn document_options_integral_name_memory_first_full_then_short() {
1430        use crate::processor::document::DocumentIntegralNameOverride;
1431
1432        let style = make_integral_name_style();
1433        let refs = make_smith_refs();
1434
1435        let request = FormatDocumentRequest {
1436            style: StyleInput::Yaml("dummy".to_string()),
1437            style_overrides: None,
1438            locale: None,
1439            output_format: OutputFormatKind::Plain,
1440            refs,
1441            citations: vec![
1442                make_integral_occ("c1", "smith2020"),
1443                make_integral_occ("c2", "smith2020"),
1444            ],
1445            bibliography_blocks: Vec::new(),
1446            document_options: Some(DocumentOptions {
1447                integral_name_memory: Some(DocumentIntegralNameOverride {
1448                    enabled: Some(true),
1449                    ..Default::default()
1450                }),
1451                ..Default::default()
1452            }),
1453            nocite: vec![],
1454        };
1455
1456        let result = format_document_with_style(style, request).expect("should render");
1457
1458        assert!(
1459            !result
1460                .warnings
1461                .iter()
1462                .any(|w| w.code == "integral_name_memory_not_applied"),
1463            "stale warning must not appear: {:?}",
1464            result.warnings
1465        );
1466        assert_eq!(
1467            result.formatted_citations[0].text, "John Smith",
1468            "first integral cite should render full name form"
1469        );
1470        assert_eq!(
1471            result.formatted_citations[1].text, "Smith",
1472            "second integral cite of same author should render short form"
1473        );
1474    }
1475
1476    #[test]
1477    fn document_options_integral_name_memory_disabled_keeps_full_form() {
1478        use crate::processor::document::DocumentIntegralNameOverride;
1479
1480        let style = make_integral_name_style();
1481        let refs = make_smith_refs();
1482
1483        let request = FormatDocumentRequest {
1484            style: StyleInput::Yaml("dummy".to_string()),
1485            style_overrides: None,
1486            locale: None,
1487            output_format: OutputFormatKind::Plain,
1488            refs,
1489            citations: vec![
1490                make_integral_occ("c1", "smith2020"),
1491                make_integral_occ("c2", "smith2020"),
1492            ],
1493            bibliography_blocks: Vec::new(),
1494            document_options: Some(DocumentOptions {
1495                integral_name_memory: Some(DocumentIntegralNameOverride {
1496                    enabled: Some(false),
1497                    ..Default::default()
1498                }),
1499                ..Default::default()
1500            }),
1501            nocite: vec![],
1502        };
1503
1504        let result = format_document_with_style(style, request).expect("should render");
1505
1506        // With memory disabled both occurrences should render the natural integral
1507        // template form (Long = "John Smith") without any subsequent rewrite.
1508        assert_eq!(
1509            result.formatted_citations[0].text, "John Smith",
1510            "first integral cite: {}",
1511            result.formatted_citations[0].text
1512        );
1513        assert_eq!(
1514            result.formatted_citations[1].text, "John Smith",
1515            "second integral cite should also be full when memory is disabled"
1516        );
1517    }
1518
1519    #[test]
1520    fn style_native_integral_name_memory_applied_without_document_override() {
1521        // Style has integral_name_memory in its own options; no document_options
1522        // override is supplied. The flat API must still annotate First/Subsequent.
1523        let style = make_integral_name_style();
1524        let refs = make_smith_refs();
1525
1526        let request = FormatDocumentRequest {
1527            style: StyleInput::Yaml("dummy".to_string()),
1528            style_overrides: None,
1529            locale: None,
1530            output_format: OutputFormatKind::Plain,
1531            refs,
1532            citations: vec![
1533                make_integral_occ("c1", "smith2020"),
1534                make_integral_occ("c2", "smith2020"),
1535            ],
1536            bibliography_blocks: Vec::new(),
1537            document_options: None,
1538            nocite: vec![],
1539        };
1540
1541        let result = format_document_with_style(style, request).expect("should render");
1542
1543        assert_eq!(
1544            result.formatted_citations[0].text, "John Smith",
1545            "first integral cite should render full name form"
1546        );
1547        assert_eq!(
1548            result.formatted_citations[1].text, "Smith",
1549            "second integral cite should render short form from style-native config"
1550        );
1551    }
1552
1553    #[test]
1554    fn format_document_bibliography_blocks_ordered_with_dedup() {
1555        use citum_schema::grouping::CitedStatus;
1556        use citum_schema::grouping::{BibliographyGroup, GroupSelector};
1557
1558        let mut style = make_test_style();
1559        style.bibliography = Some(BibliographySpec {
1560            template: Some(vec![TemplateComponent::Title(TemplateTitle {
1561                title: TitleType::Primary,
1562                ..Default::default()
1563            })]),
1564            ..Default::default()
1565        });
1566        let mut refs = Bibliography::new();
1567        refs.insert(
1568            "smith2020".to_string(),
1569            InputReference::Monograph(Box::new(Monograph {
1570                id: Some("smith2020".into()),
1571                r#type: MonographType::Book,
1572                title: Some(Title::Single("Sample Work".to_string())),
1573                issued: EdtfString("2020".to_string()),
1574                ..Default::default()
1575            })),
1576        );
1577        refs.insert(
1578            "jones2019".to_string(),
1579            InputReference::Monograph(Box::new(Monograph {
1580                id: Some("jones2019".into()),
1581                r#type: MonographType::Book,
1582                title: Some(Title::Single("Another Work".to_string())),
1583                issued: EdtfString("2019".to_string()),
1584                ..Default::default()
1585            })),
1586        );
1587
1588        let make_block = |id: &str| crate::BibliographyBlockRequest {
1589            id: id.to_string(),
1590            group: BibliographyGroup {
1591                id: id.to_string(),
1592                selector: GroupSelector {
1593                    cited: Some(CitedStatus::Any),
1594                    ..Default::default()
1595                },
1596                ..Default::default()
1597            },
1598        };
1599
1600        let request = FormatDocumentRequest {
1601            style: StyleInput::Yaml("dummy".to_string()),
1602            style_overrides: None,
1603            locale: None,
1604            output_format: OutputFormatKind::Plain,
1605            refs: RefsInput::Json(serde_json::to_value(refs).unwrap()),
1606            citations: vec![],
1607            bibliography_blocks: vec![make_block("block-a"), make_block("block-b")],
1608            document_options: None,
1609            nocite: vec![],
1610        };
1611
1612        let result = format_document_with_style(style, request).expect("should render");
1613
1614        assert_eq!(result.bibliography_blocks.len(), 2, "both blocks returned");
1615        assert_eq!(result.bibliography_blocks[0].id, "block-a");
1616        assert_eq!(result.bibliography_blocks[1].id, "block-b");
1617
1618        let block_a_count = result.bibliography_blocks[0].entries.len();
1619        let block_b_count = result.bibliography_blocks[1].entries.len();
1620
1621        assert_eq!(block_a_count, 2, "block-a captures both refs");
1622        assert_eq!(
1623            block_b_count, 0,
1624            "block-b is empty: dedup set prevents re-assignment from block-a"
1625        );
1626    }
1627
1628    // --- nocite tests ---
1629
1630    /// A ref listed only in `nocite` must appear in the bibliography but produce
1631    /// no `formatted_citations` entry (standard citeproc nocite semantics).
1632    #[test]
1633    fn nocite_ref_in_bibliography_not_in_formatted_citations() {
1634        let mut style = make_test_style();
1635        // A bibliography template is required for entries to be produced.
1636        style.bibliography = Some(BibliographySpec {
1637            template: Some(vec![TemplateComponent::Title(TemplateTitle {
1638                title: TitleType::Primary,
1639                ..Default::default()
1640            })]),
1641            ..Default::default()
1642        });
1643        let refs = make_test_bibliography(); // contains "smith2020"
1644
1645        let request = FormatDocumentRequest {
1646            style: StyleInput::Yaml("dummy".to_string()),
1647            style_overrides: None,
1648            locale: None,
1649            output_format: OutputFormatKind::Plain,
1650            refs,
1651            citations: vec![],
1652            bibliography_blocks: Vec::new(),
1653            document_options: None,
1654            nocite: vec!["smith2020".to_string()],
1655        };
1656
1657        let result = format_document_with_style(style, request).expect("should render");
1658
1659        assert_eq!(
1660            result.formatted_citations.len(),
1661            0,
1662            "nocite refs must not produce a formatted citation"
1663        );
1664        assert_eq!(
1665            result.bibliography.entries.len(),
1666            1,
1667            "nocite ref must appear in bibliography entries"
1668        );
1669        assert_eq!(
1670            result.bibliography.entries[0].id, "smith2020",
1671            "bibliography entry id should match nocite ref"
1672        );
1673        assert!(
1674            !result.bibliography.content.is_empty(),
1675            "bibliography content must be non-empty for nocite ref"
1676        );
1677        assert!(
1678            result.warnings.is_empty(),
1679            "no warnings expected: {:?}",
1680            result.warnings
1681        );
1682    }
1683
1684    /// An ID listed in `nocite` that is absent from `refs` must emit a
1685    /// `nocite_missing_ref` warning and not appear in the bibliography.
1686    #[test]
1687    fn nocite_missing_ref_emits_warning() {
1688        let style = make_test_style();
1689        let refs = make_test_bibliography();
1690
1691        let request = FormatDocumentRequest {
1692            style: StyleInput::Yaml("dummy".to_string()),
1693            style_overrides: None,
1694            locale: None,
1695            output_format: OutputFormatKind::Plain,
1696            refs,
1697            citations: vec![],
1698            bibliography_blocks: Vec::new(),
1699            document_options: None,
1700            nocite: vec!["does_not_exist".to_string()],
1701        };
1702
1703        let result = format_document_with_style(style, request).expect("should render");
1704
1705        assert_eq!(
1706            result.bibliography.entries.len(),
1707            0,
1708            "absent nocite ref must not produce a bibliography entry"
1709        );
1710        let warning = result
1711            .warnings
1712            .iter()
1713            .find(|w| w.code == "nocite_missing_ref")
1714            .expect("nocite_missing_ref warning should be emitted");
1715        assert_eq!(
1716            warning.ref_id.as_deref(),
1717            Some("does_not_exist"),
1718            "warning ref_id should name the absent nocite key"
1719        );
1720    }
1721
1722    /// A nocite ref must sort alongside the cited ref when both are present
1723    /// (i.e., citation status does not affect bibliography sort order).
1724    #[test]
1725    fn nocite_ref_sorts_alongside_cited_ref() {
1726        let mut style = make_test_style();
1727        style.bibliography = Some(BibliographySpec {
1728            template: Some(vec![TemplateComponent::Title(TemplateTitle {
1729                title: TitleType::Primary,
1730                ..Default::default()
1731            })]),
1732            ..Default::default()
1733        });
1734
1735        let citation_occ = CitationOccurrence {
1736            id: "c1".to_string(),
1737            items: vec![CitationOccurrenceItem {
1738                id: "duo2024".to_string(),
1739                locator: None,
1740                prefix: None,
1741                suffix: None,
1742                integral_name_state: None,
1743                org_abbreviation_state: None,
1744            }],
1745            mode: None,
1746            note_number: None,
1747            suppress_author: None,
1748            grouped: None,
1749            prefix: None,
1750            suffix: None,
1751            sentence_start: None,
1752        };
1753
1754        // Two refs: duo2024 (cited via citation_occ) + smith2020 (nocite-only).
1755        let combined_refs = RefsInput::Yaml(
1756            r#"duo2024:
1757  class: monograph
1758  id: duo2024
1759  type: book
1760  title: Duo Work
1761  issued: "2024"
1762  author:
1763    - family: Smith
1764      given: Alice
1765    - family: Jones
1766      given: Bob
1767smith2020:
1768  class: monograph
1769  id: smith2020
1770  type: book
1771  title: Smith Work
1772  issued: "2020"
1773  author:
1774    - family: Smith
1775      given: Alex
1776"#
1777            .to_string(),
1778        );
1779
1780        let request = FormatDocumentRequest {
1781            style: StyleInput::Yaml("dummy".to_string()),
1782            style_overrides: None,
1783            locale: None,
1784            output_format: OutputFormatKind::Plain,
1785            refs: combined_refs,
1786            citations: vec![citation_occ],
1787            bibliography_blocks: Vec::new(),
1788            document_options: None,
1789            nocite: vec!["smith2020".to_string()],
1790        };
1791
1792        let result = format_document_with_style(style, request).expect("should render");
1793
1794        assert_eq!(result.formatted_citations.len(), 1, "one in-text citation");
1795        assert_eq!(
1796            result.bibliography.entries.len(),
1797            2,
1798            "both cited and nocite refs must appear in the bibliography"
1799        );
1800        let ids: Vec<&str> = result
1801            .bibliography
1802            .entries
1803            .iter()
1804            .map(|e| e.id.as_str())
1805            .collect();
1806        assert!(
1807            ids.contains(&"duo2024"),
1808            "cited ref must be in bibliography: {ids:?}"
1809        );
1810        assert!(
1811            ids.contains(&"smith2020"),
1812            "nocite ref must be in bibliography: {ids:?}"
1813        );
1814    }
1815}