Skip to main content

citum_engine/processor/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! The Citum processor for rendering citations and bibliographies.
7//!
8//! ## Architecture
9//!
10//! `Processor` is intentionally a thin facade over a small set of focused
11//! implementation modules:
12//! - `setup`: construction, configuration resolution, and numbering setup
13//! - `note_context`: note-number normalization and citation position inference
14//! - `citation`: citation rendering orchestration
15//! - `bibliography`: bibliography rendering, grouping, and document-facing helpers
16//!
17//! The processor remains intentionally "dumb": it applies the style as written
18//! without implicit logic. Style-specific behavior (for example, suppressing a
19//! publisher for journals) should be expressed in the style YAML via
20//! `overrides`, not hardcoded here.
21//!
22//! ## CSL 1.0 Compatibility
23//!
24//! The processor implements the CSL 1.0 "variable-once" rule:
25//! > "Substituted variables are suppressed in the rest of the output to
26//! > prevent duplication."
27//!
28//! This is tracked by `TemplateComponentTracker` during template rendering.
29//! Suppressed components do not claim variables; see
30//! `docs/specs/TEMPLATE_RENDERING_SEMANTICS.md`.
31
32mod bibliography;
33mod citation;
34mod note_context;
35mod setup;
36
37/// Author/date disambiguation and year-suffix assignment.
38pub mod disambiguation;
39pub mod document;
40pub mod labels;
41/// Matching helpers for substitution and repeated-contributor detection.
42pub mod matching;
43/// Template rendering orchestration and per-component state handling.
44pub mod rendering;
45/// Citation and bibliography sorting helpers.
46pub mod sorting;
47
48#[cfg(test)]
49#[allow(
50    clippy::unwrap_used,
51    clippy::expect_used,
52    clippy::panic,
53    clippy::indexing_slicing,
54    clippy::todo,
55    clippy::unimplemented,
56    clippy::unreachable,
57    clippy::get_unwrap,
58    reason = "Panicking is acceptable and often desired in tests."
59)]
60mod tests;
61
62use crate::reference::Bibliography;
63use crate::render::ProcEntry;
64use crate::values::ProcHints;
65use citum_schema::Style;
66use citum_schema::locale::Locale;
67use citum_schema::options::Config;
68use indexmap::IndexMap;
69use std::cell::RefCell;
70use std::collections::{HashMap, HashSet};
71
72/// The Citum processor facade.
73///
74/// Takes a style, bibliography, and locale context, then delegates citation
75/// and bibliography work to the processor submodules.
76#[derive(Debug)]
77pub struct Processor {
78    /// The style definition.
79    pub style: Style,
80    /// The bibliography (references keyed by ID).
81    pub bibliography: Bibliography,
82    /// The locale for terms and formatting.
83    pub locale: Locale,
84    /// Default configuration.
85    pub default_config: Config,
86    /// Pre-calculated processing hints.
87    pub hints: HashMap<String, ProcHints>,
88    /// Citation numbers assigned to references (for numeric styles).
89    pub citation_numbers: RefCell<HashMap<String, usize>>,
90    /// IDs of items that were cited in a visible way.
91    pub cited_ids: RefCell<HashSet<String>>,
92    /// Compound sets keyed by set ID.
93    pub compound_sets: IndexMap<String, Vec<String>>,
94    /// Reverse lookup for set membership by reference ID.
95    pub compound_set_by_ref: HashMap<String, String>,
96    /// Position within a set (0-based) for each reference ID.
97    pub compound_member_index: HashMap<String, usize>,
98    /// Compound numeric groups: citation number → ordered ref IDs in the group.
99    pub compound_groups: RefCell<IndexMap<usize, Vec<String>>>,
100    /// Dynamic equivalent of `compound_set_by_ref` for cite-time groups.
101    ///
102    /// Maps each dynamic group member (head and tails) to the head's ref ID,
103    /// which acts as the set identifier. Merged with static data at render time.
104    pub dynamic_compound_set_by_ref: RefCell<HashMap<String, String>>,
105    /// Dynamic equivalent of `compound_member_index` for cite-time groups.
106    ///
107    /// Maps each dynamic group member to its 0-based position within the group.
108    /// Merged with static data at render time.
109    pub dynamic_compound_member_index: RefCell<HashMap<String, usize>>,
110    /// Dynamic equivalent of `compound_sets` for cite-time groups.
111    ///
112    /// Maps each dynamic group's head ref ID to the ordered list of all members.
113    /// Merged with static `compound_sets` at render time so sub-label lookup works.
114    pub dynamic_compound_sets: RefCell<IndexMap<String, Vec<String>>>,
115    /// Whether to output semantic markup (HTML spans, Djot attributes).
116    /// Defaults to true; set to false to suppress class attributes (e.g. `--no-semantics`).
117    pub show_semantics: bool,
118    /// Whether to annotate semantic HTML wrappers with source template indices.
119    pub inject_ast_indices: bool,
120    /// Document-level abbreviation map for post-render substitution.
121    pub abbreviation_map: Option<crate::api::AbbreviationMap>,
122    /// First note number in which each reference was cited (note styles only).
123    /// Populated during `normalize_note_context`; keyed by reference ID.
124    pub first_note_by_id: RefCell<HashMap<String, u32>>,
125}
126
127/// Processed output containing citations and bibliography.
128#[derive(Debug, Default)]
129pub struct ProcessedReferences {
130    /// Rendered bibliography entries with metadata.
131    pub bibliography: Vec<ProcEntry>,
132    /// Rendered citations as formatted strings.
133    ///
134    /// None if no citations were processed; Some(vec) otherwise.
135    pub citations: Option<Vec<String>>,
136}
137
138/// Validate optional compound sets against the loaded bibliography.
139///
140/// Validation rules:
141/// - Every member ID must exist in `bibliography`.
142/// - A member ID must not appear more than once in a single set.
143/// - A member ID must not appear across multiple sets.
144///
145/// # Errors
146///
147/// Returns an error when a compound set references an unknown ID or reuses the
148/// same member within or across sets.
149pub fn validate_compound_sets(
150    sets: Option<IndexMap<String, Vec<String>>>,
151    bibliography: &Bibliography,
152) -> Result<Option<IndexMap<String, Vec<String>>>, crate::error::ProcessorError> {
153    let Some(sets) = sets else {
154        return Ok(None);
155    };
156
157    let mut member_owner: HashMap<String, String> = HashMap::new();
158    for (set_id, members) in &sets {
159        let mut seen_in_set: std::collections::HashSet<String> = std::collections::HashSet::new();
160        for member in members {
161            if !seen_in_set.insert(member.clone()) {
162                return Err(crate::error::ProcessorError::ParseError(
163                    "BIBLIOGRAPHY".to_string(),
164                    format!(
165                        "reference '{member}' appears more than once in compound set '{set_id}'"
166                    ),
167                ));
168            }
169            if !bibliography.contains_key(member) {
170                return Err(crate::error::ProcessorError::ParseError(
171                    "BIBLIOGRAPHY".to_string(),
172                    format!("compound set '{set_id}' references unknown id '{member}'"),
173                ));
174            }
175            if let Some(existing) = member_owner.insert(member.clone(), set_id.clone()) {
176                return Err(crate::error::ProcessorError::ParseError(
177                    "BIBLIOGRAPHY".to_string(),
178                    format!(
179                        "reference '{member}' appears in both compound sets '{existing}' and '{set_id}'"
180                    ),
181                ));
182            }
183        }
184    }
185
186    Ok(Some(sets))
187}