citum_engine/processor/mod.rs
1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! The Citum processor for rendering citations and bibliographies.
7//!
8//! ## Architecture
9//!
10//! `Processor` is intentionally a thin facade over a small set of focused
11//! implementation modules:
12//! - `setup`: construction, configuration resolution, and numbering setup
13//! - `note_context`: note-number normalization and citation position inference
14//! - `citation`: citation rendering orchestration
15//! - `bibliography`: bibliography rendering, grouping, and document-facing helpers
16//!
17//! The processor remains intentionally "dumb": it applies the style as written
18//! without implicit logic. Style-specific behavior (for example, suppressing a
19//! publisher for journals) should be expressed in the style YAML via
20//! `overrides`, not hardcoded here.
21//!
22//! ## CSL 1.0 Compatibility
23//!
24//! The processor implements the CSL 1.0 "variable-once" rule:
25//! > "Substituted variables are suppressed in the rest of the output to
26//! > prevent duplication."
27//!
28//! This is tracked via `rendered_vars` in `process_template()`.
29
30mod bibliography;
31mod citation;
32mod note_context;
33mod setup;
34
35/// Author/date disambiguation and year-suffix assignment.
36pub mod disambiguation;
37pub mod document;
38pub mod labels;
39/// Matching helpers for substitution and repeated-contributor detection.
40pub mod matching;
41/// Template rendering orchestration and per-component state handling.
42pub mod rendering;
43/// Citation and bibliography sorting helpers.
44pub mod sorting;
45
46#[cfg(test)]
47#[allow(
48 clippy::unwrap_used,
49 clippy::expect_used,
50 clippy::panic,
51 clippy::indexing_slicing,
52 clippy::todo,
53 clippy::unimplemented,
54 clippy::unreachable,
55 clippy::get_unwrap,
56 reason = "Panicking is acceptable and often desired in tests."
57)]
58mod tests;
59
60use crate::reference::Bibliography;
61use crate::render::ProcEntry;
62use crate::values::ProcHints;
63use citum_schema::Style;
64use citum_schema::locale::Locale;
65use citum_schema::options::Config;
66use indexmap::IndexMap;
67use std::cell::RefCell;
68use std::collections::{HashMap, HashSet};
69
70/// The Citum processor facade.
71///
72/// Takes a style, bibliography, and locale context, then delegates citation
73/// and bibliography work to the processor submodules.
74#[derive(Debug)]
75pub struct Processor {
76 /// The style definition.
77 pub style: Style,
78 /// The bibliography (references keyed by ID).
79 pub bibliography: Bibliography,
80 /// The locale for terms and formatting.
81 pub locale: Locale,
82 /// Default configuration.
83 pub default_config: Config,
84 /// Pre-calculated processing hints.
85 pub hints: HashMap<String, ProcHints>,
86 /// Citation numbers assigned to references (for numeric styles).
87 pub citation_numbers: RefCell<HashMap<String, usize>>,
88 /// IDs of items that were cited in a visible way.
89 pub cited_ids: RefCell<HashSet<String>>,
90 /// Compound sets keyed by set ID.
91 pub compound_sets: IndexMap<String, Vec<String>>,
92 /// Reverse lookup for set membership by reference ID.
93 pub compound_set_by_ref: HashMap<String, String>,
94 /// Position within a set (0-based) for each reference ID.
95 pub compound_member_index: HashMap<String, usize>,
96 /// Compound numeric groups: citation number → ordered ref IDs in the group.
97 pub compound_groups: RefCell<IndexMap<usize, Vec<String>>>,
98 /// Dynamic equivalent of `compound_set_by_ref` for cite-time groups.
99 ///
100 /// Maps each dynamic group member (head and tails) to the head's ref ID,
101 /// which acts as the set identifier. Merged with static data at render time.
102 pub dynamic_compound_set_by_ref: RefCell<HashMap<String, String>>,
103 /// Dynamic equivalent of `compound_member_index` for cite-time groups.
104 ///
105 /// Maps each dynamic group member to its 0-based position within the group.
106 /// Merged with static data at render time.
107 pub dynamic_compound_member_index: RefCell<HashMap<String, usize>>,
108 /// Dynamic equivalent of `compound_sets` for cite-time groups.
109 ///
110 /// Maps each dynamic group's head ref ID to the ordered list of all members.
111 /// Merged with static `compound_sets` at render time so sub-label lookup works.
112 pub dynamic_compound_sets: RefCell<IndexMap<String, Vec<String>>>,
113 /// Whether to output semantic markup (HTML spans, Djot attributes).
114 /// Defaults to true; set to false to suppress class attributes (e.g. `--no-semantics`).
115 pub show_semantics: bool,
116 /// Whether to annotate semantic HTML wrappers with source template indices.
117 pub inject_ast_indices: bool,
118 /// Document-level abbreviation map for post-render substitution.
119 pub abbreviation_map: Option<crate::api::AbbreviationMap>,
120 /// First note number in which each reference was cited (note styles only).
121 /// Populated during `normalize_note_context`; keyed by reference ID.
122 pub first_note_by_id: RefCell<HashMap<String, u32>>,
123}
124
125/// Processed output containing citations and bibliography.
126#[derive(Debug, Default)]
127pub struct ProcessedReferences {
128 /// Rendered bibliography entries with metadata.
129 pub bibliography: Vec<ProcEntry>,
130 /// Rendered citations as formatted strings.
131 ///
132 /// None if no citations were processed; Some(vec) otherwise.
133 pub citations: Option<Vec<String>>,
134}
135
136/// Validate optional compound sets against the loaded bibliography.
137///
138/// Validation rules:
139/// - Every member ID must exist in `bibliography`.
140/// - A member ID must not appear more than once in a single set.
141/// - A member ID must not appear across multiple sets.
142///
143/// # Errors
144///
145/// Returns an error when a compound set references an unknown ID or reuses the
146/// same member within or across sets.
147pub fn validate_compound_sets(
148 sets: Option<IndexMap<String, Vec<String>>>,
149 bibliography: &Bibliography,
150) -> Result<Option<IndexMap<String, Vec<String>>>, crate::error::ProcessorError> {
151 let Some(sets) = sets else {
152 return Ok(None);
153 };
154
155 let mut member_owner: HashMap<String, String> = HashMap::new();
156 for (set_id, members) in &sets {
157 let mut seen_in_set: std::collections::HashSet<String> = std::collections::HashSet::new();
158 for member in members {
159 if !seen_in_set.insert(member.clone()) {
160 return Err(crate::error::ProcessorError::ParseError(
161 "BIBLIOGRAPHY".to_string(),
162 format!(
163 "reference '{member}' appears more than once in compound set '{set_id}'"
164 ),
165 ));
166 }
167 if !bibliography.contains_key(member) {
168 return Err(crate::error::ProcessorError::ParseError(
169 "BIBLIOGRAPHY".to_string(),
170 format!("compound set '{set_id}' references unknown id '{member}'"),
171 ));
172 }
173 if let Some(existing) = member_owner.insert(member.clone(), set_id.clone()) {
174 return Err(crate::error::ProcessorError::ParseError(
175 "BIBLIOGRAPHY".to_string(),
176 format!(
177 "reference '{member}' appears in both compound sets '{existing}' and '{set_id}'"
178 ),
179 ));
180 }
181 }
182 }
183
184 Ok(Some(sets))
185}