Skip to main content

citum_engine/processor/
setup.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Processor construction and configuration helpers.
7//!
8//! This module owns setup-time concerns for [`Processor`]: constructor paths,
9//! locale/config resolution, compound-set validation, and numeric citation
10//! number initialization. It intentionally does not contain citation or
11//! bibliography rendering logic.
12
13use super::Processor;
14use super::disambiguation::Disambiguator;
15use super::sorting::Sorter;
16use crate::error::ProcessorError;
17use crate::reference::{Bibliography, CitationItem, Reference};
18use crate::values::ProcHints;
19use citum_schema::Style;
20use citum_schema::locale::Locale;
21use citum_schema::options::{Config, bibliography::BibliographyConfig};
22use indexmap::IndexMap;
23use std::cell::RefCell;
24use std::collections::{HashMap, HashSet};
25
26impl Default for Processor {
27    fn default() -> Self {
28        let compound_sets = IndexMap::new();
29        let (compound_set_by_ref, compound_member_index) =
30            Self::build_compound_set_indexes(&compound_sets);
31        Self {
32            style: Style::default(),
33            bibliography: Bibliography::default(),
34            locale: Locale::en_us(),
35            default_config: Config::default(),
36            hints: HashMap::new(),
37            citation_numbers: RefCell::new(HashMap::new()),
38            cited_ids: RefCell::new(HashSet::new()),
39            compound_sets,
40            compound_set_by_ref,
41            compound_member_index,
42            compound_groups: RefCell::new(IndexMap::new()),
43            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
44            dynamic_compound_member_index: RefCell::new(HashMap::new()),
45            dynamic_compound_sets: RefCell::new(IndexMap::new()),
46            show_semantics: true,
47            inject_ast_indices: false,
48            abbreviation_map: None,
49            first_note_by_id: RefCell::new(HashMap::new()),
50        }
51    }
52}
53
54impl Processor {
55    /// Core internal constructor path.
56    ///
57    /// Resolves the style presets before initializing the processor.
58    fn build_processor(
59        style: Style,
60        bibliography: Bibliography,
61        locale: Locale,
62        compound_sets: IndexMap<String, Vec<String>>,
63    ) -> Self {
64        let style = style.into_resolved();
65        Self::build_processor_pre_resolved(style, bibliography, locale, compound_sets)
66    }
67
68    /// Build a processor from an already-resolved style, skipping preset resolution.
69    ///
70    /// Use this when the style was cloned from a processor that has already
71    /// called `into_resolved()`, to avoid a second resolution pass that would
72    /// re-apply preset defaults and overwrite null-cleared fields.
73    pub(super) fn build_processor_pre_resolved(
74        style: Style,
75        bibliography: Bibliography,
76        locale: Locale,
77        compound_sets: IndexMap<String, Vec<String>>,
78    ) -> Self {
79        let (compound_set_by_ref, compound_member_index) =
80            Self::build_compound_set_indexes(&compound_sets);
81        let mut processor = Processor {
82            style,
83            bibliography,
84            locale,
85            default_config: Config::default(),
86            hints: HashMap::new(),
87            citation_numbers: RefCell::new(HashMap::new()),
88            cited_ids: RefCell::new(HashSet::new()),
89            compound_sets,
90            compound_set_by_ref,
91            compound_member_index,
92            compound_groups: RefCell::new(IndexMap::new()),
93            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
94            dynamic_compound_member_index: RefCell::new(HashMap::new()),
95            dynamic_compound_sets: RefCell::new(IndexMap::new()),
96            show_semantics: true,
97            inject_ast_indices: false,
98            abbreviation_map: None,
99            first_note_by_id: RefCell::new(HashMap::new()),
100        };
101
102        // Pre-calculate hints for disambiguation.
103        processor.hints = processor.calculate_hints();
104        processor
105    }
106
107    /// Validate compound sets against the bibliography.
108    fn try_validate_compound_sets(
109        bibliography: &Bibliography,
110        compound_sets: IndexMap<String, Vec<String>>,
111    ) -> Result<IndexMap<String, Vec<String>>, ProcessorError> {
112        super::validate_compound_sets(Some(compound_sets), bibliography)
113            .map(Option::unwrap_or_default)
114    }
115
116    /// Validate compound sets, falling back to an empty map on error.
117    fn validate_compound_sets_or_default(
118        bibliography: &Bibliography,
119        compound_sets: IndexMap<String, Vec<String>>,
120    ) -> IndexMap<String, Vec<String>> {
121        Self::try_validate_compound_sets(bibliography, compound_sets).unwrap_or_default()
122    }
123
124    /// Build flat reverse-lookup maps for compound sets.
125    ///
126    /// Maps reference IDs to their parent set ID and their 0-based position
127    /// within that set.
128    fn build_compound_set_indexes(
129        sets: &IndexMap<String, Vec<String>>,
130    ) -> (HashMap<String, String>, HashMap<String, usize>) {
131        let mut by_ref = HashMap::new();
132        let mut member_index = HashMap::new();
133        for (set_id, members) in sets {
134            for (idx, member) in members.iter().enumerate() {
135                by_ref.insert(member.clone(), set_id.clone());
136                member_index.insert(member.clone(), idx);
137            }
138        }
139        (by_ref, member_index)
140    }
141
142    /// Check whether the style uses note-based citations (footnotes/endnotes).
143    pub(crate) fn is_note_style(&self) -> bool {
144        self.get_config()
145            .processing
146            .as_ref()
147            .is_some_and(|processing| matches!(processing, citum_schema::options::Processing::Note))
148    }
149
150    /// Check whether the style uses numeric citation rendering.
151    fn is_numeric_style(&self) -> bool {
152        self.get_config()
153            .processing
154            .as_ref()
155            .is_some_and(|processing| {
156                matches!(processing, citum_schema::options::Processing::Numeric)
157            })
158    }
159
160    /// Check whether the style uses numeric bibliography rendering.
161    fn is_numeric_bibliography_style(&self) -> bool {
162        self.get_bibliography_config()
163            .processing
164            .as_ref()
165            .is_some_and(|processing| {
166                matches!(processing, citum_schema::options::Processing::Numeric)
167            })
168    }
169
170    /// Resolve the effective bibliography sort specification.
171    ///
172    /// Accounts for style overrides and preset defaults.
173    fn resolved_bibliography_sort(&self) -> Option<citum_schema::grouping::GroupSort> {
174        if let Some(sort_spec) = self
175            .style
176            .bibliography
177            .as_ref()
178            .and_then(|bibliography| bibliography.sort.as_ref())
179        {
180            return Some(sort_spec.resolve());
181        }
182
183        self.get_bibliography_config()
184            .processing
185            .as_ref()
186            .and_then(citum_schema::options::Processing::default_bibliography_sort)
187            .map(|preset| preset.group_sort())
188    }
189
190    /// Initialize numeric citation numbers from bibliography insertion order.
191    ///
192    /// citeproc-js registers all bibliography items before citation rendering in
193    /// the oracle workflow, so numeric labels are stable by reference registry
194    /// order rather than first-citation order.
195    ///
196    /// When the style declares an explicit bibliography sort, or the
197    /// processing family provides a bibliography default, citation numbers
198    /// must follow that resolved bibliography order.
199    pub(crate) fn initialize_numeric_citation_numbers(&self) {
200        if !self.is_numeric_style() {
201            return;
202        }
203
204        self.initialize_numeric_numbers(self.sort_citation_number_order());
205    }
206
207    /// Initialize numeric bibliography numbers from resolved bibliography order.
208    pub(crate) fn initialize_numeric_bibliography_numbers(&self) {
209        if !self.is_numeric_bibliography_style() {
210            return;
211        }
212
213        self.initialize_numeric_numbers(self.sort_bibliography_number_order());
214    }
215
216    /// Initialize citation numbers if the map is currently empty.
217    fn initialize_numeric_numbers(&self, ordered_ids: Vec<String>) {
218        if !self.citation_numbers.borrow().is_empty() {
219            return;
220        }
221
222        self.initialize_numeric_citation_numbers_from_ordered_ids(ordered_ids);
223    }
224
225    /// Calculate the document-wide reference order for citation numbering.
226    fn sort_citation_number_order(&self) -> Vec<String> {
227        self.sort_references(self.bibliography.values().collect())
228            .into_iter()
229            .filter_map(citum_schema::reference::InputReference::id)
230            .map(String::from)
231            .collect()
232    }
233
234    /// Calculate the reference order for bibliography numbering.
235    fn sort_bibliography_number_order(&self) -> Vec<String> {
236        self.sort_references(self.bibliography.values().collect())
237            .into_iter()
238            .filter_map(citum_schema::reference::InputReference::id)
239            .map(String::from)
240            .collect()
241    }
242
243    /// Assign stable numeric labels to references based on a document order.
244    ///
245    /// Also populates compound groups for numeric styles that enable compound
246    /// numbering.
247    fn initialize_numeric_citation_numbers_from_ordered_ids(&self, ordered_ids: Vec<String>) {
248        let mut numbers = self.citation_numbers.borrow_mut();
249        if !numbers.is_empty() {
250            return;
251        }
252
253        let compound_config = self.get_bibliography_options().compound_numeric.clone();
254
255        if compound_config.is_some() {
256            let mut set_first_seen: IndexMap<String, usize> = IndexMap::new();
257            let mut current_number = 1usize;
258            let mut compound_groups = self.compound_groups.borrow_mut();
259            compound_groups.clear();
260
261            for ref_id in &ordered_ids {
262                if let Some(set_id) = self.compound_set_by_ref.get(ref_id) {
263                    if let Some(&number) = set_first_seen.get(set_id) {
264                        numbers.insert(ref_id.clone(), number);
265                    } else {
266                        set_first_seen.insert(set_id.clone(), current_number);
267                        if let Some(members) = self.compound_sets.get(set_id) {
268                            let present_members: Vec<String> = members
269                                .iter()
270                                .filter(|id| self.bibliography.contains_key(*id))
271                                .cloned()
272                                .collect();
273                            for member in &present_members {
274                                numbers.insert(member.clone(), current_number);
275                            }
276                            if present_members.len() > 1 {
277                                compound_groups.insert(current_number, present_members);
278                            }
279                        } else {
280                            numbers.insert(ref_id.clone(), current_number);
281                        }
282                        current_number += 1;
283                    }
284                } else if !numbers.contains_key(ref_id) {
285                    numbers.insert(ref_id.clone(), current_number);
286                    current_number += 1;
287                }
288            }
289        } else {
290            for (index, ref_id) in ordered_ids.into_iter().enumerate() {
291                numbers.insert(ref_id, index + 1);
292            }
293        }
294    }
295
296    /// Create a new processor with default English locale (en-US).
297    #[must_use]
298    pub fn new(style: Style, bibliography: Bibliography) -> Self {
299        Self::with_compound_sets(style, bibliography, IndexMap::new())
300    }
301
302    /// Create a new processor with explicit compound sets, returning an error for invalid sets.
303    ///
304    /// # Errors
305    ///
306    /// Returns an error when any compound set references unknown bibliography
307    /// entries or reuses the same member more than once.
308    pub fn try_with_compound_sets(
309        style: Style,
310        bibliography: Bibliography,
311        compound_sets: IndexMap<String, Vec<String>>,
312    ) -> Result<Self, ProcessorError> {
313        Self::try_with_locale_and_compound_sets(style, bibliography, Locale::en_us(), compound_sets)
314    }
315
316    /// Create a new processor with explicit compound sets.
317    ///
318    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
319    /// and falls back to a processor without compound sets.
320    #[must_use]
321    pub fn with_compound_sets(
322        style: Style,
323        bibliography: Bibliography,
324        compound_sets: IndexMap<String, Vec<String>>,
325    ) -> Self {
326        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
327        Self::build_processor(style, bibliography, Locale::en_us(), validated_sets)
328    }
329
330    /// Create a new processor with a specified locale.
331    ///
332    /// The locale determines term translations and locale-specific formatting behavior.
333    #[must_use]
334    pub fn with_locale(style: Style, bibliography: Bibliography, locale: Locale) -> Self {
335        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
336    }
337
338    /// Create a new processor with explicit locale and compound sets, returning
339    /// an error for invalid sets.
340    ///
341    /// # Errors
342    ///
343    /// Returns an error when any compound set references unknown bibliography
344    /// entries or reuses the same member more than once.
345    pub fn try_with_locale_and_compound_sets(
346        style: Style,
347        bibliography: Bibliography,
348        locale: Locale,
349        compound_sets: IndexMap<String, Vec<String>>,
350    ) -> Result<Self, ProcessorError> {
351        let validated_sets = Self::try_validate_compound_sets(&bibliography, compound_sets)?;
352        Ok(Self::build_processor(
353            style,
354            bibliography,
355            locale,
356            validated_sets,
357        ))
358    }
359
360    /// Create a new processor with a specified locale and explicit compound sets.
361    ///
362    /// The locale determines term translations and locale-specific formatting behavior.
363    ///
364    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
365    /// and falls back to a processor without compound sets.
366    #[must_use]
367    pub fn with_locale_and_compound_sets(
368        style: Style,
369        bibliography: Bibliography,
370        locale: Locale,
371        compound_sets: IndexMap<String, Vec<String>>,
372    ) -> Self {
373        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
374        Self::build_processor(style, bibliography, locale, validated_sets)
375    }
376
377    /// Create a new processor, loading the locale from disk.
378    ///
379    /// Loads the locale specified in the style's `default_locale` field from the given directory,
380    /// falling back to en-US if not found or not specified.
381    #[must_use]
382    pub fn with_style_locale(
383        style: Style,
384        bibliography: Bibliography,
385        locales_dir: &std::path::Path,
386    ) -> Self {
387        let style = style.into_resolved();
388        let locale = if let Some(ref locale_id) = style.info.default_locale {
389            Locale::load(locale_id, locales_dir)
390        } else {
391            Locale::en_us()
392        };
393        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
394    }
395
396    /// Return a copy of the processor that injects source template indices into semantic HTML.
397    #[must_use]
398    pub fn with_inject_ast_indices(mut self, inject_ast_indices: bool) -> Self {
399        self.inject_ast_indices = inject_ast_indices;
400        self
401    }
402
403    /// Enable or disable source template index injection for semantic HTML output.
404    pub fn set_inject_ast_indices(&mut self, inject_ast_indices: bool) {
405        self.inject_ast_indices = inject_ast_indices;
406    }
407
408    /// Return the global style configuration.
409    pub fn get_config(&self) -> &Config {
410        self.style.options.as_ref().unwrap_or(&self.default_config)
411    }
412
413    /// Return merged config for citation rendering.
414    ///
415    /// Combines global style options with citation-specific overrides.
416    pub fn get_citation_config(&self) -> std::borrow::Cow<'_, Config> {
417        let base = self.get_config();
418        match self
419            .style
420            .citation
421            .as_ref()
422            .and_then(|citation| citation.options.as_ref())
423        {
424            Some(citation_options) => std::borrow::Cow::Owned(citation_options.merged_with(base)),
425            None => std::borrow::Cow::Borrowed(base),
426        }
427    }
428
429    /// Return merged shared config for bibliography rendering.
430    ///
431    /// Combines global shared style options with bibliography-local shared overrides.
432    pub fn get_bibliography_config(&self) -> std::borrow::Cow<'_, Config> {
433        let base = self.get_config();
434        match self
435            .style
436            .bibliography
437            .as_ref()
438            .and_then(|bibliography| bibliography.options.as_ref())
439        {
440            Some(bibliography_options) => {
441                std::borrow::Cow::Owned(bibliography_options.merged_with(base))
442            }
443            None => std::borrow::Cow::Borrowed(base),
444        }
445    }
446
447    /// Return effective bibliography-only configuration.
448    pub fn get_bibliography_options(&self) -> std::borrow::Cow<'_, BibliographyConfig> {
449        match self
450            .style
451            .bibliography
452            .as_ref()
453            .and_then(|bibliography| bibliography.options.as_ref())
454        {
455            Some(bibliography_options) => {
456                std::borrow::Cow::Owned(bibliography_options.to_bibliography_config())
457            }
458            None => std::borrow::Cow::Owned(BibliographyConfig::default()),
459        }
460    }
461
462    /// Sort references according to the style's bibliography sort specification.
463    ///
464    /// Uses style-specified sort keys (author, title, issued, etc.) and sort order.
465    pub fn sort_references<'a>(&self, references: Vec<&'a Reference>) -> Vec<&'a Reference> {
466        let mut sorted_refs = if let Some(sort_spec) = self.resolved_bibliography_sort() {
467            let sorter = crate::grouping::GroupSorter::new(&self.locale);
468            sorter.sort_references(references, &sort_spec)
469        } else {
470            let bibliography_config = self.get_bibliography_config();
471            let sorter = Sorter::new(&bibliography_config, &self.locale);
472            sorter.sort_references(references)
473        };
474
475        let bibliography_options = self.get_bibliography_options();
476        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
477            && crate::sort_partitioning::should_sort_flat(partitioning)
478        {
479            crate::sort_partitioning::sort_by_partition(
480                sorted_refs.as_mut_slice(),
481                &self.locale,
482                partitioning,
483            );
484        }
485
486        sorted_refs
487    }
488
489    /// Sort citation items according to the style's citation sort specification.
490    pub fn sort_citation_items(
491        &self,
492        items: Vec<CitationItem>,
493        spec: &citum_schema::CitationSpec,
494    ) -> Vec<CitationItem> {
495        if let Some(sort_spec) = &spec.sort {
496            let mut items_with_refs: Vec<(CitationItem, &Reference)> = items
497                .into_iter()
498                .filter_map(|item| {
499                    self.bibliography
500                        .get(&item.id)
501                        .map(|reference| (item, reference))
502                })
503                .collect();
504
505            let resolved_sort = sort_spec.resolve();
506            let sorter = crate::grouping::GroupSorter::new(&self.locale);
507            items_with_refs.sort_by(|left, right| {
508                for sort_key in &resolved_sort.template {
509                    let cmp = sorter.compare_by_key(left.1, right.1, sort_key);
510                    if cmp != std::cmp::Ordering::Equal {
511                        return cmp;
512                    }
513                }
514                std::cmp::Ordering::Equal
515            });
516
517            return items_with_refs
518                .into_iter()
519                .map(|(item, _reference)| item)
520                .collect();
521        }
522
523        items
524    }
525
526    /// Calculate disambiguation hints needed for the style.
527    ///
528    /// Analyzes the bibliography to determine which items need disambiguation
529    /// (year suffixes, etc.) and calculates hints for efficient rendering.
530    pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
531        let citation_config = self.get_citation_config();
532        let config = citation_config.as_ref();
533        let bibliography_sort = self.resolved_bibliography_sort();
534
535        let disambiguator = if let Some(resolved_sort) = &bibliography_sort {
536            Disambiguator::with_group_sort(&self.bibliography, config, &self.locale, resolved_sort)
537        } else {
538            Disambiguator::new(&self.bibliography, config, &self.locale)
539        };
540
541        disambiguator.calculate_hints()
542    }
543}