// citum_engine/processor/setup.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/

//! Processor construction and configuration helpers.
//!
//! This module owns setup-time concerns for [`Processor`]: constructor paths,
//! locale/config resolution, compound-set validation, and numeric citation
//! number initialization. It intentionally does not contain citation or
//! bibliography rendering logic.
12
13use super::Processor;
14use super::disambiguation::Disambiguator;
15use super::sorting::Sorter;
16use crate::error::ProcessorError;
17use crate::reference::{Bibliography, CitationItem, Reference};
18use crate::values::ProcHints;
19use citum_schema::Style;
20use citum_schema::locale::Locale;
21use citum_schema::options::{Config, bibliography::BibliographyConfig};
22use indexmap::IndexMap;
23use std::cell::RefCell;
24use std::collections::{HashMap, HashSet};
25
26impl Default for Processor {
27    fn default() -> Self {
28        let compound_sets = IndexMap::new();
29        let (compound_set_by_ref, compound_member_index) =
30            Self::build_compound_set_indexes(&compound_sets);
31        Self {
32            style: Style::default(),
33            bibliography: Bibliography::default(),
34            locale: Locale::en_us(),
35            default_config: Config::default(),
36            hints: HashMap::new(),
37            citation_numbers: RefCell::new(HashMap::new()),
38            cited_ids: RefCell::new(HashSet::new()),
39            compound_sets,
40            compound_set_by_ref,
41            compound_member_index,
42            compound_groups: RefCell::new(IndexMap::new()),
43            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
44            dynamic_compound_member_index: RefCell::new(HashMap::new()),
45            dynamic_compound_sets: RefCell::new(IndexMap::new()),
46            show_semantics: true,
47            inject_ast_indices: false,
48            abbreviation_map: None,
49        }
50    }
51}
52
53impl Processor {
54    /// Core internal constructor path.
55    ///
56    /// Resolves the style presets before initializing the processor.
57    fn build_processor(
58        style: Style,
59        bibliography: Bibliography,
60        locale: Locale,
61        compound_sets: IndexMap<String, Vec<String>>,
62    ) -> Self {
63        let style = style.into_resolved();
64        Self::build_processor_pre_resolved(style, bibliography, locale, compound_sets)
65    }
66
67    /// Build a processor from an already-resolved style, skipping preset resolution.
68    ///
69    /// Use this when the style was cloned from a processor that has already
70    /// called `into_resolved()`, to avoid a second resolution pass that would
71    /// re-apply preset defaults and overwrite null-cleared fields.
72    pub(super) fn build_processor_pre_resolved(
73        style: Style,
74        bibliography: Bibliography,
75        locale: Locale,
76        compound_sets: IndexMap<String, Vec<String>>,
77    ) -> Self {
78        let (compound_set_by_ref, compound_member_index) =
79            Self::build_compound_set_indexes(&compound_sets);
80        let mut processor = Processor {
81            style,
82            bibliography,
83            locale,
84            default_config: Config::default(),
85            hints: HashMap::new(),
86            citation_numbers: RefCell::new(HashMap::new()),
87            cited_ids: RefCell::new(HashSet::new()),
88            compound_sets,
89            compound_set_by_ref,
90            compound_member_index,
91            compound_groups: RefCell::new(IndexMap::new()),
92            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
93            dynamic_compound_member_index: RefCell::new(HashMap::new()),
94            dynamic_compound_sets: RefCell::new(IndexMap::new()),
95            show_semantics: true,
96            inject_ast_indices: false,
97            abbreviation_map: None,
98        };
99
100        // Pre-calculate hints for disambiguation.
101        processor.hints = processor.calculate_hints();
102        processor
103    }
104
105    /// Validate compound sets against the bibliography.
106    fn try_validate_compound_sets(
107        bibliography: &Bibliography,
108        compound_sets: IndexMap<String, Vec<String>>,
109    ) -> Result<IndexMap<String, Vec<String>>, ProcessorError> {
110        super::validate_compound_sets(Some(compound_sets), bibliography)
111            .map(Option::unwrap_or_default)
112    }
113
114    /// Validate compound sets, falling back to an empty map on error.
115    fn validate_compound_sets_or_default(
116        bibliography: &Bibliography,
117        compound_sets: IndexMap<String, Vec<String>>,
118    ) -> IndexMap<String, Vec<String>> {
119        Self::try_validate_compound_sets(bibliography, compound_sets).unwrap_or_default()
120    }
121
122    /// Build flat reverse-lookup maps for compound sets.
123    ///
124    /// Maps reference IDs to their parent set ID and their 0-based position
125    /// within that set.
126    fn build_compound_set_indexes(
127        sets: &IndexMap<String, Vec<String>>,
128    ) -> (HashMap<String, String>, HashMap<String, usize>) {
129        let mut by_ref = HashMap::new();
130        let mut member_index = HashMap::new();
131        for (set_id, members) in sets {
132            for (idx, member) in members.iter().enumerate() {
133                by_ref.insert(member.clone(), set_id.clone());
134                member_index.insert(member.clone(), idx);
135            }
136        }
137        (by_ref, member_index)
138    }
139
140    /// Check whether the style uses note-based citations (footnotes/endnotes).
141    pub(crate) fn is_note_style(&self) -> bool {
142        self.get_config()
143            .processing
144            .as_ref()
145            .is_some_and(|processing| matches!(processing, citum_schema::options::Processing::Note))
146    }
147
148    /// Check whether the style uses numeric citation rendering.
149    fn is_numeric_style(&self) -> bool {
150        self.get_config()
151            .processing
152            .as_ref()
153            .is_some_and(|processing| {
154                matches!(processing, citum_schema::options::Processing::Numeric)
155            })
156    }
157
158    /// Check whether the style uses numeric bibliography rendering.
159    fn is_numeric_bibliography_style(&self) -> bool {
160        self.get_bibliography_config()
161            .processing
162            .as_ref()
163            .is_some_and(|processing| {
164                matches!(processing, citum_schema::options::Processing::Numeric)
165            })
166    }
167
168    /// Resolve the effective bibliography sort specification.
169    ///
170    /// Accounts for style overrides and preset defaults.
171    fn resolved_bibliography_sort(&self) -> Option<citum_schema::grouping::GroupSort> {
172        if let Some(sort_spec) = self
173            .style
174            .bibliography
175            .as_ref()
176            .and_then(|bibliography| bibliography.sort.as_ref())
177        {
178            return Some(sort_spec.resolve());
179        }
180
181        self.get_bibliography_config()
182            .processing
183            .as_ref()
184            .and_then(citum_schema::options::Processing::default_bibliography_sort)
185            .map(|preset| preset.group_sort())
186    }
187
188    /// Initialize numeric citation numbers from bibliography insertion order.
189    ///
190    /// citeproc-js registers all bibliography items before citation rendering in
191    /// the oracle workflow, so numeric labels are stable by reference registry
192    /// order rather than first-citation order.
193    ///
194    /// When the style declares an explicit bibliography sort, or the
195    /// processing family provides a bibliography default, citation numbers
196    /// must follow that resolved bibliography order.
197    pub(crate) fn initialize_numeric_citation_numbers(&self) {
198        if !self.is_numeric_style() {
199            return;
200        }
201
202        self.initialize_numeric_numbers(self.sort_citation_number_order());
203    }
204
205    /// Initialize numeric bibliography numbers from resolved bibliography order.
206    pub(crate) fn initialize_numeric_bibliography_numbers(&self) {
207        if !self.is_numeric_bibliography_style() {
208            return;
209        }
210
211        self.initialize_numeric_numbers(self.sort_bibliography_number_order());
212    }
213
214    /// Initialize citation numbers if the map is currently empty.
215    fn initialize_numeric_numbers(&self, ordered_ids: Vec<String>) {
216        if !self.citation_numbers.borrow().is_empty() {
217            return;
218        }
219
220        self.initialize_numeric_citation_numbers_from_ordered_ids(ordered_ids);
221    }
222
223    /// Calculate the document-wide reference order for citation numbering.
224    fn sort_citation_number_order(&self) -> Vec<String> {
225        self.sort_references(self.bibliography.values().collect())
226            .into_iter()
227            .filter_map(citum_schema::reference::InputReference::id)
228            .map(String::from)
229            .collect()
230    }
231
232    /// Calculate the reference order for bibliography numbering.
233    fn sort_bibliography_number_order(&self) -> Vec<String> {
234        self.sort_references(self.bibliography.values().collect())
235            .into_iter()
236            .filter_map(citum_schema::reference::InputReference::id)
237            .map(String::from)
238            .collect()
239    }
240
241    /// Assign stable numeric labels to references based on a document order.
242    ///
243    /// Also populates compound groups for numeric styles that enable compound
244    /// numbering.
245    fn initialize_numeric_citation_numbers_from_ordered_ids(&self, ordered_ids: Vec<String>) {
246        let mut numbers = self.citation_numbers.borrow_mut();
247        if !numbers.is_empty() {
248            return;
249        }
250
251        let compound_config = self.get_bibliography_options().compound_numeric.clone();
252
253        if compound_config.is_some() {
254            let mut set_first_seen: IndexMap<String, usize> = IndexMap::new();
255            let mut current_number = 1usize;
256            let mut compound_groups = self.compound_groups.borrow_mut();
257            compound_groups.clear();
258
259            for ref_id in &ordered_ids {
260                if let Some(set_id) = self.compound_set_by_ref.get(ref_id) {
261                    if let Some(&number) = set_first_seen.get(set_id) {
262                        numbers.insert(ref_id.clone(), number);
263                    } else {
264                        set_first_seen.insert(set_id.clone(), current_number);
265                        if let Some(members) = self.compound_sets.get(set_id) {
266                            let present_members: Vec<String> = members
267                                .iter()
268                                .filter(|id| self.bibliography.contains_key(*id))
269                                .cloned()
270                                .collect();
271                            for member in &present_members {
272                                numbers.insert(member.clone(), current_number);
273                            }
274                            if present_members.len() > 1 {
275                                compound_groups.insert(current_number, present_members);
276                            }
277                        } else {
278                            numbers.insert(ref_id.clone(), current_number);
279                        }
280                        current_number += 1;
281                    }
282                } else if !numbers.contains_key(ref_id) {
283                    numbers.insert(ref_id.clone(), current_number);
284                    current_number += 1;
285                }
286            }
287        } else {
288            for (index, ref_id) in ordered_ids.into_iter().enumerate() {
289                numbers.insert(ref_id, index + 1);
290            }
291        }
292    }
293
294    /// Create a new processor with default English locale (en-US).
295    #[must_use]
296    pub fn new(style: Style, bibliography: Bibliography) -> Self {
297        Self::with_compound_sets(style, bibliography, IndexMap::new())
298    }
299
300    /// Create a new processor with explicit compound sets, returning an error for invalid sets.
301    ///
302    /// # Errors
303    ///
304    /// Returns an error when any compound set references unknown bibliography
305    /// entries or reuses the same member more than once.
306    pub fn try_with_compound_sets(
307        style: Style,
308        bibliography: Bibliography,
309        compound_sets: IndexMap<String, Vec<String>>,
310    ) -> Result<Self, ProcessorError> {
311        Self::try_with_locale_and_compound_sets(style, bibliography, Locale::en_us(), compound_sets)
312    }
313
314    /// Create a new processor with explicit compound sets.
315    ///
316    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
317    /// and falls back to a processor without compound sets.
318    #[must_use]
319    pub fn with_compound_sets(
320        style: Style,
321        bibliography: Bibliography,
322        compound_sets: IndexMap<String, Vec<String>>,
323    ) -> Self {
324        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
325        Self::build_processor(style, bibliography, Locale::en_us(), validated_sets)
326    }
327
328    /// Create a new processor with a specified locale.
329    ///
330    /// The locale determines term translations and locale-specific formatting behavior.
331    #[must_use]
332    pub fn with_locale(style: Style, bibliography: Bibliography, locale: Locale) -> Self {
333        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
334    }
335
336    /// Create a new processor with explicit locale and compound sets, returning
337    /// an error for invalid sets.
338    ///
339    /// # Errors
340    ///
341    /// Returns an error when any compound set references unknown bibliography
342    /// entries or reuses the same member more than once.
343    pub fn try_with_locale_and_compound_sets(
344        style: Style,
345        bibliography: Bibliography,
346        locale: Locale,
347        compound_sets: IndexMap<String, Vec<String>>,
348    ) -> Result<Self, ProcessorError> {
349        let validated_sets = Self::try_validate_compound_sets(&bibliography, compound_sets)?;
350        Ok(Self::build_processor(
351            style,
352            bibliography,
353            locale,
354            validated_sets,
355        ))
356    }
357
358    /// Create a new processor with a specified locale and explicit compound sets.
359    ///
360    /// The locale determines term translations and locale-specific formatting behavior.
361    ///
362    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
363    /// and falls back to a processor without compound sets.
364    #[must_use]
365    pub fn with_locale_and_compound_sets(
366        style: Style,
367        bibliography: Bibliography,
368        locale: Locale,
369        compound_sets: IndexMap<String, Vec<String>>,
370    ) -> Self {
371        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
372        Self::build_processor(style, bibliography, locale, validated_sets)
373    }
374
375    /// Create a new processor, loading the locale from disk.
376    ///
377    /// Loads the locale specified in the style's `default_locale` field from the given directory,
378    /// falling back to en-US if not found or not specified.
379    #[must_use]
380    pub fn with_style_locale(
381        style: Style,
382        bibliography: Bibliography,
383        locales_dir: &std::path::Path,
384    ) -> Self {
385        let style = style.into_resolved();
386        let locale = if let Some(ref locale_id) = style.info.default_locale {
387            Locale::load(locale_id, locales_dir)
388        } else {
389            Locale::en_us()
390        };
391        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
392    }
393
394    /// Return a copy of the processor that injects source template indices into semantic HTML.
395    #[must_use]
396    pub fn with_inject_ast_indices(mut self, inject_ast_indices: bool) -> Self {
397        self.inject_ast_indices = inject_ast_indices;
398        self
399    }
400
    /// Enable or disable source template index injection for semantic HTML output.
    ///
    /// In-place counterpart of [`Self::with_inject_ast_indices`]: stores the
    /// flag on this processor and changes nothing else.
    pub fn set_inject_ast_indices(&mut self, inject_ast_indices: bool) {
        self.inject_ast_indices = inject_ast_indices;
    }
405
406    /// Return the global style configuration.
407    pub fn get_config(&self) -> &Config {
408        self.style.options.as_ref().unwrap_or(&self.default_config)
409    }
410
411    /// Return merged config for citation rendering.
412    ///
413    /// Combines global style options with citation-specific overrides.
414    pub fn get_citation_config(&self) -> std::borrow::Cow<'_, Config> {
415        let base = self.get_config();
416        match self
417            .style
418            .citation
419            .as_ref()
420            .and_then(|citation| citation.options.as_ref())
421        {
422            Some(citation_options) => std::borrow::Cow::Owned(citation_options.merged_with(base)),
423            None => std::borrow::Cow::Borrowed(base),
424        }
425    }
426
427    /// Return merged shared config for bibliography rendering.
428    ///
429    /// Combines global shared style options with bibliography-local shared overrides.
430    pub fn get_bibliography_config(&self) -> std::borrow::Cow<'_, Config> {
431        let base = self.get_config();
432        match self
433            .style
434            .bibliography
435            .as_ref()
436            .and_then(|bibliography| bibliography.options.as_ref())
437        {
438            Some(bibliography_options) => {
439                std::borrow::Cow::Owned(bibliography_options.merged_with(base))
440            }
441            None => std::borrow::Cow::Borrowed(base),
442        }
443    }
444
445    /// Return effective bibliography-only configuration.
446    pub fn get_bibliography_options(&self) -> std::borrow::Cow<'_, BibliographyConfig> {
447        match self
448            .style
449            .bibliography
450            .as_ref()
451            .and_then(|bibliography| bibliography.options.as_ref())
452        {
453            Some(bibliography_options) => {
454                std::borrow::Cow::Owned(bibliography_options.to_bibliography_config())
455            }
456            None => std::borrow::Cow::Owned(BibliographyConfig::default()),
457        }
458    }
459
460    /// Sort references according to the style's bibliography sort specification.
461    ///
462    /// Uses style-specified sort keys (author, title, issued, etc.) and sort order.
463    pub fn sort_references<'a>(&self, references: Vec<&'a Reference>) -> Vec<&'a Reference> {
464        let mut sorted_refs = if let Some(sort_spec) = self.resolved_bibliography_sort() {
465            let sorter = crate::grouping::GroupSorter::new(&self.locale);
466            sorter.sort_references(references, &sort_spec)
467        } else {
468            let bibliography_config = self.get_bibliography_config();
469            let sorter = Sorter::new(&bibliography_config, &self.locale);
470            sorter.sort_references(references)
471        };
472
473        let bibliography_options = self.get_bibliography_options();
474        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
475            && crate::sort_partitioning::should_sort_flat(partitioning)
476        {
477            crate::sort_partitioning::sort_by_partition(
478                sorted_refs.as_mut_slice(),
479                &self.locale,
480                partitioning,
481            );
482        }
483
484        sorted_refs
485    }
486
487    /// Sort citation items according to the style's citation sort specification.
488    pub fn sort_citation_items(
489        &self,
490        items: Vec<CitationItem>,
491        spec: &citum_schema::CitationSpec,
492    ) -> Vec<CitationItem> {
493        if let Some(sort_spec) = &spec.sort {
494            let mut items_with_refs: Vec<(CitationItem, &Reference)> = items
495                .into_iter()
496                .filter_map(|item| {
497                    self.bibliography
498                        .get(&item.id)
499                        .map(|reference| (item, reference))
500                })
501                .collect();
502
503            let resolved_sort = sort_spec.resolve();
504            let sorter = crate::grouping::GroupSorter::new(&self.locale);
505            items_with_refs.sort_by(|left, right| {
506                for sort_key in &resolved_sort.template {
507                    let cmp = sorter.compare_by_key(left.1, right.1, sort_key);
508                    if cmp != std::cmp::Ordering::Equal {
509                        return cmp;
510                    }
511                }
512                std::cmp::Ordering::Equal
513            });
514
515            return items_with_refs
516                .into_iter()
517                .map(|(item, _reference)| item)
518                .collect();
519        }
520
521        items
522    }
523
524    /// Calculate disambiguation hints needed for the style.
525    ///
526    /// Analyzes the bibliography to determine which items need disambiguation
527    /// (year suffixes, etc.) and calculates hints for efficient rendering.
528    pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
529        let citation_config = self.get_citation_config();
530        let config = citation_config.as_ref();
531        let bibliography_sort = self.resolved_bibliography_sort();
532
533        let disambiguator = if let Some(resolved_sort) = &bibliography_sort {
534            Disambiguator::with_group_sort(&self.bibliography, config, &self.locale, resolved_sort)
535        } else {
536            Disambiguator::new(&self.bibliography, config, &self.locale)
537        };
538
539        disambiguator.calculate_hints()
540    }
541}