Skip to main content

citum_engine/processor/
setup.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Processor construction and configuration helpers.
7//!
8//! This module owns setup-time concerns for [`Processor`]: constructor paths,
9//! locale/config resolution, compound-set validation, and numeric citation
10//! number initialization. It intentionally does not contain citation or
11//! bibliography rendering logic.
12
13use super::Processor;
14use super::disambiguation::Disambiguator;
15use super::sorting::Sorter;
16use crate::error::ProcessorError;
17use crate::reference::{Bibliography, CitationItem, Reference};
18use crate::values::ProcHints;
19use citum_schema::Style;
20use citum_schema::locale::Locale;
21use citum_schema::options::{Config, bibliography::BibliographyConfig};
22use indexmap::IndexMap;
23use std::cell::RefCell;
24use std::collections::{HashMap, HashSet};
25
26impl Default for Processor {
27    fn default() -> Self {
28        let compound_sets = IndexMap::new();
29        let (compound_set_by_ref, compound_member_index) =
30            Self::build_compound_set_indexes(&compound_sets);
31        Self {
32            style: Style::default(),
33            bibliography: Bibliography::default(),
34            locale: Locale::en_us(),
35            default_config: Config::default(),
36            hints: HashMap::new(),
37            citation_numbers: RefCell::new(HashMap::new()),
38            cited_ids: RefCell::new(HashSet::new()),
39            compound_sets,
40            compound_set_by_ref,
41            compound_member_index,
42            compound_groups: RefCell::new(IndexMap::new()),
43            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
44            dynamic_compound_member_index: RefCell::new(HashMap::new()),
45            dynamic_compound_sets: RefCell::new(IndexMap::new()),
46            show_semantics: true,
47            inject_ast_indices: false,
48            abbreviation_map: None,
49        }
50    }
51}
52
53impl Processor {
54    fn build_processor(
55        style: Style,
56        bibliography: Bibliography,
57        locale: Locale,
58        compound_sets: IndexMap<String, Vec<String>>,
59    ) -> Self {
60        let style = style.into_resolved();
61        Self::build_processor_pre_resolved(style, bibliography, locale, compound_sets)
62    }
63
64    /// Build a processor from an already-resolved style, skipping preset resolution.
65    ///
66    /// Use this when the style was cloned from a processor that has already
67    /// called `into_resolved()`, to avoid a second resolution pass that would
68    /// re-apply preset defaults and overwrite null-cleared fields.
69    pub(super) fn build_processor_pre_resolved(
70        style: Style,
71        bibliography: Bibliography,
72        locale: Locale,
73        compound_sets: IndexMap<String, Vec<String>>,
74    ) -> Self {
75        let (compound_set_by_ref, compound_member_index) =
76            Self::build_compound_set_indexes(&compound_sets);
77        let mut processor = Processor {
78            style,
79            bibliography,
80            locale,
81            default_config: Config::default(),
82            hints: HashMap::new(),
83            citation_numbers: RefCell::new(HashMap::new()),
84            cited_ids: RefCell::new(HashSet::new()),
85            compound_sets,
86            compound_set_by_ref,
87            compound_member_index,
88            compound_groups: RefCell::new(IndexMap::new()),
89            dynamic_compound_set_by_ref: RefCell::new(HashMap::new()),
90            dynamic_compound_member_index: RefCell::new(HashMap::new()),
91            dynamic_compound_sets: RefCell::new(IndexMap::new()),
92            show_semantics: true,
93            inject_ast_indices: false,
94            abbreviation_map: None,
95        };
96
97        // Pre-calculate hints for disambiguation.
98        processor.hints = processor.calculate_hints();
99        processor
100    }
101
102    fn try_validate_compound_sets(
103        bibliography: &Bibliography,
104        compound_sets: IndexMap<String, Vec<String>>,
105    ) -> Result<IndexMap<String, Vec<String>>, ProcessorError> {
106        super::validate_compound_sets(Some(compound_sets), bibliography)
107            .map(Option::unwrap_or_default)
108    }
109
110    fn validate_compound_sets_or_default(
111        bibliography: &Bibliography,
112        compound_sets: IndexMap<String, Vec<String>>,
113    ) -> IndexMap<String, Vec<String>> {
114        Self::try_validate_compound_sets(bibliography, compound_sets).unwrap_or_default()
115    }
116
117    fn build_compound_set_indexes(
118        sets: &IndexMap<String, Vec<String>>,
119    ) -> (HashMap<String, String>, HashMap<String, usize>) {
120        let mut by_ref = HashMap::new();
121        let mut member_index = HashMap::new();
122        for (set_id, members) in sets {
123            for (idx, member) in members.iter().enumerate() {
124                by_ref.insert(member.clone(), set_id.clone());
125                member_index.insert(member.clone(), idx);
126            }
127        }
128        (by_ref, member_index)
129    }
130
131    /// Check whether the style uses note-based citations (footnotes/endnotes).
132    pub(crate) fn is_note_style(&self) -> bool {
133        self.get_config()
134            .processing
135            .as_ref()
136            .is_some_and(|processing| matches!(processing, citum_schema::options::Processing::Note))
137    }
138
139    /// Check whether the style uses numeric citation rendering.
140    fn is_numeric_style(&self) -> bool {
141        self.get_config()
142            .processing
143            .as_ref()
144            .is_some_and(|processing| {
145                matches!(processing, citum_schema::options::Processing::Numeric)
146            })
147    }
148
149    fn is_numeric_bibliography_style(&self) -> bool {
150        self.get_bibliography_config()
151            .processing
152            .as_ref()
153            .is_some_and(|processing| {
154                matches!(processing, citum_schema::options::Processing::Numeric)
155            })
156    }
157
158    fn resolved_bibliography_sort(&self) -> Option<citum_schema::grouping::GroupSort> {
159        if let Some(sort_spec) = self
160            .style
161            .bibliography
162            .as_ref()
163            .and_then(|bibliography| bibliography.sort.as_ref())
164        {
165            return Some(sort_spec.resolve());
166        }
167
168        self.get_bibliography_config()
169            .processing
170            .as_ref()
171            .and_then(citum_schema::options::Processing::default_bibliography_sort)
172            .map(|preset| preset.group_sort())
173    }
174
175    /// Initialize numeric citation numbers from bibliography insertion order.
176    ///
177    /// citeproc-js registers all bibliography items before citation rendering in
178    /// the oracle workflow, so numeric labels are stable by reference registry
179    /// order rather than first-citation order.
180    ///
181    /// When the style declares an explicit bibliography sort, or the
182    /// processing family provides a bibliography default, citation numbers
183    /// must follow that resolved bibliography order.
184    pub(crate) fn initialize_numeric_citation_numbers(&self) {
185        if !self.is_numeric_style() {
186            return;
187        }
188
189        self.initialize_numeric_numbers(self.sort_citation_number_order());
190    }
191
192    /// Initialize numeric bibliography numbers from resolved bibliography order.
193    pub(crate) fn initialize_numeric_bibliography_numbers(&self) {
194        if !self.is_numeric_bibliography_style() {
195            return;
196        }
197
198        self.initialize_numeric_numbers(self.sort_bibliography_number_order());
199    }
200
201    fn initialize_numeric_numbers(&self, ordered_ids: Vec<String>) {
202        if !self.citation_numbers.borrow().is_empty() {
203            return;
204        }
205
206        self.initialize_numeric_citation_numbers_from_ordered_ids(ordered_ids);
207    }
208
209    fn sort_citation_number_order(&self) -> Vec<String> {
210        self.sort_references(self.bibliography.values().collect())
211            .into_iter()
212            .filter_map(citum_schema::reference::InputReference::id)
213            .map(String::from)
214            .collect()
215    }
216
217    fn sort_bibliography_number_order(&self) -> Vec<String> {
218        self.sort_references(self.bibliography.values().collect())
219            .into_iter()
220            .filter_map(citum_schema::reference::InputReference::id)
221            .map(String::from)
222            .collect()
223    }
224
225    /// Assign numeric citation numbers from a pre-resolved reference order.
226    fn initialize_numeric_citation_numbers_from_ordered_ids(&self, ordered_ids: Vec<String>) {
227        let mut numbers = self.citation_numbers.borrow_mut();
228        if !numbers.is_empty() {
229            return;
230        }
231
232        let compound_config = self.get_bibliography_options().compound_numeric.clone();
233
234        if compound_config.is_some() {
235            let mut set_first_seen: IndexMap<String, usize> = IndexMap::new();
236            let mut current_number = 1usize;
237            let mut compound_groups = self.compound_groups.borrow_mut();
238            compound_groups.clear();
239
240            for ref_id in &ordered_ids {
241                if let Some(set_id) = self.compound_set_by_ref.get(ref_id) {
242                    if let Some(&number) = set_first_seen.get(set_id) {
243                        numbers.insert(ref_id.clone(), number);
244                    } else {
245                        set_first_seen.insert(set_id.clone(), current_number);
246                        if let Some(members) = self.compound_sets.get(set_id) {
247                            let present_members: Vec<String> = members
248                                .iter()
249                                .filter(|id| self.bibliography.contains_key(*id))
250                                .cloned()
251                                .collect();
252                            for member in &present_members {
253                                numbers.insert(member.clone(), current_number);
254                            }
255                            if present_members.len() > 1 {
256                                compound_groups.insert(current_number, present_members);
257                            }
258                        } else {
259                            numbers.insert(ref_id.clone(), current_number);
260                        }
261                        current_number += 1;
262                    }
263                } else if !numbers.contains_key(ref_id) {
264                    numbers.insert(ref_id.clone(), current_number);
265                    current_number += 1;
266                }
267            }
268        } else {
269            for (index, ref_id) in ordered_ids.into_iter().enumerate() {
270                numbers.insert(ref_id, index + 1);
271            }
272        }
273    }
274
275    /// Create a new processor with default English locale (en-US).
276    #[must_use]
277    pub fn new(style: Style, bibliography: Bibliography) -> Self {
278        Self::with_compound_sets(style, bibliography, IndexMap::new())
279    }
280
281    /// Create a new processor with explicit compound sets, returning an error for invalid sets.
282    ///
283    /// # Errors
284    ///
285    /// Returns an error when any compound set references unknown bibliography
286    /// entries or reuses the same member more than once.
287    pub fn try_with_compound_sets(
288        style: Style,
289        bibliography: Bibliography,
290        compound_sets: IndexMap<String, Vec<String>>,
291    ) -> Result<Self, ProcessorError> {
292        Self::try_with_locale_and_compound_sets(style, bibliography, Locale::en_us(), compound_sets)
293    }
294
295    /// Create a new processor with explicit compound sets.
296    ///
297    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
298    /// and falls back to a processor without compound sets.
299    #[must_use]
300    pub fn with_compound_sets(
301        style: Style,
302        bibliography: Bibliography,
303        compound_sets: IndexMap<String, Vec<String>>,
304    ) -> Self {
305        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
306        Self::build_processor(style, bibliography, Locale::en_us(), validated_sets)
307    }
308
309    /// Create a new processor with a specified locale.
310    ///
311    /// The locale determines term translations and locale-specific formatting behavior.
312    #[must_use]
313    pub fn with_locale(style: Style, bibliography: Bibliography, locale: Locale) -> Self {
314        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
315    }
316
317    /// Create a new processor with explicit locale and compound sets, returning
318    /// an error for invalid sets.
319    ///
320    /// # Errors
321    ///
322    /// Returns an error when any compound set references unknown bibliography
323    /// entries or reuses the same member more than once.
324    pub fn try_with_locale_and_compound_sets(
325        style: Style,
326        bibliography: Bibliography,
327        locale: Locale,
328        compound_sets: IndexMap<String, Vec<String>>,
329    ) -> Result<Self, ProcessorError> {
330        let validated_sets = Self::try_validate_compound_sets(&bibliography, compound_sets)?;
331        Ok(Self::build_processor(
332            style,
333            bibliography,
334            locale,
335            validated_sets,
336        ))
337    }
338
339    /// Create a new processor with a specified locale and explicit compound sets.
340    ///
341    /// The locale determines term translations and locale-specific formatting behavior.
342    ///
343    /// If `compound_sets` is invalid, this constructor ignores the supplied sets
344    /// and falls back to a processor without compound sets.
345    #[must_use]
346    pub fn with_locale_and_compound_sets(
347        style: Style,
348        bibliography: Bibliography,
349        locale: Locale,
350        compound_sets: IndexMap<String, Vec<String>>,
351    ) -> Self {
352        let validated_sets = Self::validate_compound_sets_or_default(&bibliography, compound_sets);
353        Self::build_processor(style, bibliography, locale, validated_sets)
354    }
355
356    /// Create a new processor, loading the locale from disk.
357    ///
358    /// Loads the locale specified in the style's `default_locale` field from the given directory,
359    /// falling back to en-US if not found or not specified.
360    #[must_use]
361    pub fn with_style_locale(
362        style: Style,
363        bibliography: Bibliography,
364        locales_dir: &std::path::Path,
365    ) -> Self {
366        let style = style.into_resolved();
367        let locale = if let Some(ref locale_id) = style.info.default_locale {
368            Locale::load(locale_id, locales_dir)
369        } else {
370            Locale::en_us()
371        };
372        Self::with_locale_and_compound_sets(style, bibliography, locale, IndexMap::new())
373    }
374
375    /// Return a copy of the processor that injects source template indices into semantic HTML.
376    #[must_use]
377    pub fn with_inject_ast_indices(mut self, inject_ast_indices: bool) -> Self {
378        self.inject_ast_indices = inject_ast_indices;
379        self
380    }
381
382    /// Enable or disable source template index injection for semantic HTML output.
383    pub fn set_inject_ast_indices(&mut self, inject_ast_indices: bool) {
384        self.inject_ast_indices = inject_ast_indices;
385    }
386
387    /// Return the global style configuration.
388    pub fn get_config(&self) -> &Config {
389        self.style.options.as_ref().unwrap_or(&self.default_config)
390    }
391
392    /// Return merged config for citation rendering.
393    ///
394    /// Combines global style options with citation-specific overrides.
395    pub fn get_citation_config(&self) -> std::borrow::Cow<'_, Config> {
396        let base = self.get_config();
397        match self
398            .style
399            .citation
400            .as_ref()
401            .and_then(|citation| citation.options.as_ref())
402        {
403            Some(citation_options) => std::borrow::Cow::Owned(citation_options.merged_with(base)),
404            None => std::borrow::Cow::Borrowed(base),
405        }
406    }
407
408    /// Return merged shared config for bibliography rendering.
409    ///
410    /// Combines global shared style options with bibliography-local shared overrides.
411    pub fn get_bibliography_config(&self) -> std::borrow::Cow<'_, Config> {
412        let base = self.get_config();
413        match self
414            .style
415            .bibliography
416            .as_ref()
417            .and_then(|bibliography| bibliography.options.as_ref())
418        {
419            Some(bibliography_options) => {
420                std::borrow::Cow::Owned(bibliography_options.merged_with(base))
421            }
422            None => std::borrow::Cow::Borrowed(base),
423        }
424    }
425
426    /// Return effective bibliography-only configuration.
427    pub fn get_bibliography_options(&self) -> std::borrow::Cow<'_, BibliographyConfig> {
428        match self
429            .style
430            .bibliography
431            .as_ref()
432            .and_then(|bibliography| bibliography.options.as_ref())
433        {
434            Some(bibliography_options) => {
435                std::borrow::Cow::Owned(bibliography_options.to_bibliography_config())
436            }
437            None => std::borrow::Cow::Owned(BibliographyConfig::default()),
438        }
439    }
440
441    /// Sort references according to the style's bibliography sort specification.
442    ///
443    /// Uses style-specified sort keys (author, title, issued, etc.) and sort order.
444    pub fn sort_references<'a>(&self, references: Vec<&'a Reference>) -> Vec<&'a Reference> {
445        let mut sorted_refs = if let Some(sort_spec) = self.resolved_bibliography_sort() {
446            let sorter = crate::grouping::GroupSorter::new(&self.locale);
447            sorter.sort_references(references, &sort_spec)
448        } else {
449            let bibliography_config = self.get_bibliography_config();
450            let sorter = Sorter::new(&bibliography_config, &self.locale);
451            sorter.sort_references(references)
452        };
453
454        let bibliography_options = self.get_bibliography_options();
455        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
456            && crate::sort_partitioning::should_sort_flat(partitioning)
457        {
458            crate::sort_partitioning::sort_by_partition(
459                sorted_refs.as_mut_slice(),
460                &self.locale,
461                partitioning,
462            );
463        }
464
465        sorted_refs
466    }
467
468    /// Sort citation items according to the style's citation sort specification.
469    pub fn sort_citation_items(
470        &self,
471        items: Vec<CitationItem>,
472        spec: &citum_schema::CitationSpec,
473    ) -> Vec<CitationItem> {
474        if let Some(sort_spec) = &spec.sort {
475            let mut items_with_refs: Vec<(CitationItem, &Reference)> = items
476                .into_iter()
477                .filter_map(|item| {
478                    self.bibliography
479                        .get(&item.id)
480                        .map(|reference| (item, reference))
481                })
482                .collect();
483
484            let resolved_sort = sort_spec.resolve();
485            let sorter = crate::grouping::GroupSorter::new(&self.locale);
486            items_with_refs.sort_by(|left, right| {
487                for sort_key in &resolved_sort.template {
488                    let cmp = sorter.compare_by_key(left.1, right.1, sort_key);
489                    if cmp != std::cmp::Ordering::Equal {
490                        return cmp;
491                    }
492                }
493                std::cmp::Ordering::Equal
494            });
495
496            return items_with_refs
497                .into_iter()
498                .map(|(item, _reference)| item)
499                .collect();
500        }
501
502        items
503    }
504
505    /// Calculate disambiguation hints needed for the style.
506    ///
507    /// Analyzes the bibliography to determine which items need disambiguation
508    /// (year suffixes, etc.) and calculates hints for efficient rendering.
509    pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
510        let citation_config = self.get_citation_config();
511        let config = citation_config.as_ref();
512        let bibliography_sort = self.resolved_bibliography_sort();
513
514        let disambiguator = if let Some(resolved_sort) = &bibliography_sort {
515            Disambiguator::with_group_sort(&self.bibliography, config, &self.locale, resolved_sort)
516        } else {
517            Disambiguator::new(&self.bibliography, config, &self.locale)
518        };
519
520        disambiguator.calculate_hints()
521    }
522}