Skip to main content

citum_engine/processor/
citation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Citation rendering orchestration.
7//!
//! This module resolves the effective citation spec for each citation and
//! prepares renderer delimiters and affixes. Template-level rendering, including
//! sentence-initial note-start handling, lives in `rendering`.
11
12use super::Processor;
13use super::disambiguation::Disambiguator;
14use super::rendering::{CompoundRenderData, GroupRenderParams, Renderer, RendererResources};
15use crate::error::ProcessorError;
16use crate::reference::Citation;
17use crate::values::ProcHints;
18use citum_schema::NoteStartTextCase;
19use citum_schema::locale::{GeneralTerm, Locale, TermForm};
20use citum_schema::options::{Config, GivennameRule};
21use citum_schema::template::DelimiterPunctuation;
22use indexmap::IndexMap;
23use std::collections::HashMap;
24
25/// Join rendered integral (narrative) groups with localized conjunctions.
26///
27/// Uses the locale's "and" term to join groups according to document grammar
28/// rules (e.g., "A and B" or "A, B, and C" with optional serial comma).
29fn join_integral_groups(rendered_groups: Vec<String>, locale: &Locale) -> String {
30    match rendered_groups.len() {
31        0 => String::new(),
32        1 => rendered_groups.into_iter().next().unwrap_or_default(),
33        2 => {
34            let conjunction = locale
35                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
36                .unwrap_or_else(|| locale.and_term(false).to_string());
37            rendered_groups.join(&format!(" {} ", conjunction.trim()))
38        }
39        _ => {
40            let conjunction = locale
41                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
42                .unwrap_or_else(|| locale.and_term(false).to_string());
43            let final_delimiter = if locale.grammar_options.serial_comma {
44                format!(", {} ", conjunction.trim())
45            } else {
46                format!(" {} ", conjunction.trim())
47            };
48
49            let mut rendered_groups = rendered_groups;
50            let last = rendered_groups.pop().unwrap_or_default();
51            format!("{}{}{}", rendered_groups.join(", "), final_delimiter, last)
52        }
53    }
54}
55
56impl Processor {
57    /// Determine the text-case policy for a citation at the start of a note.
58    ///
59    /// Only applies for note-based styles when a repeated-citation position (Ibid)
60    /// is at the start of the note and has no user-supplied or spec-defined prefix.
61    fn sentence_initial_note_start_text_case(
62        &self,
63        citation: &Citation,
64        effective_spec: &citum_schema::CitationSpec,
65    ) -> Option<NoteStartTextCase> {
66        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
67        if self.is_note_style()
68            && matches!(
69                citation.position,
70                Some(
71                    citum_schema::citation::Position::Ibid
72                        | citum_schema::citation::Position::IbidWithLocator
73                )
74            )
75            && matches!(
76                citation.mode,
77                citum_schema::citation::CitationMode::NonIntegral
78            )
79            && citation.prefix.as_deref().unwrap_or("").is_empty()
80            && spec_prefix.is_empty()
81        {
82            effective_spec.note_start_text_case
83        } else {
84            None
85        }
86    }
87
88    /// Resolve the citation specification based on the citation's document position.
89    ///
90    /// Delegates to the style's citation spec to handle ibid, subsequent, or first
91    /// position overrides.
92    fn resolve_positioned_citation_spec(
93        &self,
94        citation: &Citation,
95    ) -> std::borrow::Cow<'_, citum_schema::CitationSpec> {
96        self.style.citation.as_ref().map_or_else(
97            || std::borrow::Cow::Owned(citum_schema::CitationSpec::default()),
98            |spec| spec.resolve_for_position(citation.position.as_ref()),
99        )
100    }
101
102    /// Register cited reference IDs and ensure numeric labels are initialized.
103    ///
104    /// This maintains the set of all references cited in the document and ensures
105    /// that numeric styles have a stable numbering map.
106    fn track_cited_ids_and_init_numbers(&self, citation: &Citation) {
107        self.initialize_numeric_citation_numbers();
108        let mut cited_ids = self.cited_ids.borrow_mut();
109        for item in &citation.items {
110            cited_ids.insert(item.id.clone());
111        }
112    }
113
114    /// Resolve the final effective citation spec for a given mode and position.
115    fn resolve_effective_citation_spec(&self, citation: &Citation) -> citum_schema::CitationSpec {
116        self.resolve_positioned_citation_spec(citation)
117            .into_owned()
118            .resolve_for_mode(&citation.mode)
119            .into_owned()
120    }
121
122    /// Resolve intra-item and inter-citation delimiters for a citation spec.
123    fn resolve_citation_delimiters<'a>(
124        &self,
125        effective_spec: &'a citum_schema::CitationSpec,
126    ) -> (&'a str, &'a str) {
127        let intra_delimiter = effective_spec.delimiter.as_deref().unwrap_or(", ");
128        let inter_delimiter = effective_spec
129            .multi_cite_delimiter
130            .as_deref()
131            .unwrap_or("; ");
132
133        (
134            if matches!(
135                DelimiterPunctuation::from_csl_string(intra_delimiter),
136                DelimiterPunctuation::None
137            ) {
138                ""
139            } else {
140                intra_delimiter
141            },
142            if matches!(
143                DelimiterPunctuation::from_csl_string(inter_delimiter),
144                DelimiterPunctuation::None
145            ) {
146                ""
147            } else {
148                inter_delimiter
149            },
150        )
151    }
152
153    /// Register a dynamic compound group for a `grouped` citation.
154    ///
155    /// The first item in `citation.items` is the head; subsequent items are tails.
156    /// Skips silently when:
157    /// - The style has no `compound-numeric` bibliography configuration (non-numeric style).
158    /// - A static compound set already covers the head or any tail (static sets take precedence).
159    /// - The head or any tail was previously cited in any context (first occurrence wins).
160    ///
161    /// This method must be called before `track_cited_ids_and_init_numbers` so that
162    /// `cited_ids` reflects only references from prior citations, not the current one.
163    fn resolve_dynamic_group(&self, citation: &Citation) {
164        if self.get_bibliography_options().compound_numeric.is_none() {
165            return;
166        }
167
168        if citation.items.len() < 2 {
169            return;
170        }
171
172        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
173        let head_id = &citation.items[0].id;
174        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
175        let tail_ids: Vec<String> = citation.items[1..].iter().map(|i| i.id.clone()).collect();
176
177        // Static sets take precedence — skip if head or any tail is in a static set.
178        if self.compound_set_by_ref.contains_key(head_id) {
179            return;
180        }
181        for tail in &tail_ids {
182            if self.compound_set_by_ref.contains_key(tail.as_str()) {
183                return;
184            }
185        }
186
187        // First-occurrence wins: reject if the head or any tail was already cited in any
188        // context — whether via a prior dynamic group or a previous ungrouped citation.
189        // Because this method is called before cited_ids is updated for the current
190        // citation, `cited_ids` contains only references from earlier citations.
191        {
192            let dyn_set = self.dynamic_compound_set_by_ref.borrow();
193            let cited = self.cited_ids.borrow();
194
195            if dyn_set.contains_key(head_id.as_str()) || cited.contains(head_id.as_str()) {
196                return;
197            }
198            for tail in &tail_ids {
199                if dyn_set.contains_key(tail.as_str()) || cited.contains(tail.as_str()) {
200                    return;
201                }
202            }
203        }
204
205        let head_number = {
206            let numbers = self.citation_numbers.borrow();
207            let Some(&n) = numbers.get(head_id.as_str()) else {
208                return;
209            };
210            n
211        };
212
213        // Assign all tails the same citation number as the head.
214        {
215            let mut numbers = self.citation_numbers.borrow_mut();
216            for tail in &tail_ids {
217                numbers.insert(tail.clone(), head_number);
218            }
219        }
220
221        // Build the ordered member list for this group.
222        let all_members: Vec<String> = std::iter::once(head_id.clone())
223            .chain(tail_ids.iter().cloned())
224            .collect();
225
226        // Populate dynamic index maps so the renderer can assign sub-labels.
227        {
228            let mut dyn_set = self.dynamic_compound_set_by_ref.borrow_mut();
229            let mut dyn_idx = self.dynamic_compound_member_index.borrow_mut();
230            for (idx, member) in all_members.iter().enumerate() {
231                dyn_set.insert(member.clone(), head_id.clone());
232                dyn_idx.insert(member.clone(), idx);
233            }
234        }
235
236        // Inject into compound_groups for bibliography rendering.
237        {
238            let mut groups = self.compound_groups.borrow_mut();
239            let members = groups
240                .entry(head_number)
241                .or_insert_with(|| vec![head_id.clone()]);
242            for tail in &tail_ids {
243                if !members.contains(tail) {
244                    members.push(tail.clone());
245                }
246            }
247        }
248
249        // Register dynamic set so citation_sub_label_for_ref can find members.
250        self.dynamic_compound_sets
251            .borrow_mut()
252            .insert(head_id.clone(), all_members);
253    }
254
255    /// Build a citation-local hint overlay for CSL `givenname-disambiguation-rule: by-cite`.
256    ///
257    /// Global hints remain authoritative for bibliography rendering, year-suffix ordering,
258    /// numeric state, and note-position state. This overlay only recalculates name expansion
259    /// fields for the references rendered by the current citation.
260    fn citation_scoped_by_cite_hints(
261        &self,
262        items: &[crate::reference::CitationItem],
263        config: &Config,
264    ) -> Option<HashMap<String, ProcHints>> {
265        if !Self::uses_by_cite_givenname(config) {
266            return None;
267        }
268
269        let mut scoped_hints = HashMap::new();
270        let mut scoped_bibliography = IndexMap::new();
271
272        for item in items {
273            let mut hint = self.hints.get(&item.id).cloned().unwrap_or_default();
274            hint.expand_given_names = false;
275            hint.expand_given_names_primary_only = false;
276            hint.min_names_to_show = None;
277            scoped_hints.insert(item.id.clone(), hint);
278
279            if let Some(reference) = self.bibliography.get(&item.id) {
280                scoped_bibliography.insert(item.id.clone(), reference.clone());
281            }
282        }
283
284        if scoped_bibliography.len() < 2 {
285            return Some(scoped_hints);
286        }
287
288        let local_hints =
289            Disambiguator::new(&scoped_bibliography, config, &self.locale).calculate_hints();
290
291        for item in items {
292            let Some(local) = local_hints.get(&item.id) else {
293                continue;
294            };
295            let target = scoped_hints.entry(item.id.clone()).or_default();
296            target.expand_given_names = local.expand_given_names;
297            target.expand_given_names_primary_only = local.expand_given_names_primary_only;
298            target.min_names_to_show = local.min_names_to_show;
299        }
300
301        Some(scoped_hints)
302    }
303
304    /// Return true when the active citation config requests CSL by-cite given-name expansion.
305    fn uses_by_cite_givenname(config: &Config) -> bool {
306        let disambiguate = match config.processing.as_ref() {
307            Some(processing) => processing.config().disambiguate,
308            None => {
309                citum_schema::options::Processing::AuthorDate
310                    .config()
311                    .disambiguate
312            }
313        };
314
315        disambiguate
316            .as_ref()
317            .is_some_and(|d| d.add_givenname && matches!(d.givenname_rule, GivennameRule::ByCite))
318    }
319
320    /// Build the merged static + dynamic compound lookup maps for the renderer.
321    ///
322    /// When no dynamic groups exist (the common case) the static maps are returned
323    /// via references with no allocation. Owned merged maps are only constructed when
324    /// at least one dynamic group is registered.
325    fn merged_compound_data(
326        &self,
327    ) -> (
328        Option<HashMap<String, String>>,
329        Option<HashMap<String, usize>>,
330        Option<IndexMap<String, Vec<String>>>,
331    ) {
332        if self.dynamic_compound_set_by_ref.borrow().is_empty() {
333            return (None, None, None);
334        }
335        let merged_set: HashMap<String, String> = self
336            .compound_set_by_ref
337            .iter()
338            .chain(self.dynamic_compound_set_by_ref.borrow().iter())
339            .map(|(k, v)| (k.clone(), v.clone()))
340            .collect();
341        let merged_idx: HashMap<String, usize> = self
342            .compound_member_index
343            .iter()
344            .chain(self.dynamic_compound_member_index.borrow().iter())
345            .map(|(k, v)| (k.clone(), *v))
346            .collect();
347        let merged_sets: IndexMap<String, Vec<String>> = self
348            .compound_sets
349            .iter()
350            .chain(self.dynamic_compound_sets.borrow().iter())
351            .map(|(k, v)| (k.clone(), v.clone()))
352            .collect();
353        (Some(merged_set), Some(merged_idx), Some(merged_sets))
354    }
355
356    /// Render the core content of a citation, handling sorting and grouping.
357    ///
358    /// This is the main orchestration point for template rendering, compound data
359    /// resolution, and mode-specific (integral vs non-integral) formatting.
360    fn render_citation_content<F>(
361        &self,
362        citation: &Citation,
363        effective_spec: &citum_schema::CitationSpec,
364        renderer_delimiter: &str,
365        renderer_inter_delimiter: &str,
366        note_start_text_case: Option<NoteStartTextCase>,
367    ) -> Result<String, ProcessorError>
368    where
369        F: crate::render::format::OutputFormat<Output = String>,
370    {
371        // Grouped citations preserve item order (dynamic grouping was already resolved
372        // in process_citation_with_format before cited_ids was updated).
373        let sorted_items = if citation.grouped {
374            citation.items.clone()
375        } else {
376            self.sort_citation_items(citation.items.clone(), effective_spec)
377        };
378
379        // Build merged compound lookup maps (static + dynamic).
380        // Return owned maps only when dynamic groups exist; otherwise use static maps directly.
381        let (dyn_set_owned, dyn_idx_owned, dyn_sets_owned) = self.merged_compound_data();
382        let effective_set_by_ref = dyn_set_owned.as_ref().unwrap_or(&self.compound_set_by_ref);
383        let effective_member_index = dyn_idx_owned
384            .as_ref()
385            .unwrap_or(&self.compound_member_index);
386        let effective_compound_sets = dyn_sets_owned.as_ref().unwrap_or(&self.compound_sets);
387
388        let citation_config = self.get_citation_config();
389        let citation_config = match effective_spec.options.as_ref() {
390            Some(mode_options) => {
391                let mut config = citation_config.into_owned();
392                config.merge(&mode_options.to_config());
393                std::borrow::Cow::Owned(config)
394            }
395            None => citation_config,
396        };
397        let scoped_hints = self.citation_scoped_by_cite_hints(&sorted_items, &citation_config);
398        let renderer_hints = scoped_hints.as_ref().unwrap_or(&self.hints);
399        let renderer = Renderer::new(
400            RendererResources {
401                style: &self.style,
402                bibliography: &self.bibliography,
403                locale: &self.locale,
404                config: &citation_config,
405                bibliography_config: Some(self.get_bibliography_options().into_owned()),
406                first_note_by_id: Some(&self.first_note_by_id),
407            },
408            renderer_hints,
409            &self.citation_numbers,
410            CompoundRenderData {
411                set_by_ref: effective_set_by_ref,
412                member_index: effective_member_index,
413                sets: effective_compound_sets,
414            },
415            self.show_semantics,
416            self.inject_ast_indices,
417            self.abbreviation_map.as_ref(),
418        );
419        let processing = citation_config.processing.clone().unwrap_or_default();
420        let has_explicit_integral_multi_cite_delimiter = matches!(
421            citation.mode,
422            citum_schema::citation::CitationMode::Integral
423        ) && self
424            .resolve_positioned_citation_spec(citation)
425            .integral
426            .as_ref()
427            .and_then(|spec| spec.multi_cite_delimiter.as_ref())
428            .is_some();
429        let rendered_groups = if matches!(
430            processing,
431            citum_schema::options::Processing::Numeric
432                | citum_schema::options::Processing::Label(_)
433        ) {
434            renderer.render_ungrouped_citation_with_format::<F>(
435                &sorted_items,
436                effective_spec,
437                &citation.mode,
438                renderer_delimiter,
439                citation.suppress_author,
440                citation.position.as_ref(),
441                note_start_text_case,
442            )?
443        } else {
444            renderer.render_grouped_citation_with_format::<F>(
445                &sorted_items,
446                &GroupRenderParams {
447                    spec: effective_spec,
448                    mode: &citation.mode,
449                    intra_delimiter: renderer_delimiter,
450                    suppress_author: citation.suppress_author,
451                    position: citation.position.as_ref(),
452                    note_start_text_case,
453                },
454            )?
455        };
456
457        Ok(
458            if matches!(
459                citation.mode,
460                citum_schema::citation::CitationMode::Integral
461            ) && !has_explicit_integral_multi_cite_delimiter
462            {
463                join_integral_groups(rendered_groups, &self.locale)
464            } else {
465                F::default().join(rendered_groups, renderer_inter_delimiter)
466            },
467        )
468    }
469
470    /// Apply user-supplied prefix and suffix from the citation input.
471    ///
472    /// Automatically adds a trailing space to the prefix and a leading space to
473    /// the suffix if they are not already present and not empty.
474    fn apply_citation_input_affixes<F>(
475        &self,
476        citation: &Citation,
477        content: String,
478        fmt: &F,
479    ) -> String
480    where
481        F: crate::render::format::OutputFormat<Output = String>,
482    {
483        let citation_prefix = citation.prefix.as_deref().unwrap_or("");
484        let citation_suffix = citation.suffix.as_deref().unwrap_or("");
485
486        if citation_prefix.is_empty() && citation_suffix.is_empty() {
487            return content;
488        }
489
490        let formatted_prefix =
491            if !citation_prefix.is_empty() && !citation_prefix.ends_with(char::is_whitespace) {
492                format!("{citation_prefix} ")
493            } else {
494                citation_prefix.to_string()
495            };
496
497        let formatted_suffix =
498            if !citation_suffix.is_empty() && !citation_suffix.starts_with(char::is_whitespace) {
499                format!(" {citation_suffix}")
500            } else {
501                citation_suffix.to_string()
502            };
503
504        fmt.affix(&formatted_prefix, content, &formatted_suffix)
505    }
506
507    /// Apply style-defined wrapping and affixes to the rendered citation output.
508    ///
509    /// Handles `wrap` logic (inner prefixes/suffixes and punctuation) based on
510    /// the citation mode and position.
511    fn apply_spec_wrap_and_affixes<F>(
512        &self,
513        citation: &Citation,
514        effective_spec: &citum_schema::CitationSpec,
515        output: String,
516        fmt: &F,
517    ) -> String
518    where
519        F: crate::render::format::OutputFormat<Output = String>,
520    {
521        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
522        let spec_suffix = effective_spec.suffix.as_deref().unwrap_or("");
523
524        if matches!(
525            citation.mode,
526            citum_schema::citation::CitationMode::Integral
527        ) {
528            if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
529                fmt.affix(spec_prefix, output, spec_suffix)
530            } else {
531                output
532            }
533        } else if let Some(wrap) = effective_spec.wrap.as_ref() {
534            let inner_prefix = wrap.inner_prefix.as_deref().unwrap_or("");
535            let inner_suffix = wrap.inner_suffix.as_deref().unwrap_or("");
536            let inner_wrapped = if !inner_prefix.is_empty() || !inner_suffix.is_empty() {
537                fmt.inner_affix(inner_prefix, output, inner_suffix)
538            } else {
539                output
540            };
541            fmt.wrap_punctuation(&wrap.punctuation, inner_wrapped)
542        } else if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
543            fmt.affix(spec_prefix, output, spec_suffix)
544        } else {
545            output
546        }
547    }
548
549    /// Render a single citation to plain text.
550    ///
551    /// This is the primary entry point for citation processing. It handles:
552    /// 1. Looking up references in the bibliography.
553    /// 2. Annotating positions (ibid, subsequent, etc.).
554    /// 3. Resolving disambiguation (name expansion, year suffixes).
555    /// 4. Applying the style's citation template.
556    ///
557    /// Returns the formatted citation string or an error if processing fails.
558    ///
559    /// # Errors
560    ///
561    /// Returns an error when referenced items are missing or rendering fails.
562    pub fn process_citation(&self, citation: &Citation) -> Result<String, ProcessorError> {
563        self.process_citation_with_format::<crate::render::plain::PlainText>(citation)
564    }
565
566    /// Render a citation to a string using a specific output format.
567    ///
568    /// This resolves the effective citation spec for the citation's mode and
569    /// position, renders the citation body, and applies input and style affixes.
570    ///
571    /// # Errors
572    ///
573    /// Returns an error when referenced items are missing or rendering fails.
574    pub fn process_citation_with_format<F>(
575        &self,
576        citation: &Citation,
577    ) -> Result<String, ProcessorError>
578    where
579        F: crate::render::format::OutputFormat<Output = String>,
580    {
581        let fmt = F::default();
582
583        // For grouped citations, resolve the dynamic compound group BEFORE updating
584        // cited_ids with the current citation's items. This ensures the first-occurrence
585        // check in resolve_dynamic_group sees only references from prior citations.
586        if citation.grouped {
587            self.initialize_numeric_citation_numbers();
588            self.resolve_dynamic_group(citation);
589        }
590
591        self.track_cited_ids_and_init_numbers(citation);
592
593        let effective_spec = self.resolve_effective_citation_spec(citation);
594        let note_start_text_case =
595            self.sentence_initial_note_start_text_case(citation, &effective_spec);
596        let (renderer_delimiter, renderer_inter_delimiter) =
597            self.resolve_citation_delimiters(&effective_spec);
598        let content = self.render_citation_content::<F>(
599            citation,
600            &effective_spec,
601            renderer_delimiter,
602            renderer_inter_delimiter,
603            note_start_text_case,
604        )?;
605        let output = self.apply_citation_input_affixes(citation, content, &fmt);
606        let wrapped = self.apply_spec_wrap_and_affixes(citation, &effective_spec, output, &fmt);
607
608        Ok(fmt.finish(wrapped))
609    }
610
611    /// Render multiple citations in document order.
612    ///
613    /// For note-based styles, normalizes context and assigns citation positions.
614    ///
615    /// # Errors
616    ///
617    /// Returns an error when any citation in the sequence fails to render.
618    pub fn process_citations(&self, citations: &[Citation]) -> Result<Vec<String>, ProcessorError> {
619        self.process_citations_with_format::<crate::render::plain::PlainText>(citations)
620    }
621
622    /// Render multiple citations with a custom output format.
623    ///
624    /// # Errors
625    ///
626    /// Returns an error when any citation in the sequence fails to render.
627    pub fn process_citations_with_format<F>(
628        &self,
629        citations: &[Citation],
630    ) -> Result<Vec<String>, ProcessorError>
631    where
632        F: crate::render::format::OutputFormat<Output = String>,
633    {
634        let mut normalized = self.normalize_note_context(citations);
635        self.annotate_positions(&mut normalized);
636        normalized
637            .iter()
638            .map(|citation| self.process_citation_with_format::<F>(citation))
639            .collect()
640    }
641}