Skip to main content

citum_engine/processor/
citation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
//! Citation rendering orchestration.
//!
//! This module resolves the effective citation spec for each citation, prepares
//! renderer delimiters, and applies citation affixes. Template-level rendering,
//! including sentence-initial note-start handling, lives in `rendering`.
11
12use super::Processor;
13use super::disambiguation::Disambiguator;
14use super::rendering::{CompoundRenderData, GroupRenderParams, Renderer, RendererResources};
15use crate::error::ProcessorError;
16use crate::reference::Citation;
17use crate::values::ProcHints;
18use citum_schema::NoteStartTextCase;
19use citum_schema::locale::{GeneralTerm, Locale, TermForm};
20use citum_schema::options::{Config, GivennameRule};
21use citum_schema::template::DelimiterPunctuation;
22use indexmap::IndexMap;
23use std::collections::HashMap;
24
25/// Join rendered integral (narrative) groups with localized conjunctions.
26///
27/// Uses the locale's "and" term to join groups according to document grammar
28/// rules (e.g., "A and B" or "A, B, and C" with optional serial comma).
29fn join_integral_groups(rendered_groups: Vec<String>, locale: &Locale) -> String {
30    match rendered_groups.len() {
31        0 => String::new(),
32        1 => rendered_groups.into_iter().next().unwrap_or_default(),
33        2 => {
34            let conjunction = locale
35                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
36                .unwrap_or_else(|| locale.and_term(false).to_string());
37            rendered_groups.join(&format!(" {} ", conjunction.trim()))
38        }
39        _ => {
40            let conjunction = locale
41                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
42                .unwrap_or_else(|| locale.and_term(false).to_string());
43            let final_delimiter = if locale.grammar_options.serial_comma {
44                format!(", {} ", conjunction.trim())
45            } else {
46                format!(" {} ", conjunction.trim())
47            };
48
49            let mut rendered_groups = rendered_groups;
50            let last = rendered_groups.pop().unwrap_or_default();
51            format!("{}{}{}", rendered_groups.join(", "), final_delimiter, last)
52        }
53    }
54}
55
56impl Processor {
57    /// Determine the text-case policy for a citation at the start of a note.
58    ///
59    /// Only applies for note-based styles when a repeated-citation position (Ibid)
60    /// is at the start of the note and has no user-supplied or spec-defined prefix.
61    fn sentence_initial_note_start_text_case(
62        &self,
63        citation: &Citation,
64        effective_spec: &citum_schema::CitationSpec,
65    ) -> Option<NoteStartTextCase> {
66        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
67        if self.is_note_style()
68            && matches!(
69                citation.position,
70                Some(
71                    citum_schema::citation::Position::Ibid
72                        | citum_schema::citation::Position::IbidWithLocator
73                )
74            )
75            && matches!(
76                citation.mode,
77                citum_schema::citation::CitationMode::NonIntegral
78            )
79            && citation.prefix.as_deref().unwrap_or("").is_empty()
80            && spec_prefix.is_empty()
81        {
82            effective_spec.note_start_text_case
83        } else {
84            None
85        }
86    }
87
88    /// Resolve the citation specification based on the citation's document position.
89    ///
90    /// Delegates to the style's citation spec to handle ibid, subsequent, or first
91    /// position overrides.
92    fn resolve_positioned_citation_spec(
93        &self,
94        citation: &Citation,
95    ) -> std::borrow::Cow<'_, citum_schema::CitationSpec> {
96        self.style.citation.as_ref().map_or_else(
97            || std::borrow::Cow::Owned(citum_schema::CitationSpec::default()),
98            |spec| spec.resolve_for_position(citation.position.as_ref()),
99        )
100    }
101
102    /// Register nocite reference IDs into the cited set.
103    ///
104    /// Nocite IDs are treated as cited for bibliography-selection purposes (they
105    /// appear in `bibliography.entries` alongside normally cited refs and are
106    /// matched by `CitedStatus::Visible` selectors), but no `formatted_citations`
107    /// entry is produced for them. This matches standard citeproc / Pandoc `nocite`
108    /// semantics.
109    ///
110    /// IDs that are absent from `self.bibliography` are silently ignored here;
111    /// callers are responsible for emitting `nocite_missing_ref` warnings first.
112    pub fn register_nocite_ids(&self, ids: impl IntoIterator<Item = String>) {
113        let mut cited_ids = self.cited_ids.borrow_mut();
114        for id in ids {
115            cited_ids.insert(id);
116        }
117    }
118
119    /// Register cited reference IDs and ensure numeric labels are initialized.
120    ///
121    /// This maintains the set of all references cited in the document and ensures
122    /// that numeric styles have a stable numbering map.
123    fn track_cited_ids_and_init_numbers(&self, citation: &Citation) {
124        self.initialize_numeric_citation_numbers();
125        let mut cited_ids = self.cited_ids.borrow_mut();
126        for item in &citation.items {
127            cited_ids.insert(item.id.clone());
128        }
129    }
130
131    /// Resolve the final effective citation spec for a given mode and position.
132    fn resolve_effective_citation_spec(&self, citation: &Citation) -> citum_schema::CitationSpec {
133        self.resolve_positioned_citation_spec(citation)
134            .into_owned()
135            .resolve_for_mode(&citation.mode)
136            .into_owned()
137    }
138
139    /// Resolve intra-item and inter-citation delimiters for a citation spec.
140    fn resolve_citation_delimiters<'a>(
141        &self,
142        effective_spec: &'a citum_schema::CitationSpec,
143    ) -> (&'a str, &'a str) {
144        let intra_delimiter = effective_spec.delimiter.as_deref().unwrap_or(", ");
145        let inter_delimiter = effective_spec
146            .multi_cite_delimiter
147            .as_deref()
148            .unwrap_or("; ");
149
150        (
151            if matches!(
152                DelimiterPunctuation::from_csl_string(intra_delimiter),
153                DelimiterPunctuation::None
154            ) {
155                ""
156            } else {
157                intra_delimiter
158            },
159            if matches!(
160                DelimiterPunctuation::from_csl_string(inter_delimiter),
161                DelimiterPunctuation::None
162            ) {
163                ""
164            } else {
165                inter_delimiter
166            },
167        )
168    }
169
170    /// Register a dynamic compound group for a `grouped` citation.
171    ///
172    /// The first item in `citation.items` is the head; subsequent items are tails.
173    /// Skips silently when:
174    /// - The style has no `compound-numeric` bibliography configuration (non-numeric style).
175    /// - A static compound set already covers the head or any tail (static sets take precedence).
176    /// - The head or any tail was previously cited in any context (first occurrence wins).
177    ///
178    /// This method must be called before `track_cited_ids_and_init_numbers` so that
179    /// `cited_ids` reflects only references from prior citations, not the current one.
180    fn resolve_dynamic_group(&self, citation: &Citation) {
181        if self.get_bibliography_options().compound_numeric.is_none() {
182            return;
183        }
184
185        if citation.items.len() < 2 {
186            return;
187        }
188
189        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
190        let head_id = &citation.items[0].id;
191        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
192        let tail_ids: Vec<String> = citation.items[1..].iter().map(|i| i.id.clone()).collect();
193
194        // Static sets take precedence — skip if head or any tail is in a static set.
195        if self.compound_set_by_ref.contains_key(head_id) {
196            return;
197        }
198        for tail in &tail_ids {
199            if self.compound_set_by_ref.contains_key(tail.as_str()) {
200                return;
201            }
202        }
203
204        // First-occurrence wins: reject if the head or any tail was already cited in any
205        // context — whether via a prior dynamic group or a previous ungrouped citation.
206        // Because this method is called before cited_ids is updated for the current
207        // citation, `cited_ids` contains only references from earlier citations.
208        {
209            let dyn_set = self.dynamic_compound_set_by_ref.borrow();
210            let cited = self.cited_ids.borrow();
211
212            if dyn_set.contains_key(head_id.as_str()) || cited.contains(head_id.as_str()) {
213                return;
214            }
215            for tail in &tail_ids {
216                if dyn_set.contains_key(tail.as_str()) || cited.contains(tail.as_str()) {
217                    return;
218                }
219            }
220        }
221
222        let head_number = {
223            let numbers = self.citation_numbers.borrow();
224            let Some(&n) = numbers.get(head_id.as_str()) else {
225                return;
226            };
227            n
228        };
229
230        // Assign all tails the same citation number as the head.
231        {
232            let mut numbers = self.citation_numbers.borrow_mut();
233            for tail in &tail_ids {
234                numbers.insert(tail.clone(), head_number);
235            }
236        }
237
238        // Build the ordered member list for this group.
239        let all_members: Vec<String> = std::iter::once(head_id.clone())
240            .chain(tail_ids.iter().cloned())
241            .collect();
242
243        // Populate dynamic index maps so the renderer can assign sub-labels.
244        {
245            let mut dyn_set = self.dynamic_compound_set_by_ref.borrow_mut();
246            let mut dyn_idx = self.dynamic_compound_member_index.borrow_mut();
247            for (idx, member) in all_members.iter().enumerate() {
248                dyn_set.insert(member.clone(), head_id.clone());
249                dyn_idx.insert(member.clone(), idx);
250            }
251        }
252
253        // Inject into compound_groups for bibliography rendering.
254        {
255            let mut groups = self.compound_groups.borrow_mut();
256            let members = groups
257                .entry(head_number)
258                .or_insert_with(|| vec![head_id.clone()]);
259            for tail in &tail_ids {
260                if !members.contains(tail) {
261                    members.push(tail.clone());
262                }
263            }
264        }
265
266        // Register dynamic set so citation_sub_label_for_ref can find members.
267        self.dynamic_compound_sets
268            .borrow_mut()
269            .insert(head_id.clone(), all_members);
270    }
271
272    /// Build a citation-local hint overlay for CSL `givenname-disambiguation-rule: by-cite`.
273    ///
274    /// Global hints remain authoritative for bibliography rendering, year-suffix ordering,
275    /// numeric state, and note-position state. This overlay only recalculates name expansion
276    /// fields for the references rendered by the current citation.
277    fn citation_scoped_by_cite_hints(
278        &self,
279        items: &[crate::reference::CitationItem],
280        config: &Config,
281    ) -> Option<HashMap<String, ProcHints>> {
282        if !Self::uses_by_cite_givenname(config) {
283            return None;
284        }
285
286        let mut scoped_hints = HashMap::new();
287        let mut scoped_bibliography = IndexMap::new();
288
289        for item in items {
290            let mut hint = self.hints.get(&item.id).cloned().unwrap_or_default();
291            hint.expand_given_names = false;
292            hint.expand_given_names_primary_only = false;
293            hint.min_names_to_show = None;
294            scoped_hints.insert(item.id.clone(), hint);
295
296            if let Some(reference) = self.bibliography.get(&item.id) {
297                scoped_bibliography.insert(item.id.clone(), reference.clone());
298            }
299        }
300
301        if scoped_bibliography.len() < 2 {
302            return Some(scoped_hints);
303        }
304
305        let local_hints =
306            Disambiguator::new(&scoped_bibliography, config, &self.locale).calculate_hints();
307
308        for item in items {
309            let Some(local) = local_hints.get(&item.id) else {
310                continue;
311            };
312            let target = scoped_hints.entry(item.id.clone()).or_default();
313            target.expand_given_names = local.expand_given_names;
314            target.expand_given_names_primary_only = local.expand_given_names_primary_only;
315            target.min_names_to_show = local.min_names_to_show;
316        }
317
318        Some(scoped_hints)
319    }
320
321    /// Return true when the active citation config requests CSL by-cite given-name expansion.
322    fn uses_by_cite_givenname(config: &Config) -> bool {
323        let disambiguate = config.effective_processing().config().disambiguate;
324
325        disambiguate
326            .as_ref()
327            .is_some_and(|d| d.add_givenname && matches!(d.givenname_rule, GivennameRule::ByCite))
328    }
329
330    /// Build the merged static + dynamic compound lookup maps for the renderer.
331    ///
332    /// When no dynamic groups exist (the common case) the static maps are returned
333    /// via references with no allocation. Owned merged maps are only constructed when
334    /// at least one dynamic group is registered.
335    fn merged_compound_data(
336        &self,
337    ) -> (
338        Option<HashMap<String, String>>,
339        Option<HashMap<String, usize>>,
340        Option<IndexMap<String, Vec<String>>>,
341    ) {
342        if self.dynamic_compound_set_by_ref.borrow().is_empty() {
343            return (None, None, None);
344        }
345        let merged_set: HashMap<String, String> = self
346            .compound_set_by_ref
347            .iter()
348            .chain(self.dynamic_compound_set_by_ref.borrow().iter())
349            .map(|(k, v)| (k.clone(), v.clone()))
350            .collect();
351        let merged_idx: HashMap<String, usize> = self
352            .compound_member_index
353            .iter()
354            .chain(self.dynamic_compound_member_index.borrow().iter())
355            .map(|(k, v)| (k.clone(), *v))
356            .collect();
357        let merged_sets: IndexMap<String, Vec<String>> = self
358            .compound_sets
359            .iter()
360            .chain(self.dynamic_compound_sets.borrow().iter())
361            .map(|(k, v)| (k.clone(), v.clone()))
362            .collect();
363        (Some(merged_set), Some(merged_idx), Some(merged_sets))
364    }
365
366    /// Render the core content of a citation, handling sorting and grouping.
367    ///
368    /// This is the main orchestration point for template rendering, compound data
369    /// resolution, and mode-specific (integral vs non-integral) formatting.
370    fn render_citation_content<F>(
371        &self,
372        citation: &Citation,
373        effective_spec: &citum_schema::CitationSpec,
374        renderer_delimiter: &str,
375        renderer_inter_delimiter: &str,
376        note_start_text_case: Option<NoteStartTextCase>,
377    ) -> Result<String, ProcessorError>
378    where
379        F: crate::render::format::OutputFormat<Output = String>,
380    {
381        // Grouped citations preserve item order (dynamic grouping was already resolved
382        // in process_citation_with_format before cited_ids was updated).
383        let sorted_items = if citation.grouped {
384            citation.items.clone()
385        } else {
386            self.sort_citation_items(citation.items.clone(), effective_spec)
387        };
388
389        // Build merged compound lookup maps (static + dynamic).
390        // Return owned maps only when dynamic groups exist; otherwise use static maps directly.
391        let (dyn_set_owned, dyn_idx_owned, dyn_sets_owned) = self.merged_compound_data();
392        let effective_set_by_ref = dyn_set_owned.as_ref().unwrap_or(&self.compound_set_by_ref);
393        let effective_member_index = dyn_idx_owned
394            .as_ref()
395            .unwrap_or(&self.compound_member_index);
396        let effective_compound_sets = dyn_sets_owned.as_ref().unwrap_or(&self.compound_sets);
397
398        let citation_config = self.get_citation_config();
399        let citation_config = match effective_spec.options.as_ref() {
400            Some(mode_options) => {
401                let mut config = citation_config.into_owned();
402                config.merge(&mode_options.to_config());
403                std::borrow::Cow::Owned(config)
404            }
405            None => citation_config,
406        };
407        let scoped_hints = self.citation_scoped_by_cite_hints(&sorted_items, &citation_config);
408        let renderer_hints = scoped_hints.as_ref().unwrap_or(&self.hints);
409        let renderer = Renderer::new(
410            RendererResources {
411                style: &self.style,
412                bibliography: &self.bibliography,
413                locale: &self.locale,
414                config: &citation_config,
415                bibliography_config: Some(self.get_bibliography_options().into_owned()),
416                first_note_by_id: Some(&self.first_note_by_id),
417            },
418            renderer_hints,
419            &self.citation_numbers,
420            CompoundRenderData {
421                set_by_ref: effective_set_by_ref,
422                member_index: effective_member_index,
423                sets: effective_compound_sets,
424            },
425            self.show_semantics,
426            self.inject_ast_indices,
427            self.abbreviation_map.as_ref(),
428        );
429        let processing = citation_config.processing.clone().unwrap_or_default();
430        let has_explicit_integral_multi_cite_delimiter = matches!(
431            citation.mode,
432            citum_schema::citation::CitationMode::Integral
433        ) && self
434            .resolve_positioned_citation_spec(citation)
435            .integral
436            .as_ref()
437            .and_then(|spec| spec.multi_cite_delimiter.as_ref())
438            .is_some();
439        let rendered_groups = if matches!(
440            processing,
441            citum_schema::options::Processing::Numeric
442                | citum_schema::options::Processing::Label(_)
443        ) {
444            renderer.render_ungrouped_citation_with_format::<F>(
445                &sorted_items,
446                effective_spec,
447                &citation.mode,
448                renderer_delimiter,
449                citation.suppress_author,
450                citation.position.as_ref(),
451                note_start_text_case,
452            )?
453        } else {
454            renderer.render_grouped_citation_with_format::<F>(
455                &sorted_items,
456                &GroupRenderParams {
457                    spec: effective_spec,
458                    mode: &citation.mode,
459                    intra_delimiter: renderer_delimiter,
460                    suppress_author: citation.suppress_author,
461                    position: citation.position.as_ref(),
462                    note_start_text_case,
463                },
464            )?
465        };
466
467        Ok(
468            if matches!(
469                citation.mode,
470                citum_schema::citation::CitationMode::Integral
471            ) && !has_explicit_integral_multi_cite_delimiter
472            {
473                join_integral_groups(rendered_groups, &self.locale)
474            } else {
475                F::default().join(rendered_groups, renderer_inter_delimiter)
476            },
477        )
478    }
479
480    /// Apply user-supplied prefix and suffix from the citation input.
481    ///
482    /// Automatically adds a trailing space to the prefix and a leading space to
483    /// the suffix if they are not already present and not empty.
484    fn apply_citation_input_affixes<F>(
485        &self,
486        citation: &Citation,
487        content: String,
488        fmt: &F,
489    ) -> String
490    where
491        F: crate::render::format::OutputFormat<Output = String>,
492    {
493        let citation_prefix = citation.prefix.as_deref().unwrap_or("");
494        let citation_suffix = citation.suffix.as_deref().unwrap_or("");
495
496        if citation_prefix.is_empty() && citation_suffix.is_empty() {
497            return content;
498        }
499
500        let formatted_prefix =
501            if !citation_prefix.is_empty() && !citation_prefix.ends_with(char::is_whitespace) {
502                format!("{citation_prefix} ")
503            } else {
504                citation_prefix.to_string()
505            };
506
507        let formatted_suffix =
508            if !citation_suffix.is_empty() && !citation_suffix.starts_with(char::is_whitespace) {
509                format!(" {citation_suffix}")
510            } else {
511                citation_suffix.to_string()
512            };
513
514        fmt.affix(&formatted_prefix, content, &formatted_suffix)
515    }
516
517    /// Apply style-defined wrapping and affixes to the rendered citation output.
518    ///
519    /// Handles `wrap` logic (inner prefixes/suffixes and punctuation) based on
520    /// the citation mode and position.
521    fn apply_spec_wrap_and_affixes<F>(
522        &self,
523        citation: &Citation,
524        effective_spec: &citum_schema::CitationSpec,
525        output: String,
526        fmt: &F,
527    ) -> String
528    where
529        F: crate::render::format::OutputFormat<Output = String>,
530    {
531        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
532        let spec_suffix = effective_spec.suffix.as_deref().unwrap_or("");
533
534        if matches!(
535            citation.mode,
536            citum_schema::citation::CitationMode::Integral
537        ) {
538            if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
539                fmt.affix(spec_prefix, output, spec_suffix)
540            } else {
541                output
542            }
543        } else if let Some(wrap) = effective_spec.wrap.as_ref() {
544            let inner_prefix = wrap.inner_prefix.as_deref().unwrap_or("");
545            let inner_suffix = wrap.inner_suffix.as_deref().unwrap_or("");
546            let inner_wrapped = if !inner_prefix.is_empty() || !inner_suffix.is_empty() {
547                fmt.inner_affix(inner_prefix, output, inner_suffix)
548            } else {
549                output
550            };
551            fmt.wrap_punctuation(&wrap.punctuation, inner_wrapped)
552        } else if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
553            fmt.affix(spec_prefix, output, spec_suffix)
554        } else {
555            output
556        }
557    }
558
559    /// Render a single citation to plain text.
560    ///
561    /// This is the primary entry point for citation processing. It handles:
562    /// 1. Looking up references in the bibliography.
563    /// 2. Annotating positions (ibid, subsequent, etc.).
564    /// 3. Resolving disambiguation (name expansion, year suffixes).
565    /// 4. Applying the style's citation template.
566    ///
567    /// Returns the formatted citation string or an error if processing fails.
568    ///
569    /// # Errors
570    ///
571    /// Returns an error when referenced items are missing or rendering fails.
572    pub fn process_citation(&self, citation: &Citation) -> Result<String, ProcessorError> {
573        self.process_citation_with_format::<crate::render::plain::PlainText>(citation)
574    }
575
576    /// Render a citation to a string using a specific output format.
577    ///
578    /// This resolves the effective citation spec for the citation's mode and
579    /// position, renders the citation body, and applies input and style affixes.
580    ///
581    /// # Errors
582    ///
583    /// Returns an error when referenced items are missing or rendering fails.
584    pub fn process_citation_with_format<F>(
585        &self,
586        citation: &Citation,
587    ) -> Result<String, ProcessorError>
588    where
589        F: crate::render::format::OutputFormat<Output = String>,
590    {
591        let fmt = F::default();
592
593        // For grouped citations, resolve the dynamic compound group BEFORE updating
594        // cited_ids with the current citation's items. This ensures the first-occurrence
595        // check in resolve_dynamic_group sees only references from prior citations.
596        if citation.grouped {
597            self.initialize_numeric_citation_numbers();
598            self.resolve_dynamic_group(citation);
599        }
600
601        self.track_cited_ids_and_init_numbers(citation);
602
603        let effective_spec = self.resolve_effective_citation_spec(citation);
604        let note_start_text_case =
605            self.sentence_initial_note_start_text_case(citation, &effective_spec);
606        let (renderer_delimiter, renderer_inter_delimiter) =
607            self.resolve_citation_delimiters(&effective_spec);
608        let content = self.render_citation_content::<F>(
609            citation,
610            &effective_spec,
611            renderer_delimiter,
612            renderer_inter_delimiter,
613            note_start_text_case,
614        )?;
615        let output = self.apply_citation_input_affixes(citation, content, &fmt);
616        let wrapped = self.apply_spec_wrap_and_affixes(citation, &effective_spec, output, &fmt);
617
618        // If the host signals that this cluster opens a sentence, capitalize
619        // the leading character of the composed output.  The markup-aware
620        // variant skips leading punctuation (e.g. an opening parenthesis) so
621        // only the first alphabetic character is affected.
622        let finalized = if citation.sentence_start {
623            let case = crate::values::text_case::resolve_text_case(
624                citum_schema::options::titles::TextCase::CapitalizeFirst,
625                Some(self.locale.locale.as_str()),
626            );
627            crate::values::text_case::apply_text_case_markup_aware(&wrapped, case)
628        } else {
629            wrapped
630        };
631
632        Ok(fmt.finish(finalized))
633    }
634
635    /// Render multiple citations in document order.
636    ///
637    /// For note-based styles, normalizes context and assigns citation positions.
638    ///
639    /// # Errors
640    ///
641    /// Returns an error when any citation in the sequence fails to render.
642    pub fn process_citations(&self, citations: &[Citation]) -> Result<Vec<String>, ProcessorError> {
643        self.process_citations_with_format::<crate::render::plain::PlainText>(citations)
644    }
645
646    /// Render multiple citations with a custom output format.
647    ///
648    /// # Errors
649    ///
650    /// Returns an error when any citation in the sequence fails to render.
651    pub fn process_citations_with_format<F>(
652        &self,
653        citations: &[Citation],
654    ) -> Result<Vec<String>, ProcessorError>
655    where
656        F: crate::render::format::OutputFormat<Output = String>,
657    {
658        let mut normalized = self.normalize_note_context(citations);
659        self.annotate_positions(&mut normalized);
660        normalized
661            .iter()
662            .map(|citation| self.process_citation_with_format::<F>(citation))
663            .collect()
664    }
665}