Skip to main content

citum_engine/processor/
citation.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/
5
//! Citation rendering orchestration.
//!
//! This module resolves the effective citation spec for each citation and
//! prepares renderer delimiters and affixes. Template-level rendering, including
//! sentence-initial note-start handling, lives in `rendering`.
11
12use super::Processor;
13use super::rendering::{CompoundRenderData, GroupRenderParams, Renderer, RendererResources};
14use crate::error::ProcessorError;
15use crate::reference::Citation;
16use citum_schema::NoteStartTextCase;
17use citum_schema::locale::{GeneralTerm, Locale, TermForm};
18use citum_schema::template::DelimiterPunctuation;
19use indexmap::IndexMap;
20use std::collections::HashMap;
21
22/// Join rendered integral (narrative) groups with localized conjunctions.
23///
24/// Uses the locale's "and" term to join groups according to document grammar
25/// rules (e.g., "A and B" or "A, B, and C" with optional serial comma).
26fn join_integral_groups(rendered_groups: Vec<String>, locale: &Locale) -> String {
27    match rendered_groups.len() {
28        0 => String::new(),
29        1 => rendered_groups.into_iter().next().unwrap_or_default(),
30        2 => {
31            let conjunction = locale
32                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
33                .unwrap_or_else(|| locale.and_term(false).to_string());
34            rendered_groups.join(&format!(" {} ", conjunction.trim()))
35        }
36        _ => {
37            let conjunction = locale
38                .resolved_general_term(&GeneralTerm::And, &TermForm::Long, None)
39                .unwrap_or_else(|| locale.and_term(false).to_string());
40            let final_delimiter = if locale.grammar_options.serial_comma {
41                format!(", {} ", conjunction.trim())
42            } else {
43                format!(" {} ", conjunction.trim())
44            };
45
46            let mut rendered_groups = rendered_groups;
47            let last = rendered_groups.pop().unwrap_or_default();
48            format!("{}{}{}", rendered_groups.join(", "), final_delimiter, last)
49        }
50    }
51}
52
53impl Processor {
54    /// Determine the text-case policy for a citation at the start of a note.
55    ///
56    /// Only applies for note-based styles when a repeated-citation position (Ibid)
57    /// is at the start of the note and has no user-supplied or spec-defined prefix.
58    fn sentence_initial_note_start_text_case(
59        &self,
60        citation: &Citation,
61        effective_spec: &citum_schema::CitationSpec,
62    ) -> Option<NoteStartTextCase> {
63        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
64        if self.is_note_style()
65            && matches!(
66                citation.position,
67                Some(
68                    citum_schema::citation::Position::Ibid
69                        | citum_schema::citation::Position::IbidWithLocator
70                )
71            )
72            && matches!(
73                citation.mode,
74                citum_schema::citation::CitationMode::NonIntegral
75            )
76            && citation.prefix.as_deref().unwrap_or("").is_empty()
77            && spec_prefix.is_empty()
78        {
79            effective_spec.note_start_text_case
80        } else {
81            None
82        }
83    }
84
85    /// Resolve the citation specification based on the citation's document position.
86    ///
87    /// Delegates to the style's citation spec to handle ibid, subsequent, or first
88    /// position overrides.
89    fn resolve_positioned_citation_spec(
90        &self,
91        citation: &Citation,
92    ) -> std::borrow::Cow<'_, citum_schema::CitationSpec> {
93        self.style.citation.as_ref().map_or_else(
94            || std::borrow::Cow::Owned(citum_schema::CitationSpec::default()),
95            |spec| spec.resolve_for_position(citation.position.as_ref()),
96        )
97    }
98
99    /// Register cited reference IDs and ensure numeric labels are initialized.
100    ///
101    /// This maintains the set of all references cited in the document and ensures
102    /// that numeric styles have a stable numbering map.
103    fn track_cited_ids_and_init_numbers(&self, citation: &Citation) {
104        self.initialize_numeric_citation_numbers();
105        let mut cited_ids = self.cited_ids.borrow_mut();
106        for item in &citation.items {
107            cited_ids.insert(item.id.clone());
108        }
109    }
110
111    /// Resolve the final effective citation spec for a given mode and position.
112    fn resolve_effective_citation_spec(&self, citation: &Citation) -> citum_schema::CitationSpec {
113        self.resolve_positioned_citation_spec(citation)
114            .into_owned()
115            .resolve_for_mode(&citation.mode)
116            .into_owned()
117    }
118
119    /// Resolve intra-item and inter-citation delimiters for a citation spec.
120    fn resolve_citation_delimiters<'a>(
121        &self,
122        effective_spec: &'a citum_schema::CitationSpec,
123    ) -> (&'a str, &'a str) {
124        let intra_delimiter = effective_spec.delimiter.as_deref().unwrap_or(", ");
125        let inter_delimiter = effective_spec
126            .multi_cite_delimiter
127            .as_deref()
128            .unwrap_or("; ");
129
130        (
131            if matches!(
132                DelimiterPunctuation::from_csl_string(intra_delimiter),
133                DelimiterPunctuation::None
134            ) {
135                ""
136            } else {
137                intra_delimiter
138            },
139            if matches!(
140                DelimiterPunctuation::from_csl_string(inter_delimiter),
141                DelimiterPunctuation::None
142            ) {
143                ""
144            } else {
145                inter_delimiter
146            },
147        )
148    }
149
150    /// Register a dynamic compound group for a `grouped` citation.
151    ///
152    /// The first item in `citation.items` is the head; subsequent items are tails.
153    /// Skips silently when:
154    /// - The style has no `compound-numeric` bibliography configuration (non-numeric style).
155    /// - A static compound set already covers the head or any tail (static sets take precedence).
156    /// - The head or any tail was previously cited in any context (first occurrence wins).
157    ///
158    /// This method must be called before `track_cited_ids_and_init_numbers` so that
159    /// `cited_ids` reflects only references from prior citations, not the current one.
160    fn resolve_dynamic_group(&self, citation: &Citation) {
161        if self.get_bibliography_options().compound_numeric.is_none() {
162            return;
163        }
164
165        if citation.items.len() < 2 {
166            return;
167        }
168
169        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
170        let head_id = &citation.items[0].id;
171        #[allow(clippy::indexing_slicing, reason = "citation.items.len() >= 2")]
172        let tail_ids: Vec<String> = citation.items[1..].iter().map(|i| i.id.clone()).collect();
173
174        // Static sets take precedence — skip if head or any tail is in a static set.
175        if self.compound_set_by_ref.contains_key(head_id) {
176            return;
177        }
178        for tail in &tail_ids {
179            if self.compound_set_by_ref.contains_key(tail.as_str()) {
180                return;
181            }
182        }
183
184        // First-occurrence wins: reject if the head or any tail was already cited in any
185        // context — whether via a prior dynamic group or a previous ungrouped citation.
186        // Because this method is called before cited_ids is updated for the current
187        // citation, `cited_ids` contains only references from earlier citations.
188        {
189            let dyn_set = self.dynamic_compound_set_by_ref.borrow();
190            let cited = self.cited_ids.borrow();
191
192            if dyn_set.contains_key(head_id.as_str()) || cited.contains(head_id.as_str()) {
193                return;
194            }
195            for tail in &tail_ids {
196                if dyn_set.contains_key(tail.as_str()) || cited.contains(tail.as_str()) {
197                    return;
198                }
199            }
200        }
201
202        let head_number = {
203            let numbers = self.citation_numbers.borrow();
204            let Some(&n) = numbers.get(head_id.as_str()) else {
205                return;
206            };
207            n
208        };
209
210        // Assign all tails the same citation number as the head.
211        {
212            let mut numbers = self.citation_numbers.borrow_mut();
213            for tail in &tail_ids {
214                numbers.insert(tail.clone(), head_number);
215            }
216        }
217
218        // Build the ordered member list for this group.
219        let all_members: Vec<String> = std::iter::once(head_id.clone())
220            .chain(tail_ids.iter().cloned())
221            .collect();
222
223        // Populate dynamic index maps so the renderer can assign sub-labels.
224        {
225            let mut dyn_set = self.dynamic_compound_set_by_ref.borrow_mut();
226            let mut dyn_idx = self.dynamic_compound_member_index.borrow_mut();
227            for (idx, member) in all_members.iter().enumerate() {
228                dyn_set.insert(member.clone(), head_id.clone());
229                dyn_idx.insert(member.clone(), idx);
230            }
231        }
232
233        // Inject into compound_groups for bibliography rendering.
234        {
235            let mut groups = self.compound_groups.borrow_mut();
236            let members = groups
237                .entry(head_number)
238                .or_insert_with(|| vec![head_id.clone()]);
239            for tail in &tail_ids {
240                if !members.contains(tail) {
241                    members.push(tail.clone());
242                }
243            }
244        }
245
246        // Register dynamic set so citation_sub_label_for_ref can find members.
247        self.dynamic_compound_sets
248            .borrow_mut()
249            .insert(head_id.clone(), all_members);
250    }
251
252    /// Build the merged static + dynamic compound lookup maps for the renderer.
253    ///
254    /// When no dynamic groups exist (the common case) the static maps are returned
255    /// via references with no allocation. Owned merged maps are only constructed when
256    /// at least one dynamic group is registered.
257    fn merged_compound_data(
258        &self,
259    ) -> (
260        Option<HashMap<String, String>>,
261        Option<HashMap<String, usize>>,
262        Option<IndexMap<String, Vec<String>>>,
263    ) {
264        if self.dynamic_compound_set_by_ref.borrow().is_empty() {
265            return (None, None, None);
266        }
267        let merged_set: HashMap<String, String> = self
268            .compound_set_by_ref
269            .iter()
270            .chain(self.dynamic_compound_set_by_ref.borrow().iter())
271            .map(|(k, v)| (k.clone(), v.clone()))
272            .collect();
273        let merged_idx: HashMap<String, usize> = self
274            .compound_member_index
275            .iter()
276            .chain(self.dynamic_compound_member_index.borrow().iter())
277            .map(|(k, v)| (k.clone(), *v))
278            .collect();
279        let merged_sets: IndexMap<String, Vec<String>> = self
280            .compound_sets
281            .iter()
282            .chain(self.dynamic_compound_sets.borrow().iter())
283            .map(|(k, v)| (k.clone(), v.clone()))
284            .collect();
285        (Some(merged_set), Some(merged_idx), Some(merged_sets))
286    }
287
288    /// Render the core content of a citation, handling sorting and grouping.
289    ///
290    /// This is the main orchestration point for template rendering, compound data
291    /// resolution, and mode-specific (integral vs non-integral) formatting.
292    fn render_citation_content<F>(
293        &self,
294        citation: &Citation,
295        effective_spec: &citum_schema::CitationSpec,
296        renderer_delimiter: &str,
297        renderer_inter_delimiter: &str,
298        note_start_text_case: Option<NoteStartTextCase>,
299    ) -> Result<String, ProcessorError>
300    where
301        F: crate::render::format::OutputFormat<Output = String>,
302    {
303        // Grouped citations preserve item order (dynamic grouping was already resolved
304        // in process_citation_with_format before cited_ids was updated).
305        let sorted_items = if citation.grouped {
306            citation.items.clone()
307        } else {
308            self.sort_citation_items(citation.items.clone(), effective_spec)
309        };
310
311        // Build merged compound lookup maps (static + dynamic).
312        // Return owned maps only when dynamic groups exist; otherwise use static maps directly.
313        let (dyn_set_owned, dyn_idx_owned, dyn_sets_owned) = self.merged_compound_data();
314        let effective_set_by_ref = dyn_set_owned.as_ref().unwrap_or(&self.compound_set_by_ref);
315        let effective_member_index = dyn_idx_owned
316            .as_ref()
317            .unwrap_or(&self.compound_member_index);
318        let effective_compound_sets = dyn_sets_owned.as_ref().unwrap_or(&self.compound_sets);
319
320        let citation_config = self.get_citation_config();
321        let renderer = Renderer::new(
322            RendererResources {
323                style: &self.style,
324                bibliography: &self.bibliography,
325                locale: &self.locale,
326                config: &citation_config,
327                bibliography_config: Some(self.get_bibliography_options().into_owned()),
328            },
329            &self.hints,
330            &self.citation_numbers,
331            CompoundRenderData {
332                set_by_ref: effective_set_by_ref,
333                member_index: effective_member_index,
334                sets: effective_compound_sets,
335            },
336            self.show_semantics,
337            self.inject_ast_indices,
338            self.abbreviation_map.as_ref(),
339        );
340        let processing = citation_config.processing.clone().unwrap_or_default();
341        let has_explicit_integral_multi_cite_delimiter = matches!(
342            citation.mode,
343            citum_schema::citation::CitationMode::Integral
344        ) && self
345            .resolve_positioned_citation_spec(citation)
346            .integral
347            .as_ref()
348            .and_then(|spec| spec.multi_cite_delimiter.as_ref())
349            .is_some();
350        let rendered_groups = if matches!(
351            processing,
352            citum_schema::options::Processing::Numeric
353                | citum_schema::options::Processing::Label(_)
354        ) {
355            renderer.render_ungrouped_citation_with_format::<F>(
356                &sorted_items,
357                effective_spec,
358                &citation.mode,
359                renderer_delimiter,
360                citation.suppress_author,
361                citation.position.as_ref(),
362                note_start_text_case,
363            )?
364        } else {
365            renderer.render_grouped_citation_with_format::<F>(
366                &sorted_items,
367                &GroupRenderParams {
368                    spec: effective_spec,
369                    mode: &citation.mode,
370                    intra_delimiter: renderer_delimiter,
371                    suppress_author: citation.suppress_author,
372                    position: citation.position.as_ref(),
373                    note_start_text_case,
374                },
375            )?
376        };
377
378        Ok(
379            if matches!(
380                citation.mode,
381                citum_schema::citation::CitationMode::Integral
382            ) && !has_explicit_integral_multi_cite_delimiter
383            {
384                join_integral_groups(rendered_groups, &self.locale)
385            } else {
386                F::default().join(rendered_groups, renderer_inter_delimiter)
387            },
388        )
389    }
390
391    /// Apply user-supplied prefix and suffix from the citation input.
392    ///
393    /// Automatically adds a trailing space to the prefix and a leading space to
394    /// the suffix if they are not already present and not empty.
395    fn apply_citation_input_affixes<F>(
396        &self,
397        citation: &Citation,
398        content: String,
399        fmt: &F,
400    ) -> String
401    where
402        F: crate::render::format::OutputFormat<Output = String>,
403    {
404        let citation_prefix = citation.prefix.as_deref().unwrap_or("");
405        let citation_suffix = citation.suffix.as_deref().unwrap_or("");
406
407        if citation_prefix.is_empty() && citation_suffix.is_empty() {
408            return content;
409        }
410
411        let formatted_prefix =
412            if !citation_prefix.is_empty() && !citation_prefix.ends_with(char::is_whitespace) {
413                format!("{citation_prefix} ")
414            } else {
415                citation_prefix.to_string()
416            };
417
418        let formatted_suffix =
419            if !citation_suffix.is_empty() && !citation_suffix.starts_with(char::is_whitespace) {
420                format!(" {citation_suffix}")
421            } else {
422                citation_suffix.to_string()
423            };
424
425        fmt.affix(&formatted_prefix, content, &formatted_suffix)
426    }
427
428    /// Apply style-defined wrapping and affixes to the rendered citation output.
429    ///
430    /// Handles `wrap` logic (inner prefixes/suffixes and punctuation) based on
431    /// the citation mode and position.
432    fn apply_spec_wrap_and_affixes<F>(
433        &self,
434        citation: &Citation,
435        effective_spec: &citum_schema::CitationSpec,
436        output: String,
437        fmt: &F,
438    ) -> String
439    where
440        F: crate::render::format::OutputFormat<Output = String>,
441    {
442        let spec_prefix = effective_spec.prefix.as_deref().unwrap_or("");
443        let spec_suffix = effective_spec.suffix.as_deref().unwrap_or("");
444
445        if matches!(
446            citation.mode,
447            citum_schema::citation::CitationMode::Integral
448        ) {
449            if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
450                fmt.affix(spec_prefix, output, spec_suffix)
451            } else {
452                output
453            }
454        } else if let Some(wrap) = effective_spec.wrap.as_ref() {
455            let inner_prefix = wrap.inner_prefix.as_deref().unwrap_or("");
456            let inner_suffix = wrap.inner_suffix.as_deref().unwrap_or("");
457            let inner_wrapped = if !inner_prefix.is_empty() || !inner_suffix.is_empty() {
458                fmt.inner_affix(inner_prefix, output, inner_suffix)
459            } else {
460                output
461            };
462            fmt.wrap_punctuation(&wrap.punctuation, inner_wrapped)
463        } else if !spec_prefix.is_empty() || !spec_suffix.is_empty() {
464            fmt.affix(spec_prefix, output, spec_suffix)
465        } else {
466            output
467        }
468    }
469
470    /// Render a single citation to plain text.
471    ///
472    /// This is the primary entry point for citation processing. It handles:
473    /// 1. Looking up references in the bibliography.
474    /// 2. Annotating positions (ibid, subsequent, etc.).
475    /// 3. Resolving disambiguation (name expansion, year suffixes).
476    /// 4. Applying the style's citation template.
477    ///
478    /// Returns the formatted citation string or an error if processing fails.
479    ///
480    /// # Errors
481    ///
482    /// Returns an error when referenced items are missing or rendering fails.
483    pub fn process_citation(&self, citation: &Citation) -> Result<String, ProcessorError> {
484        self.process_citation_with_format::<crate::render::plain::PlainText>(citation)
485    }
486
487    /// Render a citation to a string using a specific output format.
488    ///
489    /// This resolves the effective citation spec for the citation's mode and
490    /// position, renders the citation body, and applies input and style affixes.
491    ///
492    /// # Errors
493    ///
494    /// Returns an error when referenced items are missing or rendering fails.
495    pub fn process_citation_with_format<F>(
496        &self,
497        citation: &Citation,
498    ) -> Result<String, ProcessorError>
499    where
500        F: crate::render::format::OutputFormat<Output = String>,
501    {
502        let fmt = F::default();
503
504        // For grouped citations, resolve the dynamic compound group BEFORE updating
505        // cited_ids with the current citation's items. This ensures the first-occurrence
506        // check in resolve_dynamic_group sees only references from prior citations.
507        if citation.grouped {
508            self.initialize_numeric_citation_numbers();
509            self.resolve_dynamic_group(citation);
510        }
511
512        self.track_cited_ids_and_init_numbers(citation);
513
514        let effective_spec = self.resolve_effective_citation_spec(citation);
515        let note_start_text_case =
516            self.sentence_initial_note_start_text_case(citation, &effective_spec);
517        let (renderer_delimiter, renderer_inter_delimiter) =
518            self.resolve_citation_delimiters(&effective_spec);
519        let content = self.render_citation_content::<F>(
520            citation,
521            &effective_spec,
522            renderer_delimiter,
523            renderer_inter_delimiter,
524            note_start_text_case,
525        )?;
526        let output = self.apply_citation_input_affixes(citation, content, &fmt);
527        let wrapped = self.apply_spec_wrap_and_affixes(citation, &effective_spec, output, &fmt);
528
529        Ok(fmt.finish(wrapped))
530    }
531
532    /// Render multiple citations in document order.
533    ///
534    /// For note-based styles, normalizes context and assigns citation positions.
535    ///
536    /// # Errors
537    ///
538    /// Returns an error when any citation in the sequence fails to render.
539    pub fn process_citations(&self, citations: &[Citation]) -> Result<Vec<String>, ProcessorError> {
540        self.process_citations_with_format::<crate::render::plain::PlainText>(citations)
541    }
542
543    /// Render multiple citations with a custom output format.
544    ///
545    /// # Errors
546    ///
547    /// Returns an error when any citation in the sequence fails to render.
548    pub fn process_citations_with_format<F>(
549        &self,
550        citations: &[Citation],
551    ) -> Result<Vec<String>, ProcessorError>
552    where
553        F: crate::render::format::OutputFormat<Output = String>,
554    {
555        let mut normalized = self.normalize_note_context(citations);
556        self.annotate_positions(&mut normalized);
557        normalized
558            .iter()
559            .map(|citation| self.process_citation_with_format::<F>(citation))
560            .collect()
561    }
562}