Skip to main content

citum_engine/processor/bibliography/
grouping.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/

//! Grouped bibliography rendering with configurable selectors and sorting.
7
8use super::RenderedBibliographyGroup;
9use crate::api::AnnotationStyle;
10use crate::grouping::{GroupSorter, SelectorEvaluator};
11use crate::processor::Processor;
12use crate::processor::disambiguation::Disambiguator;
13use crate::processor::rendering::{CompoundRenderData, Renderer, RendererResources};
14use crate::reference::{Bibliography, Reference};
15use crate::render::ProcEntry;
16use crate::render::format::{OutputFormat, ProcEntryMetadata};
17use crate::values::{ProcHints, RenderContext, RenderOptions, format_contributors_short};
18use citum_schema::grouping::{BibliographyGroup, DisambiguationScope, GroupHeading};
19use citum_schema::options::{BibliographyPartitionHeading, BibliographySortPartitioning};
20use std::borrow::Cow;
21use std::collections::{HashMap, HashSet};
22
23impl Processor {
24    pub(super) fn resolve_group_heading(&self, heading: &GroupHeading) -> Option<String> {
25        match heading {
26            GroupHeading::Literal { literal } => Some(literal.clone()),
27            GroupHeading::Term { term, form } => self.locale.resolved_general_term(
28                term,
29                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
30                None,
31            ),
32            GroupHeading::Localized { localized } => self.resolve_localized_heading(localized),
33        }
34    }
35
36    fn resolve_localized_heading(&self, localized: &HashMap<String, String>) -> Option<String> {
37        fn language_tag(locale: &str) -> &str {
38            locale.split('-').next().unwrap_or(locale)
39        }
40
41        let mut candidates = Vec::new();
42        let mut push_candidate = |locale: &str| {
43            let candidate = locale.to_string();
44            if !candidates.contains(&candidate) {
45                candidates.push(candidate);
46            }
47        };
48
49        push_candidate(&self.locale.locale);
50        push_candidate(language_tag(&self.locale.locale));
51
52        if let Some(default_locale) = self.style.info.default_locale.as_deref() {
53            push_candidate(default_locale);
54            push_candidate(language_tag(default_locale));
55        }
56
57        push_candidate("en-US");
58        push_candidate("en");
59
60        for locale in candidates {
61            if let Some(value) = localized.get(&locale) {
62                return Some(value.clone());
63            }
64        }
65
66        localized
67            .iter()
68            .min_by(|left, right| left.0.cmp(right.0))
69            .map(|(_locale, value)| value.clone())
70    }
71
72    fn resolve_partition_heading(&self, heading: &BibliographyPartitionHeading) -> Option<String> {
73        match heading {
74            BibliographyPartitionHeading::Literal { literal } => Some(literal.clone()),
75            BibliographyPartitionHeading::Term { term, form } => self.locale.resolved_general_term(
76                term,
77                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
78                None,
79            ),
80            BibliographyPartitionHeading::Localized { localized } => {
81                self.resolve_localized_heading(localized)
82            }
83        }
84    }
85
86    fn collect_matching_group_refs<'a>(
87        &'a self,
88        bibliography: &'a [ProcEntry],
89        assigned: &HashSet<String>,
90        evaluator: &SelectorEvaluator<'_>,
91        group: &BibliographyGroup,
92    ) -> Vec<&'a Reference> {
93        bibliography
94            .iter()
95            .filter(|entry| !assigned.contains(&entry.id))
96            .filter_map(|entry| {
97                self.bibliography
98                    .get(&entry.id)
99                    .filter(|reference| evaluator.matches(reference, &group.selector))
100            })
101            .collect()
102    }
103
104    /// Returns `ProcEntry` stubs with only `id` populated, in sort order.
105    ///
106    /// Used for grouping paths that only need IDs for selector matching — avoids
107    /// the full PlainText render pass that `process_references` performs.
108    fn sorted_id_stubs(&self) -> Vec<ProcEntry> {
109        self.initialize_numeric_bibliography_numbers();
110        self.sort_references(self.bibliography.values().collect())
111            .into_iter()
112            .filter_map(|r| {
113                r.id().map(|id| ProcEntry {
114                    id: id.to_string(),
115                    template: vec![],
116                    metadata: ProcEntryMetadata::default(),
117                })
118            })
119            .collect()
120    }
121
122    fn mark_group_members_assigned(assigned: &mut HashSet<String>, references: &[&Reference]) {
123        for reference in references {
124            if let Some(id) = reference.id() {
125                assigned.insert(id.to_string());
126            }
127        }
128    }
129
130    fn build_group_local_hints(
131        &self,
132        sorted_refs: &[&Reference],
133        group: &BibliographyGroup,
134    ) -> Option<HashMap<String, ProcHints>> {
135        if !matches!(group.disambiguate, Some(DisambiguationScope::Locally)) {
136            return None;
137        }
138
139        let mut group_bibliography = Bibliography::new();
140        for reference in sorted_refs {
141            group_bibliography.insert(
142                reference.id().unwrap_or_default().to_string(),
143                (*reference).clone(),
144            );
145        }
146
147        let resolved_sort = group
148            .sort
149            .as_ref()
150            .map(citum_schema::GroupSortEntry::resolve);
151        let bibliography_config = self.get_bibliography_config();
152        let disambiguator = if let Some(sort) = resolved_sort.as_ref() {
153            Disambiguator::with_group_sort(
154                &group_bibliography,
155                &bibliography_config,
156                &self.locale,
157                sort,
158            )
159        } else {
160            Disambiguator::new(&group_bibliography, &bibliography_config, &self.locale)
161        };
162
163        Some(disambiguator.calculate_hints())
164    }
165
166    fn effective_group_style<'a>(
167        &'a self,
168        group: &'a BibliographyGroup,
169    ) -> Cow<'a, citum_schema::Style> {
170        if let Some(group_template) = &group.template {
171            let mut local_style = self.style.clone();
172            if let Some(bibliography) = local_style.bibliography.as_mut() {
173                bibliography.template = Some(group_template.clone());
174            }
175            Cow::Owned(local_style)
176        } else {
177            Cow::Borrowed(&self.style)
178        }
179    }
180
181    fn render_group_entries<F>(
182        &self,
183        _bibliography: &[ProcEntry],
184        sorted_refs: Vec<&Reference>,
185        group: &BibliographyGroup,
186        local_hints: Option<&HashMap<String, ProcHints>>,
187    ) -> Vec<ProcEntry>
188    where
189        F: OutputFormat<Output = String>,
190    {
191        // Always process entries with format F so that group components (pre_formatted=true)
192        // contain markup in the target format rather than PlainText (_..._).
193        let hints = local_hints.unwrap_or(&self.hints);
194        let effective_style = self.effective_group_style(group);
195        let bibliography_config = self.get_bibliography_config();
196        let bibliography_options = self.get_bibliography_options().into_owned();
197        let substitute = bibliography_options.subsequent_author_substitute.clone();
198        let renderer = Renderer::new(
199            RendererResources {
200                style: &effective_style,
201                bibliography: &self.bibliography,
202                locale: &self.locale,
203                config: &bibliography_config,
204                bibliography_config: Some(bibliography_options),
205            },
206            hints,
207            &self.citation_numbers,
208            CompoundRenderData {
209                set_by_ref: &self.compound_set_by_ref,
210                member_index: &self.compound_member_index,
211                sets: &self.compound_sets,
212            },
213            self.show_semantics,
214            self.inject_ast_indices,
215            self.abbreviation_map.as_ref(),
216        );
217
218        let mut entries = Vec::new();
219        let mut previous_reference: Option<&Reference> = None;
220
221        for (index, reference) in sorted_refs.into_iter().enumerate() {
222            let ref_id = reference.id().unwrap_or_default().to_string();
223            let entry_number = self
224                .citation_numbers
225                .borrow()
226                .get(&ref_id)
227                .copied()
228                .unwrap_or(index + 1);
229
230            if let Some(mut processed) =
231                renderer.process_bibliography_entry_with_format::<F>(reference, entry_number)
232            {
233                if let Some(substitute_string) = substitute.as_deref()
234                    && let Some(previous) = previous_reference
235                    && self.contributors_match(previous, reference)
236                {
237                    renderer.apply_author_substitution_with_format::<F>(
238                        &mut processed,
239                        substitute_string,
240                    );
241                }
242
243                entries.push(ProcEntry {
244                    id: ref_id,
245                    template: processed,
246                    metadata: self.extract_metadata(reference),
247                });
248                previous_reference = Some(reference);
249            }
250        }
251
252        entries
253    }
254
255    fn append_rendered_group<F>(
256        &self,
257        result: &mut String,
258        group: &BibliographyGroup,
259        entries: Vec<ProcEntry>,
260        annotations: Option<&HashMap<String, String>>,
261        annotation_style: Option<&AnnotationStyle>,
262        suppress_heading: bool,
263    ) where
264        F: OutputFormat<Output = String>,
265    {
266        if !result.is_empty() {
267            result.push_str("\n\n");
268        }
269
270        if !suppress_heading
271            && let Some(heading) = group
272                .heading
273                .as_ref()
274                .and_then(|group_heading| self.resolve_group_heading(group_heading))
275        {
276            result.push_str(&self.render_group_heading::<F>(&heading));
277        }
278
279        result.push_str(&crate::render::refs_to_string_with_format::<F>(
280            entries,
281            annotations,
282            annotation_style,
283        ));
284    }
285
286    fn append_rendered_partition<F>(
287        &self,
288        result: &mut String,
289        heading: Option<&BibliographyPartitionHeading>,
290        entries: Vec<ProcEntry>,
291        annotations: Option<&HashMap<String, String>>,
292        annotation_style: Option<&AnnotationStyle>,
293    ) where
294        F: OutputFormat<Output = String>,
295    {
296        if !result.is_empty() {
297            result.push_str("\n\n");
298        }
299
300        if let Some(heading) =
301            heading.and_then(|group_heading| self.resolve_partition_heading(group_heading))
302        {
303            result.push_str(&self.render_group_heading::<F>(&heading));
304        }
305
306        result.push_str(&crate::render::refs_to_string_with_format::<F>(
307            entries,
308            annotations,
309            annotation_style,
310        ));
311    }
312
313    pub(super) fn render_with_partition_sections<F>(
314        &self,
315        sorted_refs: Vec<&Reference>,
316        partitioning: &BibliographySortPartitioning,
317        annotations: Option<&HashMap<String, String>>,
318        annotation_style: Option<&AnnotationStyle>,
319    ) -> String
320    where
321        F: OutputFormat<Output = String>,
322    {
323        let fmt = F::default();
324        let mut result = String::new();
325
326        for (partition_key, references) in
327            crate::sort_partitioning::partition_references(sorted_refs, &self.locale, partitioning)
328        {
329            let heading = partition_key
330                .as_ref()
331                .and_then(|key| partitioning.headings.get(key));
332            let entries = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
333                references.into_iter(),
334                |reference, entry_number| {
335                    self.process_bibliography_entry_with_format::<F>(reference, entry_number)
336                },
337            ));
338            self.append_rendered_partition::<F>(
339                &mut result,
340                heading,
341                entries,
342                annotations,
343                annotation_style,
344            );
345        }
346
347        fmt.finish(result)
348    }
349
350    pub(super) fn render_with_custom_groups<F>(
351        &self,
352        all_entries: &[ProcEntry],
353        groups: &[BibliographyGroup],
354    ) -> String
355    where
356        F: OutputFormat<Output = String>,
357    {
358        let selected: HashSet<String> = all_entries.iter().map(|e| e.id.clone()).collect();
359        self.render_with_custom_groups_filtered::<F>(all_entries, groups, &selected, None, None)
360    }
361
362    pub(super) fn render_with_custom_groups_filtered<F>(
363        &self,
364        all_entries: &[ProcEntry],
365        groups: &[BibliographyGroup],
366        selected: &HashSet<String>,
367        annotations: Option<&HashMap<String, String>>,
368        annotation_style: Option<&AnnotationStyle>,
369    ) -> String
370    where
371        F: OutputFormat<Output = String>,
372    {
373        let fmt = F::default();
374        let cited_ids = self.cited_ids.borrow();
375        let evaluator = SelectorEvaluator::new(&cited_ids);
376        let sorter = GroupSorter::new(&self.locale);
377
378        let mut assigned = HashSet::new();
379        let mut result = String::new();
380
381        // First pass: collect all populated groups with their rendered entries
382        let mut populated_groups: Vec<(&BibliographyGroup, Vec<ProcEntry>)> = Vec::new();
383
384        for group in groups {
385            let matching_refs =
386                self.collect_matching_group_refs(all_entries, &assigned, &evaluator, group);
387
388            let matching_refs: Vec<&Reference> = matching_refs
389                .into_iter()
390                .filter(|r| r.id().as_deref().is_some_and(|id| selected.contains(id)))
391                .collect();
392
393            if matching_refs.is_empty() {
394                continue;
395            }
396
397            Self::mark_group_members_assigned(&mut assigned, &matching_refs);
398
399            let sorted_refs = if let Some(sort_spec) = &group.sort {
400                sorter.sort_references(matching_refs, &sort_spec.resolve())
401            } else {
402                matching_refs
403            };
404            let local_hints = self.build_group_local_hints(&sorted_refs, group);
405            let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
406                all_entries,
407                sorted_refs,
408                group,
409                local_hints.as_ref(),
410            ));
411
412            populated_groups.push((group, entries));
413        }
414
415        // Compute unassigned entries to determine if heading suppression applies
416        let unassigned_refs: Vec<&Reference> = all_entries
417            .iter()
418            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
419            .filter_map(|entry| self.bibliography.get(&entry.id))
420            .collect();
421
422        let suppress_heading = populated_groups.len() == 1 && unassigned_refs.is_empty();
423
424        // Second pass: render populated groups with optional heading suppression
425        for (group, entries) in populated_groups {
426            self.append_rendered_group::<F>(
427                &mut result,
428                group,
429                entries,
430                annotations,
431                annotation_style,
432                suppress_heading,
433            );
434        }
435
436        self.append_unassigned_entries_filtered::<F>(
437            &mut result,
438            all_entries,
439            &assigned,
440            selected,
441            annotations,
442            annotation_style,
443        );
444        fmt.finish(result)
445    }
446
447    fn append_unassigned_entries_filtered<F>(
448        &self,
449        result: &mut String,
450        bibliography: &[ProcEntry],
451        assigned: &HashSet<String>,
452        selected: &HashSet<String>,
453        annotations: Option<&HashMap<String, String>>,
454        annotation_style: Option<&AnnotationStyle>,
455    ) where
456        F: OutputFormat<Output = String>,
457    {
458        let unassigned_refs: Vec<&Reference> = bibliography
459            .iter()
460            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
461            .filter_map(|entry| self.bibliography.get(&entry.id))
462            .collect();
463
464        if unassigned_refs.is_empty() {
465            return;
466        }
467
468        // Re-process references to ensure correct author substitution and disambiguation
469        // within the unassigned subset.
470        let unassigned = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
471            unassigned_refs.into_iter(),
472            |reference, entry_number| {
473                self.process_bibliography_entry_with_format::<F>(reference, entry_number)
474            },
475        ));
476
477        if !result.is_empty() {
478            result.push_str("\n\n");
479        }
480
481        result.push_str(&crate::render::refs_to_string_with_format::<F>(
482            unassigned,
483            annotations,
484            annotation_style,
485        ));
486    }
487
488    fn render_with_legacy_grouping<F>(
489        &self,
490        bibliography: &[ProcEntry],
491        annotations: Option<&HashMap<String, String>>,
492        annotation_style: Option<&AnnotationStyle>,
493    ) -> String
494    where
495        F: OutputFormat<Output = String>,
496    {
497        let fmt = F::default();
498        let cited_ids = self.cited_ids.borrow();
499        let cited_entries: Vec<ProcEntry> = bibliography
500            .iter()
501            .filter(|entry| cited_ids.contains(&entry.id))
502            .cloned()
503            .collect();
504
505        let mut result = String::new();
506        if !cited_entries.is_empty() {
507            result.push_str(&crate::render::refs_to_string_with_format::<F>(
508                cited_entries,
509                annotations,
510                annotation_style,
511            ));
512        }
513
514        fmt.finish(result)
515    }
516
517    fn render_bibliography_for_group<F>(
518        &self,
519        group: &BibliographyGroup,
520        annotations: Option<&HashMap<String, String>>,
521        annotation_style: Option<&AnnotationStyle>,
522    ) -> String
523    where
524        F: OutputFormat<Output = String>,
525    {
526        let bibliography = self.sorted_id_stubs();
527        let fmt = F::default();
528        let cited_ids = self.cited_ids.borrow();
529        let evaluator = SelectorEvaluator::new(&cited_ids);
530        let sorter = GroupSorter::new(&self.locale);
531
532        let matching_refs =
533            self.collect_matching_group_refs(&bibliography, &HashSet::new(), &evaluator, group);
534
535        if matching_refs.is_empty() {
536            return fmt.finish(String::new());
537        }
538
539        let sorted_refs = if let Some(sort_spec) = &group.sort {
540            sorter.sort_references(matching_refs, &sort_spec.resolve())
541        } else {
542            matching_refs
543        };
544
545        let local_hints = self.build_group_local_hints(&sorted_refs, group);
546        let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
547            &bibliography,
548            sorted_refs,
549            group,
550            local_hints.as_ref(),
551        ));
552
553        fmt.finish(crate::render::refs_to_string_with_format::<F>(
554            entries,
555            annotations,
556            annotation_style,
557        ))
558    }
559
560    /// Render the bibliography with grouping for uncited (nocite) items.
561    ///
562    /// If `style.bibliography.groups` is defined, uses configurable grouping
563    /// with per-group sorting. Group selectors apply to individual references
564    /// before compound numeric rows are merged, so each rendered group only
565    /// includes the members that matched its selector. Otherwise, falls back to
566    /// hardcoded cited/uncited grouping for backward compatibility.
567    pub fn render_grouped_bibliography_with_format<F>(&self) -> String
568    where
569        F: OutputFormat<Output = String>,
570    {
571        self.render_grouped_bibliography_with_format_and_annotations::<F>(None, None)
572    }
573
574    /// Render the bibliography with grouping and annotations.
575    pub fn render_grouped_bibliography_with_format_and_annotations<F>(
576        &self,
577        annotations: Option<&HashMap<String, String>>,
578        annotation_style: Option<&AnnotationStyle>,
579    ) -> String
580    where
581        F: OutputFormat<Output = String>,
582    {
583        if let Some(groups) = self
584            .style
585            .bibliography
586            .as_ref()
587            .and_then(|bibliography| bibliography.groups.as_ref())
588        {
589            let id_stubs = self.sorted_id_stubs();
590            let selected = id_stubs
591                .iter()
592                .map(|e| e.id.clone())
593                .collect::<HashSet<_>>();
594            return self.render_with_custom_groups_filtered::<F>(
595                &id_stubs,
596                groups,
597                &selected,
598                annotations,
599                annotation_style,
600            );
601        }
602
603        let bibliography_options = self.get_bibliography_options();
604        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
605            && crate::sort_partitioning::should_render_sections(partitioning)
606        {
607            self.initialize_numeric_bibliography_numbers();
608            let sorted_refs = self.sort_references(self.bibliography.values().collect());
609            return self.render_with_partition_sections::<F>(
610                sorted_refs,
611                partitioning,
612                annotations,
613                annotation_style,
614            );
615        }
616
617        let all_entries = self.process_references().bibliography;
618        self.render_with_legacy_grouping::<F>(
619            &self.merge_compound_entries::<F>(all_entries),
620            annotations,
621            annotation_style,
622        )
623    }
624
625    /// Render frontmatter-defined bibliography groups for document output.
626    ///
627    /// This uses the same pre-merge selector semantics as
628    /// [`Self::render_grouped_bibliography_with_format`].
629    pub(crate) fn render_document_bibliography_groups<F>(
630        &self,
631        groups: &[BibliographyGroup],
632    ) -> String
633    where
634        F: OutputFormat<Output = String>,
635    {
636        let all_entries = self.sorted_id_stubs();
637        self.render_with_custom_groups::<F>(&all_entries, groups)
638    }
639
640    /// Render one bibliography block for document output.
641    ///
642    /// Returns heading and body separately so callers can insert headings
643    /// in their own output format.
644    pub(crate) fn render_document_bibliography_block<F>(
645        &self,
646        group: &BibliographyGroup,
647    ) -> RenderedBibliographyGroup
648    where
649        F: OutputFormat<Output = String>,
650    {
651        let mut headingless = group.clone();
652        let heading = headingless
653            .heading
654            .take()
655            .and_then(|group_heading| self.resolve_group_heading(&group_heading));
656        let body = self.render_bibliography_for_group::<F>(&headingless, None, None);
657
658        RenderedBibliographyGroup { heading, body }
659    }
660
661    pub(super) fn extract_metadata(&self, reference: &Reference) -> ProcEntryMetadata {
662        let bibliography_config = self.get_bibliography_config();
663        let options = RenderOptions {
664            config: &bibliography_config,
665            bibliography_config: Some(self.get_bibliography_options().into_owned()),
666            locale: &self.locale,
667            context: RenderContext::Bibliography,
668            mode: citum_schema::citation::CitationMode::NonIntegral,
669            suppress_author: false,
670            locator_raw: None,
671            ref_type: None,
672            show_semantics: self.show_semantics,
673            current_template_index: None,
674            abbreviation_map: self.abbreviation_map.as_ref(),
675        };
676
677        ProcEntryMetadata {
678            author: reference
679                .author()
680                .map(|authors| format_contributors_short(&authors.to_names_vec(), &options)),
681            year: reference
682                .csl_issued_date()
683                .map(|issued| issued.year().clone()),
684            title: reference.title().map(|title| title.to_string()),
685        }
686    }
687
688    fn render_group_heading<F>(&self, heading: &str) -> String
689    where
690        F: OutputFormat<Output = String>,
691    {
692        if std::any::type_name::<F>() == std::any::type_name::<crate::render::html::Html>() {
693            return format!("<h2>{heading}</h2>\n\n");
694        }
695
696        format!("# {heading}\n\n")
697    }
698}