Skip to main content

citum_engine/processor/bibliography/
grouping.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Grouped bibliography rendering with configurable selectors and sorting.
7
8use super::RenderedBibliographyGroup;
9use crate::api::AnnotationStyle;
10use crate::grouping::{GroupSorter, SelectorEvaluator};
11use crate::processor::Processor;
12use crate::processor::disambiguation::Disambiguator;
13use crate::processor::rendering::{CompoundRenderData, Renderer, RendererResources};
14use crate::reference::{Bibliography, Reference};
15use crate::render::ProcEntry;
16use crate::render::format::{OutputFormat, ProcEntryMetadata};
17use crate::values::{ProcHints, RenderContext, RenderOptions, format_contributors_short};
18use citum_schema::grouping::{BibliographyGroup, DisambiguationScope, GroupHeading};
19use citum_schema::options::{BibliographyPartitionHeading, BibliographySortPartitioning};
20use std::borrow::Cow;
21use std::collections::{HashMap, HashSet};
22
23impl Processor {
24    /// Resolve a localized or literal group heading.
25    pub(super) fn resolve_group_heading(&self, heading: &GroupHeading) -> Option<String> {
26        match heading {
27            GroupHeading::Literal { literal } => Some(literal.clone()),
28            GroupHeading::Term { term, form } => self.locale.resolved_general_term(
29                term,
30                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
31                None,
32            ),
33            GroupHeading::Localized { localized } => self.resolve_localized_heading(localized),
34        }
35    }
36
37    /// Resolve a localized heading map based on the processor locale.
38    ///
39    /// Matches in order:
40    /// 1. Exact locale (e.g., "en-GB")
41    /// 2. Primary language (e.g., "en")
42    /// 3. Style default locale
43    /// 4. en-US fallback
44    /// 5. First alphabetically defined key
45    fn resolve_localized_heading(&self, localized: &HashMap<String, String>) -> Option<String> {
46        fn language_tag(locale: &str) -> &str {
47            locale.split('-').next().unwrap_or(locale)
48        }
49
50        let mut candidates = Vec::new();
51        let mut push_candidate = |locale: &str| {
52            let candidate = locale.to_string();
53            if !candidates.contains(&candidate) {
54                candidates.push(candidate);
55            }
56        };
57
58        push_candidate(&self.locale.locale);
59        push_candidate(language_tag(&self.locale.locale));
60
61        if let Some(default_locale) = self.style.info.default_locale.as_deref() {
62            push_candidate(default_locale);
63            push_candidate(language_tag(default_locale));
64        }
65
66        push_candidate("en-US");
67        push_candidate("en");
68
69        for locale in candidates {
70            if let Some(value) = localized.get(&locale) {
71                return Some(value.clone());
72            }
73        }
74
75        localized
76            .iter()
77            .min_by(|left, right| left.0.cmp(right.0))
78            .map(|(_locale, value)| value.clone())
79    }
80
81    /// Resolve a bibliography partition heading.
82    fn resolve_partition_heading(&self, heading: &BibliographyPartitionHeading) -> Option<String> {
83        match heading {
84            BibliographyPartitionHeading::Literal { literal } => Some(literal.clone()),
85            BibliographyPartitionHeading::Term { term, form } => self.locale.resolved_general_term(
86                term,
87                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
88                None,
89            ),
90            BibliographyPartitionHeading::Localized { localized } => {
91                self.resolve_localized_heading(localized)
92            }
93        }
94    }
95
96    /// Find unassigned bibliography entries that match a group's selector.
97    fn collect_matching_group_refs<'a>(
98        &'a self,
99        bibliography: &'a [ProcEntry],
100        assigned: &HashSet<String>,
101        evaluator: &SelectorEvaluator<'_>,
102        group: &BibliographyGroup,
103    ) -> Vec<&'a Reference> {
104        bibliography
105            .iter()
106            .filter(|entry| !assigned.contains(&entry.id))
107            .filter_map(|entry| {
108                self.bibliography
109                    .get(&entry.id)
110                    .filter(|reference| evaluator.matches(reference, &group.selector))
111            })
112            .collect()
113    }
114
115    /// Returns `ProcEntry` stubs with only `id` populated, in sort order.
116    ///
117    /// Used for grouping paths that only need IDs for selector matching — avoids
118    /// the full PlainText render pass that `process_references` performs.
119    fn sorted_id_stubs(&self) -> Vec<ProcEntry> {
120        self.initialize_numeric_bibliography_numbers();
121        self.sort_references(self.bibliography.values().collect())
122            .into_iter()
123            .filter_map(|r| {
124                r.id().map(|id| ProcEntry {
125                    id: id.to_string(),
126                    template: vec![],
127                    metadata: ProcEntryMetadata::default(),
128                })
129            })
130            .collect()
131    }
132
133    /// Mark references as assigned to a bibliography group.
134    fn mark_group_members_assigned(assigned: &mut HashSet<String>, references: &[&Reference]) {
135        for reference in references {
136            if let Some(id) = reference.id() {
137                assigned.insert(id.to_string());
138            }
139        }
140    }
141
142    /// Calculate disambiguation hints locally within a bibliography group.
143    ///
144    /// Only calculates hints if the group specifies local disambiguation scope.
145    fn build_group_local_hints(
146        &self,
147        sorted_refs: &[&Reference],
148        group: &BibliographyGroup,
149    ) -> Option<HashMap<String, ProcHints>> {
150        if !matches!(group.disambiguate, Some(DisambiguationScope::Locally)) {
151            return None;
152        }
153
154        let mut group_bibliography = Bibliography::new();
155        for reference in sorted_refs {
156            group_bibliography.insert(
157                reference.id().unwrap_or_default().to_string(),
158                (*reference).clone(),
159            );
160        }
161
162        let resolved_sort = group
163            .sort
164            .as_ref()
165            .map(citum_schema::GroupSortEntry::resolve);
166        let bibliography_config = self.get_bibliography_config();
167        let disambiguator = if let Some(sort) = resolved_sort.as_ref() {
168            Disambiguator::with_group_sort(
169                &group_bibliography,
170                &bibliography_config,
171                &self.locale,
172                sort,
173            )
174        } else {
175            Disambiguator::new(&group_bibliography, &bibliography_config, &self.locale)
176        };
177
178        Some(disambiguator.calculate_hints())
179    }
180
181    /// Resolve the effective style to use for a bibliography group.
182    fn effective_group_style<'a>(
183        &'a self,
184        group: &'a BibliographyGroup,
185    ) -> Cow<'a, citum_schema::Style> {
186        if let Some(group_template) = &group.template {
187            let mut local_style = self.style.clone();
188            if let Some(bibliography) = local_style.bibliography.as_mut() {
189                bibliography.template = Some(group_template.clone());
190            }
191            Cow::Owned(local_style)
192        } else {
193            Cow::Borrowed(&self.style)
194        }
195    }
196
197    /// Render bibliography entries for a specific group.
198    fn render_group_entries<F>(
199        &self,
200        _bibliography: &[ProcEntry],
201        sorted_refs: Vec<&Reference>,
202        group: &BibliographyGroup,
203        local_hints: Option<&HashMap<String, ProcHints>>,
204    ) -> Vec<ProcEntry>
205    where
206        F: OutputFormat<Output = String>,
207    {
208        // Always process entries with format F so that group components (pre_formatted=true)
209        // contain markup in the target format rather than PlainText (_..._).
210        let hints = local_hints.unwrap_or(&self.hints);
211        let effective_style = self.effective_group_style(group);
212        let bibliography_config = self.get_bibliography_config();
213        let bibliography_options = self.get_bibliography_options().into_owned();
214        let substitute = bibliography_options.subsequent_author_substitute.clone();
215        let renderer = Renderer::new(
216            RendererResources {
217                style: &effective_style,
218                bibliography: &self.bibliography,
219                locale: &self.locale,
220                config: &bibliography_config,
221                bibliography_config: Some(bibliography_options),
222            },
223            hints,
224            &self.citation_numbers,
225            CompoundRenderData {
226                set_by_ref: &self.compound_set_by_ref,
227                member_index: &self.compound_member_index,
228                sets: &self.compound_sets,
229            },
230            self.show_semantics,
231            self.inject_ast_indices,
232            self.abbreviation_map.as_ref(),
233        );
234
235        let mut entries = Vec::new();
236        let mut previous_reference: Option<&Reference> = None;
237
238        for (index, reference) in sorted_refs.into_iter().enumerate() {
239            let ref_id = reference.id().unwrap_or_default().to_string();
240            let entry_number = self
241                .citation_numbers
242                .borrow()
243                .get(&ref_id)
244                .copied()
245                .unwrap_or(index + 1);
246
247            if let Some(mut processed) =
248                renderer.process_bibliography_entry_with_format::<F>(reference, entry_number)
249            {
250                if let Some(substitute_string) = substitute.as_deref()
251                    && let Some(previous) = previous_reference
252                    && self.contributors_match(previous, reference)
253                {
254                    renderer.apply_author_substitution_with_format::<F>(
255                        &mut processed,
256                        substitute_string,
257                    );
258                }
259
260                entries.push(ProcEntry {
261                    id: ref_id,
262                    template: processed,
263                    metadata: self.extract_metadata(reference),
264                });
265                previous_reference = Some(reference);
266            }
267        }
268
269        entries
270    }
271
272    /// Append a rendered bibliography group to the output string.
273    fn append_rendered_group<F>(
274        &self,
275        result: &mut String,
276        group: &BibliographyGroup,
277        entries: Vec<ProcEntry>,
278        annotations: Option<&HashMap<String, String>>,
279        annotation_style: Option<&AnnotationStyle>,
280        suppress_heading: bool,
281    ) where
282        F: OutputFormat<Output = String>,
283    {
284        if !result.is_empty() {
285            result.push_str("\n\n");
286        }
287
288        if !suppress_heading
289            && let Some(heading) = group
290                .heading
291                .as_ref()
292                .and_then(|group_heading| self.resolve_group_heading(group_heading))
293        {
294            result.push_str(&self.render_group_heading::<F>(&heading));
295        }
296
297        result.push_str(&crate::render::refs_to_string_with_format::<F>(
298            entries,
299            annotations,
300            annotation_style,
301        ));
302    }
303
304    /// Append a rendered bibliography partition to the output string.
305    fn append_rendered_partition<F>(
306        &self,
307        result: &mut String,
308        heading: Option<&BibliographyPartitionHeading>,
309        entries: Vec<ProcEntry>,
310        annotations: Option<&HashMap<String, String>>,
311        annotation_style: Option<&AnnotationStyle>,
312    ) where
313        F: OutputFormat<Output = String>,
314    {
315        if !result.is_empty() {
316            result.push_str("\n\n");
317        }
318
319        if let Some(heading) =
320            heading.and_then(|group_heading| self.resolve_partition_heading(group_heading))
321        {
322            result.push_str(&self.render_group_heading::<F>(&heading));
323        }
324
325        result.push_str(&crate::render::refs_to_string_with_format::<F>(
326            entries,
327            annotations,
328            annotation_style,
329        ));
330    }
331
332    /// Orchestrate the rendering of automatic bibliography partitions with headings.
333    pub(super) fn render_with_partition_sections<F>(
334        &self,
335        sorted_refs: Vec<&Reference>,
336        partitioning: &BibliographySortPartitioning,
337        annotations: Option<&HashMap<String, String>>,
338        annotation_style: Option<&AnnotationStyle>,
339    ) -> String
340    where
341        F: OutputFormat<Output = String>,
342    {
343        let fmt = F::default();
344        let mut result = String::new();
345
346        for (partition_key, references) in
347            crate::sort_partitioning::partition_references(sorted_refs, &self.locale, partitioning)
348        {
349            let heading = partition_key
350                .as_ref()
351                .and_then(|key| partitioning.headings.get(key));
352            let entries = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
353                references.into_iter(),
354                |reference, entry_number| {
355                    self.process_bibliography_entry_with_format::<F>(reference, entry_number)
356                },
357            ));
358            self.append_rendered_partition::<F>(
359                &mut result,
360                heading,
361                entries,
362                annotations,
363                annotation_style,
364            );
365        }
366
367        fmt.finish(result)
368    }
369
370    /// Render all entries using custom bibliography grouping.
371    pub(super) fn render_with_custom_groups<F>(
372        &self,
373        all_entries: &[ProcEntry],
374        groups: &[BibliographyGroup],
375    ) -> String
376    where
377        F: OutputFormat<Output = String>,
378    {
379        let selected: HashSet<String> = all_entries.iter().map(|e| e.id.clone()).collect();
380        self.render_with_custom_groups_filtered::<F>(all_entries, groups, &selected, None, None)
381    }
382
383    /// Render a filtered subset of entries using custom bibliography grouping.
384    ///
385    /// This uses a two-pass grouping strategy:
386    /// 1. Collect and render all populated groups.
387    /// 2. Determine if heading suppression applies (only one group populated).
388    /// 3. Append groups and any remaining unassigned entries.
389    pub(super) fn render_with_custom_groups_filtered<F>(
390        &self,
391        all_entries: &[ProcEntry],
392        groups: &[BibliographyGroup],
393        selected: &HashSet<String>,
394        annotations: Option<&HashMap<String, String>>,
395        annotation_style: Option<&AnnotationStyle>,
396    ) -> String
397    where
398        F: OutputFormat<Output = String>,
399    {
400        let fmt = F::default();
401        let cited_ids = self.cited_ids.borrow();
402        let evaluator = SelectorEvaluator::new(&cited_ids);
403        let sorter = GroupSorter::new(&self.locale);
404
405        let mut assigned = HashSet::new();
406        let mut result = String::new();
407
408        // First pass: collect all populated groups with their rendered entries
409        let mut populated_groups: Vec<(&BibliographyGroup, Vec<ProcEntry>)> = Vec::new();
410
411        for group in groups {
412            let matching_refs =
413                self.collect_matching_group_refs(all_entries, &assigned, &evaluator, group);
414
415            let matching_refs: Vec<&Reference> = matching_refs
416                .into_iter()
417                .filter(|r| r.id().as_deref().is_some_and(|id| selected.contains(id)))
418                .collect();
419
420            if matching_refs.is_empty() {
421                continue;
422            }
423
424            Self::mark_group_members_assigned(&mut assigned, &matching_refs);
425
426            let sorted_refs = if let Some(sort_spec) = &group.sort {
427                sorter.sort_references(matching_refs, &sort_spec.resolve())
428            } else {
429                matching_refs
430            };
431            let local_hints = self.build_group_local_hints(&sorted_refs, group);
432            let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
433                all_entries,
434                sorted_refs,
435                group,
436                local_hints.as_ref(),
437            ));
438
439            populated_groups.push((group, entries));
440        }
441
442        // Compute unassigned entries to determine if heading suppression applies
443        let unassigned_refs: Vec<&Reference> = all_entries
444            .iter()
445            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
446            .filter_map(|entry| self.bibliography.get(&entry.id))
447            .collect();
448
449        let suppress_heading = populated_groups.len() == 1 && unassigned_refs.is_empty();
450
451        // Second pass: render populated groups with optional heading suppression
452        for (group, entries) in populated_groups {
453            self.append_rendered_group::<F>(
454                &mut result,
455                group,
456                entries,
457                annotations,
458                annotation_style,
459                suppress_heading,
460            );
461        }
462
463        self.append_unassigned_entries_filtered::<F>(
464            &mut result,
465            all_entries,
466            &assigned,
467            selected,
468            annotations,
469            annotation_style,
470        );
471        fmt.finish(result)
472    }
473
474    /// Append unassigned bibliography entries to the output string.
475    fn append_unassigned_entries_filtered<F>(
476        &self,
477        result: &mut String,
478        bibliography: &[ProcEntry],
479        assigned: &HashSet<String>,
480        selected: &HashSet<String>,
481        annotations: Option<&HashMap<String, String>>,
482        annotation_style: Option<&AnnotationStyle>,
483    ) where
484        F: OutputFormat<Output = String>,
485    {
486        let unassigned_refs: Vec<&Reference> = bibliography
487            .iter()
488            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
489            .filter_map(|entry| self.bibliography.get(&entry.id))
490            .collect();
491
492        if unassigned_refs.is_empty() {
493            return;
494        }
495
496        // Re-process references to ensure correct author substitution and disambiguation
497        // within the unassigned subset.
498        let unassigned = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
499            unassigned_refs.into_iter(),
500            |reference, entry_number| {
501                self.process_bibliography_entry_with_format::<F>(reference, entry_number)
502            },
503        ));
504
505        if !result.is_empty() {
506            result.push_str("\n\n");
507        }
508
509        result.push_str(&crate::render::refs_to_string_with_format::<F>(
510            unassigned,
511            annotations,
512            annotation_style,
513        ));
514    }
515
516    /// Render bibliography using legacy (cited/uncited) grouping.
517    fn render_with_legacy_grouping<F>(
518        &self,
519        bibliography: &[ProcEntry],
520        annotations: Option<&HashMap<String, String>>,
521        annotation_style: Option<&AnnotationStyle>,
522    ) -> String
523    where
524        F: OutputFormat<Output = String>,
525    {
526        let fmt = F::default();
527        let cited_ids = self.cited_ids.borrow();
528        let cited_entries: Vec<ProcEntry> = bibliography
529            .iter()
530            .filter(|entry| cited_ids.contains(&entry.id))
531            .cloned()
532            .collect();
533
534        let mut result = String::new();
535        if !cited_entries.is_empty() {
536            result.push_str(&crate::render::refs_to_string_with_format::<F>(
537                cited_entries,
538                annotations,
539                annotation_style,
540            ));
541        }
542
543        fmt.finish(result)
544    }
545
546    /// Render a standalone bibliography block for a group.
547    fn render_bibliography_for_group<F>(
548        &self,
549        group: &BibliographyGroup,
550        annotations: Option<&HashMap<String, String>>,
551        annotation_style: Option<&AnnotationStyle>,
552    ) -> String
553    where
554        F: OutputFormat<Output = String>,
555    {
556        let bibliography = self.sorted_id_stubs();
557        let fmt = F::default();
558        let cited_ids = self.cited_ids.borrow();
559        let evaluator = SelectorEvaluator::new(&cited_ids);
560        let sorter = GroupSorter::new(&self.locale);
561
562        let matching_refs =
563            self.collect_matching_group_refs(&bibliography, &HashSet::new(), &evaluator, group);
564
565        if matching_refs.is_empty() {
566            return fmt.finish(String::new());
567        }
568
569        let sorted_refs = if let Some(sort_spec) = &group.sort {
570            sorter.sort_references(matching_refs, &sort_spec.resolve())
571        } else {
572            matching_refs
573        };
574
575        let local_hints = self.build_group_local_hints(&sorted_refs, group);
576        let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
577            &bibliography,
578            sorted_refs,
579            group,
580            local_hints.as_ref(),
581        ));
582
583        fmt.finish(crate::render::refs_to_string_with_format::<F>(
584            entries,
585            annotations,
586            annotation_style,
587        ))
588    }
589
590    /// Render the bibliography with grouping for uncited (nocite) items.
591    ///
592    /// If `style.bibliography.groups` is defined, uses configurable grouping
593    /// with per-group sorting. Group selectors apply to individual references
594    /// before compound numeric rows are merged, so each rendered group only
595    /// includes the members that matched its selector. Otherwise, falls back to
596    /// hardcoded cited/uncited grouping for backward compatibility.
597    pub fn render_grouped_bibliography_with_format<F>(&self) -> String
598    where
599        F: OutputFormat<Output = String>,
600    {
601        self.render_grouped_bibliography_with_format_and_annotations::<F>(None, None)
602    }
603
604    /// Render the bibliography with grouping and annotations.
605    pub fn render_grouped_bibliography_with_format_and_annotations<F>(
606        &self,
607        annotations: Option<&HashMap<String, String>>,
608        annotation_style: Option<&AnnotationStyle>,
609    ) -> String
610    where
611        F: OutputFormat<Output = String>,
612    {
613        if let Some(groups) = self
614            .style
615            .bibliography
616            .as_ref()
617            .and_then(|bibliography| bibliography.groups.as_ref())
618        {
619            let id_stubs = self.sorted_id_stubs();
620            let selected = id_stubs
621                .iter()
622                .map(|e| e.id.clone())
623                .collect::<HashSet<_>>();
624            return self.render_with_custom_groups_filtered::<F>(
625                &id_stubs,
626                groups,
627                &selected,
628                annotations,
629                annotation_style,
630            );
631        }
632
633        let bibliography_options = self.get_bibliography_options();
634        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
635            && crate::sort_partitioning::should_render_sections(partitioning)
636        {
637            self.initialize_numeric_bibliography_numbers();
638            let sorted_refs = self.sort_references(self.bibliography.values().collect());
639            return self.render_with_partition_sections::<F>(
640                sorted_refs,
641                partitioning,
642                annotations,
643                annotation_style,
644            );
645        }
646
647        let all_entries = self.process_references().bibliography;
648        self.render_with_legacy_grouping::<F>(
649            &self.merge_compound_entries::<F>(all_entries),
650            annotations,
651            annotation_style,
652        )
653    }
654
655    /// Render frontmatter-defined bibliography groups for document output.
656    ///
657    /// This uses the same pre-merge selector semantics as
658    /// [`Self::render_grouped_bibliography_with_format`].
659    pub(crate) fn render_document_bibliography_groups<F>(
660        &self,
661        groups: &[BibliographyGroup],
662    ) -> String
663    where
664        F: OutputFormat<Output = String>,
665    {
666        let all_entries = self.sorted_id_stubs();
667        self.render_with_custom_groups::<F>(&all_entries, groups)
668    }
669
670    /// Render one bibliography block for document output.
671    ///
672    /// Returns heading and body separately so callers can insert headings
673    /// in their own output format.
674    pub(crate) fn render_document_bibliography_block<F>(
675        &self,
676        group: &BibliographyGroup,
677    ) -> RenderedBibliographyGroup
678    where
679        F: OutputFormat<Output = String>,
680    {
681        let mut headingless = group.clone();
682        let heading = headingless
683            .heading
684            .take()
685            .and_then(|group_heading| self.resolve_group_heading(&group_heading));
686        let body = self.render_bibliography_for_group::<F>(&headingless, None, None);
687
688        RenderedBibliographyGroup { heading, body }
689    }
690
691    pub(super) fn extract_metadata(&self, reference: &Reference) -> ProcEntryMetadata {
692        let bibliography_config = self.get_bibliography_config();
693        let options = RenderOptions {
694            config: &bibliography_config,
695            bibliography_config: Some(self.get_bibliography_options().into_owned()),
696            locale: &self.locale,
697            context: RenderContext::Bibliography,
698            mode: citum_schema::citation::CitationMode::NonIntegral,
699            suppress_author: false,
700            locator_raw: None,
701            ref_type: None,
702            show_semantics: self.show_semantics,
703            current_template_index: None,
704            abbreviation_map: self.abbreviation_map.as_ref(),
705        };
706
707        ProcEntryMetadata {
708            author: reference
709                .author()
710                .map(|authors| format_contributors_short(&authors.to_names_vec(), &options)),
711            year: reference
712                .csl_issued_date()
713                .map(|issued| issued.year().clone()),
714            title: reference.title().map(|title| title.to_string()),
715        }
716    }
717
718    fn render_group_heading<F>(&self, heading: &str) -> String
719    where
720        F: OutputFormat<Output = String>,
721    {
722        if std::any::type_name::<F>() == std::any::type_name::<crate::render::html::Html>() {
723            return format!("<h2>{heading}</h2>\n\n");
724        }
725
726        format!("# {heading}\n\n")
727    }
728}