Skip to main content

citum_engine/processor/bibliography/
grouping.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Grouped bibliography rendering with configurable selectors and sorting.
7
8use super::RenderedBibliographyGroup;
9use crate::api::AnnotationStyle;
10use crate::grouping::{GroupSorter, SelectorEvaluator};
11use crate::processor::Processor;
12use crate::processor::disambiguation::Disambiguator;
13use crate::processor::rendering::{CompoundRenderData, Renderer, RendererResources};
14use crate::reference::{Bibliography, Reference};
15use crate::render::ProcEntry;
16use crate::render::format::{OutputFormat, ProcEntryMetadata};
17use crate::values::{ProcHints, RenderContext, RenderOptions, format_contributors_short};
18use citum_schema::grouping::{BibliographyGroup, DisambiguationScope, GroupHeading};
19use citum_schema::options::{BibliographyPartitionHeading, BibliographySortPartitioning};
20use std::borrow::Cow;
21use std::collections::{HashMap, HashSet};
22
23impl Processor {
24    pub(super) fn resolve_group_heading(&self, heading: &GroupHeading) -> Option<String> {
25        match heading {
26            GroupHeading::Literal { literal } => Some(literal.clone()),
27            GroupHeading::Term { term, form } => self.locale.resolved_general_term(
28                term,
29                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
30                None,
31            ),
32            GroupHeading::Localized { localized } => self.resolve_localized_heading(localized),
33        }
34    }
35
36    fn resolve_localized_heading(&self, localized: &HashMap<String, String>) -> Option<String> {
37        fn language_tag(locale: &str) -> &str {
38            locale.split('-').next().unwrap_or(locale)
39        }
40
41        let mut candidates = Vec::new();
42        let mut push_candidate = |locale: &str| {
43            let candidate = locale.to_string();
44            if !candidates.contains(&candidate) {
45                candidates.push(candidate);
46            }
47        };
48
49        push_candidate(&self.locale.locale);
50        push_candidate(language_tag(&self.locale.locale));
51
52        if let Some(default_locale) = self.style.info.default_locale.as_deref() {
53            push_candidate(default_locale);
54            push_candidate(language_tag(default_locale));
55        }
56
57        push_candidate("en-US");
58        push_candidate("en");
59
60        for locale in candidates {
61            if let Some(value) = localized.get(&locale) {
62                return Some(value.clone());
63            }
64        }
65
66        localized
67            .iter()
68            .min_by(|left, right| left.0.cmp(right.0))
69            .map(|(_locale, value)| value.clone())
70    }
71
72    fn resolve_partition_heading(&self, heading: &BibliographyPartitionHeading) -> Option<String> {
73        match heading {
74            BibliographyPartitionHeading::Literal { literal } => Some(literal.clone()),
75            BibliographyPartitionHeading::Term { term, form } => self.locale.resolved_general_term(
76                term,
77                &form.clone().unwrap_or(citum_schema::locale::TermForm::Long),
78                None,
79            ),
80            BibliographyPartitionHeading::Localized { localized } => {
81                self.resolve_localized_heading(localized)
82            }
83        }
84    }
85
86    fn collect_matching_group_refs<'a>(
87        &'a self,
88        bibliography: &'a [ProcEntry],
89        assigned: &HashSet<String>,
90        evaluator: &SelectorEvaluator<'_>,
91        group: &BibliographyGroup,
92    ) -> Vec<&'a Reference> {
93        bibliography
94            .iter()
95            .filter(|entry| !assigned.contains(&entry.id))
96            .filter_map(|entry| {
97                self.bibliography
98                    .get(&entry.id)
99                    .filter(|reference| evaluator.matches(reference, &group.selector))
100            })
101            .collect()
102    }
103
104    fn mark_group_members_assigned(assigned: &mut HashSet<String>, references: &[&Reference]) {
105        for reference in references {
106            if let Some(id) = reference.id() {
107                assigned.insert(id.to_string());
108            }
109        }
110    }
111
112    fn build_group_local_hints(
113        &self,
114        sorted_refs: &[&Reference],
115        group: &BibliographyGroup,
116    ) -> Option<HashMap<String, ProcHints>> {
117        if !matches!(group.disambiguate, Some(DisambiguationScope::Locally)) {
118            return None;
119        }
120
121        let mut group_bibliography = Bibliography::new();
122        for reference in sorted_refs {
123            group_bibliography.insert(
124                reference.id().unwrap_or_default().to_string(),
125                (*reference).clone(),
126            );
127        }
128
129        let resolved_sort = group
130            .sort
131            .as_ref()
132            .map(citum_schema::GroupSortEntry::resolve);
133        let bibliography_config = self.get_bibliography_config();
134        let disambiguator = if let Some(sort) = resolved_sort.as_ref() {
135            Disambiguator::with_group_sort(
136                &group_bibliography,
137                &bibliography_config,
138                &self.locale,
139                sort,
140            )
141        } else {
142            Disambiguator::new(&group_bibliography, &bibliography_config, &self.locale)
143        };
144
145        Some(disambiguator.calculate_hints())
146    }
147
148    fn effective_group_style<'a>(
149        &'a self,
150        group: &'a BibliographyGroup,
151    ) -> Cow<'a, citum_schema::Style> {
152        if let Some(group_template) = &group.template {
153            let mut local_style = self.style.clone();
154            if let Some(bibliography) = local_style.bibliography.as_mut() {
155                bibliography.template = Some(group_template.clone());
156            }
157            Cow::Owned(local_style)
158        } else {
159            Cow::Borrowed(&self.style)
160        }
161    }
162
163    fn render_group_entries<F>(
164        &self,
165        bibliography: &[ProcEntry],
166        sorted_refs: Vec<&Reference>,
167        group: &BibliographyGroup,
168        local_hints: Option<&HashMap<String, ProcHints>>,
169    ) -> Vec<ProcEntry>
170    where
171        F: OutputFormat<Output = String>,
172    {
173        if local_hints.is_none() && group.template.is_none() {
174            return sorted_refs
175                .into_iter()
176                .filter_map(|reference| {
177                    let id = reference.id()?;
178                    bibliography.iter().find(|entry| entry.id == id).cloned()
179                })
180                .collect();
181        }
182
183        let hints = local_hints.unwrap_or(&self.hints);
184        let effective_style = self.effective_group_style(group);
185        let bibliography_config = self.get_bibliography_config();
186        let bibliography_options = self.get_bibliography_options().into_owned();
187        let substitute = bibliography_options.subsequent_author_substitute.clone();
188        let renderer = Renderer::new(
189            RendererResources {
190                style: &effective_style,
191                bibliography: &self.bibliography,
192                locale: &self.locale,
193                config: &bibliography_config,
194                bibliography_config: Some(bibliography_options),
195            },
196            hints,
197            &self.citation_numbers,
198            CompoundRenderData {
199                set_by_ref: &self.compound_set_by_ref,
200                member_index: &self.compound_member_index,
201                sets: &self.compound_sets,
202            },
203            self.show_semantics,
204            self.inject_ast_indices,
205            self.abbreviation_map.as_ref(),
206        );
207
208        let mut entries = Vec::new();
209        let mut previous_reference: Option<&Reference> = None;
210
211        for (index, reference) in sorted_refs.into_iter().enumerate() {
212            let ref_id = reference.id().unwrap_or_default().to_string();
213            let entry_number = self
214                .citation_numbers
215                .borrow()
216                .get(&ref_id)
217                .copied()
218                .unwrap_or(index + 1);
219
220            if let Some(mut processed) =
221                renderer.process_bibliography_entry(reference, entry_number)
222            {
223                if let Some(substitute_string) = substitute.as_deref()
224                    && let Some(previous) = previous_reference
225                    && self.contributors_match(previous, reference)
226                {
227                    renderer.apply_author_substitution_with_format::<F>(
228                        &mut processed,
229                        substitute_string,
230                    );
231                }
232
233                entries.push(ProcEntry {
234                    id: ref_id,
235                    template: processed,
236                    metadata: self.extract_metadata(reference),
237                });
238                previous_reference = Some(reference);
239            }
240        }
241
242        entries
243    }
244
245    fn append_rendered_group<F>(
246        &self,
247        result: &mut String,
248        group: &BibliographyGroup,
249        entries: Vec<ProcEntry>,
250        annotations: Option<&HashMap<String, String>>,
251        annotation_style: Option<&AnnotationStyle>,
252        suppress_heading: bool,
253    ) where
254        F: OutputFormat<Output = String>,
255    {
256        if !result.is_empty() {
257            result.push_str("\n\n");
258        }
259
260        if !suppress_heading
261            && let Some(heading) = group
262                .heading
263                .as_ref()
264                .and_then(|group_heading| self.resolve_group_heading(group_heading))
265        {
266            result.push_str(&self.render_group_heading::<F>(&heading));
267        }
268
269        result.push_str(&crate::render::refs_to_string_with_format::<F>(
270            entries,
271            annotations,
272            annotation_style,
273        ));
274    }
275
276    fn append_rendered_partition<F>(
277        &self,
278        result: &mut String,
279        heading: Option<&BibliographyPartitionHeading>,
280        entries: Vec<ProcEntry>,
281        annotations: Option<&HashMap<String, String>>,
282        annotation_style: Option<&AnnotationStyle>,
283    ) where
284        F: OutputFormat<Output = String>,
285    {
286        if !result.is_empty() {
287            result.push_str("\n\n");
288        }
289
290        if let Some(heading) =
291            heading.and_then(|group_heading| self.resolve_partition_heading(group_heading))
292        {
293            result.push_str(&self.render_group_heading::<F>(&heading));
294        }
295
296        result.push_str(&crate::render::refs_to_string_with_format::<F>(
297            entries,
298            annotations,
299            annotation_style,
300        ));
301    }
302
303    pub(super) fn render_with_partition_sections<F>(
304        &self,
305        sorted_refs: Vec<&Reference>,
306        partitioning: &BibliographySortPartitioning,
307        annotations: Option<&HashMap<String, String>>,
308        annotation_style: Option<&AnnotationStyle>,
309    ) -> String
310    where
311        F: OutputFormat<Output = String>,
312    {
313        let fmt = F::default();
314        let mut result = String::new();
315
316        for (partition_key, references) in
317            crate::sort_partitioning::partition_references(sorted_refs, &self.locale, partitioning)
318        {
319            let heading = partition_key
320                .as_ref()
321                .and_then(|key| partitioning.headings.get(key));
322            let entries = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
323                references.into_iter(),
324                |reference, entry_number| {
325                    self.process_bibliography_entry_with_format::<F>(reference, entry_number)
326                },
327            ));
328            self.append_rendered_partition::<F>(
329                &mut result,
330                heading,
331                entries,
332                annotations,
333                annotation_style,
334            );
335        }
336
337        fmt.finish(result)
338    }
339
340    pub(super) fn render_with_custom_groups<F>(
341        &self,
342        all_entries: &[ProcEntry],
343        groups: &[BibliographyGroup],
344    ) -> String
345    where
346        F: OutputFormat<Output = String>,
347    {
348        let selected: HashSet<String> = all_entries.iter().map(|e| e.id.clone()).collect();
349        self.render_with_custom_groups_filtered::<F>(all_entries, groups, &selected, None, None)
350    }
351
352    pub(super) fn render_with_custom_groups_filtered<F>(
353        &self,
354        all_entries: &[ProcEntry],
355        groups: &[BibliographyGroup],
356        selected: &HashSet<String>,
357        annotations: Option<&HashMap<String, String>>,
358        annotation_style: Option<&AnnotationStyle>,
359    ) -> String
360    where
361        F: OutputFormat<Output = String>,
362    {
363        let fmt = F::default();
364        let cited_ids = self.cited_ids.borrow();
365        let evaluator = SelectorEvaluator::new(&cited_ids);
366        let sorter = GroupSorter::new(&self.locale);
367
368        let mut assigned = HashSet::new();
369        let mut result = String::new();
370
371        // First pass: collect all populated groups with their rendered entries
372        let mut populated_groups: Vec<(&BibliographyGroup, Vec<ProcEntry>)> = Vec::new();
373
374        for group in groups {
375            let matching_refs =
376                self.collect_matching_group_refs(all_entries, &assigned, &evaluator, group);
377
378            let matching_refs: Vec<&Reference> = matching_refs
379                .into_iter()
380                .filter(|r| r.id().as_deref().is_some_and(|id| selected.contains(id)))
381                .collect();
382
383            if matching_refs.is_empty() {
384                continue;
385            }
386
387            Self::mark_group_members_assigned(&mut assigned, &matching_refs);
388
389            let sorted_refs = if let Some(sort_spec) = &group.sort {
390                sorter.sort_references(matching_refs, &sort_spec.resolve())
391            } else {
392                matching_refs
393            };
394            let local_hints = self.build_group_local_hints(&sorted_refs, group);
395            let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
396                all_entries,
397                sorted_refs,
398                group,
399                local_hints.as_ref(),
400            ));
401
402            populated_groups.push((group, entries));
403        }
404
405        // Compute unassigned entries to determine if heading suppression applies
406        let unassigned_refs: Vec<&Reference> = all_entries
407            .iter()
408            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
409            .filter_map(|entry| self.bibliography.get(&entry.id))
410            .collect();
411
412        let suppress_heading = populated_groups.len() == 1 && unassigned_refs.is_empty();
413
414        // Second pass: render populated groups with optional heading suppression
415        for (group, entries) in populated_groups {
416            self.append_rendered_group::<F>(
417                &mut result,
418                group,
419                entries,
420                annotations,
421                annotation_style,
422                suppress_heading,
423            );
424        }
425
426        self.append_unassigned_entries_filtered::<F>(
427            &mut result,
428            all_entries,
429            &assigned,
430            selected,
431            annotations,
432            annotation_style,
433        );
434        fmt.finish(result)
435    }
436
437    fn append_unassigned_entries_filtered<F>(
438        &self,
439        result: &mut String,
440        bibliography: &[ProcEntry],
441        assigned: &HashSet<String>,
442        selected: &HashSet<String>,
443        annotations: Option<&HashMap<String, String>>,
444        annotation_style: Option<&AnnotationStyle>,
445    ) where
446        F: OutputFormat<Output = String>,
447    {
448        let unassigned_refs: Vec<&Reference> = bibliography
449            .iter()
450            .filter(|entry| !assigned.contains(&entry.id) && selected.contains(&entry.id))
451            .filter_map(|entry| self.bibliography.get(&entry.id))
452            .collect();
453
454        if unassigned_refs.is_empty() {
455            return;
456        }
457
458        // Re-process references to ensure correct author substitution and disambiguation
459        // within the unassigned subset.
460        let unassigned = self.merge_compound_entries::<F>(self.process_sorted_refs::<_, F>(
461            unassigned_refs.into_iter(),
462            |reference, entry_number| {
463                self.process_bibliography_entry_with_format::<F>(reference, entry_number)
464            },
465        ));
466
467        if !result.is_empty() {
468            result.push_str("\n\n");
469        }
470
471        result.push_str(&crate::render::refs_to_string_with_format::<F>(
472            unassigned,
473            annotations,
474            annotation_style,
475        ));
476    }
477
478    fn render_with_legacy_grouping<F>(
479        &self,
480        bibliography: &[ProcEntry],
481        annotations: Option<&HashMap<String, String>>,
482        annotation_style: Option<&AnnotationStyle>,
483    ) -> String
484    where
485        F: OutputFormat<Output = String>,
486    {
487        let fmt = F::default();
488        let cited_ids = self.cited_ids.borrow();
489        let cited_entries: Vec<ProcEntry> = bibliography
490            .iter()
491            .filter(|entry| cited_ids.contains(&entry.id))
492            .cloned()
493            .collect();
494
495        let mut result = String::new();
496        if !cited_entries.is_empty() {
497            result.push_str(&crate::render::refs_to_string_with_format::<F>(
498                cited_entries,
499                annotations,
500                annotation_style,
501            ));
502        }
503
504        fmt.finish(result)
505    }
506
507    fn render_bibliography_for_group<F>(
508        &self,
509        group: &BibliographyGroup,
510        annotations: Option<&HashMap<String, String>>,
511        annotation_style: Option<&AnnotationStyle>,
512    ) -> String
513    where
514        F: OutputFormat<Output = String>,
515    {
516        let processed = self.process_references();
517        let bibliography = processed.bibliography;
518        let fmt = F::default();
519        let cited_ids = self.cited_ids.borrow();
520        let evaluator = SelectorEvaluator::new(&cited_ids);
521        let sorter = GroupSorter::new(&self.locale);
522
523        let matching_refs =
524            self.collect_matching_group_refs(&bibliography, &HashSet::new(), &evaluator, group);
525
526        if matching_refs.is_empty() {
527            return fmt.finish(String::new());
528        }
529
530        let sorted_refs = if let Some(sort_spec) = &group.sort {
531            sorter.sort_references(matching_refs, &sort_spec.resolve())
532        } else {
533            matching_refs
534        };
535
536        let local_hints = self.build_group_local_hints(&sorted_refs, group);
537        let entries = self.merge_compound_entries::<F>(self.render_group_entries::<F>(
538            &bibliography,
539            sorted_refs,
540            group,
541            local_hints.as_ref(),
542        ));
543
544        fmt.finish(crate::render::refs_to_string_with_format::<F>(
545            entries,
546            annotations,
547            annotation_style,
548        ))
549    }
550
551    /// Render the bibliography with grouping for uncited (nocite) items.
552    ///
553    /// If `style.bibliography.groups` is defined, uses configurable grouping
554    /// with per-group sorting. Group selectors apply to individual references
555    /// before compound numeric rows are merged, so each rendered group only
556    /// includes the members that matched its selector. Otherwise, falls back to
557    /// hardcoded cited/uncited grouping for backward compatibility.
558    pub fn render_grouped_bibliography_with_format<F>(&self) -> String
559    where
560        F: OutputFormat<Output = String>,
561    {
562        self.render_grouped_bibliography_with_format_and_annotations::<F>(None, None)
563    }
564
565    /// Render the bibliography with grouping and annotations.
566    pub fn render_grouped_bibliography_with_format_and_annotations<F>(
567        &self,
568        annotations: Option<&HashMap<String, String>>,
569        annotation_style: Option<&AnnotationStyle>,
570    ) -> String
571    where
572        F: OutputFormat<Output = String>,
573    {
574        let processed = self.process_references();
575        let all_entries = processed.bibliography;
576
577        if let Some(groups) = self
578            .style
579            .bibliography
580            .as_ref()
581            .and_then(|bibliography| bibliography.groups.as_ref())
582        {
583            return self.render_with_custom_groups_filtered::<F>(
584                &all_entries,
585                groups,
586                &all_entries
587                    .iter()
588                    .map(|e| e.id.clone())
589                    .collect::<HashSet<_>>(),
590                annotations,
591                annotation_style,
592            );
593        }
594
595        let bibliography_options = self.get_bibliography_options();
596        if let Some(partitioning) = bibliography_options.sort_partitioning.as_ref()
597            && crate::sort_partitioning::should_render_sections(partitioning)
598        {
599            self.initialize_numeric_bibliography_numbers();
600            let sorted_refs = self.sort_references(self.bibliography.values().collect());
601            return self.render_with_partition_sections::<F>(
602                sorted_refs,
603                partitioning,
604                annotations,
605                annotation_style,
606            );
607        }
608
609        self.render_with_legacy_grouping::<F>(
610            &self.merge_compound_entries::<F>(all_entries),
611            annotations,
612            annotation_style,
613        )
614    }
615
616    /// Render frontmatter-defined bibliography groups for document output.
617    ///
618    /// This uses the same pre-merge selector semantics as
619    /// [`Self::render_grouped_bibliography_with_format`].
620    pub(crate) fn render_document_bibliography_groups<F>(
621        &self,
622        groups: &[BibliographyGroup],
623    ) -> String
624    where
625        F: OutputFormat<Output = String>,
626    {
627        let all_entries = self.process_references().bibliography;
628        self.render_with_custom_groups::<F>(&all_entries, groups)
629    }
630
631    /// Render one bibliography block for document output.
632    ///
633    /// Returns heading and body separately so callers can insert headings
634    /// in their own output format.
635    pub(crate) fn render_document_bibliography_block<F>(
636        &self,
637        group: &BibliographyGroup,
638    ) -> RenderedBibliographyGroup
639    where
640        F: OutputFormat<Output = String>,
641    {
642        let mut headingless = group.clone();
643        let heading = headingless
644            .heading
645            .take()
646            .and_then(|group_heading| self.resolve_group_heading(&group_heading));
647        let body = self.render_bibliography_for_group::<F>(&headingless, None, None);
648
649        RenderedBibliographyGroup { heading, body }
650    }
651
652    pub(super) fn extract_metadata(&self, reference: &Reference) -> ProcEntryMetadata {
653        let bibliography_config = self.get_bibliography_config();
654        let options = RenderOptions {
655            config: &bibliography_config,
656            bibliography_config: Some(self.get_bibliography_options().into_owned()),
657            locale: &self.locale,
658            context: RenderContext::Bibliography,
659            mode: citum_schema::citation::CitationMode::NonIntegral,
660            suppress_author: false,
661            locator_raw: None,
662            ref_type: None,
663            show_semantics: self.show_semantics,
664            current_template_index: None,
665            abbreviation_map: self.abbreviation_map.as_ref(),
666        };
667
668        ProcEntryMetadata {
669            author: reference
670                .author()
671                .map(|authors| format_contributors_short(&authors.to_names_vec(), &options)),
672            year: reference
673                .csl_issued_date()
674                .map(|issued| issued.year().clone()),
675            title: reference.title().map(|title| title.to_string()),
676        }
677    }
678
679    fn render_group_heading<F>(&self, heading: &str) -> String
680    where
681        F: OutputFormat<Output = String>,
682    {
683        if std::any::type_name::<F>() == std::any::type_name::<crate::render::html::Html>() {
684            return format!("<h2>{heading}</h2>\n\n");
685        }
686
687        format!("# {heading}\n\n")
688    }
689}