Skip to main content

citum_engine/processor/
disambiguation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6use crate::reference::{Bibliography, Reference};
7use crate::values::ProcHints;
8use citum_schema::options::{Config, GivennameRule};
9use std::collections::{HashMap, HashSet};
10use std::fmt::Write as _;
11
12use crate::grouping::GroupSorter;
13use citum_schema::grouping::GroupSort;
14use citum_schema::locale::Locale;
15
/// Handles disambiguation logic for author-date citations.
///
/// Disambiguation resolves ambiguities when multiple references produce
/// identical rendered strings. The processor applies strategies in cascade:
///
/// 1. **Name expansion** (`disambiguate-add-names`): If et-al is triggered
///    in the base citation, try expanding the author list to differentiate
///    references with same first author and year.
///
/// 2. **Given name expansion** (`disambiguate-add-givenname`): Add initials
///    or full given names to author list to resolve remaining collisions
///    (e.g., "Smith, John" vs "Smith, Jane").
///
/// 3. **Combined expansion**: Try showing both more names AND given names
///    to maximize differentiation before falling back to year suffix.
///
/// 4. **Year suffix fallback** (`disambiguate-add-year-suffix`): If above
///    strategies fail, append letters (a, b, c, ..., z, aa, ab, ...) to
///    the year. Ordering follows the resolved per-group sort when one is
///    configured, otherwise lowercase reference title order.
///
/// ## Algorithm Overview
///
/// - References are grouped by their base collision key
///   (for example, `smith:2020` or a label key)
/// - For each group with 2+ collisions, strategies are applied in order
/// - Once a strategy resolves a group, the remaining strategies in the
///   cascade are skipped for that group
/// - Year suffix assignment is deterministic from the resolved per-group sort
///
/// ## Output
///
/// `calculate_hints` returns `ProcHints` for each reference containing:
/// - `group_index`: Position within the (sub)group the hint was emitted for (1-indexed)
/// - `group_length`: Number of references sharing the reference's author key
///   (1 when the author key is not tracked, e.g. for anonymous references)
/// - `group_key`: Base collision key used for grouping (left at its default
///   for references that form a singleton group)
/// - `disamb_condition`: Whether year suffix should be applied
/// - `expand_given_names`: Whether to show given names/initials
/// - `min_names_to_show`: Minimum author count for name expansion
pub struct Disambiguator<'a> {
    /// References to disambiguate; iterated via `values()`.
    bibliography: &'a Bibliography,
    /// Style configuration (processing mode, contributor shortening, multilingual options).
    config: &'a Config,
    /// Locale handed to the group sorter and to multilingual name resolution.
    locale: &'a Locale,
    /// Optional per-group sort used to order year-suffix assignment.
    group_sort: Option<&'a GroupSort>,
}
60
/// Disambiguation switches resolved once per run by `disambiguation_flags`.
#[derive(Clone, Copy, Default)]
struct DisambiguationFlags {
    /// Showing more names (et-al expansion) may be tried.
    add_names: bool,
    /// Adding given names/initials may be tried.
    add_givenname: bool,
    /// Year-suffix disambiguation is enabled in the configuration.
    year_suffix: bool,
    /// The processing mode is `Processing::Label`.
    is_label_mode: bool,
    /// The given-name rule is `PrimaryName` or `PrimaryNameWithInitials`.
    primary_givenname_only: bool,
}
69
/// Everything the cascade needs to process one collision group.
struct GroupDisambiguationContext<'a> {
    /// Base collision key shared by every reference in `group`.
    key: &'a str,
    /// References that collide under `key`.
    group: &'a [&'a Reference],
    /// Resolved disambiguation switches.
    flags: DisambiguationFlags,
    /// Number of references per author key, across the whole bibliography.
    author_group_lengths: &'a HashMap<String, usize>,
    /// Pre-computed per-reference keys and names.
    cache: &'a ReferenceCache,
}
77
/// Field values shared by every hint emitted for one (sub)group.
#[derive(Clone, Copy)]
struct HintPlan<'a> {
    /// Collision key copied into `ProcHints::group_key`.
    key: &'a str,
    /// Copied into `ProcHints::expand_given_names`.
    expand_given_names: bool,
    /// Copied into `ProcHints::expand_given_names_primary_only`.
    expand_given_names_primary_only: bool,
    /// Copied into `ProcHints::min_names_to_show`.
    min_names_to_show: Option<usize>,
    /// Copied into `ProcHints::disamb_condition`; `true` on the year-suffix paths.
    disamb_condition: bool,
}
86
/// Order in which `insert_group_hints` numbers the references of a group.
#[derive(Clone, Copy)]
enum HintOrder {
    /// Keep the order in which the references appear in the group slice.
    Encountered,
    /// Use the order produced by `sort_group_for_year_suffix`.
    GroupSorted,
}
92
/// Per-reference cached data, keyed by the value `reference_cache_key` returns
/// for the reference.
type ReferenceCache = HashMap<usize, CachedReferenceData>;
94
/// Strings and names derived once per reference by `build_reference_cache`.
struct CachedReferenceData {
    /// Normalized author string; empty when the reference has no author names.
    author_key: String,
    /// Base collision key used to group references.
    group_key: String,
    /// Flattened author names used for the expansion checks.
    names: Vec<crate::reference::FlatName>,
    /// Lowercased title (empty when there is no title); `None` when title
    /// keys were not requested.
    title_key: Option<String>,
}
101
102impl<'a> Disambiguator<'a> {
103    /// Creates a disambiguator that uses the default title-based fallback order.
104    #[must_use]
105    pub fn new(bibliography: &'a Bibliography, config: &'a Config, locale: &'a Locale) -> Self {
106        Self {
107            bibliography,
108            config,
109            locale,
110            group_sort: None,
111        }
112    }
113
114    /// Creates a disambiguator with an explicit per-group sort specification.
115    #[must_use]
116    pub fn with_group_sort(
117        bibliography: &'a Bibliography,
118        config: &'a Config,
119        locale: &'a Locale,
120        group_sort: &'a GroupSort,
121    ) -> Self {
122        Self {
123            bibliography,
124            config,
125            locale,
126            group_sort: Some(group_sort),
127        }
128    }
129
130    /// Calculate processing hints for disambiguation across all references.
131    ///
132    /// This is a single-pass algorithm that:
133    /// 1. Groups references by their base collision key
134    /// 2. For each group with multiple references, applies disambiguation
135    ///    strategies in cascade order
136    /// 3. Returns pre-calculated hints for the renderer
137    ///
138    /// ## Cascade Order
139    ///
140    /// For each collision group:
141    /// - Try expanding author list (et-al → full names)
142    /// - Try adding given names/initials
143    /// - Try combined approach (more names + given names)
144    /// - Fall back to year suffix (a, b, c, ...)
145    ///
146    /// ## Performance
147    ///
148    /// - O(n) for grouping, where n = number of references
149    /// - O(g²) for collision detection within each group g
150    /// - Total: O(n + Σ(g²)) where typical g << n
151    ///
152    /// ## Example
153    ///
154    /// Input bibliography:
155    /// - Smith, John (2020) - "Article A"
156    /// - Smith, Jane (2020) - "Article B"
157    /// - Brown, Tom (2020) - "Article C"
158    ///
159    /// Output hints:
160    /// - "item-1": { `group_key`: "smith:2020", `expand_given_names`: true, `group_length`: 2 }
161    /// - "item-2": { `group_key`: "smith:2020", `expand_given_names`: true, `group_length`: 2 }
162    /// - "item-3": { `group_key`: "brown:2020" } (no collision)
163    #[must_use]
164    pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
165        let mut hints = HashMap::new();
166        let refs: Vec<&Reference> = self.bibliography.values().collect();
167        let flags = self.disambiguation_flags();
168        // Always populate title_key when year-suffix disambiguation is active so that
169        // sort_group_for_year_suffix can use it as a stable tie-breaker regardless of
170        // whether a group_sort is configured.
171        let needs_title_key = flags.year_suffix;
172        let cache = self.build_reference_cache(&refs, needs_title_key);
173        let grouped = self.group_references(&refs, &cache);
174        let author_group_lengths = self.author_group_lengths(&refs, &cache);
175
176        for (key, group) in grouped {
177            self.apply_group_hints(
178                &mut hints,
179                GroupDisambiguationContext {
180                    key: &key,
181                    group: &group,
182                    flags,
183                    author_group_lengths: &author_group_lengths,
184                    cache: &cache,
185                },
186            );
187        }
188
189        hints
190    }
191
192    /// Resolves disambiguation configuration from the processor config.
193    fn disambiguation_flags(&self) -> DisambiguationFlags {
194        let disamb_config = match self.config.processing.as_ref() {
195            Some(processing) => processing.config().disambiguate,
196            None => {
197                citum_schema::options::Processing::AuthorDate
198                    .config()
199                    .disambiguate
200            }
201        };
202
203        DisambiguationFlags {
204            add_names: disamb_config.as_ref().is_some_and(|d| d.names),
205            add_givenname: disamb_config.as_ref().is_some_and(|d| d.add_givenname),
206            year_suffix: disamb_config.as_ref().is_some_and(|d| d.year_suffix),
207            is_label_mode: self
208                .config
209                .processing
210                .as_ref()
211                .is_some_and(|p| matches!(p, citum_schema::options::Processing::Label(_))),
212            primary_givenname_only: disamb_config.as_ref().is_some_and(|d| {
213                matches!(
214                    d.givenname_rule,
215                    GivennameRule::PrimaryName | GivennameRule::PrimaryNameWithInitials
216                )
217            }),
218        }
219    }
220
221    /// Builds an internal cache of reference data (author keys, group keys, titles)
222    /// to avoid redundant string generation during disambiguation.
223    fn build_reference_cache(&self, refs: &[&Reference], needs_title_key: bool) -> ReferenceCache {
224        let mut cache = HashMap::with_capacity(refs.len());
225
226        for reference in refs {
227            let names = reference.author().map_or_else(Vec::new, |authors| {
228                self.render_name_for_disambiguation(&authors)
229            });
230            let author_key = self.build_author_key(&names);
231            let group_key = self.build_group_key(reference, &author_key);
232            let title_key = needs_title_key.then(|| {
233                reference
234                    .title()
235                    .map(|title| title.to_string())
236                    .unwrap_or_default()
237                    .to_lowercase()
238            });
239
240            cache.insert(
241                Self::reference_cache_key(reference),
242                CachedReferenceData {
243                    author_key,
244                    group_key,
245                    names,
246                    title_key,
247                },
248            );
249        }
250
251        cache
252    }
253
254    /// Calculates how many references in `refs` share the same `author_key`.
255    /// The returned map is keyed only by `author_key` and is later used when
256    /// populating `ProcHints::group_length`, rather than representing the size
257    /// of a per-`group_key` collision group.
258    fn author_group_lengths(
259        &self,
260        refs: &[&Reference],
261        cache: &ReferenceCache,
262    ) -> HashMap<String, usize> {
263        let mut author_group_lengths = HashMap::new();
264        for reference in refs {
265            let author_key = &self.reference_data(reference, cache).author_key;
266            if !author_key.is_empty() {
267                *author_group_lengths.entry(author_key.clone()).or_insert(0) += 1;
268            }
269        }
270        author_group_lengths
271    }
272
273    /// Orchestrates the disambiguation cascade for a single collision group.
274    /// It attempts strategies in increasing order of disruptiveness (expansion -> year suffix).
275    fn apply_group_hints(
276        &self,
277        hints: &mut HashMap<String, ProcHints>,
278        context: GroupDisambiguationContext<'_>,
279    ) {
280        if self.try_apply_singleton_hint(hints, &context) {
281            return;
282        }
283
284        if self.try_apply_label_mode_year_suffix(hints, &context) {
285            return;
286        }
287
288        if self.try_apply_name_partitions(hints, &context) {
289            return;
290        }
291
292        if self.try_apply_givenname_resolution(hints, &context) {
293            return;
294        }
295
296        if self.try_apply_combined_resolution(hints, &context) {
297            return;
298        }
299
300        self.apply_year_suffix(hints, &context, false, None);
301    }
302
303    /// Optimization for groups with only one reference (no collision).
304    fn try_apply_singleton_hint(
305        &self,
306        hints: &mut HashMap<String, ProcHints>,
307        context: &GroupDisambiguationContext<'_>,
308    ) -> bool {
309        if context.group.len() != 1 {
310            return false;
311        }
312
313        #[allow(clippy::indexing_slicing, reason = "context.group.len() == 1")]
314        let head = context.group[0];
315
316        self.insert_hint(
317            hints,
318            head,
319            context.author_group_lengths,
320            context.cache,
321            ProcHints::default(),
322        );
323        true
324    }
325
326    /// Handles year-suffix disambiguation specifically for label-based styles (e.g. [Knu84a]).
327    fn try_apply_label_mode_year_suffix(
328        &self,
329        hints: &mut HashMap<String, ProcHints>,
330        context: &GroupDisambiguationContext<'_>,
331    ) -> bool {
332        if !(context.flags.is_label_mode && context.flags.year_suffix) {
333            return false;
334        }
335
336        self.apply_year_suffix(hints, context, false, None);
337        true
338    }
339
340    /// Attempts to resolve collisions by expanding the number of names shown (et al. expansion).
341    fn try_apply_name_partitions(
342        &self,
343        hints: &mut HashMap<String, ProcHints>,
344        context: &GroupDisambiguationContext<'_>,
345    ) -> bool {
346        if !context.flags.add_names {
347            return false;
348        }
349
350        let Some((min_names_to_show, partitions)) =
351            self.partition_by_name_expansion(context.group, context.cache)
352        else {
353            return false;
354        };
355
356        for subgroup in partitions.values() {
357            if subgroup.len() == 1 {
358                self.apply_resolution(hints, subgroup, context, false, Some(min_names_to_show));
359                continue;
360            }
361
362            if context.flags.add_givenname
363                && self.check_givenname_resolution(
364                    subgroup,
365                    context.cache,
366                    Some(min_names_to_show),
367                    false,
368                )
369            {
370                // Under primary-name rules, secondary given names are not rendered.
371                // If the full-expansion check passes but primary-only does not, the
372                // subgroup must fall back to year-suffix (with expansion retained).
373                if context.flags.primary_givenname_only
374                    && !self.check_givenname_resolution(
375                        subgroup,
376                        context.cache,
377                        Some(min_names_to_show),
378                        true,
379                    )
380                {
381                    self.apply_year_suffix_for_group(
382                        hints,
383                        subgroup,
384                        context,
385                        true,
386                        Some(min_names_to_show),
387                    );
388                } else {
389                    self.apply_resolution(hints, subgroup, context, true, Some(min_names_to_show));
390                }
391                continue;
392            }
393
394            self.apply_year_suffix_for_group(
395                hints,
396                subgroup,
397                context,
398                false,
399                Some(min_names_to_show),
400            );
401        }
402
403        true
404    }
405
406    /// Attempts to resolve collisions by adding given names or initials.
407    fn try_apply_givenname_resolution(
408        &self,
409        hints: &mut HashMap<String, ProcHints>,
410        context: &GroupDisambiguationContext<'_>,
411    ) -> bool {
412        // Use full-expansion keys to determine whether givenname expansion can help at all.
413        // (With n=1, the full and primary-only keys are equivalent — both inspect only the
414        // primary author — so no separate primary-only check is needed here.)
415        if !(context.flags.add_givenname
416            && self.check_givenname_resolution(context.group, context.cache, None, false))
417        {
418            return false;
419        }
420
421        self.apply_resolution(hints, context.group, context, true, None);
422        true
423    }
424
425    /// Attempts to resolve collisions by using both more names AND given name expansion.
426    ///
427    /// When `primary_givenname_only` is active, the renderer only shows given names for
428    /// the first author. `find_combined_resolution` uses full-expansion keys to find the
429    /// minimum name count that would work in theory; this function then verifies whether
430    /// that resolution also holds under the restricted primary-only rendering.  If not,
431    /// the group cannot be resolved by expansion alone and falls back to year-suffix while
432    /// retaining the et-al expansion that was found.
433    fn try_apply_combined_resolution(
434        &self,
435        hints: &mut HashMap<String, ProcHints>,
436        context: &GroupDisambiguationContext<'_>,
437    ) -> bool {
438        if !context.flags.add_names || !context.flags.add_givenname {
439            return false;
440        }
441
442        let Some(min_names_to_show) = self.find_combined_resolution(context.group, context.cache)
443        else {
444            return false;
445        };
446
447        // When primary-name expansion is active, confirm the resolution still holds when
448        // only the first author's given name is rendered.  If it does not, the expansion
449        // alone is insufficient; emit year-suffix while preserving the et-al expansion.
450        if context.flags.primary_givenname_only
451            && !self.check_givenname_resolution(
452                context.group,
453                context.cache,
454                Some(min_names_to_show),
455                true,
456            )
457        {
458            self.apply_year_suffix_for_group(
459                hints,
460                context.group,
461                context,
462                true,
463                Some(min_names_to_show),
464            );
465            return true;
466        }
467
468        self.apply_resolution(hints, context.group, context, true, Some(min_names_to_show));
469        true
470    }
471
472    /// Searches for the minimum number of names that, when combined with given name expansion,
473    /// resolves the collision group.
474    fn find_combined_resolution(
475        &self,
476        group: &[&Reference],
477        cache: &ReferenceCache,
478    ) -> Option<usize> {
479        let max_authors = group
480            .iter()
481            .map(|reference| self.reference_data(reference, cache).names.len())
482            .max()
483            .unwrap_or(0);
484
485        // Use full-expansion keys (primary_only: false) to find the minimum name count.
486        // The caller is responsible for verifying the result under primary-only rendering
487        // when primary_givenname_only is active.
488        (2..=max_authors).find(|&n| self.check_givenname_resolution(group, cache, Some(n), false))
489    }
490
491    /// Finalizes a successful disambiguation strategy by inserting the calculated hints into the map.
492    fn apply_resolution(
493        &self,
494        hints: &mut HashMap<String, ProcHints>,
495        group: &[&Reference],
496        context: &GroupDisambiguationContext<'_>,
497        expand_given_names: bool,
498        min_names_to_show: Option<usize>,
499    ) {
500        self.insert_group_hints(
501            hints,
502            group,
503            context.author_group_lengths,
504            HintPlan {
505                key: context.key,
506                expand_given_names,
507                expand_given_names_primary_only: context.flags.primary_givenname_only,
508                min_names_to_show,
509                disamb_condition: false,
510            },
511            HintOrder::Encountered,
512            context.cache,
513        );
514    }
515
516    /// Inserts a single hint into the hints map, ensuring the author group length is correctly set.
517    fn insert_hint(
518        &self,
519        hints: &mut HashMap<String, ProcHints>,
520        reference: &Reference,
521        author_group_lengths: &HashMap<String, usize>,
522        cache: &ReferenceCache,
523        mut hint: ProcHints,
524    ) {
525        hint.group_length = self
526            .author_group_length(reference, author_group_lengths, cache)
527            .unwrap_or(1);
528        hints.insert(reference.id().unwrap_or_default().to_string(), hint);
529    }
530
531    /// Retrieves the number of references sharing the author key for a specific reference.
532    fn author_group_length(
533        &self,
534        reference: &Reference,
535        author_group_lengths: &HashMap<String, usize>,
536        cache: &ReferenceCache,
537    ) -> Option<usize> {
538        let author_key = &self.reference_data(reference, cache).author_key;
539        author_group_lengths.get(author_key).copied()
540    }
541
542    /// Applies year-suffix disambiguation to the entire group in the context.
543    fn apply_year_suffix(
544        &self,
545        hints: &mut HashMap<String, ProcHints>,
546        context: &GroupDisambiguationContext<'_>,
547        expand_given_names: bool,
548        min_names_to_show: Option<usize>,
549    ) {
550        self.apply_year_suffix_for_group(
551            hints,
552            context.group,
553            context,
554            expand_given_names,
555            min_names_to_show,
556        );
557    }
558
559    /// Applies year-suffix disambiguation to a specific (sub)group of references.
560    fn apply_year_suffix_for_group(
561        &self,
562        hints: &mut HashMap<String, ProcHints>,
563        group: &[&Reference],
564        context: &GroupDisambiguationContext<'_>,
565        expand_given_names: bool,
566        min_names_to_show: Option<usize>,
567    ) {
568        self.insert_group_hints(
569            hints,
570            group,
571            context.author_group_lengths,
572            HintPlan {
573                key: context.key,
574                expand_given_names,
575                expand_given_names_primary_only: context.flags.primary_givenname_only,
576                min_names_to_show,
577                disamb_condition: true,
578            },
579            HintOrder::GroupSorted,
580            context.cache,
581        );
582    }
583
584    /// Iterates through a group of references and inserts hints according to the specified order.
585    fn insert_group_hints(
586        &self,
587        hints: &mut HashMap<String, ProcHints>,
588        group: &[&Reference],
589        author_group_lengths: &HashMap<String, usize>,
590        plan: HintPlan<'_>,
591        order: HintOrder,
592        cache: &ReferenceCache,
593    ) {
594        match order {
595            HintOrder::Encountered => {
596                for (idx, reference) in group.iter().enumerate() {
597                    self.insert_planned_hint(
598                        hints,
599                        reference,
600                        author_group_lengths,
601                        plan,
602                        idx + 1,
603                        cache,
604                    );
605                }
606            }
607            HintOrder::GroupSorted => {
608                for (idx, reference) in self
609                    .sort_group_for_year_suffix(group, cache)
610                    .iter()
611                    .enumerate()
612                {
613                    self.insert_planned_hint(
614                        hints,
615                        reference,
616                        author_group_lengths,
617                        plan,
618                        idx + 1,
619                        cache,
620                    );
621                }
622            }
623        }
624    }
625
626    /// Helper to insert a hint with common planned fields (key, expand flags, group index).
627    fn insert_planned_hint(
628        &self,
629        hints: &mut HashMap<String, ProcHints>,
630        reference: &Reference,
631        author_group_lengths: &HashMap<String, usize>,
632        plan: HintPlan<'_>,
633        group_index: usize,
634        cache: &ReferenceCache,
635    ) {
636        self.insert_hint(
637            hints,
638            reference,
639            author_group_lengths,
640            cache,
641            ProcHints {
642                disamb_condition: plan.disamb_condition,
643                group_index,
644                group_key: plan.key.to_string(),
645                expand_given_names: plan.expand_given_names,
646                expand_given_names_primary_only: plan.expand_given_names_primary_only,
647                min_names_to_show: plan.min_names_to_show,
648                ..Default::default()
649            },
650        );
651    }
652
653    /// Sorts a collision group to determine the deterministic order for year-suffix assignment.
654    /// It uses the provided group sort specification or falls back to title-based sorting.
655    fn sort_group_for_year_suffix<'b>(
656        &self,
657        group: &[&'b Reference],
658        cache: &ReferenceCache,
659    ) -> Vec<&'b Reference> {
660        if let Some(sort_spec) = self.group_sort {
661            let sorter = GroupSorter::new(self.locale);
662            // Pre-sort by title_key so that entries which compare equal under the primary
663            // sort_spec retain a stable, deterministic order (title ascending as tiebreaker).
664            // GroupSorter::sort_references uses sort_by (stable), so the pre-sort order is
665            // preserved for entries that compare equal under the primary key.
666            let mut pre_sorted: Vec<&Reference> = group.to_vec();
667            pre_sorted.sort_by(|a, b| {
668                let a_title = self
669                    .reference_data(a, cache)
670                    .title_key
671                    .as_deref()
672                    .unwrap_or_default();
673                let b_title = self
674                    .reference_data(b, cache)
675                    .title_key
676                    .as_deref()
677                    .unwrap_or_default();
678                a_title.cmp(b_title)
679            });
680            sorter.sort_references(pre_sorted, sort_spec)
681        } else {
682            let mut sorted: Vec<&Reference> = group.to_vec();
683            sorted.sort_by(|a, b| {
684                let a_title = self
685                    .reference_data(a, cache)
686                    .title_key
687                    .as_deref()
688                    .unwrap_or_default();
689                let b_title = self
690                    .reference_data(b, cache)
691                    .title_key
692                    .as_deref()
693                    .unwrap_or_default();
694                a_title.cmp(b_title)
695            });
696            sorted
697        }
698    }
699
700    /// Partition a collision group by showing more names, preserving `et al.`
701    /// distinction when some references still have hidden trailing names.
702    fn partition_by_name_expansion<'b>(
703        &self,
704        group: &[&'b Reference],
705        cache: &ReferenceCache,
706    ) -> Option<(usize, HashMap<String, Vec<&'b Reference>>)> {
707        let max_authors = group
708            .iter()
709            .map(|reference| self.reference_data(reference, cache).names.len())
710            .max()
711            .unwrap_or(0);
712
713        let mut buf = String::new();
714        for n in 2..=max_authors {
715            let mut partitions: HashMap<String, Vec<&Reference>> = HashMap::new();
716            for reference in group {
717                let names = &self.reference_data(reference, cache).names;
718                buf.clear();
719                self.append_name_expansion_key(&mut buf, names, n);
720                if let Some(v) = partitions.get_mut(buf.as_str()) {
721                    v.push(*reference);
722                } else {
723                    partitions.insert(buf.clone(), vec![*reference]);
724                }
725            }
726
727            if partitions.len() > 1 {
728                return Some((n, partitions));
729            }
730        }
731
732        None
733    }
734
735    /// Check if expanding to full names resolves ambiguity in the group.
736    ///
737    /// If `min_names` is `Some(n)`, it checks resolution when showing `n` names.
738    ///
739    /// When `primary_only` is `true`, only the first author's given name is included
740    /// in the resolution key — mirroring what `primary-name` and
741    /// `primary-name-with-initials` actually render.  Use this to validate that a
742    /// candidate expansion still works under restricted rendering before committing.
743    fn check_givenname_resolution(
744        &self,
745        group: &[&Reference],
746        cache: &ReferenceCache,
747        min_names: Option<usize>,
748        primary_only: bool,
749    ) -> bool {
750        let mut seen = HashSet::new();
751        let mut buf = String::new();
752        let n = min_names.unwrap_or(1);
753        for reference in group {
754            let names = &self.reference_data(reference, cache).names;
755            buf.clear();
756            self.append_givenname_resolution_key(&mut buf, names, n, primary_only);
757            if !seen.insert(buf.clone()) {
758                return false;
759            }
760        }
761        true
762    }
763
764    /// Group references by their base collision key for disambiguation.
765    fn group_references<'b>(
766        &self,
767        references: &[&'b Reference],
768        cache: &ReferenceCache,
769    ) -> HashMap<String, Vec<&'b Reference>> {
770        let mut groups: HashMap<String, Vec<&'b Reference>> = HashMap::new();
771
772        for reference in references {
773            let key = self.reference_data(reference, cache).group_key.clone();
774            groups.entry(key).or_default().push(*reference);
775        }
776
777        groups
778    }
779
780    /// Flattens a contributor to names using the style's active multilingual display
781    /// mode, so the collision key reflects the same surface form the style renders
782    /// (DISAMBIGUATION.md §4). Monolingual contributors fall through to the original.
783    fn render_name_for_disambiguation(
784        &self,
785        contributor: &citum_schema::reference::Contributor,
786    ) -> Vec<crate::reference::FlatName> {
787        let ml = self.config.multilingual.as_ref();
788        crate::values::resolve_multilingual_name(
789            contributor,
790            ml.and_then(|m| m.name_mode.as_ref()),
791            ml.and_then(|m| m.preferred_transliteration.as_deref()),
792            ml.and_then(|m| m.preferred_script.as_ref()),
793            &self.locale.locale,
794        )
795    }
796
797    /// Generates a normalized author string used for grouping and et-al detection.
798    fn build_author_key(&self, names: &[crate::reference::FlatName]) -> String {
799        let shorten = self
800            .config
801            .contributors
802            .as_ref()
803            .and_then(|c| c.shorten.as_ref());
804
805        if names.is_empty() {
806            return String::new();
807        }
808
809        let mut key = String::new();
810        if let Some(opts) = shorten
811            && names.len() >= opts.min as usize
812        {
813            self.append_lowercased_families(&mut key, names, opts.use_first as usize, ',');
814            if !key.is_empty() {
815                key.push(',');
816            }
817            key.push_str("et-al");
818            return key;
819        }
820
821        self.append_lowercased_families(&mut key, names, names.len(), ',');
822        key
823    }
824
825    /// Create a grouping key for a reference based on its base citation form.
826    fn build_group_key(&self, reference: &Reference, author_key: &str) -> String {
827        // In label mode, group by base label string rather than author-year.
828        // This ensures disambiguation happens at the label level (Knu84a/Knu84b)
829        // rather than the author-year level.
830        if let Some(citum_schema::options::Processing::Label(config)) = &self.config.processing {
831            let params = config.effective_params();
832            return crate::processor::labels::generate_base_label(reference, &params);
833        }
834
835        // Anonymous entries (no author key) must not be grouped together for year-suffix
836        // assignment. CSL year-suffix disambiguates entries with the same *author* —
837        // anonymous entries are already distinguished by their title substitution.
838        // Give each anonymous reference a unique key so it forms its own singleton group.
839        if author_key.is_empty() {
840            if let Some(ref_id) = reference.id().filter(|id| !id.is_empty()) {
841                return format!("anon:{ref_id}");
842            }
843            return format!("anon:{}", Self::reference_cache_key(reference));
844        }
845
846        let mut key = String::with_capacity(author_key.len() + 8);
847        key.push_str(author_key);
848        key.push(':');
849        let Some(year) = reference
850            .csl_issued_date()
851            .and_then(|d| d.year().parse::<i32>().ok())
852        else {
853            return key;
854        };
855        let _ = write!(key, "{year}");
856        key
857    }
858
859    /// Appends a sequence of family names to the key buffer, lowercased.
860    fn append_lowercased_families(
861        &self,
862        key: &mut String,
863        names: &[crate::reference::FlatName],
864        take: usize,
865        separator: char,
866    ) {
867        for (idx, name) in names.iter().take(take).enumerate() {
868            if idx > 0 {
869                key.push(separator);
870            }
871            Self::push_lowercased(key, name.family_or_literal());
872        }
873    }
874
875    /// Creates a key representing the citation form when n names are shown.
876    fn append_name_expansion_key(
877        &self,
878        key: &mut String,
879        names: &[crate::reference::FlatName],
880        n: usize,
881    ) {
882        self.append_lowercased_families(key, names, n, '|');
883        if names.len() > n {
884            if !key.is_empty() {
885                key.push('|');
886            }
887            key.push_str("et-al");
888        }
889    }
890
891    /// Creates a key including full name parts (given names, particles) for exact resolution.
892    ///
893    /// When `primary_only` is `true`, only the first author (index 0) receives full
894    /// given-name/particle parts; subsequent authors contribute only their family name.
895    /// This mirrors what `primary-name` and `primary-name-with-initials` actually render,
896    /// allowing resolution checks to validate against the real rendered surface form.
897    fn append_givenname_resolution_key(
898        &self,
899        key: &mut String,
900        names: &[crate::reference::FlatName],
901        n: usize,
902        primary_only: bool,
903    ) {
904        for (idx, name) in names.iter().take(n).enumerate() {
905            if idx > 0 {
906                key.push_str("||");
907            }
908            Self::append_optional_part(key, name.family.as_deref());
909            if primary_only && idx > 0 {
910                // Secondary authors: family name only under primary-name rules.
911                continue;
912            }
913            key.push('|');
914            Self::append_optional_part(key, name.given.as_deref());
915            key.push('|');
916            Self::append_optional_part(key, name.non_dropping_particle.as_deref());
917            key.push('|');
918            Self::append_optional_part(key, name.dropping_particle.as_deref());
919        }
920    }
921
/// Serializes one optional name part into the key buffer.
///
/// A present value is written as `<byte length>:<value>`; a missing value is
/// written as a single `-`.
fn append_optional_part(key: &mut String, value: Option<&str>) {
    let Some(part) = value else {
        key.push('-');
        return;
    };
    // Formatting into a `String` does not fail, so the result is discarded.
    let _ = write!(key, "{}:{part}", part.len());
}
932
/// Appends `value` to `key` in lowercase.
///
/// Pure-ASCII input is lowercased byte by byte straight into `key`; any other
/// input goes through `str::to_lowercase`.
fn push_lowercased(key: &mut String, value: &str) {
    if !value.is_ascii() {
        key.push_str(&value.to_lowercase());
        return;
    }

    key.reserve(value.len());
    key.extend(
        value
            .bytes()
            .map(|byte| char::from(byte.to_ascii_lowercase())),
    );
}
944
945    /// Returns an internal cache key for a reference based on its current pointer address.
946    ///
947    /// This key is only intended for in-memory caching during the current run. Its
948    /// uniqueness and stability are limited to the lifetime of the referenced
949    /// allocation, so it must not be treated as globally unique or persisted.
950    fn reference_cache_key(reference: &Reference) -> usize {
951        std::ptr::from_ref(reference) as usize
952    }
953
954    /// Retrieves cached metadata for a specific reference.
955    #[allow(
956        clippy::expect_used,
957        reason = "Internal cache hydration guarantees presence"
958    )]
959    fn reference_data<'b>(
960        &self,
961        reference: &Reference,
962        cache: &'b ReferenceCache,
963    ) -> &'b CachedReferenceData {
964        cache
965            .get(&Self::reference_cache_key(reference))
966            .expect("disambiguation cache missing reference")
967    }
968}
969
970#[cfg(test)]
971#[allow(
972    clippy::unwrap_used,
973    clippy::expect_used,
974    clippy::panic,
975    clippy::indexing_slicing,
976    clippy::todo,
977    clippy::unimplemented,
978    clippy::unreachable,
979    clippy::get_unwrap,
980    reason = "Panicking is acceptable and often desired in tests."
981)]
982mod tests {
983    use super::*;
984    use crate::Processor;
985    use citum_schema::citation::Citation;
986    use citum_schema::grouping::{GroupSort, GroupSortKey, SortKey};
987    use citum_schema::options::{Config, ContributorConfig, DisplayAsSort, NameForm};
988    use citum_schema::reference::{
989        Contributor, EdtfString, InputReference as Reference, Monograph, MonographType,
990        MultilingualString, StructuredName, Title,
991    };
992    use citum_schema::template::{TemplateComponent, WrapPunctuation};
993    use citum_schema::{BibliographySpec, CitationSpec, Style, StyleInfo};
994
995    fn make_ref(id: &str, family: &str, given: &str, year: i32) -> Reference {
996        let title = format!("Title {id}");
997        Reference::Monograph(Box::new(Monograph {
998            id: Some(id.into()),
999            r#type: MonographType::Book,
1000            title: Some(Title::Single(title.clone())),
1001            short_title: None,
1002            container: None,
1003            author: Some(Contributor::StructuredName(StructuredName {
1004                family: MultilingualString::Simple(family.to_string()),
1005                given: MultilingualString::Simple(given.to_string()),
1006                suffix: None,
1007                dropping_particle: None,
1008                non_dropping_particle: None,
1009            })),
1010            editor: None,
1011            translator: None,
1012            issued: EdtfString(year.to_string()),
1013            ..Default::default()
1014        }))
1015    }
1016
1017    fn make_ref_without_id(title_suffix: &str, family: &str, given: &str, year: i32) -> Reference {
1018        let title = format!("Title {title_suffix}");
1019        Reference::Monograph(Box::new(Monograph {
1020            id: None,
1021            r#type: MonographType::Book,
1022            title: Some(Title::Single(title)),
1023            short_title: None,
1024            container: None,
1025            author: Some(Contributor::StructuredName(StructuredName {
1026                family: MultilingualString::Simple(family.to_string()),
1027                given: MultilingualString::Simple(given.to_string()),
1028                suffix: None,
1029                dropping_particle: None,
1030                non_dropping_particle: None,
1031            })),
1032            editor: None,
1033            translator: None,
1034            issued: EdtfString(year.to_string()),
1035            ..Default::default()
1036        }))
1037    }
1038
1039    fn make_multi_author_ref(id: &str, authors: &[(&str, &str)], year: i32) -> Reference {
1040        let title = format!("Title {id}");
1041        Reference::Monograph(Box::new(Monograph {
1042            id: Some(id.into()),
1043            r#type: MonographType::Book,
1044            title: Some(Title::Single(title)),
1045            short_title: None,
1046            container: None,
1047            author: Some(Contributor::ContributorList(
1048                citum_schema::reference::ContributorList(
1049                    authors
1050                        .iter()
1051                        .map(|(family, given)| {
1052                            Contributor::StructuredName(StructuredName {
1053                                family: MultilingualString::Simple((*family).to_string()),
1054                                given: MultilingualString::Simple((*given).to_string()),
1055                                suffix: None,
1056                                dropping_particle: None,
1057                                non_dropping_particle: None,
1058                            })
1059                        })
1060                        .collect(),
1061                ),
1062            )),
1063            editor: None,
1064            translator: None,
1065            issued: EdtfString(year.to_string()),
1066            ..Default::default()
1067        }))
1068    }
1069
1070    fn make_author_date_style(config: Config, bibliography_sort: Option<GroupSort>) -> Style {
1071        Style {
1072            info: StyleInfo {
1073                title: Some("Disambiguation Test".to_string()),
1074                id: Some("disambiguation-test".into()),
1075                ..Default::default()
1076            },
1077            options: Some(config),
1078            citation: Some(CitationSpec {
1079                template: Some(vec![
1080                    citum_schema::tc_contributor!(Author, Short),
1081                    citum_schema::tc_date!(Issued, Year, prefix = ", "),
1082                ]),
1083                wrap: Some(WrapPunctuation::Parentheses.into()),
1084                ..Default::default()
1085            }),
1086            bibliography: Some(BibliographySpec {
1087                sort: bibliography_sort.map(citum_schema::grouping::GroupSortEntry::Explicit),
1088                template: Some(vec![TemplateComponent::Title(
1089                    citum_schema::template::TemplateTitle {
1090                        title: citum_schema::template::TitleType::Primary,
1091                        ..Default::default()
1092                    },
1093                )]),
1094                ..Default::default()
1095            }),
1096            ..Default::default()
1097        }
1098    }
1099
#[test]
fn test_group_aware_year_suffix_sort() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Two works with the same author and year; only id and title differ.
    let mut bib = Bibliography::new();
    for id in ["r1", "r2"] {
        bib.insert(id.to_string(), make_ref(id, "Smith", "Same", 2020));
    }

    let config = Config::default();
    let locale = Locale::en_us();

    // 1. Default sorting (by title): "Title r1" < "Title r2" alphabetically,
    //    so r1 takes position 1 ('a') and r2 takes position 2 ('b').
    let by_title = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    assert_eq!(by_title.get("r1").unwrap().group_index, 1);
    assert_eq!(by_title.get("r2").unwrap().group_index, 2);

    // 2. Custom group sort: title descending swaps the positions, so r2
    //    becomes 'a' and r1 becomes 'b'.
    let title_descending = GroupSort {
        template: vec![GroupSortKey {
            key: SortKey::Title,
            ascending: false,
            order: None,
            sort_order: None,
        }],
    };
    let by_title_descending =
        Disambiguator::with_group_sort(&bib, &config, &locale, &title_descending)
            .calculate_hints();
    assert_eq!(by_title_descending.get("r2").unwrap().group_index, 1);
    assert_eq!(by_title_descending.get("r1").unwrap().group_index, 2);

    // 3. End to end: with the same sort on the bibliography and only
    //    year-suffix disambiguation enabled, the rendered suffixes follow it.
    let year_suffix_only = Disambiguation {
        names: false,
        add_givenname: false,
        givenname_rule: GivennameRule::default(),
        year_suffix: true,
    };
    let style_config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(year_suffix_only),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            display_as_sort: Some(DisplayAsSort::First),
            ..Default::default()
        }),
        ..Default::default()
    };
    let style = make_author_date_style(style_config, Some(title_descending));
    let processor = Processor::new(style, bib);

    let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
    let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();

    assert!(
        rendered_r1.contains("2020b"),
        "expected r1 to sort second: {rendered_r1}"
    );
    assert!(
        rendered_r2.contains("2020a"),
        "expected r2 to sort first: {rendered_r2}"
    );
}
1171
#[test]
fn test_disambiguate_given_names() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // The same processing options are needed twice: once for the bare
    // disambiguator and once inside the style config.
    let given_names_only = || {
        Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname: true,
                givenname_rule: GivennameRule::AllNames,
                year_suffix: false,
            }),
            ..Default::default()
        })
    };

    // Same family name and year, different given names: expanding the given
    // names should be enough to resolve the collision.
    let mut bib = Bibliography::new();
    bib.insert("r1".to_string(), make_ref("r1", "Smith", "John", 2020));
    bib.insert("r2".to_string(), make_ref("r2", "Smith", "Alice", 2020));

    let config = Config {
        processing: Some(given_names_only()),
        ..Default::default()
    };
    let locale = Locale::en_us();

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    let john = hints.get("r1").unwrap();
    let alice = hints.get("r2").unwrap();

    // Both must expand given names to resolve the Smith (2020) collision.
    assert!(john.expand_given_names);
    assert!(alice.expand_given_names);

    // No year suffix: it is disabled in the config, and given names already
    // resolve the collision.
    assert!(!john.disamb_condition);
    assert!(!alice.disamb_condition);

    // Collision resolved: the entries occupy distinct positions.
    assert_ne!(john.group_index, alice.group_index);

    let style_config = Config {
        processing: Some(given_names_only()),
        contributors: Some(ContributorConfig {
            initialize_with: Some(". ".to_string()),
            name_form: Some(NameForm::Initials),
            ..Default::default()
        }),
        ..Default::default()
    };
    let processor = Processor::new(make_author_date_style(style_config, None), bib);

    let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
    let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();

    assert!(
        rendered_r1.contains("J. Smith"),
        "expected expanded given name for r1: {rendered_r1}"
    );
    assert!(
        rendered_r2.contains("A. Smith"),
        "expected expanded given name for r2: {rendered_r2}"
    );
}
1249
/// With `primary-name` active, expanding the first author's given name cannot
/// resolve a collision between works that share an identical primary author.
/// The disambiguator must then fall back to year-suffix while keeping the
/// et-al expansion it found. The hints must therefore carry
/// `expand_given_names: true`, `expand_given_names_primary_only: true`,
/// `min_names_to_show: Some(2)` and `disamb_condition: true` (year-suffix),
/// with distinct `group_index` values.
#[test]
fn test_primary_name_identical_primary_falls_back_to_year_suffix() {
    use citum_schema::options::{
        Disambiguation, Processing, ProcessingCustom, ShortenListOptions,
    };

    // The primary author ("Asthma/Albert") is identical in both works. The
    // second authors share a family name and differ only in given name
    // ("Brandon" vs "Edward").
    let mut bib = Bibliography::new();
    for (id, second_given) in [("r1", "Brandon"), ("r2", "Edward")] {
        let authors = [
            ("Asthma", "Albert"),
            ("Bronchitis", second_given),
            ("Cold", "Crispin"),
        ];
        bib.insert(id.to_string(), make_multi_author_ref(id, &authors, 1990));
    }

    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: true,
                add_givenname: true,
                givenname_rule: GivennameRule::PrimaryName,
                year_suffix: true,
            }),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            shorten: Some(ShortenListOptions {
                min: 3,
                use_first: 1,
                ..Default::default()
            }),
            ..Default::default()
        }),
        ..Default::default()
    };
    let locale = Locale::en_us();

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();

    let h1 = hints.get("r1").expect("r1 must have a hint");
    let h2 = hints.get("r2").expect("r2 must have a hint");
    let labelled = [("r1", h1), ("r2", h2)];

    // Et-al expansion to two names must be retained.
    for (id, hint) in labelled {
        assert_eq!(
            hint.min_names_to_show,
            Some(2),
            "{id}: expected min_names_to_show=2"
        );
    }

    // Given-name expansion must be active (primary author initial shown).
    for (id, hint) in labelled {
        assert!(hint.expand_given_names, "{id}: expected expand_given_names");
    }

    // The primary-only flag must be propagated.
    for (id, hint) in labelled {
        assert!(
            hint.expand_given_names_primary_only,
            "{id}: expected primary-only"
        );
    }

    // Year-suffix must be assigned (disamb_condition true, distinct indices).
    for (id, hint) in labelled {
        assert!(
            hint.disamb_condition,
            "{id}: expected disamb_condition (year-suffix)"
        );
    }
    assert_ne!(
        h1.group_index, h2.group_index,
        "r1 and r2 must receive distinct year-suffix positions"
    );
}
1354
#[test]
fn test_build_reference_cache_populates_title_keys_when_year_suffix_is_active() {
    // title_key must be populated whenever year-suffix is on (regardless of
    // group_sort) so that sort_group_for_year_suffix can use it as a stable
    // tie-breaker.
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Builds a config whose disambiguation differs only in the two flags.
    let config_with = |add_givenname: bool, year_suffix: bool| Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname,
                givenname_rule: GivennameRule::default(),
                year_suffix,
            }),
            ..Default::default()
        })),
        ..Default::default()
    };

    let mut bib = Bibliography::new();
    bib.insert("r1".to_string(), make_ref("r1", "Smith", "John", 2020));
    let refs: Vec<&Reference> = bib.values().collect();
    let locale = Locale::en_us();

    // year_suffix=false → title_key must be None
    let suffix_off_config = config_with(true, false);
    let suffix_off = Disambiguator::new(&bib, &suffix_off_config, &locale);
    let suffix_off_flags = suffix_off.disambiguation_flags();
    let suffix_off_cache = suffix_off.build_reference_cache(&refs, suffix_off_flags.year_suffix);
    assert!(
        suffix_off_cache
            .values()
            .all(|entry| entry.title_key.is_none())
    );

    // year_suffix=true → title_key must be Some regardless of group_sort
    let suffix_on_config = config_with(false, true);
    let suffix_on = Disambiguator::new(&bib, &suffix_on_config, &locale);
    let suffix_on_flags = suffix_on.disambiguation_flags();
    let suffix_on_cache = suffix_on.build_reference_cache(&refs, suffix_on_flags.year_suffix);
    assert!(
        suffix_on_cache
            .values()
            .all(|entry| entry.title_key.is_some())
    );
}
1402
#[test]
fn test_anonymous_refs_do_not_receive_year_suffix() {
    // Anonymous entries (no author) that share a year must each land in their
    // own singleton group, even when the embedded reference id is empty or
    // missing.
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    let anonymous_entries = [
        ("a1", make_ref("a1", "", "", 2020)),
        ("a2", make_ref("a2", "", "", 2020)),
        // Embedded id present but empty.
        ("a3", make_ref("", "", "", 2020)),
        // No embedded id at all.
        ("a4", make_ref_without_id("missing-id", "", "", 2020)),
    ];
    let mut bib = Bibliography::new();
    for (bib_key, reference) in anonymous_entries {
        bib.insert(bib_key.to_string(), reference);
    }

    let locale = Locale::en_us();
    let all_strategies = Disambiguation {
        names: true,
        add_givenname: true,
        givenname_rule: GivennameRule::default(),
        year_suffix: true,
    };
    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(all_strategies),
            ..Default::default()
        })),
        ..Default::default()
    };

    let disambiguator = Disambiguator::new(&bib, &config, &locale);
    let refs: Vec<&Reference> = bib.values().collect();
    let cache = disambiguator.build_reference_cache(&refs, false);
    let groups = disambiguator.group_references(&refs, &cache);

    assert_eq!(groups.len(), 4);
    assert!(!groups.contains_key("anon:"));
    assert!(groups.values().all(|members| members.len() == 1));
}
1439
#[test]
fn test_push_lowercased_matches_str_lowercase_for_non_ascii() {
    // Non-ASCII input must produce exactly what `str::to_lowercase` produces.
    let greek_upper = "ΟΣ";
    let expected = greek_upper.to_lowercase();

    let mut buffer = String::new();
    Disambiguator::push_lowercased(&mut buffer, greek_upper);

    assert_eq!(buffer, expected);
}
1449
#[test]
fn test_partitioned_name_expansion_keeps_unique_items_and_suffixes_remainders() {
    use citum_schema::options::{
        ContributorConfig, Disambiguation, Processing, ProcessingCustom, ShortenListOptions,
    };

    // All three works start with "Smith" in 2020. r1 has a distinct second
    // family name; r2 and r3 share "Brown" as the second family name.
    let entries = [
        ("r1", [("Smith", "John"), ("Jones", "Peter")]),
        ("r2", [("Smith", "John"), ("Brown", "Alice")]),
        ("r3", [("Smith", "John"), ("Brown", "Adam")]),
    ];
    let mut bib = Bibliography::new();
    for (id, authors) in entries {
        bib.insert(id.to_string(), make_multi_author_ref(id, &authors, 2020));
    }

    let names_then_suffix = Disambiguation {
        names: true,
        add_givenname: false,
        givenname_rule: GivennameRule::default(),
        year_suffix: true,
    };
    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(names_then_suffix),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            shorten: Some(ShortenListOptions {
                min: 2,
                use_first: 1,
                ..Default::default()
            }),
            ..Default::default()
        }),
        ..Default::default()
    };
    let locale = Locale::en_us();

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();

    // r1 is resolved by name expansion alone, so it needs no year suffix.
    let resolved = hints.get("r1").unwrap();
    assert!(!resolved.disamb_condition);
    assert_eq!(resolved.group_index, 1);
    assert_eq!(resolved.min_names_to_show, Some(2));
    assert_eq!(resolved.group_length, 3);

    // r2 and r3 still collide after expansion and fall back to year suffix.
    let still_colliding_a = hints.get("r2").unwrap();
    let still_colliding_b = hints.get("r3").unwrap();
    assert!(still_colliding_a.disamb_condition);
    assert!(still_colliding_b.disamb_condition);
    assert_eq!(still_colliding_a.min_names_to_show, Some(2));
    assert_eq!(still_colliding_b.min_names_to_show, Some(2));
    assert_eq!(still_colliding_a.group_length, 3);
    assert_eq!(still_colliding_b.group_length, 3);
    assert_ne!(still_colliding_a.group_index, still_colliding_b.group_index);
}
1510
#[test]
fn test_label_mode_skips_name_strategies_and_suffixes_by_label_group() {
    use citum_schema::options::{LabelConfig, LabelPreset, Processing};

    // Two works with the same author and year, processed in label mode.
    let mut bib = Bibliography::new();
    for id in ["r1", "r2"] {
        bib.insert(id.to_string(), make_ref(id, "Kuhn", "Thomas", 1962));
    }

    let label_processing = Processing::Label(LabelConfig {
        preset: LabelPreset::Din,
        ..Default::default()
    });
    let config = Config {
        processing: Some(label_processing),
        ..Default::default()
    };
    let locale = Locale::en_us();

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    let hint_r1 = hints.get("r1").unwrap();
    let hint_r2 = hints.get("r2").unwrap();

    // Both entries are flagged for suffix disambiguation.
    assert!(hint_r1.disamb_condition);
    assert!(hint_r2.disamb_condition);
    // Neither name-based strategy is applied.
    assert!(!hint_r1.expand_given_names);
    assert!(!hint_r2.expand_given_names);
    assert_eq!(hint_r1.min_names_to_show, None);
    assert_eq!(hint_r2.min_names_to_show, None);
    // The shared group key is not of the `author:year` form.
    assert_eq!(hint_r1.group_key, hint_r2.group_key);
    assert!(!hint_r1.group_key.contains(':'));
    assert_ne!(hint_r1.group_index, hint_r2.group_index);
}
1542
1543    /// Build a reference whose author is `Contributor::Multilingual` with distinct
1544    /// `original` but a shared `transliterations` entry keyed by `translit_tag`.
1545    fn make_multilingual_ref(
1546        id: &str,
1547        original_family: &str,
1548        translit_family: &str,
1549        translit_tag: &str,
1550        year: i32,
1551    ) -> Reference {
1552        use citum_schema::reference::contributor::MultilingualName;
1553        use std::collections::HashMap;
1554
1555        let mut transliterations = HashMap::new();
1556        transliterations.insert(
1557            translit_tag.to_string(),
1558            StructuredName {
1559                family: MultilingualString::Simple(translit_family.to_string()),
1560                given: MultilingualString::Simple("A.".to_string()),
1561                ..Default::default()
1562            },
1563        );
1564        Reference::Monograph(Box::new(Monograph {
1565            id: Some(id.into()),
1566            r#type: MonographType::Book,
1567            title: Some(Title::Single(format!("Title {id}"))),
1568            author: Some(Contributor::Multilingual(MultilingualName {
1569                original: StructuredName {
1570                    family: MultilingualString::Simple(original_family.to_string()),
1571                    given: MultilingualString::Simple("A.".to_string()),
1572                    ..Default::default()
1573                },
1574                lang: Some("ja".into()),
1575                transliterations,
1576                translations: HashMap::new(),
1577            })),
1578            issued: EdtfString(year.to_string()),
1579            ..Default::default()
1580        }))
1581    }
1582
1583    /// DISAMBIGUATION.md §4: when display mode is `Transliterated`, two references
1584    /// whose transliterations collide must produce the same author key (→ one
1585    /// collision group). When mode is `Primary` (distinct originals), keys must differ.
1586    #[test]
1587    fn test_multilingual_key_generation_respects_display_mode() {
1588        use citum_schema::options::MultilingualConfig;
1589        use citum_schema::options::MultilingualMode;
1590
1591        // Two distinct Japanese authors that share the same romanisation.
1592        // Original families differ ("田中" vs "谷中"), but transliteration is "Tanaka".
1593        let r1 = make_multilingual_ref("r1", "田中", "Tanaka", "ja-Latn", 2020);
1594        let r2 = make_multilingual_ref("r2", "谷中", "Tanaka", "ja-Latn", 2020);
1595
1596        let mut bib = Bibliography::new();
1597        bib.insert("r1".to_string(), r1);
1598        bib.insert("r2".to_string(), r2);
1599
1600        let locale = Locale::en_us();
1601
1602        // --- case 1: Transliterated mode → same key (collision) ---
1603        let config_translit = Config {
1604            multilingual: Some(MultilingualConfig {
1605                name_mode: Some(MultilingualMode::Transliterated),
1606                preferred_transliteration: Some(vec!["ja-Latn".to_string()]),
1607                ..Default::default()
1608            }),
1609            ..Default::default()
1610        };
1611
1612        let cache_translit = Disambiguator::new(&bib, &config_translit, &locale)
1613            .build_reference_cache(&[bib.get("r1").unwrap(), bib.get("r2").unwrap()], false);
1614
1615        let ck_r1 = Disambiguator::reference_cache_key(bib.get("r1").unwrap());
1616        let ck_r2 = Disambiguator::reference_cache_key(bib.get("r2").unwrap());
1617        let ak_r1 = &cache_translit[&ck_r1].author_key;
1618        let ak_r2 = &cache_translit[&ck_r2].author_key;
1619
1620        assert_eq!(
1621            ak_r1, ak_r2,
1622            "transliterated mode: colliding transliterations must produce the same author key"
1623        );
1624        assert_eq!(
1625            ak_r1, "tanaka",
1626            "key should be the lowercased transliteration"
1627        );
1628
1629        // --- case 2: Primary mode → distinct keys (no collision) ---
1630        let config_primary = Config::default(); // multilingual: None → falls through to original
1631
1632        let cache_primary = Disambiguator::new(&bib, &config_primary, &locale)
1633            .build_reference_cache(&[bib.get("r1").unwrap(), bib.get("r2").unwrap()], false);
1634
1635        let ak_r1_primary = &cache_primary[&ck_r1].author_key;
1636        let ak_r2_primary = &cache_primary[&ck_r2].author_key;
1637
1638        assert_ne!(
1639            ak_r1_primary, ak_r2_primary,
1640            "primary mode: distinct originals must produce different author keys"
1641        );
1642    }
1643}