1use crate::reference::{Bibliography, Reference};
7use crate::values::ProcHints;
8use citum_schema::options::{Config, GivennameRule};
9use std::collections::{HashMap, HashSet};
10use std::fmt::Write as _;
11
12use crate::grouping::GroupSorter;
13use citum_schema::grouping::GroupSort;
14use citum_schema::locale::Locale;
15
16pub struct Disambiguator<'a> {
55 bibliography: &'a Bibliography,
56 config: &'a Config,
57 locale: &'a Locale,
58 group_sort: Option<&'a GroupSort>,
59}
60
/// Snapshot of the disambiguation switches derived from the style configuration.
#[derive(Clone, Copy, Default)]
struct DisambiguationFlags {
    /// Expanding the shown name list may be used to tell references apart.
    add_names: bool,
    /// Expanding given names may be used to tell references apart.
    add_givenname: bool,
    /// Year-suffix disambiguation is enabled.
    year_suffix: bool,
    /// The style uses label processing.
    is_label_mode: bool,
    /// Given-name expansion is restricted to the primary (first) name.
    primary_givenname_only: bool,
}
69
70struct GroupDisambiguationContext<'a> {
71 key: &'a str,
72 group: &'a [&'a Reference],
73 flags: DisambiguationFlags,
74 author_group_lengths: &'a HashMap<String, usize>,
75 cache: &'a ReferenceCache,
76}
77
/// Hint values shared by every reference of a group; copied into each
/// reference's `ProcHints` together with its position in the group.
#[derive(Clone, Copy)]
struct HintPlan<'a> {
    /// Group key recorded on each hint.
    key: &'a str,
    /// Whether given names should be expanded.
    expand_given_names: bool,
    /// Whether given-name expansion is limited to the primary name.
    expand_given_names_primary_only: bool,
    /// Minimum number of names to show, when name expansion was chosen.
    min_names_to_show: Option<usize>,
    /// Value stored in the hint's `disamb_condition` field; the year-suffix
    /// path sets it to `true`.
    disamb_condition: bool,
}
86
/// Order in which group members receive their 1-based group index.
#[derive(Clone, Copy)]
enum HintOrder {
    /// Keep the order in which the references appear in the group slice.
    Encountered,
    /// Order the group with the year-suffix sort before numbering.
    GroupSorted,
}
92
93type ReferenceCache = HashMap<usize, CachedReferenceData>;
94
95struct CachedReferenceData {
96 author_key: String,
97 group_key: String,
98 names: Vec<crate::reference::FlatName>,
99 title_key: Option<String>,
100}
101
102impl<'a> Disambiguator<'a> {
103 #[must_use]
105 pub fn new(bibliography: &'a Bibliography, config: &'a Config, locale: &'a Locale) -> Self {
106 Self {
107 bibliography,
108 config,
109 locale,
110 group_sort: None,
111 }
112 }
113
114 #[must_use]
116 pub fn with_group_sort(
117 bibliography: &'a Bibliography,
118 config: &'a Config,
119 locale: &'a Locale,
120 group_sort: &'a GroupSort,
121 ) -> Self {
122 Self {
123 bibliography,
124 config,
125 locale,
126 group_sort: Some(group_sort),
127 }
128 }
129
130 #[must_use]
164 pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
165 let mut hints = HashMap::new();
166 let refs: Vec<&Reference> = self.bibliography.values().collect();
167 let flags = self.disambiguation_flags();
168 let needs_title_key = flags.year_suffix;
172 let cache = self.build_reference_cache(&refs, needs_title_key);
173 let grouped = self.group_references(&refs, &cache);
174 let author_group_lengths = self.author_group_lengths(&refs, &cache);
175
176 for (key, group) in grouped {
177 self.apply_group_hints(
178 &mut hints,
179 GroupDisambiguationContext {
180 key: &key,
181 group: &group,
182 flags,
183 author_group_lengths: &author_group_lengths,
184 cache: &cache,
185 },
186 );
187 }
188
189 hints
190 }
191
192 fn disambiguation_flags(&self) -> DisambiguationFlags {
194 let disamb_config = match self.config.processing.as_ref() {
195 Some(processing) => processing.config().disambiguate,
196 None => {
197 citum_schema::options::Processing::AuthorDate
198 .config()
199 .disambiguate
200 }
201 };
202
203 DisambiguationFlags {
204 add_names: disamb_config.as_ref().is_some_and(|d| d.names),
205 add_givenname: disamb_config.as_ref().is_some_and(|d| d.add_givenname),
206 year_suffix: disamb_config.as_ref().is_some_and(|d| d.year_suffix),
207 is_label_mode: self
208 .config
209 .processing
210 .as_ref()
211 .is_some_and(|p| matches!(p, citum_schema::options::Processing::Label(_))),
212 primary_givenname_only: disamb_config.as_ref().is_some_and(|d| {
213 matches!(
214 d.givenname_rule,
215 GivennameRule::PrimaryName | GivennameRule::PrimaryNameWithInitials
216 )
217 }),
218 }
219 }
220
221 fn build_reference_cache(&self, refs: &[&Reference], needs_title_key: bool) -> ReferenceCache {
224 let mut cache = HashMap::with_capacity(refs.len());
225
226 for reference in refs {
227 let names = reference.author().map_or_else(Vec::new, |authors| {
228 self.render_name_for_disambiguation(&authors)
229 });
230 let author_key = self.build_author_key(&names);
231 let group_key = self.build_group_key(reference, &author_key);
232 let title_key = needs_title_key.then(|| {
233 reference
234 .title()
235 .map(|title| title.to_string())
236 .unwrap_or_default()
237 .to_lowercase()
238 });
239
240 cache.insert(
241 Self::reference_cache_key(reference),
242 CachedReferenceData {
243 author_key,
244 group_key,
245 names,
246 title_key,
247 },
248 );
249 }
250
251 cache
252 }
253
254 fn author_group_lengths(
259 &self,
260 refs: &[&Reference],
261 cache: &ReferenceCache,
262 ) -> HashMap<String, usize> {
263 let mut author_group_lengths = HashMap::new();
264 for reference in refs {
265 let author_key = &self.reference_data(reference, cache).author_key;
266 if !author_key.is_empty() {
267 *author_group_lengths.entry(author_key.clone()).or_insert(0) += 1;
268 }
269 }
270 author_group_lengths
271 }
272
273 fn apply_group_hints(
276 &self,
277 hints: &mut HashMap<String, ProcHints>,
278 context: GroupDisambiguationContext<'_>,
279 ) {
280 if self.try_apply_singleton_hint(hints, &context) {
281 return;
282 }
283
284 if self.try_apply_label_mode_year_suffix(hints, &context) {
285 return;
286 }
287
288 if self.try_apply_name_partitions(hints, &context) {
289 return;
290 }
291
292 if self.try_apply_givenname_resolution(hints, &context) {
293 return;
294 }
295
296 if self.try_apply_combined_resolution(hints, &context) {
297 return;
298 }
299
300 self.apply_year_suffix(hints, &context, false, None);
301 }
302
303 fn try_apply_singleton_hint(
305 &self,
306 hints: &mut HashMap<String, ProcHints>,
307 context: &GroupDisambiguationContext<'_>,
308 ) -> bool {
309 if context.group.len() != 1 {
310 return false;
311 }
312
313 #[allow(clippy::indexing_slicing, reason = "context.group.len() == 1")]
314 let head = context.group[0];
315
316 self.insert_hint(
317 hints,
318 head,
319 context.author_group_lengths,
320 context.cache,
321 ProcHints::default(),
322 );
323 true
324 }
325
326 fn try_apply_label_mode_year_suffix(
328 &self,
329 hints: &mut HashMap<String, ProcHints>,
330 context: &GroupDisambiguationContext<'_>,
331 ) -> bool {
332 if !(context.flags.is_label_mode && context.flags.year_suffix) {
333 return false;
334 }
335
336 self.apply_year_suffix(hints, context, false, None);
337 true
338 }
339
340 fn try_apply_name_partitions(
342 &self,
343 hints: &mut HashMap<String, ProcHints>,
344 context: &GroupDisambiguationContext<'_>,
345 ) -> bool {
346 if !context.flags.add_names {
347 return false;
348 }
349
350 let Some((min_names_to_show, partitions)) =
351 self.partition_by_name_expansion(context.group, context.cache)
352 else {
353 return false;
354 };
355
356 for subgroup in partitions.values() {
357 if subgroup.len() == 1 {
358 self.apply_resolution(hints, subgroup, context, false, Some(min_names_to_show));
359 continue;
360 }
361
362 if context.flags.add_givenname
363 && self.check_givenname_resolution(
364 subgroup,
365 context.cache,
366 Some(min_names_to_show),
367 false,
368 )
369 {
370 if context.flags.primary_givenname_only
374 && !self.check_givenname_resolution(
375 subgroup,
376 context.cache,
377 Some(min_names_to_show),
378 true,
379 )
380 {
381 self.apply_year_suffix_for_group(
382 hints,
383 subgroup,
384 context,
385 true,
386 Some(min_names_to_show),
387 );
388 } else {
389 self.apply_resolution(hints, subgroup, context, true, Some(min_names_to_show));
390 }
391 continue;
392 }
393
394 self.apply_year_suffix_for_group(
395 hints,
396 subgroup,
397 context,
398 false,
399 Some(min_names_to_show),
400 );
401 }
402
403 true
404 }
405
406 fn try_apply_givenname_resolution(
408 &self,
409 hints: &mut HashMap<String, ProcHints>,
410 context: &GroupDisambiguationContext<'_>,
411 ) -> bool {
412 if !(context.flags.add_givenname
416 && self.check_givenname_resolution(context.group, context.cache, None, false))
417 {
418 return false;
419 }
420
421 self.apply_resolution(hints, context.group, context, true, None);
422 true
423 }
424
425 fn try_apply_combined_resolution(
434 &self,
435 hints: &mut HashMap<String, ProcHints>,
436 context: &GroupDisambiguationContext<'_>,
437 ) -> bool {
438 if !context.flags.add_names || !context.flags.add_givenname {
439 return false;
440 }
441
442 let Some(min_names_to_show) = self.find_combined_resolution(context.group, context.cache)
443 else {
444 return false;
445 };
446
447 if context.flags.primary_givenname_only
451 && !self.check_givenname_resolution(
452 context.group,
453 context.cache,
454 Some(min_names_to_show),
455 true,
456 )
457 {
458 self.apply_year_suffix_for_group(
459 hints,
460 context.group,
461 context,
462 true,
463 Some(min_names_to_show),
464 );
465 return true;
466 }
467
468 self.apply_resolution(hints, context.group, context, true, Some(min_names_to_show));
469 true
470 }
471
472 fn find_combined_resolution(
475 &self,
476 group: &[&Reference],
477 cache: &ReferenceCache,
478 ) -> Option<usize> {
479 let max_authors = group
480 .iter()
481 .map(|reference| self.reference_data(reference, cache).names.len())
482 .max()
483 .unwrap_or(0);
484
485 (2..=max_authors).find(|&n| self.check_givenname_resolution(group, cache, Some(n), false))
489 }
490
491 fn apply_resolution(
493 &self,
494 hints: &mut HashMap<String, ProcHints>,
495 group: &[&Reference],
496 context: &GroupDisambiguationContext<'_>,
497 expand_given_names: bool,
498 min_names_to_show: Option<usize>,
499 ) {
500 self.insert_group_hints(
501 hints,
502 group,
503 context.author_group_lengths,
504 HintPlan {
505 key: context.key,
506 expand_given_names,
507 expand_given_names_primary_only: context.flags.primary_givenname_only,
508 min_names_to_show,
509 disamb_condition: false,
510 },
511 HintOrder::Encountered,
512 context.cache,
513 );
514 }
515
516 fn insert_hint(
518 &self,
519 hints: &mut HashMap<String, ProcHints>,
520 reference: &Reference,
521 author_group_lengths: &HashMap<String, usize>,
522 cache: &ReferenceCache,
523 mut hint: ProcHints,
524 ) {
525 hint.group_length = self
526 .author_group_length(reference, author_group_lengths, cache)
527 .unwrap_or(1);
528 hints.insert(reference.id().unwrap_or_default().to_string(), hint);
529 }
530
531 fn author_group_length(
533 &self,
534 reference: &Reference,
535 author_group_lengths: &HashMap<String, usize>,
536 cache: &ReferenceCache,
537 ) -> Option<usize> {
538 let author_key = &self.reference_data(reference, cache).author_key;
539 author_group_lengths.get(author_key).copied()
540 }
541
542 fn apply_year_suffix(
544 &self,
545 hints: &mut HashMap<String, ProcHints>,
546 context: &GroupDisambiguationContext<'_>,
547 expand_given_names: bool,
548 min_names_to_show: Option<usize>,
549 ) {
550 self.apply_year_suffix_for_group(
551 hints,
552 context.group,
553 context,
554 expand_given_names,
555 min_names_to_show,
556 );
557 }
558
559 fn apply_year_suffix_for_group(
561 &self,
562 hints: &mut HashMap<String, ProcHints>,
563 group: &[&Reference],
564 context: &GroupDisambiguationContext<'_>,
565 expand_given_names: bool,
566 min_names_to_show: Option<usize>,
567 ) {
568 self.insert_group_hints(
569 hints,
570 group,
571 context.author_group_lengths,
572 HintPlan {
573 key: context.key,
574 expand_given_names,
575 expand_given_names_primary_only: context.flags.primary_givenname_only,
576 min_names_to_show,
577 disamb_condition: true,
578 },
579 HintOrder::GroupSorted,
580 context.cache,
581 );
582 }
583
584 fn insert_group_hints(
586 &self,
587 hints: &mut HashMap<String, ProcHints>,
588 group: &[&Reference],
589 author_group_lengths: &HashMap<String, usize>,
590 plan: HintPlan<'_>,
591 order: HintOrder,
592 cache: &ReferenceCache,
593 ) {
594 match order {
595 HintOrder::Encountered => {
596 for (idx, reference) in group.iter().enumerate() {
597 self.insert_planned_hint(
598 hints,
599 reference,
600 author_group_lengths,
601 plan,
602 idx + 1,
603 cache,
604 );
605 }
606 }
607 HintOrder::GroupSorted => {
608 for (idx, reference) in self
609 .sort_group_for_year_suffix(group, cache)
610 .iter()
611 .enumerate()
612 {
613 self.insert_planned_hint(
614 hints,
615 reference,
616 author_group_lengths,
617 plan,
618 idx + 1,
619 cache,
620 );
621 }
622 }
623 }
624 }
625
626 fn insert_planned_hint(
628 &self,
629 hints: &mut HashMap<String, ProcHints>,
630 reference: &Reference,
631 author_group_lengths: &HashMap<String, usize>,
632 plan: HintPlan<'_>,
633 group_index: usize,
634 cache: &ReferenceCache,
635 ) {
636 self.insert_hint(
637 hints,
638 reference,
639 author_group_lengths,
640 cache,
641 ProcHints {
642 disamb_condition: plan.disamb_condition,
643 group_index,
644 group_key: plan.key.to_string(),
645 expand_given_names: plan.expand_given_names,
646 expand_given_names_primary_only: plan.expand_given_names_primary_only,
647 min_names_to_show: plan.min_names_to_show,
648 ..Default::default()
649 },
650 );
651 }
652
653 fn sort_group_for_year_suffix<'b>(
656 &self,
657 group: &[&'b Reference],
658 cache: &ReferenceCache,
659 ) -> Vec<&'b Reference> {
660 if let Some(sort_spec) = self.group_sort {
661 let sorter = GroupSorter::new(self.locale);
662 let mut pre_sorted: Vec<&Reference> = group.to_vec();
667 pre_sorted.sort_by(|a, b| {
668 let a_title = self
669 .reference_data(a, cache)
670 .title_key
671 .as_deref()
672 .unwrap_or_default();
673 let b_title = self
674 .reference_data(b, cache)
675 .title_key
676 .as_deref()
677 .unwrap_or_default();
678 a_title.cmp(b_title)
679 });
680 sorter.sort_references(pre_sorted, sort_spec)
681 } else {
682 let mut sorted: Vec<&Reference> = group.to_vec();
683 sorted.sort_by(|a, b| {
684 let a_title = self
685 .reference_data(a, cache)
686 .title_key
687 .as_deref()
688 .unwrap_or_default();
689 let b_title = self
690 .reference_data(b, cache)
691 .title_key
692 .as_deref()
693 .unwrap_or_default();
694 a_title.cmp(b_title)
695 });
696 sorted
697 }
698 }
699
700 fn partition_by_name_expansion<'b>(
703 &self,
704 group: &[&'b Reference],
705 cache: &ReferenceCache,
706 ) -> Option<(usize, HashMap<String, Vec<&'b Reference>>)> {
707 let max_authors = group
708 .iter()
709 .map(|reference| self.reference_data(reference, cache).names.len())
710 .max()
711 .unwrap_or(0);
712
713 let mut buf = String::new();
714 for n in 2..=max_authors {
715 let mut partitions: HashMap<String, Vec<&Reference>> = HashMap::new();
716 for reference in group {
717 let names = &self.reference_data(reference, cache).names;
718 buf.clear();
719 self.append_name_expansion_key(&mut buf, names, n);
720 if let Some(v) = partitions.get_mut(buf.as_str()) {
721 v.push(*reference);
722 } else {
723 partitions.insert(buf.clone(), vec![*reference]);
724 }
725 }
726
727 if partitions.len() > 1 {
728 return Some((n, partitions));
729 }
730 }
731
732 None
733 }
734
735 fn check_givenname_resolution(
744 &self,
745 group: &[&Reference],
746 cache: &ReferenceCache,
747 min_names: Option<usize>,
748 primary_only: bool,
749 ) -> bool {
750 let mut seen = HashSet::new();
751 let mut buf = String::new();
752 let n = min_names.unwrap_or(1);
753 for reference in group {
754 let names = &self.reference_data(reference, cache).names;
755 buf.clear();
756 self.append_givenname_resolution_key(&mut buf, names, n, primary_only);
757 if !seen.insert(buf.clone()) {
758 return false;
759 }
760 }
761 true
762 }
763
764 fn group_references<'b>(
766 &self,
767 references: &[&'b Reference],
768 cache: &ReferenceCache,
769 ) -> HashMap<String, Vec<&'b Reference>> {
770 let mut groups: HashMap<String, Vec<&'b Reference>> = HashMap::new();
771
772 for reference in references {
773 let key = self.reference_data(reference, cache).group_key.clone();
774 groups.entry(key).or_default().push(*reference);
775 }
776
777 groups
778 }
779
780 fn render_name_for_disambiguation(
784 &self,
785 contributor: &citum_schema::reference::Contributor,
786 ) -> Vec<crate::reference::FlatName> {
787 let ml = self.config.multilingual.as_ref();
788 crate::values::resolve_multilingual_name(
789 contributor,
790 ml.and_then(|m| m.name_mode.as_ref()),
791 ml.and_then(|m| m.preferred_transliteration.as_deref()),
792 ml.and_then(|m| m.preferred_script.as_ref()),
793 &self.locale.locale,
794 )
795 }
796
797 fn build_author_key(&self, names: &[crate::reference::FlatName]) -> String {
799 let shorten = self
800 .config
801 .contributors
802 .as_ref()
803 .and_then(|c| c.shorten.as_ref());
804
805 if names.is_empty() {
806 return String::new();
807 }
808
809 let mut key = String::new();
810 if let Some(opts) = shorten
811 && names.len() >= opts.min as usize
812 {
813 self.append_lowercased_families(&mut key, names, opts.use_first as usize, ',');
814 if !key.is_empty() {
815 key.push(',');
816 }
817 key.push_str("et-al");
818 return key;
819 }
820
821 self.append_lowercased_families(&mut key, names, names.len(), ',');
822 key
823 }
824
825 fn build_group_key(&self, reference: &Reference, author_key: &str) -> String {
827 if let Some(citum_schema::options::Processing::Label(config)) = &self.config.processing {
831 let params = config.effective_params();
832 return crate::processor::labels::generate_base_label(reference, ¶ms);
833 }
834
835 if author_key.is_empty() {
840 if let Some(ref_id) = reference.id().filter(|id| !id.is_empty()) {
841 return format!("anon:{ref_id}");
842 }
843 return format!("anon:{}", Self::reference_cache_key(reference));
844 }
845
846 let mut key = String::with_capacity(author_key.len() + 8);
847 key.push_str(author_key);
848 key.push(':');
849 let Some(year) = reference
850 .csl_issued_date()
851 .and_then(|d| d.year().parse::<i32>().ok())
852 else {
853 return key;
854 };
855 let _ = write!(key, "{year}");
856 key
857 }
858
859 fn append_lowercased_families(
861 &self,
862 key: &mut String,
863 names: &[crate::reference::FlatName],
864 take: usize,
865 separator: char,
866 ) {
867 for (idx, name) in names.iter().take(take).enumerate() {
868 if idx > 0 {
869 key.push(separator);
870 }
871 Self::push_lowercased(key, name.family_or_literal());
872 }
873 }
874
875 fn append_name_expansion_key(
877 &self,
878 key: &mut String,
879 names: &[crate::reference::FlatName],
880 n: usize,
881 ) {
882 self.append_lowercased_families(key, names, n, '|');
883 if names.len() > n {
884 if !key.is_empty() {
885 key.push('|');
886 }
887 key.push_str("et-al");
888 }
889 }
890
891 fn append_givenname_resolution_key(
898 &self,
899 key: &mut String,
900 names: &[crate::reference::FlatName],
901 n: usize,
902 primary_only: bool,
903 ) {
904 for (idx, name) in names.iter().take(n).enumerate() {
905 if idx > 0 {
906 key.push_str("||");
907 }
908 Self::append_optional_part(key, name.family.as_deref());
909 if primary_only && idx > 0 {
910 continue;
912 }
913 key.push('|');
914 Self::append_optional_part(key, name.given.as_deref());
915 key.push('|');
916 Self::append_optional_part(key, name.non_dropping_particle.as_deref());
917 key.push('|');
918 Self::append_optional_part(key, name.dropping_particle.as_deref());
919 }
920 }
921
922 fn append_optional_part(key: &mut String, value: Option<&str>) {
924 match value {
925 Some(value) => {
926 let _ = write!(key, "{}:", value.len());
927 key.push_str(value);
928 }
929 None => key.push('-'),
930 }
931 }
932
933 fn push_lowercased(key: &mut String, value: &str) {
935 if value.is_ascii() {
936 key.reserve(value.len());
937 for byte in value.bytes() {
938 key.push((byte as char).to_ascii_lowercase());
939 }
940 } else {
941 key.push_str(&value.to_lowercase());
942 }
943 }
944
945 fn reference_cache_key(reference: &Reference) -> usize {
951 std::ptr::from_ref(reference) as usize
952 }
953
954 #[allow(
956 clippy::expect_used,
957 reason = "Internal cache hydration guarantees presence"
958 )]
959 fn reference_data<'b>(
960 &self,
961 reference: &Reference,
962 cache: &'b ReferenceCache,
963 ) -> &'b CachedReferenceData {
964 cache
965 .get(&Self::reference_cache_key(reference))
966 .expect("disambiguation cache missing reference")
967 }
968}
969
970#[cfg(test)]
971#[allow(
972 clippy::unwrap_used,
973 clippy::expect_used,
974 clippy::panic,
975 clippy::indexing_slicing,
976 clippy::todo,
977 clippy::unimplemented,
978 clippy::unreachable,
979 clippy::get_unwrap,
980 reason = "Panicking is acceptable and often desired in tests."
981)]
982mod tests {
983 use super::*;
984 use crate::Processor;
985 use citum_schema::citation::Citation;
986 use citum_schema::grouping::{GroupSort, GroupSortKey, SortKey};
987 use citum_schema::options::{Config, ContributorConfig, DisplayAsSort, NameForm};
988 use citum_schema::reference::{
989 Contributor, EdtfString, InputReference as Reference, Monograph, MonographType,
990 MultilingualString, StructuredName, Title,
991 };
992 use citum_schema::template::{TemplateComponent, WrapPunctuation};
993 use citum_schema::{BibliographySpec, CitationSpec, Style, StyleInfo};
994
995 fn make_ref(id: &str, family: &str, given: &str, year: i32) -> Reference {
996 let title = format!("Title {id}");
997 Reference::Monograph(Box::new(Monograph {
998 id: Some(id.into()),
999 r#type: MonographType::Book,
1000 title: Some(Title::Single(title.clone())),
1001 short_title: None,
1002 container: None,
1003 author: Some(Contributor::StructuredName(StructuredName {
1004 family: MultilingualString::Simple(family.to_string()),
1005 given: MultilingualString::Simple(given.to_string()),
1006 suffix: None,
1007 dropping_particle: None,
1008 non_dropping_particle: None,
1009 })),
1010 editor: None,
1011 translator: None,
1012 issued: EdtfString(year.to_string()),
1013 ..Default::default()
1014 }))
1015 }
1016
1017 fn make_ref_without_id(title_suffix: &str, family: &str, given: &str, year: i32) -> Reference {
1018 let title = format!("Title {title_suffix}");
1019 Reference::Monograph(Box::new(Monograph {
1020 id: None,
1021 r#type: MonographType::Book,
1022 title: Some(Title::Single(title)),
1023 short_title: None,
1024 container: None,
1025 author: Some(Contributor::StructuredName(StructuredName {
1026 family: MultilingualString::Simple(family.to_string()),
1027 given: MultilingualString::Simple(given.to_string()),
1028 suffix: None,
1029 dropping_particle: None,
1030 non_dropping_particle: None,
1031 })),
1032 editor: None,
1033 translator: None,
1034 issued: EdtfString(year.to_string()),
1035 ..Default::default()
1036 }))
1037 }
1038
1039 fn make_multi_author_ref(id: &str, authors: &[(&str, &str)], year: i32) -> Reference {
1040 let title = format!("Title {id}");
1041 Reference::Monograph(Box::new(Monograph {
1042 id: Some(id.into()),
1043 r#type: MonographType::Book,
1044 title: Some(Title::Single(title)),
1045 short_title: None,
1046 container: None,
1047 author: Some(Contributor::ContributorList(
1048 citum_schema::reference::ContributorList(
1049 authors
1050 .iter()
1051 .map(|(family, given)| {
1052 Contributor::StructuredName(StructuredName {
1053 family: MultilingualString::Simple((*family).to_string()),
1054 given: MultilingualString::Simple((*given).to_string()),
1055 suffix: None,
1056 dropping_particle: None,
1057 non_dropping_particle: None,
1058 })
1059 })
1060 .collect(),
1061 ),
1062 )),
1063 editor: None,
1064 translator: None,
1065 issued: EdtfString(year.to_string()),
1066 ..Default::default()
1067 }))
1068 }
1069
1070 fn make_author_date_style(config: Config, bibliography_sort: Option<GroupSort>) -> Style {
1071 Style {
1072 info: StyleInfo {
1073 title: Some("Disambiguation Test".to_string()),
1074 id: Some("disambiguation-test".into()),
1075 ..Default::default()
1076 },
1077 options: Some(config),
1078 citation: Some(CitationSpec {
1079 template: Some(vec![
1080 citum_schema::tc_contributor!(Author, Short),
1081 citum_schema::tc_date!(Issued, Year, prefix = ", "),
1082 ]),
1083 wrap: Some(WrapPunctuation::Parentheses.into()),
1084 ..Default::default()
1085 }),
1086 bibliography: Some(BibliographySpec {
1087 sort: bibliography_sort.map(citum_schema::grouping::GroupSortEntry::Explicit),
1088 template: Some(vec![TemplateComponent::Title(
1089 citum_schema::template::TemplateTitle {
1090 title: citum_schema::template::TitleType::Primary,
1091 ..Default::default()
1092 },
1093 )]),
1094 ..Default::default()
1095 }),
1096 ..Default::default()
1097 }
1098 }
1099
1100 #[test]
1101 fn test_group_aware_year_suffix_sort() {
1102 use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};
1103
1104 let r1 = make_ref("r1", "Smith", "Same", 2020);
1105 let r2 = make_ref("r2", "Smith", "Same", 2020);
1106
1107 let mut bib = Bibliography::new();
1108 bib.insert("r1".to_string(), r1);
1109 bib.insert("r2".to_string(), r2);
1110
1111 let config = Config::default();
1112 let locale = Locale::en_us();
1113
1114 let disamb_default = Disambiguator::new(&bib, &config, &locale);
1117 let hints_default = disamb_default.calculate_hints();
1118
1119 assert_eq!(hints_default.get("r1").unwrap().group_index, 1);
1120 assert_eq!(hints_default.get("r2").unwrap().group_index, 2);
1121
1122 let sort_spec = GroupSort {
1124 template: vec![GroupSortKey {
1125 key: SortKey::Title,
1126 ascending: false,
1127 order: None,
1128 sort_order: None,
1129 }],
1130 };
1131
1132 let disamb_custom = Disambiguator::with_group_sort(&bib, &config, &locale, &sort_spec);
1133 let hints_custom = disamb_custom.calculate_hints();
1134
1135 assert_eq!(hints_custom.get("r2").unwrap().group_index, 1);
1136 assert_eq!(hints_custom.get("r1").unwrap().group_index, 2);
1137
1138 let style = make_author_date_style(
1139 Config {
1140 processing: Some(Processing::Custom(ProcessingCustom {
1141 disambiguate: Some(Disambiguation {
1142 names: false,
1143 add_givenname: false,
1144 givenname_rule: GivennameRule::default(),
1145 year_suffix: true,
1146 }),
1147 ..Default::default()
1148 })),
1149 contributors: Some(ContributorConfig {
1150 display_as_sort: Some(DisplayAsSort::First),
1151 ..Default::default()
1152 }),
1153 ..Default::default()
1154 },
1155 Some(sort_spec),
1156 );
1157 let processor = Processor::new(style, bib);
1158
1159 let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
1160 let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();
1161
1162 assert!(
1163 rendered_r1.contains("2020b"),
1164 "expected r1 to sort second: {rendered_r1}"
1165 );
1166 assert!(
1167 rendered_r2.contains("2020a"),
1168 "expected r2 to sort first: {rendered_r2}"
1169 );
1170 }
1171
1172 #[test]
1173 fn test_disambiguate_given_names() {
1174 use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};
1175
1176 let r1 = make_ref("r1", "Smith", "John", 2020);
1178 let r2 = make_ref("r2", "Smith", "Alice", 2020);
1179
1180 let mut bib = Bibliography::new();
1181 bib.insert("r1".to_string(), r1);
1182 bib.insert("r2".to_string(), r2);
1183
1184 let config = Config {
1185 processing: Some(Processing::Custom(ProcessingCustom {
1186 disambiguate: Some(Disambiguation {
1187 names: false,
1188 add_givenname: true,
1189 givenname_rule: GivennameRule::AllNames,
1190 year_suffix: false,
1191 }),
1192 ..Default::default()
1193 })),
1194 ..Default::default()
1195 };
1196 let locale = Locale::en_us();
1197
1198 let disamb = Disambiguator::new(&bib, &config, &locale);
1199 let hints = disamb.calculate_hints();
1200
1201 assert!(hints.get("r1").unwrap().expand_given_names);
1203 assert!(hints.get("r2").unwrap().expand_given_names);
1204
1205 assert!(!hints.get("r1").unwrap().disamb_condition);
1207 assert!(!hints.get("r2").unwrap().disamb_condition);
1208
1209 assert_ne!(
1211 hints.get("r1").unwrap().group_index,
1212 hints.get("r2").unwrap().group_index
1213 );
1214
1215 let style = make_author_date_style(
1216 Config {
1217 processing: Some(Processing::Custom(ProcessingCustom {
1218 disambiguate: Some(Disambiguation {
1219 names: false,
1220 add_givenname: true,
1221 givenname_rule: GivennameRule::AllNames,
1222 year_suffix: false,
1223 }),
1224 ..Default::default()
1225 })),
1226 contributors: Some(ContributorConfig {
1227 initialize_with: Some(". ".to_string()),
1228 name_form: Some(NameForm::Initials),
1229 ..Default::default()
1230 }),
1231 ..Default::default()
1232 },
1233 None,
1234 );
1235 let processor = Processor::new(style, bib);
1236
1237 let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
1238 let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();
1239
1240 assert!(
1241 rendered_r1.contains("J. Smith"),
1242 "expected expanded given name for r1: {rendered_r1}"
1243 );
1244 assert!(
1245 rendered_r2.contains("A. Smith"),
1246 "expected expanded given name for r2: {rendered_r2}"
1247 );
1248 }
1249
1250 #[test]
1257 fn test_primary_name_identical_primary_falls_back_to_year_suffix() {
1258 use citum_schema::options::{
1259 Disambiguation, Processing, ProcessingCustom, ShortenListOptions,
1260 };
1261
1262 let r1 = make_multi_author_ref(
1265 "r1",
1266 &[
1267 ("Asthma", "Albert"),
1268 ("Bronchitis", "Brandon"),
1269 ("Cold", "Crispin"),
1270 ],
1271 1990,
1272 );
1273 let r2 = make_multi_author_ref(
1274 "r2",
1275 &[
1276 ("Asthma", "Albert"),
1277 ("Bronchitis", "Edward"),
1278 ("Cold", "Crispin"),
1279 ],
1280 1990,
1281 );
1282
1283 let mut bib = Bibliography::new();
1284 bib.insert("r1".to_string(), r1);
1285 bib.insert("r2".to_string(), r2);
1286
1287 let config = Config {
1288 processing: Some(Processing::Custom(ProcessingCustom {
1289 disambiguate: Some(Disambiguation {
1290 names: true,
1291 add_givenname: true,
1292 givenname_rule: GivennameRule::PrimaryName,
1293 year_suffix: true,
1294 }),
1295 ..Default::default()
1296 })),
1297 contributors: Some(ContributorConfig {
1298 shorten: Some(ShortenListOptions {
1299 min: 3,
1300 use_first: 1,
1301 ..Default::default()
1302 }),
1303 ..Default::default()
1304 }),
1305 ..Default::default()
1306 };
1307 let locale = Locale::en_us();
1308
1309 let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
1310
1311 let h1 = hints.get("r1").expect("r1 must have a hint");
1312 let h2 = hints.get("r2").expect("r2 must have a hint");
1313
1314 assert_eq!(
1316 h1.min_names_to_show,
1317 Some(2),
1318 "r1: expected min_names_to_show=2"
1319 );
1320 assert_eq!(
1321 h2.min_names_to_show,
1322 Some(2),
1323 "r2: expected min_names_to_show=2"
1324 );
1325
1326 assert!(h1.expand_given_names, "r1: expected expand_given_names");
1328 assert!(h2.expand_given_names, "r2: expected expand_given_names");
1329
1330 assert!(
1332 h1.expand_given_names_primary_only,
1333 "r1: expected primary-only"
1334 );
1335 assert!(
1336 h2.expand_given_names_primary_only,
1337 "r2: expected primary-only"
1338 );
1339
1340 assert!(
1342 h1.disamb_condition,
1343 "r1: expected disamb_condition (year-suffix)"
1344 );
1345 assert!(
1346 h2.disamb_condition,
1347 "r2: expected disamb_condition (year-suffix)"
1348 );
1349 assert_ne!(
1350 h1.group_index, h2.group_index,
1351 "r1 and r2 must receive distinct year-suffix positions"
1352 );
1353 }
1354
/// Verifies that `build_reference_cache` only fills `title_key` when the
/// year-suffix flag handed to it is set.
///
/// The flag is taken from `disambiguation_flags()` for two configurations:
/// one with `year_suffix: false` and one with `year_suffix: true`. Each
/// cache is first checked to be non-empty, because `Iterator::all` returns
/// `true` for an empty iterator and would otherwise let the test pass even
/// if nothing had been cached at all.
#[test]
fn test_build_reference_cache_populates_title_keys_when_year_suffix_is_active() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Builds a custom-processing config that differs only in the two flags given.
    let config_with = |add_givenname: bool, year_suffix: bool| Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname,
                givenname_rule: GivennameRule::default(),
                year_suffix,
            }),
            ..Default::default()
        })),
        ..Default::default()
    };

    let mut bib = Bibliography::new();
    bib.insert("r1".to_string(), make_ref("r1", "Smith", "John", 2020));
    let refs: Vec<&Reference> = bib.values().collect();
    let locale = Locale::en_us();

    // Year suffix switched off: no entry may carry a title key.
    let disabled_config = config_with(true, false);
    let disabled = Disambiguator::new(&bib, &disabled_config, &locale);
    let disabled_flags = disabled.disambiguation_flags();
    let disabled_cache = disabled.build_reference_cache(&refs, disabled_flags.year_suffix);
    assert!(
        !disabled_cache.is_empty(),
        "cache must contain the inserted reference (year-suffix disabled)"
    );
    assert!(disabled_cache.values().all(|data| data.title_key.is_none()));

    // Year suffix switched on: every entry must carry a title key.
    let enabled_config = config_with(false, true);
    let enabled = Disambiguator::new(&bib, &enabled_config, &locale);
    let enabled_flags = enabled.disambiguation_flags();
    let enabled_cache = enabled.build_reference_cache(&refs, enabled_flags.year_suffix);
    assert!(
        !enabled_cache.is_empty(),
        "cache must contain the inserted reference (year-suffix enabled)"
    );
    assert!(enabled_cache.values().all(|data| data.title_key.is_some()));
}
1402
/// Four references with empty author names (one of them with an empty id,
/// one built through `make_ref_without_id`) must each end up in a group of
/// their own, and no group may be keyed by the bare `"anon:"` prefix.
#[test]
fn test_anonymous_refs_do_not_receive_year_suffix() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Every disambiguation strategy is switched on.
    let disambiguate = Disambiguation {
        names: true,
        add_givenname: true,
        givenname_rule: GivennameRule::default(),
        year_suffix: true,
    };
    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(disambiguate),
            ..Default::default()
        })),
        ..Default::default()
    };
    let locale = Locale::en_us();

    // Insertion order is kept identical: a1, a2, a3, a4.
    let entries = [
        ("a1", make_ref("a1", "", "", 2020)),
        ("a2", make_ref("a2", "", "", 2020)),
        ("a3", make_ref("", "", "", 2020)),
        ("a4", make_ref_without_id("missing-id", "", "", 2020)),
    ];
    let mut bib = Bibliography::new();
    for (key, reference) in entries {
        bib.insert(key.to_string(), reference);
    }

    let disambiguator = Disambiguator::new(&bib, &config, &locale);
    let refs: Vec<&Reference> = bib.values().collect();
    let cache = disambiguator.build_reference_cache(&refs, false);
    let grouped = disambiguator.group_references(&refs, &cache);

    assert_eq!(grouped.len(), 4);
    assert!(!grouped.contains_key("anon:"));
    let every_group_is_singleton = grouped.values().all(|members| members.len() == 1);
    assert!(every_group_is_singleton);
}
1439
/// `push_lowercased` must append exactly what `str::to_lowercase` yields
/// for a non-ASCII input.
///
/// The sample ends in a capital sigma, which `str::to_lowercase` lowercases
/// context-sensitively, so a naive per-character mapping would not match.
#[test]
fn test_push_lowercased_matches_str_lowercase_for_non_ascii() {
    let input = "ΟΣ";
    let expected = input.to_lowercase();

    let mut buffer = String::new();
    Disambiguator::push_lowercased(&mut buffer, input);

    assert_eq!(buffer, expected);
}
1449
/// Three 2020 works share the first author "Smith, John" and differ in the
/// second author (Jones / Brown / Brown). With name expansion and year
/// suffix enabled, given-name expansion disabled, and the list shortened to
/// one name once there are two or more:
///
/// * `r1` needs no disambiguation condition,
/// * `r2` and `r3` both get the condition and distinct group indices,
/// * all three report `min_names_to_show == Some(2)` and `group_length == 3`.
#[test]
fn test_partitioned_name_expansion_keeps_unique_items_and_suffixes_remainders() {
    use citum_schema::options::{
        ContributorConfig, Disambiguation, Processing, ProcessingCustom, ShortenListOptions,
    };

    let locale = Locale::en_us();
    let shorten = ShortenListOptions {
        min: 2,
        use_first: 1,
        ..Default::default()
    };
    let disambiguate = Disambiguation {
        names: true,
        add_givenname: false,
        givenname_rule: GivennameRule::default(),
        year_suffix: true,
    };
    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(disambiguate),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            shorten: Some(shorten),
            ..Default::default()
        }),
        ..Default::default()
    };

    // Same insertion order as before: r1, r2, r3.
    let entries = [
        ("r1", [("Smith", "John"), ("Jones", "Peter")]),
        ("r2", [("Smith", "John"), ("Brown", "Alice")]),
        ("r3", [("Smith", "John"), ("Brown", "Adam")]),
    ];
    let mut bib = Bibliography::new();
    for (id, authors) in entries {
        bib.insert(id.to_string(), make_multi_author_ref(id, &authors, 2020));
    }

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();

    // r1 is already unique once the second name is shown.
    let unique = hints.get("r1").unwrap();
    assert!(!unique.disamb_condition);
    assert_eq!(unique.group_index, 1);
    assert_eq!(unique.min_names_to_show, Some(2));
    assert_eq!(unique.group_length, 3);

    // r2 and r3 still collide on "Smith, Brown" and need a suffix each.
    let remaining_a = hints.get("r2").unwrap();
    let remaining_b = hints.get("r3").unwrap();
    for remainder in [remaining_a, remaining_b] {
        assert!(remainder.disamb_condition);
        assert_eq!(remainder.min_names_to_show, Some(2));
        assert_eq!(remainder.group_length, 3);
    }
    assert_ne!(remaining_a.group_index, remaining_b.group_index);
}
1510
/// Under label processing (`LabelPreset::Din`), two identical
/// "Kuhn, Thomas 1962" entries must:
///
/// * both carry the disambiguation condition,
/// * not request given-name expansion or a minimum name count,
/// * share one group key that contains no `':'`,
/// * receive different group indices.
#[test]
fn test_label_mode_skips_name_strategies_and_suffixes_by_label_group() {
    use citum_schema::options::{LabelConfig, LabelPreset, Processing};

    let locale = Locale::en_us();
    let label_config = LabelConfig {
        preset: LabelPreset::Din,
        ..Default::default()
    };
    let config = Config {
        processing: Some(Processing::Label(label_config)),
        ..Default::default()
    };

    let mut bib = Bibliography::new();
    for id in ["r1", "r2"] {
        bib.insert(id.to_string(), make_ref(id, "Kuhn", "Thomas", 1962));
    }

    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    let first = hints.get("r1").unwrap();
    let second = hints.get("r2").unwrap();

    // Suffix disambiguation applies to both entries ...
    assert!(first.disamb_condition);
    assert!(second.disamb_condition);

    // ... while the name-based strategies stay untouched.
    assert!(!first.expand_given_names);
    assert!(!second.expand_given_names);
    assert_eq!(first.min_names_to_show, None);
    assert_eq!(second.min_names_to_show, None);

    // Both entries share one group key, which carries no ':' separator.
    assert_eq!(first.group_key, second.group_key);
    assert!(!first.group_key.contains(':'));
    assert_ne!(first.group_index, second.group_index);
}
1542
1543 fn make_multilingual_ref(
1546 id: &str,
1547 original_family: &str,
1548 translit_family: &str,
1549 translit_tag: &str,
1550 year: i32,
1551 ) -> Reference {
1552 use citum_schema::reference::contributor::MultilingualName;
1553 use std::collections::HashMap;
1554
1555 let mut transliterations = HashMap::new();
1556 transliterations.insert(
1557 translit_tag.to_string(),
1558 StructuredName {
1559 family: MultilingualString::Simple(translit_family.to_string()),
1560 given: MultilingualString::Simple("A.".to_string()),
1561 ..Default::default()
1562 },
1563 );
1564 Reference::Monograph(Box::new(Monograph {
1565 id: Some(id.into()),
1566 r#type: MonographType::Book,
1567 title: Some(Title::Single(format!("Title {id}"))),
1568 author: Some(Contributor::Multilingual(MultilingualName {
1569 original: StructuredName {
1570 family: MultilingualString::Simple(original_family.to_string()),
1571 given: MultilingualString::Simple("A.".to_string()),
1572 ..Default::default()
1573 },
1574 lang: Some("ja".into()),
1575 transliterations,
1576 translations: HashMap::new(),
1577 })),
1578 issued: EdtfString(year.to_string()),
1579 ..Default::default()
1580 }))
1581 }
1582
/// Author keys in the reference cache must follow the configured
/// multilingual name mode.
///
/// Two references have different original family names but the same
/// "ja-Latn" transliteration "Tanaka":
///
/// * with `MultilingualMode::Transliterated` and "ja-Latn" preferred, both
///   author keys are `"tanaka"`;
/// * with `Config::default()`, the two author keys differ.
#[test]
fn test_multilingual_key_generation_respects_display_mode() {
    use citum_schema::options::MultilingualConfig;
    use citum_schema::options::MultilingualMode;

    let locale = Locale::en_us();

    let mut bib = Bibliography::new();
    bib.insert(
        "r1".to_string(),
        make_multilingual_ref("r1", "田中", "Tanaka", "ja-Latn", 2020),
    );
    bib.insert(
        "r2".to_string(),
        make_multilingual_ref("r2", "谷中", "Tanaka", "ja-Latn", 2020),
    );

    let first = bib.get("r1").unwrap();
    let second = bib.get("r2").unwrap();
    let refs = [first, second];
    let ck_r1 = Disambiguator::reference_cache_key(first);
    let ck_r2 = Disambiguator::reference_cache_key(second);

    // Transliterated mode: the colliding transliterations drive the key.
    let config_translit = Config {
        multilingual: Some(MultilingualConfig {
            name_mode: Some(MultilingualMode::Transliterated),
            preferred_transliteration: Some(vec!["ja-Latn".to_string()]),
            ..Default::default()
        }),
        ..Default::default()
    };
    let cache_translit =
        Disambiguator::new(&bib, &config_translit, &locale).build_reference_cache(&refs, false);

    let ak_r1 = &cache_translit[&ck_r1].author_key;
    let ak_r2 = &cache_translit[&ck_r2].author_key;

    assert_eq!(
        ak_r1, ak_r2,
        "transliterated mode: colliding transliterations must produce the same author key"
    );
    assert_eq!(
        ak_r1, "tanaka",
        "key should be the lowercased transliteration"
    );

    // Default configuration: the distinct originals drive the key.
    let config_primary = Config::default();
    let cache_primary =
        Disambiguator::new(&bib, &config_primary, &locale).build_reference_cache(&refs, false);

    let ak_r1_primary = &cache_primary[&ck_r1].author_key;
    let ak_r2_primary = &cache_primary[&ck_r2].author_key;

    assert_ne!(
        ak_r1_primary, ak_r2_primary,
        "primary mode: distinct originals must produce different author keys"
    );
}
1643}