use crate::reference::{Bibliography, Reference};
use crate::values::ProcHints;
use citum_schema::options::Config;
use std::collections::{HashMap, HashSet};
use std::fmt::Write as _;
use crate::grouping::GroupSorter;
use citum_schema::grouping::GroupSort;
use citum_schema::locale::Locale;
/// Computes per-reference disambiguation hints for a bibliography.
///
/// Holds only borrowed inputs. Build one with [`Disambiguator::new`] or
/// [`Disambiguator::with_group_sort`], then call
/// [`Disambiguator::calculate_hints`].
pub struct Disambiguator<'a> {
/// The references to disambiguate.
bibliography: &'a Bibliography,
/// Style options: `processing` selects the disambiguation settings and
/// `contributors.shorten` shapes the author keys.
config: &'a Config,
/// Locale handed to the group sorter when a group sort is configured.
locale: &'a Locale,
/// Optional sort specification used when ordering a group for year suffixes.
group_sort: Option<&'a GroupSort>,
}
/// Snapshot of the disambiguation switches resolved from the style config.
#[derive(Clone, Copy, Default)]
struct DisambiguationFlags {
/// Mirrors the config's `names` switch: try showing more author names.
add_names: bool,
/// Mirrors the config's `add_givenname` switch: try expanding given names.
add_givenname: bool,
/// Mirrors the config's `year_suffix` switch.
year_suffix: bool,
/// True when the configured processing mode is `Processing::Label`.
is_label_mode: bool,
}
/// Everything the per-group strategies need to resolve one ambiguous group.
struct GroupDisambiguationContext<'a> {
/// Group key shared by every reference in `group`.
key: &'a str,
/// References that produced the same group key.
group: &'a [&'a Reference],
/// Disambiguation switches in effect for this run.
flags: DisambiguationFlags,
/// Number of references per non-empty author key, across the whole bibliography.
author_group_lengths: &'a HashMap<String, usize>,
/// Precomputed per-reference keys and names.
cache: &'a ReferenceCache,
}
/// Values copied into every `ProcHints` emitted for one (sub)group.
#[derive(Clone, Copy)]
struct HintPlan<'a> {
/// Group key stored in each hint's `group_key`.
key: &'a str,
/// Stored in each hint's `expand_given_names`.
expand_given_names: bool,
/// Stored in each hint's `min_names_to_show`.
min_names_to_show: Option<usize>,
/// Stored in each hint's `disamb_condition`; set to `true` by the year-suffix path.
disamb_condition: bool,
}
/// Order in which group members receive their 1-based `group_index`.
#[derive(Clone, Copy)]
enum HintOrder {
/// Number the members in the order of the slice as given.
Encountered,
/// Order the members with `sort_group_for_year_suffix` before numbering.
GroupSorted,
}
/// Derived per-reference data, keyed by the value `reference_cache_key`
/// computes for each reference.
type ReferenceCache = HashMap<usize, CachedReferenceData>;
/// Values derived once per reference so the strategies never recompute them.
struct CachedReferenceData {
/// Key built from the author names; empty when the reference has no author names.
author_key: String,
/// Key used to bucket references that may be ambiguous with each other.
group_key: String,
/// Flattened author names; empty when the reference has no author.
names: Vec<crate::reference::FlatName>,
/// Lowercased title, present only when the cache was built with title keys requested.
title_key: Option<String>,
}
impl<'a> Disambiguator<'a> {
/// Creates a disambiguator without a group sort.
///
/// Within a year-suffix group, references are then ordered by their cached
/// title key only.
#[must_use]
pub fn new(bibliography: &'a Bibliography, config: &'a Config, locale: &'a Locale) -> Self {
    let group_sort = None;
    Self {
        bibliography,
        config,
        locale,
        group_sort,
    }
}
#[must_use]
pub fn with_group_sort(
bibliography: &'a Bibliography,
config: &'a Config,
locale: &'a Locale,
group_sort: &'a GroupSort,
) -> Self {
Self {
bibliography,
config,
locale,
group_sort: Some(group_sort),
}
}
/// Computes disambiguation hints for every reference in the bibliography.
///
/// References are bucketed by group key and each bucket is resolved
/// independently. The returned map is keyed by the string form of each
/// reference's id.
#[must_use]
pub fn calculate_hints(&self) -> HashMap<String, ProcHints> {
    let flags = self.disambiguation_flags();
    let refs: Vec<&Reference> = self.bibliography.values().collect();

    // Title keys are only consulted when ordering year suffixes.
    let cache = self.build_reference_cache(&refs, flags.year_suffix);
    let author_group_lengths = self.author_group_lengths(&refs, &cache);
    let grouped = self.group_references(&refs, &cache);

    let mut hints = HashMap::new();
    for (key, group) in &grouped {
        let context = GroupDisambiguationContext {
            key,
            group,
            flags,
            author_group_lengths: &author_group_lengths,
            cache: &cache,
        };
        self.apply_group_hints(&mut hints, context);
    }
    hints
}
/// Resolves the disambiguation switches from the style configuration.
///
/// When no processing mode is configured, the settings of
/// `Processing::AuthorDate` are used. A missing disambiguation section
/// leaves all three switches off.
fn disambiguation_flags(&self) -> DisambiguationFlags {
    let disambiguate = self.config.processing.as_ref().map_or_else(
        || {
            citum_schema::options::Processing::AuthorDate
                .config()
                .disambiguate
        },
        |processing| processing.config().disambiguate,
    );
    let (add_names, add_givenname, year_suffix) = disambiguate
        .as_ref()
        .map_or((false, false, false), |d| {
            (d.names, d.add_givenname, d.year_suffix)
        });
    let is_label_mode = matches!(
        self.config.processing,
        Some(citum_schema::options::Processing::Label(_))
    );
    DisambiguationFlags {
        add_names,
        add_givenname,
        year_suffix,
        is_label_mode,
    }
}
fn build_reference_cache(&self, refs: &[&Reference], needs_title_key: bool) -> ReferenceCache {
let mut cache = HashMap::with_capacity(refs.len());
for reference in refs {
let names = reference
.author()
.map_or_else(Vec::new, |authors| authors.to_names_vec());
let author_key = self.build_author_key(&names);
let group_key = self.build_group_key(reference, &author_key);
let title_key = needs_title_key.then(|| {
reference
.title()
.map(|title| title.to_string())
.unwrap_or_default()
.to_lowercase()
});
cache.insert(
Self::reference_cache_key(reference),
CachedReferenceData {
author_key,
group_key,
names,
title_key,
},
);
}
cache
}
/// Counts how many references share each non-empty author key.
///
/// References whose author key is empty are not counted.
fn author_group_lengths(
    &self,
    refs: &[&Reference],
    cache: &ReferenceCache,
) -> HashMap<String, usize> {
    let mut counts: HashMap<String, usize> = HashMap::new();
    let author_keys = refs
        .iter()
        .map(|reference| &self.reference_data(reference, cache).author_key)
        .filter(|author_key| !author_key.is_empty());
    for author_key in author_keys {
        *counts.entry(author_key.clone()).or_default() += 1;
    }
    counts
}
/// Resolves one group by trying each strategy in priority order.
///
/// Order: singleton, label-mode year suffix, name partitions, given-name
/// expansion, combined names + given names. The first strategy that
/// reports success ends the search; if none does, the group falls back to
/// year suffixes.
fn apply_group_hints(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: GroupDisambiguationContext<'_>,
) {
    // `||` short-circuits, so later strategies never run once one succeeds.
    let resolved = self.try_apply_singleton_hint(hints, &context)
        || self.try_apply_label_mode_year_suffix(hints, &context)
        || self.try_apply_name_partitions(hints, &context)
        || self.try_apply_givenname_resolution(hints, &context)
        || self.try_apply_combined_resolution(hints, &context);
    if !resolved {
        self.apply_year_suffix(hints, &context, false, None);
    }
}
/// Handles a group with exactly one member, which needs no disambiguation.
///
/// Inserts a default hint (with its author group length filled in) and
/// returns `true`; returns `false` for any other group size.
fn try_apply_singleton_hint(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
) -> bool {
    // The slice pattern only matches a one-element group.
    let [only] = context.group else {
        return false;
    };
    self.insert_hint(
        hints,
        only,
        context.author_group_lengths,
        context.cache,
        ProcHints::default(),
    );
    true
}
/// In label mode with year suffixes enabled, goes straight to year
/// suffixes and skips every name-based strategy.
///
/// Returns whether the strategy applied.
fn try_apply_label_mode_year_suffix(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
) -> bool {
    let applies = context.flags.is_label_mode && context.flags.year_suffix;
    if applies {
        self.apply_year_suffix(hints, context, false, None);
    }
    applies
}
/// Tries to split the group by showing more author names.
///
/// Requires the `add_names` flag and a name count that separates the
/// group into more than one partition. Each partition is then resolved on
/// its own:
/// - a single member is resolved by the extra names alone;
/// - otherwise, given-name expansion is used if enabled and sufficient;
/// - otherwise the partition falls back to year suffixes.
///
/// Returns `false` (leaving `hints` untouched) when the strategy does not
/// apply.
fn try_apply_name_partitions(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
) -> bool {
    if !context.flags.add_names {
        return false;
    }
    let Some((shown, partitions)) =
        self.partition_by_name_expansion(context.group, context.cache)
    else {
        return false;
    };
    let min_names = Some(shown);
    for subgroup in partitions.values() {
        if subgroup.len() == 1 {
            self.apply_resolution(hints, subgroup, context, false, min_names);
        } else if context.flags.add_givenname
            && self.check_givenname_resolution(subgroup, context.cache, min_names)
        {
            self.apply_resolution(hints, subgroup, context, true, min_names);
        } else {
            self.apply_year_suffix_for_group(hints, subgroup, context, false, min_names);
        }
    }
    true
}
/// Resolves the whole group by expanding given names, when that flag is
/// enabled and the first author's full name is unique for every member.
///
/// Returns whether the strategy applied.
fn try_apply_givenname_resolution(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
) -> bool {
    if !context.flags.add_givenname {
        return false;
    }
    if !self.check_givenname_resolution(context.group, context.cache, None) {
        return false;
    }
    self.apply_resolution(hints, context.group, context, true, None);
    true
}
/// Resolves the group by showing more names *and* expanding given names.
///
/// Requires both the `add_names` and `add_givenname` flags, plus a name
/// count for which every member becomes unique. Returns whether the
/// strategy applied.
fn try_apply_combined_resolution(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
) -> bool {
    let both_enabled = context.flags.add_names && context.flags.add_givenname;
    if !both_enabled {
        return false;
    }
    match self.find_combined_resolution(context.group, context.cache) {
        Some(shown) => {
            self.apply_resolution(hints, context.group, context, true, Some(shown));
            true
        }
        None => false,
    }
}
/// Finds the smallest name count, starting at 2, for which the full names
/// of the leading authors are unique across the group.
///
/// Returns `None` when no count up to the longest author list works.
fn find_combined_resolution(
    &self,
    group: &[&Reference],
    cache: &ReferenceCache,
) -> Option<usize> {
    let mut longest = 0;
    for reference in group {
        longest = longest.max(self.reference_data(reference, cache).names.len());
    }
    for shown in 2..=longest {
        if self.check_givenname_resolution(group, cache, Some(shown)) {
            return Some(shown);
        }
    }
    None
}
/// Emits hints for a (sub)group resolved without year suffixes.
///
/// Members are numbered in the order given and `disamb_condition` is left
/// `false`.
fn apply_resolution(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    group: &[&Reference],
    context: &GroupDisambiguationContext<'_>,
    expand_given_names: bool,
    min_names_to_show: Option<usize>,
) {
    let plan = HintPlan {
        key: context.key,
        expand_given_names,
        min_names_to_show,
        disamb_condition: false,
    };
    self.insert_group_hints(
        hints,
        group,
        context.author_group_lengths,
        plan,
        HintOrder::Encountered,
        context.cache,
    );
}
fn insert_hint(
&self,
hints: &mut HashMap<String, ProcHints>,
reference: &Reference,
author_group_lengths: &HashMap<String, usize>,
cache: &ReferenceCache,
mut hint: ProcHints,
) {
hint.group_length = self
.author_group_length(reference, author_group_lengths, cache)
.unwrap_or(1);
hints.insert(reference.id().unwrap_or_default().to_string(), hint);
}
/// Looks up how many references share `reference`'s author key.
///
/// Returns `None` when the key has no entry in `author_group_lengths`.
fn author_group_length(
    &self,
    reference: &Reference,
    author_group_lengths: &HashMap<String, usize>,
    cache: &ReferenceCache,
) -> Option<usize> {
    let data = self.reference_data(reference, cache);
    author_group_lengths.get(data.author_key.as_str()).copied()
}
/// Applies year-suffix hints to every member of the context's group.
///
/// Convenience wrapper over `apply_year_suffix_for_group` for the whole
/// group.
fn apply_year_suffix(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    context: &GroupDisambiguationContext<'_>,
    expand_given_names: bool,
    min_names_to_show: Option<usize>,
) {
    let whole_group = context.group;
    self.apply_year_suffix_for_group(
        hints,
        whole_group,
        context,
        expand_given_names,
        min_names_to_show,
    );
}
/// Emits year-suffix hints for `group`.
///
/// Members are numbered in sorted order and each hint has
/// `disamb_condition` set to `true`.
fn apply_year_suffix_for_group(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    group: &[&Reference],
    context: &GroupDisambiguationContext<'_>,
    expand_given_names: bool,
    min_names_to_show: Option<usize>,
) {
    let plan = HintPlan {
        key: context.key,
        expand_given_names,
        min_names_to_show,
        disamb_condition: true,
    };
    self.insert_group_hints(
        hints,
        group,
        context.author_group_lengths,
        plan,
        HintOrder::GroupSorted,
        context.cache,
    );
}
/// Inserts one planned hint per group member, assigning 1-based
/// `group_index` values in the requested order.
fn insert_group_hints(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    group: &[&Reference],
    author_group_lengths: &HashMap<String, usize>,
    plan: HintPlan<'_>,
    order: HintOrder,
    cache: &ReferenceCache,
) {
    // Only the sorted ordering needs owned storage; declare it up front so
    // the borrowed slice below can outlive the `match`.
    let sorted;
    let ordered: &[&Reference] = match order {
        HintOrder::Encountered => group,
        HintOrder::GroupSorted => {
            sorted = self.sort_group_for_year_suffix(group, cache);
            sorted.as_slice()
        }
    };
    for (position, reference) in ordered.iter().enumerate() {
        self.insert_planned_hint(
            hints,
            reference,
            author_group_lengths,
            plan,
            position + 1,
            cache,
        );
    }
}
/// Builds a `ProcHints` from `plan` and `group_index` and stores it for
/// `reference`. Fields not covered by the plan keep their defaults.
fn insert_planned_hint(
    &self,
    hints: &mut HashMap<String, ProcHints>,
    reference: &Reference,
    author_group_lengths: &HashMap<String, usize>,
    plan: HintPlan<'_>,
    group_index: usize,
    cache: &ReferenceCache,
) {
    let planned = ProcHints {
        disamb_condition: plan.disamb_condition,
        group_index,
        group_key: plan.key.to_string(),
        expand_given_names: plan.expand_given_names,
        min_names_to_show: plan.min_names_to_show,
        ..Default::default()
    };
    self.insert_hint(hints, reference, author_group_lengths, cache, planned);
}
/// Orders a group for year-suffix assignment.
///
/// The group is first stably sorted by cached title key (a missing key
/// compares as the empty string). When a group sort is configured, that
/// pre-sorted list is then handed to the group sorter, and its result is
/// returned.
fn sort_group_for_year_suffix<'b>(
    &self,
    group: &[&'b Reference],
    cache: &ReferenceCache,
) -> Vec<&'b Reference> {
    let mut by_title: Vec<&'b Reference> = group.to_vec();
    // The key borrows from `cache`, not from the element, so it is a valid
    // `sort_by_key` key; the sort is stable like `sort_by`.
    by_title.sort_by_key(|reference| {
        self.reference_data(reference, cache)
            .title_key
            .as_deref()
            .unwrap_or_default()
    });
    match self.group_sort {
        Some(sort_spec) => GroupSorter::new(self.locale).sort_references(by_title, sort_spec),
        None => by_title,
    }
}
/// Finds the smallest name count, starting at 2, that splits `group` into
/// more than one partition.
///
/// Returns that count together with the partitions (keyed by the name
/// expansion key), or `None` if no count up to the longest author list
/// separates the group.
fn partition_by_name_expansion<'b>(
    &self,
    group: &[&'b Reference],
    cache: &ReferenceCache,
) -> Option<(usize, HashMap<String, Vec<&'b Reference>>)> {
    let longest = group
        .iter()
        .fold(0, |acc, reference| {
            acc.max(self.reference_data(reference, cache).names.len())
        });
    // One scratch buffer is reused for every key; it is cloned only when a
    // new partition has to be created.
    let mut scratch = String::new();
    for shown in 2..=longest {
        let mut partitions: HashMap<String, Vec<&'b Reference>> = HashMap::new();
        for &reference in group {
            scratch.clear();
            self.append_name_expansion_key(
                &mut scratch,
                &self.reference_data(reference, cache).names,
                shown,
            );
            match partitions.get_mut(scratch.as_str()) {
                Some(members) => members.push(reference),
                None => {
                    partitions.insert(scratch.clone(), vec![reference]);
                }
            }
        }
        if partitions.len() > 1 {
            return Some((shown, partitions));
        }
    }
    None
}
/// Reports whether every member of `group` has a distinct full-name key
/// for its first `min_names` authors (first author only when `None`).
///
/// Stops at the first duplicate key.
fn check_givenname_resolution(
    &self,
    group: &[&Reference],
    cache: &ReferenceCache,
    min_names: Option<usize>,
) -> bool {
    let shown = min_names.unwrap_or(1);
    let mut seen = HashSet::new();
    group.iter().all(|reference| {
        let mut key = String::new();
        self.append_givenname_resolution_key(
            &mut key,
            &self.reference_data(reference, cache).names,
            shown,
        );
        // `insert` returns false on a duplicate, which ends `all` early.
        seen.insert(key)
    })
}
/// Buckets references by their cached group key, preserving input order
/// within each bucket.
fn group_references<'b>(
    &self,
    references: &[&'b Reference],
    cache: &ReferenceCache,
) -> HashMap<String, Vec<&'b Reference>> {
    let mut buckets: HashMap<String, Vec<&'b Reference>> = HashMap::new();
    for &reference in references {
        let data = self.reference_data(reference, cache);
        buckets
            .entry(data.group_key.clone())
            .or_default()
            .push(reference);
    }
    buckets
}
fn build_author_key(&self, names: &[crate::reference::FlatName]) -> String {
let shorten = self
.config
.contributors
.as_ref()
.and_then(|c| c.shorten.as_ref());
if names.is_empty() {
return String::new();
}
let mut key = String::new();
if let Some(opts) = shorten
&& names.len() >= opts.min as usize
{
self.append_lowercased_families(&mut key, names, opts.use_first as usize, ',');
if !key.is_empty() {
key.push(',');
}
key.push_str("et-al");
return key;
}
self.append_lowercased_families(&mut key, names, names.len(), ',');
key
}
/// Builds the key used to bucket references that may be ambiguous.
///
/// - In label mode the key is the base label produced by
///   `generate_base_label` with the config's effective parameters.
/// - With an empty `author_key` the key is `anon:<id>`, or
///   `anon:<cache key>` when the id is missing or empty, so anonymous
///   references never share a bucket.
/// - Otherwise the key is `<author_key>:<year>`; the year is omitted
///   (leaving the trailing `:`) when the issued year is absent or does not
///   parse as an integer.
fn build_group_key(&self, reference: &Reference, author_key: &str) -> String {
    if let Some(citum_schema::options::Processing::Label(config)) = &self.config.processing {
        let params = config.effective_params();
        return crate::processor::labels::generate_base_label(reference, &params);
    }
    if author_key.is_empty() {
        if let Some(ref_id) = reference.id().filter(|id| !id.is_empty()) {
            return format!("anon:{ref_id}");
        }
        return format!("anon:{}", Self::reference_cache_key(reference));
    }
    // Room for the author key, the ':' separator and a typical year.
    let mut key = String::with_capacity(author_key.len() + 8);
    key.push_str(author_key);
    key.push(':');
    let Some(year) = reference
        .csl_issued_date()
        .and_then(|d| d.year().parse::<i32>().ok())
    else {
        return key;
    };
    // Writing into a `String` cannot fail, so the result is ignored.
    let _ = write!(key, "{year}");
    key
}
/// Appends the lowercased family (or literal) of the first `take` names to
/// `key`, placing `separator` between consecutive names.
fn append_lowercased_families(
    &self,
    key: &mut String,
    names: &[crate::reference::FlatName],
    take: usize,
    separator: char,
) {
    let mut shown = names.iter().take(take);
    let Some(first) = shown.next() else {
        return;
    };
    Self::push_lowercased(key, first.family_or_literal());
    for name in shown {
        key.push(separator);
        Self::push_lowercased(key, name.family_or_literal());
    }
}
/// Appends the `|`-separated lowercased families of the first `n` names
/// to `key`, followed by `et-al` when the list has more than `n` names.
fn append_name_expansion_key(
    &self,
    key: &mut String,
    names: &[crate::reference::FlatName],
    n: usize,
) {
    self.append_lowercased_families(key, names, n, '|');
    if names.len() <= n {
        return;
    }
    // The list was truncated: mark it so it differs from a full list.
    if !key.is_empty() {
        key.push('|');
    }
    key.push_str("et-al");
}
/// Appends a full-name key for the first `n` names to `key`.
///
/// Each name contributes its family, given name, non-dropping particle
/// and dropping particle, in that order, joined by `|`; names are joined
/// by `||`.
fn append_givenname_resolution_key(
    &self,
    key: &mut String,
    names: &[crate::reference::FlatName],
    n: usize,
) {
    for (idx, name) in names.iter().take(n).enumerate() {
        if idx > 0 {
            key.push_str("||");
        }
        let parts = [
            name.family.as_deref(),
            name.given.as_deref(),
            name.non_dropping_particle.as_deref(),
            name.dropping_particle.as_deref(),
        ];
        for (slot, part) in parts.into_iter().enumerate() {
            if slot > 0 {
                key.push('|');
            }
            Self::append_optional_part(key, part);
        }
    }
}
/// Appends one optional name part to `key`.
///
/// A present value is written as `<byte length>:<value>`; an absent value
/// is written as `-`, so `None` and `Some("")` stay distinguishable.
fn append_optional_part(key: &mut String, value: Option<&str>) {
    let Some(text) = value else {
        key.push('-');
        return;
    };
    // Writing into a `String` cannot fail, so the result is ignored.
    let _ = write!(key, "{}:", text.len());
    key.push_str(text);
}
/// Appends the lowercased form of `value` to `key`.
///
/// ASCII input is lowercased byte by byte without an intermediate
/// allocation; any other input goes through `str::to_lowercase`.
fn push_lowercased(key: &mut String, value: &str) {
    if !value.is_ascii() {
        key.push_str(&value.to_lowercase());
        return;
    }
    key.reserve(value.len());
    key.extend(
        value
            .bytes()
            .map(|byte| char::from(byte.to_ascii_lowercase())),
    );
}
/// Returns the cache key for `reference`: the address of the referenced
/// value.
///
/// The key is therefore only meaningful while the reference stays
/// borrowed at the same location.
fn reference_cache_key(reference: &Reference) -> usize {
    std::ptr::from_ref(reference).addr()
}
/// Fetches the cached data for `reference`.
///
/// # Panics
///
/// Panics if `reference` was not part of the slice the cache was built
/// from.
#[allow(
    clippy::expect_used,
    reason = "Internal cache hydration guarantees presence"
)]
fn reference_data<'b>(
    &self,
    reference: &Reference,
    cache: &'b ReferenceCache,
) -> &'b CachedReferenceData {
    let slot = Self::reference_cache_key(reference);
    cache
        .get(&slot)
        .expect("disambiguation cache missing reference")
}
}
#[cfg(test)]
#[allow(
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::indexing_slicing,
clippy::todo,
clippy::unimplemented,
clippy::unreachable,
clippy::get_unwrap,
reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
use super::*;
use crate::Processor;
use citum_schema::citation::Citation;
use citum_schema::grouping::{GroupSort, GroupSortKey, SortKey};
use citum_schema::options::{Config, ContributorConfig, DisplayAsSort, NameForm};
use citum_schema::reference::{
Contributor, EdtfString, InputReference as Reference, Monograph, MonographType,
MultilingualString, StructuredName, Title,
};
use citum_schema::template::{TemplateComponent, WrapPunctuation};
use citum_schema::{BibliographySpec, CitationSpec, Style, StyleInfo};
/// Builds a single-author book fixture with the given id, author name and
/// issue year. The title is `Title <id>`.
fn make_ref(id: &str, family: &str, given: &str, year: i32) -> Reference {
    let title = format!("Title {id}");
    Reference::Monograph(Box::new(Monograph {
        id: Some(id.into()),
        r#type: MonographType::Book,
        // `title` is not used again, so it is moved rather than cloned.
        title: Some(Title::Single(title)),
        short_title: None,
        container: None,
        author: Some(Contributor::StructuredName(StructuredName {
            family: MultilingualString::Simple(family.to_string()),
            given: MultilingualString::Simple(given.to_string()),
            suffix: None,
            dropping_particle: None,
            non_dropping_particle: None,
        })),
        editor: None,
        translator: None,
        issued: EdtfString(year.to_string()),
        ..Default::default()
    }))
}
/// Builds a single-author book fixture that has no id. The title is
/// `Title <title_suffix>`.
fn make_ref_without_id(title_suffix: &str, family: &str, given: &str, year: i32) -> Reference {
    let author = Contributor::StructuredName(StructuredName {
        family: MultilingualString::Simple(family.to_string()),
        given: MultilingualString::Simple(given.to_string()),
        suffix: None,
        dropping_particle: None,
        non_dropping_particle: None,
    });
    let book = Monograph {
        id: None,
        r#type: MonographType::Book,
        title: Some(Title::Single(format!("Title {title_suffix}"))),
        short_title: None,
        container: None,
        author: Some(author),
        editor: None,
        translator: None,
        issued: EdtfString(year.to_string()),
        ..Default::default()
    };
    Reference::Monograph(Box::new(book))
}
/// Builds a book fixture with several authors, given as
/// `(family, given)` pairs. The title is `Title <id>`.
fn make_multi_author_ref(id: &str, authors: &[(&str, &str)], year: i32) -> Reference {
    let to_contributor = |&(family, given): &(&str, &str)| {
        Contributor::StructuredName(StructuredName {
            family: MultilingualString::Simple(family.to_string()),
            given: MultilingualString::Simple(given.to_string()),
            suffix: None,
            dropping_particle: None,
            non_dropping_particle: None,
        })
    };
    let author_list = citum_schema::reference::ContributorList(
        authors.iter().map(to_contributor).collect(),
    );
    let book = Monograph {
        id: Some(id.into()),
        r#type: MonographType::Book,
        title: Some(Title::Single(format!("Title {id}"))),
        short_title: None,
        container: None,
        author: Some(Contributor::ContributorList(author_list)),
        editor: None,
        translator: None,
        issued: EdtfString(year.to_string()),
        ..Default::default()
    };
    Reference::Monograph(Box::new(book))
}
/// Builds a minimal style for rendering tests.
///
/// Citations render as a short author plus year in parentheses; the
/// bibliography renders the primary title and uses `bibliography_sort`
/// when one is given.
fn make_author_date_style(config: Config, bibliography_sort: Option<GroupSort>) -> Style {
    let info = StyleInfo {
        title: Some("Disambiguation Test".to_string()),
        id: Some("disambiguation-test".into()),
        ..Default::default()
    };
    let citation = CitationSpec {
        template: Some(vec![
            citum_schema::tc_contributor!(Author, Short),
            citum_schema::tc_date!(Issued, Year, prefix = ", "),
        ]),
        wrap: Some(WrapPunctuation::Parentheses.into()),
        ..Default::default()
    };
    let primary_title = TemplateComponent::Title(citum_schema::template::TemplateTitle {
        title: citum_schema::template::TitleType::Primary,
        ..Default::default()
    });
    let bibliography = BibliographySpec {
        sort: bibliography_sort.map(citum_schema::grouping::GroupSortEntry::Explicit),
        template: Some(vec![primary_title]),
        ..Default::default()
    };
    Style {
        info,
        options: Some(config),
        citation: Some(citation),
        bibliography: Some(bibliography),
        ..Default::default()
    }
}
/// Year-suffix order follows the title by default and the configured
/// group sort when one is supplied, both in the hints and when rendered.
#[test]
fn test_group_aware_year_suffix_sort() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Same author and year; only the titles ("Title r1" / "Title r2") differ.
    let mut bib = Bibliography::new();
    for id in ["r1", "r2"] {
        bib.insert(id.to_string(), make_ref(id, "Smith", "Same", 2020));
    }
    let config = Config::default();
    let locale = Locale::en_us();
    let index_of =
        |hints: &HashMap<String, ProcHints>, id: &str| hints.get(id).unwrap().group_index;

    // Without a group sort, ascending title order decides.
    let default_hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    assert_eq!(index_of(&default_hints, "r1"), 1);
    assert_eq!(index_of(&default_hints, "r2"), 2);

    // A descending title sort flips the order.
    let title_descending = GroupSort {
        template: vec![GroupSortKey {
            key: SortKey::Title,
            ascending: false,
            order: None,
            sort_order: None,
        }],
    };
    let custom_hints =
        Disambiguator::with_group_sort(&bib, &config, &locale, &title_descending)
            .calculate_hints();
    assert_eq!(index_of(&custom_hints, "r2"), 1);
    assert_eq!(index_of(&custom_hints, "r1"), 2);

    let style_config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname: false,
                year_suffix: true,
            }),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            display_as_sort: Some(DisplayAsSort::First),
            ..Default::default()
        }),
        ..Default::default()
    };
    let style = make_author_date_style(style_config, Some(title_descending));
    let processor = Processor::new(style, bib);
    let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
    let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();
    assert!(
        rendered_r1.contains("2020b"),
        "expected r1 to sort second: {rendered_r1}"
    );
    assert!(
        rendered_r2.contains("2020a"),
        "expected r2 to sort first: {rendered_r2}"
    );
}
/// Two authors sharing a family name and year are resolved by expanding
/// given names, without a year suffix.
#[test]
fn test_disambiguate_given_names() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    // Only given-name expansion is enabled.
    let givenname_only = || {
        Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname: true,
                year_suffix: false,
            }),
            ..Default::default()
        })
    };

    let mut bib = Bibliography::new();
    bib.insert("r1".to_string(), make_ref("r1", "Smith", "John", 2020));
    bib.insert("r2".to_string(), make_ref("r2", "Smith", "Alice", 2020));

    let config = Config {
        processing: Some(givenname_only()),
        ..Default::default()
    };
    let locale = Locale::en_us();
    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();
    let john = hints.get("r1").unwrap();
    let alice = hints.get("r2").unwrap();
    assert!(john.expand_given_names);
    assert!(alice.expand_given_names);
    assert!(!john.disamb_condition);
    assert!(!alice.disamb_condition);
    assert_ne!(john.group_index, alice.group_index);

    let style_config = Config {
        processing: Some(givenname_only()),
        contributors: Some(ContributorConfig {
            initialize_with: Some(". ".to_string()),
            name_form: Some(NameForm::Initials),
            ..Default::default()
        }),
        ..Default::default()
    };
    let processor = Processor::new(make_author_date_style(style_config, None), bib);
    let rendered_r1 = processor.process_citation(&Citation::simple("r1")).unwrap();
    let rendered_r2 = processor.process_citation(&Citation::simple("r2")).unwrap();
    assert!(
        rendered_r1.contains("J. Smith"),
        "expected expanded given name for r1: {rendered_r1}"
    );
    assert!(
        rendered_r2.contains("A. Smith"),
        "expected expanded given name for r2: {rendered_r2}"
    );
}
/// Title keys are cached only when the year-suffix flag is on.
#[test]
fn test_build_reference_cache_populates_title_keys_when_year_suffix_is_active() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    let config_with = |add_givenname: bool, year_suffix: bool| Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: false,
                add_givenname,
                year_suffix,
            }),
            ..Default::default()
        })),
        ..Default::default()
    };

    let mut bib = Bibliography::new();
    bib.insert("r1".to_string(), make_ref("r1", "Smith", "John", 2020));
    let refs: Vec<&Reference> = bib.values().collect();
    let locale = Locale::en_us();

    // Year suffix off: no title key is computed.
    let disabled_config = config_with(true, false);
    let disabled = Disambiguator::new(&bib, &disabled_config, &locale);
    let disabled_cache =
        disabled.build_reference_cache(&refs, disabled.disambiguation_flags().year_suffix);
    assert!(disabled_cache.values().all(|data| data.title_key.is_none()));

    // Year suffix on: every entry carries a title key.
    let enabled_config = config_with(false, true);
    let enabled = Disambiguator::new(&bib, &enabled_config, &locale);
    let enabled_cache =
        enabled.build_reference_cache(&refs, enabled.disambiguation_flags().year_suffix);
    assert!(enabled_cache.values().all(|data| data.title_key.is_some()));
}
/// Author-less references each land in their own group, including ones
/// whose id is empty or missing.
#[test]
fn test_anonymous_refs_do_not_receive_year_suffix() {
    use citum_schema::options::{Disambiguation, Processing, ProcessingCustom};

    let anonymous = [
        ("a1", make_ref("a1", "", "", 2020)),
        ("a2", make_ref("a2", "", "", 2020)),
        // Empty id.
        ("a3", make_ref("", "", "", 2020)),
        // No id at all.
        ("a4", make_ref_without_id("missing-id", "", "", 2020)),
    ];
    let mut bib = Bibliography::new();
    for (bib_key, reference) in anonymous {
        bib.insert(bib_key.to_string(), reference);
    }

    let locale = Locale::en_us();
    let all_strategies = Disambiguation {
        names: true,
        add_givenname: true,
        year_suffix: true,
    };
    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(all_strategies),
            ..Default::default()
        })),
        ..Default::default()
    };
    let disambiguator = Disambiguator::new(&bib, &config, &locale);
    let refs: Vec<&Reference> = bib.values().collect();
    let cache = disambiguator.build_reference_cache(&refs, false);
    let grouped = disambiguator.group_references(&refs, &cache);

    assert_eq!(grouped.len(), 4);
    assert!(!grouped.contains_key("anon:"));
    assert!(grouped.values().all(|group| group.len() == 1));
}
/// Non-ASCII input must lowercase exactly as `str::to_lowercase` does.
#[test]
fn test_push_lowercased_matches_str_lowercase_for_non_ascii() {
    let value = "ΟΣ";
    let expected = value.to_lowercase();
    let mut key = String::new();
    Disambiguator::push_lowercased(&mut key, value);
    assert_eq!(key, expected);
}
/// Showing a second name isolates r1; r2 and r3 still collide and fall
/// back to year suffixes.
#[test]
fn test_partitioned_name_expansion_keeps_unique_items_and_suffixes_remainders() {
    use citum_schema::options::{
        ContributorConfig, Disambiguation, Processing, ProcessingCustom, ShortenListOptions,
    };

    let fixtures = [
        ("r1", [("Smith", "John"), ("Jones", "Peter")]),
        ("r2", [("Smith", "John"), ("Brown", "Alice")]),
        ("r3", [("Smith", "John"), ("Brown", "Adam")]),
    ];
    let mut bib = Bibliography::new();
    for (id, authors) in fixtures {
        bib.insert(id.to_string(), make_multi_author_ref(id, &authors, 2020));
    }

    let config = Config {
        processing: Some(Processing::Custom(ProcessingCustom {
            disambiguate: Some(Disambiguation {
                names: true,
                add_givenname: false,
                year_suffix: true,
            }),
            ..Default::default()
        })),
        contributors: Some(ContributorConfig {
            shorten: Some(ShortenListOptions {
                min: 2,
                use_first: 1,
                ..Default::default()
            }),
            ..Default::default()
        }),
        ..Default::default()
    };
    let locale = Locale::en_us();
    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();

    let unique = hints.get("r1").unwrap();
    assert!(!unique.disamb_condition);
    assert_eq!(unique.group_index, 1);
    assert_eq!(unique.min_names_to_show, Some(2));
    assert_eq!(unique.group_length, 3);

    let remaining_a = hints.get("r2").unwrap();
    let remaining_b = hints.get("r3").unwrap();
    for remaining in [remaining_a, remaining_b] {
        assert!(remaining.disamb_condition);
        assert_eq!(remaining.min_names_to_show, Some(2));
        assert_eq!(remaining.group_length, 3);
    }
    assert_ne!(remaining_a.group_index, remaining_b.group_index);
}
/// In label mode, colliding labels get year suffixes directly; no name
/// expansion is attempted and the group key is the label itself.
#[test]
fn test_label_mode_skips_name_strategies_and_suffixes_by_label_group() {
    use citum_schema::options::{LabelConfig, LabelPreset, Processing};

    let mut bib = Bibliography::new();
    for id in ["r1", "r2"] {
        bib.insert(id.to_string(), make_ref(id, "Kuhn", "Thomas", 1962));
    }
    let label_mode = Processing::Label(LabelConfig {
        preset: LabelPreset::Din,
        ..Default::default()
    });
    let config = Config {
        processing: Some(label_mode),
        ..Default::default()
    };
    let locale = Locale::en_us();
    let hints = Disambiguator::new(&bib, &config, &locale).calculate_hints();

    let first = hints.get("r1").unwrap();
    let second = hints.get("r2").unwrap();
    for hint in [first, second] {
        assert!(hint.disamb_condition);
        assert!(!hint.expand_given_names);
        assert_eq!(hint.min_names_to_show, None);
    }
    assert_eq!(first.group_key, second.group_key);
    // Author-date group keys contain ':'; a label key must not.
    assert!(!first.group_key.contains(':'));
    assert_ne!(first.group_index, second.group_index);
}
}