use super::key::{EncodedOverride, encode_primary_pinyin_unchecked};
use super::lookup::{all_pinyin_for_char, primary_pinyin_for_char};
use super::model::PinYinRecord;
use crate::collator::Collator;
use crate::error::Result;
use crate::r#override::PinyinOverride;
/// Pinyin-aware collator with optional per-character and per-phrase reading overrides.
///
/// The `Default` value carries no overrides (both fields are `None`).
#[derive(Debug, Clone, Default)]
pub struct PinyinCollator {
/// Override tables exactly as supplied to `with_override`; consulted by
/// `pinyin_of` when reporting readings.
override_data: Option<PinyinOverride>,
/// Encoded form of the same overrides, produced by `EncodedOverride::try_from`
/// in `with_override`; consulted by the `Collator` implementation.
encoded_override: Option<EncodedOverride>,
}
impl PinyinCollator {
    /// Creates a collator without any overrides (same as `Default::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a collator that consults `override_data` before the built-in lookup.
    ///
    /// The override is encoded once up front so that collation can use the
    /// encoded form directly; the original data is kept for `pinyin_of`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `EncodedOverride::try_from` when the
    /// override cannot be encoded.
    pub fn with_override(override_data: PinyinOverride) -> Result<Self> {
        let encoded_override = EncodedOverride::try_from(&override_data)?;
        Ok(Self {
            override_data: Some(override_data),
            encoded_override: Some(encoded_override),
        })
    }

    /// Returns one [`PinYinRecord`] per character of `value`, in order.
    ///
    /// When a phrase override exists whose key equals `value` exactly, its
    /// readings are assigned to the characters positionally, one reading per
    /// character. Otherwise every character is resolved on its own (character
    /// override first, then the built-in table).
    ///
    /// If a phrase override lists fewer readings than `value` has characters,
    /// the characters without a reading are resolved individually instead of
    /// being dropped, so the result always has one record per character.
    /// Surplus readings are ignored.
    pub fn pinyin_of(&self, value: &str) -> Vec<PinYinRecord> {
        let phrase_readings = self
            .override_data
            .as_ref()
            .and_then(|data| data.phrase_override.get(value));

        match phrase_readings {
            Some(readings) => {
                let mut remaining = readings.iter();
                value
                    .chars()
                    .map(|character| match remaining.next() {
                        Some(reading) => PinYinRecord {
                            pinyin: vec![reading.clone()],
                            character,
                        },
                        // The override ran out of readings: do not lose the character.
                        None => self.lookup_char(character),
                    })
                    .collect()
            }
            None => value
                .chars()
                .map(|character| self.lookup_char(character))
                .collect(),
        }
    }

    /// Resolves a single character.
    ///
    /// A character override yields exactly that one reading; without one the
    /// record carries whatever `all_pinyin_for_char` returns for the character.
    fn lookup_char(&self, character: char) -> PinYinRecord {
        let pinyin = self
            .override_data
            .as_ref()
            .and_then(|data| data.char_override.get(&character))
            .map_or_else(
                || all_pinyin_for_char(character),
                |reading| vec![reading.clone()],
            );
        PinYinRecord { pinyin, character }
    }
}
impl Collator for PinyinCollator {
type Data = u128;
fn data_for(&self, character: char) -> Option<u128> {
if let Some(encoded_override) = &self.encoded_override
&& let Some(primary_pinyin) = encoded_override.char_override(character)
{
return Some(primary_pinyin);
}
primary_pinyin_for_char(character).map(encode_primary_pinyin_unchecked)
}
fn phrase_data(&self, phrase: &str) -> Option<Vec<u128>> {
self.encoded_override
.as_ref()?
.phrase_override(phrase)
.map(<[u128]>::to_vec)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::collator::sort_strings_with;
    use crate::r#override::PinyinOverride;
    use std::collections::HashMap;

    /// Turns string literals into owned `String`s.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Builds the record expected for `character` with exactly `readings`.
    fn record(character: char, readings: &[&str]) -> PinYinRecord {
        PinYinRecord {
            pinyin: owned(readings),
            character,
        }
    }

    /// Phrase table mapping "重庆" to its overridden readings.
    fn chongqing_phrase_override() -> HashMap<String, Vec<String>> {
        HashMap::from([("重庆".to_string(), owned(&["chong2", "qing4"]))])
    }

    /// Builds a collator from override tables that are expected to be valid.
    fn collator_with(
        char_override: HashMap<char, String>,
        phrase_override: HashMap<String, Vec<String>>,
    ) -> PinyinCollator {
        PinyinCollator::with_override(PinyinOverride {
            char_override,
            phrase_override,
        })
        .expect("valid override should construct")
    }

    #[test]
    fn pinyin_of_known_characters() {
        let records = PinyinCollator::default().pinyin_of("汉字");
        assert_eq!(
            records,
            vec![record('汉', &["han4"]), record('字', &["zi4"])]
        );
    }

    #[test]
    fn includes_generated_first_record() {
        let records = PinyinCollator::default().pinyin_of("〇");
        assert_eq!(records.len(), 1);
        assert_eq!(records[0].primary_pinyin(), Some("ling2"));
    }

    #[test]
    fn unknown_characters_are_preserved() {
        let records = PinyinCollator::default().pinyin_of("a1");
        assert_eq!(records, vec![record('a', &[]), record('1', &[])]);
    }

    #[test]
    fn phrase_override_takes_precedence() {
        let collator = collator_with(
            HashMap::from([('重', "zhong4".to_string())]),
            chongqing_phrase_override(),
        );
        let records = collator.pinyin_of("重庆");
        assert_eq!(records[0].primary_pinyin(), Some("chong2"));
    }

    #[test]
    fn char_override_applies_without_phrase_override() {
        let collator = collator_with(
            HashMap::from([('重', "chong2".to_string())]),
            HashMap::new(),
        );
        let records = collator.pinyin_of("重要");
        assert_eq!(records[0].primary_pinyin(), Some("chong2"));
    }

    #[test]
    fn polyphonic_characters_expose_all_readings() {
        let records = PinyinCollator::default().pinyin_of("乐");
        assert_eq!(records.len(), 1);

        let readings = &records[0].pinyin;
        assert!(readings.len() > 1);
        assert!(readings.iter().any(|item| item == "le4"));
        assert!(readings.iter().any(|item| item == "yue4"));
    }

    #[test]
    fn collator_returns_phrase_data_for_known_phrase() {
        let collator = collator_with(HashMap::new(), chongqing_phrase_override());
        let phrase = collator
            .phrase_data("重庆")
            .expect("phrase override should hit");
        let expected: Vec<u128> = ["chong2", "qing4"]
            .iter()
            .map(|syllable| encode_primary_pinyin_unchecked(syllable))
            .collect();
        assert_eq!(phrase, expected);
    }

    #[test]
    fn with_override_rejects_unencodable_syllable() {
        let bad = PinyinOverride {
            char_override: HashMap::from([('女', "nü3".to_string())]),
            phrase_override: HashMap::new(),
        };
        let err = PinyinCollator::with_override(bad).expect_err("non-ASCII should fail");
        assert!(err.to_string().contains("ASCII"), "got: {err}");
    }

    #[test]
    fn new_is_infallible_for_no_override_case() {
        // Only checks that construction without overrides needs no `Result` handling.
        let _collator = PinyinCollator::new();
    }

    #[test]
    fn library_users_can_call_sort_strings_with_directly() {
        let collator = PinyinCollator::new();
        let input = owned(&["汉字", "照相", "赵云", "赵四", "张三"]);
        let sorted = sort_strings_with(input, &collator);
        assert_eq!(sorted, vec!["汉字", "张三", "照相", "赵四", "赵云"]);
    }

    #[test]
    fn neutral_tone_sorts_after_marked_tones() {
        let collator = PinyinCollator::default();
        let input = owned(&["吗", "马", "麻", "骂"]);
        let sorted = sort_strings_with(input, &collator);
        assert_eq!(sorted, vec!["麻", "马", "骂", "吗"]);
    }
}