use piper_plus::phonemize::custom_dict::CustomDictionary;
use std::io::Write;
use std::sync::atomic::{AtomicU32, Ordering};
/// Per-process sequence number used to give every temp fixture a distinct file name.
static COUNTER: AtomicU32 = AtomicU32::new(0);

/// Writes `content` to a newly named `.json` file inside the system temp
/// directory and returns the path of that file.
///
/// The file name is built from the current process id and the next value of
/// [`COUNTER`], so repeated calls within one process never reuse a name.
///
/// # Panics
///
/// Panics if the file cannot be created, written, or flushed.
fn write_temp_json(content: &str) -> std::path::PathBuf {
    let seq = COUNTER.fetch_add(1, Ordering::SeqCst);
    let file_name = format!("piper_integ_dict2_{}_{}.json", std::process::id(), seq);

    let mut target = std::env::temp_dir();
    target.push(file_name);

    let mut file = std::fs::File::create(&target).unwrap();
    file.write_all(content.as_bytes()).unwrap();
    file.flush().unwrap();

    target
}
/// Loads a one-entry v1.0 dictionary ("テスト" -> "テ ス ト") from disk and
/// checks that `apply_to_text` performs the replacement on Japanese text.
///
/// When the `naist-jdic` feature is enabled, the dictionary is additionally
/// attached to a bundled `JapanesePhonemizer` and the phonemizer output is
/// checked for its structural markers: the first token is "^", the last token
/// is "$" or "?", and tokens and prosody have the same length.
#[cfg(feature = "japanese")]
#[test]
fn test_custom_dict_applied_before_phonemization_ja() {
    let json = r#"{"version":"1.0","entries":{"テスト":"テ ス ト"}}"#;
    let path = write_temp_json(json);
    let mut dict = CustomDictionary::new();
    dict.load_dictionary(&path).unwrap();
    let replaced = dict.apply_to_text("テストです");
    assert_eq!(replaced, "テ ス トです");

    #[cfg(feature = "naist-jdic")]
    {
        // Imported here rather than at function scope: nothing outside this
        // block uses them, so they would be unused when `naist-jdic` is off.
        use piper_plus::phonemize::Phonemizer;
        use piper_plus::phonemize::japanese::JapanesePhonemizer;

        let mut phonemizer =
            JapanesePhonemizer::new_bundled().expect("Failed to create JapanesePhonemizer");
        phonemizer.set_dictionary(dict);
        let (tokens, prosody) = phonemizer
            .phonemize_with_prosody("テストです")
            .expect("phonemization should succeed");

        assert_eq!(tokens.first().map(|s| s.as_str()), Some("^"));

        let last = tokens.last().map(|s| s.as_str());
        assert!(
            matches!(last, Some("$") | Some("?")),
            "expected sentence-end marker, got {:?}",
            tokens.last()
        );
        assert_eq!(tokens.len(), prosody.len());
    }

    // Best-effort cleanup of the fixture; a removal error must not fail the test.
    let _ = std::fs::remove_file(&path);
}
/// Loads a one-entry v1.0 dictionary ("hello" -> "world") from disk and checks
/// that `apply_to_text` replaces the matching word in English text while
/// leaving the rest of the sentence untouched.
#[test]
fn test_custom_dict_applied_before_phonemization_en() {
    let json = r#"{"version":"1.0","entries":{"hello":"world"}}"#;
    let path = write_temp_json(json);
    let mut dict = CustomDictionary::new();
    dict.load_dictionary(&path).unwrap();

    let result = dict.apply_to_text("hello there");
    assert_eq!(result, "world there");

    // Best-effort cleanup of the fixture; a removal error must not fail the test.
    let _ = std::fs::remove_file(&path);
}
/// Loads two separate v1.0 dictionary files into the same `CustomDictionary`
/// and checks that entries from both files are available afterwards, both via
/// `get_pronunciation` (queried here in lowercase for uppercase keys) and via
/// `apply_to_text`.
#[test]
fn test_custom_dict_multiple_files() {
    let json1 = r#"{"version":"1.0","entries":{"API":"エーピーアイ"}}"#;
    let json2 = r#"{"version":"1.0","entries":{"GPU":"ジーピーユー"}}"#;
    let path1 = write_temp_json(json1);
    let path2 = write_temp_json(json2);

    let mut dict = CustomDictionary::new();
    dict.load_dictionary(&path1).unwrap();
    dict.load_dictionary(&path2).unwrap();

    assert_eq!(dict.get_pronunciation("api"), Some("エーピーアイ"));
    assert_eq!(dict.get_pronunciation("gpu"), Some("ジーピーユー"));

    let result = dict.apply_to_text("API and GPU");
    assert_eq!(result, "エーピーアイ and ジーピーユー");

    // Best-effort cleanup of the fixtures; a removal error must not fail the test.
    let _ = std::fs::remove_file(&path1);
    let _ = std::fs::remove_file(&path2);
}
/// Checks that, for the same key defined in two v2.0 dictionary files, the
/// entry with the higher `priority` value (8 vs. 3) is the one returned by
/// `get_pronunciation`, independent of the order in which the files are loaded.
#[test]
fn test_custom_dict_priority_override() {
    let json_low =
        r#"{"version":"2.0","entries":{"API":{"pronunciation":"エーピーアイ低","priority":3}}}"#;
    let json_high =
        r#"{"version":"2.0","entries":{"API":{"pronunciation":"エーピーアイ高","priority":8}}}"#;
    let path_low = write_temp_json(json_low);
    let path_high = write_temp_json(json_high);

    // Low priority first, then high.
    let mut dict = CustomDictionary::new();
    dict.load_dictionary(&path_low).unwrap();
    dict.load_dictionary(&path_high).unwrap();
    assert_eq!(dict.get_pronunciation("api"), Some("エーピーアイ高"));

    // High priority first, then low: the later, lower-priority entry must not win.
    let mut dict2 = CustomDictionary::new();
    dict2.load_dictionary(&path_high).unwrap();
    dict2.load_dictionary(&path_low).unwrap();
    assert_eq!(dict2.get_pronunciation("api"), Some("エーピーアイ高"));

    // Best-effort cleanup of the fixtures once both dictionaries have loaded
    // them; a removal error must not fail the test.
    let _ = std::fs::remove_file(&path_low);
    let _ = std::fs::remove_file(&path_high);
}
/// Exercises the casing behaviour expected of lookups:
///
/// - a mixed-case key ("PyTorch") is found only under its exact spelling;
/// - an all-lowercase key ("tensorflow") is found under any casing;
/// - an all-uppercase key ("CUDA") is found under any casing;
/// - `apply_to_text` replaces the exact-case occurrence of a mixed-case key
///   and leaves a differently cased occurrence alone.
#[test]
fn test_custom_dict_case_sensitivity() {
    let mut dict = CustomDictionary::new();

    // Mixed-case entry: exact spelling only.
    dict.add_word("PyTorch", "パイトーチ", 5);
    assert_eq!(dict.get_pronunciation("PyTorch"), Some("パイトーチ"));
    for miss in ["pytorch", "PYTORCH"] {
        assert_eq!(dict.get_pronunciation(miss), None);
    }

    // All-lowercase entry: any casing resolves.
    dict.add_word("tensorflow", "テンソルフロー", 5);
    for spelling in ["tensorflow", "TensorFlow", "TENSORFLOW"] {
        assert_eq!(dict.get_pronunciation(spelling), Some("テンソルフロー"));
    }

    // All-uppercase entry: any casing resolves.
    dict.add_word("CUDA", "クーダ", 5);
    for spelling in ["cuda", "CUDA", "Cuda"] {
        assert_eq!(dict.get_pronunciation(spelling), Some("クーダ"));
    }

    assert_eq!(
        dict.apply_to_text("PyTorch and pytorch"),
        "パイトーチ and pytorch"
    );
}
/// Checks that `load_dictionary` returns an error (rather than panicking) for
/// an empty file, a file that is not valid JSON, and a path that does not
/// exist, and that the same dictionary instance still accepts and serves a
/// word added afterwards.
#[test]
fn test_custom_dict_empty_file_graceful() {
    let path_empty = write_temp_json("");
    let mut dict = CustomDictionary::new();
    let result = dict.load_dictionary(&path_empty);
    assert!(
        result.is_err(),
        "loading an empty file should return an error"
    );

    let path_bad = write_temp_json("this is not json");
    let result2 = dict.load_dictionary(&path_bad);
    assert!(
        result2.is_err(),
        "loading invalid JSON should return an error"
    );

    let result3 = dict.load_dictionary(std::path::Path::new("/no/such/file/dict.json"));
    assert!(
        result3.is_err(),
        "loading a nonexistent file should return an error"
    );

    // The failed loads must leave the dictionary usable.
    dict.add_word("test", "テスト", 5);
    assert_eq!(dict.get_pronunciation("test"), Some("テスト"));

    // Best-effort cleanup of the fixtures; a removal error must not fail the test.
    let _ = std::fs::remove_file(&path_empty);
    let _ = std::fs::remove_file(&path_bad);
}
/// Checks replacement when dictionary keys sit directly next to Japanese
/// characters: a Latin key ("AI") immediately followed by kanji, and a kanji
/// key ("人工知能") immediately followed by kana, are both expected to be
/// replaced by `apply_to_text`.
#[test]
fn test_custom_dict_japanese_word_boundary() {
    let mut dict = CustomDictionary::new();

    // Latin key adjacent to kanji.
    dict.add_word("AI", "エーアイ", 5);
    assert_eq!(dict.apply_to_text("AI技術"), "エーアイ技術");

    // Kanji key added on top; both entries apply within one sentence.
    dict.add_word("人工知能", "ジンコウチノウ", 5);
    assert_eq!(
        dict.apply_to_text("人工知能とAI技術"),
        "ジンコウチノウとエーアイ技術"
    );
}
/// Checks that an English key ("API") is replaced only when it appears as a
/// standalone word: it is expected to be replaced between spaces and inside
/// parentheses, but not when it is a substring of a longer word ("rapid",
/// "myAPIkey").
#[test]
fn test_custom_dict_no_partial_match_english() {
    let mut dict = CustomDictionary::new();
    dict.add_word("API", "エーピーアイ", 5);

    // (input, expected output) pairs, checked in order.
    let cases = [
        ("rapid API call", "rapid エーピーアイ call"),
        ("rapid development", "rapid development"),
        ("myAPIkey", "myAPIkey"),
        ("Use (API) here", "Use (エーピーアイ) here"),
    ];

    for (input, expected) in cases {
        assert_eq!(dict.apply_to_text(input), expected);
    }
}