#![cfg(feature = "persistent-artrie")]
use libdictenstein::persistent_artrie_char::{PersistentARTrieChar, PersistentARTrieCharZipper};
use libdictenstein::zipper::DictZipper;
use libdictenstein::{DictionaryNode, MappedDictionary};
/// Accented Latin words can be inserted and found again, and the unaccented
/// spelling of one of them is not reported as present.
#[test]
fn test_basic_unicode_insertion() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let words = ["café", "naïve", "résumé"];

    for word in &words {
        trie.insert(word).expect("insert failed");
    }
    for word in &words {
        assert!(trie.contains(word));
    }

    // "cafe" (no accent) was never inserted.
    assert!(!trie.contains("cafe"));
    assert_eq!(trie.len(), 3);
}
/// Chinese, Japanese and Korean words are stored as eight distinct keys, and a
/// bare prefix of one of them is not reported as present.
#[test]
fn test_cjk_characters() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let words = [
        // Chinese
        "中文",
        "你好",
        "世界",
        // Japanese
        "日本語",
        "こんにちは",
        "ありがとう",
        // Korean
        "한국어",
        "안녕하세요",
    ];

    for word in &words {
        trie.insert(word).expect("insert failed");
    }

    assert!(trie.contains("中文"));
    assert!(trie.contains("こんにちは"));
    assert!(trie.contains("한국어"));
    // "中" is only a prefix of "中文"; it was never inserted on its own.
    assert!(!trie.contains("中"));
    assert_eq!(trie.len(), 8);
}
/// Emoji keys: single emoji, variation-selector and ZWJ sequences, and emoji
/// embedded in ASCII text. Invisible code points are written as escapes so
/// they remain visible in the source.
#[test]
fn test_emoji_handling() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let entries = [
        "😀",
        "🎉",
        "❤\u{FE0F}",                   // heavy black heart + VS16
        "👨\u{200D}👩\u{200D}👧",        // family, joined with ZWJ
        "🏳\u{FE0F}\u{200D}🌈",         // rainbow flag: flag + VS16 + ZWJ + rainbow
        "Hello 🌍!",
        "I ❤\u{FE0F} Rust",
    ];

    for entry in &entries {
        trie.insert(entry).expect("insert failed");
    }

    assert!(trie.contains("😀"));
    assert!(trie.contains("Hello 🌍!"));
    // An emoji that was never inserted.
    assert!(!trie.contains("🚀"));
    assert_eq!(trie.len(), 7);
}
/// The precomposed and decomposed spellings of "é" can both be inserted and
/// looked up.
#[test]
fn test_combining_characters() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();

    // Written as escapes so that a normalizing editor or tool cannot silently
    // collapse the two spellings into the same byte sequence.
    let precomposed = "\u{00E9}"; // LATIN SMALL LETTER E WITH ACUTE
    let decomposed = "e\u{0301}"; // 'e' followed by COMBINING ACUTE ACCENT
    assert_ne!(precomposed, decomposed);

    trie.insert(precomposed).expect("insert failed");
    trie.insert(decomposed).expect("insert failed");

    assert!(trie.contains(precomposed));
    assert!(trie.contains(decomposed));
}
/// Right-to-left scripts: two Arabic and two Hebrew words are stored as four
/// distinct keys.
#[test]
fn test_rtl_text() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let words = [
        // Arabic
        "مرحبا",
        "العالم",
        // Hebrew
        "שלום",
        "עולם",
    ];

    for word in &words {
        trie.insert(word).expect("insert failed");
    }

    assert!(trie.contains("مرحبا"));
    assert!(trie.contains("שלום"));
    assert_eq!(trie.len(), 4);
}
/// Keys that mix Latin text (with diacritics) and CJK text in one string.
#[test]
fn test_mixed_scripts() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let phrases = ["Café 中文", "Tōkyō 東京", "München München", "São Paulo"];

    for phrase in &phrases {
        trie.insert(phrase).expect("insert failed");
    }

    assert!(trie.contains("Café 中文"));
    assert!(trie.contains("Tōkyō 東京"));
    assert_eq!(trie.len(), 4);
}
/// Keys made of symbols rather than letters: math operators, currency signs,
/// musical notes and pictographs.
#[test]
fn test_special_unicode_categories() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let entries = [
        "∑∏∫∂",             // math operators
        "∀x∃y",             // quantifiers mixed with ASCII
        "$€£¥₿",            // currency signs
        "♩♪♫♬",             // musical notes
        "⚡☀\u{FE0F}🌧",     // pictographs; the sun carries VS16
    ];

    for entry in &entries {
        trie.insert(entry).expect("insert failed");
    }

    assert!(trie.contains("∑∏∫∂"));
    assert!(trie.contains("$€£¥₿"));
    assert_eq!(trie.len(), 5);
}
/// Integer values stored under Latin, CJK and emoji keys are returned by
/// `get_value`; an absent key yields `None`.
#[test]
fn test_unicode_keys_with_values() {
    let trie: PersistentARTrieChar<i32> = PersistentARTrieChar::new();

    trie.insert_with_value("café", 1);
    trie.insert_with_value("中文", 2);
    trie.insert_with_value("🎉", 3);

    assert_eq!(trie.get_value("café"), Some(1));
    assert_eq!(trie.get_value("中文"), Some(2));
    assert_eq!(trie.get_value("🎉"), Some(3));
    assert_eq!(trie.get_value("notfound"), None);
}
/// Owned `String` values stored under ASCII, CJK and accented keys are
/// returned by `get_value`.
#[test]
fn test_unicode_keys_with_string_values() {
    let trie: PersistentARTrieChar<String> = PersistentARTrieChar::new();

    trie.insert_with_value("hello", "greeting".to_string());
    trie.insert_with_value("世界", "world".to_string());
    trie.insert_with_value("café", "coffee place".to_string());

    assert_eq!(trie.get_value("hello"), Some("greeting".to_string()));
    assert_eq!(trie.get_value("世界"), Some("world".to_string()));
    assert_eq!(trie.get_value("café"), Some("coffee place".to_string()));
}
/// Descends the zipper one `char` at a time along "café" and checks that the
/// final position is a word end whose path is exactly those four characters.
#[test]
fn test_zipper_unicode_navigation() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    trie.insert("café").expect("insert failed");
    trie.insert("cat").expect("insert failed");

    let zipper = PersistentARTrieCharZipper::new(&trie);
    let z = zipper
        .descend('c')
        .and_then(|z| z.descend('a'))
        .and_then(|z| z.descend('f'))
        // 'é' is a single scalar value (U+00E9), so it is one descent step.
        .and_then(|z| z.descend('é'));

    assert!(z.is_some());
    let z = z.unwrap();
    assert!(z.is_final());

    let path = z.path();
    assert_eq!(path, vec!['c', 'a', 'f', 'é']);
}
/// Descends the zipper through the two characters of "中文" and checks the
/// resulting position is final with the expected path.
#[test]
fn test_zipper_cjk_navigation() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    trie.insert("中文").expect("insert failed");

    let zipper = PersistentARTrieCharZipper::new(&trie);
    let z = zipper.descend('中').and_then(|z| z.descend('文'));

    assert!(z.is_some());
    let z = z.unwrap();
    assert!(z.is_final());
    assert_eq!(z.path(), vec!['中', '文']);
}
/// After descending to 'a', the zipper lists exactly four children: an ASCII
/// letter, an accented letter, a CJK character and an emoji.
#[test]
fn test_zipper_children_with_unicode() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    for word in &["ab", "aé", "a中", "a🎉"] {
        trie.insert(word).expect("insert failed");
    }

    let zipper = PersistentARTrieCharZipper::new(&trie);
    let a_zipper = zipper.descend('a').expect("should have 'a'");
    let children: Vec<char> = a_zipper.children().map(|(c, _)| c).collect();

    for expected in &['b', 'é', '中', '🎉'] {
        assert!(children.contains(expected));
    }
    assert_eq!(children.len(), 4);
}
/// Exercises `contains`, `len` and `root` with ASCII, CJK and accented keys;
/// the root is not final because the empty string was not inserted.
#[test]
fn test_dictionary_trait_unicode() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    for word in &["hello", "世界", "café"] {
        trie.insert(word).expect("insert failed");
    }

    assert!(trie.contains("hello"));
    assert!(trie.contains("世界"));
    assert_eq!(trie.len(), 3);

    let root = trie.root();
    assert!(!root.is_final());
}
/// Follows the 'a' transition from the root and checks that its outgoing
/// edges include both the ASCII 'b' and the accented 'é'.
#[test]
fn test_dictionary_node_trait_unicode() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    trie.insert("abc").expect("insert failed");
    trie.insert("aéc").expect("insert failed");

    let root = trie.root();
    let a_node = root.transition('a');
    assert!(a_node.is_some());
    let a_node = a_node.unwrap();

    let edges: Vec<char> = a_node.edges().map(|(c, _)| c).collect();
    assert!(edges.contains(&'b'));
    assert!(edges.contains(&'é'));
}
/// Builds a trie by collecting an iterator of `&str` terms in several scripts.
#[test]
fn test_from_iterator_unicode() {
    let terms = vec!["café", "naïve", "中文", "🎉"];
    let trie: PersistentARTrieChar<()> = terms.into_iter().collect();

    assert_eq!(trie.len(), 4);
    assert!(trie.contains("café"));
    assert!(trie.contains("中文"));
    assert!(trie.contains("🎉"));
}
/// Builds a trie by collecting an iterator of owned `String` terms.
#[test]
fn test_from_iterator_owned_strings() {
    let terms: Vec<String> = vec![
        "résumé".to_string(),
        "東京".to_string(),
        "🎉🎊🎈".to_string(),
    ];
    let trie: PersistentARTrieChar<()> = terms.into_iter().collect();

    assert_eq!(trie.len(), 3);
    assert!(trie.contains("résumé"));
    assert!(trie.contains("東京"));
}
/// `iter()` yields every inserted term as a `String`; order is not asserted.
#[test]
fn test_iterator_unicode() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let inserted = ["aaa", "café", "中文"];
    for word in &inserted {
        trie.insert(word).expect("insert failed");
    }

    let terms: Vec<String> = trie.iter().collect();

    assert_eq!(terms.len(), 3);
    for word in &inserted {
        assert!(terms.contains(&word.to_string()));
    }
}
/// Three two-character words that start with the same CJK character remain
/// three separate keys.
#[test]
fn test_unicode_prefix_sharing() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    // All three begin with '中'.
    let words = ["中文", "中国", "中心"];

    for word in &words {
        trie.insert(word).expect("insert failed");
    }

    assert_eq!(trie.len(), 3);
    for word in &words {
        assert!(trie.contains(word));
    }
}
/// Three two-emoji keys share their first emoji; descending into it through
/// the zipper exposes three distinct children.
#[test]
fn test_emoji_prefix_sharing() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    // Same leading emoji, different second emoji.
    for key in &["🎉🎊", "🎉🎈", "🎉🎁"] {
        trie.insert(key).expect("insert failed");
    }
    assert_eq!(trie.len(), 3);

    let zipper = PersistentARTrieCharZipper::new(&trie);
    let party = zipper.descend('🎉').expect("should have party emoji");
    let children: Vec<char> = party.children().map(|(c, _)| c).collect();
    assert_eq!(children.len(), 3);
}
/// Inserts the empty string and checks it is reported as present and counted.
#[test]
fn test_empty_string() {
    let empty = "";
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();

    trie.insert(empty);

    assert!(trie.contains(empty));
    assert_eq!(trie.len(), 1);
}
/// Four one-character keys, one each from ASCII, Latin-1, CJK and emoji, are
/// stored as four distinct entries.
#[test]
fn test_single_character_unicode() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    // Each key is exactly one Unicode scalar value.
    let keys = ["a", "é", "中", "🎉"];

    for key in &keys {
        trie.insert(key).expect("insert failed");
    }

    assert_eq!(trie.len(), 4);
    for key in &keys {
        assert!(trie.contains(key));
    }
}
/// A full Chinese sentence (including a full-width comma) is stored and found
/// as a single key.
#[test]
fn test_long_unicode_string() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let long_unicode = "这是一个非常长的中文字符串，包含很多字符";

    trie.insert(long_unicode).expect("insert failed");

    assert!(trie.contains(long_unicode));
    assert_eq!(trie.len(), 1);
}
/// `insert` yields `true` for a new key and `false` when the same key is
/// inserted again; the length stays at one.
#[test]
fn test_duplicate_unicode_insertion() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();

    let first = trie.insert("café").expect("insert failed");
    let second = trie.insert("café").expect("insert failed");

    assert!(first);
    assert!(!second);
    assert_eq!(trie.len(), 1);
}
/// Shares one populated trie across four threads via `Arc` and has each thread
/// look up every term.
#[test]
fn test_concurrent_unicode_reads() {
    use std::sync::Arc;
    use std::thread;

    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let terms = vec!["café", "中文", "🎉", "日本語", "한국어"];
    for term in &terms {
        trie.insert(term).expect("insert failed");
    }

    // All inserts happen before the trie is shared; the threads only read.
    let trie: Arc<PersistentARTrieChar<()>> = Arc::new(trie);
    let handles: Vec<_> = (0..4)
        .map(|_| {
            let trie_clone: Arc<PersistentARTrieChar<()>> = Arc::clone(&trie);
            let terms_clone = terms.clone();
            thread::spawn(move || {
                for term in &terms_clone {
                    assert!(trie_clone.contains(term));
                }
            })
        })
        .collect();

    // A failed assertion inside a worker surfaces here as a join error.
    for handle in handles {
        handle.join().expect("thread join");
    }
}
/// The word "one" in English, Chinese and Korean each maps to the value 1.
#[test]
fn test_mapped_dictionary_unicode() {
    let trie: PersistentARTrieChar<i32> = PersistentARTrieChar::new();
    let ones = ["one", "一", "하나"];

    for word in &ones {
        trie.insert_with_value(word, 1);
    }

    for word in &ones {
        assert_eq!(trie.get_value(word), Some(1));
    }
}
/// Keys built from code points above U+FFFF (four bytes each in UTF-8).
#[test]
fn test_supplementary_plane_characters() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let keys = [
        "𝄞",     // U+1D11E MUSICAL SYMBOL G CLEF
        "𝐀𝐁𝐂",   // U+1D400..U+1D402 mathematical bold capitals
        "😀",     // U+1F600
        "👍🏽",   // U+1F44D followed by skin-tone modifier U+1F3FD
    ];

    for key in &keys {
        trie.insert(key).expect("insert failed");
    }

    for key in &keys {
        assert!(trie.contains(key));
    }
    assert_eq!(trie.len(), 4);
}
/// Inserts "ab" with and without a ZERO WIDTH JOINER (U+200D) between the
/// letters and checks both spellings are found. Only a lower bound is placed
/// on the resulting length.
#[test]
fn test_zero_width_characters() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let spellings = ["a\u{200D}b", "ab"];

    for spelling in &spellings {
        trie.insert(spelling);
    }
    for spelling in &spellings {
        assert!(trie.contains(spelling));
    }

    assert!(trie.len() >= 1);
}
/// "hello world" written with four different separator characters is stored
/// as four distinct keys.
#[test]
fn test_unicode_whitespace() {
    let trie: PersistentARTrieChar<()> = PersistentARTrieChar::new();
    let variants = [
        "hello world",
        "hello\u{00A0}world", // NO-BREAK SPACE
        "hello\u{2003}world", // EM SPACE
        "hello\u{3000}world", // IDEOGRAPHIC SPACE
    ];

    for variant in &variants {
        trie.insert(variant);
    }
    for variant in &variants {
        assert!(trie.contains(variant));
    }

    assert_eq!(trie.len(), 4);
}
/// Writes ten 500-character ASCII keys to an on-disk trie, checkpoints it,
/// reopens it from the same path, and checks that every key is still present
/// (and, where `get` yields a value, that it equals the key's index).
/// Progress is printed before the checkpoint and after the reopen.
#[test]
fn test_deep_trie_no_stack_overflow() {
    let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");
    let path = temp_dir.path().join("deep_trie_char");
    let num_strings = 10usize;
    let string_length = 500usize;

    // Key `i` is `string_length` lowercase letters that cycle through the
    // alphabet, starting `i` letters in.
    let make_key = |i: usize| -> String {
        (0..string_length)
            .map(|j| char::from(b'a' + ((i + j) % 26) as u8))
            .collect()
    };

    // Inner scope: the writing handle goes out of scope before the reopen below.
    {
        let trie = PersistentARTrieChar::<u64>::create(&path).expect("Failed to create trie");
        for i in 0..num_strings {
            trie.upsert(&make_key(i), i as u64).expect("Failed to insert");
        }

        println!("=== Before checkpoint ===");
        println!("Trie len: {}", trie.len());
        for i in 0..num_strings {
            let present = trie.contains(&make_key(i));
            println!("String {} present: {}", i, present);
        }

        trie.checkpoint().expect("Failed to checkpoint");
    }

    let reopened = PersistentARTrieChar::<u64>::open(&path)
        .expect("Failed to reopen trie - possible stack overflow in recursive loading");

    println!("=== After reopen ===");
    println!("Reopened len: {}", reopened.len());
    for i in 0..num_strings {
        let long_key = make_key(i);
        let present = reopened.contains(&long_key);
        println!(
            "String {} present after reopen: {} (first char: '{}')",
            i,
            present,
            long_key.chars().next().unwrap()
        );
        // Extra diagnostics for the last key only, printed when it is missing.
        if i == 9 && !present {
            println!("DEBUG: Tracing string 9 lookup failure");
            let value = reopened.get(&long_key);
            println!("DEBUG: get() for string 9 returned: {:?}", value.is_some());
        }
    }

    assert_eq!(
        reopened.len(),
        num_strings,
        "All strings should be present after reopen"
    );

    for i in 0..num_strings {
        let long_key = make_key(i);
        assert!(
            reopened.contains(&long_key),
            "String {} should be present after reopen",
            i
        );
        // The value is only compared when `get` returns one.
        if let Some(value) = reopened.get(&long_key) {
            assert_eq!(value, i as u64, "Value for string {} should match", i);
        }
    }
}
/// Writes five 300-character CJK keys to an on-disk trie, checkpoints it,
/// reopens it from the same path, and checks the entry count survived.
#[test]
fn test_deep_unicode_trie_no_stack_overflow() {
    let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");
    let path = temp_dir.path().join("deep_unicode_trie");
    let num_strings = 5usize;
    let string_length = 300usize;

    // Inner scope: the writing handle goes out of scope before the reopen below.
    {
        let trie = PersistentARTrieChar::<u64>::create(&path).expect("Failed to create trie");
        for i in 0..num_strings {
            let long_key: String = (0..string_length)
                .map(|j| {
                    // Lands in U+4E00..=U+9FFE, which contains no surrogates,
                    // so the fallback below is never actually taken.
                    let codepoint = 0x4E00 + ((i * 17 + j * 13) % 0x51FF) as u32;
                    char::from_u32(codepoint).unwrap_or('\u{4E00}')
                })
                .collect();
            trie.upsert(&long_key, i as u64).expect("Failed to insert");
        }
        trie.checkpoint().expect("Failed to checkpoint");
    }

    let reopened =
        PersistentARTrieChar::<u64>::open(&path).expect("Failed to reopen Unicode trie");
    assert_eq!(reopened.len(), num_strings);
}