use std::path::Path;
use std::sync::Arc;
use parking_lot::RwLock;
use thiserror::Error;
pub use libdictenstein::persistent_artrie::dict_impl::DurabilityPolicy;
pub use libdictenstein::persistent_artrie::recovery::RecoveryReport;
pub use libdictenstein::persistent_vocab_artrie::{
PersistentVocabARTrie, SharedVocabARTrie, VocabSyncHandle,
};
/// Starting index handed to `PersistentVocabARTrie::create_with_start_index`
/// whenever this module creates a new vocabulary.
///
/// The module's tests expect the first inserted term to receive index `1`
/// and `get_term(0)` to return `None`.
pub const FIRST_VALID_INDEX: u64 = 1;
/// Error type returned by the fallible vocabulary helpers in this module.
#[derive(Error, Debug)]
pub enum VocabularyError {
/// Wraps a `std::io::Error`; the `#[from]` attribute lets `?` convert it.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// Trie failure carried only as a message string.
/// NOTE(review): not constructed in the visible code — confirm it is still used.
#[error("Trie error: {0}")]
Trie(String),
/// Version mismatch carrying the expected and the found version numbers.
/// NOTE(review): not constructed in the visible code — confirm it is still used.
#[error("Vocabulary version mismatch: expected {expected}, found {found}")]
VersionMismatch {
expected: u64,
found: u64,
},
/// Wraps an error from the underlying persistent ARTrie; the `#[from]`
/// attribute lets `?` convert it.
#[error("Persistent ARTrie error: {0}")]
PersistentARTrie(#[from] libdictenstein::persistent_artrie::error::PersistentARTrieError),
}
/// Shorthand for `Result<T, VocabularyError>`, used by the fallible helpers in this module.
pub type VocabularyResult<T> = Result<T, VocabularyError>;
/// Creates a brand-new persistent vocabulary at `path` and wraps it for shared use.
///
/// The trie is created with [`FIRST_VALID_INDEX`] as its starting index,
/// `enable_slot_tracking` is called on it, and it is returned behind
/// `Arc<RwLock<_>>`.
///
/// # Errors
///
/// Propagates any error returned by
/// `PersistentVocabARTrie::create_with_start_index`.
pub fn create_vocabulary(path: &Path) -> VocabularyResult<SharedVocabARTrie> {
    let fresh = PersistentVocabARTrie::create_with_start_index(path, FIRST_VALID_INDEX)?;
    fresh.enable_slot_tracking();
    let shared = Arc::new(RwLock::new(fresh));
    Ok(shared)
}
/// Creates a brand-new persistent vocabulary at `path`, accepting a bloom
/// capacity hint.
///
/// `bloom_capacity` is accepted but currently ignored: the body performs the
/// same steps as [`create_vocabulary`].
///
/// # Errors
///
/// Propagates any error returned by
/// `PersistentVocabARTrie::create_with_start_index`.
pub fn create_vocabulary_with_bloom(
    path: &Path,
    bloom_capacity: usize,
) -> VocabularyResult<SharedVocabARTrie> {
    // The hint is deliberately discarded; it only keeps the signature stable.
    let _ = bloom_capacity;
    let fresh = PersistentVocabARTrie::create_with_start_index(path, FIRST_VALID_INDEX)?;
    fresh.enable_slot_tracking();
    let shared = Arc::new(RwLock::new(fresh));
    Ok(shared)
}
/// Opens the vocabulary stored at `path` and wraps it for shared use.
///
/// The trie is opened through `PersistentVocabARTrie::open_with_recovery`;
/// the recovery report it produces is discarded. Use
/// [`open_vocabulary_with_recovery`] to receive the report.
///
/// # Errors
///
/// Propagates any error returned by `open_with_recovery`.
pub fn open_vocabulary(path: &Path) -> VocabularyResult<SharedVocabARTrie> {
    let (recovered, _unused_report) = PersistentVocabARTrie::open_with_recovery(path)?;
    recovered.enable_slot_tracking();
    let shared = Arc::new(RwLock::new(recovered));
    Ok(shared)
}
/// Opens the vocabulary stored at `path` and also returns the recovery report.
///
/// Performs the same steps as [`open_vocabulary`], but hands the
/// `RecoveryReport` produced by `open_with_recovery` back to the caller
/// alongside the shared trie.
///
/// # Errors
///
/// Propagates any error returned by `open_with_recovery`.
pub fn open_vocabulary_with_recovery(
    path: &Path,
) -> VocabularyResult<(SharedVocabARTrie, RecoveryReport)> {
    let (recovered, recovery_report) = PersistentVocabARTrie::open_with_recovery(path)?;
    recovered.enable_slot_tracking();
    let shared = Arc::new(RwLock::new(recovered));
    Ok((shared, recovery_report))
}
/// Opens the vocabulary at `path` if it exists, otherwise creates a new one.
///
/// The existence check and the following open/create are separate steps, so
/// another process changing `path` in between is not guarded against.
///
/// # Errors
///
/// Returns [`VocabularyError::Io`] when the existence of `path` cannot be
/// determined (for example, a permission error on a parent directory), and
/// otherwise propagates the error from [`open_vocabulary`] or
/// [`create_vocabulary`].
pub fn open_or_create_vocabulary(path: &Path) -> VocabularyResult<SharedVocabARTrie> {
    // `Path::exists` reports `false` on I/O errors, which would send an
    // existing-but-inaccessible vocabulary down the create branch.
    // `try_exists` surfaces that error to the caller instead.
    if path.try_exists()? {
        open_vocabulary(path)
    } else {
        create_vocabulary(path)
    }
}
/// Opens the vocabulary at `path` if it exists, otherwise creates a new one
/// with a bloom capacity hint.
///
/// `bloom_capacity` is only forwarded on the create branch; opening an
/// existing vocabulary does not use it.
///
/// # Errors
///
/// Returns [`VocabularyError::Io`] when the existence of `path` cannot be
/// determined, and otherwise propagates the error from [`open_vocabulary`]
/// or [`create_vocabulary_with_bloom`].
pub fn open_or_create_vocabulary_with_bloom(
    path: &Path,
    bloom_capacity: usize,
) -> VocabularyResult<SharedVocabARTrie> {
    // `Path::exists` reports `false` on I/O errors, which would send an
    // existing-but-inaccessible vocabulary down the create branch.
    // `try_exists` surfaces that error to the caller instead.
    if path.try_exists()? {
        open_vocabulary(path)
    } else {
        create_vocabulary_with_bloom(path, bloom_capacity)
    }
}
/// Moves an already-constructed vocabulary into an `Arc` so it can be shared
/// without an outer lock.
pub fn create_concurrent_vocabulary_lockfree(
    vocab: PersistentVocabARTrie,
) -> Arc<PersistentVocabARTrie> {
    Arc::from(vocab)
}
/// Opens the vocabulary at `path` if it exists, otherwise creates it, and
/// returns it in a bare `Arc` (no outer `RwLock`).
///
/// An existing vocabulary is opened through `open_with_recovery` and the
/// recovery report is discarded; a new one is created with
/// [`FIRST_VALID_INDEX`] as its starting index.
///
/// NOTE(review): unlike `open_vocabulary` / `create_vocabulary`, this path
/// does not call `enable_slot_tracking` — confirm that is intentional.
///
/// # Errors
///
/// Returns [`VocabularyError::Io`] when the existence of `path` cannot be
/// determined, and otherwise propagates the error from the open or create
/// call.
pub fn open_or_create_concurrent_vocabulary_lockfree(
    path: &Path,
) -> VocabularyResult<Arc<PersistentVocabARTrie>> {
    // `Path::exists` reports `false` on I/O errors, which would send an
    // existing-but-inaccessible vocabulary down the create branch.
    // `try_exists` surfaces that error to the caller instead.
    let trie = if path.try_exists()? {
        let (trie, _report) = PersistentVocabARTrie::open_with_recovery(path)?;
        trie
    } else {
        PersistentVocabARTrie::create_with_start_index(path, FIRST_VALID_INDEX)?
    };
    Ok(Arc::new(trie))
}
/// Same as [`open_or_create_concurrent_vocabulary_lockfree`]; the
/// `_estimated_terms` hint is accepted but not used.
///
/// # Errors
///
/// Propagates the error from
/// [`open_or_create_concurrent_vocabulary_lockfree`].
pub fn open_or_create_concurrent_vocabulary_lockfree_with_capacity(
    path: &Path,
    _estimated_terms: usize,
) -> VocabularyResult<Arc<PersistentVocabARTrie>> {
    let shared = open_or_create_concurrent_vocabulary_lockfree(path)?;
    Ok(shared)
}
/// Same as [`open_or_create_concurrent_vocabulary_lockfree`]; the
/// `_bloom_capacity` hint is accepted but not used.
///
/// # Errors
///
/// Propagates the error from
/// [`open_or_create_concurrent_vocabulary_lockfree`].
pub fn open_or_create_concurrent_vocabulary_lockfree_with_bloom(
    path: &Path,
    _bloom_capacity: usize,
) -> VocabularyResult<Arc<PersistentVocabARTrie>> {
    let shared = open_or_create_concurrent_vocabulary_lockfree(path)?;
    Ok(shared)
}
/// Alias for a vocabulary shared through a bare `Arc`, i.e. the type returned
/// by the `*_lockfree` constructors in this module.
pub type SharedConcurrentVocab = Arc<PersistentVocabARTrie>;
/// Appends `value` to `buf` as a little-endian base-128 varint.
///
/// Each output byte carries seven payload bits, least-significant group
/// first. Every byte except the last has its high bit (`0x80`) set as a
/// continuation marker. Existing contents of `buf` are left untouched.
#[inline]
pub fn encode_varint(mut value: u64, buf: &mut Vec<u8>) {
    // Emit continuation bytes while more than seven bits remain.
    while value >= 0x80 {
        buf.push((value as u8) | 0x80);
        value >>= 7;
    }
    // The final group fits in seven bits, so its high bit stays clear.
    buf.push(value as u8);
}
/// Decodes one little-endian base-128 varint from the front of `bytes`.
///
/// Returns `Some((value, consumed))`, where `consumed` is the number of bytes
/// that made up the varint; trailing bytes are ignored.
///
/// Returns `None` when:
/// * `bytes` is empty or ends while the continuation bit is still set,
/// * the varint runs past ten bytes, or
/// * the encoded value does not fit in a `u64` (the tenth byte may only
///   contribute a single bit).
#[inline]
pub fn decode_varint(bytes: &[u8]) -> Option<(u64, usize)> {
    let mut result: u64 = 0;
    let mut shift: u32 = 0;
    for (i, &byte) in bytes.iter().enumerate() {
        if shift >= 64 {
            // More than ten groups cannot belong to a `u64`.
            return None;
        }
        let payload = u64::from(byte & 0x7F);
        // Reject payload bits that the shift below would push past bit 63;
        // without this check they are silently dropped and a wrong value
        // is returned.
        if shift > 0 && (payload >> (64 - shift)) != 0 {
            return None;
        }
        result |= payload << shift;
        if byte & 0x80 == 0 {
            return Some((result, i + 1));
        }
        shift += 7;
    }
    // Ran out of input before seeing a byte without the continuation bit.
    None
}
/// Builds the string key for an n-gram, inserting each word into `vocab`.
///
/// The write lock on `vocab` is held for the whole call. Each word's index is
/// obtained from `insert`, encoded as a varint, and every resulting byte is
/// mapped to the `char` with the same code point (U+0000..=U+00FF).
///
/// # Panics
///
/// Panics if `insert` returns an error; use [`try_encode_ngram_key`] to
/// receive the error instead.
pub fn encode_ngram_key(words: &[&str], vocab: &SharedVocabARTrie) -> String {
    let mut encoded = Vec::with_capacity(words.len() * 2);
    let trie = vocab.write();
    words.iter().for_each(|term| {
        let id = trie
            .insert(term)
            .expect("vocabulary insert: persistent ARTrie I/O failed");
        encode_varint(id, &mut encoded);
    });
    encoded.into_iter().map(char::from).collect()
}
/// Fallible counterpart of [`encode_ngram_key`].
///
/// Holds the write lock on `vocab` for the whole call, inserts each word,
/// varint-encodes the returned indices and maps every byte to the `char` with
/// the same code point.
///
/// # Errors
///
/// Returns the first error produced by `insert`; words before the failing one
/// have already been inserted at that point.
pub fn try_encode_ngram_key(words: &[&str], vocab: &SharedVocabARTrie) -> VocabularyResult<String> {
    let mut encoded = Vec::with_capacity(words.len() * 2);
    let trie = vocab.write();
    words.iter().try_for_each(|term| -> VocabularyResult<()> {
        let id = trie.insert(term)?;
        encode_varint(id, &mut encoded);
        Ok(())
    })?;
    Ok(encoded.into_iter().map(char::from).collect())
}
/// Builds the string key for an n-gram using a single `insert_batch` call.
///
/// An empty `words` slice yields an empty key without touching `vocab`.
/// Otherwise the write lock is taken only for the `insert_batch` call, and
/// the returned indices are varint-encoded and mapped byte-for-byte to
/// `char`s.
///
/// # Panics
///
/// Panics if `insert_batch` returns an error; use
/// [`try_encode_ngram_key_batch`] to receive the error instead.
pub fn encode_ngram_key_batch(words: &[&str], vocab: &SharedVocabARTrie) -> String {
    if words.is_empty() {
        return String::new();
    }
    let ids = vocab
        .write()
        .insert_batch(words)
        .expect("vocabulary batch insert: persistent ARTrie I/O failed");
    let mut bytes = Vec::with_capacity(ids.len() * 2);
    for id in ids {
        encode_varint(id, &mut bytes);
    }
    let mut key = String::with_capacity(bytes.len());
    key.extend(bytes.into_iter().map(char::from));
    key
}
/// Fallible counterpart of [`encode_ngram_key_batch`].
///
/// An empty `words` slice yields `Ok` with an empty key without touching
/// `vocab`. Otherwise the write lock is taken only for the `insert_batch`
/// call.
///
/// # Errors
///
/// Propagates the error returned by `insert_batch`.
pub fn try_encode_ngram_key_batch(
    words: &[&str],
    vocab: &SharedVocabARTrie,
) -> VocabularyResult<String> {
    if words.is_empty() {
        return Ok(String::new());
    }
    let ids = vocab.write().insert_batch(words)?;
    let mut bytes = Vec::with_capacity(ids.len() * 2);
    for id in ids {
        encode_varint(id, &mut bytes);
    }
    let mut key = String::with_capacity(bytes.len());
    key.extend(bytes.into_iter().map(char::from));
    Ok(key)
}
/// Builds the string key for an n-gram against a vocabulary that is used
/// without an outer lock.
///
/// An empty `words` slice yields an empty key without touching `vocab`.
/// Otherwise the indices returned by `insert_batch` are varint-encoded and
/// mapped byte-for-byte to `char`s.
///
/// # Panics
///
/// Panics if `insert_batch` returns an error.
pub fn encode_ngram_key_lockfree(words: &[&str], vocab: &PersistentVocabARTrie) -> String {
    if words.is_empty() {
        return String::new();
    }
    let ids = vocab
        .insert_batch(words)
        .expect("vocab insert_batch failed");
    let mut bytes = Vec::with_capacity(ids.len() * 2);
    for id in ids {
        encode_varint(id, &mut bytes);
    }
    let mut key = String::with_capacity(bytes.len());
    key.extend(bytes.into_iter().map(char::from));
    key
}
pub fn try_encode_ngram_key_lockfree(
words: &[&str],
vocab: &PersistentVocabARTrie,
) -> VocabularyResult<String> {
Ok(encode_ngram_key_lockfree(words, vocab))
}
/// Builds the string key for an n-gram against a vocabulary used without an
/// outer lock.
///
/// The body performs the same steps as [`encode_ngram_key_lockfree`]: an
/// empty `words` slice yields an empty key without touching `vocab`;
/// otherwise the indices from `insert_batch` are varint-encoded and mapped
/// byte-for-byte to `char`s.
///
/// # Panics
///
/// Panics if `insert_batch` returns an error.
pub fn encode_ngram_key_with_lockfree_vocab(
    words: &[&str],
    vocab: &PersistentVocabARTrie,
) -> String {
    if words.is_empty() {
        return String::new();
    }
    let ids = vocab
        .insert_batch(words)
        .expect("vocab insert_batch failed");
    let mut bytes = Vec::with_capacity(ids.len() * 2);
    for id in ids {
        encode_varint(id, &mut bytes);
    }
    let mut key = String::with_capacity(bytes.len());
    key.extend(bytes.into_iter().map(char::from));
    key
}
use std::cell::RefCell;
// Per-thread scratch buffer used by `with_encoded_ngram_key_lockfree`. That
// function clears it at the start of each call, so the allocation is reused
// instead of building a fresh `Vec` every time. It starts with room for 64 bytes.
thread_local! {
static ENCODE_BUF: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(64));
}
/// Encodes an n-gram key into a reusable per-thread buffer and passes the
/// encoded bytes to `f`, returning whatever `f` returns.
///
/// The indices returned by `vocab.insert_batch(words)` are varint-encoded
/// back to back. The slice given to `f` is only valid for the duration of
/// the call.
///
/// The scratch buffer is moved out of the thread-local slot while `f` runs.
/// `f` may therefore call this function (or anything built on it) again on
/// the same thread; the nested call simply works with a fresh buffer.
/// Holding the `RefCell` borrow across `f`, as before, made such a nested
/// call panic with a double mutable borrow.
///
/// # Panics
///
/// Panics if `insert_batch` returns an error.
pub fn with_encoded_ngram_key_lockfree<R>(
    words: &[&str],
    vocab: &PersistentVocabARTrie,
    f: impl FnOnce(&[u8]) -> R,
) -> R {
    // Take ownership of the scratch buffer so no `RefCell` borrow is alive
    // while user code runs.
    let mut scratch = ENCODE_BUF.with(|slot| std::mem::take(&mut *slot.borrow_mut()));
    scratch.clear();
    let indices = vocab
        .insert_batch(words)
        .expect("vocab insert_batch failed");
    for index in indices {
        encode_varint(index, &mut scratch);
    }
    let result = f(&scratch);
    // Hand the buffer back for the next call. If a nested call already
    // parked a buffer in the slot, keep whichever has more capacity.
    ENCODE_BUF.with(|slot| {
        let mut parked = slot.borrow_mut();
        if parked.capacity() < scratch.capacity() {
            *parked = scratch;
        }
    });
    result
}
/// Returns the varint-encoded n-gram key as an owned byte vector.
///
/// Delegates to [`with_encoded_ngram_key_lockfree`] and copies the encoded
/// slice out of the per-thread buffer.
///
/// # Panics
///
/// Panics under the same conditions as [`with_encoded_ngram_key_lockfree`].
pub fn encode_ngram_key_lockfree_bytes(words: &[&str], vocab: &PersistentVocabARTrie) -> Vec<u8> {
    with_encoded_ngram_key_lockfree(words, vocab, |encoded: &[u8]| Vec::from(encoded))
}
/// Builds the string key for an n-gram using only words already in `vocab`.
///
/// Takes the read lock for the whole call and looks every word up with
/// `get_index`. Returns `None` as soon as one word has no index; nothing is
/// inserted.
pub fn encode_ngram_key_existing(words: &[&str], vocab: &SharedVocabARTrie) -> Option<String> {
    let mut encoded = Vec::with_capacity(words.len() * 2);
    let trie = vocab.read();
    words.iter().try_for_each(|term| {
        let id = trie.get_index(term)?;
        encode_varint(id, &mut encoded);
        Some(())
    })?;
    Some(encoded.into_iter().map(char::from).collect())
}
/// Builds the raw byte key for an n-gram, inserting each word into `vocab`.
///
/// The write lock on `vocab` is held for the whole call. Each index returned
/// by `insert` is appended to the key as a varint.
///
/// # Panics
///
/// Panics if `insert` returns an error.
pub fn encode_ngram_key_bytes(words: &[&str], vocab: &SharedVocabARTrie) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(words.len() * 2);
    let trie = vocab.write();
    words.iter().for_each(|term| {
        let id = trie
            .insert(term)
            .expect("vocabulary insert: persistent ARTrie I/O failed");
        encode_varint(id, &mut encoded);
    });
    encoded
}
/// Builds the raw byte key for an n-gram using only words already in `vocab`.
///
/// Takes the read lock for the whole call and looks every word up with
/// `get_index`. Returns `None` as soon as one word has no index; nothing is
/// inserted.
pub fn encode_ngram_key_existing_bytes(
    words: &[&str],
    vocab: &SharedVocabARTrie,
) -> Option<Vec<u8>> {
    let mut encoded = Vec::with_capacity(words.len() * 2);
    let trie = vocab.read();
    words.iter().try_for_each(|term| {
        let id = trie.get_index(term)?;
        encode_varint(id, &mut encoded);
        Some(())
    })?;
    Some(encoded)
}
/// Decodes a string key back into the list of vocabulary indices it holds.
///
/// Every `char` of `key` is narrowed to a byte with `as u8`, so code points
/// above U+00FF lose their upper bits. The bytes are then read as
/// back-to-back varints. Decoding stops silently at the first position where
/// `decode_varint` returns `None`, and the indices decoded so far are
/// returned.
pub fn decode_ngram_key(key: &str) -> Vec<u64> {
    let raw: Vec<u8> = key.chars().map(|ch| ch as u8).collect();
    let mut decoded = Vec::new();
    let mut rest: &[u8] = &raw;
    while !rest.is_empty() {
        match decode_varint(rest) {
            Some((value, used)) => {
                decoded.push(value);
                rest = &rest[used..];
            }
            None => break,
        }
    }
    decoded
}
/// Returns how many vocabulary indices the string key `key` contains.
///
/// The count is whatever [`decode_ngram_key`] manages to decode. Keys holding
/// more than 255 indices report `u8::MAX`; the previous `as u8` cast wrapped
/// around instead (256 indices reported as 0).
#[inline]
pub fn ngram_order(key: &str) -> u8 {
    u8::try_from(decode_ngram_key(key).len()).unwrap_or(u8::MAX)
}
/// Encodes already-known vocabulary indices into a string key.
///
/// The indices are written as back-to-back varints and every resulting byte
/// is mapped to the `char` with the same code point (U+0000..=U+00FF).
pub fn encode_indices_to_key(indices: &[u64]) -> String {
    let mut bytes = Vec::with_capacity(indices.len() * 2);
    indices
        .iter()
        .for_each(|&id| encode_varint(id, &mut bytes));
    let mut key = String::with_capacity(bytes.len());
    key.extend(bytes.into_iter().map(char::from));
    key
}
/// Decodes a raw byte key back into the list of vocabulary indices it holds.
///
/// The bytes are read as back-to-back varints. Decoding stops silently at
/// the first position where `decode_varint` returns `None`, and the indices
/// decoded so far are returned.
#[inline]
pub fn decode_ngram_key_bytes(key: &[u8]) -> Vec<u64> {
    let mut decoded = Vec::new();
    let mut rest = key;
    while !rest.is_empty() {
        let Some((value, used)) = decode_varint(rest) else {
            break;
        };
        decoded.push(value);
        rest = &rest[used..];
    }
    decoded
}
/// Encodes already-known vocabulary indices into a raw byte key made of
/// back-to-back varints.
pub fn encode_indices_to_key_bytes(indices: &[u64]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(indices.len() * 2);
    indices
        .iter()
        .for_each(|&id| encode_varint(id, &mut bytes));
    bytes
}
/// Returns how many varints the raw byte key `key` contains, without
/// collecting the decoded indices.
///
/// Counting stops at the first position where `decode_varint` returns
/// `None`. Keys holding more than 255 varints report `u8::MAX`; the previous
/// `count += 1` overflowed there (a panic in debug builds, wrap-around in
/// release builds).
#[inline]
pub fn ngram_order_bytes(key: &[u8]) -> u8 {
    let mut count: u8 = 0;
    let mut offset = 0;
    while offset < key.len() {
        if let Some((_index, consumed)) = decode_varint(&key[offset..]) {
            count = count.saturating_add(1);
            offset += consumed;
        } else {
            break;
        }
    }
    count
}
pub fn try_encode_ngram_key_lockfree_bytes(
words: &[&str],
vocab: &PersistentVocabARTrie,
) -> VocabularyResult<Vec<u8>> {
Ok(encode_ngram_key_lockfree_bytes(words, vocab))
}
// Unit tests for the vocabulary helpers and the varint key codec. Every test
// that needs a vocabulary creates it inside a fresh temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// Creates a vocabulary file inside a new temp dir. The `TempDir` is returned
// so the caller keeps it alive for the duration of the test.
fn create_temp_vocab() -> (TempDir, SharedVocabARTrie) {
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let vocab = create_vocabulary(&path).expect("Failed to create vocab");
(dir, vocab)
}
// New words receive distinct indices 1, 2, 3 in insertion order.
#[test]
fn test_insert_new_word() {
let (_dir, vocab) = create_temp_vocab();
let idx1 = vocab.write().insert("the").expect("test insert");
let idx2 = vocab.write().insert("quick").expect("test insert");
let idx3 = vocab.write().insert("brown").expect("test insert");
assert_ne!(idx1, idx2);
assert_ne!(idx2, idx3);
assert_ne!(idx1, idx3);
assert_eq!(idx1, 1);
assert_eq!(idx2, 2);
assert_eq!(idx3, 3);
}
// Inserting the same word twice returns the same index and does not grow the vocabulary.
#[test]
fn test_insert_existing_word() {
let (_dir, vocab) = create_temp_vocab();
let idx1 = vocab.write().insert("hello").expect("test insert");
let idx2 = vocab.write().insert("hello").expect("test insert");
assert_eq!(idx1, idx2);
assert_eq!(vocab.read().len(), 1);
}
// `get_index` is `None` for an unknown word and returns the inserted index afterwards.
#[test]
fn test_get_existing() {
let (_dir, vocab) = create_temp_vocab();
assert!(vocab.read().get_index("nonexistent").is_none());
let idx1 = vocab.write().insert("test").expect("test insert");
let idx2 = vocab.read().get_index("test");
assert_eq!(idx2, Some(idx1));
}
// `contains` flips from false to true once the word is inserted.
#[test]
fn test_contains() {
let (_dir, vocab) = create_temp_vocab();
assert!(!vocab.read().contains("word"));
vocab.write().insert("word").expect("test insert");
assert!(vocab.read().contains("word"));
}
// Varint round-trip over boundary values, including 0 and `u64::MAX`.
#[test]
fn test_varint_encoding() {
let test_values: [u64; 10] = [0, 1, 127, 128, 255, 256, 16383, 16384, 2097151, u64::MAX];
for &value in &test_values {
let mut buf = Vec::new();
encode_varint(value, &mut buf);
let (decoded, len) = decode_varint(&buf).expect("Should decode");
assert_eq!(decoded, value, "Value {} should roundtrip", value);
assert_eq!(len, buf.len(), "Should consume all bytes for {}", value);
}
}
// Encoded length grows by one byte at each 7-bit boundary (127/128, 16383/16384).
#[test]
fn test_varint_encoding_sizes() {
let mut buf = Vec::new();
buf.clear();
encode_varint(0, &mut buf);
assert_eq!(buf.len(), 1);
buf.clear();
encode_varint(127, &mut buf);
assert_eq!(buf.len(), 1);
buf.clear();
encode_varint(128, &mut buf);
assert_eq!(buf.len(), 2);
buf.clear();
encode_varint(16383, &mut buf);
assert_eq!(buf.len(), 2);
buf.clear();
encode_varint(16384, &mut buf);
assert_eq!(buf.len(), 3);
}
// A three-word key decodes to indices 1, 2, 3 on a fresh vocabulary.
#[test]
fn test_encode_ngram_key() {
let (_dir, vocab) = create_temp_vocab();
let key = encode_ngram_key(&["the", "quick", "brown"], &vocab);
let indices = decode_ngram_key(&key);
assert_eq!(indices.len(), 3);
assert_eq!(indices, vec![1, 2, 3]); }
// Batch encoding assigns 1, 2, 3 and yields the same key when repeated, without growing the vocabulary.
#[test]
fn test_encode_ngram_key_batch() {
let (_dir, vocab) = create_temp_vocab();
let words = ["the", "quick", "brown"];
let key_batch = encode_ngram_key_batch(&words, &vocab);
let indices = decode_ngram_key(&key_batch);
assert_eq!(indices.len(), 3);
assert_eq!(indices, vec![1, 2, 3]);
assert_eq!(vocab.read().len(), 3);
let key_batch2 = encode_ngram_key_batch(&words, &vocab);
assert_eq!(key_batch, key_batch2);
assert_eq!(vocab.read().len(), 3); }
// An empty word list gives an empty key and leaves the vocabulary empty.
#[test]
fn test_encode_ngram_key_batch_empty() {
let (_dir, vocab) = create_temp_vocab();
let key = encode_ngram_key_batch(&[], &vocab);
assert!(key.is_empty());
assert_eq!(vocab.read().len(), 0);
}
// A batch mixing existing and new words reuses indices 1, 2 and assigns 3, 4.
#[test]
fn test_encode_ngram_key_batch_mixed() {
let (_dir, vocab) = create_temp_vocab();
vocab.write().insert("the").expect("test insert");
vocab.write().insert("quick").expect("test insert");
let words = ["the", "quick", "brown", "fox"];
let key = encode_ngram_key_batch(&words, &vocab);
let indices = decode_ngram_key(&key);
assert_eq!(indices, vec![1, 2, 3, 4]);
assert_eq!(vocab.read().len(), 4);
}
// Indices 1 and 127 take one byte each, 128 and 200 take two, so the key is 6 chars long.
#[test]
fn test_encode_ngram_key_with_large_indices() {
let (_dir, vocab) = create_temp_vocab();
for i in 0..200 {
vocab
.write()
.insert(&format!("word{}", i))
.expect("insert word");
}
let key = encode_ngram_key(&["word0", "word126", "word127", "word199"], &vocab);
let indices = decode_ngram_key(&key);
assert_eq!(indices, vec![1, 127, 128, 200]);
assert_eq!(key.chars().count(), 6);
}
// Decoded indices map back to the original words through `get_term`.
#[test]
fn test_encode_decode_roundtrip() {
let (_dir, vocab) = create_temp_vocab();
let words = ["the", "quick", "brown", "fox"];
let key = encode_ngram_key(&words, &vocab);
let indices = decode_ngram_key(&key);
assert_eq!(indices.len(), words.len());
let decoded: Vec<_> = indices
.iter()
.map(|&idx| vocab.read().get_term(idx).expect("index should exist"))
.collect();
assert_eq!(decoded, words);
}
// A token containing `|` round-trips intact through key encoding and decoding.
#[test]
fn test_pipe_in_token_no_longer_corrupts() {
let (_dir, vocab) = create_temp_vocab();
let tokens = ["foo|bar", "baz"];
let key = encode_ngram_key(&tokens, &vocab);
let indices = decode_ngram_key(&key);
assert_eq!(indices.len(), 2);
let decoded: Vec<_> = indices
.iter()
.map(|&idx| vocab.read().get_term(idx).expect("index should exist"))
.collect();
assert_eq!(decoded, tokens);
}
// `get_term` resolves assigned indices and returns `None` for 0 and for an unassigned index.
#[test]
fn test_get_term_reverse_lookup() {
let (_dir, vocab) = create_temp_vocab();
let idx1 = vocab.write().insert("hello").expect("test insert");
let idx2 = vocab.write().insert("world").expect("test insert");
let idx3 = vocab.write().insert("rust").expect("test insert");
assert_eq!(vocab.read().get_term(idx1), Some("hello".to_string()));
assert_eq!(vocab.read().get_term(idx2), Some("world".to_string()));
assert_eq!(vocab.read().get_term(idx3), Some("rust".to_string()));
assert_eq!(vocab.read().get_term(0), None); assert_eq!(vocab.read().get_term(999), None); }
// `ngram_order` reports the number of words that went into the key.
#[test]
fn test_ngram_order() {
let (_dir, vocab) = create_temp_vocab();
let unigram = encode_ngram_key(&["word"], &vocab);
let bigram = encode_ngram_key(&["the", "quick"], &vocab);
let trigram = encode_ngram_key(&["a", "b", "c"], &vocab);
let fivegram = encode_ngram_key(&["1", "2", "3", "4", "5"], &vocab);
assert_eq!(ngram_order(&unigram), 1);
assert_eq!(ngram_order(&bigram), 2);
assert_eq!(ngram_order(&trigram), 3);
assert_eq!(ngram_order(&fivegram), 5);
}
// Words differing only in letter case get distinct indices.
#[test]
fn test_case_sensitivity() {
let (_dir, vocab) = create_temp_vocab();
let lower = vocab.write().insert("the").expect("test insert");
let upper = vocab.write().insert("The").expect("test insert");
let all_caps = vocab.write().insert("THE").expect("test insert");
assert_ne!(lower, upper);
assert_ne!(upper, all_caps);
assert_ne!(lower, all_caps);
}
// Punctuation strings are stored like any other token.
#[test]
fn test_punctuation_as_tokens() {
let (_dir, vocab) = create_temp_vocab();
let comma = vocab.write().insert(",").expect("test insert");
let period = vocab.write().insert(".").expect("test insert");
let quote = vocab.write().insert("\"").expect("test insert");
assert_ne!(comma, period);
assert_ne!(period, quote);
assert_eq!(comma, 1);
assert_eq!(period, 2);
assert_eq!(quote, 3);
}
// After a checkpoint and reopen, existing indices are kept and the next new word gets index 3.
#[test]
fn test_persistence() {
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let idx1;
let idx2;
{
let vocab = create_vocabulary(&path).expect("Failed to create vocab");
idx1 = vocab.write().insert("hello").expect("test insert");
idx2 = vocab.write().insert("world").expect("test insert");
vocab.write().checkpoint().expect("Checkpoint failed");
}
{
let vocab = open_vocabulary(&path).expect("Failed to open vocab");
assert_eq!(vocab.read().len(), 2);
assert_eq!(vocab.read().get_index("hello"), Some(idx1));
assert_eq!(vocab.read().get_index("world"), Some(idx2));
let idx3 = vocab.write().insert("new").expect("test insert");
assert_eq!(idx3, 3); }
}
// `encode_ngram_key_existing` returns `None` when any word is unknown and a key otherwise.
#[test]
fn test_encode_ngram_key_existing() {
let (_dir, vocab) = create_temp_vocab();
assert!(encode_ngram_key_existing(&["unknown"], &vocab).is_none());
vocab.write().insert("the").expect("test insert");
vocab.write().insert("quick").expect("test insert");
let key = encode_ngram_key_existing(&["the", "quick"], &vocab);
assert!(key.is_some());
let indices = decode_ngram_key(&key.unwrap());
assert_eq!(indices, vec![1, 2]);
assert!(encode_ngram_key_existing(&["the", "unknown"], &vocab).is_none());
}
// Ten threads inserting the same word through the write lock all get one index.
#[test]
fn test_concurrent_insert() {
use std::sync::Arc;
use std::thread;
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let vocab = Arc::new(create_vocabulary(&path).expect("Failed to create vocab"));
let mut handles = vec![];
for _ in 0..10 {
let vocab = Arc::clone(&vocab);
handles.push(thread::spawn(move || {
vocab.write().insert("shared_word").expect("test insert")
}));
}
let indices: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect();
let first = indices[0];
for idx in &indices {
assert_eq!(*idx, first);
}
assert_eq!(vocab.read().len(), 1);
}
// Every byte value survives the `u8 -> char -> u8` conversion used by the string keys.
#[test]
fn test_latin1_encoding_preserves_bytes() {
for byte in 0u8..=255 {
let c = char::from(byte);
assert_eq!(
c as u8, byte,
"Byte {} should round-trip through char",
byte
);
}
}
// 1000 distinct words receive consecutive indices starting at 1.
#[test]
fn test_large_vocabulary() {
let (_dir, vocab) = create_temp_vocab();
for i in 0..1000 {
let idx = vocab
.write()
.insert(&format!("word{}", i))
.expect("test insert");
assert_eq!(idx, (i + 1) as u64);
}
assert_eq!(vocab.read().len(), 1000);
}
// A vocabulary made by `create_vocabulary` reports a start index of 1.
#[test]
fn test_start_index() {
let (_dir, vocab) = create_temp_vocab();
assert_eq!(vocab.read().start_index(), 1);
}
// An insert marks the vocabulary dirty; a checkpoint clears the flag.
#[test]
fn test_is_dirty() {
let (_dir, vocab) = create_temp_vocab();
vocab.write().insert("test").expect("test insert");
assert!(vocab.read().is_dirty());
vocab.write().checkpoint().expect("checkpoint failed");
assert!(!vocab.read().is_dirty());
}
// `contains_index` is false for 0, true for an assigned index, false for the next unassigned one.
#[test]
fn test_contains_index() {
let (_dir, vocab) = create_temp_vocab();
assert!(!vocab.read().contains_index(0));
vocab.write().insert("test").expect("test insert");
assert!(vocab.read().contains_index(1));
assert!(!vocab.read().contains_index(2)); }
// The lock-free encoder inserts the words and assigns indices 1, 2, 3.
#[test]
fn test_encode_ngram_key_lockfree() {
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let concurrent = open_or_create_concurrent_vocabulary_lockfree(&path)
.expect("Failed to create concurrent vocab");
let key = encode_ngram_key_lockfree(&["the", "quick", "brown"], &concurrent);
let indices = decode_ngram_key(&key);
assert_eq!(indices.len(), 3);
assert_eq!(concurrent.get_index("the"), Some(1));
assert_eq!(concurrent.get_index("quick"), Some(2));
assert_eq!(concurrent.get_index("brown"), Some(3));
}
// Eight threads encode bigrams concurrently; each thread touches 101 distinct words.
#[test]
fn test_encode_ngram_key_lockfree_concurrent() {
use std::thread;
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let concurrent = open_or_create_concurrent_vocabulary_lockfree(&path)
.expect("Failed to create concurrent vocab");
let num_threads = 8;
let terms_per_thread = 100;
let handles: Vec<_> = (0..num_threads)
.map(|t| {
let c = Arc::clone(&concurrent);
thread::spawn(move || {
let mut keys = Vec::new();
for i in 0..terms_per_thread {
let words = [
format!("thread{}_word{}", t, i),
format!("thread{}_word{}", t, i + 1),
];
let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
let key = encode_ngram_key_lockfree(&word_refs, &c);
keys.push(key);
}
keys
})
})
.collect();
let all_keys: Vec<Vec<String>> = handles
.into_iter()
.map(|h| h.join().expect("thread complete"))
.collect();
for thread_keys in &all_keys {
for key in thread_keys {
let indices = decode_ngram_key(key);
assert_eq!(indices.len(), 2, "Each n-gram should have 2 indices");
}
}
// Only a lower bound on `next_index` is asserted.
let expected_vocab_size = num_threads * (terms_per_thread + 1);
assert!(concurrent.next_index() >= expected_vocab_size as u64 + 1);
}
// Create, checkpoint, drop and reopen through the lock-free constructor; indices survive the reopen.
#[test]
fn test_open_or_create_concurrent_vocabulary_lockfree() {
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let concurrent1 = open_or_create_concurrent_vocabulary_lockfree(&path)
.expect("Failed to create concurrent vocab");
concurrent1.insert("hello").expect("insert");
concurrent1.insert("world").expect("insert");
concurrent1.checkpoint().expect("checkpoint failed");
drop(concurrent1);
let concurrent2 = open_or_create_concurrent_vocabulary_lockfree(&path)
.expect("Failed to open concurrent vocab");
assert_eq!(concurrent2.get_index("hello"), Some(1));
assert_eq!(concurrent2.get_index("world"), Some(2));
let idx3 = concurrent2.insert("new").expect("insert");
assert!(
idx3 >= 3,
"New term index should be at least 3, got {}",
idx3
);
}
// After three inserts, `len` is 3 and `next_index` is 4.
#[test]
fn test_lockfree_vocab_stats() {
let dir = TempDir::new().expect("Failed to create temp dir");
let path = dir.path().join("vocab.artrie");
let concurrent = open_or_create_concurrent_vocabulary_lockfree(&path)
.expect("Failed to create concurrent vocab");
concurrent.insert("one").expect("insert");
concurrent.insert("two").expect("insert");
concurrent.insert("three").expect("insert");
assert_eq!(concurrent.len(), 3);
assert_eq!(concurrent.next_index(), 4); }
}