use std::sync::Arc;
use pathmap::PathMap;
use super::traits::{LatticeBackend, VocabId};
/// Numeric identifier type, aliased to `u64`.
///
/// NOTE(review): nothing in the visible part of this file uses `PathId`;
/// presumably it identifies a path inside the `PathMap` — confirm against callers.
pub type PathId = u64;
/// Extension of [`LatticeBackend`] for backends whose underlying storage can be
/// shared between several backend instances.
pub trait PathMapSharingBackend: LatticeBackend {
/// Builds a new backend for the given byte `prefix`.
///
/// Returns `None` when no such backend can be produced. The `PathMapBackend`
/// implementation in this file returns `None` when none of its interned words
/// matches the prefix.
fn share_prefix(&self, prefix: &[u8]) -> Option<Self>
where
Self: Sized;
/// Reports whether `self` and `other` share their underlying structure.
///
/// The `PathMapBackend` implementation in this file answers this by comparing
/// the storage pointers of the two backends.
fn shares_structure_with(&self, other: &Self) -> bool;
}
/// Vocabulary backend that pairs a reference-counted `PathMap` with a two-way
/// word/id table.
///
/// Cloning (derived) clones the `Arc`, so a clone points at the same `PathMap`
/// allocation as the original, while the vocabulary collections are cloned.
#[derive(Clone)]
pub struct PathMapBackend {
/// Reference-counted path map holding `VocabMetadata` values.
/// NOTE(review): no code in the visible part of this file inserts into it.
storage: Arc<PathMap<VocabMetadata>>,
/// Forward table: interned word -> id.
vocab: indexmap::IndexMap<Arc<str>, VocabId, ahash::RandomState>,
/// Reverse table: the word stored at index `i` is the word whose id is `i`.
vocab_reverse: Vec<Arc<str>>,
}
/// Value type stored in the backend's `PathMap`.
///
/// `Default` yields a zero `frequency` and an empty `pos_tags` list.
#[derive(Clone, Debug, Default)]
pub struct VocabMetadata {
/// Frequency value for the entry.
/// NOTE(review): presumably an occurrence count — confirm with the code that fills it.
pub frequency: u64,
/// Tags attached to the entry.
/// NOTE(review): presumably part-of-speech tags — confirm with the code that fills it.
pub pos_tags: Vec<String>,
}
impl PathMapBackend {
    /// Creates a backend with a fresh, empty `PathMap` and no interned words.
    pub fn new() -> Self {
        let storage = Arc::new(PathMap::new());
        Self {
            storage,
            vocab: Default::default(),
            vocab_reverse: Vec::new(),
        }
    }

    /// Borrows the reference-counted `PathMap` handle held by this backend.
    ///
    /// The `Arc` itself is returned by reference; callers that need their own
    /// handle can clone it.
    pub fn storage(&self) -> &Arc<PathMap<VocabMetadata>> {
        let handle = &self.storage;
        handle
    }
}
impl Default for PathMapBackend {
fn default() -> Self {
Self::new()
}
}
impl LatticeBackend for PathMapBackend {
    /// Returns the id of `word`, registering it first if it is not yet known.
    ///
    /// New ids are handed out densely: a new word receives the current length
    /// of the reverse table, so ids start at 0 and follow first-seen order.
    fn intern(&mut self, word: &str) -> VocabId {
        match self.vocab.get(word) {
            Some(&existing) => existing,
            None => {
                let fresh = self.vocab_reverse.len() as VocabId;
                // One allocation; both tables hold handles to the same string.
                let shared: Arc<str> = Arc::from(word);
                self.vocab.insert(Arc::clone(&shared), fresh);
                self.vocab_reverse.push(shared);
                fresh
            }
        }
    }

    /// Resolves `id` back to its word, or `None` if `id` is out of range.
    fn lookup(&self, id: VocabId) -> Option<&str> {
        let slot = self.vocab_reverse.get(id as usize)?;
        Some(&**slot)
    }

    /// Number of distinct interned words.
    fn vocab_size(&self) -> usize {
        self.vocab_reverse.len()
    }

    /// This backend always reports that it supports sharing.
    fn supports_sharing(&self) -> bool {
        true
    }

    /// Whether `word` has been interned.
    fn contains(&self, word: &str) -> bool {
        self.vocab.get(word).is_some()
    }

    /// Id of `word` if it has been interned; never registers a new word.
    fn get_id(&self, word: &str) -> Option<VocabId> {
        let found = self.vocab.get(word);
        found.copied()
    }

    /// Iterates over `(id, word)` pairs in ascending id order.
    fn iter(&self) -> impl Iterator<Item = (VocabId, &str)> {
        let words = self.vocab_reverse.iter().enumerate();
        words.map(|(index, word)| (index as VocabId, &**word))
    }
}
impl PathMapSharingBackend for PathMapBackend {
    /// Builds a backend containing only the interned words whose UTF-8 bytes
    /// start with `prefix`, sharing this backend's storage.
    ///
    /// Matching is done on raw bytes, so `prefix` does not need to be valid
    /// UTF-8 by itself (for example, it may end in the middle of a multi-byte
    /// character). For any prefix that is valid UTF-8 the result is the same
    /// as a string prefix test. An empty prefix matches every word.
    ///
    /// The returned backend:
    /// - holds a clone of the same `Arc` storage handle as `self`;
    /// - keeps matching words in the iteration order of `self`'s vocabulary;
    /// - assigns fresh ids starting at 0, so an id in the returned backend is
    ///   generally NOT the id the same word has in `self`.
    ///
    /// Returns `None` when no interned word matches.
    fn share_prefix(&self, prefix: &[u8]) -> Option<Self> {
        let mut new_vocab = indexmap::IndexMap::default();
        let mut new_vocab_reverse = Vec::new();

        for word in self.vocab.keys() {
            // Byte-wise comparison: the parameter is a byte slice, and a
            // prefix that splits a multi-byte character must still match.
            if word.as_bytes().starts_with(prefix) {
                let new_id = new_vocab_reverse.len() as VocabId;
                // Cloning an `Arc<str>` bumps a refcount; the text is not copied.
                new_vocab.insert(Arc::clone(word), new_id);
                new_vocab_reverse.push(Arc::clone(word));
            }
        }

        if new_vocab_reverse.is_empty() {
            return None;
        }

        Some(Self {
            storage: Arc::clone(&self.storage),
            vocab: new_vocab,
            vocab_reverse: new_vocab_reverse,
        })
    }

    /// Returns `true` when both backends point at the same storage allocation.
    ///
    /// This is a pointer comparison, not a content comparison: two backends
    /// with separately created storage compare as not sharing.
    fn shares_structure_with(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.storage, &other.storage)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A new backend is empty and reports sharing support.
    #[test]
    fn test_pathmap_backend_new() {
        let fresh = PathMapBackend::new();
        assert_eq!(fresh.vocab_size(), 0);
        assert!(fresh.supports_sharing());
    }

    /// Interning the same word twice yields the same id and does not grow the vocabulary.
    #[test]
    fn test_pathmap_backend_intern() {
        let mut store = PathMapBackend::new();
        let hello_first = store.intern("hello");
        let world = store.intern("world");
        let hello_again = store.intern("hello");

        assert_eq!(hello_first, hello_again);
        assert_ne!(hello_first, world);
        assert_eq!(store.vocab_size(), 2);
    }

    /// Known ids resolve to their word; unknown ids resolve to `None`.
    #[test]
    fn test_pathmap_backend_lookup() {
        let mut store = PathMapBackend::new();
        let test_id = store.intern("test");

        assert_eq!(store.lookup(test_id), Some("test"));
        assert_eq!(store.lookup(999), None);
    }

    /// A clone shares storage with its source; an independent backend does not.
    #[test]
    fn test_pathmap_backend_sharing() {
        let original = PathMapBackend::new();
        let copy = original.clone();
        assert!(original.shares_structure_with(&copy));

        let unrelated = PathMapBackend::new();
        assert!(!original.shares_structure_with(&unrelated));
    }

    /// `share_prefix` keeps only matching words and shares storage with the source.
    #[test]
    fn test_pathmap_backend_share_prefix() {
        let mut source = PathMapBackend::new();
        for word in ["hello", "help", "helicopter", "world", "wonder"] {
            source.intern(word);
        }

        let maybe_view = source.share_prefix(b"hel");
        assert!(maybe_view.is_some());
        let view = maybe_view.expect("backend/pathmap.rs: required value was None/Err");

        assert_eq!(view.vocab_size(), 3);
        for kept in ["hello", "help", "helicopter"] {
            assert!(view.contains(kept));
        }
        for dropped in ["world", "wonder"] {
            assert!(!view.contains(dropped));
        }
        assert!(source.shares_structure_with(&view));
    }

    /// A prefix that matches no interned word yields `None`.
    #[test]
    fn test_pathmap_backend_share_prefix_no_match() {
        let mut source = PathMapBackend::new();
        for word in ["hello", "world"] {
            source.intern(word);
        }

        let outcome = source.share_prefix(b"xyz");
        assert!(outcome.is_none());
    }

    /// An empty backend has nothing to share, whatever the prefix.
    #[test]
    fn test_pathmap_backend_share_prefix_empty() {
        let empty = PathMapBackend::new();
        let outcome = empty.share_prefix(b"any");
        assert!(outcome.is_none());
    }
}