use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::{Duration, SystemTime};
use crate::error::{DictError, Result};
use crate::user_dict::{UserDictionary, UserEntry};
#[cfg(test)]
use crate::DictEntry;
use crate::{Entry, SystemDictionary};
/// Version number identifying one dictionary snapshot.
///
/// Constructors in this file start at 1 and every mutation produces `previous + 1`.
pub type Version = u64;
/// Default cap on the number of past snapshots kept in the version history.
const DEFAULT_MAX_VERSION_HISTORY: usize = 10;
/// Default cap on the number of applied deltas kept in the delta queue.
const DEFAULT_MAX_DELTA_QUEUE: usize = 100;
/// One snapshot of the system and user dictionaries, tagged with a version.
///
/// Both dictionaries sit behind `Arc`, so cloning a snapshot only bumps
/// reference counts instead of copying dictionary data.
#[derive(Clone)]
struct VersionedDictionary {
/// Version number of this snapshot.
version: Version,
/// System dictionary shared by every snapshot that did not reload it.
system_dict: Arc<SystemDictionary>,
/// User dictionary as it was when this snapshot was created.
user_dict: Arc<UserDictionary>,
/// Wall-clock time at which this snapshot was created.
timestamp: SystemTime,
}
impl VersionedDictionary {
    /// Builds the snapshot that follows `self`.
    ///
    /// The version is `self.version + 1`, the timestamp is taken now, and the
    /// two dictionaries are exactly the handles supplied by the caller.
    fn successor(
        &self,
        system_dict: Arc<SystemDictionary>,
        user_dict: Arc<UserDictionary>,
    ) -> Self {
        Self {
            version: self.version + 1,
            system_dict,
            user_dict,
            timestamp: SystemTime::now(),
        }
    }

    /// Returns the next snapshot with `user_dict` installed and the system
    /// dictionary shared with `self`.
    fn new_version(&self, user_dict: UserDictionary) -> Self {
        let shared_system = Arc::clone(&self.system_dict);
        self.successor(shared_system, Arc::new(user_dict))
    }

    /// Returns the next snapshot with `system_dict` installed and the user
    /// dictionary shared with `self`.
    fn with_system_dict(&self, system_dict: SystemDictionary) -> Self {
        let fresh_system = Arc::new(system_dict);
        self.successor(fresh_system, Arc::clone(&self.user_dict))
    }
}
/// Dictionary whose user entries (and system dictionary) can be replaced at
/// runtime, with a bounded history of earlier snapshots for rollback.
pub struct HotReloadDictionary {
/// Snapshot served to readers; replaced as a whole by every mutation.
current: Arc<RwLock<VersionedDictionary>>,
/// Earlier snapshots, oldest at the front.
history: Arc<RwLock<VecDeque<VersionedDictionary>>>,
/// Maximum number of snapshots kept in `history`.
max_history: usize,
/// Deltas that were applied through `apply_delta`, oldest at the front.
delta_queue: Arc<RwLock<VecDeque<DeltaUpdate>>>,
/// Maximum number of deltas kept in `delta_queue`.
max_delta_queue: usize,
/// Directory the system dictionary is (re)loaded from.
dicdir: PathBuf,
}
impl HotReloadDictionary {
pub fn new<P: AsRef<Path>>(dicdir: P) -> Result<Self> {
let dicdir = dicdir.as_ref().to_path_buf();
let system_dict = SystemDictionary::load(&dicdir)?;
let versioned = VersionedDictionary {
version: 1,
system_dict: Arc::new(system_dict),
user_dict: Arc::new(UserDictionary::new()),
timestamp: SystemTime::now(),
};
Ok(Self {
current: Arc::new(RwLock::new(versioned)),
history: Arc::new(RwLock::new(VecDeque::new())),
max_history: DEFAULT_MAX_VERSION_HISTORY,
delta_queue: Arc::new(RwLock::new(VecDeque::new())),
max_delta_queue: DEFAULT_MAX_DELTA_QUEUE,
dicdir,
})
}
pub fn new_default() -> Result<Self> {
let system_dict = SystemDictionary::load_default()?;
let dicdir = system_dict.dicdir().to_path_buf();
let versioned = VersionedDictionary {
version: 1,
system_dict: Arc::new(system_dict),
user_dict: Arc::new(UserDictionary::new()),
timestamp: SystemTime::now(),
};
Ok(Self {
current: Arc::new(RwLock::new(versioned)),
history: Arc::new(RwLock::new(VecDeque::new())),
max_history: DEFAULT_MAX_VERSION_HISTORY,
delta_queue: Arc::new(RwLock::new(VecDeque::new())),
max_delta_queue: DEFAULT_MAX_DELTA_QUEUE,
dicdir,
})
}
/// Sets the maximum number of past snapshots kept in the history and returns
/// the dictionary (builder style).
///
/// The limit is applied each time a snapshot is saved to the history; with a
/// limit of 0 no snapshot is retained.
#[must_use]
pub const fn with_max_history(mut self, max_history: usize) -> Self {
self.max_history = max_history;
self
}
/// Sets the maximum number of applied deltas kept in the delta queue and
/// returns the dictionary (builder style).
///
/// The limit is applied each time a delta is enqueued; with a limit of 0 no
/// delta is retained.
#[must_use]
pub const fn with_max_delta_queue(mut self, max_delta_queue: usize) -> Self {
self.max_delta_queue = max_delta_queue;
self
}
/// Returns the version of the snapshot currently being served.
///
/// Returns 0 when the read lock cannot be acquired (the lock is poisoned).
#[must_use]
pub fn current_version(&self) -> Version {
    match self.current.read() {
        Ok(snapshot) => snapshot.version,
        Err(_) => 0,
    }
}
/// Returns the directory the system dictionary is loaded from.
#[must_use]
pub fn dicdir(&self) -> &Path {
    self.dicdir.as_path()
}
/// Looks up `surface` in the current snapshot.
///
/// The result holds the system-dictionary entry for an exact trie match (if
/// any) followed by every matching user-dictionary entry. A trie index that
/// cannot be resolved to an entry is skipped rather than reported.
///
/// # Errors
///
/// Returns `DictError::Format` when the read lock cannot be acquired.
pub fn lookup(&self, surface: &str) -> Result<Vec<Entry>> {
    let snapshot = self.current.read().map_err(|_| {
        DictError::Format("Failed to acquire read lock on dictionary".to_string())
    })?;

    let system_hit = snapshot
        .system_dict
        .trie()
        .exact_match(surface)
        .and_then(|index| snapshot.system_dict.get_entry(index).ok())
        .map(|entry| entry.to_entry());

    let mut matches: Vec<Entry> = Vec::new();
    matches.extend(system_hit);

    let user_hits = snapshot.user_dict.lookup(surface);
    matches.extend(user_hits.iter().map(|entry| entry.to_entry()));

    drop(snapshot);
    Ok(matches)
}
pub fn add_entry(
&self,
surface: impl Into<String>,
pos: impl Into<String>,
cost: i16,
reading: Option<String>,
) -> Result<Version> {
let mut dict = self.current.write().map_err(|_| {
DictError::Format("Failed to acquire write lock on dictionary".to_string())
})?;
let mut new_user_dict = (*dict.user_dict).clone();
new_user_dict.add_entry(surface, pos, Some(cost), reading);
self.save_to_history(&dict)?;
*dict = dict.new_version(new_user_dict);
Ok(dict.version)
}
/// Removes every user entry whose surface equals `surface`.
///
/// Returns the resulting version together with the number of removed
/// entries. When nothing matches, the dictionary is left untouched and the
/// current version is returned with a count of 0.
///
/// # Errors
///
/// Returns `DictError::Format` when the dictionary or history lock cannot be
/// acquired.
pub fn remove_entry(&self, surface: &str) -> Result<(Version, usize)> {
    let mut dict = self.current.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on dictionary".to_string())
    })?;

    // Count against the live snapshot: no copy of the dictionary is needed
    // just to find out whether anything has to change.
    let removed_count = dict
        .user_dict
        .entries()
        .iter()
        .filter(|entry| entry.surface == surface)
        .count();
    if removed_count == 0 {
        return Ok((dict.version, 0));
    }

    // Only the surviving entries are cloned, straight into the new dictionary.
    let mut rebuilt_dict = UserDictionary::new();
    for entry in dict
        .user_dict
        .entries()
        .iter()
        .filter(|entry| entry.surface != surface)
        .cloned()
    {
        rebuilt_dict.add_entry_with_ids(
            entry.surface,
            entry.pos,
            entry.cost,
            entry.left_id,
            entry.right_id,
            entry.reading,
        );
    }

    self.save_to_history(&dict)?;
    *dict = dict.new_version(rebuilt_dict);
    Ok((dict.version, removed_count))
}
/// Applies `update_fn` to every user entry whose surface equals `surface`
/// and publishes the result as a new version.
///
/// The user dictionary is rebuilt from its entries; entries that do not
/// match are carried over unchanged. A new version is produced even when no
/// entry matches.
///
/// # Errors
///
/// Returns `DictError::Format` when the dictionary or history lock cannot be
/// acquired.
pub fn update_entry<F>(&self, surface: &str, update_fn: F) -> Result<Version>
where
    F: Fn(&mut UserEntry),
{
    let mut dict = self.current.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on dictionary".to_string())
    })?;

    // Each entry is cloned exactly once, straight from the live snapshot;
    // no copy of the whole dictionary and no intermediate list is needed.
    let mut rebuilt_dict = UserDictionary::new();
    for original in dict.user_dict.entries().iter() {
        let mut entry = original.clone();
        if entry.surface == surface {
            update_fn(&mut entry);
        }
        rebuilt_dict.add_entry_with_ids(
            entry.surface,
            entry.pos,
            entry.cost,
            entry.left_id,
            entry.right_id,
            entry.reading,
        );
    }

    self.save_to_history(&dict)?;
    *dict = dict.new_version(rebuilt_dict);
    Ok(dict.version)
}
pub fn apply_delta(&self, delta: DeltaUpdate) -> Result<Version> {
let mut dict = self.current.write().map_err(|_| {
DictError::Format("Failed to acquire write lock on dictionary".to_string())
})?;
let mut new_user_dict = (*dict.user_dict).clone();
for surface in &delta.removals {
let filtered_entries: Vec<_> = new_user_dict
.entries()
.iter()
.filter(|e| e.surface != *surface)
.cloned()
.collect();
let mut rebuilt_dict = UserDictionary::new();
for entry in filtered_entries {
rebuilt_dict.add_entry_with_ids(
entry.surface,
entry.pos,
entry.cost,
entry.left_id,
entry.right_id,
entry.reading,
);
}
new_user_dict = rebuilt_dict;
}
for addition in &delta.additions {
new_user_dict.add_entry(
addition.surface.clone(),
addition.pos.clone(),
Some(addition.cost),
addition.reading.clone(),
);
}
for modification in &delta.modifications {
let updated_entries: Vec<_> = new_user_dict
.entries()
.iter()
.map(|e| {
if e.surface == modification.surface {
modification.to_user_entry()
} else {
e.clone()
}
})
.collect();
let mut rebuilt_dict = UserDictionary::new();
for entry in updated_entries {
rebuilt_dict.add_entry_with_ids(
entry.surface,
entry.pos,
entry.cost,
entry.left_id,
entry.right_id,
entry.reading,
);
}
new_user_dict = rebuilt_dict;
}
self.save_to_history(&dict)?;
self.enqueue_delta(delta)?;
*dict = dict.new_version(new_user_dict);
Ok(dict.version)
}
/// Reloads the system dictionary from `dicdir` and publishes it as a new
/// version; the user dictionary is carried over unchanged.
///
/// # Errors
///
/// Returns the error from `SystemDictionary::load` when loading fails (the
/// current snapshot is then left untouched), or `DictError::Format` when the
/// dictionary or history lock cannot be acquired.
pub fn reload_system_dict(&self) -> Result<Version> {
    // Load before taking the write lock so that lookups are not blocked for
    // the duration of the load; the lock is held only for the swap itself.
    let new_system_dict = SystemDictionary::load(&self.dicdir)?;

    let mut dict = self.current.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on dictionary".to_string())
    })?;
    self.save_to_history(&dict)?;
    *dict = dict.with_system_dict(new_system_dict);
    Ok(dict.version)
}
/// Makes the history snapshot with `target_version` the current snapshot.
///
/// The snapshot being replaced is not saved to the history, and the restored
/// snapshot stays in the history. Because the restored snapshot keeps its
/// original version number, later mutations continue from
/// `target_version + 1`.
///
/// # Errors
///
/// Returns `DictError::Format` when `target_version` is not in the history
/// or when a lock cannot be acquired.
pub fn rollback(&self, target_version: Version) -> Result<()> {
    let history = self.history.read().map_err(|_| {
        DictError::Format("Failed to acquire read lock on history".to_string())
    })?;
    let found = history
        .iter()
        .find(|snapshot| snapshot.version == target_version)
        .cloned();
    // The history lock is released before the dictionary lock is requested.
    drop(history);

    let target = found.ok_or_else(|| {
        DictError::Format(format!("Version {target_version} not found in history"))
    })?;

    let mut current = self.current.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on dictionary".to_string())
    })?;
    *current = target;
    Ok(())
}
pub fn version_history(&self) -> Result<Vec<VersionInfo>> {
let history = self
.history
.read()
.map_err(|_| DictError::Format("Failed to acquire read lock on history".to_string()))?;
let current = self.current.read().map_err(|_| {
DictError::Format("Failed to acquire read lock on dictionary".to_string())
})?;
let mut versions = vec![VersionInfo {
version: current.version,
timestamp: current.timestamp,
user_entry_count: current.user_dict.len(),
}];
versions.extend(history.iter().map(|v| VersionInfo {
version: v.version,
timestamp: v.timestamp,
user_entry_count: v.user_dict.len(),
}));
drop(history);
drop(current);
versions.sort_by_key(|v| std::cmp::Reverse(v.version));
Ok(versions)
}
/// Appends a copy of `dict` to the history and discards the oldest
/// snapshots until at most `max_history` remain.
///
/// # Errors
///
/// Returns `DictError::Format` when the history lock cannot be acquired.
fn save_to_history(&self, dict: &VersionedDictionary) -> Result<()> {
    let mut snapshots = self.history.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on history".to_string())
    })?;

    snapshots.push_back(dict.clone());

    let overflow = snapshots.len().saturating_sub(self.max_history);
    for _ in 0..overflow {
        snapshots.pop_front();
    }
    Ok(())
}
/// Appends `delta` to the delta queue and discards the oldest deltas until
/// at most `max_delta_queue` remain.
///
/// # Errors
///
/// Returns `DictError::Format` when the delta-queue lock cannot be acquired.
fn enqueue_delta(&self, delta: DeltaUpdate) -> Result<()> {
    let mut pending = self.delta_queue.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on delta queue".to_string())
    })?;

    pending.push_back(delta);

    let overflow = pending.len().saturating_sub(self.max_delta_queue);
    for _ in 0..overflow {
        pending.pop_front();
    }
    Ok(())
}
/// Returns copies of the deltas currently held in the delta queue, oldest
/// first.
///
/// # Errors
///
/// Returns `DictError::Format` when the delta-queue lock cannot be acquired.
pub fn delta_history(&self) -> Result<Vec<DeltaUpdate>> {
    let queue = self.delta_queue.read().map_err(|_| {
        DictError::Format("Failed to acquire read lock on delta queue".to_string())
    })?;
    let mut snapshot = Vec::with_capacity(queue.len());
    snapshot.extend(queue.iter().cloned());
    Ok(snapshot)
}
pub fn export_user_dict(&self) -> Result<UserDictionary> {
let dict = self.current.read().map_err(|_| {
DictError::Format("Failed to acquire read lock on dictionary".to_string())
})?;
let user_dict = (*dict.user_dict).clone();
drop(dict);
Ok(user_dict)
}
/// Replaces the whole user dictionary with `user_dict` and publishes the
/// result as a new version; the old snapshot is pushed to the history.
///
/// # Errors
///
/// Returns `DictError::Format` when the dictionary or history lock cannot be
/// acquired.
pub fn import_user_dict(&self, user_dict: UserDictionary) -> Result<Version> {
    let mut guard = self.current.write().map_err(|_| {
        DictError::Format("Failed to acquire write lock on dictionary".to_string())
    })?;

    self.save_to_history(&guard)?;

    let successor = guard.new_version(user_dict);
    let version = successor.version;
    *guard = successor;
    Ok(version)
}
}
/// A batch of user-dictionary changes that is applied as one unit.
#[derive(Debug, Clone, Default)]
pub struct DeltaUpdate {
    /// Entries to add.
    additions: Vec<EntryChange>,
    /// Surfaces whose entries are to be removed.
    removals: Vec<String>,
    /// Replacement entries, matched to existing entries by surface.
    modifications: Vec<EntryChange>,
}

impl DeltaUpdate {
    /// Creates a delta without any changes.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            additions: Vec::new(),
            removals: Vec::new(),
            modifications: Vec::new(),
        }
    }

    /// Starts a builder for assembling a delta step by step.
    #[must_use]
    pub const fn builder() -> DeltaUpdateBuilder {
        DeltaUpdateBuilder::new()
    }

    /// Number of entries this delta adds.
    #[must_use]
    pub fn addition_count(&self) -> usize {
        self.additions.len()
    }

    /// Number of surfaces this delta removes.
    #[must_use]
    pub fn removal_count(&self) -> usize {
        self.removals.len()
    }

    /// Number of replacement entries in this delta.
    #[must_use]
    pub fn modification_count(&self) -> usize {
        self.modifications.len()
    }

    /// Sum of additions, removals and modifications.
    #[must_use]
    pub fn total_changes(&self) -> usize {
        [
            self.addition_count(),
            self.removal_count(),
            self.modification_count(),
        ]
        .iter()
        .sum()
    }
}
/// Description of one user-dictionary entry carried inside a delta.
#[derive(Debug, Clone)]
pub struct EntryChange {
    /// Surface form of the entry.
    pub surface: String,
    /// Part-of-speech tag.
    pub pos: String,
    /// Cost of the entry.
    pub cost: i16,
    /// Optional reading.
    pub reading: Option<String>,
    /// Left context id.
    pub left_id: u16,
    /// Right context id.
    pub right_id: u16,
}

impl EntryChange {
    /// Converts this change into a `UserEntry` carrying the same fields,
    /// including both context ids.
    fn to_user_entry(&self) -> UserEntry {
        let Self {
            surface,
            pos,
            cost,
            reading,
            left_id,
            right_id,
        } = self;
        let entry = UserEntry::new(surface.clone(), pos.clone(), *cost, reading.clone());
        entry.with_context_ids(*left_id, *right_id)
    }
}
pub struct DeltaUpdateBuilder {
delta: DeltaUpdate,
}
impl Default for DeltaUpdateBuilder {
fn default() -> Self {
Self::new()
}
}
impl DeltaUpdateBuilder {
#[must_use]
pub const fn new() -> Self {
Self {
delta: DeltaUpdate::new(),
}
}
#[must_use]
pub fn add(mut self, surface: impl Into<String>, pos: impl Into<String>, cost: i16) -> Self {
self.delta.additions.push(EntryChange {
surface: surface.into(),
pos: pos.into(),
cost,
reading: None,
left_id: 0,
right_id: 0,
});
self
}
#[must_use]
pub fn add_with_reading(
mut self,
surface: impl Into<String>,
pos: impl Into<String>,
cost: i16,
reading: impl Into<String>,
) -> Self {
self.delta.additions.push(EntryChange {
surface: surface.into(),
pos: pos.into(),
cost,
reading: Some(reading.into()),
left_id: 0,
right_id: 0,
});
self
}
#[must_use]
pub fn remove(mut self, surface: impl Into<String>) -> Self {
self.delta.removals.push(surface.into());
self
}
#[must_use]
pub fn modify(mut self, surface: impl Into<String>, pos: impl Into<String>, cost: i16) -> Self {
self.delta.modifications.push(EntryChange {
surface: surface.into(),
pos: pos.into(),
cost,
reading: None,
left_id: 0,
right_id: 0,
});
self
}
#[must_use]
pub fn build(self) -> DeltaUpdate {
self.delta
}
}
/// Summary of one dictionary snapshot.
#[derive(Debug, Clone)]
pub struct VersionInfo {
    /// Version number of the snapshot.
    pub version: Version,
    /// Wall-clock time at which the snapshot was created.
    pub timestamp: SystemTime,
    /// Number of user entries the snapshot contains.
    pub user_entry_count: usize,
}

impl VersionInfo {
    /// Returns how long ago the snapshot was created, or `None` when its
    /// timestamp lies after the current system time.
    #[must_use]
    pub fn age(&self) -> Option<Duration> {
        self.timestamp.elapsed().ok()
    }
}
#[cfg(test)]
// Test code: panicking through `expect`/`unwrap` is the intended failure mode,
// and the system-dictionary fixture is built with successive pushes.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::vec_init_then_push)]
mod tests {
    use super::*;
    use crate::matrix::DenseMatrix;
    use crate::trie::TrieBuilder;

    /// Builds an in-memory system dictionary with the entries "가", "가다" and "가방".
    fn create_test_system_dict() -> SystemDictionary {
        let entries = vec![("가", 0u32), ("가다", 1), ("가방", 2)];
        let trie_bytes = TrieBuilder::build(&entries).expect("should build trie");
        let trie = crate::trie::Trie::from_vec(trie_bytes);
        let matrix = crate::matrix::ConnectionMatrix::Dense(DenseMatrix::new(10, 10, 100));
        let mut dict_entries = Vec::new();
        dict_entries.push(DictEntry::new("가", 1, 1, 100, "NNG,*,T,가,*,*,*,*"));
        dict_entries.push(DictEntry::new("가다", 2, 2, 200, "VV,*,F,가다,*,*,*,*"));
        dict_entries.push(DictEntry::new("가방", 3, 3, 300, "NNG,*,T,가방,*,*,*,*"));
        SystemDictionary::new_test(PathBuf::from("./test_dic"), trie, matrix, dict_entries)
    }

    /// Wraps the test system dictionary and `user_dict` in a version-1
    /// `HotReloadDictionary` without touching the file system.
    fn create_test_dict(user_dict: UserDictionary) -> HotReloadDictionary {
        let system_dict = create_test_system_dict();
        let dicdir = system_dict.dicdir().to_path_buf();
        let versioned = VersionedDictionary {
            version: 1,
            system_dict: Arc::new(system_dict),
            user_dict: Arc::new(user_dict),
            timestamp: SystemTime::now(),
        };
        HotReloadDictionary {
            current: Arc::new(RwLock::new(versioned)),
            history: Arc::new(RwLock::new(VecDeque::new())),
            max_history: 10,
            delta_queue: Arc::new(RwLock::new(VecDeque::new())),
            max_delta_queue: 100,
            dicdir,
        }
    }

    /// Returns a user dictionary holding the single entry "딥러닝".
    fn deep_learning_user_dict() -> UserDictionary {
        let mut user_dict = UserDictionary::new();
        user_dict.add_entry("딥러닝", "NNG", Some(-1000), None);
        user_dict
    }

    #[test]
    fn test_hot_reload_dictionary_add_entry() {
        let dict = create_test_dict(UserDictionary::new());

        let v1 = dict.current_version();
        assert_eq!(v1, 1);

        let v2 = dict
            .add_entry("딥러닝", "NNG", -1000, None)
            .expect("should add entry");
        assert_eq!(v2, 2);

        let entries = dict.lookup("딥러닝").expect("should lookup");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "딥러닝");
    }

    #[test]
    fn test_hot_reload_dictionary_remove_entry() {
        let dict = create_test_dict(deep_learning_user_dict());

        let (version, removed) = dict.remove_entry("딥러닝").expect("should remove");
        assert_eq!(version, 2);
        assert_eq!(removed, 1);

        let entries = dict.lookup("딥러닝").expect("should lookup");
        assert!(entries.is_empty());
    }

    #[test]
    fn test_remove_missing_entry_keeps_version() {
        let dict = create_test_dict(UserDictionary::new());

        let (version, removed) = dict.remove_entry("없음").expect("should not fail");
        assert_eq!(version, 1);
        assert_eq!(removed, 0);
        assert_eq!(dict.current_version(), 1);
    }

    #[test]
    fn test_delta_update() {
        let dict = create_test_dict(UserDictionary::new());

        let delta = DeltaUpdate::builder()
            .add("딥러닝", "NNG", -1000)
            .add("머신러닝", "NNG", -1000)
            .add("자연어처리", "NNG", -1000)
            .build();
        assert_eq!(delta.addition_count(), 3);

        let version = dict.apply_delta(delta).expect("should apply delta");
        assert_eq!(version, 2);

        let entries = dict.lookup("딥러닝").expect("should lookup");
        assert_eq!(entries.len(), 1);
        let entries = dict.lookup("머신러닝").expect("should lookup");
        assert_eq!(entries.len(), 1);
    }

    #[test]
    fn test_version_rollback() {
        let dict = create_test_dict(UserDictionary::new());

        let v1 = dict.current_version();
        dict.add_entry("딥러닝", "NNG", -1000, None)
            .expect("should add");
        dict.add_entry("머신러닝", "NNG", -1000, None)
            .expect("should add");
        assert_eq!(dict.current_version(), 3);

        dict.rollback(v1).expect("should rollback");
        assert_eq!(dict.current_version(), v1);

        let entries = dict.lookup("딥러닝").expect("should lookup");
        assert!(entries.is_empty());
    }

    #[test]
    fn test_version_history() {
        let dict = create_test_dict(UserDictionary::new());

        dict.add_entry("A", "NNG", 0, None).expect("should add");
        dict.add_entry("B", "NNG", 0, None).expect("should add");
        dict.add_entry("C", "NNG", 0, None).expect("should add");

        let history = dict.version_history().expect("should get history");
        assert_eq!(history.len(), 4);
        assert_eq!(history[0].version, 4);
    }

    #[test]
    fn test_version_history_is_bounded() {
        let dict = create_test_dict(UserDictionary::new()).with_max_history(2);

        dict.add_entry("A", "NNG", 0, None).expect("should add");
        dict.add_entry("B", "NNG", 0, None).expect("should add");
        dict.add_entry("C", "NNG", 0, None).expect("should add");

        // Current snapshot (4) plus the two newest history snapshots (3, 2).
        let history = dict.version_history().expect("should get history");
        let versions: Vec<Version> = history.iter().map(|info| info.version).collect();
        assert_eq!(versions, vec![4, 3, 2]);

        // Version 1 was evicted from the history, so it cannot be restored.
        assert!(dict.rollback(1).is_err());
    }

    #[test]
    fn test_update_entry() {
        let dict = create_test_dict(deep_learning_user_dict());

        dict.update_entry("딥러닝", |entry| {
            entry.cost = -2000;
            entry.reading = Some("딥러닝".to_string());
        })
        .expect("should update");

        let entries = dict.lookup("딥러닝").expect("should lookup");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].cost, -2000);
    }
}