use std::collections::HashSet;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::PathBuf;
use anyhow::{Context, Result};
use fs2::FileExt;
use rust_stemmers::{Algorithm, Stemmer};
use serde::{Deserialize, Serialize};
use crate::knowledge::normalize_trigger;
/// Fallback location of the fired-state file, returned by `fired_path` when
/// the environment override cannot be used.
const DEFAULT_FIRED_PATH: &str = "/tmp/wonka-triggered-fired.json";
/// Name of the environment variable that `fired_path` reads to override the
/// fired-state file location.
const FIRED_PATH_ENV: &str = "MX_TRIGGER_FIRED_PATH";
/// Resolves the path of the fired-state file.
///
/// The value of the `FIRED_PATH_ENV` environment variable is used verbatim
/// when it is readable and contains something other than whitespace.
/// In every other case (variable unset, not valid Unicode, or blank) the
/// result is `DEFAULT_FIRED_PATH`.
pub fn fired_path() -> PathBuf {
    let chosen = std::env::var(FIRED_PATH_ENV)
        .ok()
        // A whitespace-only override is treated the same as no override.
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| DEFAULT_FIRED_PATH.to_string());
    PathBuf::from(chosen)
}
/// Turns free text into a sequence of stemmed word tokens.
///
/// `raw` is first passed through `normalize_trigger`; if that yields `None`
/// the result is empty. Otherwise the normalized text is split on every
/// non-alphanumeric character, empty pieces are dropped, and each remaining
/// word is reduced to its stem with the English stemmer.
fn stem_tokens(raw: &str) -> Vec<String> {
    match normalize_trigger(raw) {
        None => Vec::new(),
        Some(text) => {
            let english = Stemmer::create(Algorithm::English);
            let mut stems = Vec::new();
            for word in text.split(|ch: char| !ch.is_alphanumeric()) {
                // Consecutive separators produce empty pieces; skip them.
                if word.is_empty() {
                    continue;
                }
                stems.push(english.stem(word).into_owned());
            }
            stems
        }
    }
}
/// Reports whether `needle` occurs inside `haystack` as an unbroken,
/// in-order run of elements.
///
/// An empty `needle` never matches, and neither does a `needle` longer than
/// `haystack`.
fn contains_contiguous(haystack: &[String], needle: &[String]) -> bool {
    let width = needle.len();
    // `windows(0)` would panic, so the empty needle is rejected up front.
    if width == 0 {
        return false;
    }
    // When `width` exceeds the haystack length `windows` yields nothing and
    // the loop body never runs.
    for candidate in haystack.windows(width) {
        if candidate == needle {
            return true;
        }
    }
    false
}
/// Selects the triggers that occur in an already-tokenized message.
///
/// Each trigger is stemmed with `stem_tokens` and counts as a hit when its
/// stemmed tokens appear in `message_tokens` as a contiguous, in-order run.
/// A trigger that stems to no tokens never matches.
///
/// Returns clones of the matching triggers in their original (unstemmed)
/// form, preserving the order of `triggers`. Callers in this file pass the
/// output of `stem_tokens` as `message_tokens`.
pub fn match_triggers(message_tokens: &[String], triggers: &[String]) -> Vec<String> {
    let mut hits = Vec::new();
    for candidate in triggers {
        let stems = stem_tokens(candidate);
        if contains_contiguous(message_tokens, &stems) {
            hits.push(candidate.clone());
        }
    }
    hits
}
/// One entry whose triggers matched a message, as produced by `match_entries`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TriggerMatch {
/// Identifier of the matching entry, copied from the id half of the input pair.
pub id: String,
/// The entry's triggers that matched the message, in the entry's own order.
pub triggers_matched: Vec<String>,
}
/// Matches a raw message against a collection of `(id, triggers)` entries.
///
/// The message is stemmed once with `stem_tokens`. If that produces no
/// tokens, nothing can match and `entries` is not iterated at all.
/// Otherwise every entry is checked with `match_triggers`, and one
/// `TriggerMatch` is emitted for each entry that has at least one matching
/// trigger, in the order the entries were supplied.
pub fn match_entries<'a, I>(message: &str, entries: I) -> Vec<TriggerMatch>
where
    I: IntoIterator<Item = (&'a str, &'a [String])>,
{
    let stems = stem_tokens(message);
    if stems.is_empty() {
        return Vec::new();
    }
    entries
        .into_iter()
        .filter_map(|(id, triggers)| {
            let hits = match_triggers(&stems, triggers);
            if hits.is_empty() {
                None
            } else {
                Some(TriggerMatch {
                    id: id.to_string(),
                    triggers_matched: hits,
                })
            }
        })
        .collect()
}
/// JSON shape of the fired-state file as read and written by `FiredStore`.
#[derive(Debug, Default, Serialize, Deserialize)]
struct FiredState {
/// Ids recorded as already fired; an absent key deserializes as an empty list.
#[serde(default)]
fired: Vec<String>,
}
/// Handle to a fired-state file.
///
/// Only the path is stored; each method opens (or removes) the file itself
/// when it is called.
pub struct FiredStore {
/// Location of the fired-state file this store operates on.
path: PathBuf,
}
impl FiredStore {
pub fn open() -> Self {
Self { path: fired_path() }
}
pub fn at(path: PathBuf) -> Self {
Self { path }
}
pub fn mark_survivors(&self, matched: &[String]) -> Result<Vec<String>> {
if matched.is_empty() {
return Ok(Vec::new());
}
let mut file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(false)
.open(&self.path)
.with_context(|| format!("failed to open fired-state file: {}", self.path.display()))?;
file.lock_exclusive()
.with_context(|| format!("failed to flock {}", self.path.display()))?;
let result = self.read_modify_write(&mut file, matched);
let _ = FileExt::unlock(&file);
result
}
fn read_modify_write(
&self,
file: &mut std::fs::File,
matched: &[String],
) -> Result<Vec<String>> {
let mut contents = String::new();
file.seek(SeekFrom::Start(0))?;
file.read_to_string(&mut contents)
.with_context(|| format!("failed to read fired-state file: {}", self.path.display()))?;
let mut state: FiredState = if contents.trim().is_empty() {
FiredState::default()
} else {
serde_json::from_str(&contents).unwrap_or_default()
};
let already: HashSet<&String> = state.fired.iter().collect();
let survivors: Vec<String> = matched
.iter()
.filter(|id| !already.contains(id))
.cloned()
.collect();
if survivors.is_empty() {
return Ok(survivors);
}
state.fired.extend(survivors.iter().cloned());
let serialized = serde_json::to_string(&state)?;
file.seek(SeekFrom::Start(0))?;
file.set_len(0)?;
file.write_all(serialized.as_bytes()).with_context(|| {
format!("failed to write fired-state file: {}", self.path.display())
})?;
file.flush()?;
file.sync_all()?;
Ok(survivors)
}
pub fn read_fired(&self) -> Result<HashSet<String>> {
if !self.path.exists() {
return Ok(HashSet::new());
}
let file = std::fs::File::open(&self.path)
.with_context(|| format!("failed to open fired-state file: {}", self.path.display()))?;
file.lock_shared()
.with_context(|| format!("failed to flock(shared) {}", self.path.display()))?;
let mut contents = String::new();
let mut f = &file;
let read = f.read_to_string(&mut contents);
let _ = FileExt::unlock(&file);
read.with_context(|| format!("failed to read fired-state file: {}", self.path.display()))?;
if contents.trim().is_empty() {
return Ok(HashSet::new());
}
let state: FiredState = serde_json::from_str(&contents).unwrap_or_default();
Ok(state.fired.into_iter().collect())
}
pub fn reset(&self) -> Result<()> {
match std::fs::remove_file(&self.path) {
Ok(()) => Ok(()),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
Err(e) => Err(e).with_context(|| {
format!("failed to remove fired-state file: {}", self.path.display())
}),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Stems a message the same way the matching code does.
    fn stems_of(text: &str) -> Vec<String> {
        stem_tokens(text)
    }

    /// Builds an owned `Vec<String>` from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Returns a store inside a fresh temp dir; the dir guard must be kept
    /// alive for as long as the store is used.
    fn temp_store() -> (tempfile::TempDir, FiredStore) {
        let guard = tempfile::tempdir().unwrap();
        let store = FiredStore::at(guard.path().join("fired.json"));
        (guard, store)
    }

    // --- single-word triggers -------------------------------------------

    #[test]
    fn word_boundary_ai_does_not_fire_on_said_or_maintain() {
        let message = stems_of("he said we should maintain it");
        let hits = match_triggers(&message, &owned(&["ai"]));
        assert!(hits.is_empty());
    }

    #[test]
    fn word_boundary_ai_fires_as_whole_token() {
        let message = stems_of("the ai is helpful");
        let hits = match_triggers(&message, &owned(&["ai"]));
        assert_eq!(hits, owned(&["ai"]));
    }

    #[test]
    fn stemming_diabetes_fires_on_diabetic() {
        let message = stems_of("he is diabetic");
        let hits = match_triggers(&message, &owned(&["diabetes"]));
        assert_eq!(hits, owned(&["diabetes"]));
    }

    #[test]
    fn stemming_run_fires_on_running() {
        let message = stems_of("she is running today");
        let hits = match_triggers(&message, &owned(&["run"]));
        assert_eq!(hits, owned(&["run"]));
    }

    // --- multi-word triggers --------------------------------------------

    #[test]
    fn phrase_fires_on_contiguous_in_order() {
        let message = stems_of("what is his blood sugar today");
        let hits = match_triggers(&message, &owned(&["blood sugar"]));
        assert_eq!(hits, owned(&["blood sugar"]));
    }

    #[test]
    fn phrase_does_not_fire_out_of_order() {
        let message = stems_of("there is sugar in blood");
        let hits = match_triggers(&message, &owned(&["blood sugar"]));
        assert!(hits.is_empty());
    }

    #[test]
    fn phrase_does_not_fire_when_not_contiguous() {
        let message = stems_of("blood pressure and high sugar");
        let hits = match_triggers(&message, &owned(&["blood sugar"]));
        assert!(hits.is_empty());
    }

    #[test]
    fn nfc_precomposed_and_decomposed_cafe_match() {
        // Message uses "e" + combining acute; trigger uses precomposed "é".
        let message = stems_of("meet me at the cafe\u{0301} later");
        let hits = match_triggers(&message, &owned(&["caf\u{00e9}"]));
        assert_eq!(hits, owned(&["caf\u{00e9}"]));
    }

    // --- match_entries ---------------------------------------------------

    #[test]
    fn match_entries_returns_per_entry_matched_triggers() {
        let brad_triggers = owned(&["brad", "blood sugar"]);
        let drew_triggers = owned(&["drew"]);
        let entries: Vec<(&str, &[String])> = vec![
            ("kn-brad", brad_triggers.as_slice()),
            ("kn-drew", drew_triggers.as_slice()),
        ];
        let found = match_entries("can you check brad's blood sugar?", entries);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].id, "kn-brad");
        assert_eq!(found[0].triggers_matched, owned(&["brad", "blood sugar"]));
    }

    #[test]
    fn match_entries_empty_message_matches_nothing() {
        let triggers = owned(&["brad"]);
        let entries: Vec<(&str, &[String])> = vec![("kn-brad", triggers.as_slice())];
        let found = match_entries(" ", entries);
        assert!(found.is_empty());
    }

    // --- FiredStore ------------------------------------------------------

    #[test]
    fn fired_store_marks_and_dedupes() {
        let (_guard, store) = temp_store();

        let first = store.mark_survivors(&owned(&["kn-a", "kn-b"])).unwrap();
        assert_eq!(first, owned(&["kn-a", "kn-b"]));

        let second = store.mark_survivors(&owned(&["kn-a", "kn-c"])).unwrap();
        assert_eq!(second, owned(&["kn-c"]));

        let third = store
            .mark_survivors(&owned(&["kn-a", "kn-b", "kn-c"]))
            .unwrap();
        assert!(third.is_empty());
    }

    #[test]
    fn fired_store_reset_clears_state() {
        let (_guard, store) = temp_store();

        store.mark_survivors(&owned(&["kn-a"])).unwrap();
        assert!(store.read_fired().unwrap().contains("kn-a"));

        store.reset().unwrap();
        assert!(store.read_fired().unwrap().is_empty());

        let after_reset = store.mark_survivors(&owned(&["kn-a"])).unwrap();
        assert_eq!(after_reset, owned(&["kn-a"]));
    }

    #[test]
    fn fired_store_reset_missing_file_is_ok() {
        let (_guard, store) = temp_store();
        store.reset().unwrap();
        assert!(store.read_fired().unwrap().is_empty());
    }

    #[test]
    fn mark_survivors_empty_input_does_no_file_io() {
        let (_guard, store) = temp_store();
        let result = store.mark_survivors(&[]).unwrap();
        assert!(result.is_empty());
        assert!(
            !store.path.exists(),
            "empty mark_survivors must not create the fired-state file"
        );
    }

    #[test]
    fn fired_store_read_does_not_mark() {
        let (_guard, store) = temp_store();
        assert!(store.read_fired().unwrap().is_empty());
        let marked = store.mark_survivors(&owned(&["kn-a"])).unwrap();
        assert_eq!(marked, owned(&["kn-a"]));
    }
}