use fst::{IntoStreamer, Set, SetBuilder, Streamer};
use fst::automaton::Str;
use thiserror::Error;
use fst::Automaton;
/// Errors surfaced by the [`Fst`] wrapper.
///
/// Both variants carry `#[from]`, so the underlying error types convert into
/// `FstError` automatically when propagated with `?`.
#[derive(Error, Debug)]
pub enum FstError {
/// An error reported by the `fst` crate itself.
#[error("Failed to build or parse the FST graph: {0}")]
FstInternal(#[from] fst::Error),
/// Bytes that were expected to be a string were not valid UTF-8.
// NOTE(review): no code visible in this section constructs this variant;
// `Fst::search` skips keys that fail UTF-8 decoding instead of returning it.
#[error("Failed to parse string from FST bytes: {0}")]
InvalidUtf8(#[from] std::str::Utf8Error),
}
/// A keyword set stored in an `fst::Set`, searchable by prefix.
pub struct Fst {
// Serialized FST bytes written by the builder in `Fst::new`. A clone of this
// same buffer is handed to `set`, and no code visible here reads this field.
// NOTE(review): looks like a redundant second copy of the data — confirm
// nothing else in the file depends on it before removing.
_bytes: Vec<u8>,
// The searchable set; it owns its own copy of the serialized bytes.
set: Set<Vec<u8>>,
}
impl Fst {
    /// Builds an [`Fst`] from a collection of keywords.
    ///
    /// The keywords may be supplied in any order and may contain duplicates:
    /// they are sorted and de-duplicated here before being fed to the set
    /// builder.
    ///
    /// # Errors
    ///
    /// Returns [`FstError::FstInternal`] if creating the builder, inserting a
    /// key, finishing the build, or opening the resulting bytes as a set fails.
    pub fn new<I, S>(keywords: I) -> Result<Self, FstError>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Keep the caller's values alive and sort borrowed `&str` views of
        // them, instead of copying every keyword into a new `String` first.
        let owned: Vec<S> = keywords.into_iter().collect();
        let mut keys: Vec<&str> = owned.iter().map(|word| word.as_ref()).collect();
        // `str` compares byte-wise, so this yields the sorted, duplicate-free
        // key sequence that is inserted below.
        keys.sort_unstable();
        keys.dedup();

        let mut bytes = Vec::new();
        let mut builder = SetBuilder::new(&mut bytes)?;
        for key in keys {
            builder.insert(key)?;
        }
        // `finish` consumes the builder, which ends its mutable borrow of
        // `bytes` so the buffer can be used again below.
        builder.finish()?;

        // The struct stores the raw buffer and the set separately, so the set
        // is given its own copy of the bytes.
        let set = Set::new(bytes.clone())?;
        Ok(Fst { _bytes: bytes, set })
    }

    /// Returns every stored keyword that starts with `prefix`.
    ///
    /// Matches are returned in the order the set's stream yields them. A key
    /// whose bytes are not valid UTF-8 is skipped rather than reported as an
    /// error.
    pub fn search(&self, prefix: &str) -> Vec<String> {
        let automaton = Str::new(prefix).starts_with();
        let mut stream = self.set.search(automaton).into_stream();

        let mut results = Vec::new();
        // An fst `Streamer` is not a std `Iterator` (each key borrows from the
        // stream), so it has to be driven with `while let` rather than `for`.
        while let Some(key) = stream.next() {
            if let Ok(key_str) = std::str::from_utf8(key) {
                results.push(key_str.to_string());
            }
        }
        results
    }
}