use alloc::vec::Vec;
const HEADER_LEN: usize = 18;
#[derive(Debug, PartialEq, Eq)]
pub enum FsaError {
BadMagic,
BadVersion(u8),
Truncated,
}
pub struct Fsa<D> {
data: D,
value_width: usize,
value_count: u32,
state_count: u32,
root_rel: u32,
blob_start: usize,
values_start: usize,
}
#[inline]
pub(crate) fn rd_u32(b: &[u8], at: usize) -> u32 {
u32::from_le_bytes([b[at], b[at + 1], b[at + 2], b[at + 3]])
}
#[inline]
pub(crate) fn rd_uvarint(b: &[u8], p: &mut usize) -> Option<u64> {
let mut v = 0u64;
let mut shift = 0u32;
loop {
let byte = *b.get(*p)?;
*p += 1;
v |= u64::from(byte & 0x7f) << shift;
if byte & 0x80 == 0 {
return Some(v);
}
shift += 7;
if shift >= 64 {
return None; }
}
}
const MAX_WALK_DEPTH: usize = 4096;
#[inline]
fn decode_target(rel: u32, delta: u64) -> Option<u32> {
u32::try_from(delta)
.ok()
.filter(|&d| d > 0)
.and_then(|d| rel.checked_sub(d))
}
impl<D: AsRef<[u8]>> Fsa<D> {
pub fn new(data: D) -> Result<Self, FsaError> {
let b = data.as_ref();
if b.len() < HEADER_LEN {
return Err(FsaError::Truncated);
}
if &b[0..4] != b"IXFA" {
return Err(FsaError::BadMagic);
}
if b[4] != 3 {
return Err(FsaError::BadVersion(b[4]));
}
let value_width = b[5] as usize;
let value_count = rd_u32(b, 6);
let root_rel = rd_u32(b, 10);
let state_count = rd_u32(b, 14);
let blob_start = HEADER_LEN;
let values_len = value_count as usize * value_width;
if b.len() < blob_start + values_len {
return Err(FsaError::Truncated);
}
let values_start = b.len() - values_len;
Ok(Self {
data,
value_width,
value_count,
state_count,
root_rel,
blob_start,
values_start,
})
}
pub fn len(&self) -> u64 {
u64::from(self.value_count)
}
pub fn is_empty(&self) -> bool {
self.value_count == 0
}
pub fn state_count(&self) -> u32 {
self.state_count
}
#[inline]
fn read_value(&self, ord: u64) -> u64 {
let b = self.data.as_ref();
let at = self.values_start + ord as usize * self.value_width;
let mut v = 0u64;
for i in 0..self.value_width {
v |= u64::from(b.get(at + i).copied().unwrap_or(0)) << (8 * i);
}
v
}
#[inline]
fn is_final(&self, rel: u32) -> bool {
self.data
.as_ref()
.get(self.blob_start + rel as usize)
.is_some_and(|x| x & 1 != 0)
}
#[inline]
fn step(&self, rel: u32, byte: u8, ord: &mut u64) -> Option<u32> {
let b = self.data.as_ref();
let mut p = self.blob_start + rel as usize;
let flags = *b.get(p)?;
p += 1;
if flags & 1 != 0 {
*ord += 1; }
if flags & 0b10 != 0 {
let label = *b.get(p)?;
p += 1;
let delta = rd_uvarint(b, &mut p)?;
if label == byte {
return rel.checked_sub(u32::try_from(delta).ok()?);
}
return None;
}
let ntrans = rd_uvarint(b, &mut p)?;
for _ in 0..ntrans {
let label = *b.get(p)?;
p += 1;
let delta = rd_uvarint(b, &mut p)?;
let numt = rd_uvarint(b, &mut p)?;
if label < byte {
*ord += numt;
} else if label == byte {
return rel.checked_sub(u32::try_from(delta).ok()?);
} else {
break; }
}
None
}
pub fn get(&self, key: &[u8]) -> Option<u64> {
if self.value_count == 0 {
return None;
}
let mut rel = self.root_rel;
let mut ord = 0u64;
for &byte in key {
rel = self.step(rel, byte, &mut ord)?;
}
if self.is_final(rel) {
Some(self.read_value(ord))
} else {
None
}
}
pub fn contains_prefix(&self, prefix: &[u8]) -> bool {
self.walk_to(prefix).is_some()
}
pub fn prefix(&self, prefix: &[u8]) -> Vec<(Vec<u8>, u64)> {
let mut out = Vec::new();
self.prefix_for_each(prefix, |k, v| out.push((k.to_vec(), v)));
out
}
pub fn prefix_for_each<F: FnMut(&[u8], u64)>(&self, prefix: &[u8], mut visit: F) {
if let Some((rel, ord)) = self.walk_to(prefix) {
let mut cur = prefix.to_vec();
let mut ord = ord;
self.visit_subtree(rel, &mut cur, &mut ord, &mut visit);
}
}
pub fn iter(&self) -> FsaIter<'_, D> {
self.range(b"")
}
pub fn range(&self, prefix: &[u8]) -> FsaIter<'_, D> {
let (to_enter, ord) = match self.walk_to(prefix) {
Some((rel, ord)) => (Some(rel), ord),
None => (None, 0),
};
FsaIter {
fsa: self,
stack: Vec::new(),
cur: prefix.to_vec(),
ord,
to_enter,
}
}
fn walk_to(&self, prefix: &[u8]) -> Option<(u32, u64)> {
if self.value_count == 0 {
return None;
}
let mut rel = self.root_rel;
let mut ord = 0u64;
for &byte in prefix {
rel = self.step(rel, byte, &mut ord)?;
}
Some((rel, ord))
}
fn visit_subtree<F: FnMut(&[u8], u64)>(
&self,
rel: u32,
cur: &mut Vec<u8>,
ord: &mut u64,
visit: &mut F,
) {
if cur.len() > MAX_WALK_DEPTH {
return;
}
let b = self.data.as_ref();
let mut p = self.blob_start + rel as usize;
let Some(&flags) = b.get(p) else { return };
p += 1;
if flags & 1 != 0 {
visit(cur, self.read_value(*ord));
*ord += 1;
}
if flags & 0b10 != 0 {
let Some(&label) = b.get(p) else { return };
p += 1;
let Some(delta) = rd_uvarint(b, &mut p) else { return };
if let Some(target) = decode_target(rel, delta) {
cur.push(label);
self.visit_subtree(target, cur, ord, visit);
cur.pop();
}
return;
}
let Some(ntrans) = rd_uvarint(b, &mut p) else { return };
for _ in 0..ntrans {
let Some(&label) = b.get(p) else { return };
p += 1;
let Some(delta) = rd_uvarint(b, &mut p) else { return };
let Some(_num) = rd_uvarint(b, &mut p) else { return };
if let Some(target) = decode_target(rel, delta) {
cur.push(label);
self.visit_subtree(target, cur, ord, visit);
cur.pop();
}
}
}
}
struct IterFrame {
rel: u32,
p: usize, remaining: u64, base_len: usize, single: bool, }
pub struct FsaIter<'a, D> {
fsa: &'a Fsa<D>,
stack: Vec<IterFrame>,
cur: Vec<u8>,
ord: u64,
to_enter: Option<u32>,
}
impl<D: AsRef<[u8]>> Iterator for FsaIter<'_, D> {
type Item = (Vec<u8>, u64);
fn next(&mut self) -> Option<Self::Item> {
let b = self.fsa.data.as_ref();
loop {
if let Some(rel) = self.to_enter.take() {
let mut p = self.fsa.blob_start + rel as usize;
let flags = *b.get(p)?;
p += 1;
let single = flags & 0b10 != 0;
let final_ = flags & 1 != 0;
let remaining = if single { 1 } else { rd_uvarint(b, &mut p)? };
self.stack.push(IterFrame {
rel,
p,
remaining,
base_len: self.cur.len(),
single,
});
if final_ {
let v = self.fsa.read_value(self.ord);
self.ord += 1;
return Some((self.cur.clone(), v));
}
continue;
}
let frame = self.stack.last_mut()?;
self.cur.truncate(frame.base_len);
if frame.remaining == 0 {
self.stack.pop();
continue;
}
let mut p = frame.p;
let label = *b.get(p)?;
p += 1;
let delta = rd_uvarint(b, &mut p)?;
if !frame.single {
rd_uvarint(b, &mut p)?; }
frame.p = p;
frame.remaining -= 1;
let target = decode_target(frame.rel, delta)?;
self.cur.push(label);
self.to_enter = Some(target);
}
}
}