1use alloc::vec::Vec;
13
14const HEADER_LEN: usize = 18; #[derive(Debug, PartialEq, Eq)]
18pub enum FsaError {
19 BadMagic,
20 BadVersion(u8),
21 Truncated,
22}
23
24pub struct Fsa<D> {
26 data: D,
27 value_width: usize,
28 value_count: u32,
29 state_count: u32,
30 root_rel: u32,
31 blob_start: usize,
32 values_start: usize,
33}
34
35#[inline]
36pub(crate) fn rd_u32(b: &[u8], at: usize) -> u32 {
37 u32::from_le_bytes([b[at], b[at + 1], b[at + 2], b[at + 3]])
38}
39
40#[inline]
45pub(crate) fn rd_uvarint(b: &[u8], p: &mut usize) -> Option<u64> {
46 let mut v = 0u64;
47 let mut shift = 0u32;
48 loop {
49 let byte = *b.get(*p)?;
50 *p += 1;
51 v |= u64::from(byte & 0x7f) << shift;
52 if byte & 0x80 == 0 {
53 return Some(v);
54 }
55 shift += 7;
56 if shift >= 64 {
57 return None; }
59 }
60}
61
62const MAX_WALK_DEPTH: usize = 4096;
66
67#[inline]
70fn decode_target(rel: u32, delta: u64) -> Option<u32> {
71 u32::try_from(delta)
72 .ok()
73 .filter(|&d| d > 0)
74 .and_then(|d| rel.checked_sub(d))
75}
76
77impl<D: AsRef<[u8]>> Fsa<D> {
78 pub fn new(data: D) -> Result<Self, FsaError> {
79 let b = data.as_ref();
80 if b.len() < HEADER_LEN {
81 return Err(FsaError::Truncated);
82 }
83 if &b[0..4] != b"IXFA" {
84 return Err(FsaError::BadMagic);
85 }
86 if b[4] != 3 {
87 return Err(FsaError::BadVersion(b[4]));
88 }
89 let value_width = b[5] as usize;
90 let value_count = rd_u32(b, 6);
91 let root_rel = rd_u32(b, 10);
92 let state_count = rd_u32(b, 14);
93 let blob_start = HEADER_LEN;
94 let values_len = value_count as usize * value_width;
95 if b.len() < blob_start + values_len {
96 return Err(FsaError::Truncated);
97 }
98 let values_start = b.len() - values_len;
99 Ok(Self {
100 data,
101 value_width,
102 value_count,
103 state_count,
104 root_rel,
105 blob_start,
106 values_start,
107 })
108 }
109
110 pub fn len(&self) -> u64 {
112 u64::from(self.value_count)
113 }
114
115 pub fn is_empty(&self) -> bool {
116 self.value_count == 0
117 }
118
119 pub fn state_count(&self) -> u32 {
121 self.state_count
122 }
123
124 #[inline]
125 fn read_value(&self, ord: u64) -> u64 {
126 let b = self.data.as_ref();
127 let at = self.values_start + ord as usize * self.value_width;
128 let mut v = 0u64;
129 for i in 0..self.value_width {
130 v |= u64::from(b.get(at + i).copied().unwrap_or(0)) << (8 * i);
131 }
132 v
133 }
134
135 #[inline]
136 fn is_final(&self, rel: u32) -> bool {
137 self.data
138 .as_ref()
139 .get(self.blob_start + rel as usize)
140 .is_some_and(|x| x & 1 != 0)
141 }
142
143 #[inline]
149 fn step(&self, rel: u32, byte: u8, ord: &mut u64) -> Option<u32> {
150 let b = self.data.as_ref();
151 let mut p = self.blob_start + rel as usize;
152 let flags = *b.get(p)?;
153 p += 1;
154 if flags & 1 != 0 {
155 *ord += 1; }
157 if flags & 0b10 != 0 {
158 let label = *b.get(p)?;
160 p += 1;
161 let delta = rd_uvarint(b, &mut p)?;
162 if label == byte {
164 return rel.checked_sub(u32::try_from(delta).ok()?);
165 }
166 return None;
167 }
168 let ntrans = rd_uvarint(b, &mut p)?;
169 for _ in 0..ntrans {
170 let label = *b.get(p)?;
171 p += 1;
172 let delta = rd_uvarint(b, &mut p)?;
173 let numt = rd_uvarint(b, &mut p)?;
174 if label < byte {
175 *ord += numt;
176 } else if label == byte {
177 return rel.checked_sub(u32::try_from(delta).ok()?);
178 } else {
179 break; }
181 }
182 None
183 }
184
185 pub fn get(&self, key: &[u8]) -> Option<u64> {
187 if self.value_count == 0 {
188 return None;
189 }
190 let mut rel = self.root_rel;
191 let mut ord = 0u64;
192 for &byte in key {
193 rel = self.step(rel, byte, &mut ord)?;
194 }
195 if self.is_final(rel) {
196 Some(self.read_value(ord))
197 } else {
198 None
199 }
200 }
201
202 pub fn contains_prefix(&self, prefix: &[u8]) -> bool {
204 self.walk_to(prefix).is_some()
205 }
206
207 pub fn prefix(&self, prefix: &[u8]) -> Vec<(Vec<u8>, u64)> {
209 let mut out = Vec::new();
210 self.prefix_for_each(prefix, |k, v| out.push((k.to_vec(), v)));
211 out
212 }
213
214 pub fn prefix_for_each<F: FnMut(&[u8], u64)>(&self, prefix: &[u8], mut visit: F) {
220 if let Some((rel, ord)) = self.walk_to(prefix) {
221 let mut cur = prefix.to_vec();
222 let mut ord = ord;
223 self.visit_subtree(rel, &mut cur, &mut ord, &mut visit);
224 }
225 }
226
227 pub fn iter(&self) -> FsaIter<'_, D> {
229 self.range(b"")
230 }
231
232 pub fn range(&self, prefix: &[u8]) -> FsaIter<'_, D> {
236 let (to_enter, ord) = match self.walk_to(prefix) {
237 Some((rel, ord)) => (Some(rel), ord),
238 None => (None, 0),
239 };
240 FsaIter {
241 fsa: self,
242 stack: Vec::new(),
243 cur: prefix.to_vec(),
244 ord,
245 to_enter,
246 }
247 }
248
249 fn walk_to(&self, prefix: &[u8]) -> Option<(u32, u64)> {
250 if self.value_count == 0 {
251 return None;
252 }
253 let mut rel = self.root_rel;
254 let mut ord = 0u64;
255 for &byte in prefix {
256 rel = self.step(rel, byte, &mut ord)?;
257 }
258 Some((rel, ord))
259 }
260
261 fn visit_subtree<F: FnMut(&[u8], u64)>(
264 &self,
265 rel: u32,
266 cur: &mut Vec<u8>,
267 ord: &mut u64,
268 visit: &mut F,
269 ) {
270 if cur.len() > MAX_WALK_DEPTH {
272 return;
273 }
274 let b = self.data.as_ref();
275 let mut p = self.blob_start + rel as usize;
276 let Some(&flags) = b.get(p) else { return };
277 p += 1;
278 if flags & 1 != 0 {
279 visit(cur, self.read_value(*ord));
280 *ord += 1;
281 }
282 if flags & 0b10 != 0 {
283 let Some(&label) = b.get(p) else { return };
285 p += 1;
286 let Some(delta) = rd_uvarint(b, &mut p) else { return };
287 if let Some(target) = decode_target(rel, delta) {
288 cur.push(label);
289 self.visit_subtree(target, cur, ord, visit);
290 cur.pop();
291 }
292 return;
293 }
294 let Some(ntrans) = rd_uvarint(b, &mut p) else { return };
295 for _ in 0..ntrans {
296 let Some(&label) = b.get(p) else { return };
297 p += 1;
298 let Some(delta) = rd_uvarint(b, &mut p) else { return };
299 let Some(_num) = rd_uvarint(b, &mut p) else { return };
300 if let Some(target) = decode_target(rel, delta) {
301 cur.push(label);
302 self.visit_subtree(target, cur, ord, visit);
303 cur.pop();
304 }
305 }
306 }
307}
308
309struct IterFrame {
311 rel: u32,
312 p: usize, remaining: u64, base_len: usize, single: bool, }
317
318pub struct FsaIter<'a, D> {
324 fsa: &'a Fsa<D>,
325 stack: Vec<IterFrame>,
326 cur: Vec<u8>,
327 ord: u64,
328 to_enter: Option<u32>,
329}
330
331impl<D: AsRef<[u8]>> Iterator for FsaIter<'_, D> {
332 type Item = (Vec<u8>, u64);
333
334 fn next(&mut self) -> Option<Self::Item> {
335 let b = self.fsa.data.as_ref();
336 loop {
337 if let Some(rel) = self.to_enter.take() {
338 let mut p = self.fsa.blob_start + rel as usize;
340 let flags = *b.get(p)?;
341 p += 1;
342 let single = flags & 0b10 != 0;
343 let final_ = flags & 1 != 0;
344 let remaining = if single { 1 } else { rd_uvarint(b, &mut p)? };
345 self.stack.push(IterFrame {
346 rel,
347 p,
348 remaining,
349 base_len: self.cur.len(),
350 single,
351 });
352 if final_ {
353 let v = self.fsa.read_value(self.ord);
354 self.ord += 1;
355 return Some((self.cur.clone(), v));
356 }
357 continue;
358 }
359 let frame = self.stack.last_mut()?;
360 self.cur.truncate(frame.base_len);
362 if frame.remaining == 0 {
363 self.stack.pop();
364 continue;
365 }
366 let mut p = frame.p;
367 let label = *b.get(p)?;
368 p += 1;
369 let delta = rd_uvarint(b, &mut p)?;
370 if !frame.single {
371 rd_uvarint(b, &mut p)?; }
373 frame.p = p;
374 frame.remaining -= 1;
375 let target = decode_target(frame.rel, delta)?;
376 self.cur.push(label);
377 self.to_enter = Some(target);
378 }
379 }
380}