1#![forbid(unsafe_code)]
58#![cfg_attr(not(feature = "std"), no_std)]
59
60extern crate alloc;
61
62mod builder;
63mod dict;
64mod reader;
65
66pub use builder::Builder;
67pub use dict::{Dict, DictBuilder};
68pub use reader::{Fsa, FsaError, FsaIter};
69
70#[cfg(test)]
71mod tests {
72 use super::*;
73 use std::collections::BTreeMap;
74
75 fn build(pairs: &[(&[u8], u64)]) -> Vec<u8> {
77 let mut b = Builder::new();
78 for &(k, v) in pairs {
79 b.insert(k, v);
80 }
81 b.finish()
82 }
83
84 #[test]
85 fn empty() {
86 let bytes = build(&[]);
87 let fsa = Fsa::new(bytes).unwrap();
88 assert_eq!(fsa.get(b"anything"), None);
89 assert_eq!(fsa.len(), 0);
90 }
91
92 #[test]
93 fn basic_get() {
94 let bytes = build(&[(b"a", 1), (b"ab", 2), (b"ac", 3), (b"b", 4)]);
95 let fsa = Fsa::new(bytes).unwrap();
96 assert_eq!(fsa.get(b"a"), Some(1));
97 assert_eq!(fsa.get(b"ab"), Some(2));
98 assert_eq!(fsa.get(b"ac"), Some(3));
99 assert_eq!(fsa.get(b"b"), Some(4));
100 assert_eq!(fsa.get(b"c"), None);
101 assert_eq!(fsa.get(b"abc"), None);
102 assert_eq!(fsa.get(b""), None);
103 assert_eq!(fsa.len(), 4);
104 }
105
106 #[test]
107 fn empty_key_member() {
108 let bytes = build(&[(b"", 7), (b"x", 9)]);
109 let fsa = Fsa::new(bytes).unwrap();
110 assert_eq!(fsa.get(b""), Some(7));
111 assert_eq!(fsa.get(b"x"), Some(9));
112 }
113
114 #[test]
115 fn shared_suffix_minimization() {
116 let bytes = build(&[
119 (b"nation", 1),
120 (b"ration", 2),
121 (b"station", 3),
122 ]);
123 let fsa = Fsa::new(bytes).unwrap();
124 assert_eq!(fsa.get(b"nation"), Some(1));
125 assert_eq!(fsa.get(b"ration"), Some(2));
126 assert_eq!(fsa.get(b"station"), Some(3));
127 assert_eq!(fsa.get(b"ation"), None);
128 }
129
130 #[test]
131 fn lazy_iter() {
132 let bytes = build(&[(b"a", 1), (b"ab", 2), (b"ac", 3), (b"b", 4)]);
133 let fsa = Fsa::new(bytes).unwrap();
134 assert_eq!(fsa.iter().collect::<Vec<_>>(), fsa.prefix(b""));
136 assert_eq!(fsa.range(b"a").collect::<Vec<_>>(), fsa.prefix(b"a"));
138 assert_eq!(
139 fsa.range(b"a").take(2).collect::<Vec<_>>(),
140 vec![(b"a".to_vec(), 1), (b"ab".to_vec(), 2)]
141 );
142 assert_eq!(fsa.iter().find(|(k, _)| k == b"ac"), Some((b"ac".to_vec(), 3)));
144 assert_eq!(fsa.range(b"z").next(), None);
145 }
146
147 #[test]
148 fn prefix_scan() {
149 let bytes = build(&[(b"a", 1), (b"ab", 2), (b"ac", 3), (b"b", 4)]);
150 let fsa = Fsa::new(bytes).unwrap();
151 let got: Vec<_> = fsa.prefix(b"a");
152 assert_eq!(
153 got,
154 vec![
155 (b"a".to_vec(), 1),
156 (b"ab".to_vec(), 2),
157 (b"ac".to_vec(), 3)
158 ]
159 );
160 assert_eq!(fsa.prefix(b"b"), vec![(b"b".to_vec(), 4)]);
161 assert_eq!(fsa.prefix(b"z"), Vec::<(Vec<u8>, u64)>::new());
162 assert_eq!(fsa.prefix(b"").len(), 4); }
164
165 #[test]
166 fn value_widths() {
167 for &maxv in &[0xFFu64, 0xFFFF, 0xFFFF_FFFF, 0xFFFF_FFFF_FF] {
169 let bytes = build(&[(b"lo", 0), (b"hi", maxv)]);
170 let fsa = Fsa::new(bytes).unwrap();
171 assert_eq!(fsa.get(b"hi"), Some(maxv));
172 assert_eq!(fsa.get(b"lo"), Some(0));
173 }
174 }
175
176 #[test]
177 fn duplicate_key_last_wins() {
178 let bytes = build(&[(b"k", 1), (b"k", 2), (b"k", 3)]);
179 let fsa = Fsa::new(bytes).unwrap();
180 assert_eq!(fsa.get(b"k"), Some(3));
181 assert_eq!(fsa.len(), 1);
182 }
183
184 #[test]
196 #[ignore]
197 fn perfgate_get_prefix_under_budget() {
198 let mut b = DictBuilder::new();
199 for i in 0..20_000u32 {
200 let code = format!("code{i:05}");
201 for j in 0..5u32 {
202 b.insert(
203 code.as_bytes(),
204 format!("word{i}_{j}").as_bytes(),
205 u64::from(i * 10 + j),
206 );
207 }
208 }
209 let dict = Dict::new(b.finish()).unwrap();
210 let codes: Vec<String> = (0..1000).map(|i| format!("code{:05}", (i * 19) % 20_000)).collect();
211
212 let iters = 50;
213 let t = std::time::Instant::now();
214 for _ in 0..iters {
215 for c in &codes {
216 std::hint::black_box(dict.get(c.as_bytes()));
217 }
218 }
219 let per_get = t.elapsed().as_nanos() as f64 / (iters * codes.len()) as f64;
220 eprintln!("[perfgate] get = {per_get:.0} ns/op");
221 assert!(per_get < 20_000.0, "get regressed: {per_get:.0} ns/op (budget 20µs)");
222
223 let t = std::time::Instant::now();
224 for _ in 0..iters {
225 let mut n = 0u64;
226 dict.prefix_for_each(b"code0", |_, _, v| n = n.wrapping_add(v));
227 std::hint::black_box(n);
228 }
229 let per_pref = t.elapsed().as_nanos() as f64 / iters as f64;
230 eprintln!("[perfgate] prefix(code0* = 5000 items) = {per_pref:.0} ns/op");
231 assert!(per_pref < 5_000_000.0, "prefix regressed: {per_pref:.0} ns (budget 5ms)");
232 }
233
234 use proptest::prelude::*;
235
236 fn key_strategy() -> impl Strategy<Value = Vec<u8>> {
237 proptest::collection::vec(b'a'..=b'e', 0..6)
238 }
239
240 proptest! {
241 #![proptest_config(ProptestConfig { cases: 200, ..ProptestConfig::default() })]
242
243 #[test]
244 fn diff_get_and_prefix(
245 entries in proptest::collection::vec((key_strategy(), any::<u64>()), 0..64),
246 probes in proptest::collection::vec(key_strategy(), 0..32),
247 ) {
248 let mut oracle: BTreeMap<Vec<u8>, u64> = BTreeMap::new();
250 for (k, v) in &entries {
251 oracle.insert(k.clone(), *v);
252 }
253
254 let mut b = Builder::new();
255 for (k, v) in &entries {
256 b.insert(k, *v);
257 }
258 let fsa = Fsa::new(b.finish()).unwrap();
259
260 prop_assert_eq!(fsa.len(), oracle.len() as u64);
261
262 prop_assert_eq!(fsa.iter().collect::<Vec<_>>(), fsa.prefix(b""));
264
265 for (k, v) in &oracle {
267 prop_assert_eq!(fsa.get(k), Some(*v), "get({:?})", k);
268 }
269 for p in &probes {
270 prop_assert_eq!(fsa.get(p), oracle.get(p).copied(), "get probe {:?}", p);
271 }
272
273 for p in &probes {
275 let want: Vec<(Vec<u8>, u64)> = oracle
276 .iter()
277 .filter(|(k, _)| k.starts_with(p))
278 .map(|(k, v)| (k.clone(), *v))
279 .collect();
280 prop_assert_eq!(fsa.prefix(p), want, "prefix {:?}", p);
281 }
282 }
283 }
284
285 #[test]
288 fn rejects_bad_buffers() {
289 assert!(matches!(Fsa::new(&b""[..]), Err(FsaError::Truncated)));
290 assert!(matches!(
291 Fsa::new(&b"XXXX..............."[..]),
292 Err(FsaError::BadMagic)
293 ));
294 let mut bad = b"IXFA".to_vec();
295 bad.push(99); bad.extend(std::iter::repeat_n(0u8, 20));
297 assert!(matches!(
298 Fsa::new(bad.as_slice()),
299 Err(FsaError::BadVersion(99))
300 ));
301 }
302
303 #[test]
304 fn keys_with_zero_and_high_bytes() {
305 let bytes = build(&[
307 (b"\x00", 1),
308 (b"\x00\xff", 2),
309 (b"\xff", 3),
310 (b"a\x00b", 4),
311 ]);
312 let fsa = Fsa::new(bytes).unwrap();
313 assert_eq!(fsa.get(b"\x00"), Some(1));
314 assert_eq!(fsa.get(b"\x00\xff"), Some(2));
315 assert_eq!(fsa.get(b"\xff"), Some(3));
316 assert_eq!(fsa.get(b"a\x00b"), Some(4));
317 assert_eq!(fsa.get(b"\x00\x00"), None);
318 assert_eq!(fsa.prefix(b"\x00").len(), 2);
319 }
320
321 #[test]
322 fn wide_alphabet_single_state() {
323 let pairs: Vec<(Vec<u8>, u64)> = (0u16..256).map(|b| (vec![b as u8], u64::from(b))).collect();
325 let mut bld = Builder::new();
326 for (k, v) in &pairs {
327 bld.insert(k, *v);
328 }
329 let fsa = Fsa::new(bld.finish()).unwrap();
330 assert_eq!(fsa.len(), 256);
331 for (k, v) in &pairs {
332 assert_eq!(fsa.get(k), Some(*v));
333 }
334 }
335
336 fn wide_key() -> impl Strategy<Value = Vec<u8>> {
337 proptest::collection::vec(any::<u8>(), 0..5)
338 }
339
340 proptest! {
341 #![proptest_config(ProptestConfig { cases: 200, ..ProptestConfig::default() })]
342
343 #[test]
345 fn diff_wide_bytes(
346 entries in proptest::collection::vec((wide_key(), any::<u64>()), 0..48),
347 probes in proptest::collection::vec(wide_key(), 0..24),
348 ) {
349 let mut oracle: BTreeMap<Vec<u8>, u64> = BTreeMap::new();
350 for (k, v) in &entries { oracle.insert(k.clone(), *v); }
351 let mut b = Builder::new();
352 for (k, v) in &entries { b.insert(k, *v); }
353 let fsa = Fsa::new(b.finish()).unwrap();
354 for (k, v) in &oracle { prop_assert_eq!(fsa.get(k), Some(*v)); }
355 for p in &probes {
356 let want: Vec<(Vec<u8>, u64)> = oracle.iter()
357 .filter(|(k, _)| k.starts_with(p))
358 .map(|(k, v)| (k.clone(), *v)).collect();
359 prop_assert_eq!(fsa.prefix(p), want);
360 }
361 }
362
363 #[test]
365 fn new_no_panic(bytes in proptest::collection::vec(any::<u8>(), 0..128)) {
366 let _ = Fsa::new(bytes.as_slice());
367 }
368
369 #[test]
374 fn fuzz_mutated_buffer_no_panic(
375 muts in proptest::collection::vec((any::<usize>(), any::<u8>()), 0..40),
376 probe in proptest::collection::vec(any::<u8>(), 0..6),
377 ) {
378 let mut fb = Builder::new();
380 for i in 0..60u32 {
381 fb.insert(format!("key{i:03}").as_bytes(), u64::from(i) * 7);
382 }
383 let mut bytes = fb.finish();
384 for (idx, val) in &muts {
385 if !bytes.is_empty() {
386 let i = idx % bytes.len();
387 bytes[i] = *val;
388 }
389 }
390 if let Ok(fsa) = Fsa::new(bytes.as_slice()) {
391 let _ = fsa.get(&probe);
392 let _ = fsa.contains_prefix(&probe);
393 let _ = fsa.prefix(&probe); }
395
396 let mut db = DictBuilder::new();
398 for i in 0..60u32 {
399 db.insert(format!("c{}", i % 12).as_bytes(), format!("item{i}").as_bytes(), u64::from(i));
400 }
401 let mut dbytes = db.finish();
402 for (idx, val) in &muts {
403 if !dbytes.is_empty() {
404 let i = idx % dbytes.len();
405 dbytes[i] = *val;
406 }
407 }
408 if let Ok(dict) = Dict::new(dbytes.as_slice()) {
409 let _ = dict.get(&probe);
410 let _ = dict.prefix(&probe);
411 dict.get_for_each(&probe, |_, _| {});
412 }
413 }
414 }
415}