1use crate::bytes;
2use crate::error::Result;
3use crate::raw::counting_writer::CountingWriter;
4use crate::raw::error::Error;
5use crate::raw::registry::Registry;
6use crate::raw::registry::RegistryEntry;
7use crate::raw::Output;
8use crate::raw::{CompiledAddr, Transition};
9use crate::raw::{Fst, FstType, EMPTY_ADDRESS, NONE_ADDRESS, VERSION};
10use crate::stream::{IntoStreamer, Streamer};
11use alloc::{vec, vec::Vec};
12use std::io;
13
14#[cfg(feature = "std")]
43pub struct Builder<W> {
44 wtr: CountingWriter<W>,
48 unfinished: UnfinishedNodes,
53 registry: Registry,
59 last: Option<Vec<u8>>,
64 last_addr: CompiledAddr,
71 len: usize,
73}
74
75#[derive(Debug)]
76#[cfg(feature = "alloc")]
77struct UnfinishedNodes {
78 stack: Vec<BuilderNodeUnfinished>,
79}
80
81#[derive(Debug)]
82#[cfg(feature = "alloc")]
83struct BuilderNodeUnfinished {
84 node: BuilderNode,
85 last: Option<LastTransition>,
86}
87
88#[derive(Debug, Hash, Eq, PartialEq)]
89#[cfg(feature = "alloc")]
90pub struct BuilderNode {
91 pub is_final: bool,
92 pub final_output: Output,
93 pub trans: Vec<Transition>,
94}
95
96#[derive(Debug)]
97struct LastTransition {
98 inp: u8,
99 out: Output,
100}
101
102#[cfg(feature = "std")]
103impl Builder<Vec<u8>> {
104 #[inline]
106 #[must_use]
107 pub fn memory() -> Builder<Vec<u8>> {
108 Builder::new(Vec::with_capacity(10 * (1 << 10))).unwrap()
109 }
110
111 #[inline]
113 pub fn into_fst(self) -> Fst<Vec<u8>> {
114 self.into_inner().and_then(Fst::new).unwrap()
115 }
116}
117
118#[cfg(feature = "std")]
119impl<W: io::Write> Builder<W> {
120 pub fn new(wtr: W) -> Result<Builder<W>> {
123 Builder::new_type(wtr, 0)
124 }
125
126 pub fn new_type(wtr: W, ty: FstType) -> Result<Builder<W>> {
129 let mut wtr = CountingWriter::new(wtr);
130 bytes::io_write_u64_le(VERSION, &mut wtr)?;
134 bytes::io_write_u64_le(ty, &mut wtr)?;
136 Ok(Builder {
137 wtr,
138 unfinished: UnfinishedNodes::new(),
139 registry: Registry::new(10_000, 2),
140 last: None,
141 last_addr: NONE_ADDRESS,
142 len: 0,
143 })
144 }
145
146 pub fn add<B>(&mut self, bs: B) -> Result<()>
148 where
149 B: AsRef<[u8]>,
150 {
151 self.check_last_key(bs.as_ref(), false)?;
152 self.insert_output(bs, None)
153 }
154
155 pub fn insert<B>(&mut self, bs: B, val: u64) -> Result<()>
165 where
166 B: AsRef<[u8]>,
167 {
168 self.check_last_key(bs.as_ref(), true)?;
169 self.insert_output(bs, Some(Output::new(val)))
170 }
171
172 pub fn extend_iter<T, I>(&mut self, iter: I) -> Result<()>
181 where
182 T: AsRef<[u8]>,
183 I: IntoIterator<Item = (T, Output)>,
184 {
185 for (key, out) in iter {
186 self.insert(key, out.value())?;
187 }
188 Ok(())
189 }
190
191 pub fn extend_stream<'f, I, S>(&mut self, stream: I) -> Result<()>
200 where
201 I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
202 S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
203 {
204 let mut stream = stream.into_stream();
205 while let Some((key, out)) = stream.next() {
206 self.insert(key, out.value())?;
207 }
208 Ok(())
209 }
210
211 pub fn finish(self) -> Result<()> {
215 self.into_inner()?;
216 Ok(())
217 }
218
219 pub fn into_inner(mut self) -> Result<W> {
222 self.compile_from(0)?;
223 let root_node = self.unfinished.pop_root();
224 let root_addr = self.compile(&root_node)?;
225 bytes::io_write_u64_le(self.len as u64, &mut self.wtr)?;
226 bytes::io_write_u64_le(root_addr as u64, &mut self.wtr)?;
227
228 let sum = self.wtr.masked_checksum();
229 let mut wtr = self.wtr.into_inner();
230 bytes::io_write_u32_le(sum, &mut wtr)?;
231 wtr.flush()?;
232 Ok(wtr)
233 }
234
235 fn insert_output<B>(&mut self, bs: B, out: Option<Output>) -> Result<()>
236 where
237 B: AsRef<[u8]>,
238 {
239 let bs = bs.as_ref();
240 if bs.is_empty() {
241 self.len = 1; self.unfinished.set_root_output(out.unwrap_or(Output::zero()));
243 return Ok(());
244 }
245 let (prefix_len, out) = if let Some(out) = out {
246 self.unfinished.find_common_prefix_and_set_output(bs, out)
247 } else {
248 (self.unfinished.find_common_prefix(bs), Output::zero())
249 };
250 if prefix_len == bs.len() {
251 assert!(out.is_zero());
258 return Ok(());
259 }
260 self.len += 1;
261 self.compile_from(prefix_len)?;
262 self.unfinished.add_suffix(&bs[prefix_len..], out);
263 Ok(())
264 }
265
266 fn compile_from(&mut self, istate: usize) -> Result<()> {
267 let mut addr = NONE_ADDRESS;
268 while istate + 1 < self.unfinished.len() {
269 let node = if addr == NONE_ADDRESS {
270 self.unfinished.pop_empty()
271 } else {
272 self.unfinished.pop_freeze(addr)
273 };
274 addr = self.compile(&node)?;
275 assert!(addr != NONE_ADDRESS);
276 }
277 self.unfinished.top_last_freeze(addr);
278 Ok(())
279 }
280
281 fn compile(&mut self, node: &BuilderNode) -> Result<CompiledAddr> {
282 if node.is_final
283 && node.trans.is_empty()
284 && node.final_output.is_zero()
285 {
286 return Ok(EMPTY_ADDRESS);
287 }
288 let entry = self.registry.entry(node);
289 if let RegistryEntry::Found(ref addr) = entry {
290 return Ok(*addr);
291 }
292 let start_addr = self.wtr.count() as CompiledAddr;
293 node.compile_to(&mut self.wtr, self.last_addr, start_addr)?;
294 self.last_addr = self.wtr.count() as CompiledAddr - 1;
295 if let RegistryEntry::NotFound(cell) = entry {
296 cell.insert(self.last_addr);
297 }
298 Ok(self.last_addr)
299 }
300
301 fn check_last_key(&mut self, bs: &[u8], check_dupe: bool) -> Result<()> {
302 if let Some(ref mut last) = self.last {
303 if check_dupe && bs == &**last {
304 return Err(Error::DuplicateKey { got: bs.to_vec() }.into());
305 }
306 if bs < &**last {
307 return Err(Error::OutOfOrder {
308 previous: last.clone(),
309 got: bs.to_vec(),
310 }
311 .into());
312 }
313 last.clear();
314 for &b in bs {
315 last.push(b);
316 }
317 } else {
318 self.last = Some(bs.to_vec());
319 }
320 Ok(())
321 }
322
323 pub fn get_ref(&self) -> &W {
325 self.wtr.get_ref()
326 }
327
328 pub fn bytes_written(&self) -> u64 {
330 self.wtr.count()
331 }
332}
333
334#[cfg(feature = "std")]
335impl UnfinishedNodes {
336 fn new() -> UnfinishedNodes {
337 let mut unfinished = UnfinishedNodes { stack: Vec::with_capacity(64) };
338 unfinished.push_empty(false);
339 unfinished
340 }
341
342 fn len(&self) -> usize {
343 self.stack.len()
344 }
345
346 fn push_empty(&mut self, is_final: bool) {
347 self.stack.push(BuilderNodeUnfinished {
348 node: BuilderNode { is_final, ..BuilderNode::default() },
349 last: None,
350 });
351 }
352
353 fn pop_root(&mut self) -> BuilderNode {
354 assert!(self.stack.len() == 1);
355 assert!(self.stack[0].last.is_none());
356 self.stack.pop().unwrap().node
357 }
358
359 fn pop_freeze(&mut self, addr: CompiledAddr) -> BuilderNode {
360 let mut unfinished = self.stack.pop().unwrap();
361 unfinished.last_compiled(addr);
362 unfinished.node
363 }
364
365 fn pop_empty(&mut self) -> BuilderNode {
366 let unfinished = self.stack.pop().unwrap();
367 assert!(unfinished.last.is_none());
368 unfinished.node
369 }
370
371 fn set_root_output(&mut self, out: Output) {
372 self.stack[0].node.is_final = true;
373 self.stack[0].node.final_output = out;
374 }
375
376 fn top_last_freeze(&mut self, addr: CompiledAddr) {
377 let last = self.stack.len().checked_sub(1).unwrap();
378 self.stack[last].last_compiled(addr);
379 }
380
381 fn add_suffix(&mut self, bs: &[u8], out: Output) {
382 if bs.is_empty() {
383 return;
384 }
385 let last = self.stack.len().checked_sub(1).unwrap();
386 assert!(self.stack[last].last.is_none());
387 self.stack[last].last = Some(LastTransition { inp: bs[0], out });
388 for &b in &bs[1..] {
389 self.stack.push(BuilderNodeUnfinished {
390 node: BuilderNode::default(),
391 last: Some(LastTransition { inp: b, out: Output::zero() }),
392 });
393 }
394 self.push_empty(true);
395 }
396
397 fn find_common_prefix(&mut self, bs: &[u8]) -> usize {
398 bs.iter()
399 .zip(&self.stack)
400 .take_while(|&(&b, node)| {
401 node.last.as_ref().is_some_and(|t| t.inp == b)
402 })
403 .count()
404 }
405
406 fn find_common_prefix_and_set_output(
407 &mut self,
408 bs: &[u8],
409 mut out: Output,
410 ) -> (usize, Output) {
411 let mut i = 0;
412 while i < bs.len() {
413 let add_prefix = match self.stack[i].last.as_mut() {
414 Some(ref mut t) if t.inp == bs[i] => {
415 i += 1;
416 let common_pre = t.out.prefix(out);
417 let add_prefix = t.out - common_pre;
418 out = out - common_pre;
419 t.out = common_pre;
420 add_prefix
421 }
422 _ => break,
423 };
424 if !add_prefix.is_zero() {
425 self.stack[i].add_output_prefix(add_prefix);
426 }
427 }
428 (i, out)
429 }
430}
431
432#[cfg(feature = "std")]
433impl BuilderNodeUnfinished {
434 fn last_compiled(&mut self, addr: CompiledAddr) {
435 if let Some(trans) = self.last.take() {
436 self.node.trans.push(Transition {
437 inp: trans.inp,
438 out: trans.out,
439 addr,
440 });
441 }
442 }
443
444 fn add_output_prefix(&mut self, prefix: Output) {
445 if self.node.is_final {
446 self.node.final_output = prefix.cat(self.node.final_output);
447 }
448 for t in &mut self.node.trans {
449 t.out = prefix.cat(t.out);
450 }
451 if let Some(ref mut t) = self.last {
452 t.out = prefix.cat(t.out);
453 }
454 }
455}
456
457#[cfg(feature = "alloc")]
458impl Clone for BuilderNode {
459 fn clone(&self) -> BuilderNode {
460 BuilderNode {
461 is_final: self.is_final,
462 final_output: self.final_output,
463 trans: self.trans.clone(),
464 }
465 }
466
467 fn clone_from(&mut self, source: &BuilderNode) {
468 self.is_final = source.is_final;
469 self.final_output = source.final_output;
470 self.trans.clear();
471 self.trans.extend(source.trans.iter());
472 }
473}
474
475#[cfg(feature = "alloc")]
476impl Default for BuilderNode {
477 fn default() -> BuilderNode {
478 BuilderNode {
479 is_final: false,
480 final_output: Output::zero(),
481 trans: vec![],
482 }
483 }
484}