1use smallvec::SmallVec;
65use std::fmt;
66
67#[derive(Debug, Default, Clone)]
69struct NInfo {
70 sibling: u8, child: u8, }
73
74#[derive(Debug, Default, Clone)]
77struct Node {
78 base_: i32, check: i32, }
81
82impl Node {
83 #[inline]
84 fn base(&self) -> i32 {
85 #[cfg(feature = "reduced-trie")]
86 return -(self.base_ + 1);
87 #[cfg(not(feature = "reduced-trie"))]
88 return self.base_;
89 }
90}
91
92#[derive(Debug, Clone)]
94struct Block {
95 prev: i32, next: i32, num: i16, reject: i16, trial: i32, e_head: i32, }
102
103impl Block {
104 pub fn new() -> Self {
105 Block {
106 prev: 0,
107 next: 0,
108 num: 256, reject: 257, trial: 0,
111 e_head: 0,
112 }
113 }
114}
115
116enum BlockType {
119 Open, Closed, Full, }
123
124#[derive(Clone)]
126pub struct Cedar {
127 array: Vec<Node>, n_infos: Vec<NInfo>,
129 blocks: Vec<Block>,
130 reject: Vec<i16>,
131 blocks_head_full: i32, blocks_head_closed: i32, blocks_head_open: i32, capacity: usize,
135 size: usize,
136 ordered: bool,
137 max_trial: i32, }
139
140impl fmt::Debug for Cedar {
141 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142 write!(f, "Cedar(size={}, ordered={})", self.size, self.ordered)
143 }
144}
145
146#[allow(dead_code)]
147const CEDAR_VALUE_LIMIT: i32 = std::i32::MAX - 1;
148const CEDAR_NO_VALUE: i32 = -1;
149
150#[derive(Clone)]
152pub struct PrefixIter<'a> {
153 cedar: &'a Cedar,
154 key: &'a [u8],
155 from: usize,
156 i: usize,
157}
158
159impl<'a> Iterator for PrefixIter<'a> {
160 type Item = (i32, usize);
161
162 fn size_hint(&self) -> (usize, Option<usize>) {
163 (0, Some(self.key.len()))
164 }
165
166 fn next(&mut self) -> Option<Self::Item> {
167 while self.i < self.key.len() {
168 if let Some(value) = self.cedar.find(&self.key[self.i..=self.i], &mut self.from) {
169 if value == CEDAR_NO_VALUE {
170 self.i += 1;
171 continue;
172 } else {
173 let result = Some((value, self.i));
174 self.i += 1;
175 return result;
176 }
177 } else {
178 break;
179 }
180 }
181
182 None
183 }
184}
185
186#[derive(Clone)]
188pub struct PrefixPredictIter<'a> {
189 cedar: &'a Cedar,
190 key: &'a [u8],
191 from: usize,
192 p: usize,
193 root: usize,
194 value: Option<i32>,
195}
196
197impl<'a> PrefixPredictIter<'a> {
198 fn next_until_none(&mut self) -> Option<(i32, usize)> {
199 #[allow(clippy::never_loop)]
200 while let Some(value) = self.value {
201 let result = (value, self.p);
202
203 let (v_, from_, p_) = self.cedar.next(self.from, self.p, self.root);
204 self.from = from_;
205 self.p = p_;
206 self.value = v_;
207
208 return Some(result);
209 }
210
211 None
212 }
213}
214
215impl<'a> Iterator for PrefixPredictIter<'a> {
216 type Item = (i32, usize);
217
218 fn next(&mut self) -> Option<Self::Item> {
219 if self.from == 0 && self.p == 0 {
220 if self.cedar.find(self.key, &mut self.from).is_some() {
223 self.root = self.from;
224
225 let (v_, from_, p_) = self.cedar.begin(self.from, self.p);
226 self.from = from_;
227 self.p = p_;
228 self.value = v_;
229
230 self.next_until_none()
231 } else {
232 None
233 }
234 } else {
235 self.next_until_none()
236 }
237 }
238}
239
240#[allow(clippy::cast_lossless)]
241impl Cedar {
242 #[allow(clippy::new_without_default)]
244 pub fn new() -> Self {
245 let mut array: Vec<Node> = Vec::with_capacity(256);
246 let n_infos: Vec<NInfo> = (0..256).map(|_| Default::default()).collect();
247 let mut blocks: Vec<Block> = vec![Block::new(); 1];
248 let reject: Vec<i16> = (0..=256).map(|i| i + 1).collect();
249
250 #[cfg(feature = "reduced-trie")]
251 array.push(Node { base_: -1, check: -1 });
252 #[cfg(not(feature = "reduced-trie"))]
253 array.push(Node { base_: 0, check: -1 });
254
255 for i in 1..256 {
256 array.push(Node {
258 base_: -(i - 1),
259 check: -(i + 1),
260 })
261 }
262
263 array[1].base_ = -255;
265 array[255].check = -1;
266
267 blocks[0].e_head = 1;
268
269 Cedar {
270 array,
271 n_infos,
272 blocks,
273 reject,
274 blocks_head_full: 0,
275 blocks_head_closed: 0,
276 blocks_head_open: 0,
277 capacity: 256,
278 size: 256,
279 ordered: true,
280 max_trial: 1,
281 }
282 }
283
284 #[allow(dead_code)]
286 pub fn build(&mut self, key_values: &[(&str, i32)]) {
287 for (key, value) in key_values {
288 self.update(key, *value);
289 }
290 }
291
292 pub fn update(&mut self, key: &str, value: i32) {
294 let from = 0;
295 let pos = 0;
296 self.update_(key.as_bytes(), value, from, pos);
297 }
298
299 fn update_(&mut self, key: &[u8], value: i32, mut from: usize, mut pos: usize) -> i32 {
301 if from == 0 && key.is_empty() {
302 panic!("failed to insert zero-length key");
303 }
304
305 while pos < key.len() {
306 #[cfg(feature = "reduced-trie")]
307 {
308 let val_ = self.array[from].base_;
309 if val_ >= 0 && val_ != CEDAR_VALUE_LIMIT {
310 let to = self.follow(from, 0);
311 self.array[to as usize].base_ = val_;
312 }
313 }
314
315 from = self.follow(from, key[pos]) as usize;
316 pos += 1;
317 }
318
319 #[cfg(feature = "reduced-trie")]
320 let to = if self.array[from].base_ >= 0 {
321 from as i32
322 } else {
323 self.follow(from, 0)
324 };
325
326 #[cfg(feature = "reduced-trie")]
327 {
328 if self.array[to as usize].base_ == CEDAR_VALUE_LIMIT {
329 self.array[to as usize].base_ = 0;
330 }
331 }
332
333 #[cfg(not(feature = "reduced-trie"))]
334 let to = self.follow(from, 0);
335
336 self.array[to as usize].base_ = value;
337 self.array[to as usize].base_
338 }
339
340 #[inline]
343 fn follow(&mut self, from: usize, label: u8) -> i32 {
344 let base = self.array[from].base();
345
346 #[allow(unused_assignments)]
347 let mut to = 0;
348
349 if base < 0 || self.array[(base ^ (label as i32)) as usize].check < 0 {
351 to = self.pop_e_node(base, label, from as i32);
353 let branch: i32 = to ^ (label as i32);
354
355 self.push_sibling(from, branch, label, base >= 0);
357 } else {
358 to = base ^ (label as i32);
360 if self.array[to as usize].check != (from as i32) {
361 to = self.resolve(from, base, label);
363 }
364 }
365
366 to
367 }
368
369 fn find(&self, key: &[u8], from: &mut usize) -> Option<i32> {
371 #[allow(unused_assignments)]
372 let mut to: usize = 0;
373 let mut pos = 0;
374
375 while pos < key.len() {
377 #[cfg(feature = "reduced-trie")]
378 {
379 if self.array[*from].base_ >= 0 {
380 break;
381 }
382 }
383
384 to = (self.array[*from].base() ^ (key[pos] as i32)) as usize;
385 if self.array[to as usize].check != (*from as i32) {
386 return None;
387 }
388
389 *from = to;
390 pos += 1;
391 }
392
393 #[cfg(feature = "reduced-trie")]
394 {
395 if self.array[*from].base_ >= 0 {
396 if pos == key.len() {
397 return Some(self.array[*from].base_);
398 } else {
399 return None;
400 }
401 }
402 }
403
404 let n = &self.array[(self.array[*from].base()) as usize];
407 if n.check != (*from as i32) {
408 Some(CEDAR_NO_VALUE)
409 } else {
410 Some(n.base_)
411 }
412 }
413
414 pub fn erase(&mut self, key: &str) {
416 self.erase_(key.as_bytes())
417 }
418
419 fn erase_(&mut self, key: &[u8]) {
421 let mut from = 0;
422
423 if let Some(v) = self.find(key, &mut from) {
425 if v != CEDAR_NO_VALUE {
426 self.erase__(from);
427 }
428 }
429 }
430
431 fn erase__(&mut self, mut from: usize) {
432 #[cfg(feature = "reduced-trie")]
433 let mut e: i32 = if self.array[from].base_ >= 0 {
434 from as i32
435 } else {
436 self.array[from].base()
437 };
438
439 #[cfg(feature = "reduced-trie")]
440 {
441 from = self.array[e as usize].check as usize;
442 }
443
444 #[cfg(not(feature = "reduced-trie"))]
445 let mut e = self.array[from].base();
446
447 #[allow(unused_assignments)]
448 let mut has_sibling = false;
449 loop {
450 let n = self.array[from].clone();
451 has_sibling = self.n_infos[(n.base() ^ (self.n_infos[from].child as i32)) as usize].sibling != 0;
452
453 if has_sibling {
455 self.pop_sibling(from as i32, n.base(), (n.base() ^ e) as u8);
456 }
457
458 self.push_e_node(e);
460 e = from as i32;
461
462 from = self.array[from].check as usize;
464
465 if has_sibling {
467 break;
468 }
469 }
470 }
471
472 pub fn exact_match_search(&self, key: &str) -> Option<(i32, usize, usize)> {
474 let key = key.as_bytes();
475 let mut from = 0;
476
477 if let Some(value) = self.find(key, &mut from) {
478 if value == CEDAR_NO_VALUE {
479 return None;
480 }
481
482 Some((value, key.len(), from))
483 } else {
484 None
485 }
486 }
487
488 pub fn common_prefix_iter<'a>(&'a self, key: &'a str) -> PrefixIter<'a> {
490 let key = key.as_bytes();
491
492 PrefixIter {
493 cedar: self,
494 key,
495 from: 0,
496 i: 0,
497 }
498 }
499
500 pub fn common_prefix_search(&self, key: &str) -> Option<Vec<(i32, usize)>> {
502 self.common_prefix_iter(key).map(Some).collect()
503 }
504
505 pub fn common_prefix_predict_iter<'a>(&'a self, key: &'a str) -> PrefixPredictIter<'a> {
507 let key = key.as_bytes();
508
509 PrefixPredictIter {
510 cedar: self,
511 key,
512 from: 0,
513 p: 0,
514 root: 0,
515 value: None,
516 }
517 }
518
519 pub fn common_prefix_predict(&self, key: &str) -> Option<Vec<(i32, usize)>> {
521 self.common_prefix_predict_iter(key).map(Some).collect()
522 }
523
524 fn begin(&self, mut from: usize, mut p: usize) -> (Option<i32>, usize, usize) {
526 let base = self.array[from].base();
527 let mut c = self.n_infos[from].child;
528
529 if from == 0 {
530 c = self.n_infos[(base ^ (c as i32)) as usize].sibling;
531
532 if c == 0 {
534 return (None, from, p);
535 }
536 }
537
538 while c != 0 {
540 from = (self.array[from].base() ^ (c as i32)) as usize;
541 c = self.n_infos[from].child;
542 p += 1;
543 }
544
545 #[cfg(feature = "reduced-trie")]
546 {
547 if self.array[from].base_ >= 0 {
548 return (Some(self.array[from].base_), from, p);
549 }
550 }
551
552 let v = self.array[(self.array[from].base() ^ (c as i32)) as usize].base_;
554 (Some(v), from, p)
555 }
556
557 fn next(&self, mut from: usize, mut p: usize, root: usize) -> (Option<i32>, usize, usize) {
559 #[allow(unused_assignments)]
560 let mut c: u8 = 0;
561
562 #[cfg(feature = "reduced-trie")]
563 {
564 if self.array[from].base_ < 0 {
565 c = self.n_infos[(self.array[from].base()) as usize].sibling;
566 }
567 }
568 #[cfg(not(feature = "reduced-trie"))]
569 {
570 c = self.n_infos[(self.array[from].base()) as usize].sibling;
571 }
572
573 while c == 0 && from != root {
575 c = self.n_infos[from as usize].sibling;
576 from = self.array[from as usize].check as usize;
577
578 p -= 1;
579 }
580
581 if c != 0 {
582 from = (self.array[from].base() ^ (c as i32)) as usize;
584 let (v_, from_, p_) = self.begin(from, p + 1);
585 (v_, from_, p_)
586 } else {
587 (None, from, p)
589 }
590 }
591
592 fn pop_block(&mut self, idx: i32, from: BlockType, last: bool) {
595 let head: &mut i32 = match from {
596 BlockType::Open => &mut self.blocks_head_open,
597 BlockType::Closed => &mut self.blocks_head_closed,
598 BlockType::Full => &mut self.blocks_head_full,
599 };
600
601 if last {
602 *head = 0;
603 } else {
604 let b = self.blocks[idx as usize].clone();
605 self.blocks[b.prev as usize].next = b.next;
606 self.blocks[b.next as usize].prev = b.prev;
607
608 if idx == *head {
609 *head = b.next;
610 }
611 }
612 }
613
614 fn push_block(&mut self, idx: i32, to: BlockType, empty: bool) {
617 let head: &mut i32 = match to {
618 BlockType::Open => &mut self.blocks_head_open,
619 BlockType::Closed => &mut self.blocks_head_closed,
620 BlockType::Full => &mut self.blocks_head_full,
621 };
622
623 if empty {
624 self.blocks[idx as usize].next = idx;
625 self.blocks[idx as usize].prev = idx;
626 *head = idx;
627 } else {
628 self.blocks[idx as usize].prev = self.blocks[*head as usize].prev;
629 self.blocks[idx as usize].next = *head;
630
631 let t = self.blocks[*head as usize].prev;
632 self.blocks[t as usize].next = idx;
633 self.blocks[*head as usize].prev = idx;
634 *head = idx;
635 }
636 }
637
638 fn add_block(&mut self) -> i32 {
640 if self.size == self.capacity {
641 self.capacity += self.capacity;
642
643 self.array.resize(self.capacity, Default::default());
644 self.n_infos.resize(self.capacity, Default::default());
645 self.blocks.resize(self.capacity >> 8, Block::new());
646 }
647
648 self.blocks[self.size >> 8].e_head = self.size as i32;
649
650 self.array[self.size] = Node {
652 base_: -((self.size as i32) + 255),
653 check: -((self.size as i32) + 1),
654 };
655
656 for i in (self.size + 1)..(self.size + 255) {
657 self.array[i] = Node {
658 base_: -(i as i32 - 1),
659 check: -(i as i32 + 1),
660 };
661 }
662
663 self.array[self.size + 255] = Node {
664 base_: -((self.size as i32) + 254),
665 check: -(self.size as i32),
666 };
667
668 let is_empty = self.blocks_head_open == 0;
669 let idx = (self.size >> 8) as i32;
670 debug_assert!(self.blocks[idx as usize].num > 1);
671 self.push_block(idx, BlockType::Open, is_empty);
672
673 self.size += 256;
674
675 ((self.size >> 8) - 1) as i32
676 }
677
678 fn transfer_block(&mut self, idx: i32, from: BlockType, to: BlockType, to_block_empty: bool) {
681 let is_last = idx == self.blocks[idx as usize].next; let is_empty = to_block_empty && (self.blocks[idx as usize].num != 0);
683
684 self.pop_block(idx, from, is_last);
685 self.push_block(idx, to, is_empty);
686 }
687
688 fn pop_e_node(&mut self, base: i32, label: u8, from: i32) -> i32 {
690 let e: i32 = if base < 0 {
691 self.find_place()
692 } else {
693 base ^ (label as i32)
694 };
695
696 let idx = e >> 8;
697 let n = self.array[e as usize].clone();
698
699 self.blocks[idx as usize].num -= 1;
700 if self.blocks[idx as usize].num == 0 {
702 if idx != 0 {
703 self.transfer_block(idx, BlockType::Closed, BlockType::Full, self.blocks_head_full == 0);
704 }
705 } else {
706 self.array[(-n.base_) as usize].check = n.check;
707 self.array[(-n.check) as usize].base_ = n.base_;
708
709 if e == self.blocks[idx as usize].e_head {
710 self.blocks[idx as usize].e_head = -n.check;
711 }
712
713 if idx != 0 && self.blocks[idx as usize].num == 1 && self.blocks[idx as usize].trial != self.max_trial {
714 self.transfer_block(idx, BlockType::Open, BlockType::Closed, self.blocks_head_closed == 0);
715 }
716 }
717
718 #[cfg(feature = "reduced-trie")]
719 {
720 self.array[e as usize].base_ = CEDAR_VALUE_LIMIT;
721 self.array[e as usize].check = from;
722 if base < 0 {
723 self.array[from as usize].base_ = -(e ^ (label as i32)) - 1;
724 }
725 }
726
727 #[cfg(not(feature = "reduced-trie"))]
728 {
729 if label != 0 {
730 self.array[e as usize].base_ = -1;
731 } else {
732 self.array[e as usize].base_ = 0;
733 }
734 self.array[e as usize].check = from;
735 if base < 0 {
736 self.array[from as usize].base_ = e ^ (label as i32);
737 }
738 }
739
740 e
741 }
742
743 fn push_e_node(&mut self, e: i32) {
745 let idx = e >> 8;
746 self.blocks[idx as usize].num += 1;
747
748 if self.blocks[idx as usize].num == 1 {
749 self.blocks[idx as usize].e_head = e;
750 self.array[e as usize] = Node { base_: -e, check: -e };
751
752 if idx != 0 {
753 self.transfer_block(idx, BlockType::Full, BlockType::Closed, self.blocks_head_closed == 0);
755 }
756 } else {
757 let prev = self.blocks[idx as usize].e_head;
758
759 let next = -self.array[prev as usize].check;
760
761 self.array[e as usize] = Node {
763 base_: -prev,
764 check: -next,
765 };
766
767 self.array[prev as usize].check = -e;
768 self.array[next as usize].base_ = -e;
769
770 if self.blocks[idx as usize].num == 2 || self.blocks[idx as usize].trial == self.max_trial {
772 debug_assert!(self.blocks[idx as usize].num > 1);
773 if idx != 0 {
774 self.transfer_block(idx, BlockType::Closed, BlockType::Open, self.blocks_head_open == 0);
775 }
776 }
777
778 self.blocks[idx as usize].trial = 0;
780 }
781
782 if self.blocks[idx as usize].reject < self.reject[self.blocks[idx as usize].num as usize] {
783 self.blocks[idx as usize].reject = self.reject[self.blocks[idx as usize].num as usize];
784 }
785
786 self.n_infos[e as usize] = Default::default();
787 }
788
789 fn push_sibling(&mut self, from: usize, base: i32, label: u8, has_child: bool) {
791 let keep_order: bool = if self.ordered {
792 label > self.n_infos[from].child
793 } else {
794 self.n_infos[from].child == 0
795 };
796
797 let sibling: u8;
798 {
799 let mut c: &mut u8 = &mut self.n_infos[from as usize].child;
800 if has_child && keep_order {
801 loop {
802 let code = *c as i32;
803 c = &mut self.n_infos[(base ^ code) as usize].sibling;
804
805 if !(self.ordered && (*c != 0) && (*c < label)) {
806 break;
807 }
808 }
809 }
810 sibling = *c;
811
812 *c = label;
813 }
814
815 self.n_infos[(base ^ (label as i32)) as usize].sibling = sibling;
816 }
817
818 #[allow(dead_code)]
820 fn pop_sibling(&mut self, from: i32, base: i32, label: u8) {
821 let mut c: *mut u8 = &mut self.n_infos[from as usize].child;
822 unsafe {
823 while *c != label {
824 let code = *c as i32;
825 c = &mut self.n_infos[(base ^ code) as usize].sibling;
826 }
827
828 let code = label as i32;
829 *c = self.n_infos[(base ^ code) as usize].sibling;
830 }
831 }
832
833 fn consult(&self, base_n: i32, base_p: i32, mut c_n: u8, mut c_p: u8) -> bool {
836 loop {
837 c_n = self.n_infos[(base_n ^ (c_n as i32)) as usize].sibling;
838 c_p = self.n_infos[(base_p ^ (c_p as i32)) as usize].sibling;
839
840 if !(c_n != 0 && c_p != 0) {
841 break;
842 }
843 }
844
845 c_p != 0
846 }
847
848 fn set_child(&self, base: i32, mut c: u8, label: u8, not_terminal: bool) -> SmallVec<[u8; 256]> {
850 let mut child: SmallVec<[u8; 256]> = SmallVec::new();
851
852 if c == 0 {
853 child.push(c);
854 c = self.n_infos[(base ^ (c as i32)) as usize].sibling;
855 }
856
857 if self.ordered {
858 while c != 0 && c <= label {
859 child.push(c);
860 c = self.n_infos[(base ^ (c as i32)) as usize].sibling;
861 }
862 }
863
864 if not_terminal {
865 child.push(label);
866 }
867
868 while c != 0 {
869 child.push(c);
870 c = self.n_infos[(base ^ (c as i32)) as usize].sibling;
871 }
872
873 child
874 }
875
876 fn find_place(&mut self) -> i32 {
878 if self.blocks_head_closed != 0 {
879 return self.blocks[self.blocks_head_closed as usize].e_head;
880 }
881
882 if self.blocks_head_open != 0 {
883 return self.blocks[self.blocks_head_open as usize].e_head;
884 }
885
886 self.add_block() << 8
888 }
889
890 fn find_places(&mut self, child: &[u8]) -> i32 {
892 let mut idx = self.blocks_head_open;
893
894 if idx != 0 {
896 debug_assert!(self.blocks[idx as usize].num > 1);
897 let bz = self.blocks[self.blocks_head_open as usize].prev;
898 let nc = child.len() as i16;
899
900 loop {
901 if self.blocks[idx as usize].num >= nc && nc < self.blocks[idx as usize].reject {
905 let mut e = self.blocks[idx as usize].e_head;
906 loop {
907 let base = e ^ (child[0] as i32);
908
909 let mut i = 1;
910 while self.array[(base ^ (child[i] as i32)) as usize].check < 0 {
912 if i == child.len() - 1 {
913 self.blocks[idx as usize].e_head = e;
915 return e;
916 }
917 i += 1;
918 }
919
920 e = -self.array[e as usize].check;
922 if e == self.blocks[idx as usize].e_head {
923 break;
924 }
925 }
926 }
927
928 self.blocks[idx as usize].reject = nc;
931 if self.blocks[idx as usize].reject < self.reject[self.blocks[idx as usize].num as usize] {
932 self.reject[self.blocks[idx as usize].num as usize] = self.blocks[idx as usize].reject;
934 }
935
936 let idx_ = self.blocks[idx as usize].next;
937
938 self.blocks[idx as usize].trial += 1;
939
940 if self.blocks[idx as usize].trial == self.max_trial {
942 self.transfer_block(idx, BlockType::Open, BlockType::Closed, self.blocks_head_closed == 0);
943 }
944
945 if idx == bz {
947 break;
948 }
949
950 idx = idx_;
952 }
953 }
954
955 self.add_block() << 8
956 }
957
958 fn resolve(&mut self, mut from_n: usize, base_n: i32, label_n: u8) -> i32 {
960 let to_pn = base_n ^ (label_n as i32);
961
962 let from_p = self.array[to_pn as usize].check;
964 let base_p = self.array[from_p as usize].base();
965
966 let flag = self.consult(
968 base_n,
969 base_p,
970 self.n_infos[from_n as usize].child,
971 self.n_infos[from_p as usize].child,
972 );
973
974 let children = if flag {
976 self.set_child(base_n, self.n_infos[from_n as usize].child, label_n, true)
977 } else {
978 self.set_child(base_p, self.n_infos[from_p as usize].child, 255, false)
979 };
980
981 let mut base = if children.len() == 1 {
984 self.find_place()
985 } else {
986 self.find_places(&children)
987 };
988
989 base ^= children[0] as i32;
990
991 let (from, base_) = if flag {
992 (from_n as i32, base_n)
993 } else {
994 (from_p, base_p)
995 };
996
997 if flag && children[0] == label_n {
998 self.n_infos[from as usize].child = label_n;
999 }
1000
1001 #[cfg(feature = "reduced-trie")]
1002 {
1003 self.array[from as usize].base_ = -base - 1;
1004 }
1005
1006 #[cfg(not(feature = "reduced-trie"))]
1007 {
1008 self.array[from as usize].base_ = base;
1009 }
1010
1011 for i in 0..(children.len()) {
1013 let to = self.pop_e_node(base, children[i], from);
1014 let to_ = base_ ^ (children[i] as i32);
1015
1016 if i == children.len() - 1 {
1017 self.n_infos[to as usize].sibling = 0;
1018 } else {
1019 self.n_infos[to as usize].sibling = children[i + 1];
1020 }
1021
1022 if flag && to_ == to_pn {
1023 continue;
1024 }
1025
1026 self.array[to as usize].base_ = self.array[to_ as usize].base_;
1027
1028 #[cfg(feature = "reduced-trie")]
1029 let condition = self.array[to as usize].base_ < 0 && children[i] != 0;
1030 #[cfg(not(feature = "reduced-trie"))]
1031 let condition = self.array[to as usize].base_ > 0 && children[i] != 0;
1032
1033 if condition {
1034 let mut c = self.n_infos[to_ as usize].child;
1035
1036 self.n_infos[to as usize].child = c;
1037
1038 loop {
1039 let idx = (self.array[to as usize].base() ^ (c as i32)) as usize;
1040 self.array[idx].check = to;
1041 c = self.n_infos[idx].sibling;
1042
1043 if c == 0 {
1044 break;
1045 }
1046 }
1047 }
1048
1049 if !flag && to_ == (from_n as i32) {
1050 from_n = to as usize;
1051 }
1052
1053 if !flag && to_ == to_pn {
1055 self.push_sibling(from_n, to_pn ^ (label_n as i32), label_n, true);
1056 self.n_infos[to_ as usize].child = 0;
1057
1058 #[cfg(feature = "reduced-trie")]
1059 {
1060 self.array[to_ as usize].base_ = CEDAR_VALUE_LIMIT;
1061 }
1062
1063 #[cfg(not(feature = "reduced-trie"))]
1064 {
1065 if label_n != 0 {
1066 self.array[to_ as usize].base_ = -1;
1067 } else {
1068 self.array[to_ as usize].base_ = 0;
1069 }
1070 }
1071
1072 self.array[to_ as usize].check = from_n as i32;
1073 } else {
1074 self.push_e_node(to_);
1075 }
1076 }
1077
1078 if flag {
1080 base ^ (label_n as i32)
1081 } else {
1082 to_pn
1083 }
1084 }
1085}
1086
1087#[cfg(test)]
1088mod tests {
1089 use super::*;
1090 use rand::distributions::Alphanumeric;
1091 use rand::{thread_rng, Rng};
1092 use std::iter;
1093
1094 #[test]
1095 fn test_insert_and_delete() {
1096 let dict = vec!["a"];
1097 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1098 let mut cedar = Cedar::new();
1099 cedar.build(&key_values);
1100
1101 let result = cedar.exact_match_search("ab").map(|x| x.0);
1102 assert_eq!(None, result);
1103
1104 cedar.update("ab", 1);
1105 let result = cedar.exact_match_search("ab").map(|x| x.0);
1106 assert_eq!(Some(1), result);
1107
1108 cedar.erase("ab");
1109 let result = cedar.exact_match_search("ab").map(|x| x.0);
1110 assert_eq!(None, result);
1111
1112 cedar.update("abc", 2);
1113 let result = cedar.exact_match_search("abc").map(|x| x.0);
1114 assert_eq!(Some(2), result);
1115
1116 cedar.erase("abc");
1117 let result = cedar.exact_match_search("abc").map(|x| x.0);
1118 assert_eq!(None, result);
1119 }
1120
1121 #[test]
1122 fn test_common_prefix_search() {
1123 let dict = vec![
1124 "a",
1125 "ab",
1126 "abc",
1127 "アルゴリズム",
1128 "データ",
1129 "構造",
1130 "网",
1131 "网球",
1132 "网球拍",
1133 "中",
1134 "中华",
1135 "中华人民",
1136 "中华人民共和国",
1137 ];
1138 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1139 let mut cedar = Cedar::new();
1140 cedar.build(&key_values);
1141
1142 let result: Vec<i32> = cedar
1143 .common_prefix_search("abcdefg")
1144 .unwrap()
1145 .iter()
1146 .map(|x| x.0)
1147 .collect();
1148 assert_eq!(vec![0, 1, 2], result);
1149
1150 let result: Vec<i32> = cedar
1151 .common_prefix_search("网球拍卖会")
1152 .unwrap()
1153 .iter()
1154 .map(|x| x.0)
1155 .collect();
1156 assert_eq!(vec![6, 7, 8], result);
1157
1158 let result: Vec<i32> = cedar
1159 .common_prefix_search("中华人民共和国")
1160 .unwrap()
1161 .iter()
1162 .map(|x| x.0)
1163 .collect();
1164 assert_eq!(vec![9, 10, 11, 12], result);
1165
1166 let result: Vec<i32> = cedar
1167 .common_prefix_search("データ構造とアルゴリズム")
1168 .unwrap()
1169 .iter()
1170 .map(|x| x.0)
1171 .collect();
1172 assert_eq!(vec![4], result);
1173 }
1174
1175 #[test]
1176 fn test_common_prefix_iter() {
1177 let dict = vec![
1178 "a",
1179 "ab",
1180 "abc",
1181 "アルゴリズム",
1182 "データ",
1183 "構造",
1184 "网",
1185 "网球",
1186 "网球拍",
1187 "中",
1188 "中华",
1189 "中华人民",
1190 "中华人民共和国",
1191 ];
1192
1193 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1194 let mut cedar = Cedar::new();
1195 cedar.build(&key_values);
1196
1197 let result: Vec<i32> = cedar.common_prefix_iter("abcdefg").map(|x| x.0).collect();
1198 assert_eq!(vec![0, 1, 2], result);
1199
1200 let result: Vec<i32> = cedar.common_prefix_iter("网球拍卖会").map(|x| x.0).collect();
1201 assert_eq!(vec![6, 7, 8], result);
1202
1203 let result: Vec<i32> = cedar.common_prefix_iter("中华人民共和国").map(|x| x.0).collect();
1204 assert_eq!(vec![9, 10, 11, 12], result);
1205
1206 let result: Vec<i32> = cedar
1207 .common_prefix_iter("データ構造とアルゴリズム")
1208 .map(|x| x.0)
1209 .collect();
1210 assert_eq!(vec![4], result);
1211 }
1212
1213 #[test]
1214 fn test_common_prefix_predict() {
1215 let dict = vec!["a", "ab", "abc"];
1216 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1217 let mut cedar = Cedar::new();
1218 cedar.build(&key_values);
1219
1220 let result: Vec<i32> = cedar.common_prefix_predict("a").unwrap().iter().map(|x| x.0).collect();
1221 assert_eq!(vec![0, 1, 2], result);
1222 }
1223
1224 #[test]
1225 fn test_exact_match_search() {
1226 let dict = vec!["a", "ab", "abc"];
1227 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1228 let mut cedar = Cedar::new();
1229 cedar.build(&key_values);
1230
1231 let result = cedar.exact_match_search("abc").map(|x| x.0);
1232 assert_eq!(Some(2), result);
1233 }
1234
1235 #[test]
1236 fn test_unicode_han_sip() {
1237 let dict = vec!["讥䶯䶰", "讥䶯䶰䶱䶲", "讥䶯䶰䶱䶲䶳䶴䶵𦡦"];
1238
1239 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1240 let mut cedar = Cedar::new();
1241 cedar.build(&key_values);
1242
1243 let result: Vec<i32> = cedar.common_prefix_iter("讥䶯䶰䶱䶲䶳䶴䶵𦡦").map(|x| x.0).collect();
1244 assert_eq!(vec![0, 1, 2], result);
1245 }
1246
1247 #[test]
1248 fn test_unicode_grapheme_cluster() {
1249 let dict = vec!["a", "abc", "abcde\u{0301}"];
1250
1251 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1252 let mut cedar = Cedar::new();
1253 cedar.build(&key_values);
1254
1255 let result: Vec<i32> = cedar
1256 .common_prefix_iter("abcde\u{0301}\u{1100}\u{1161}\u{AC00}")
1257 .map(|x| x.0)
1258 .collect();
1259 assert_eq!(vec![0, 1, 2], result);
1260 }
1261
1262 #[test]
1263 fn test_erase() {
1264 let dict = vec!["a", "ab", "abc"];
1265 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1266 let mut cedar = Cedar::new();
1267 cedar.build(&key_values);
1268
1269 cedar.erase("abc");
1270 assert!(cedar.exact_match_search("abc").is_none());
1271 assert!(cedar.exact_match_search("ab").is_some());
1272 assert!(cedar.exact_match_search("a").is_some());
1273
1274 cedar.erase("ab");
1275 assert!(cedar.exact_match_search("ab").is_none());
1276 assert!(cedar.exact_match_search("a").is_some());
1277
1278 cedar.erase("a");
1279 assert!(cedar.exact_match_search("a").is_none());
1280 }
1281
1282 #[test]
1283 fn test_erase_on_internal_key() {
1284 let mut cedar = Cedar::new();
1285
1286 cedar.update("aa", 0);
1287 assert!(cedar.exact_match_search("aa").is_some());
1288 cedar.update("ab", 1);
1289 assert!(cedar.exact_match_search("ab").is_some());
1290
1291 cedar.erase("a");
1292 assert!(cedar.exact_match_search("a").is_none());
1293 cedar.erase("aa");
1294 assert!(cedar.exact_match_search("aa").is_none());
1295 cedar.erase("ab");
1296 assert!(cedar.exact_match_search("ab").is_none());
1297 }
1298
1299 #[test]
1300 fn test_update() {
1301 let dict = vec!["a", "ab", "abc"];
1302 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1303 let mut cedar = Cedar::new();
1304 cedar.build(&key_values);
1305
1306 cedar.update("abcd", 3);
1307
1308 assert!(cedar.exact_match_search("a").is_some());
1309 assert!(cedar.exact_match_search("ab").is_some());
1310 assert!(cedar.exact_match_search("abc").is_some());
1311 assert!(cedar.exact_match_search("abcd").is_some());
1312 assert!(cedar.exact_match_search("abcde").is_none());
1313
1314 let dict = vec!["a", "ab", "abc"];
1315 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1316 let mut cedar = Cedar::new();
1317 cedar.build(&key_values);
1318 cedar.update("bachelor", 1);
1319 cedar.update("jar", 2);
1320 cedar.update("badge", 3);
1321 cedar.update("baby", 4);
1322
1323 assert!(cedar.exact_match_search("bachelor").is_some());
1324 assert!(cedar.exact_match_search("jar").is_some());
1325 assert!(cedar.exact_match_search("badge").is_some());
1326 assert!(cedar.exact_match_search("baby").is_some());
1327 assert!(cedar.exact_match_search("abcde").is_none());
1328
1329 let dict = vec!["a", "ab", "abc"];
1330 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1331 let mut cedar = Cedar::new();
1332 cedar.build(&key_values);
1333 cedar.update("中", 1);
1334 cedar.update("中华", 2);
1335 cedar.update("中华人民", 3);
1336 cedar.update("中华人民共和国", 4);
1337
1338 assert!(cedar.exact_match_search("中").is_some());
1339 assert!(cedar.exact_match_search("中华").is_some());
1340 assert!(cedar.exact_match_search("中华人民").is_some());
1341 assert!(cedar.exact_match_search("中华人民共和国").is_some());
1342 }
1343
1344 #[test]
1345 fn test_quickcheck_like() {
1346 let mut rng = thread_rng();
1347 let mut dict: Vec<String> = Vec::with_capacity(1000);
1348 for _ in 0..1000 {
1349 let chars: Vec<u8> = iter::repeat(()).map(|()| rng.sample(Alphanumeric)).take(30).collect();
1350 let s = String::from_utf8(chars).unwrap();
1351 dict.push(s);
1352 }
1353
1354 let key_values: Vec<(&str, i32)> = dict.iter().enumerate().map(|(k, s)| (s.as_ref(), k as i32)).collect();
1355 let mut cedar = Cedar::new();
1356 cedar.build(&key_values);
1357
1358 for (k, s) in dict.iter().enumerate() {
1359 assert_eq!(cedar.exact_match_search(s).map(|x| x.0), Some(k as i32));
1360 }
1361 }
1362
1363 #[test]
1364 fn test_quickcheck_like_with_deep_trie() {
1365 let mut rng = thread_rng();
1366 let mut dict: Vec<String> = Vec::with_capacity(1000);
1367 let mut s = String::new();
1368 for _ in 0..1000 {
1369 let c: char = rng.sample(Alphanumeric) as char;
1370 s.push(c);
1371 dict.push(s.clone());
1372 }
1373
1374 let key_values: Vec<(&str, i32)> = dict.iter().enumerate().map(|(k, s)| (s.as_ref(), k as i32)).collect();
1375 let mut cedar = Cedar::new();
1376 cedar.build(&key_values);
1377
1378 for (k, s) in dict.iter().enumerate() {
1379 assert_eq!(cedar.exact_match_search(s).map(|x| x.0), Some(k as i32));
1380 }
1381 }
1382
1383 #[test]
1384 fn test_mass_erase() {
1385 let mut rng = thread_rng();
1386 let mut dict: Vec<String> = Vec::with_capacity(1000);
1387 for _ in 0..1000 {
1388 let chars: Vec<u8> = iter::repeat(()).map(|()| rng.sample(Alphanumeric)).take(30).collect();
1389 let s = String::from_utf8(chars).unwrap();
1390
1391 dict.push(s);
1392 }
1393
1394 let key_values: Vec<(&str, i32)> = dict.iter().enumerate().map(|(k, s)| (s.as_ref(), k as i32)).collect();
1395 let mut cedar = Cedar::new();
1396 cedar.build(&key_values);
1397
1398 for s in dict.iter() {
1399 cedar.erase(s);
1400 assert!(cedar.exact_match_search(s).is_none());
1401 }
1402 }
1403
1404 #[test]
1405 fn test_duplication() {
1406 let dict = vec!["些许端", "些須", "些须", "亜", "亝", "亞", "亞", "亞丁", "亞丁港"];
1407 let key_values: Vec<(&str, i32)> = dict.into_iter().enumerate().map(|(k, s)| (s, k as i32)).collect();
1408 let mut cedar = Cedar::new();
1409 cedar.build(&key_values);
1410
1411 assert_eq!(cedar.exact_match_search("亞").map(|t| t.0), Some(6));
1412 assert_eq!(cedar.exact_match_search("亞丁港").map(|t| t.0), Some(8));
1413 assert_eq!(cedar.exact_match_search("亝").map(|t| t.0), Some(4));
1414 assert_eq!(cedar.exact_match_search("些須").map(|t| t.0), Some(1));
1415 }
1416}