1#![deny(missing_docs)]
5
6use bytemuck::cast_slice;
9use derive_getters::Dissolve;
10use std::ops::Range;
11
12pub mod blocks;
13mod radix;
14pub use radix::PositionalRadixTree;
15
16pub trait PositionalHash {
18 fn position(&self) -> u64;
20}
21
22pub type Token = u32;
24
25pub type Salt = Vec<u8>;
28
29pub type SaltHash = u64;
32
33pub type BlockHash = u64;
36
37pub type SequenceHash = u64;
41
42pub fn compute_hash_v2(data: &[u8], seed: u64) -> u64 {
44 xxhash_rust::xxh3::xxh3_64_with_seed(data, seed)
45}
46
47mod serde_bytes_u128 {
53 use serde::{Deserializer, Serializer};
54
55 pub fn serialize<S>(val: &u128, serializer: S) -> Result<S::Ok, S::Error>
56 where
57 S: Serializer,
58 {
59 serializer.serialize_bytes(&val.to_be_bytes())
60 }
61
62 pub fn deserialize<'de, D>(deserializer: D) -> Result<u128, D::Error>
63 where
64 D: Deserializer<'de>,
65 {
66 use serde::de::{self, SeqAccess, Visitor};
67 use std::fmt;
68
69 struct V;
70 impl<'de> Visitor<'de> for V {
71 type Value = [u8; 16];
72
73 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 f.write_str("16 bytes (msgpack bin) or a sequence of 16 u8 values")
75 }
76
77 fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<[u8; 16], E> {
78 v.try_into()
79 .map_err(|_| E::invalid_length(v.len(), &"16 bytes"))
80 }
81
82 fn visit_borrowed_bytes<E: de::Error>(self, v: &'de [u8]) -> Result<[u8; 16], E> {
83 self.visit_bytes(v)
84 }
85
86 fn visit_byte_buf<E: de::Error>(self, v: Vec<u8>) -> Result<[u8; 16], E> {
87 self.visit_bytes(&v)
88 }
89
90 fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<[u8; 16], A::Error> {
91 let mut arr = [0u8; 16];
92 for (i, slot) in arr.iter_mut().enumerate() {
93 *slot = seq
94 .next_element()?
95 .ok_or_else(|| de::Error::invalid_length(i, &"16 u8 elements"))?;
96 }
97 Ok(arr)
98 }
99 }
100
101 let arr = deserializer.deserialize_bytes(V)?;
102 Ok(u128::from_be_bytes(arr))
103 }
104}
105
106#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
118#[serde(transparent)]
119pub struct PositionalSequenceHash(#[serde(with = "serde_bytes_u128")] u128);
120
121impl PositionalSequenceHash {
122 pub fn new(sequence_hash: SequenceHash, position: u64, local_block_hash: BlockHash) -> Self {
127 let mode = Self::select_mode(position);
128 let upper = Self::encode_upper(mode, position, local_block_hash);
129 let value = ((upper as u128) << 64) | (sequence_hash as u128);
130 PositionalSequenceHash(value)
131 }
132
133 pub fn sequence_hash(&self) -> SequenceHash {
135 (self.0 & 0xFFFF_FFFF_FFFF_FFFF) as u64
136 }
137
138 pub fn position(&self) -> u64 {
140 let (_, position, _) = self.decode_upper();
141 position
142 }
143
144 pub fn local_block_hash(&self) -> BlockHash {
146 let (_, _, lbh) = self.decode_upper();
147 lbh
148 }
149
150 pub fn mode(&self) -> u8 {
152 let (mode, _, _) = self.decode_upper();
153 mode
154 }
155
156 #[inline(always)]
158 pub fn as_u128(&self) -> u128 {
159 self.0
160 }
161
162 fn select_mode(position: u64) -> u8 {
164 if position < (1u64 << 8) {
165 0 } else if position < (1u64 << 16) {
167 1 } else if position < (1u64 << 24) {
169 2 } else if position < (1u64 << 31) {
171 3 } else {
173 panic!(
174 "Position {} exceeds maximum supported value (2^31 - 1)",
175 position
176 );
177 }
178 }
179
180 fn encode_upper(mode: u8, position: u64, local_block_hash: u64) -> u64 {
182 let (position_bits, lbh_bits) = match mode {
183 0 => (8, 54), 1 => (16, 46), 2 => (24, 38), 3 => (31, 31), _ => unreachable!(
188 "Invalid mode {} when encoding PositionalSequenceHash; mode must be 0, 1, 2, or 3",
189 mode
190 ),
191 };
192
193 let position_mask = (1u64 << position_bits) - 1;
195 let lbh_mask = (1u64 << lbh_bits) - 1;
196
197 let position_part = position & position_mask;
199 let lbh_part = local_block_hash & lbh_mask;
200
201 ((mode as u64) << 62) | (position_part << lbh_bits) | lbh_part
203 }
204
205 fn decode_upper(&self) -> (u8, u64, u64) {
207 let upper = (self.0 >> 64) as u64;
208
209 let mode = (upper >> 62) as u8;
211
212 let (position_bits, lbh_bits) = match mode {
213 0 => (8, 54),
214 1 => (16, 46),
215 2 => (24, 38),
216 3 => (31, 31),
217 _ => unreachable!(
218 "Invalid mode {} in PositionalSequenceHash - value may be corrupted",
219 mode
220 ),
221 };
222
223 let lbh_mask = (1u64 << lbh_bits) - 1;
225 let position_mask = (1u64 << position_bits) - 1;
226
227 let lbh = upper & lbh_mask;
229 let position = (upper >> lbh_bits) & position_mask;
230
231 (mode, position, lbh)
232 }
233}
234
235impl std::fmt::Debug for PositionalSequenceHash {
236 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
237 f.debug_struct("PositionalSequenceHash")
238 .field("sequence_hash", &self.sequence_hash())
239 .field("local_block_hash", &self.local_block_hash())
240 .field("position", &self.position())
241 .finish()
242 }
243}
244
245#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
261#[serde(transparent)]
262pub struct PositionalLineageHash(#[serde(with = "serde_bytes_u128")] u128);
263
264impl PositionalLineageHash {
265 pub fn new(
280 current_seq_hash: SequenceHash,
281 parent_seq_hash: Option<SequenceHash>,
282 position: u64,
283 ) -> Self {
284 if position >= (1u64 << 24) {
285 panic!(
286 "Position {} exceeds maximum supported value (2^24 - 1 = 16,777,215)",
287 position
288 );
289 }
290
291 let mode = Self::select_mode(position);
292 let (position_bits, parent_bits, current_bits) = Self::bit_layout(mode);
293
294 let next_mode = Self::select_mode(position + 1);
298 let (_, next_parent_bits, _) = Self::bit_layout(next_mode);
299
300 let aligned_current_bits = current_bits.min(next_parent_bits);
302
303 let position_mask = (1u128 << position_bits) - 1;
305 let parent_mask = (1u128 << parent_bits) - 1;
306 let current_mask = (1u128 << aligned_current_bits) - 1;
307
308 let position_part = (position as u128) & position_mask;
310 let parent_part = (parent_seq_hash.unwrap_or(0) as u128) & parent_mask;
311 let current_part = (current_seq_hash as u128) & current_mask;
312
313 let value = ((mode as u128) << 126)
316 | (position_part << (parent_bits + current_bits))
317 | (parent_part << current_bits)
318 | current_part;
319
320 PositionalLineageHash(value)
321 }
322
323 pub fn position(&self) -> u64 {
325 let mode = self.mode();
326 let (position_bits, parent_bits, current_bits) = Self::bit_layout(mode);
327 let position_mask = (1u128 << position_bits) - 1;
328 ((self.0 >> (parent_bits + current_bits)) & position_mask) as u64
329 }
330
331 pub fn current_hash_fragment(&self) -> u64 {
333 let mode = self.mode();
334 let (_, _, current_bits) = Self::bit_layout(mode);
335 let current_mask = (1u128 << current_bits) - 1;
336 (self.0 & current_mask) as u64
337 }
338
339 pub fn parent_hash_fragment(&self) -> u64 {
341 let mode = self.mode();
342 let (_, parent_bits, current_bits) = Self::bit_layout(mode);
343 let parent_mask = (1u128 << parent_bits) - 1;
344 ((self.0 >> current_bits) & parent_mask) as u64
345 }
346
347 pub fn mode(&self) -> u8 {
349 (self.0 >> 126) as u8
350 }
351
352 #[inline(always)]
354 pub fn as_u128(&self) -> u128 {
355 self.0
356 }
357
358 fn select_mode(position: u64) -> u8 {
360 if position < (1u64 << 8) {
361 0 } else if position < (1u64 << 16) {
363 1 } else {
365 2 }
367 }
368
369 fn bit_layout(mode: u8) -> (u32, u32, u32) {
371 match mode {
372 0 => (8, 59, 59), 1 => (16, 55, 55), 2 => (24, 51, 51), _ => unreachable!(
376 "Invalid mode {} in PositionalLineageHash; mode must be 0, 1, or 2",
377 mode
378 ),
379 }
380 }
381}
382
383impl PositionalLineageHash {
384 fn format_impl(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385 let position = self.position();
386 let current_hash = self.current_hash_fragment();
387 let current_hash_b58 = bs58::encode(current_hash.to_be_bytes()).into_string();
388
389 if position == 0 {
390 write!(f, "{}:{}", position, current_hash_b58)
391 } else {
392 let parent_hash = self.parent_hash_fragment();
393 let parent_hash_b58 = bs58::encode(parent_hash.to_be_bytes()).into_string();
394 write!(f, "{}:{}:{}", position, current_hash_b58, parent_hash_b58)
395 }
396 }
397}
398
399impl std::fmt::Debug for PositionalLineageHash {
400 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
401 self.format_impl(f)
402 }
403}
404
405impl std::fmt::Display for PositionalLineageHash {
406 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407 self.format_impl(f)
408 }
409}
410
411impl std::cmp::PartialOrd for PositionalLineageHash {
412 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
413 Some(self.cmp(other))
414 }
415}
416
417impl std::cmp::Ord for PositionalLineageHash {
418 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
421 self.position()
422 .cmp(&other.position())
423 .then_with(|| {
424 self.current_hash_fragment()
425 .cmp(&other.current_hash_fragment())
426 })
427 .then_with(|| self.0.cmp(&other.0))
428 }
429}
430
431#[derive(Debug, Clone, Dissolve, Default, Eq)]
435pub struct Tokens(Vec<Token>);
436
437impl AsRef<[Token]> for Tokens {
438 fn as_ref(&self) -> &[Token] {
439 &self.0
440 }
441}
442
443impl std::ops::Deref for Tokens {
444 type Target = [Token];
445
446 fn deref(&self) -> &Self::Target {
447 &self.0
448 }
449}
450
451impl std::borrow::Borrow<[Token]> for Tokens {
452 fn borrow(&self) -> &[Token] {
453 &self.0
454 }
455}
456
457impl From<Vec<Token>> for Tokens {
458 fn from(tokens: Vec<Token>) -> Self {
459 Tokens(tokens)
460 }
461}
462
463impl From<&[Token]> for Tokens {
464 fn from(tokens: &[Token]) -> Self {
465 Tokens(tokens.to_vec())
466 }
467}
468
469impl From<Vec<usize>> for Tokens {
470 fn from(tokens: Vec<usize>) -> Self {
471 Tokens(
472 tokens
473 .into_iter()
474 .map(|t| t.try_into().expect("Token ID exceeds u32::MAX"))
475 .collect(),
476 )
477 }
478}
479
480impl From<Vec<i32>> for Tokens {
481 fn from(tokens: Vec<i32>) -> Self {
483 Tokens(tokens.into_iter().map(|t| t as u32).collect())
484 }
485}
486
487impl From<&[i32]> for Tokens {
488 fn from(tokens: &[i32]) -> Self {
490 Tokens(tokens.iter().map(|&t| t as u32).collect())
491 }
492}
493
494impl From<Tokens> for Vec<Token> {
495 fn from(tokens: Tokens) -> Self {
496 tokens.0
497 }
498}
499
500impl PartialEq<Vec<Token>> for Tokens {
503 fn eq(&self, other: &Vec<Token>) -> bool {
504 self.0 == *other
505 }
506}
507
508impl PartialEq<Tokens> for Vec<Token> {
509 fn eq(&self, other: &Tokens) -> bool {
510 *self == other.0
511 }
512}
513
514impl PartialEq<[Token]> for Tokens {
515 fn eq(&self, other: &[Token]) -> bool {
516 self.0.as_slice() == other
517 }
518}
519
520impl PartialEq<Tokens> for &[Token] {
521 fn eq(&self, other: &Tokens) -> bool {
522 *self == other.0.as_slice()
523 }
524}
525
526impl PartialEq for Tokens {
527 fn eq(&self, other: &Self) -> bool {
528 self.0 == other.0
529 }
530}
531
532impl PartialEq<&[Token]> for Tokens {
535 fn eq(&self, other: &&[Token]) -> bool {
536 self.0.as_slice() == *other
537 }
538}
539
540impl Tokens {
541 fn with_capacity(capacity: usize) -> Self {
542 Tokens(Vec::with_capacity(capacity))
543 }
544
545 pub fn into_sequence(self, block_size: u32, salt_hash: Option<SaltHash>) -> TokenBlockSequence {
555 TokenBlockSequence::new(self, block_size, salt_hash)
556 }
557}
558
559#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
561pub enum TokenBlockError {
562 #[error("TokenBlock is full")]
564 Full,
565
566 #[error("TokenBlock is incomplete")]
568 Incomplete,
569
570 #[error("TokenBlock is empty")]
572 Empty,
573
574 #[error("TokenBlock has insufficient tokens")]
576 InsufficientTokens,
577}
578
579#[derive(Debug, PartialEq)] pub struct PartialTokenBlock {
585 tokens: Tokens,
586 block_size: u32,
587 salt_hash: SaltHash,
588 parent_sequence_hash: Option<SequenceHash>,
589 position: usize, }
591
592impl PartialTokenBlock {
593 pub(crate) fn create_sequence_root(block_size: u32, salt_hash: SaltHash) -> Self {
600 Self {
601 tokens: Tokens::with_capacity(block_size as usize),
602 block_size,
603 salt_hash,
604 parent_sequence_hash: None, position: 0, }
607 }
608
609 pub(crate) fn push_tokens(&mut self, tokens: Tokens) -> Tokens {
622 let remaining_space = self.remaining();
623
624 if remaining_space == 0 {
625 return tokens; }
627
628 if tokens.0.len() <= remaining_space {
629 self.tokens.0.extend(tokens.0);
631 Tokens::default() } else {
633 let (to_add, remaining) = tokens.0.split_at(remaining_space);
635 self.tokens.0.extend_from_slice(to_add);
636 Tokens(remaining.to_vec()) }
638 }
639
640 pub(crate) fn push_token(&mut self, token: Token) -> Result<(), TokenBlockError> {
647 if self.tokens.0.len() >= self.block_size as usize {
648 return Err(TokenBlockError::Full);
649 }
650 self.tokens.0.push(token);
651 Ok(())
652 }
653
654 pub(crate) fn pop_tokens(&mut self, count: usize) -> Result<(), TokenBlockError> {
665 if self.tokens.0.len() < count {
666 return Err(TokenBlockError::InsufficientTokens);
667 }
668 self.tokens.0.truncate(self.tokens.0.len() - count);
669 Ok(())
670 }
671
672 pub fn commit(&mut self) -> Result<TokenBlock, TokenBlockError> {
684 if self.tokens.0.len() != self.block_size as usize {
685 return Err(TokenBlockError::Incomplete);
687 }
688
689 let tokens = std::mem::replace(
691 &mut self.tokens,
692 Tokens::with_capacity(self.block_size as usize),
693 );
694
695 let chunk = TokenBlockChunk::new(tokens, self.salt_hash);
696 let block = TokenBlock::from_chunk(chunk, self.parent_sequence_hash, self.position);
697
698 self.parent_sequence_hash = Some(block.sequence_hash());
700 self.position += 1; Ok(block)
704 }
705
706 pub fn remaining(&self) -> usize {
708 (self.block_size as usize).saturating_sub(self.tokens.0.len())
710 }
711
712 pub fn len(&self) -> usize {
714 self.tokens.0.len()
715 }
716
717 pub fn is_empty(&self) -> bool {
719 self.tokens.0.is_empty()
720 }
721
722 pub fn tokens(&self) -> &Tokens {
724 &self.tokens
725 }
726}
727
728impl std::ops::Deref for PartialTokenBlock {
730 type Target = Tokens;
731
732 fn deref(&self) -> &Self::Target {
733 &self.tokens
734 }
735}
736
737#[derive(Debug)] struct TokenBlockChunk {
743 tokens: Tokens,
744 salt_hash: SaltHash,
745 block_hash: BlockHash,
746}
747
748impl TokenBlockChunk {
749 fn new(tokens: Tokens, salt_hash: SaltHash) -> Self {
751 let block_hash = compute_hash_v2(cast_slice(&tokens), salt_hash);
752 Self {
753 tokens,
754 salt_hash,
755 block_hash,
756 }
757 }
758
759 fn from_tokens(tokens: &[Token], salt_hash: SaltHash) -> Self {
761 let block_hash = compute_hash_v2(cast_slice(tokens), salt_hash);
762 Self {
763 tokens: tokens.into(), salt_hash,
765 block_hash,
766 }
767 }
768}
769
770#[derive(Debug, Clone, Default, PartialEq)] pub struct TokenBlock {
776 tokens: Tokens,
777 salt_hash: SaltHash,
778 block_hash: BlockHash,
779 sequence_hash: SequenceHash,
780 parent_sequence_hash: Option<SequenceHash>,
781 positional_sequence_hash: PositionalSequenceHash,
782 positional_lineage_hash: PositionalLineageHash,
783}
784
785impl TokenBlock {
786 pub fn next_block(&self) -> PartialTokenBlock {
790 PartialTokenBlock {
791 tokens: Tokens::with_capacity(self.tokens.len()),
792 block_size: self.tokens.len() as u32, salt_hash: self.salt_hash,
794 parent_sequence_hash: Some(self.sequence_hash), position: self.position() as usize + 1, }
797 }
798
799 fn from_chunk(
803 chunk: TokenBlockChunk,
804 parent_sequence_hash: Option<SequenceHash>,
805 position: usize,
806 ) -> Self {
807 let sequence_hash = match parent_sequence_hash {
808 Some(parent) => {
809 compute_hash_v2(cast_slice(&[parent, chunk.block_hash]), chunk.salt_hash)
811 }
812 None => {
813 chunk.block_hash
815 }
816 };
817
818 let positional_sequence_hash = PositionalSequenceHash::new(
819 sequence_hash,
820 position as u64,
821 chunk.block_hash, );
823
824 let positional_lineage_hash =
825 PositionalLineageHash::new(sequence_hash, parent_sequence_hash, position as u64);
826
827 Self {
828 tokens: chunk.tokens,
829 salt_hash: chunk.salt_hash,
830 block_hash: chunk.block_hash,
831 sequence_hash,
832 parent_sequence_hash,
833 positional_sequence_hash,
834 positional_lineage_hash,
835 }
836 }
837
838 pub fn tokens(&self) -> &Tokens {
840 &self.tokens
841 }
842
843 pub fn salt_hash(&self) -> SaltHash {
845 self.salt_hash
846 }
847
848 pub fn block_hash(&self) -> BlockHash {
850 self.block_hash
851 }
852
853 pub fn sequence_hash(&self) -> SequenceHash {
855 self.sequence_hash
856 }
857
858 pub fn parent_sequence_hash(&self) -> Option<SequenceHash> {
860 self.parent_sequence_hash
861 }
862
863 pub fn block_size(&self) -> usize {
865 self.tokens.0.len()
866 }
867
868 pub fn positional_sequence_hash(&self) -> PositionalSequenceHash {
870 self.positional_sequence_hash
871 }
872
873 pub fn positional_lineage_hash(&self) -> PositionalLineageHash {
875 self.positional_lineage_hash
876 }
877
878 pub fn position(&self) -> u64 {
880 self.positional_sequence_hash.position()
881 }
882}
883
884impl PositionalHash for PositionalSequenceHash {
885 fn position(&self) -> u64 {
886 self.position()
887 }
888}
889
890impl PositionalHash for PositionalLineageHash {
891 fn position(&self) -> u64 {
892 self.position()
893 }
894}
895
896#[derive(Debug, PartialEq)]
911pub struct TokenBlockSequence {
912 blocks: Vec<TokenBlock>,
913 current_block: PartialTokenBlock,
914 salt_hash: SaltHash,
915 block_size: usize,
916}
917
918impl TokenBlockSequence {
919 pub fn new(tokens: Tokens, block_size: u32, salt_hash: Option<SaltHash>) -> Self {
934 assert!(block_size > 0, "block_size must be greater than 0");
935 let salt_hash = salt_hash.unwrap_or(0);
936 let (blocks, current_block) = Self::split_tokens(&tokens, block_size, salt_hash);
937
938 Self {
939 blocks,
940 current_block,
941 salt_hash,
942 block_size: block_size as usize,
943 }
944 }
945
946 pub fn extend(&mut self, tokens: Tokens) -> Result<Option<Range<usize>>, TokenBlockError> {
963 let start_block_index = self.blocks.len();
964 let mut tokens_to_append = tokens;
965
966 while !tokens_to_append.is_empty() {
967 let remaining_in_current = self.current_block.remaining();
968
969 if remaining_in_current == 0 {
970 let new_block = self.current_block.commit()?;
972 self.blocks.push(new_block);
973 }
975
976 let available_tokens = tokens_to_append;
978 tokens_to_append = self.current_block.push_tokens(available_tokens);
979
980 if self.current_block.remaining() == 0 {
982 let new_block = self.current_block.commit()?;
985 self.blocks.push(new_block);
986 }
987 }
988
989 let end_block_index = self.blocks.len();
990 if start_block_index == end_block_index {
991 Ok(None) } else {
993 Ok(Some(start_block_index..end_block_index))
994 }
995 }
996
997 pub fn append(&mut self, token: Token) -> Result<Option<usize>, TokenBlockError> {
1014 if self.current_block.remaining() == 0 {
1015 let new_block = self.current_block.commit()?;
1016 self.blocks.push(new_block);
1017 }
1018
1019 self.current_block.push_token(token)?;
1020 if self.current_block.remaining() != 0 {
1021 return Ok(None);
1022 }
1023
1024 let completed_idx = self.blocks.len();
1025 let new_block = self.current_block.commit()?;
1026 self.blocks.push(new_block);
1027 Ok(Some(completed_idx))
1028 }
1029
1030 pub fn truncate(&mut self, len: usize) -> Result<(), TokenBlockError> {
1049 let current_total_len = self.total_tokens();
1050 if len >= current_total_len {
1051 return Ok(()); }
1053
1054 let n = current_total_len - len; {
1058 let current_len = self.current_block.len();
1059 let block_size = self.current_block.block_size.max(1);
1061
1062 if n <= current_len {
1063 self.current_block.pop_tokens(n)?;
1065 } else {
1066 let tokens_to_pop_from_blocks = n - current_len;
1068
1069 let num_blocks_to_affect = tokens_to_pop_from_blocks.div_ceil(block_size as usize);
1071
1072 if num_blocks_to_affect > self.blocks.len() {
1074 debug_assert!(
1076 false,
1077 "Truncate calculation error: trying to pop too many blocks."
1078 );
1079 return Err(TokenBlockError::InsufficientTokens);
1080 }
1081
1082 let source_block_index = self.blocks.len() - num_blocks_to_affect;
1084
1085 let num_full_blocks_completely_popped = num_blocks_to_affect - 1;
1087 let num_tokens_to_pop_from_source_block = tokens_to_pop_from_blocks
1088 - num_full_blocks_completely_popped * block_size as usize;
1089 let num_tokens_to_keep_in_new_partial =
1090 (block_size as usize).saturating_sub(num_tokens_to_pop_from_source_block);
1091
1092 let new_partial_tokens = if num_tokens_to_keep_in_new_partial > 0 {
1094 self.blocks[source_block_index].tokens().as_ref()
1095 [..num_tokens_to_keep_in_new_partial]
1096 .to_vec()
1097 } else {
1098 Vec::new()
1099 };
1100
1101 self.blocks.truncate(source_block_index);
1103
1104 self.current_block.tokens = Tokens(new_partial_tokens);
1106 self.current_block.parent_sequence_hash =
1108 self.blocks.last().map(|b| b.sequence_hash());
1109 self.current_block.position = self.blocks.len();
1111 }
1113 }
1114 Ok(())
1115 }
1116
1117 pub fn unwind(&mut self, count: usize) -> Result<(), TokenBlockError> {
1131 let current_total_len = self.total_tokens();
1132 if count > current_total_len {
1133 return Err(TokenBlockError::InsufficientTokens);
1135 }
1136
1137 let len = current_total_len - count;
1139 self.truncate(len)
1140 }
1141
1142 pub fn reset(&mut self) {
1144 self.blocks.clear();
1145 self.current_block =
1146 PartialTokenBlock::create_sequence_root(self.block_size as u32, self.salt_hash);
1147 }
1148
1149 pub fn pop(&mut self) -> Option<Token> {
1158 let current_total_len = self.total_tokens();
1159 if current_total_len == 0 {
1160 return None;
1161 }
1162
1163 let last_token = if !self.current_block.tokens.is_empty() {
1166 *self
1168 .current_block
1169 .tokens
1170 .last()
1171 .expect("Current block checked for non-empty")
1172 } else {
1173 let last_block = self
1175 .blocks
1176 .last()
1177 .expect("Sequence is not empty but has no blocks and empty current block?");
1178 *last_block
1179 .tokens()
1180 .last()
1181 .expect("Last block cannot be empty")
1182 };
1183
1184 match self.truncate(current_total_len - 1) {
1187 Ok(_) => Some(last_token),
1188 Err(_) => {
1189 debug_assert!(
1192 false,
1193 "truncate failed unexpectedly after checking length in pop"
1194 );
1195 None
1196 }
1197 }
1198 }
1199
1200 pub fn blocks(&self) -> &[TokenBlock] {
1202 &self.blocks
1203 }
1204
1205 pub fn last_complete_block(&self) -> Option<&TokenBlock> {
1207 self.blocks.last()
1208 }
1209
1210 pub fn current_block(&self) -> &PartialTokenBlock {
1212 &self.current_block
1213 }
1214
1215 pub fn into_parts(self) -> (Vec<TokenBlock>, PartialTokenBlock) {
1217 (self.blocks, self.current_block)
1218 }
1219
1220 pub fn block_size(&self) -> usize {
1222 self.block_size
1223 }
1224
1225 pub fn salt_hash(&self) -> SaltHash {
1227 self.salt_hash
1228 }
1229
1230 pub fn total_tokens(&self) -> usize {
1233 let block_size = self.current_block.block_size as usize;
1234 (self.blocks.len() * block_size) + self.current_block.len()
1235 }
1236
1237 pub fn tokens_at(&self, range: Range<usize>) -> Tokens {
1239 let total = self.total_tokens();
1240
1241 if range.start > range.end || range.end > total {
1243 return Tokens::default();
1244 }
1245
1246 if range.is_empty() {
1248 return Tokens::default();
1249 }
1250
1251 let mut result = Vec::with_capacity(range.len());
1252
1253 for i in range {
1254 if i < self.blocks.len() * self.block_size {
1255 let block_index = i / self.block_size;
1257 let token_index = i % self.block_size;
1258 result.push(self.blocks[block_index].tokens()[token_index]);
1259 } else {
1260 let current_block_index = i - (self.blocks.len() * self.block_size);
1262 result.push(self.current_block.tokens()[current_block_index]);
1263 }
1264 }
1265
1266 Tokens::from(result)
1267 }
1268
1269 pub fn split_tokens(
1287 tokens: &[Token],
1288 block_size: u32,
1289 salt_hash: u64,
1290 ) -> (Vec<TokenBlock>, PartialTokenBlock) {
1291 assert!(block_size > 0, "block_size must be greater than 0");
1292 let chunks: Vec<TokenBlockChunk> = tokens
1293 .as_ref()
1294 .chunks_exact(block_size as usize)
1295 .map(|chunk| TokenBlockChunk::from_tokens(chunk, salt_hash))
1296 .collect();
1297
1298 let mut result_blocks = Vec::with_capacity(chunks.len());
1299 let mut last_sequence_hash: Option<SequenceHash> = None;
1300
1301 for (position, chunk) in chunks.into_iter().enumerate() {
1303 let new_block = TokenBlock::from_chunk(chunk, last_sequence_hash, position);
1304 last_sequence_hash = Some(new_block.sequence_hash());
1305 result_blocks.push(new_block);
1306 }
1307
1308 let remainder = tokens
1310 .as_ref()
1311 .chunks_exact(block_size as usize)
1312 .remainder();
1313
1314 let next_position = result_blocks.len(); let mut partial_tokens = Tokens::with_capacity(block_size as usize);
1317 partial_tokens.0.extend_from_slice(remainder);
1318
1319 let current_block = PartialTokenBlock {
1320 tokens: partial_tokens,
1321 block_size,
1322 salt_hash,
1323 parent_sequence_hash: last_sequence_hash,
1325 position: next_position,
1326 };
1327
1328 (result_blocks, current_block)
1329 }
1330
1331 pub fn from_slice(tokens: &[Token], block_size: u32, salt_hash: Option<SaltHash>) -> Self {
1342 assert!(block_size > 0, "block_size must be greater than 0");
1343 let salt_hash = salt_hash.unwrap_or(0);
1344 let (blocks, current_block) = Self::split_tokens(tokens, block_size, salt_hash);
1345
1346 Self {
1347 blocks,
1348 current_block,
1349 salt_hash,
1350 block_size: block_size as usize,
1351 }
1352 }
1353}
1354
1355#[cfg(test)]
1356mod tests {
1357 use super::*;
1358 use bytemuck::cast_slice;
1359
1360 fn create_test_sequence(
1362 initial_tokens: &[Token],
1363 block_size: u32,
1364 salt_hash: Option<SaltHash>,
1365 ) -> TokenBlockSequence {
1366 TokenBlockSequence::new(Tokens::from(initial_tokens), block_size, salt_hash)
1367 }
1368
1369 const TEST_SALT_HASH: SaltHash = 1337;
1371 const HASH_1_4: BlockHash = 14643705804678351452; const SEQ_HASH_1_4: SequenceHash = HASH_1_4;
1373 const HASH_5_8: BlockHash = 16777012769546811212; const SEQ_HASH_5_8: SequenceHash = 4945711292740353085; const HASH_9_12: BlockHash = 483935686894639516; const SEQ_HASH_9_12: SequenceHash = 12583592247330656132; impl PartialTokenBlock {
1379 pub fn pop_token(&mut self) -> Result<(), TokenBlockError> {
1386 if self.tokens.0.is_empty() {
1387 return Err(TokenBlockError::Empty);
1388 }
1389 self.tokens.0.pop();
1390 Ok(())
1391 }
1392 }
1393
1394 #[test]
1395 fn test_validate_hash_constants() {
1396 let salt = TEST_SALT_HASH;
1397
1398 let tokens_1_4 = &[1u32, 2, 3, 4];
1400 let computed_hash_1_4 = compute_hash_v2(cast_slice(tokens_1_4), salt);
1401 assert_eq!(computed_hash_1_4, HASH_1_4, "Mismatch for HASH_1_4");
1402 assert_eq!(computed_hash_1_4, SEQ_HASH_1_4, "Mismatch for SEQ_HASH_1_4");
1404
1405 let tokens_5_8 = &[5u32, 6, 7, 8];
1407 let computed_hash_5_8 = compute_hash_v2(cast_slice(tokens_5_8), salt);
1408 assert_eq!(computed_hash_5_8, HASH_5_8, "Mismatch for HASH_5_8");
1409 let computed_seq_hash_5_8 = compute_hash_v2(cast_slice(&[SEQ_HASH_1_4, HASH_5_8]), salt);
1410 assert_eq!(
1411 computed_seq_hash_5_8, SEQ_HASH_5_8,
1412 "Mismatch for SEQ_HASH_5_8"
1413 );
1414
1415 let tokens_9_12 = &[9u32, 10, 11, 12];
1417 let computed_hash_9_12 = compute_hash_v2(cast_slice(tokens_9_12), salt);
1418 assert_eq!(computed_hash_9_12, HASH_9_12, "Mismatch for HASH_9_12");
1419 let computed_seq_hash_9_12 = compute_hash_v2(cast_slice(&[SEQ_HASH_5_8, HASH_9_12]), salt);
1420 assert_eq!(
1421 computed_seq_hash_9_12, SEQ_HASH_9_12,
1422 "Mismatch for SEQ_HASH_9_12"
1423 );
1424 }
1425
1426 #[test]
1427 fn test_positional_sequence_hash_encoding_decoding() {
1428 let seq_hash_0 = 0x1234567890ABCDEF;
1430 let position_0 = 100;
1431 let lbh_0 = 0xFEDCBA9876543210;
1432 let psh_0 = PositionalSequenceHash::new(seq_hash_0, position_0, lbh_0);
1433
1434 assert_eq!(psh_0.mode(), 0, "Position 100 should use mode 0");
1435 assert_eq!(psh_0.sequence_hash(), seq_hash_0);
1436 assert_eq!(psh_0.position(), position_0);
1437 assert_eq!(
1439 psh_0.local_block_hash(),
1440 lbh_0 & ((1u64 << 54) - 1),
1441 "LBH should be truncated to 54 bits"
1442 );
1443
1444 let position_1 = 1000;
1446 let psh_1 = PositionalSequenceHash::new(seq_hash_0, position_1, lbh_0);
1447
1448 assert_eq!(psh_1.mode(), 1, "Position 1000 should use mode 1");
1449 assert_eq!(psh_1.sequence_hash(), seq_hash_0);
1450 assert_eq!(psh_1.position(), position_1);
1451 assert_eq!(
1453 psh_1.local_block_hash(),
1454 lbh_0 & ((1u64 << 46) - 1),
1455 "LBH should be truncated to 46 bits"
1456 );
1457
1458 let position_2 = 100_000;
1460 let psh_2 = PositionalSequenceHash::new(seq_hash_0, position_2, lbh_0);
1461
1462 assert_eq!(psh_2.mode(), 2, "Position 100,000 should use mode 2");
1463 assert_eq!(psh_2.sequence_hash(), seq_hash_0);
1464 assert_eq!(psh_2.position(), position_2);
1465 assert_eq!(
1467 psh_2.local_block_hash(),
1468 lbh_0 & ((1u64 << 38) - 1),
1469 "LBH should be truncated to 38 bits"
1470 );
1471
1472 let position_3 = 20_000_000;
1474 let psh_3 = PositionalSequenceHash::new(seq_hash_0, position_3, lbh_0);
1475
1476 assert_eq!(psh_3.mode(), 3, "Position 20,000,000 should use mode 3");
1477 assert_eq!(psh_3.sequence_hash(), seq_hash_0);
1478 assert_eq!(psh_3.position(), position_3);
1479 assert_eq!(
1481 psh_3.local_block_hash(),
1482 lbh_0 & ((1u64 << 31) - 1),
1483 "LBH should be truncated to 31 bits"
1484 );
1485
1486 let position_255 = 255;
1488 let psh_255 = PositionalSequenceHash::new(seq_hash_0, position_255, lbh_0);
1489 assert_eq!(psh_255.mode(), 0, "Position 255 should use mode 0");
1490 assert_eq!(psh_255.position(), position_255);
1491
1492 let position_256 = 256;
1493 let psh_256 = PositionalSequenceHash::new(seq_hash_0, position_256, lbh_0);
1494 assert_eq!(psh_256.mode(), 1, "Position 256 should use mode 1");
1495 assert_eq!(psh_256.position(), position_256);
1496 }
1497
1498 #[test]
1499 fn test_positional_lineage_hash() {
1500 let current_hash_0 = 0x1234567890ABCDEF;
1502 let parent_hash_0 = 0xFEDCBA9876543210;
1503 let position_0 = 100;
1504 let plh_0 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_0);
1505
1506 assert_eq!(plh_0.mode(), 0, "Position 100 should use mode 0");
1507 assert_eq!(plh_0.position(), position_0);
1508 assert_eq!(
1510 plh_0.current_hash_fragment(),
1511 current_hash_0 & ((1u64 << 59) - 1),
1512 "Current hash should be truncated to 59 bits"
1513 );
1514 assert_eq!(
1515 plh_0.parent_hash_fragment(),
1516 parent_hash_0 & ((1u64 << 59) - 1),
1517 "Parent hash should be truncated to 59 bits"
1518 );
1519
1520 let position_1 = 1000;
1522 let plh_1 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_1);
1523
1524 assert_eq!(plh_1.mode(), 1, "Position 1000 should use mode 1");
1525 assert_eq!(plh_1.position(), position_1);
1526 assert_eq!(
1528 plh_1.current_hash_fragment(),
1529 current_hash_0 & ((1u64 << 55) - 1),
1530 "Current hash should be truncated to 55 bits"
1531 );
1532 assert_eq!(
1533 plh_1.parent_hash_fragment(),
1534 parent_hash_0 & ((1u64 << 55) - 1),
1535 "Parent hash should be truncated to 55 bits"
1536 );
1537
1538 let position_2 = 100_000;
1540 let plh_2 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_2);
1541
1542 assert_eq!(plh_2.mode(), 2, "Position 100,000 should use mode 2");
1543 assert_eq!(plh_2.position(), position_2);
1544 assert_eq!(
1546 plh_2.current_hash_fragment(),
1547 current_hash_0 & ((1u64 << 51) - 1),
1548 "Current hash should be truncated to 51 bits"
1549 );
1550 assert_eq!(
1551 plh_2.parent_hash_fragment(),
1552 parent_hash_0 & ((1u64 << 51) - 1),
1553 "Parent hash should be truncated to 51 bits"
1554 );
1555
1556 let position_255 = 255;
1558 let plh_255 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_255);
1559 assert_eq!(plh_255.mode(), 0, "Position 255 should use mode 0");
1560 assert_eq!(plh_255.position(), position_255);
1561
1562 let position_256 = 256;
1563 let plh_256 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_256);
1564 assert_eq!(plh_256.mode(), 1, "Position 256 should use mode 1");
1565 assert_eq!(plh_256.position(), position_256);
1566
1567 let position_65535 = 65535;
1568 let plh_65535 =
1569 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65535);
1570 assert_eq!(plh_65535.mode(), 1, "Position 65535 should use mode 1");
1571 assert_eq!(plh_65535.position(), position_65535);
1572
1573 let position_65536 = 65536;
1574 let plh_65536 =
1575 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65536);
1576 assert_eq!(plh_65536.mode(), 2, "Position 65536 should use mode 2");
1577 assert_eq!(plh_65536.position(), position_65536);
1578
1579 let plh_root = PositionalLineageHash::new(current_hash_0, None, 0);
1581 assert_eq!(plh_root.mode(), 0);
1582 assert_eq!(plh_root.position(), 0);
1583 assert_eq!(
1584 plh_root.parent_hash_fragment(),
1585 0,
1586 "Root should have zero parent hash"
1587 );
1588 assert_eq!(
1589 plh_root.current_hash_fragment(),
1590 current_hash_0 & ((1u64 << 59) - 1)
1591 );
1592
1593 let position_small = 100; let position_large = 1000; let plh_small =
1597 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_small);
1598 let plh_large =
1599 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_large);
1600
1601 let mask_55 = (1u64 << 55) - 1;
1603 assert_eq!(
1604 plh_large.current_hash_fragment(),
1605 plh_small.current_hash_fragment() & mask_55,
1606 "LSB alignment: mode 1 fragment should be subset of mode 0 fragment"
1607 );
1608 }
1609
1610 #[test]
1611 #[should_panic(expected = "Position 16777216 exceeds maximum supported value")]
1612 fn test_positional_lineage_hash_panic_on_large_position() {
1613 let current_hash = 0x1234567890ABCDEF;
1614 let parent_hash = 0xFEDCBA9876543210;
1615 let position = 1u64 << 24; let _ = PositionalLineageHash::new(current_hash, Some(parent_hash), position);
1617 }
1618
1619 #[test]
1620 fn test_positional_lineage_hash_mode_boundary_alignment() {
1621 let parent_hash = 0xFEDCBA9876543210;
1625 let current_hash_255 = 0x1234567890ABCDEF;
1626 let current_hash_256 = 0xABCDEF0123456789;
1627
1628 let plh_255 = PositionalLineageHash::new(current_hash_255, Some(parent_hash), 255);
1630 assert_eq!(plh_255.mode(), 0);
1631
1632 let plh_256 = PositionalLineageHash::new(current_hash_256, Some(current_hash_255), 256);
1635 assert_eq!(plh_256.mode(), 1);
1636
1637 let mask_55 = (1u64 << 55) - 1;
1641 assert_eq!(
1642 plh_256.parent_hash_fragment(),
1643 plh_255.current_hash_fragment() & mask_55,
1644 "Mode boundary: position 256's parent fragment should match position 255's current fragment (55 bits)"
1645 );
1646
1647 assert_eq!(
1650 plh_255.current_hash_fragment(),
1651 current_hash_255 & mask_55,
1652 "Position 255 should pre-truncate current hash to 55 bits for next mode compatibility"
1653 );
1654
1655 let current_hash_65535 = 0x1111222233334444;
1657 let current_hash_65536 = 0x5555666677778888;
1658
1659 let plh_65535 = PositionalLineageHash::new(current_hash_65535, Some(parent_hash), 65535);
1660 assert_eq!(plh_65535.mode(), 1);
1661
1662 let plh_65536 =
1663 PositionalLineageHash::new(current_hash_65536, Some(current_hash_65535), 65536);
1664 assert_eq!(plh_65536.mode(), 2);
1665
1666 let mask_51 = (1u64 << 51) - 1;
1668 assert_eq!(
1669 plh_65536.parent_hash_fragment(),
1670 plh_65535.current_hash_fragment() & mask_51,
1671 "Mode boundary: position 65536's parent fragment should match position 65535's current fragment (51 bits)"
1672 );
1673
1674 assert_eq!(
1675 plh_65535.current_hash_fragment(),
1676 current_hash_65535 & mask_51,
1677 "Position 65535 should pre-truncate current hash to 51 bits for next mode compatibility"
1678 );
1679 }
1680
1681 #[test]
1682 fn test_tokens_from() {
1683 let vec_u32: Vec<u32> = vec![1, 2, 3];
1684 let tokens_u32: Tokens = vec_u32.clone().into();
1685 assert_eq!(tokens_u32.0, vec_u32);
1686
1687 let slice_u32: &[u32] = &[4, 5];
1688 let tokens_slice_u32: Tokens = slice_u32.into();
1689 assert_eq!(tokens_slice_u32.0, vec![4, 5]);
1690
1691 let vec_i32: Vec<i32> = vec![-1, 0, 1]; let tokens_i32: Tokens = vec_i32.into();
1693 assert_eq!(tokens_i32.0, vec![u32::MAX, 0, 1]);
1694
1695 let slice_i32: &[i32] = &[100, 200];
1696 let tokens_slice_i32: Tokens = slice_i32.into();
1697 assert_eq!(tokens_slice_i32.0, vec![100, 200]);
1698
1699 let into_vec: Vec<u32> = tokens_slice_i32.into();
1700 assert_eq!(into_vec, vec![100, 200]);
1701 }
1702
1703 #[test]
1704 fn test_tokens_equality() {
1705 let tokens = Tokens::from(vec![1, 2, 3]);
1706 assert_eq!(tokens, vec![1, 2, 3]);
1707 assert_eq!(vec![1, 2, 3], tokens);
1708 assert_eq!(tokens, &[1, 2, 3][..]);
1709 assert_eq!(&[1, 2, 3][..], tokens);
1710 assert_eq!(tokens, Tokens::from(vec![1, 2, 3]));
1711 assert_ne!(tokens, Tokens::from(vec![1, 2, 4]));
1712 }
1713
1714 #[test]
1715 fn test_tokens_deref_asref() {
1716 let tokens = Tokens::from(vec![10, 20, 30]);
1717
1718 assert_eq!(tokens.len(), 3);
1720 assert_eq!(tokens[1], 20);
1721 let slice: &[Token] = &tokens;
1722 assert_eq!(slice, &[10, 20, 30]);
1723
1724 let as_ref_slice: &[Token] = tokens.as_ref();
1726 assert_eq!(as_ref_slice, &[10, 20, 30]);
1727
1728 let borrowed_slice: &[Token] = std::borrow::Borrow::borrow(&tokens);
1730 assert_eq!(borrowed_slice, &[10, 20, 30]);
1731 }
1732
1733 #[test]
1734 fn test_tokens_into_sequence() {
1735 let tokens = Tokens::from(vec![1, 2, 3, 4, 5]);
1736 let seq = tokens.into_sequence(3, Some(TEST_SALT_HASH));
1737 assert_eq!(seq.blocks().len(), 1);
1738 assert_eq!(seq.blocks[0].tokens().as_ref(), &[1, 2, 3]);
1739 assert_eq!(seq.current_block().tokens().as_ref(), &[4, 5]);
1740 assert_eq!(seq.salt_hash(), TEST_SALT_HASH);
1741 }
1742
1743 #[test]
1744 fn test_partial_block_ops() {
1745 let mut partial = PartialTokenBlock::create_sequence_root(3, TEST_SALT_HASH);
1746 assert_eq!(partial.len(), 0);
1747 assert_eq!(partial.remaining(), 3);
1748 assert!(partial.is_empty());
1749
1750 assert!(partial.push_token(1).is_ok());
1752 assert_eq!(partial.len(), 1);
1753 assert_eq!(partial.remaining(), 2);
1754 let remaining = partial.push_tokens(Tokens::from(vec![2, 3, 4]));
1755 assert_eq!(partial.len(), 3);
1756 assert_eq!(partial.remaining(), 0);
1757 assert_eq!(remaining.as_ref(), &[4]); assert_eq!(partial.tokens().as_ref(), &[1, 2, 3]);
1759
1760 assert_eq!(partial.push_token(5), Err(TokenBlockError::Full));
1762 let remaining_full = partial.push_tokens(Tokens::from(vec![5]));
1763 assert_eq!(remaining_full.as_ref(), &[5]);
1764
1765 assert!(partial.pop_token().is_ok());
1767 assert_eq!(partial.len(), 2);
1768 assert_eq!(partial.tokens().as_ref(), &[1, 2]);
1769 assert!(partial.pop_tokens(2).is_ok());
1770 assert!(partial.is_empty());
1771
1772 assert_eq!(partial.pop_token(), Err(TokenBlockError::Empty));
1774 assert_eq!(
1775 partial.pop_tokens(1),
1776 Err(TokenBlockError::InsufficientTokens)
1777 );
1778
1779 assert!(partial.push_token(10).is_ok());
1781 assert_eq!(partial.commit(), Err(TokenBlockError::Incomplete));
1782
1783 assert!(partial.push_token(11).is_ok());
1785 assert!(partial.push_token(12).is_ok());
1786 assert_eq!(partial.len(), 3);
1787 let commit_result = partial.commit();
1788 assert!(commit_result.is_ok());
1789 let committed_block = commit_result.unwrap();
1790 assert_eq!(committed_block.tokens().as_ref(), &[10, 11, 12]);
1791
1792 assert!(partial.is_empty());
1794 assert_eq!(
1795 partial.parent_sequence_hash,
1796 Some(committed_block.sequence_hash())
1797 );
1798 assert_eq!(partial.block_size, 3);
1799 }
1800
1801 #[test]
1802 fn test_token_block_creation_and_hashes() {
1803 let salt = TEST_SALT_HASH;
1804 let tokens1 = Tokens::from(vec![1, 2, 3, 4]);
1805 let chunk1 = TokenBlockChunk::new(tokens1.clone(), salt);
1806 let block1 = TokenBlock::from_chunk(chunk1, None, 0);
1807
1808 assert_eq!(block1.tokens(), &tokens1);
1809 assert_eq!(block1.salt_hash(), salt);
1810 assert_eq!(block1.parent_sequence_hash(), None);
1811 assert_eq!(block1.block_hash(), HASH_1_4);
1812 assert_eq!(block1.sequence_hash(), SEQ_HASH_1_4); assert_eq!(block1.position(), 0); let plh1 = block1.positional_lineage_hash();
1817 assert_eq!(plh1.position(), 0);
1818 assert_eq!(plh1.parent_hash_fragment(), 0); assert_eq!(
1820 plh1.current_hash_fragment(),
1821 SEQ_HASH_1_4 & ((1u64 << 59) - 1)
1822 ); let tokens2 = Tokens::from(vec![5, 6, 7, 8]);
1825 let chunk2 = TokenBlockChunk::new(tokens2.clone(), salt);
1826 let block2 = TokenBlock::from_chunk(chunk2, block1.parent_sequence_hash(), 1); assert_ne!(block2.sequence_hash(), SEQ_HASH_5_8);
1829
1830 let chunk2_correct = TokenBlockChunk::new(tokens2.clone(), salt);
1831 let block2_correct =
1832 TokenBlock::from_chunk(chunk2_correct, Some(block1.sequence_hash()), 1);
1833
1834 assert_eq!(block2_correct.tokens(), &tokens2);
1835 assert_eq!(block2_correct.salt_hash(), salt);
1836 assert_eq!(
1837 block2_correct.parent_sequence_hash(),
1838 Some(block1.sequence_hash())
1839 );
1840 assert_eq!(block2_correct.block_hash(), HASH_5_8);
1841 assert_eq!(block2_correct.sequence_hash(), SEQ_HASH_5_8);
1842 assert_eq!(block2_correct.position(), 1); let plh2 = block2_correct.positional_lineage_hash();
1846 assert_eq!(plh2.position(), 1);
1847 assert_eq!(
1848 plh2.parent_hash_fragment(),
1849 SEQ_HASH_1_4 & ((1u64 << 59) - 1)
1850 ); assert_eq!(
1852 plh2.current_hash_fragment(),
1853 SEQ_HASH_5_8 & ((1u64 << 59) - 1)
1854 ); }
1856
1857 #[test]
1858 fn test_new_sequence() {
1859 let seq_empty = create_test_sequence(&[], 4, Some(TEST_SALT_HASH));
1861 assert!(seq_empty.blocks().is_empty());
1862 assert!(seq_empty.current_block().is_empty());
1863 assert_eq!(seq_empty.total_tokens(), 0);
1864 assert_eq!(seq_empty.salt_hash(), TEST_SALT_HASH);
1865 assert_eq!(seq_empty.current_block().parent_sequence_hash, None);
1866
1867 let seq_partial = create_test_sequence(&[1, 2], 4, Some(TEST_SALT_HASH));
1869 assert!(seq_partial.blocks().is_empty());
1870 assert_eq!(seq_partial.current_block().tokens().as_ref(), &[1, 2]);
1871 assert_eq!(seq_partial.total_tokens(), 2);
1872 assert_eq!(seq_partial.current_block().parent_sequence_hash, None);
1873
1874 let seq_one_block = create_test_sequence(&[1, 2, 3, 4], 4, Some(TEST_SALT_HASH));
1876 assert_eq!(seq_one_block.blocks().len(), 1);
1877 assert!(seq_one_block.current_block().is_empty());
1878 assert_eq!(seq_one_block.total_tokens(), 4);
1879 assert_eq!(seq_one_block.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
1880 assert_eq!(seq_one_block.blocks[0].sequence_hash(), SEQ_HASH_1_4);
1881 assert_eq!(
1882 seq_one_block.current_block().parent_sequence_hash,
1883 Some(SEQ_HASH_1_4)
1884 );
1885
1886 let seq_multi = create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9], 4, Some(TEST_SALT_HASH));
1888 assert_eq!(seq_multi.blocks().len(), 2);
1889 assert_eq!(seq_multi.current_block().tokens().as_ref(), &[9]);
1890 assert_eq!(seq_multi.total_tokens(), 9);
1891 assert_eq!(seq_multi.blocks[0].sequence_hash(), SEQ_HASH_1_4);
1892 assert_eq!(seq_multi.blocks[1].sequence_hash(), SEQ_HASH_5_8);
1893 assert_eq!(
1894 seq_multi.current_block().parent_sequence_hash,
1895 Some(SEQ_HASH_5_8)
1896 );
1897
1898 assert_eq!(seq_multi.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]); assert_eq!(seq_multi.tokens_at(4..8).as_ref(), &[5, 6, 7, 8]); assert_eq!(seq_multi.tokens_at(8..9).as_ref(), &[9]); assert_eq!(seq_multi.tokens_at(2..6).as_ref(), &[3, 4, 5, 6]); assert_eq!(seq_multi.tokens_at(6..9).as_ref(), &[7, 8, 9]); assert_eq!(seq_multi.tokens_at(5..5).as_ref(), &[0u32; 0]); assert_eq!(seq_multi.tokens_at(10..15).as_ref(), &[0u32; 0]); let seq_no_salt = create_test_sequence(&[1, 2, 3, 4, 5], 4, None);
1909 assert_eq!(seq_no_salt.salt_hash(), 0);
1910 assert_eq!(seq_no_salt.blocks().len(), 1);
1911 assert_ne!(seq_no_salt.blocks[0].block_hash(), HASH_1_4); assert_eq!(seq_no_salt.current_block().tokens().as_ref(), &[5]);
1913 }
1914
1915 #[test]
1916 #[should_panic]
1917 fn test_new_sequence_zero_block_size() {
1918 let _ = create_test_sequence(&[1], 0, None);
1919 }
1920
1921 #[test]
1922 fn test_append_single_token() {
1923 let mut sequence =
1924 create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4, Some(TEST_SALT_HASH));
1925 assert_eq!(sequence.blocks().len(), 2);
1926 assert_eq!(sequence.current_block().tokens.len(), 2);
1927 assert_eq!(sequence.current_block().tokens, vec![9, 10]);
1928 assert_eq!(
1929 sequence.current_block().parent_sequence_hash,
1930 Some(SEQ_HASH_5_8)
1931 );
1932
1933 let completed_idx = sequence.append(11).unwrap();
1935 assert_eq!(completed_idx, None);
1936 assert_eq!(sequence.blocks().len(), 2);
1937 assert_eq!(sequence.current_block().tokens.as_ref(), &[9, 10, 11]);
1938
1939 let completed_idx = sequence.append(12).unwrap();
1942 assert_eq!(completed_idx, Some(2));
1943 assert_eq!(sequence.blocks().len(), 3);
1944 assert_eq!(sequence.current_block.tokens.as_ref(), &[0u32; 0]);
1945 assert_eq!(sequence.current_block.remaining(), 4);
1946 assert_eq!(
1947 sequence.current_block().parent_sequence_hash,
1948 Some(SEQ_HASH_9_12)
1949 ); let completed_idx_13 = sequence.append(13).unwrap();
1953 assert_eq!(completed_idx_13, None);
1954 assert_eq!(sequence.blocks().len(), 3);
1955 assert_eq!(sequence.blocks[2].tokens().as_ref(), &[9, 10, 11, 12]);
1956 assert_eq!(sequence.blocks[2].sequence_hash(), SEQ_HASH_9_12);
1957 assert_eq!(sequence.current_block.tokens.as_ref(), &[13]); assert_eq!(sequence.current_block.remaining(), 3);
1959 assert_eq!(
1960 sequence.current_block.parent_sequence_hash,
1961 Some(SEQ_HASH_9_12)
1962 ); }
1964
1965 #[test]
1966 fn test_extend() {
1967 let block_size = 4;
1968 let salt_hash = Some(TEST_SALT_HASH);
1969
1970 let mut seq1 = create_test_sequence(&[], block_size, salt_hash);
1972 let tokens1 = Tokens::from(vec![1, 2]);
1973 let completed1 = seq1.extend(tokens1).unwrap();
1974 assert_eq!(completed1, None); assert_eq!(seq1.blocks.len(), 0);
1976 assert_eq!(seq1.current_block.tokens.as_ref(), &[1, 2]);
1977 assert_eq!(seq1.current_block.remaining(), 2);
1978 assert_eq!(seq1.current_block.parent_sequence_hash, None); let mut seq2 = create_test_sequence(&[], block_size, salt_hash);
1982 let tokens2 = Tokens::from(vec![1, 2, 3, 4]);
1983 let completed2 = seq2.extend(tokens2).unwrap();
1984 assert_eq!(completed2, Some(0..1));
1985 assert_eq!(seq2.blocks.len(), 1);
1986 assert_eq!(seq2.current_block.tokens.as_ref(), &[0u32; 0]); assert_eq!(seq2.current_block.remaining(), 4);
1988 assert_eq!(seq2.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); let mut seq3 = create_test_sequence(&[], block_size, salt_hash);
1992 let tokens3 = Tokens::from(vec![1, 2, 3, 4, 5, 6]);
1993 let completed3 = seq3.extend(tokens3).unwrap();
1994 assert_eq!(completed3, Some(0..1)); assert_eq!(seq3.blocks.len(), 1);
1996 assert_eq!(seq3.current_block.tokens.as_ref(), &[5, 6]); assert_eq!(seq3.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
1998 assert_eq!(seq3.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
1999 assert_eq!(seq3.current_block.remaining(), 2);
2000
2001 let mut seq4 = create_test_sequence(&[], block_size, salt_hash);
2003 let tokens4 = Tokens::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
2004 let completed4 = seq4.extend(tokens4).unwrap();
2005 assert_eq!(completed4, Some(0..2)); assert_eq!(seq4.blocks.len(), 2); assert_eq!(seq4.current_block.tokens.as_ref(), &[0u32; 0]);
2008 assert_eq!(seq4.current_block.remaining(), 4);
2009 assert_eq!(seq4.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2010 assert_eq!(seq4.blocks[0].sequence_hash(), SEQ_HASH_1_4);
2011 assert_eq!(seq4.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8)); let mut seq5 = create_test_sequence(&[], block_size, salt_hash);
2015 let tokens5a = Tokens::from(vec![1, 2]);
2016 let completed5a = seq5.extend(tokens5a).unwrap();
2017 assert_eq!(completed5a, None);
2018 assert_eq!(seq5.blocks.len(), 0);
2019 assert_eq!(seq5.current_block.tokens.as_ref(), &[1, 2]);
2020
2021 let tokens5b = Tokens::from(vec![3, 4, 5]);
2022 let completed5b = seq5.extend(tokens5b).unwrap();
2023 assert_eq!(completed5b, Some(0..1)); assert_eq!(seq5.blocks.len(), 1);
2025 assert_eq!(seq5.current_block.tokens.as_ref(), &[5]);
2026 assert_eq!(seq5.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2027 assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2028 assert_eq!(seq5.current_block.remaining(), 3);
2029
2030 let tokens5c = Tokens::from(vec![6, 7, 8, 9, 10]);
2031 let completed5c = seq5.extend(tokens5c).unwrap();
2032 assert_eq!(completed5c, Some(1..2)); assert_eq!(seq5.blocks.len(), 2);
2034 assert_eq!(seq5.current_block.tokens.as_ref(), &[9, 10]);
2035 assert_eq!(seq5.blocks[1].tokens().as_ref(), &[5, 6, 7, 8]);
2036 assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2037 assert_eq!(seq5.current_block.remaining(), 2);
2038
2039 let mut seq6 = create_test_sequence(&[1], block_size, salt_hash);
2041 let completed6 = seq6.extend(Tokens::default()).unwrap();
2042 assert_eq!(completed6, None);
2043 assert_eq!(seq6.blocks.len(), 0);
2044 assert_eq!(seq6.current_block.tokens.as_ref(), &[1]);
2045 assert_eq!(seq6.total_tokens(), 1);
2046
2047 let mut seq7 = create_test_sequence(&[1, 2], block_size, salt_hash);
2049 let tokens7 = Tokens::from(vec![3, 4]);
2050 let completed7 = seq7.extend(tokens7).unwrap();
2051 assert_eq!(completed7, Some(0..1)); assert_eq!(seq7.blocks.len(), 1);
2053 assert_eq!(seq7.current_block.tokens.as_ref(), &[0u32; 0]); assert_eq!(seq7.current_block.remaining(), 4);
2055 assert_eq!(seq7.total_tokens(), 4);
2056 assert_eq!(seq7.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); assert_eq!(seq7.tokens_at(0..2).as_ref(), &[1, 2]);
2060 assert_eq!(seq7.tokens_at(1..3).as_ref(), &[2, 3]);
2061 assert_eq!(seq7.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
2062 assert_eq!(seq7.tokens_at(2..2).as_ref(), &[0u32; 0]); }
2064
2065 #[test]
2066 fn test_truncate() {
2067 let block_size = 4;
2068 let salt_hash = Some(TEST_SALT_HASH);
2069 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq1 = create_test_sequence(initial_tokens, block_size, salt_hash);
2073 assert!(seq1.truncate(9).is_ok());
2074 assert_eq!(seq1.total_tokens(), 9);
2075 assert_eq!(seq1.blocks().len(), 2);
2076 assert_eq!(seq1.current_block().tokens.as_ref(), &[9]);
2077 assert_eq!(
2078 seq1.current_block().parent_sequence_hash,
2079 Some(SEQ_HASH_5_8)
2080 );
2081
2082 let mut seq2 = create_test_sequence(initial_tokens, block_size, salt_hash);
2084 assert!(seq2.truncate(8).is_ok());
2085 assert_eq!(seq2.total_tokens(), 8);
2086 assert_eq!(seq2.blocks().len(), 2);
2087 assert!(seq2.current_block().tokens.is_empty());
2088 assert_eq!(
2089 seq2.current_block().parent_sequence_hash,
2090 Some(SEQ_HASH_5_8)
2091 );
2092
2093 let mut seq3 = create_test_sequence(initial_tokens, block_size, salt_hash);
2095 assert!(seq3.truncate(7).is_ok());
2096 assert_eq!(seq3.total_tokens(), 7);
2097 assert_eq!(seq3.blocks().len(), 1); assert_eq!(seq3.current_block().tokens.as_ref(), &[5, 6, 7]); assert_eq!(
2100 seq3.current_block().parent_sequence_hash,
2101 Some(SEQ_HASH_1_4)
2102 ); assert_eq!(seq3.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2104
2105 let mut seq4 = create_test_sequence(initial_tokens, block_size, salt_hash);
2107 assert!(seq4.truncate(4).is_ok());
2108 assert_eq!(seq4.total_tokens(), 4);
2109 assert_eq!(seq4.blocks().len(), 1); assert!(seq4.current_block().tokens.is_empty()); assert_eq!(
2112 seq4.current_block().parent_sequence_hash,
2113 Some(SEQ_HASH_1_4)
2114 );
2115 assert_eq!(seq4.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2116
2117 let mut seq5 = create_test_sequence(initial_tokens, block_size, salt_hash);
2119 assert!(seq5.truncate(3).is_ok());
2120 assert_eq!(seq5.total_tokens(), 3);
2121 assert!(seq5.blocks().is_empty()); assert_eq!(seq5.current_block().tokens.as_ref(), &[1, 2, 3]); assert_eq!(seq5.current_block().parent_sequence_hash, None); let mut seq6 = create_test_sequence(initial_tokens, block_size, salt_hash);
2127 assert!(seq6.truncate(0).is_ok());
2128 assert_eq!(seq6.total_tokens(), 0);
2129 assert!(seq6.blocks().is_empty());
2130 assert!(seq6.current_block().tokens.is_empty());
2131 assert_eq!(seq6.current_block().parent_sequence_hash, None);
2132
2133 let mut seq7 = create_test_sequence(initial_tokens, block_size, salt_hash);
2135 let original_state = (seq7.blocks.clone(), seq7.current_block.tokens.clone()); assert!(seq7.truncate(11).is_ok()); assert_eq!(seq7.total_tokens(), 10);
2138 assert_eq!(seq7.blocks, original_state.0);
2139 assert_eq!(seq7.current_block.tokens, original_state.1);
2140
2141 let mut seq8 = create_test_sequence(initial_tokens, block_size, salt_hash);
2143 let original_state = (seq8.blocks.clone(), seq8.current_block.tokens.clone());
2144 assert!(seq8.truncate(10).is_ok());
2145 assert_eq!(seq8.total_tokens(), 10);
2146 assert_eq!(seq8.blocks, original_state.0);
2147 assert_eq!(seq8.current_block.tokens, original_state.1);
2148
2149 let mut seq9 = create_test_sequence(&[], block_size, salt_hash);
2151 assert!(seq9.truncate(0).is_ok());
2152 assert_eq!(seq9.total_tokens(), 0);
2153 assert!(seq9.blocks().is_empty());
2154 assert!(seq9.current_block().tokens.is_empty());
2155
2156 let tokens10 = &[1, 2, 3, 4, 5, 6, 7, 8]; let mut seq10 = create_test_sequence(tokens10, block_size, salt_hash);
2159 assert_eq!(seq10.total_tokens(), 8);
2160 assert!(seq10.current_block().is_empty());
2161 assert!(seq10.truncate(4).is_ok()); assert_eq!(seq10.total_tokens(), 4);
2163 assert_eq!(seq10.blocks().len(), 1);
2164 assert!(seq10.current_block().tokens.is_empty());
2165 assert_eq!(
2166 seq10.current_block().parent_sequence_hash,
2167 Some(SEQ_HASH_1_4)
2168 );
2169
2170 let tokens11 = &[1, 2, 3, 4, 5, 6, 7, 8]; let mut seq11 = create_test_sequence(tokens11, block_size, salt_hash);
2173 assert!(seq11.truncate(3).is_ok()); assert_eq!(seq11.total_tokens(), 3);
2175 assert!(seq11.blocks().is_empty());
2176 assert_eq!(seq11.current_block().tokens.as_ref(), &[1, 2, 3]); assert_eq!(seq11.current_block().parent_sequence_hash, None);
2178 }
2179
2180 #[test]
2181 fn test_unwind() {
2182 let block_size = 4;
2183 let salt_hash = Some(TEST_SALT_HASH);
2184 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2188 assert!(seq.unwind(0).is_ok());
2189 assert_eq!(seq.total_tokens(), 10);
2190
2191 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2193 assert!(seq.unwind(1).is_ok());
2194 assert_eq!(seq.total_tokens(), 9);
2195 assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2196
2197 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2199 assert!(seq.unwind(3).is_ok());
2200 assert_eq!(seq.total_tokens(), 7);
2201 assert_eq!(seq.blocks.len(), 1);
2202 assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2203
2204 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2206 assert!(seq.unwind(10).is_ok());
2207 assert_eq!(seq.total_tokens(), 0);
2208 assert!(seq.blocks.is_empty());
2209 assert!(seq.current_block.is_empty());
2210
2211 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2213 assert_eq!(seq.unwind(11), Err(TokenBlockError::InsufficientTokens));
2214 assert_eq!(seq.total_tokens(), 10); let mut seq_empty = create_test_sequence(&[], block_size, salt_hash);
2218 assert_eq!(
2219 seq_empty.unwind(1),
2220 Err(TokenBlockError::InsufficientTokens)
2221 );
2222 }
2223
2224 #[test]
2225 fn test_pop() {
2226 let block_size = 4;
2227 let salt_hash = Some(TEST_SALT_HASH);
2228 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2231
2232 assert_eq!(seq.pop(), Some(10));
2234 assert_eq!(seq.total_tokens(), 9);
2235 assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2236 assert_eq!(seq.blocks.len(), 2);
2237
2238 assert_eq!(seq.pop(), Some(9));
2240 assert_eq!(seq.total_tokens(), 8);
2241 assert!(seq.current_block.is_empty());
2242 assert_eq!(seq.blocks.len(), 2);
2243 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2244
2245 assert_eq!(seq.pop(), Some(8));
2247 assert_eq!(seq.total_tokens(), 7);
2248 assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2249 assert_eq!(seq.blocks.len(), 1);
2250 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2251
2252 assert_eq!(seq.pop(), Some(7));
2254 assert_eq!(seq.pop(), Some(6));
2255 assert_eq!(seq.pop(), Some(5));
2256 assert_eq!(seq.total_tokens(), 4);
2257 assert!(seq.current_block.is_empty());
2258 assert_eq!(seq.blocks.len(), 1);
2259 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2260
2261 assert_eq!(seq.pop(), Some(4));
2263 assert_eq!(seq.total_tokens(), 3);
2264 assert_eq!(seq.current_block.tokens.as_ref(), &[1, 2, 3]);
2265 assert!(seq.blocks.is_empty());
2266 assert_eq!(seq.current_block.parent_sequence_hash, None);
2267
2268 assert_eq!(seq.pop(), Some(3));
2270 assert_eq!(seq.pop(), Some(2));
2271 assert_eq!(seq.pop(), Some(1));
2272 assert_eq!(seq.total_tokens(), 0);
2273 assert!(seq.current_block.is_empty());
2274 assert!(seq.blocks.is_empty());
2275
2276 assert_eq!(seq.pop(), None);
2278 assert_eq!(seq.total_tokens(), 0);
2279 }
2280
2281 #[test]
2282 fn test_total_tokens() {
2283 let block_size = 3;
2284 let salt_hash = Some(TEST_SALT_HASH);
2285
2286 let mut seq = create_test_sequence(&[], block_size, salt_hash);
2287 assert_eq!(seq.total_tokens(), 0);
2288
2289 seq.extend(Tokens::from(vec![1, 2])).unwrap();
2290 assert_eq!(seq.total_tokens(), 2);
2291
2292 seq.append(3).unwrap(); assert_eq!(seq.total_tokens(), 3);
2294
2295 seq.extend(Tokens::from(vec![4, 5, 6, 7])).unwrap(); assert_eq!(seq.total_tokens(), 7);
2297
2298 seq.pop().unwrap(); assert_eq!(seq.total_tokens(), 6);
2300
2301 seq.truncate(4).unwrap(); assert_eq!(seq.total_tokens(), 4);
2303
2304 seq.unwind(2).unwrap(); assert_eq!(seq.total_tokens(), 2);
2306 }
2307
2308 #[test]
2309 fn test_push_tokens_partial_block() {
2310 let mut partial = PartialTokenBlock::create_sequence_root(4, 1337);
2311
2312 let tokens = Tokens(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2313
2314 let remaining = partial.push_tokens(tokens);
2315 assert_eq!(partial.tokens.len(), 4);
2316 assert_eq!(remaining.len(), 6);
2317 }
2318
2319 #[test]
2324 fn test_positional_radix_tree_basic_operations() {
2325 use crate::PositionalRadixTree;
2326
2327 let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2329 assert!(tree.is_empty());
2330 assert_eq!(tree.len(), 0);
2331
2332 let tree2: PositionalRadixTree<i32> = PositionalRadixTree::default();
2334 assert!(tree2.is_empty());
2335
2336 let psh1 = PositionalSequenceHash::new(0x1234, 0, 0xABCD);
2338 let psh2 = PositionalSequenceHash::new(0x5678, 0, 0xEF01);
2339 let psh3 = PositionalSequenceHash::new(0x9ABC, 1, 0x2345);
2340
2341 tree.prefix(&psh1).insert(psh1, "value1".to_string());
2342 assert!(!tree.is_empty());
2343 assert_eq!(tree.len(), 1);
2344
2345 tree.prefix(&psh2).insert(psh2, "value2".to_string());
2346 assert_eq!(tree.len(), 2);
2347
2348 tree.prefix(&psh3).insert(psh3, "value3".to_string());
2349 assert_eq!(tree.len(), 3);
2350
2351 assert_eq!(
2353 tree.prefix(&psh1).get(&psh1).map(|v| v.clone()),
2354 Some("value1".to_string())
2355 );
2356 }
2357
2358 #[test]
2359 fn test_positional_radix_tree_with_lineage_hash() {
2360 use crate::PositionalRadixTree;
2361
2362 let tree: PositionalRadixTree<u32, PositionalLineageHash> = PositionalRadixTree::new();
2364 assert!(tree.is_empty());
2365
2366 let plh1 = PositionalLineageHash::new(0x1234, None, 0);
2367 let plh2 = PositionalLineageHash::new(0x5678, Some(0x1234), 1);
2368
2369 tree.prefix(&plh1).insert(plh1, 100);
2370 tree.prefix(&plh2).insert(plh2, 200);
2371
2372 assert_eq!(tree.len(), 2);
2373 assert_eq!(tree.prefix(&plh1).get(&plh1).map(|v| *v), Some(100));
2374 assert_eq!(tree.prefix(&plh2).get(&plh2).map(|v| *v), Some(200));
2375 }
2376
2377 #[test]
2378 fn test_positional_radix_tree_position_lookup() {
2379 use crate::PositionalRadixTree;
2380
2381 let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2382
2383 let psh0 = PositionalSequenceHash::new(0x1111, 0, 0xAAAA);
2385 let psh1 = PositionalSequenceHash::new(0x2222, 1, 0xBBBB);
2386 let psh2 = PositionalSequenceHash::new(0x3333, 2, 0xCCCC);
2387
2388 tree.prefix(&psh0).insert(psh0, "pos0".to_string());
2389 tree.prefix(&psh1).insert(psh1, "pos1".to_string());
2390 tree.prefix(&psh2).insert(psh2, "pos2".to_string());
2391
2392 assert!(tree.position(0).is_some());
2394 assert!(tree.position(1).is_some());
2395 assert!(tree.position(2).is_some());
2396 assert!(tree.position(3).is_none()); let pos0_map = tree.position(0).unwrap();
2400 assert_eq!(pos0_map.len(), 1);
2401 }
2402
2403 #[test]
2406 fn test_positional_sequence_hash_mode_2_and_3() {
2407 let position_mode2 = 100_000u64;
2409 let seq_hash = 0x1234567890ABCDEF;
2410 let block_hash = 0xFEDCBA9876543210;
2411
2412 let psh_mode2 = PositionalSequenceHash::new(seq_hash, position_mode2, block_hash);
2413 assert_eq!(psh_mode2.mode(), 2, "Position 100,000 should use mode 2");
2414 assert_eq!(psh_mode2.position(), position_mode2);
2415 assert_eq!(psh_mode2.sequence_hash(), seq_hash);
2416 assert_eq!(
2418 psh_mode2.local_block_hash(),
2419 block_hash & ((1u64 << 38) - 1)
2420 );
2421
2422 let position_mode3 = 100_000_000u64;
2424 let psh_mode3 = PositionalSequenceHash::new(seq_hash, position_mode3, block_hash);
2425 assert_eq!(
2426 psh_mode3.mode(),
2427 3,
2428 "Position 100,000,000 should use mode 3"
2429 );
2430 assert_eq!(psh_mode3.position(), position_mode3);
2431 assert_eq!(psh_mode3.sequence_hash(), seq_hash);
2432 assert_eq!(
2434 psh_mode3.local_block_hash(),
2435 block_hash & ((1u64 << 31) - 1)
2436 );
2437 }
2438
2439 #[test]
2440 fn test_positional_sequence_hash_as_u128() {
2441 let psh = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2442 let raw = psh.as_u128();
2443
2444 assert_eq!(raw & 0xFFFF_FFFF_FFFF_FFFF, 0x1234);
2446 assert!(raw > 0); let psh2 = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2450 assert_eq!(psh.as_u128(), psh2.as_u128());
2451 }
2452
2453 #[test]
2454 fn test_positional_sequence_hash_debug() {
2455 let psh = PositionalSequenceHash::new(0x1234567890ABCDEF, 42, 0xFEDCBA98);
2456 let debug_str = format!("{:?}", psh);
2457
2458 assert!(debug_str.contains("PositionalSequenceHash"));
2460 assert!(debug_str.contains("sequence_hash"));
2461 assert!(debug_str.contains("local_block_hash"));
2462 assert!(debug_str.contains("position"));
2463 }
2464
2465 #[test]
2468 fn test_positional_lineage_hash_debug_and_display() {
2469 let plh_root = PositionalLineageHash::new(0x123456789ABCDEF0, None, 0);
2471 let debug_root = format!("{:?}", plh_root);
2472 let display_root = format!("{}", plh_root);
2473
2474 assert!(debug_root.starts_with("0:"));
2476 assert!(display_root.starts_with("0:"));
2477 assert_eq!(debug_root.matches(':').count(), 1);
2479 assert_eq!(display_root.matches(':').count(), 1);
2480
2481 let plh_child = PositionalLineageHash::new(0xABCDEF0123456789, Some(0x123456789ABCDEF0), 5);
2483 let debug_child = format!("{:?}", plh_child);
2484 let display_child = format!("{}", plh_child);
2485
2486 assert!(debug_child.starts_with("5:"));
2488 assert!(display_child.starts_with("5:"));
2489 assert_eq!(debug_child.matches(':').count(), 2);
2491 assert_eq!(display_child.matches(':').count(), 2);
2492 }
2493
2494 #[test]
2495 fn test_positional_lineage_hash_as_u128() {
2496 let plh = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
2497 let raw = plh.as_u128();
2498
2499 assert!(raw > 0);
2500
2501 let plh2 = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
2503 assert_eq!(plh.as_u128(), plh2.as_u128());
2504
2505 let plh3 = PositionalLineageHash::new(0x1234, Some(0x5678), 11);
2507 assert_ne!(plh.as_u128(), plh3.as_u128());
2508 }
2509
2510 #[test]
2511 fn test_positional_lineage_hash_ord_by_position_then_current_fragment() {
2512 let at_5_low = PositionalLineageHash::new(0x10, Some(0x1111), 5);
2513 let at_5_high = PositionalLineageHash::new(0x20, Some(0x1111), 5);
2514 assert!(
2515 at_5_low.current_hash_fragment() < at_5_high.current_hash_fragment(),
2516 "test assumes distinct current fragments at the same position"
2517 );
2518 assert!(at_5_low < at_5_high);
2519 assert!(at_5_high > at_5_low);
2520
2521 let at_3 = PositionalLineageHash::new(0x99, Some(0x2222), 3);
2522 assert!(at_3 < at_5_low);
2523 assert!(at_5_high < PositionalLineageHash::new(0x01, Some(0x3333), 6));
2524 }
2525
2526 #[test]
2527 fn test_positional_lineage_hash_ord_tiebreak_parent_via_packed_u128() {
2528 let same_pos_same_current = PositionalLineageHash::new(0x1234, Some(0x100), 10);
2529 let same_pos_same_current_other_parent =
2530 PositionalLineageHash::new(0x1234, Some(0x200), 10);
2531 assert_eq!(same_pos_same_current.position(), 10);
2532 assert_eq!(
2533 same_pos_same_current.position(),
2534 same_pos_same_current_other_parent.position()
2535 );
2536 assert_eq!(
2537 same_pos_same_current.current_hash_fragment(),
2538 same_pos_same_current_other_parent.current_hash_fragment()
2539 );
2540 assert_ne!(same_pos_same_current, same_pos_same_current_other_parent);
2541 assert_ne!(
2542 same_pos_same_current.cmp(&same_pos_same_current_other_parent),
2543 std::cmp::Ordering::Equal
2544 );
2545 }
2546
2547 #[test]
2548 fn test_positional_lineage_hash_vec_sort_matches_ord() {
2549 let a = PositionalLineageHash::new(0x30, None, 0);
2550 let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
2551 let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
2552 let mut v = vec![b, a, c];
2553 v.sort();
2554 assert_eq!(v, vec![a, b, c]);
2555 }
2556
2557 #[test]
2558 fn test_positional_lineage_hash_itertools_sorted() {
2559 use itertools::Itertools;
2560
2561 let a = PositionalLineageHash::new(0x30, None, 0);
2562 let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
2563 let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
2564 let sorted: Vec<_> = vec![b, a, c].into_iter().sorted().collect();
2565 assert_eq!(sorted, vec![a, b, c]);
2566 }
2567
2568 #[test]
2571 fn test_tokens_from_vec_usize() {
2572 let usize_vec: Vec<usize> = vec![1, 2, 3, 4, 5];
2573 let tokens = Tokens::from(usize_vec);
2574
2575 assert_eq!(tokens.as_ref(), &[1u32, 2, 3, 4, 5]);
2576 assert_eq!(tokens.len(), 5);
2577 }
2578
2579 #[test]
2580 fn test_tokens_partial_eq_slice_ref() {
2581 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2582 let slice: &[Token] = &[1, 2, 3, 4];
2583
2584 assert!(tokens == slice);
2586
2587 let different_slice: &[Token] = &[1, 2, 3, 5];
2588 assert!(tokens != different_slice);
2589 }
2590
2591 #[test]
2594 fn test_token_block_accessors() {
2595 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2596 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2597
2598 let block = &seq.blocks()[0];
2599
2600 assert_eq!(block.block_size(), 4);
2602
2603 let psh = block.positional_sequence_hash();
2605 assert_eq!(psh.position(), 0);
2606
2607 let plh = block.positional_lineage_hash();
2609 assert_eq!(plh.position(), 0);
2610 assert_eq!(plh.parent_hash_fragment(), 0); }
2612
2613 #[test]
2614 fn test_positional_hash_trait_impls() {
2615 use crate::PositionalHash;
2616
2617 let psh = PositionalSequenceHash::new(0x1234, 42, 0xABCD);
2619 assert_eq!(PositionalHash::position(&psh), 42);
2620
2621 let plh = PositionalLineageHash::new(0x1234, None, 99);
2623 assert_eq!(PositionalHash::position(&plh), 99);
2624 }
2625
2626 #[test]
2629 fn test_sequence_pop_from_full_block() {
2630 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
2632 let mut seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
2633
2634 assert!(seq.current_block().is_empty());
2636 assert_eq!(seq.blocks().len(), 2);
2637 assert_eq!(seq.total_tokens(), 8);
2638
2639 let popped = seq.pop();
2641 assert_eq!(popped, Some(8));
2642 assert_eq!(seq.total_tokens(), 7);
2643 assert_eq!(seq.blocks().len(), 1);
2644 assert_eq!(seq.current_block().tokens.as_ref(), &[5, 6, 7]);
2645 }
2646
2647 #[test]
2648 #[allow(clippy::reversed_empty_ranges)] fn test_sequence_tokens_at_edge_cases() {
2650 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
2651 let seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
2652
2653 assert!(seq.tokens_at(3..2).is_empty());
2655
2656 assert!(seq.tokens_at(0..10).is_empty());
2658
2659 assert_eq!(seq.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
2661 assert_eq!(seq.tokens_at(4..5).as_ref(), &[5]);
2662 }
2663
2664 #[test]
2665 fn test_sequence_next_block() {
2666 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2667 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2668
2669 let block = &seq.blocks()[0];
2670 let next_partial = block.next_block();
2671
2672 assert!(next_partial.is_empty());
2674 assert_eq!(next_partial.remaining(), 4);
2675 assert_eq!(
2676 next_partial.parent_sequence_hash,
2677 Some(block.sequence_hash())
2678 );
2679 assert_eq!(next_partial.position, 1);
2680 }
2681
2682 #[test]
2683 fn test_sequence_reset() {
2684 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9]);
2685 let mut seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2686
2687 assert_eq!(seq.blocks().len(), 2);
2688 assert_eq!(seq.total_tokens(), 9);
2689
2690 seq.reset();
2691
2692 assert!(seq.blocks().is_empty());
2693 assert!(seq.current_block().is_empty());
2694 assert_eq!(seq.total_tokens(), 0);
2695 assert_eq!(seq.current_block().parent_sequence_hash, None);
2696 }
2697
2698 #[test]
2699 fn test_sequence_into_parts() {
2700 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
2701 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2702
2703 let (blocks, partial) = seq.into_parts();
2704
2705 assert_eq!(blocks.len(), 1);
2706 assert_eq!(partial.tokens.as_ref(), &[5]);
2707 }
2708
2709 #[test]
2710 fn test_sequence_last_complete_block() {
2711 let seq_empty = TokenBlockSequence::new(Tokens::default(), 4, None);
2713 assert!(seq_empty.last_complete_block().is_none());
2714
2715 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
2717 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2718 let last = seq.last_complete_block();
2719 assert!(last.is_some());
2720 assert_eq!(last.unwrap().tokens().as_ref(), &[5, 6, 7, 8]);
2721 }
2722
2723 #[test]
2724 fn test_positional_hashes_msgpack_roundtrip() {
2725 let psh = PositionalSequenceHash::new(0xDEAD_BEEF_CAFE_BABE, 12345, 0x0123_4567_89AB_CDEF);
2726 let bytes = rmp_serde::to_vec(&psh).expect("psh serialize");
2727 let decoded: PositionalSequenceHash =
2728 rmp_serde::from_slice(&bytes).expect("psh deserialize");
2729 assert_eq!(psh, decoded);
2730 assert_eq!(psh.as_u128(), decoded.as_u128());
2731
2732 let plh =
2733 PositionalLineageHash::new(0x1111_2222_3333_4444, Some(0x5555_6666_7777_8888), 256);
2734 let bytes = rmp_serde::to_vec(&plh).expect("plh serialize");
2735 let decoded: PositionalLineageHash =
2736 rmp_serde::from_slice(&bytes).expect("plh deserialize");
2737 assert_eq!(plh, decoded);
2738 assert_eq!(plh.as_u128(), decoded.as_u128());
2739
2740 let vec = vec![psh, PositionalSequenceHash::default(), psh];
2742 let bytes = rmp_serde::to_vec(&vec).expect("vec serialize");
2743 let decoded: Vec<PositionalSequenceHash> =
2744 rmp_serde::from_slice(&bytes).expect("vec deserialize");
2745 assert_eq!(vec, decoded);
2746 }
2747
2748 #[test]
2749 fn test_positional_hashes_json_roundtrip() {
2750 let psh = PositionalSequenceHash::new(0xAAAA_BBBB_CCCC_DDDD, 7, 0xEEEE_FFFF_0000_1111);
2752 let json = serde_json::to_string(&psh).expect("psh json serialize");
2753 let decoded: PositionalSequenceHash =
2754 serde_json::from_str(&json).expect("psh json deserialize");
2755 assert_eq!(psh, decoded);
2756
2757 let plh = PositionalLineageHash::new(0x1234_5678, Some(0xABCD_EF01), 42);
2758 let json = serde_json::to_string(&plh).expect("plh json serialize");
2759 let decoded: PositionalLineageHash =
2760 serde_json::from_str(&json).expect("plh json deserialize");
2761 assert_eq!(plh, decoded);
2762 }
2763}