1#![deny(missing_docs)]
5
6use bytemuck::cast_slice;
9use derive_getters::Dissolve;
10use std::ops::Range;
11
12pub mod blocks;
13mod radix;
14pub use radix::PositionalRadixTree;
15
16pub trait PositionalHash {
18 fn position(&self) -> u64;
20}
21
22pub type Token = u32;
24
25pub type Salt = Vec<u8>;
28
29pub type SaltHash = u64;
35
36pub type BlockHash = u64;
41
42pub type SequenceHash = u64;
47
48pub fn compute_hash_v2(data: &[u8], seed: u64) -> u64 {
53 xxhash_rust::xxh3::xxh3_64_with_seed(data, seed)
54}
55
56pub const CHAIN_XXH3_SEED: u64 = 1337;
62
63#[inline]
77pub fn compute_next_sequence_hash(
78 parent_sequence_hash: SequenceHash,
79 child_block_hash: BlockHash,
80) -> SequenceHash {
81 let combined = [parent_sequence_hash, child_block_hash];
82 compute_hash_v2(cast_slice(&combined), CHAIN_XXH3_SEED)
83}
84
85mod serde_bytes_u128 {
91 use serde::{Deserializer, Serializer};
92
93 pub fn serialize<S>(val: &u128, serializer: S) -> Result<S::Ok, S::Error>
94 where
95 S: Serializer,
96 {
97 serializer.serialize_bytes(&val.to_be_bytes())
98 }
99
100 pub fn deserialize<'de, D>(deserializer: D) -> Result<u128, D::Error>
101 where
102 D: Deserializer<'de>,
103 {
104 use serde::de::{self, SeqAccess, Visitor};
105 use std::fmt;
106
107 struct V;
108 impl<'de> Visitor<'de> for V {
109 type Value = [u8; 16];
110
111 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
112 f.write_str("16 bytes (msgpack bin) or a sequence of 16 u8 values")
113 }
114
115 fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<[u8; 16], E> {
116 v.try_into()
117 .map_err(|_| E::invalid_length(v.len(), &"16 bytes"))
118 }
119
120 fn visit_borrowed_bytes<E: de::Error>(self, v: &'de [u8]) -> Result<[u8; 16], E> {
121 self.visit_bytes(v)
122 }
123
124 fn visit_byte_buf<E: de::Error>(self, v: Vec<u8>) -> Result<[u8; 16], E> {
125 self.visit_bytes(&v)
126 }
127
128 fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<[u8; 16], A::Error> {
129 let mut arr = [0u8; 16];
130 for (i, slot) in arr.iter_mut().enumerate() {
131 *slot = seq
132 .next_element()?
133 .ok_or_else(|| de::Error::invalid_length(i, &"16 u8 elements"))?;
134 }
135 Ok(arr)
136 }
137 }
138
139 let arr = deserializer.deserialize_bytes(V)?;
140 Ok(u128::from_be_bytes(arr))
141 }
142}
143
144#[inline]
148pub fn compute_block_hash(block_bytes: &[u8], salt: SaltHash) -> BlockHash {
149 compute_hash_v2(block_bytes, salt)
150}
151
152#[inline]
156pub fn compute_salt_hash_from_bytes(payload: &[u8]) -> SaltHash {
157 compute_hash_v2(payload, 0)
158}
159
160#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
168pub struct TokenBlockMmInfo {
169 pub mm_hash: u64,
171 pub offset: usize,
173 pub length: usize,
175}
176
177pub const MM_SLOT_TAG_TOKEN: u8 = 0x00;
180pub const MM_SLOT_TAG_PLACEHOLDER: u8 = 0x01;
182
183impl TokenBlockMmInfo {
184 #[inline]
186 pub fn checked_end(&self) -> Option<usize> {
187 self.offset.checked_add(self.length)
188 }
189
190 #[inline]
196 pub fn end(&self) -> usize {
197 self.checked_end()
198 .expect("TokenBlockMmInfo::end overflowed usize; run was not validated")
199 }
200
201 #[inline]
205 pub fn covers(&self, position: usize) -> bool {
206 match self.checked_end() {
207 Some(end) => position >= self.offset && position < end,
208 None => false,
209 }
210 }
211}
212
213#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
215pub enum MmInfoError {
216 #[error(
218 "mm_info range starting at {offset} (length {length}) exceeds tokens length {tokens_len}"
219 )]
220 OutOfBounds {
221 offset: usize,
223 length: usize,
225 tokens_len: usize,
227 },
228 #[error("mm_info range starting at {offset} (length {length}) overflows usize")]
230 OffsetOverflow {
231 offset: usize,
233 length: usize,
235 },
236 #[error("mm_info ranges overlap at position {position}")]
238 Overlapping {
239 position: usize,
241 },
242 #[error("mm_info length must be greater than zero")]
244 EmptyRun,
245}
246
247pub fn validate_and_sort_mm_info(
255 mm_info: &[TokenBlockMmInfo],
256 tokens_len: usize,
257) -> Result<Vec<TokenBlockMmInfo>, MmInfoError> {
258 let mut sorted: Vec<TokenBlockMmInfo> = mm_info.to_vec();
259 sorted.sort_by_key(|m| m.offset);
260 let mut prev_end = 0usize;
261 for m in &sorted {
262 if m.length == 0 {
263 return Err(MmInfoError::EmptyRun);
264 }
265 let end = m
266 .offset
267 .checked_add(m.length)
268 .ok_or(MmInfoError::OffsetOverflow {
269 offset: m.offset,
270 length: m.length,
271 })?;
272 if end > tokens_len {
273 return Err(MmInfoError::OutOfBounds {
274 offset: m.offset,
275 length: m.length,
276 tokens_len,
277 });
278 }
279 if m.offset < prev_end {
280 return Err(MmInfoError::Overlapping { position: m.offset });
281 }
282 prev_end = end;
283 }
284 Ok(sorted)
285}
286
287fn block_has_mm(block_offset: usize, len: usize, mm_runs: &[TokenBlockMmInfo]) -> bool {
290 let block_end = block_offset.saturating_add(len);
291 mm_runs
292 .iter()
293 .any(|m| m.offset < block_end && m.end() > block_offset)
294}
295
296pub fn compute_block_bytes_with_mm(
327 tokens: &[Token],
328 block_offset: usize,
329 mm_runs: &[TokenBlockMmInfo],
330) -> Vec<u8> {
331 debug_assert!(
337 mm_runs.windows(2).all(|w| w[0].end() <= w[1].offset),
338 "compute_block_bytes_with_mm: mm_runs must be sorted by offset and non-overlapping (use validate_and_sort_mm_info)",
339 );
340 debug_assert!(
341 mm_runs.iter().all(|r| r.length > 0),
342 "compute_block_bytes_with_mm: mm_runs must have non-zero length",
343 );
344
345 if !block_has_mm(block_offset, tokens.len(), mm_runs) {
346 return cast_slice::<Token, u8>(tokens).to_vec();
349 }
350
351 const FRAME: usize = 13;
352 let mut out: Vec<u8> = Vec::with_capacity(tokens.len() * FRAME);
353 let mut run_idx = 0usize;
354 while run_idx < mm_runs.len() && mm_runs[run_idx].end() <= block_offset {
356 run_idx += 1;
357 }
358 for (s, &tok) in tokens.iter().enumerate() {
359 let g = block_offset + s;
360 while run_idx < mm_runs.len() && mm_runs[run_idx].end() <= g {
362 run_idx += 1;
363 }
364 if run_idx < mm_runs.len() && mm_runs[run_idx].covers(g) {
365 let run = &mm_runs[run_idx];
366 let run_offset = (g - run.offset) as u32;
367 out.push(MM_SLOT_TAG_PLACEHOLDER);
368 out.extend_from_slice(&run_offset.to_le_bytes());
369 out.extend_from_slice(&run.mm_hash.to_le_bytes());
370 } else {
371 out.push(MM_SLOT_TAG_TOKEN);
372 out.extend_from_slice(&tok.to_le_bytes());
373 out.extend_from_slice(&0u64.to_le_bytes());
374 }
375 }
376 out
377}
378
379#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
391#[serde(transparent)]
392pub struct PositionalSequenceHash(#[serde(with = "serde_bytes_u128")] u128);
393
394impl PositionalSequenceHash {
395 pub fn new(sequence_hash: SequenceHash, position: u64, local_block_hash: BlockHash) -> Self {
400 let mode = Self::select_mode(position);
401 let upper = Self::encode_upper(mode, position, local_block_hash);
402 let value = ((upper as u128) << 64) | (sequence_hash as u128);
403 PositionalSequenceHash(value)
404 }
405
406 pub fn sequence_hash(&self) -> SequenceHash {
408 (self.0 & 0xFFFF_FFFF_FFFF_FFFF) as u64
409 }
410
411 pub fn position(&self) -> u64 {
413 let (_, position, _) = self.decode_upper();
414 position
415 }
416
417 pub fn local_block_hash(&self) -> BlockHash {
419 let (_, _, lbh) = self.decode_upper();
420 lbh
421 }
422
423 pub fn mode(&self) -> u8 {
425 let (mode, _, _) = self.decode_upper();
426 mode
427 }
428
429 #[inline(always)]
431 pub fn as_u128(&self) -> u128 {
432 self.0
433 }
434
435 fn select_mode(position: u64) -> u8 {
437 if position < (1u64 << 8) {
438 0 } else if position < (1u64 << 16) {
440 1 } else if position < (1u64 << 24) {
442 2 } else if position < (1u64 << 31) {
444 3 } else {
446 panic!(
447 "Position {} exceeds maximum supported value (2^31 - 1)",
448 position
449 );
450 }
451 }
452
453 fn encode_upper(mode: u8, position: u64, local_block_hash: u64) -> u64 {
455 let (position_bits, lbh_bits) = match mode {
456 0 => (8, 54), 1 => (16, 46), 2 => (24, 38), 3 => (31, 31), _ => unreachable!(
461 "Invalid mode {} when encoding PositionalSequenceHash; mode must be 0, 1, 2, or 3",
462 mode
463 ),
464 };
465
466 let position_mask = (1u64 << position_bits) - 1;
468 let lbh_mask = (1u64 << lbh_bits) - 1;
469
470 let position_part = position & position_mask;
472 let lbh_part = local_block_hash & lbh_mask;
473
474 ((mode as u64) << 62) | (position_part << lbh_bits) | lbh_part
476 }
477
478 fn decode_upper(&self) -> (u8, u64, u64) {
480 let upper = (self.0 >> 64) as u64;
481
482 let mode = (upper >> 62) as u8;
484
485 let (position_bits, lbh_bits) = match mode {
486 0 => (8, 54),
487 1 => (16, 46),
488 2 => (24, 38),
489 3 => (31, 31),
490 _ => unreachable!(
491 "Invalid mode {} in PositionalSequenceHash - value may be corrupted",
492 mode
493 ),
494 };
495
496 let lbh_mask = (1u64 << lbh_bits) - 1;
498 let position_mask = (1u64 << position_bits) - 1;
499
500 let lbh = upper & lbh_mask;
502 let position = (upper >> lbh_bits) & position_mask;
503
504 (mode, position, lbh)
505 }
506}
507
508impl std::fmt::Debug for PositionalSequenceHash {
509 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
510 f.debug_struct("PositionalSequenceHash")
511 .field("sequence_hash", &self.sequence_hash())
512 .field("local_block_hash", &self.local_block_hash())
513 .field("position", &self.position())
514 .finish()
515 }
516}
517
518#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
538#[serde(transparent)]
539pub struct PositionalLineageHash(#[serde(with = "serde_bytes_u128")] u128);
540
541impl PositionalLineageHash {
542 pub fn new(
558 current_seq_hash: SequenceHash,
559 parent_seq_hash: Option<SequenceHash>,
560 position: u64,
561 ) -> Self {
562 if position >= (1u64 << 24) {
563 panic!(
564 "Position {} exceeds maximum supported value (2^24 - 1 = 16,777,215)",
565 position
566 );
567 }
568
569 let mode = Self::select_mode(position);
570 let (position_bits, parent_bits) = Self::bit_layout(mode);
571
572 let position_mask = (1u128 << position_bits) - 1;
573 let parent_mask = (1u128 << parent_bits) - 1;
574
575 let position_part = (position as u128) & position_mask;
576 let current_part = current_seq_hash as u128;
577 let parent_part = (parent_seq_hash.unwrap_or(0) as u128) & parent_mask;
578
579 let value = ((mode as u128) << 126)
581 | (position_part << (64 + parent_bits))
582 | (current_part << parent_bits)
583 | parent_part;
584
585 PositionalLineageHash(value)
586 }
587
588 pub fn root(block_hash: BlockHash) -> Self {
592 Self::new(block_hash, None, 0)
593 }
594
595 pub fn extend(&self, child_block_hash: BlockHash) -> Self {
606 let parent_seq = self.current_sequence_hash();
607 let child_seq = compute_next_sequence_hash(parent_seq, child_block_hash);
608 Self::new(child_seq, Some(parent_seq), self.position() + 1)
609 }
610
611 pub fn position(&self) -> u64 {
613 let mode = self.mode();
614 let (position_bits, parent_bits) = Self::bit_layout(mode);
615 let position_mask = (1u128 << position_bits) - 1;
616 ((self.0 >> (64 + parent_bits)) & position_mask) as u64
617 }
618
619 pub fn current_sequence_hash(&self) -> SequenceHash {
624 let mode = self.mode();
625 let (_, parent_bits) = Self::bit_layout(mode);
626 ((self.0 >> parent_bits) & 0xFFFF_FFFF_FFFF_FFFFu128) as u64
627 }
628
629 pub fn parent_hash_fragment(&self) -> u64 {
634 let mode = self.mode();
635 let (_, parent_bits) = Self::bit_layout(mode);
636 let parent_mask = (1u128 << parent_bits) - 1;
637 (self.0 & parent_mask) as u64
638 }
639
640 pub fn parent_fragment_for_child_position(&self, child_position: u64) -> u64 {
647 let child_mode = Self::select_mode(child_position);
648 let (_, child_parent_bits) = Self::bit_layout(child_mode);
649 let mask = (1u64 << child_parent_bits).wrapping_sub(1);
650 self.current_sequence_hash() & mask
651 }
652
653 pub fn mode(&self) -> u8 {
655 (self.0 >> 126) as u8
656 }
657
658 #[inline(always)]
660 pub fn as_u128(&self) -> u128 {
661 self.0
662 }
663
664 fn select_mode(position: u64) -> u8 {
666 if position < (1u64 << 8) {
667 0 } else if position < (1u64 << 16) {
669 1 } else {
671 2 }
673 }
674
675 fn bit_layout(mode: u8) -> (u32, u32) {
678 match mode {
679 0 => (8, 54), 1 => (16, 46), 2 => (24, 38), _ => unreachable!(
683 "Invalid mode {} in PositionalLineageHash; mode must be 0, 1, or 2",
684 mode
685 ),
686 }
687 }
688}
689
690impl PositionalLineageHash {
691 fn format_impl(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
692 let position = self.position();
693 let current_hash = self.current_sequence_hash();
694 let current_hash_b58 = bs58::encode(current_hash.to_be_bytes()).into_string();
695
696 if position == 0 {
697 write!(f, "{}:{}", position, current_hash_b58)
698 } else {
699 let parent_hash = self.parent_hash_fragment();
700 let parent_hash_b58 = bs58::encode(parent_hash.to_be_bytes()).into_string();
701 write!(f, "{}:{}:{}", position, current_hash_b58, parent_hash_b58)
702 }
703 }
704}
705
706impl std::fmt::Debug for PositionalLineageHash {
707 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
708 self.format_impl(f)
709 }
710}
711
712impl std::fmt::Display for PositionalLineageHash {
713 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
714 self.format_impl(f)
715 }
716}
717
718impl std::cmp::PartialOrd for PositionalLineageHash {
719 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
720 Some(self.cmp(other))
721 }
722}
723
724impl std::cmp::Ord for PositionalLineageHash {
725 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
728 self.position()
729 .cmp(&other.position())
730 .then_with(|| {
731 self.current_sequence_hash()
732 .cmp(&other.current_sequence_hash())
733 })
734 .then_with(|| self.0.cmp(&other.0))
735 }
736}
737
738#[derive(Debug, Clone, Dissolve, Default, Eq)]
742pub struct Tokens(Vec<Token>);
743
744impl AsRef<[Token]> for Tokens {
745 fn as_ref(&self) -> &[Token] {
746 &self.0
747 }
748}
749
750impl std::ops::Deref for Tokens {
751 type Target = [Token];
752
753 fn deref(&self) -> &Self::Target {
754 &self.0
755 }
756}
757
758impl std::borrow::Borrow<[Token]> for Tokens {
759 fn borrow(&self) -> &[Token] {
760 &self.0
761 }
762}
763
764impl From<Vec<Token>> for Tokens {
765 fn from(tokens: Vec<Token>) -> Self {
766 Tokens(tokens)
767 }
768}
769
770impl From<&[Token]> for Tokens {
771 fn from(tokens: &[Token]) -> Self {
772 Tokens(tokens.to_vec())
773 }
774}
775
776impl From<Vec<usize>> for Tokens {
777 fn from(tokens: Vec<usize>) -> Self {
778 Tokens(
779 tokens
780 .into_iter()
781 .map(|t| t.try_into().expect("Token ID exceeds u32::MAX"))
782 .collect(),
783 )
784 }
785}
786
787impl From<Vec<i32>> for Tokens {
788 fn from(tokens: Vec<i32>) -> Self {
790 Tokens(tokens.into_iter().map(|t| t as u32).collect())
791 }
792}
793
794impl From<&[i32]> for Tokens {
795 fn from(tokens: &[i32]) -> Self {
797 Tokens(tokens.iter().map(|&t| t as u32).collect())
798 }
799}
800
801impl From<Tokens> for Vec<Token> {
802 fn from(tokens: Tokens) -> Self {
803 tokens.0
804 }
805}
806
807impl PartialEq<Vec<Token>> for Tokens {
810 fn eq(&self, other: &Vec<Token>) -> bool {
811 self.0 == *other
812 }
813}
814
815impl PartialEq<Tokens> for Vec<Token> {
816 fn eq(&self, other: &Tokens) -> bool {
817 *self == other.0
818 }
819}
820
821impl PartialEq<[Token]> for Tokens {
822 fn eq(&self, other: &[Token]) -> bool {
823 self.0.as_slice() == other
824 }
825}
826
827impl PartialEq<Tokens> for &[Token] {
828 fn eq(&self, other: &Tokens) -> bool {
829 *self == other.0.as_slice()
830 }
831}
832
833impl PartialEq for Tokens {
834 fn eq(&self, other: &Self) -> bool {
835 self.0 == other.0
836 }
837}
838
839impl PartialEq<&[Token]> for Tokens {
842 fn eq(&self, other: &&[Token]) -> bool {
843 self.0.as_slice() == *other
844 }
845}
846
847impl Tokens {
848 fn with_capacity(capacity: usize) -> Self {
849 Tokens(Vec::with_capacity(capacity))
850 }
851
852 pub fn into_sequence(self, block_size: u32, salt_hash: Option<SaltHash>) -> TokenBlockSequence {
862 TokenBlockSequence::new(self, block_size, salt_hash)
863 }
864}
865
866#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
868pub enum TokenBlockError {
869 #[error("TokenBlock is full")]
871 Full,
872
873 #[error("TokenBlock is incomplete")]
875 Incomplete,
876
877 #[error("TokenBlock is empty")]
879 Empty,
880
881 #[error("TokenBlock has insufficient tokens")]
883 InsufficientTokens,
884
885 #[error(transparent)]
887 MmInfo(#[from] MmInfoError),
888
889 #[error("operation is not supported on a TokenBlockSequence with multimodal runs")]
891 MmRunsPresent,
892}
893
894#[derive(Debug, PartialEq)] pub struct PartialTokenBlock {
900 tokens: Tokens,
901 block_size: u32,
902 salt_hash: SaltHash,
903 parent_sequence_hash: Option<SequenceHash>,
904 position: usize, }
906
907impl PartialTokenBlock {
908 pub(crate) fn create_sequence_root(block_size: u32, salt_hash: SaltHash) -> Self {
915 Self {
916 tokens: Tokens::with_capacity(block_size as usize),
917 block_size,
918 salt_hash,
919 parent_sequence_hash: None, position: 0, }
922 }
923
924 pub(crate) fn push_tokens(&mut self, tokens: Tokens) -> Tokens {
937 let remaining_space = self.remaining();
938
939 if remaining_space == 0 {
940 return tokens; }
942
943 if tokens.0.len() <= remaining_space {
944 self.tokens.0.extend(tokens.0);
946 Tokens::default() } else {
948 let (to_add, remaining) = tokens.0.split_at(remaining_space);
950 self.tokens.0.extend_from_slice(to_add);
951 Tokens(remaining.to_vec()) }
953 }
954
955 #[cfg(test)]
962 pub(crate) fn push_token(&mut self, token: Token) -> Result<(), TokenBlockError> {
963 if self.tokens.0.len() >= self.block_size as usize {
964 return Err(TokenBlockError::Full);
965 }
966 self.tokens.0.push(token);
967 Ok(())
968 }
969
970 pub(crate) fn pop_tokens(&mut self, count: usize) -> Result<(), TokenBlockError> {
981 if self.tokens.0.len() < count {
982 return Err(TokenBlockError::InsufficientTokens);
983 }
984 self.tokens.0.truncate(self.tokens.0.len() - count);
985 Ok(())
986 }
987
988 pub fn commit(&mut self) -> Result<TokenBlock, TokenBlockError> {
1000 if self.tokens.0.len() != self.block_size as usize {
1001 return Err(TokenBlockError::Incomplete);
1003 }
1004
1005 let tokens = std::mem::replace(
1007 &mut self.tokens,
1008 Tokens::with_capacity(self.block_size as usize),
1009 );
1010
1011 let chunk = TokenBlockChunk::new(tokens, self.salt_hash);
1012 let block = TokenBlock::from_chunk(chunk, self.parent_sequence_hash, self.position);
1013
1014 self.parent_sequence_hash = Some(block.sequence_hash());
1016 self.position += 1; Ok(block)
1020 }
1021
1022 pub fn remaining(&self) -> usize {
1024 (self.block_size as usize).saturating_sub(self.tokens.0.len())
1026 }
1027
1028 pub fn len(&self) -> usize {
1030 self.tokens.0.len()
1031 }
1032
1033 pub fn is_empty(&self) -> bool {
1035 self.tokens.0.is_empty()
1036 }
1037
1038 pub fn tokens(&self) -> &Tokens {
1040 &self.tokens
1041 }
1042}
1043
1044impl std::ops::Deref for PartialTokenBlock {
1046 type Target = Tokens;
1047
1048 fn deref(&self) -> &Self::Target {
1049 &self.tokens
1050 }
1051}
1052
1053#[derive(Debug)] struct TokenBlockChunk {
1059 tokens: Tokens,
1060 salt_hash: SaltHash,
1061 block_hash: BlockHash,
1062}
1063
1064impl TokenBlockChunk {
1065 fn new(tokens: Tokens, salt_hash: SaltHash) -> Self {
1067 let block_hash = compute_block_hash(cast_slice(&tokens), salt_hash);
1068 Self {
1069 tokens,
1070 salt_hash,
1071 block_hash,
1072 }
1073 }
1074
1075 fn from_tokens(tokens: &[Token], salt_hash: SaltHash) -> Self {
1077 let block_hash = compute_block_hash(cast_slice(tokens), salt_hash);
1078 Self {
1079 tokens: tokens.into(), salt_hash,
1081 block_hash,
1082 }
1083 }
1084}
1085
1086#[derive(Debug, Clone, Default, PartialEq)] pub struct TokenBlock {
1092 tokens: Tokens,
1093 salt_hash: SaltHash,
1094 block_hash: BlockHash,
1095 sequence_hash: SequenceHash,
1096 parent_sequence_hash: Option<SequenceHash>,
1097 positional_sequence_hash: PositionalSequenceHash,
1098 positional_lineage_hash: PositionalLineageHash,
1099}
1100
1101impl TokenBlock {
1102 pub fn next_block(&self) -> PartialTokenBlock {
1106 PartialTokenBlock {
1107 tokens: Tokens::with_capacity(self.tokens.len()),
1108 block_size: self.tokens.len() as u32, salt_hash: self.salt_hash,
1110 parent_sequence_hash: Some(self.sequence_hash), position: self.position() as usize + 1, }
1113 }
1114
1115 fn from_chunk(
1119 chunk: TokenBlockChunk,
1120 parent_sequence_hash: Option<SequenceHash>,
1121 position: usize,
1122 ) -> Self {
1123 let sequence_hash = match parent_sequence_hash {
1124 Some(parent) => compute_next_sequence_hash(parent, chunk.block_hash),
1125 None => {
1126 chunk.block_hash
1128 }
1129 };
1130
1131 let positional_sequence_hash = PositionalSequenceHash::new(
1132 sequence_hash,
1133 position as u64,
1134 chunk.block_hash, );
1136
1137 let positional_lineage_hash =
1138 PositionalLineageHash::new(sequence_hash, parent_sequence_hash, position as u64);
1139
1140 Self {
1141 tokens: chunk.tokens,
1142 salt_hash: chunk.salt_hash,
1143 block_hash: chunk.block_hash,
1144 sequence_hash,
1145 parent_sequence_hash,
1146 positional_sequence_hash,
1147 positional_lineage_hash,
1148 }
1149 }
1150
1151 pub fn tokens(&self) -> &Tokens {
1153 &self.tokens
1154 }
1155
1156 pub fn salt_hash(&self) -> SaltHash {
1158 self.salt_hash
1159 }
1160
1161 pub fn block_hash(&self) -> BlockHash {
1163 self.block_hash
1164 }
1165
1166 pub fn sequence_hash(&self) -> SequenceHash {
1168 self.sequence_hash
1169 }
1170
1171 pub fn parent_sequence_hash(&self) -> Option<SequenceHash> {
1173 self.parent_sequence_hash
1174 }
1175
1176 pub fn block_size(&self) -> usize {
1178 self.tokens.0.len()
1179 }
1180
1181 pub fn positional_sequence_hash(&self) -> PositionalSequenceHash {
1183 self.positional_sequence_hash
1184 }
1185
1186 pub fn positional_lineage_hash(&self) -> PositionalLineageHash {
1188 self.positional_lineage_hash
1189 }
1190
1191 pub fn position(&self) -> u64 {
1193 self.positional_sequence_hash.position()
1194 }
1195}
1196
1197impl PositionalHash for PositionalSequenceHash {
1198 fn position(&self) -> u64 {
1199 self.position()
1200 }
1201}
1202
1203impl PositionalHash for PositionalLineageHash {
1204 fn position(&self) -> u64 {
1205 self.position()
1206 }
1207}
1208
1209#[derive(Debug, PartialEq)]
1224pub struct TokenBlockSequence {
1225 blocks: Vec<TokenBlock>,
1226 current_block: PartialTokenBlock,
1227 salt_hash: SaltHash,
1228 block_size: usize,
1229 mm_runs: Vec<TokenBlockMmInfo>,
1233}
1234
1235impl TokenBlockSequence {
1236 pub fn new(tokens: Tokens, block_size: u32, salt_hash: Option<SaltHash>) -> Self {
1251 assert!(block_size > 0, "block_size must be greater than 0");
1252 let salt_hash = salt_hash.unwrap_or_default();
1253 let (blocks, current_block) = Self::split_tokens(&tokens, block_size, salt_hash);
1254
1255 Self {
1256 blocks,
1257 current_block,
1258 salt_hash,
1259 block_size: block_size as usize,
1260 mm_runs: Vec::new(),
1261 }
1262 }
1263
1264 pub fn extend(&mut self, tokens: Tokens) -> Result<Option<Range<usize>>, TokenBlockError> {
1281 let start_block_index = self.blocks.len();
1282 let mut tokens_to_append = tokens;
1283
1284 while !tokens_to_append.is_empty() {
1285 let remaining_in_current = self.current_block.remaining();
1286
1287 if remaining_in_current == 0 {
1288 let new_block = self.commit_current()?;
1290 self.blocks.push(new_block);
1291 }
1293
1294 let available_tokens = tokens_to_append;
1296 tokens_to_append = self.current_block.push_tokens(available_tokens);
1297
1298 if self.current_block.remaining() == 0 {
1300 let new_block = self.commit_current()?;
1303 self.blocks.push(new_block);
1304 }
1305 }
1306
1307 let end_block_index = self.blocks.len();
1308 if start_block_index == end_block_index {
1309 Ok(None) } else {
1311 Ok(Some(start_block_index..end_block_index))
1312 }
1313 }
1314
1315 fn commit_current(&mut self) -> Result<TokenBlock, TokenBlockError> {
1320 if self.mm_runs.is_empty() {
1321 return self.current_block.commit();
1322 }
1323 if self.current_block.tokens.0.len() != self.current_block.block_size as usize {
1325 return Err(TokenBlockError::Incomplete);
1326 }
1327 let block_offset = self.blocks.len() * (self.current_block.block_size as usize);
1328 let tokens = std::mem::take(&mut self.current_block.tokens);
1329 let block_bytes = compute_block_bytes_with_mm(&tokens, block_offset, &self.mm_runs);
1330 let block_hash = compute_block_hash(&block_bytes, self.current_block.salt_hash);
1331 let chunk = TokenBlockChunk {
1332 tokens,
1333 salt_hash: self.current_block.salt_hash,
1334 block_hash,
1335 };
1336 let block = TokenBlock::from_chunk(
1337 chunk,
1338 self.current_block.parent_sequence_hash,
1339 self.current_block.position,
1340 );
1341 self.current_block.parent_sequence_hash = Some(block.sequence_hash());
1342 self.current_block.position += 1;
1343 Ok(block)
1344 }
1345
1346 pub fn append(&mut self, token: Token) -> Result<Option<usize>, TokenBlockError> {
1363 let before = self.blocks.len();
1364 self.extend(Tokens::from(vec![token]))?;
1365 Ok(if self.blocks.len() > before {
1366 Some(before)
1367 } else {
1368 None
1369 })
1370 }
1371
1372 pub fn truncate(&mut self, len: usize) -> Result<(), TokenBlockError> {
1391 if !self.mm_runs.is_empty() {
1392 return Err(TokenBlockError::MmRunsPresent);
1393 }
1394 let current_total_len = self.total_tokens();
1395 if len >= current_total_len {
1396 return Ok(()); }
1398
1399 let n = current_total_len - len; {
1403 let current_len = self.current_block.len();
1404 let block_size = self.current_block.block_size.max(1);
1406
1407 if n <= current_len {
1408 self.current_block.pop_tokens(n)?;
1410 } else {
1411 let tokens_to_pop_from_blocks = n - current_len;
1413
1414 let num_blocks_to_affect = tokens_to_pop_from_blocks.div_ceil(block_size as usize);
1416
1417 if num_blocks_to_affect > self.blocks.len() {
1419 debug_assert!(
1421 false,
1422 "Truncate calculation error: trying to pop too many blocks."
1423 );
1424 return Err(TokenBlockError::InsufficientTokens);
1425 }
1426
1427 let source_block_index = self.blocks.len() - num_blocks_to_affect;
1429
1430 let num_full_blocks_completely_popped = num_blocks_to_affect - 1;
1432 let num_tokens_to_pop_from_source_block = tokens_to_pop_from_blocks
1433 - num_full_blocks_completely_popped * block_size as usize;
1434 let num_tokens_to_keep_in_new_partial =
1435 (block_size as usize).saturating_sub(num_tokens_to_pop_from_source_block);
1436
1437 let new_partial_tokens = if num_tokens_to_keep_in_new_partial > 0 {
1439 self.blocks[source_block_index].tokens().as_ref()
1440 [..num_tokens_to_keep_in_new_partial]
1441 .to_vec()
1442 } else {
1443 Vec::new()
1444 };
1445
1446 self.blocks.truncate(source_block_index);
1448
1449 self.current_block.tokens = Tokens(new_partial_tokens);
1451 self.current_block.parent_sequence_hash =
1453 self.blocks.last().map(|b| b.sequence_hash());
1454 self.current_block.position = self.blocks.len();
1456 }
1458 }
1459 Ok(())
1460 }
1461
1462 pub fn unwind(&mut self, count: usize) -> Result<(), TokenBlockError> {
1476 let current_total_len = self.total_tokens();
1477 if count > current_total_len {
1478 return Err(TokenBlockError::InsufficientTokens);
1480 }
1481
1482 let len = current_total_len - count;
1484 self.truncate(len)
1485 }
1486
1487 pub fn reset(&mut self) {
1493 self.blocks.clear();
1494 self.current_block =
1495 PartialTokenBlock::create_sequence_root(self.block_size as u32, self.salt_hash);
1496 self.mm_runs.clear();
1497 }
1498
1499 pub fn pop(&mut self) -> Option<Token> {
1515 if !self.mm_runs.is_empty() {
1516 panic!(
1517 "TokenBlockSequence::pop is not supported on a sequence with multimodal runs; \
1518 use try_pop or reset before pop"
1519 );
1520 }
1521 let current_total_len = self.total_tokens();
1522 if current_total_len == 0 {
1523 return None;
1524 }
1525
1526 let last_token = if !self.current_block.tokens.is_empty() {
1529 *self
1531 .current_block
1532 .tokens
1533 .last()
1534 .expect("Current block checked for non-empty")
1535 } else {
1536 let last_block = self
1538 .blocks
1539 .last()
1540 .expect("Sequence is not empty but has no blocks and empty current block?");
1541 *last_block
1542 .tokens()
1543 .last()
1544 .expect("Last block cannot be empty")
1545 };
1546
1547 match self.truncate(current_total_len - 1) {
1550 Ok(_) => Some(last_token),
1551 Err(_) => {
1552 debug_assert!(
1555 false,
1556 "truncate failed unexpectedly after checking length in pop"
1557 );
1558 None
1559 }
1560 }
1561 }
1562
1563 pub fn try_pop(&mut self) -> Result<Option<Token>, TokenBlockError> {
1570 if !self.mm_runs.is_empty() {
1571 return Err(TokenBlockError::MmRunsPresent);
1572 }
1573 Ok(self.pop())
1574 }
1575
1576 pub fn blocks(&self) -> &[TokenBlock] {
1578 &self.blocks
1579 }
1580
1581 pub fn last_complete_block(&self) -> Option<&TokenBlock> {
1583 self.blocks.last()
1584 }
1585
1586 pub fn current_block(&self) -> &PartialTokenBlock {
1588 &self.current_block
1589 }
1590
1591 pub fn into_parts(self) -> (Vec<TokenBlock>, PartialTokenBlock) {
1593 (self.blocks, self.current_block)
1594 }
1595
1596 pub fn block_size(&self) -> usize {
1598 self.block_size
1599 }
1600
1601 pub fn salt_hash(&self) -> SaltHash {
1603 self.salt_hash
1604 }
1605
1606 pub fn total_tokens(&self) -> usize {
1609 let block_size = self.current_block.block_size as usize;
1610 (self.blocks.len() * block_size) + self.current_block.len()
1611 }
1612
1613 pub fn tokens_at(&self, range: Range<usize>) -> Tokens {
1615 let total = self.total_tokens();
1616
1617 if range.start > range.end || range.end > total {
1619 return Tokens::default();
1620 }
1621
1622 if range.is_empty() {
1624 return Tokens::default();
1625 }
1626
1627 let mut result = Vec::with_capacity(range.len());
1628
1629 for i in range {
1630 if i < self.blocks.len() * self.block_size {
1631 let block_index = i / self.block_size;
1633 let token_index = i % self.block_size;
1634 result.push(self.blocks[block_index].tokens()[token_index]);
1635 } else {
1636 let current_block_index = i - (self.blocks.len() * self.block_size);
1638 result.push(self.current_block.tokens()[current_block_index]);
1639 }
1640 }
1641
1642 Tokens::from(result)
1643 }
1644
1645 pub fn split_tokens(
1663 tokens: &[Token],
1664 block_size: u32,
1665 salt_hash: SaltHash,
1666 ) -> (Vec<TokenBlock>, PartialTokenBlock) {
1667 assert!(block_size > 0, "block_size must be greater than 0");
1668 let chunks: Vec<TokenBlockChunk> = tokens
1669 .as_ref()
1670 .chunks_exact(block_size as usize)
1671 .map(|chunk| TokenBlockChunk::from_tokens(chunk, salt_hash))
1672 .collect();
1673
1674 let mut result_blocks = Vec::with_capacity(chunks.len());
1675 let mut last_sequence_hash: Option<SequenceHash> = None;
1676
1677 for (position, chunk) in chunks.into_iter().enumerate() {
1679 let new_block = TokenBlock::from_chunk(chunk, last_sequence_hash, position);
1680 last_sequence_hash = Some(new_block.sequence_hash());
1681 result_blocks.push(new_block);
1682 }
1683
1684 let remainder = tokens
1686 .as_ref()
1687 .chunks_exact(block_size as usize)
1688 .remainder();
1689
1690 let next_position = result_blocks.len(); let mut partial_tokens = Tokens::with_capacity(block_size as usize);
1693 partial_tokens.0.extend_from_slice(remainder);
1694
1695 let current_block = PartialTokenBlock {
1696 tokens: partial_tokens,
1697 block_size,
1698 salt_hash,
1699 parent_sequence_hash: last_sequence_hash,
1701 position: next_position,
1702 };
1703
1704 (result_blocks, current_block)
1705 }
1706
1707 pub fn from_slice(tokens: &[Token], block_size: u32, salt_hash: Option<SaltHash>) -> Self {
1718 assert!(block_size > 0, "block_size must be greater than 0");
1719 let salt_hash = salt_hash.unwrap_or_default();
1720 let (blocks, current_block) = Self::split_tokens(tokens, block_size, salt_hash);
1721
1722 Self {
1723 blocks,
1724 current_block,
1725 salt_hash,
1726 block_size: block_size as usize,
1727 mm_runs: Vec::new(),
1728 }
1729 }
1730
1731 pub fn new_with_mm(
1747 tokens: Tokens,
1748 mm_info: &[TokenBlockMmInfo],
1749 block_size: u32,
1750 salt_hash: Option<SaltHash>,
1751 ) -> Result<Self, TokenBlockError> {
1752 assert!(block_size > 0, "block_size must be greater than 0");
1753 let salt_hash = salt_hash.unwrap_or_default();
1754 let validated =
1755 validate_and_sort_mm_info(mm_info, tokens.len()).map_err(TokenBlockError::MmInfo)?;
1756 let (blocks, current_block) =
1757 Self::split_tokens_with_mm(&tokens, &validated, block_size, salt_hash);
1758 Ok(Self {
1759 blocks,
1760 current_block,
1761 salt_hash,
1762 block_size: block_size as usize,
1763 mm_runs: validated,
1764 })
1765 }
1766
1767 pub fn split_tokens_with_mm(
1771 tokens: &[Token],
1772 mm_runs: &[TokenBlockMmInfo],
1773 block_size: u32,
1774 salt_hash: SaltHash,
1775 ) -> (Vec<TokenBlock>, PartialTokenBlock) {
1776 assert!(block_size > 0, "block_size must be greater than 0");
1777 let bs = block_size as usize;
1778 let n_complete = tokens.len() / bs;
1779 let mut result_blocks = Vec::with_capacity(n_complete);
1780 let mut last_seq_hash: Option<SequenceHash> = None;
1781 for i in 0..n_complete {
1782 let block_offset = i * bs;
1783 let block_tokens = &tokens[block_offset..block_offset + bs];
1784 let block_bytes = compute_block_bytes_with_mm(block_tokens, block_offset, mm_runs);
1785 let block_hash = compute_block_hash(&block_bytes, salt_hash);
1786 let chunk = TokenBlockChunk {
1787 tokens: block_tokens.into(),
1788 salt_hash,
1789 block_hash,
1790 };
1791 let new_block = TokenBlock::from_chunk(chunk, last_seq_hash, i);
1792 last_seq_hash = Some(new_block.sequence_hash());
1793 result_blocks.push(new_block);
1794 }
1795 let remainder = &tokens[n_complete * bs..];
1796 let current_block = PartialTokenBlock {
1797 tokens: remainder.into(),
1798 block_size,
1799 salt_hash,
1800 parent_sequence_hash: last_seq_hash,
1801 position: n_complete,
1802 };
1803 (result_blocks, current_block)
1804 }
1805
1806 pub fn mm_runs(&self) -> &[TokenBlockMmInfo] {
1808 &self.mm_runs
1809 }
1810
1811 pub fn push_token(&mut self, token: Token) -> Result<Option<usize>, TokenBlockError> {
1815 self.append(token)
1816 }
1817
1818 pub fn push_mm_run(
1826 &mut self,
1827 mm_hash: u64,
1828 length: usize,
1829 ) -> Result<Option<Range<usize>>, TokenBlockError> {
1830 if length == 0 {
1831 return Err(TokenBlockError::MmInfo(MmInfoError::EmptyRun));
1832 }
1833 let offset = self.total_tokens();
1834 self.mm_runs.push(TokenBlockMmInfo {
1835 mm_hash,
1836 offset,
1837 length,
1838 });
1839 let placeholders = Tokens::from(vec![0u32; length]);
1841 self.extend(placeholders)
1842 }
1843
1844 pub fn extend_with_mm(
1853 &mut self,
1854 tokens: &[Token],
1855 mm_info: &[TokenBlockMmInfo],
1856 ) -> Result<Option<Range<usize>>, TokenBlockError> {
1857 let validated =
1858 validate_and_sort_mm_info(mm_info, tokens.len()).map_err(TokenBlockError::MmInfo)?;
1859 let start_block = self.blocks.len();
1860 let mut cursor = 0usize;
1861 for run in &validated {
1862 if run.offset > cursor {
1863 let real = Tokens::from(tokens[cursor..run.offset].to_vec());
1864 self.extend(real)?;
1865 }
1866 self.push_mm_run(run.mm_hash, run.length)?;
1867 cursor = run.offset + run.length;
1868 }
1869 if cursor < tokens.len() {
1870 let real = Tokens::from(tokens[cursor..].to_vec());
1871 self.extend(real)?;
1872 }
1873 let end_block = self.blocks.len();
1874 if start_block == end_block {
1875 Ok(None)
1876 } else {
1877 Ok(Some(start_block..end_block))
1878 }
1879 }
1880}
1881
1882#[cfg(test)]
1883mod tests {
1884 use super::*;
1885 use bytemuck::cast_slice;
1886
1887 fn create_test_sequence(
1889 initial_tokens: &[Token],
1890 block_size: u32,
1891 salt_hash: Option<SaltHash>,
1892 ) -> TokenBlockSequence {
1893 TokenBlockSequence::new(Tokens::from(initial_tokens), block_size, salt_hash)
1894 }
1895
1896 const TEST_SALT_HASH: SaltHash = 1337;
1898 const HASH_1_4: BlockHash = 14643705804678351452; const SEQ_HASH_1_4: SequenceHash = HASH_1_4;
1900 const HASH_5_8: BlockHash = 16777012769546811212; const SEQ_HASH_5_8: SequenceHash = 4945711292740353085; const HASH_9_12: BlockHash = 483935686894639516; const SEQ_HASH_9_12: SequenceHash = 12583592247330656132; impl PartialTokenBlock {
1906 pub fn pop_token(&mut self) -> Result<(), TokenBlockError> {
1913 if self.tokens.0.is_empty() {
1914 return Err(TokenBlockError::Empty);
1915 }
1916 self.tokens.0.pop();
1917 Ok(())
1918 }
1919 }
1920
1921 #[test]
1922 fn test_validate_hash_constants() {
1923 let salt = TEST_SALT_HASH;
1924
1925 let tokens_1_4 = &[1u32, 2, 3, 4];
1927 let computed_hash_1_4 = compute_block_hash(cast_slice(tokens_1_4), salt);
1928 assert_eq!(computed_hash_1_4, HASH_1_4, "Mismatch for HASH_1_4");
1929 assert_eq!(computed_hash_1_4, SEQ_HASH_1_4, "Mismatch for SEQ_HASH_1_4");
1931
1932 let tokens_5_8 = &[5u32, 6, 7, 8];
1934 let computed_hash_5_8 = compute_block_hash(cast_slice(tokens_5_8), salt);
1935 assert_eq!(computed_hash_5_8, HASH_5_8, "Mismatch for HASH_5_8");
1936 let computed_seq_hash_5_8 = compute_next_sequence_hash(SEQ_HASH_1_4, HASH_5_8);
1938 assert_eq!(
1939 computed_seq_hash_5_8, SEQ_HASH_5_8,
1940 "Mismatch for SEQ_HASH_5_8"
1941 );
1942
1943 let tokens_9_12 = &[9u32, 10, 11, 12];
1945 let computed_hash_9_12 = compute_block_hash(cast_slice(tokens_9_12), salt);
1946 assert_eq!(computed_hash_9_12, HASH_9_12, "Mismatch for HASH_9_12");
1947 let computed_seq_hash_9_12 = compute_next_sequence_hash(SEQ_HASH_5_8, HASH_9_12);
1948 assert_eq!(
1949 computed_seq_hash_9_12, SEQ_HASH_9_12,
1950 "Mismatch for SEQ_HASH_9_12"
1951 );
1952 }
1953
1954 #[test]
1955 fn test_positional_sequence_hash_encoding_decoding() {
1956 let seq_hash_0 = 0x1234567890ABCDEF;
1958 let position_0 = 100;
1959 let lbh_0 = 0xFEDCBA9876543210;
1960 let psh_0 = PositionalSequenceHash::new(seq_hash_0, position_0, lbh_0);
1961
1962 assert_eq!(psh_0.mode(), 0, "Position 100 should use mode 0");
1963 assert_eq!(psh_0.sequence_hash(), seq_hash_0);
1964 assert_eq!(psh_0.position(), position_0);
1965 assert_eq!(
1967 psh_0.local_block_hash(),
1968 lbh_0 & ((1u64 << 54) - 1),
1969 "LBH should be truncated to 54 bits"
1970 );
1971
1972 let position_1 = 1000;
1974 let psh_1 = PositionalSequenceHash::new(seq_hash_0, position_1, lbh_0);
1975
1976 assert_eq!(psh_1.mode(), 1, "Position 1000 should use mode 1");
1977 assert_eq!(psh_1.sequence_hash(), seq_hash_0);
1978 assert_eq!(psh_1.position(), position_1);
1979 assert_eq!(
1981 psh_1.local_block_hash(),
1982 lbh_0 & ((1u64 << 46) - 1),
1983 "LBH should be truncated to 46 bits"
1984 );
1985
1986 let position_2 = 100_000;
1988 let psh_2 = PositionalSequenceHash::new(seq_hash_0, position_2, lbh_0);
1989
1990 assert_eq!(psh_2.mode(), 2, "Position 100,000 should use mode 2");
1991 assert_eq!(psh_2.sequence_hash(), seq_hash_0);
1992 assert_eq!(psh_2.position(), position_2);
1993 assert_eq!(
1995 psh_2.local_block_hash(),
1996 lbh_0 & ((1u64 << 38) - 1),
1997 "LBH should be truncated to 38 bits"
1998 );
1999
2000 let position_3 = 20_000_000;
2002 let psh_3 = PositionalSequenceHash::new(seq_hash_0, position_3, lbh_0);
2003
2004 assert_eq!(psh_3.mode(), 3, "Position 20,000,000 should use mode 3");
2005 assert_eq!(psh_3.sequence_hash(), seq_hash_0);
2006 assert_eq!(psh_3.position(), position_3);
2007 assert_eq!(
2009 psh_3.local_block_hash(),
2010 lbh_0 & ((1u64 << 31) - 1),
2011 "LBH should be truncated to 31 bits"
2012 );
2013
2014 let position_255 = 255;
2016 let psh_255 = PositionalSequenceHash::new(seq_hash_0, position_255, lbh_0);
2017 assert_eq!(psh_255.mode(), 0, "Position 255 should use mode 0");
2018 assert_eq!(psh_255.position(), position_255);
2019
2020 let position_256 = 256;
2021 let psh_256 = PositionalSequenceHash::new(seq_hash_0, position_256, lbh_0);
2022 assert_eq!(psh_256.mode(), 1, "Position 256 should use mode 1");
2023 assert_eq!(psh_256.position(), position_256);
2024 }
2025
2026 #[test]
2027 fn test_positional_lineage_hash() {
2028 let current_hash_0 = 0x1234567890ABCDEF;
2030 let parent_hash_0 = 0xFEDCBA9876543210;
2031 let position_0 = 100;
2032 let plh_0 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_0);
2033
2034 assert_eq!(plh_0.mode(), 0, "Position 100 should use mode 0");
2035 assert_eq!(plh_0.position(), position_0);
2036 assert_eq!(
2038 plh_0.current_sequence_hash(),
2039 current_hash_0,
2040 "Current sequence hash should be stored in full"
2041 );
2042 assert_eq!(
2043 plh_0.parent_hash_fragment(),
2044 parent_hash_0 & ((1u64 << 54) - 1),
2045 "Parent fragment should be truncated to 54 bits in mode 0"
2046 );
2047
2048 let position_1 = 1000;
2050 let plh_1 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_1);
2051
2052 assert_eq!(plh_1.mode(), 1, "Position 1000 should use mode 1");
2053 assert_eq!(plh_1.position(), position_1);
2054 assert_eq!(plh_1.current_sequence_hash(), current_hash_0);
2055 assert_eq!(
2056 plh_1.parent_hash_fragment(),
2057 parent_hash_0 & ((1u64 << 46) - 1),
2058 "Parent fragment should be truncated to 46 bits in mode 1"
2059 );
2060
2061 let position_2 = 100_000;
2063 let plh_2 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_2);
2064
2065 assert_eq!(plh_2.mode(), 2, "Position 100,000 should use mode 2");
2066 assert_eq!(plh_2.position(), position_2);
2067 assert_eq!(plh_2.current_sequence_hash(), current_hash_0);
2068 assert_eq!(
2069 plh_2.parent_hash_fragment(),
2070 parent_hash_0 & ((1u64 << 38) - 1),
2071 "Parent fragment should be truncated to 38 bits in mode 2"
2072 );
2073
2074 let position_255 = 255;
2076 let plh_255 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_255);
2077 assert_eq!(plh_255.mode(), 0, "Position 255 should use mode 0");
2078 assert_eq!(plh_255.position(), position_255);
2079
2080 let position_256 = 256;
2081 let plh_256 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_256);
2082 assert_eq!(plh_256.mode(), 1, "Position 256 should use mode 1");
2083 assert_eq!(plh_256.position(), position_256);
2084
2085 let position_65535 = 65535;
2086 let plh_65535 =
2087 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65535);
2088 assert_eq!(plh_65535.mode(), 1, "Position 65535 should use mode 1");
2089 assert_eq!(plh_65535.position(), position_65535);
2090
2091 let position_65536 = 65536;
2092 let plh_65536 =
2093 PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65536);
2094 assert_eq!(plh_65536.mode(), 2, "Position 65536 should use mode 2");
2095 assert_eq!(plh_65536.position(), position_65536);
2096
2097 let plh_root = PositionalLineageHash::new(current_hash_0, None, 0);
2099 assert_eq!(plh_root.mode(), 0);
2100 assert_eq!(plh_root.position(), 0);
2101 assert_eq!(
2102 plh_root.parent_hash_fragment(),
2103 0,
2104 "Root should have zero parent fragment"
2105 );
2106 assert_eq!(plh_root.current_sequence_hash(), current_hash_0);
2107 }
2108
2109 #[test]
2110 #[should_panic(expected = "Position 16777216 exceeds maximum supported value")]
2111 fn test_positional_lineage_hash_panic_on_large_position() {
2112 let current_hash = 0x1234567890ABCDEF;
2113 let parent_hash = 0xFEDCBA9876543210;
2114 let position = 1u64 << 24; let _ = PositionalLineageHash::new(current_hash, Some(parent_hash), position);
2116 }
2117
2118 #[test]
2119 fn test_positional_lineage_hash_mode_boundary_alignment() {
2120 let parent_hash = 0xFEDCBA9876543210;
2126 let current_hash_255 = 0x1234567890ABCDEF;
2127 let current_hash_256 = 0xABCDEF0123456789;
2128
2129 let plh_255 = PositionalLineageHash::new(current_hash_255, Some(parent_hash), 255);
2131 assert_eq!(plh_255.mode(), 0);
2132 assert_eq!(plh_255.current_sequence_hash(), current_hash_255);
2133
2134 let plh_256 = PositionalLineageHash::new(current_hash_256, Some(current_hash_255), 256);
2136 assert_eq!(plh_256.mode(), 1);
2137
2138 let mask_46 = (1u64 << 46) - 1;
2141 assert_eq!(
2142 plh_256.parent_hash_fragment(),
2143 current_hash_255 & mask_46,
2144 "Mode boundary: position 256's parent fragment matches position 255's current truncated to 46 bits"
2145 );
2146 assert_eq!(
2147 plh_256.parent_hash_fragment(),
2148 plh_255.parent_fragment_for_child_position(256),
2149 "parent_fragment_for_child_position helper should match"
2150 );
2151
2152 let current_hash_65535 = 0x1111222233334444;
2154 let current_hash_65536 = 0x5555666677778888;
2155
2156 let plh_65535 = PositionalLineageHash::new(current_hash_65535, Some(parent_hash), 65535);
2157 assert_eq!(plh_65535.mode(), 1);
2158
2159 let plh_65536 =
2160 PositionalLineageHash::new(current_hash_65536, Some(current_hash_65535), 65536);
2161 assert_eq!(plh_65536.mode(), 2);
2162
2163 let mask_38 = (1u64 << 38) - 1;
2165 assert_eq!(
2166 plh_65536.parent_hash_fragment(),
2167 current_hash_65535 & mask_38,
2168 "Mode boundary: position 65536's parent fragment matches position 65535's current truncated to 38 bits"
2169 );
2170 assert_eq!(
2171 plh_65536.parent_hash_fragment(),
2172 plh_65535.parent_fragment_for_child_position(65536),
2173 );
2174 }
2175
2176 #[test]
2177 fn test_positional_lineage_hash_extend() {
2178 let salt: SaltHash = 1337;
2181 let bh: [BlockHash; 3] = [
2182 compute_block_hash(cast_slice(&[1u32, 2, 3, 4]), salt),
2183 compute_block_hash(cast_slice(&[5u32, 6, 7, 8]), salt),
2184 compute_block_hash(cast_slice(&[9u32, 10, 11, 12]), salt),
2185 ];
2186
2187 let blk0 =
2189 TokenBlock::from_chunk(TokenBlockChunk::from_tokens(&[1, 2, 3, 4], salt), None, 0);
2190 let blk1 = TokenBlock::from_chunk(
2191 TokenBlockChunk::from_tokens(&[5, 6, 7, 8], salt),
2192 Some(blk0.sequence_hash()),
2193 1,
2194 );
2195 let blk2 = TokenBlock::from_chunk(
2196 TokenBlockChunk::from_tokens(&[9, 10, 11, 12], salt),
2197 Some(blk1.sequence_hash()),
2198 2,
2199 );
2200
2201 let plh0 = PositionalLineageHash::root(bh[0]);
2203 let plh1 = plh0.extend(bh[1]);
2204 let plh2 = plh1.extend(bh[2]);
2205
2206 assert_eq!(plh0.as_u128(), blk0.positional_lineage_hash().as_u128());
2207 assert_eq!(plh1.as_u128(), blk1.positional_lineage_hash().as_u128());
2208 assert_eq!(plh2.as_u128(), blk2.positional_lineage_hash().as_u128());
2209
2210 assert_eq!(
2212 plh1.current_sequence_hash(),
2213 compute_next_sequence_hash(plh0.current_sequence_hash(), bh[1]),
2214 );
2215
2216 let alt_salt: SaltHash = 4242;
2219 let alt_bh0 = compute_block_hash(cast_slice(&[1u32, 2, 3, 4]), alt_salt);
2220 assert_ne!(alt_bh0, bh[0]);
2221 let alt_plh0 = PositionalLineageHash::root(alt_bh0);
2222 let alt_plh1 = alt_plh0.extend(compute_block_hash(cast_slice(&[5u32, 6, 7, 8]), alt_salt));
2223 assert_ne!(alt_plh0.as_u128(), plh0.as_u128());
2224 assert_ne!(alt_plh1.as_u128(), plh1.as_u128());
2225 }
2226
2227 #[test]
2228 fn test_tokens_from() {
2229 let vec_u32: Vec<u32> = vec![1, 2, 3];
2230 let tokens_u32: Tokens = vec_u32.clone().into();
2231 assert_eq!(tokens_u32.0, vec_u32);
2232
2233 let slice_u32: &[u32] = &[4, 5];
2234 let tokens_slice_u32: Tokens = slice_u32.into();
2235 assert_eq!(tokens_slice_u32.0, vec![4, 5]);
2236
2237 let vec_i32: Vec<i32> = vec![-1, 0, 1]; let tokens_i32: Tokens = vec_i32.into();
2239 assert_eq!(tokens_i32.0, vec![u32::MAX, 0, 1]);
2240
2241 let slice_i32: &[i32] = &[100, 200];
2242 let tokens_slice_i32: Tokens = slice_i32.into();
2243 assert_eq!(tokens_slice_i32.0, vec![100, 200]);
2244
2245 let into_vec: Vec<u32> = tokens_slice_i32.into();
2246 assert_eq!(into_vec, vec![100, 200]);
2247 }
2248
2249 #[test]
2250 fn test_tokens_equality() {
2251 let tokens = Tokens::from(vec![1, 2, 3]);
2252 assert_eq!(tokens, vec![1, 2, 3]);
2253 assert_eq!(vec![1, 2, 3], tokens);
2254 assert_eq!(tokens, &[1, 2, 3][..]);
2255 assert_eq!(&[1, 2, 3][..], tokens);
2256 assert_eq!(tokens, Tokens::from(vec![1, 2, 3]));
2257 assert_ne!(tokens, Tokens::from(vec![1, 2, 4]));
2258 }
2259
2260 #[test]
2261 fn test_tokens_deref_asref() {
2262 let tokens = Tokens::from(vec![10, 20, 30]);
2263
2264 assert_eq!(tokens.len(), 3);
2266 assert_eq!(tokens[1], 20);
2267 let slice: &[Token] = &tokens;
2268 assert_eq!(slice, &[10, 20, 30]);
2269
2270 let as_ref_slice: &[Token] = tokens.as_ref();
2272 assert_eq!(as_ref_slice, &[10, 20, 30]);
2273
2274 let borrowed_slice: &[Token] = std::borrow::Borrow::borrow(&tokens);
2276 assert_eq!(borrowed_slice, &[10, 20, 30]);
2277 }
2278
2279 #[test]
2280 fn test_tokens_into_sequence() {
2281 let tokens = Tokens::from(vec![1, 2, 3, 4, 5]);
2282 let seq = tokens.into_sequence(3, Some(TEST_SALT_HASH));
2283 assert_eq!(seq.blocks().len(), 1);
2284 assert_eq!(seq.blocks[0].tokens().as_ref(), &[1, 2, 3]);
2285 assert_eq!(seq.current_block().tokens().as_ref(), &[4, 5]);
2286 assert_eq!(seq.salt_hash(), TEST_SALT_HASH);
2287 }
2288
2289 #[test]
2290 fn test_partial_block_ops() {
2291 let mut partial = PartialTokenBlock::create_sequence_root(3, TEST_SALT_HASH);
2292 assert_eq!(partial.len(), 0);
2293 assert_eq!(partial.remaining(), 3);
2294 assert!(partial.is_empty());
2295
2296 assert!(partial.push_token(1).is_ok());
2298 assert_eq!(partial.len(), 1);
2299 assert_eq!(partial.remaining(), 2);
2300 let remaining = partial.push_tokens(Tokens::from(vec![2, 3, 4]));
2301 assert_eq!(partial.len(), 3);
2302 assert_eq!(partial.remaining(), 0);
2303 assert_eq!(remaining.as_ref(), &[4]); assert_eq!(partial.tokens().as_ref(), &[1, 2, 3]);
2305
2306 assert_eq!(partial.push_token(5), Err(TokenBlockError::Full));
2308 let remaining_full = partial.push_tokens(Tokens::from(vec![5]));
2309 assert_eq!(remaining_full.as_ref(), &[5]);
2310
2311 assert!(partial.pop_token().is_ok());
2313 assert_eq!(partial.len(), 2);
2314 assert_eq!(partial.tokens().as_ref(), &[1, 2]);
2315 assert!(partial.pop_tokens(2).is_ok());
2316 assert!(partial.is_empty());
2317
2318 assert_eq!(partial.pop_token(), Err(TokenBlockError::Empty));
2320 assert_eq!(
2321 partial.pop_tokens(1),
2322 Err(TokenBlockError::InsufficientTokens)
2323 );
2324
2325 assert!(partial.push_token(10).is_ok());
2327 assert_eq!(partial.commit(), Err(TokenBlockError::Incomplete));
2328
2329 assert!(partial.push_token(11).is_ok());
2331 assert!(partial.push_token(12).is_ok());
2332 assert_eq!(partial.len(), 3);
2333 let commit_result = partial.commit();
2334 assert!(commit_result.is_ok());
2335 let committed_block = commit_result.unwrap();
2336 assert_eq!(committed_block.tokens().as_ref(), &[10, 11, 12]);
2337
2338 assert!(partial.is_empty());
2340 assert_eq!(
2341 partial.parent_sequence_hash,
2342 Some(committed_block.sequence_hash())
2343 );
2344 assert_eq!(partial.block_size, 3);
2345 }
2346
2347 #[test]
2348 fn test_token_block_creation_and_hashes() {
2349 let salt = TEST_SALT_HASH;
2350 let tokens1 = Tokens::from(vec![1, 2, 3, 4]);
2351 let chunk1 = TokenBlockChunk::new(tokens1.clone(), salt);
2352 let block1 = TokenBlock::from_chunk(chunk1, None, 0);
2353
2354 assert_eq!(block1.tokens(), &tokens1);
2355 assert_eq!(block1.salt_hash(), salt);
2356 assert_eq!(block1.parent_sequence_hash(), None);
2357 assert_eq!(block1.block_hash(), HASH_1_4);
2358 assert_eq!(block1.sequence_hash(), SEQ_HASH_1_4); assert_eq!(block1.position(), 0); let plh1 = block1.positional_lineage_hash();
2363 assert_eq!(plh1.position(), 0);
2364 assert_eq!(plh1.parent_hash_fragment(), 0); assert_eq!(plh1.current_sequence_hash(), SEQ_HASH_1_4);
2366
2367 let tokens2 = Tokens::from(vec![5, 6, 7, 8]);
2368 let chunk2 = TokenBlockChunk::new(tokens2.clone(), salt);
2369 let block2 = TokenBlock::from_chunk(chunk2, block1.parent_sequence_hash(), 1); assert_ne!(block2.sequence_hash(), SEQ_HASH_5_8);
2372
2373 let chunk2_correct = TokenBlockChunk::new(tokens2.clone(), salt);
2374 let block2_correct =
2375 TokenBlock::from_chunk(chunk2_correct, Some(block1.sequence_hash()), 1);
2376
2377 assert_eq!(block2_correct.tokens(), &tokens2);
2378 assert_eq!(block2_correct.salt_hash(), salt);
2379 assert_eq!(
2380 block2_correct.parent_sequence_hash(),
2381 Some(block1.sequence_hash())
2382 );
2383 assert_eq!(block2_correct.block_hash(), HASH_5_8);
2384 assert_eq!(block2_correct.sequence_hash(), SEQ_HASH_5_8);
2385 assert_eq!(block2_correct.position(), 1); let plh2 = block2_correct.positional_lineage_hash();
2389 assert_eq!(plh2.position(), 1);
2390 assert_eq!(
2391 plh2.parent_hash_fragment(),
2392 SEQ_HASH_1_4 & ((1u64 << 54) - 1)
2393 ); assert_eq!(plh2.current_sequence_hash(), SEQ_HASH_5_8);
2395 }
2396
2397 #[test]
2398 fn test_new_sequence() {
2399 let seq_empty = create_test_sequence(&[], 4, Some(TEST_SALT_HASH));
2401 assert!(seq_empty.blocks().is_empty());
2402 assert!(seq_empty.current_block().is_empty());
2403 assert_eq!(seq_empty.total_tokens(), 0);
2404 assert_eq!(seq_empty.salt_hash(), TEST_SALT_HASH);
2405 assert_eq!(seq_empty.current_block().parent_sequence_hash, None);
2406
2407 let seq_partial = create_test_sequence(&[1, 2], 4, Some(TEST_SALT_HASH));
2409 assert!(seq_partial.blocks().is_empty());
2410 assert_eq!(seq_partial.current_block().tokens().as_ref(), &[1, 2]);
2411 assert_eq!(seq_partial.total_tokens(), 2);
2412 assert_eq!(seq_partial.current_block().parent_sequence_hash, None);
2413
2414 let seq_one_block = create_test_sequence(&[1, 2, 3, 4], 4, Some(TEST_SALT_HASH));
2416 assert_eq!(seq_one_block.blocks().len(), 1);
2417 assert!(seq_one_block.current_block().is_empty());
2418 assert_eq!(seq_one_block.total_tokens(), 4);
2419 assert_eq!(seq_one_block.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2420 assert_eq!(seq_one_block.blocks[0].sequence_hash(), SEQ_HASH_1_4);
2421 assert_eq!(
2422 seq_one_block.current_block().parent_sequence_hash,
2423 Some(SEQ_HASH_1_4)
2424 );
2425
2426 let seq_multi = create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9], 4, Some(TEST_SALT_HASH));
2428 assert_eq!(seq_multi.blocks().len(), 2);
2429 assert_eq!(seq_multi.current_block().tokens().as_ref(), &[9]);
2430 assert_eq!(seq_multi.total_tokens(), 9);
2431 assert_eq!(seq_multi.blocks[0].sequence_hash(), SEQ_HASH_1_4);
2432 assert_eq!(seq_multi.blocks[1].sequence_hash(), SEQ_HASH_5_8);
2433 assert_eq!(
2434 seq_multi.current_block().parent_sequence_hash,
2435 Some(SEQ_HASH_5_8)
2436 );
2437
2438 assert_eq!(seq_multi.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]); assert_eq!(seq_multi.tokens_at(4..8).as_ref(), &[5, 6, 7, 8]); assert_eq!(seq_multi.tokens_at(8..9).as_ref(), &[9]); assert_eq!(seq_multi.tokens_at(2..6).as_ref(), &[3, 4, 5, 6]); assert_eq!(seq_multi.tokens_at(6..9).as_ref(), &[7, 8, 9]); assert_eq!(seq_multi.tokens_at(5..5).as_ref(), &[0u32; 0]); assert_eq!(seq_multi.tokens_at(10..15).as_ref(), &[0u32; 0]); let seq_no_salt = create_test_sequence(&[1, 2, 3, 4, 5], 4, None);
2449 assert_eq!(seq_no_salt.salt_hash(), 0);
2450 assert_eq!(seq_no_salt.blocks().len(), 1);
2451 assert_ne!(seq_no_salt.blocks[0].block_hash(), HASH_1_4); assert_eq!(seq_no_salt.current_block().tokens().as_ref(), &[5]);
2453 }
2454
2455 #[test]
2456 #[should_panic]
2457 fn test_new_sequence_zero_block_size() {
2458 let _ = create_test_sequence(&[1], 0, None);
2459 }
2460
2461 #[test]
2462 fn test_append_single_token() {
2463 let mut sequence =
2464 create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4, Some(TEST_SALT_HASH));
2465 assert_eq!(sequence.blocks().len(), 2);
2466 assert_eq!(sequence.current_block().tokens.len(), 2);
2467 assert_eq!(sequence.current_block().tokens, vec![9, 10]);
2468 assert_eq!(
2469 sequence.current_block().parent_sequence_hash,
2470 Some(SEQ_HASH_5_8)
2471 );
2472
2473 let completed_idx = sequence.append(11).unwrap();
2475 assert_eq!(completed_idx, None);
2476 assert_eq!(sequence.blocks().len(), 2);
2477 assert_eq!(sequence.current_block().tokens.as_ref(), &[9, 10, 11]);
2478
2479 let completed_idx = sequence.append(12).unwrap();
2482 assert_eq!(completed_idx, Some(2));
2483 assert_eq!(sequence.blocks().len(), 3);
2484 assert_eq!(sequence.current_block.tokens.as_ref(), &[0u32; 0]);
2485 assert_eq!(sequence.current_block.remaining(), 4);
2486 assert_eq!(
2487 sequence.current_block().parent_sequence_hash,
2488 Some(SEQ_HASH_9_12)
2489 ); let completed_idx_13 = sequence.append(13).unwrap();
2493 assert_eq!(completed_idx_13, None);
2494 assert_eq!(sequence.blocks().len(), 3);
2495 assert_eq!(sequence.blocks[2].tokens().as_ref(), &[9, 10, 11, 12]);
2496 assert_eq!(sequence.blocks[2].sequence_hash(), SEQ_HASH_9_12);
2497 assert_eq!(sequence.current_block.tokens.as_ref(), &[13]); assert_eq!(sequence.current_block.remaining(), 3);
2499 assert_eq!(
2500 sequence.current_block.parent_sequence_hash,
2501 Some(SEQ_HASH_9_12)
2502 ); }
2504
2505 #[test]
2506 fn test_extend() {
2507 let block_size = 4;
2508 let salt_hash = Some(TEST_SALT_HASH);
2509
2510 let mut seq1 = create_test_sequence(&[], block_size, salt_hash);
2512 let tokens1 = Tokens::from(vec![1, 2]);
2513 let completed1 = seq1.extend(tokens1).unwrap();
2514 assert_eq!(completed1, None); assert_eq!(seq1.blocks.len(), 0);
2516 assert_eq!(seq1.current_block.tokens.as_ref(), &[1, 2]);
2517 assert_eq!(seq1.current_block.remaining(), 2);
2518 assert_eq!(seq1.current_block.parent_sequence_hash, None); let mut seq2 = create_test_sequence(&[], block_size, salt_hash);
2522 let tokens2 = Tokens::from(vec![1, 2, 3, 4]);
2523 let completed2 = seq2.extend(tokens2).unwrap();
2524 assert_eq!(completed2, Some(0..1));
2525 assert_eq!(seq2.blocks.len(), 1);
2526 assert_eq!(seq2.current_block.tokens.as_ref(), &[0u32; 0]); assert_eq!(seq2.current_block.remaining(), 4);
2528 assert_eq!(seq2.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); let mut seq3 = create_test_sequence(&[], block_size, salt_hash);
2532 let tokens3 = Tokens::from(vec![1, 2, 3, 4, 5, 6]);
2533 let completed3 = seq3.extend(tokens3).unwrap();
2534 assert_eq!(completed3, Some(0..1)); assert_eq!(seq3.blocks.len(), 1);
2536 assert_eq!(seq3.current_block.tokens.as_ref(), &[5, 6]); assert_eq!(seq3.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2538 assert_eq!(seq3.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2539 assert_eq!(seq3.current_block.remaining(), 2);
2540
2541 let mut seq4 = create_test_sequence(&[], block_size, salt_hash);
2543 let tokens4 = Tokens::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
2544 let completed4 = seq4.extend(tokens4).unwrap();
2545 assert_eq!(completed4, Some(0..2)); assert_eq!(seq4.blocks.len(), 2); assert_eq!(seq4.current_block.tokens.as_ref(), &[0u32; 0]);
2548 assert_eq!(seq4.current_block.remaining(), 4);
2549 assert_eq!(seq4.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2550 assert_eq!(seq4.blocks[0].sequence_hash(), SEQ_HASH_1_4);
2551 assert_eq!(seq4.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8)); let mut seq5 = create_test_sequence(&[], block_size, salt_hash);
2555 let tokens5a = Tokens::from(vec![1, 2]);
2556 let completed5a = seq5.extend(tokens5a).unwrap();
2557 assert_eq!(completed5a, None);
2558 assert_eq!(seq5.blocks.len(), 0);
2559 assert_eq!(seq5.current_block.tokens.as_ref(), &[1, 2]);
2560
2561 let tokens5b = Tokens::from(vec![3, 4, 5]);
2562 let completed5b = seq5.extend(tokens5b).unwrap();
2563 assert_eq!(completed5b, Some(0..1)); assert_eq!(seq5.blocks.len(), 1);
2565 assert_eq!(seq5.current_block.tokens.as_ref(), &[5]);
2566 assert_eq!(seq5.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2567 assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2568 assert_eq!(seq5.current_block.remaining(), 3);
2569
2570 let tokens5c = Tokens::from(vec![6, 7, 8, 9, 10]);
2571 let completed5c = seq5.extend(tokens5c).unwrap();
2572 assert_eq!(completed5c, Some(1..2)); assert_eq!(seq5.blocks.len(), 2);
2574 assert_eq!(seq5.current_block.tokens.as_ref(), &[9, 10]);
2575 assert_eq!(seq5.blocks[1].tokens().as_ref(), &[5, 6, 7, 8]);
2576 assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2577 assert_eq!(seq5.current_block.remaining(), 2);
2578
2579 let mut seq6 = create_test_sequence(&[1], block_size, salt_hash);
2581 let completed6 = seq6.extend(Tokens::default()).unwrap();
2582 assert_eq!(completed6, None);
2583 assert_eq!(seq6.blocks.len(), 0);
2584 assert_eq!(seq6.current_block.tokens.as_ref(), &[1]);
2585 assert_eq!(seq6.total_tokens(), 1);
2586
2587 let mut seq7 = create_test_sequence(&[1, 2], block_size, salt_hash);
2589 let tokens7 = Tokens::from(vec![3, 4]);
2590 let completed7 = seq7.extend(tokens7).unwrap();
2591 assert_eq!(completed7, Some(0..1)); assert_eq!(seq7.blocks.len(), 1);
2593 assert_eq!(seq7.current_block.tokens.as_ref(), &[0u32; 0]); assert_eq!(seq7.current_block.remaining(), 4);
2595 assert_eq!(seq7.total_tokens(), 4);
2596 assert_eq!(seq7.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); assert_eq!(seq7.tokens_at(0..2).as_ref(), &[1, 2]);
2600 assert_eq!(seq7.tokens_at(1..3).as_ref(), &[2, 3]);
2601 assert_eq!(seq7.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
2602 assert_eq!(seq7.tokens_at(2..2).as_ref(), &[0u32; 0]); }
2604
2605 #[test]
2606 fn test_truncate() {
2607 let block_size = 4;
2608 let salt_hash = Some(TEST_SALT_HASH);
2609 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq1 = create_test_sequence(initial_tokens, block_size, salt_hash);
2613 assert!(seq1.truncate(9).is_ok());
2614 assert_eq!(seq1.total_tokens(), 9);
2615 assert_eq!(seq1.blocks().len(), 2);
2616 assert_eq!(seq1.current_block().tokens.as_ref(), &[9]);
2617 assert_eq!(
2618 seq1.current_block().parent_sequence_hash,
2619 Some(SEQ_HASH_5_8)
2620 );
2621
2622 let mut seq2 = create_test_sequence(initial_tokens, block_size, salt_hash);
2624 assert!(seq2.truncate(8).is_ok());
2625 assert_eq!(seq2.total_tokens(), 8);
2626 assert_eq!(seq2.blocks().len(), 2);
2627 assert!(seq2.current_block().tokens.is_empty());
2628 assert_eq!(
2629 seq2.current_block().parent_sequence_hash,
2630 Some(SEQ_HASH_5_8)
2631 );
2632
2633 let mut seq3 = create_test_sequence(initial_tokens, block_size, salt_hash);
2635 assert!(seq3.truncate(7).is_ok());
2636 assert_eq!(seq3.total_tokens(), 7);
2637 assert_eq!(seq3.blocks().len(), 1); assert_eq!(seq3.current_block().tokens.as_ref(), &[5, 6, 7]); assert_eq!(
2640 seq3.current_block().parent_sequence_hash,
2641 Some(SEQ_HASH_1_4)
2642 ); assert_eq!(seq3.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2644
2645 let mut seq4 = create_test_sequence(initial_tokens, block_size, salt_hash);
2647 assert!(seq4.truncate(4).is_ok());
2648 assert_eq!(seq4.total_tokens(), 4);
2649 assert_eq!(seq4.blocks().len(), 1); assert!(seq4.current_block().tokens.is_empty()); assert_eq!(
2652 seq4.current_block().parent_sequence_hash,
2653 Some(SEQ_HASH_1_4)
2654 );
2655 assert_eq!(seq4.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2656
2657 let mut seq5 = create_test_sequence(initial_tokens, block_size, salt_hash);
2659 assert!(seq5.truncate(3).is_ok());
2660 assert_eq!(seq5.total_tokens(), 3);
2661 assert!(seq5.blocks().is_empty()); assert_eq!(seq5.current_block().tokens.as_ref(), &[1, 2, 3]); assert_eq!(seq5.current_block().parent_sequence_hash, None); let mut seq6 = create_test_sequence(initial_tokens, block_size, salt_hash);
2667 assert!(seq6.truncate(0).is_ok());
2668 assert_eq!(seq6.total_tokens(), 0);
2669 assert!(seq6.blocks().is_empty());
2670 assert!(seq6.current_block().tokens.is_empty());
2671 assert_eq!(seq6.current_block().parent_sequence_hash, None);
2672
2673 let mut seq7 = create_test_sequence(initial_tokens, block_size, salt_hash);
2675 let original_state = (seq7.blocks.clone(), seq7.current_block.tokens.clone()); assert!(seq7.truncate(11).is_ok()); assert_eq!(seq7.total_tokens(), 10);
2678 assert_eq!(seq7.blocks, original_state.0);
2679 assert_eq!(seq7.current_block.tokens, original_state.1);
2680
2681 let mut seq8 = create_test_sequence(initial_tokens, block_size, salt_hash);
2683 let original_state = (seq8.blocks.clone(), seq8.current_block.tokens.clone());
2684 assert!(seq8.truncate(10).is_ok());
2685 assert_eq!(seq8.total_tokens(), 10);
2686 assert_eq!(seq8.blocks, original_state.0);
2687 assert_eq!(seq8.current_block.tokens, original_state.1);
2688
2689 let mut seq9 = create_test_sequence(&[], block_size, salt_hash);
2691 assert!(seq9.truncate(0).is_ok());
2692 assert_eq!(seq9.total_tokens(), 0);
2693 assert!(seq9.blocks().is_empty());
2694 assert!(seq9.current_block().tokens.is_empty());
2695
2696 let tokens10 = &[1, 2, 3, 4, 5, 6, 7, 8]; let mut seq10 = create_test_sequence(tokens10, block_size, salt_hash);
2699 assert_eq!(seq10.total_tokens(), 8);
2700 assert!(seq10.current_block().is_empty());
2701 assert!(seq10.truncate(4).is_ok()); assert_eq!(seq10.total_tokens(), 4);
2703 assert_eq!(seq10.blocks().len(), 1);
2704 assert!(seq10.current_block().tokens.is_empty());
2705 assert_eq!(
2706 seq10.current_block().parent_sequence_hash,
2707 Some(SEQ_HASH_1_4)
2708 );
2709
2710 let tokens11 = &[1, 2, 3, 4, 5, 6, 7, 8]; let mut seq11 = create_test_sequence(tokens11, block_size, salt_hash);
2713 assert!(seq11.truncate(3).is_ok()); assert_eq!(seq11.total_tokens(), 3);
2715 assert!(seq11.blocks().is_empty());
2716 assert_eq!(seq11.current_block().tokens.as_ref(), &[1, 2, 3]); assert_eq!(seq11.current_block().parent_sequence_hash, None);
2718 }
2719
2720 #[test]
2721 fn test_unwind() {
2722 let block_size = 4;
2723 let salt_hash = Some(TEST_SALT_HASH);
2724 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2728 assert!(seq.unwind(0).is_ok());
2729 assert_eq!(seq.total_tokens(), 10);
2730
2731 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2733 assert!(seq.unwind(1).is_ok());
2734 assert_eq!(seq.total_tokens(), 9);
2735 assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2736
2737 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2739 assert!(seq.unwind(3).is_ok());
2740 assert_eq!(seq.total_tokens(), 7);
2741 assert_eq!(seq.blocks.len(), 1);
2742 assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2743
2744 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2746 assert!(seq.unwind(10).is_ok());
2747 assert_eq!(seq.total_tokens(), 0);
2748 assert!(seq.blocks.is_empty());
2749 assert!(seq.current_block.is_empty());
2750
2751 let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2753 assert_eq!(seq.unwind(11), Err(TokenBlockError::InsufficientTokens));
2754 assert_eq!(seq.total_tokens(), 10); let mut seq_empty = create_test_sequence(&[], block_size, salt_hash);
2758 assert_eq!(
2759 seq_empty.unwind(1),
2760 Err(TokenBlockError::InsufficientTokens)
2761 );
2762 }
2763
2764 #[test]
2765 fn test_pop() {
2766 let block_size = 4;
2767 let salt_hash = Some(TEST_SALT_HASH);
2768 let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2771
2772 assert_eq!(seq.pop(), Some(10));
2774 assert_eq!(seq.total_tokens(), 9);
2775 assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2776 assert_eq!(seq.blocks.len(), 2);
2777
2778 assert_eq!(seq.pop(), Some(9));
2780 assert_eq!(seq.total_tokens(), 8);
2781 assert!(seq.current_block.is_empty());
2782 assert_eq!(seq.blocks.len(), 2);
2783 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2784
2785 assert_eq!(seq.pop(), Some(8));
2787 assert_eq!(seq.total_tokens(), 7);
2788 assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2789 assert_eq!(seq.blocks.len(), 1);
2790 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2791
2792 assert_eq!(seq.pop(), Some(7));
2794 assert_eq!(seq.pop(), Some(6));
2795 assert_eq!(seq.pop(), Some(5));
2796 assert_eq!(seq.total_tokens(), 4);
2797 assert!(seq.current_block.is_empty());
2798 assert_eq!(seq.blocks.len(), 1);
2799 assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2800
2801 assert_eq!(seq.pop(), Some(4));
2803 assert_eq!(seq.total_tokens(), 3);
2804 assert_eq!(seq.current_block.tokens.as_ref(), &[1, 2, 3]);
2805 assert!(seq.blocks.is_empty());
2806 assert_eq!(seq.current_block.parent_sequence_hash, None);
2807
2808 assert_eq!(seq.pop(), Some(3));
2810 assert_eq!(seq.pop(), Some(2));
2811 assert_eq!(seq.pop(), Some(1));
2812 assert_eq!(seq.total_tokens(), 0);
2813 assert!(seq.current_block.is_empty());
2814 assert!(seq.blocks.is_empty());
2815
2816 assert_eq!(seq.pop(), None);
2818 assert_eq!(seq.total_tokens(), 0);
2819 }
2820
2821 #[test]
2822 fn test_total_tokens() {
2823 let block_size = 3;
2824 let salt_hash = Some(TEST_SALT_HASH);
2825
2826 let mut seq = create_test_sequence(&[], block_size, salt_hash);
2827 assert_eq!(seq.total_tokens(), 0);
2828
2829 seq.extend(Tokens::from(vec![1, 2])).unwrap();
2830 assert_eq!(seq.total_tokens(), 2);
2831
2832 seq.append(3).unwrap(); assert_eq!(seq.total_tokens(), 3);
2834
2835 seq.extend(Tokens::from(vec![4, 5, 6, 7])).unwrap(); assert_eq!(seq.total_tokens(), 7);
2837
2838 seq.pop().unwrap(); assert_eq!(seq.total_tokens(), 6);
2840
2841 seq.truncate(4).unwrap(); assert_eq!(seq.total_tokens(), 4);
2843
2844 seq.unwind(2).unwrap(); assert_eq!(seq.total_tokens(), 2);
2846 }
2847
2848 #[test]
2849 fn test_push_tokens_partial_block() {
2850 let mut partial = PartialTokenBlock::create_sequence_root(4, 1337);
2851
2852 let tokens = Tokens(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2853
2854 let remaining = partial.push_tokens(tokens);
2855 assert_eq!(partial.tokens.len(), 4);
2856 assert_eq!(remaining.len(), 6);
2857 }
2858
2859 #[test]
2864 fn test_positional_radix_tree_basic_operations() {
2865 use crate::PositionalRadixTree;
2866
2867 let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2869 assert!(tree.is_empty());
2870 assert_eq!(tree.len(), 0);
2871
2872 let tree2: PositionalRadixTree<i32> = PositionalRadixTree::default();
2874 assert!(tree2.is_empty());
2875
2876 let psh1 = PositionalSequenceHash::new(0x1234, 0, 0xABCD);
2878 let psh2 = PositionalSequenceHash::new(0x5678, 0, 0xEF01);
2879 let psh3 = PositionalSequenceHash::new(0x9ABC, 1, 0x2345);
2880
2881 tree.prefix(&psh1).insert(psh1, "value1".to_string());
2882 assert!(!tree.is_empty());
2883 assert_eq!(tree.len(), 1);
2884
2885 tree.prefix(&psh2).insert(psh2, "value2".to_string());
2886 assert_eq!(tree.len(), 2);
2887
2888 tree.prefix(&psh3).insert(psh3, "value3".to_string());
2889 assert_eq!(tree.len(), 3);
2890
2891 assert_eq!(
2893 tree.prefix(&psh1).get(&psh1).map(|v| v.clone()),
2894 Some("value1".to_string())
2895 );
2896 }
2897
2898 #[test]
2899 fn test_positional_radix_tree_with_lineage_hash() {
2900 use crate::PositionalRadixTree;
2901
2902 let tree: PositionalRadixTree<u32, PositionalLineageHash> = PositionalRadixTree::new();
2904 assert!(tree.is_empty());
2905
2906 let plh1 = PositionalLineageHash::new(0x1234, None, 0);
2907 let plh2 = PositionalLineageHash::new(0x5678, Some(0x1234), 1);
2908
2909 tree.prefix(&plh1).insert(plh1, 100);
2910 tree.prefix(&plh2).insert(plh2, 200);
2911
2912 assert_eq!(tree.len(), 2);
2913 assert_eq!(tree.prefix(&plh1).get(&plh1).map(|v| *v), Some(100));
2914 assert_eq!(tree.prefix(&plh2).get(&plh2).map(|v| *v), Some(200));
2915 }
2916
2917 #[test]
2918 fn test_positional_radix_tree_position_lookup() {
2919 use crate::PositionalRadixTree;
2920
2921 let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2922
2923 let psh0 = PositionalSequenceHash::new(0x1111, 0, 0xAAAA);
2925 let psh1 = PositionalSequenceHash::new(0x2222, 1, 0xBBBB);
2926 let psh2 = PositionalSequenceHash::new(0x3333, 2, 0xCCCC);
2927
2928 tree.prefix(&psh0).insert(psh0, "pos0".to_string());
2929 tree.prefix(&psh1).insert(psh1, "pos1".to_string());
2930 tree.prefix(&psh2).insert(psh2, "pos2".to_string());
2931
2932 assert!(tree.position(0).is_some());
2934 assert!(tree.position(1).is_some());
2935 assert!(tree.position(2).is_some());
2936 assert!(tree.position(3).is_none()); let pos0_map = tree.position(0).unwrap();
2940 assert_eq!(pos0_map.len(), 1);
2941 }
2942
2943 #[test]
2946 fn test_positional_sequence_hash_mode_2_and_3() {
2947 let position_mode2 = 100_000u64;
2949 let seq_hash = 0x1234567890ABCDEF;
2950 let block_hash = 0xFEDCBA9876543210;
2951
2952 let psh_mode2 = PositionalSequenceHash::new(seq_hash, position_mode2, block_hash);
2953 assert_eq!(psh_mode2.mode(), 2, "Position 100,000 should use mode 2");
2954 assert_eq!(psh_mode2.position(), position_mode2);
2955 assert_eq!(psh_mode2.sequence_hash(), seq_hash);
2956 assert_eq!(
2958 psh_mode2.local_block_hash(),
2959 block_hash & ((1u64 << 38) - 1)
2960 );
2961
2962 let position_mode3 = 100_000_000u64;
2964 let psh_mode3 = PositionalSequenceHash::new(seq_hash, position_mode3, block_hash);
2965 assert_eq!(
2966 psh_mode3.mode(),
2967 3,
2968 "Position 100,000,000 should use mode 3"
2969 );
2970 assert_eq!(psh_mode3.position(), position_mode3);
2971 assert_eq!(psh_mode3.sequence_hash(), seq_hash);
2972 assert_eq!(
2974 psh_mode3.local_block_hash(),
2975 block_hash & ((1u64 << 31) - 1)
2976 );
2977 }
2978
2979 #[test]
2980 fn test_positional_sequence_hash_as_u128() {
2981 let psh = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2982 let raw = psh.as_u128();
2983
2984 assert_eq!(raw & 0xFFFF_FFFF_FFFF_FFFF, 0x1234);
2986 assert!(raw > 0); let psh2 = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2990 assert_eq!(psh.as_u128(), psh2.as_u128());
2991 }
2992
2993 #[test]
2994 fn test_positional_sequence_hash_debug() {
2995 let psh = PositionalSequenceHash::new(0x1234567890ABCDEF, 42, 0xFEDCBA98);
2996 let debug_str = format!("{:?}", psh);
2997
2998 assert!(debug_str.contains("PositionalSequenceHash"));
3000 assert!(debug_str.contains("sequence_hash"));
3001 assert!(debug_str.contains("local_block_hash"));
3002 assert!(debug_str.contains("position"));
3003 }
3004
3005 #[test]
3008 fn test_positional_lineage_hash_debug_and_display() {
3009 let plh_root = PositionalLineageHash::new(0x123456789ABCDEF0, None, 0);
3011 let debug_root = format!("{:?}", plh_root);
3012 let display_root = format!("{}", plh_root);
3013
3014 assert!(debug_root.starts_with("0:"));
3016 assert!(display_root.starts_with("0:"));
3017 assert_eq!(debug_root.matches(':').count(), 1);
3019 assert_eq!(display_root.matches(':').count(), 1);
3020
3021 let plh_child = PositionalLineageHash::new(0xABCDEF0123456789, Some(0x123456789ABCDEF0), 5);
3023 let debug_child = format!("{:?}", plh_child);
3024 let display_child = format!("{}", plh_child);
3025
3026 assert!(debug_child.starts_with("5:"));
3028 assert!(display_child.starts_with("5:"));
3029 assert_eq!(debug_child.matches(':').count(), 2);
3031 assert_eq!(display_child.matches(':').count(), 2);
3032 }
3033
3034 #[test]
3035 fn test_positional_lineage_hash_as_u128() {
3036 let plh = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
3037 let raw = plh.as_u128();
3038
3039 assert!(raw > 0);
3040
3041 let plh2 = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
3043 assert_eq!(plh.as_u128(), plh2.as_u128());
3044
3045 let plh3 = PositionalLineageHash::new(0x1234, Some(0x5678), 11);
3047 assert_ne!(plh.as_u128(), plh3.as_u128());
3048 }
3049
3050 #[test]
3051 fn test_positional_lineage_hash_ord_by_position_then_current_fragment() {
3052 let at_5_low = PositionalLineageHash::new(0x10, Some(0x1111), 5);
3053 let at_5_high = PositionalLineageHash::new(0x20, Some(0x1111), 5);
3054 assert!(
3055 at_5_low.current_sequence_hash() < at_5_high.current_sequence_hash(),
3056 "test assumes distinct current sequence hashes at the same position"
3057 );
3058 assert!(at_5_low < at_5_high);
3059 assert!(at_5_high > at_5_low);
3060
3061 let at_3 = PositionalLineageHash::new(0x99, Some(0x2222), 3);
3062 assert!(at_3 < at_5_low);
3063 assert!(at_5_high < PositionalLineageHash::new(0x01, Some(0x3333), 6));
3064 }
3065
3066 #[test]
3067 fn test_positional_lineage_hash_ord_tiebreak_parent_via_packed_u128() {
3068 let same_pos_same_current = PositionalLineageHash::new(0x1234, Some(0x100), 10);
3069 let same_pos_same_current_other_parent =
3070 PositionalLineageHash::new(0x1234, Some(0x200), 10);
3071 assert_eq!(same_pos_same_current.position(), 10);
3072 assert_eq!(
3073 same_pos_same_current.position(),
3074 same_pos_same_current_other_parent.position()
3075 );
3076 assert_eq!(
3077 same_pos_same_current.current_sequence_hash(),
3078 same_pos_same_current_other_parent.current_sequence_hash()
3079 );
3080 assert_ne!(same_pos_same_current, same_pos_same_current_other_parent);
3081 assert_ne!(
3082 same_pos_same_current.cmp(&same_pos_same_current_other_parent),
3083 std::cmp::Ordering::Equal
3084 );
3085 }
3086
3087 #[test]
3088 fn test_positional_lineage_hash_vec_sort_matches_ord() {
3089 let a = PositionalLineageHash::new(0x30, None, 0);
3090 let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
3091 let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
3092 let mut v = vec![b, a, c];
3093 v.sort();
3094 assert_eq!(v, vec![a, b, c]);
3095 }
3096
3097 #[test]
3098 fn test_positional_lineage_hash_itertools_sorted() {
3099 use itertools::Itertools;
3100
3101 let a = PositionalLineageHash::new(0x30, None, 0);
3102 let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
3103 let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
3104 let sorted: Vec<_> = vec![b, a, c].into_iter().sorted().collect();
3105 assert_eq!(sorted, vec![a, b, c]);
3106 }
3107
3108 #[test]
3111 fn test_tokens_from_vec_usize() {
3112 let usize_vec: Vec<usize> = vec![1, 2, 3, 4, 5];
3113 let tokens = Tokens::from(usize_vec);
3114
3115 assert_eq!(tokens.as_ref(), &[1u32, 2, 3, 4, 5]);
3116 assert_eq!(tokens.len(), 5);
3117 }
3118
3119 #[test]
3120 fn test_tokens_partial_eq_slice_ref() {
3121 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
3122 let slice: &[Token] = &[1, 2, 3, 4];
3123
3124 assert!(tokens == slice);
3126
3127 let different_slice: &[Token] = &[1, 2, 3, 5];
3128 assert!(tokens != different_slice);
3129 }
3130
3131 #[test]
3134 fn test_token_block_accessors() {
3135 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
3136 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
3137
3138 let block = &seq.blocks()[0];
3139
3140 assert_eq!(block.block_size(), 4);
3142
3143 let psh = block.positional_sequence_hash();
3145 assert_eq!(psh.position(), 0);
3146
3147 let plh = block.positional_lineage_hash();
3149 assert_eq!(plh.position(), 0);
3150 assert_eq!(plh.parent_hash_fragment(), 0); }
3152
3153 #[test]
3154 fn test_positional_hash_trait_impls() {
3155 use crate::PositionalHash;
3156
3157 let psh = PositionalSequenceHash::new(0x1234, 42, 0xABCD);
3159 assert_eq!(PositionalHash::position(&psh), 42);
3160
3161 let plh = PositionalLineageHash::new(0x1234, None, 99);
3163 assert_eq!(PositionalHash::position(&plh), 99);
3164 }
3165
3166 #[test]
3169 fn test_sequence_pop_from_full_block() {
3170 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
3172 let mut seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
3173
3174 assert!(seq.current_block().is_empty());
3176 assert_eq!(seq.blocks().len(), 2);
3177 assert_eq!(seq.total_tokens(), 8);
3178
3179 let popped = seq.pop();
3181 assert_eq!(popped, Some(8));
3182 assert_eq!(seq.total_tokens(), 7);
3183 assert_eq!(seq.blocks().len(), 1);
3184 assert_eq!(seq.current_block().tokens.as_ref(), &[5, 6, 7]);
3185 }
3186
3187 #[test]
3188 #[allow(clippy::reversed_empty_ranges)] fn test_sequence_tokens_at_edge_cases() {
3190 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
3191 let seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
3192
3193 assert!(seq.tokens_at(3..2).is_empty());
3195
3196 assert!(seq.tokens_at(0..10).is_empty());
3198
3199 assert_eq!(seq.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
3201 assert_eq!(seq.tokens_at(4..5).as_ref(), &[5]);
3202 }
3203
3204 #[test]
3205 fn test_sequence_next_block() {
3206 let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
3207 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
3208
3209 let block = &seq.blocks()[0];
3210 let next_partial = block.next_block();
3211
3212 assert!(next_partial.is_empty());
3214 assert_eq!(next_partial.remaining(), 4);
3215 assert_eq!(
3216 next_partial.parent_sequence_hash,
3217 Some(block.sequence_hash())
3218 );
3219 assert_eq!(next_partial.position, 1);
3220 }
3221
3222 #[test]
3223 fn test_sequence_reset() {
3224 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9]);
3225 let mut seq = TokenBlockSequence::new(tokens, 4, Some(1337));
3226
3227 assert_eq!(seq.blocks().len(), 2);
3228 assert_eq!(seq.total_tokens(), 9);
3229
3230 seq.reset();
3231
3232 assert!(seq.blocks().is_empty());
3233 assert!(seq.current_block().is_empty());
3234 assert_eq!(seq.total_tokens(), 0);
3235 assert_eq!(seq.current_block().parent_sequence_hash, None);
3236 }
3237
3238 #[test]
3239 fn test_sequence_into_parts() {
3240 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
3241 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
3242
3243 let (blocks, partial) = seq.into_parts();
3244
3245 assert_eq!(blocks.len(), 1);
3246 assert_eq!(partial.tokens.as_ref(), &[5]);
3247 }
3248
3249 #[test]
3250 fn test_sequence_last_complete_block() {
3251 let seq_empty = TokenBlockSequence::new(Tokens::default(), 4, None);
3253 assert!(seq_empty.last_complete_block().is_none());
3254
3255 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
3257 let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
3258 let last = seq.last_complete_block();
3259 assert!(last.is_some());
3260 assert_eq!(last.unwrap().tokens().as_ref(), &[5, 6, 7, 8]);
3261 }
3262
3263 #[test]
3264 fn test_positional_hashes_msgpack_roundtrip() {
3265 let psh = PositionalSequenceHash::new(0xDEAD_BEEF_CAFE_BABE, 12345, 0x0123_4567_89AB_CDEF);
3266 let bytes = rmp_serde::to_vec(&psh).expect("psh serialize");
3267 let decoded: PositionalSequenceHash =
3268 rmp_serde::from_slice(&bytes).expect("psh deserialize");
3269 assert_eq!(psh, decoded);
3270 assert_eq!(psh.as_u128(), decoded.as_u128());
3271
3272 let plh =
3273 PositionalLineageHash::new(0x1111_2222_3333_4444, Some(0x5555_6666_7777_8888), 256);
3274 let bytes = rmp_serde::to_vec(&plh).expect("plh serialize");
3275 let decoded: PositionalLineageHash =
3276 rmp_serde::from_slice(&bytes).expect("plh deserialize");
3277 assert_eq!(plh, decoded);
3278 assert_eq!(plh.as_u128(), decoded.as_u128());
3279
3280 let vec = vec![psh, PositionalSequenceHash::default(), psh];
3282 let bytes = rmp_serde::to_vec(&vec).expect("vec serialize");
3283 let decoded: Vec<PositionalSequenceHash> =
3284 rmp_serde::from_slice(&bytes).expect("vec deserialize");
3285 assert_eq!(vec, decoded);
3286 }
3287
3288 #[test]
3289 fn test_positional_hashes_json_roundtrip() {
3290 let psh = PositionalSequenceHash::new(0xAAAA_BBBB_CCCC_DDDD, 7, 0xEEEE_FFFF_0000_1111);
3292 let json = serde_json::to_string(&psh).expect("psh json serialize");
3293 let decoded: PositionalSequenceHash =
3294 serde_json::from_str(&json).expect("psh json deserialize");
3295 assert_eq!(psh, decoded);
3296
3297 let plh = PositionalLineageHash::new(0x1234_5678, Some(0xABCD_EF01), 42);
3298 let json = serde_json::to_string(&plh).expect("plh json serialize");
3299 let decoded: PositionalLineageHash =
3300 serde_json::from_str(&json).expect("plh json deserialize");
3301 assert_eq!(plh, decoded);
3302 }
3303
3304 #[test]
3310 fn tokens_mm_zero_mm_equivalence() {
3311 let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9]);
3312 let baseline = TokenBlockSequence::new(tokens.clone(), 4, Some(TEST_SALT_HASH));
3313 let mm = TokenBlockSequence::new_with_mm(tokens, &[], 4, Some(TEST_SALT_HASH))
3314 .expect("validation should pass for empty mm_info");
3315
3316 assert_eq!(mm.blocks().len(), baseline.blocks().len());
3317 for (a, b) in mm.blocks().iter().zip(baseline.blocks().iter()) {
3318 assert_eq!(a.salt_hash(), b.salt_hash());
3319 assert_eq!(a.block_hash(), b.block_hash());
3320 assert_eq!(a.sequence_hash(), b.sequence_hash());
3321 assert_eq!(a.parent_sequence_hash(), b.parent_sequence_hash());
3322 assert_eq!(a.positional_lineage_hash(), b.positional_lineage_hash());
3323 }
3324 assert!(mm.mm_runs().is_empty());
3325 }
3326
3327 #[test]
3330 fn tokens_mm_byte_layout() {
3331 let tokens = Tokens::from(vec![100u32, 101, 102, 103, 0, 0, 106, 107]);
3335 let mm = vec![TokenBlockMmInfo {
3336 mm_hash: 0xAAu64,
3337 offset: 4,
3338 length: 2,
3339 }];
3340 let salt = TEST_SALT_HASH;
3341
3342 let mut expected = Vec::new();
3344 for &t in &[100u32, 101, 102, 103] {
3345 expected.push(MM_SLOT_TAG_TOKEN);
3346 expected.extend_from_slice(&t.to_le_bytes());
3347 expected.extend_from_slice(&0u64.to_le_bytes());
3348 }
3349 for run_off in 0u32..2 {
3350 expected.push(MM_SLOT_TAG_PLACEHOLDER);
3351 expected.extend_from_slice(&run_off.to_le_bytes());
3352 expected.extend_from_slice(&0xAAu64.to_le_bytes());
3353 }
3354 for &t in &[106u32, 107] {
3355 expected.push(MM_SLOT_TAG_TOKEN);
3356 expected.extend_from_slice(&t.to_le_bytes());
3357 expected.extend_from_slice(&0u64.to_le_bytes());
3358 }
3359 assert_eq!(expected.len(), 8 * 13);
3360
3361 let helper_bytes = compute_block_bytes_with_mm(&tokens, 0, &mm);
3363 assert_eq!(helper_bytes, expected, "MM-aware byte buffer mismatch");
3364
3365 let expected_block_hash = compute_block_hash(&expected, salt);
3367 let seq = TokenBlockSequence::new_with_mm(tokens, &mm, 8, Some(salt)).unwrap();
3368 assert_eq!(seq.blocks().len(), 1);
3369 assert_eq!(seq.blocks()[0].block_hash(), expected_block_hash);
3370 }
3371
3372 #[test]
3377 fn tokens_mm_no_position_collision() {
3378 let salt = TEST_SALT_HASH;
3379 let tokens_a = Tokens::from(vec![0u32, 0xAB]);
3381 let mm_a = vec![TokenBlockMmInfo {
3382 mm_hash: 0x1122_3344_5566_7788,
3383 offset: 0,
3384 length: 1,
3385 }];
3386 let tokens_b = Tokens::from(vec![0xAB, 0u32]);
3388 let mm_b = vec![TokenBlockMmInfo {
3389 mm_hash: 0x1122_3344_5566_7788,
3390 offset: 1,
3391 length: 1,
3392 }];
3393
3394 let bytes_a = compute_block_bytes_with_mm(&tokens_a, 0, &mm_a);
3395 let bytes_b = compute_block_bytes_with_mm(&tokens_b, 0, &mm_b);
3396 assert_ne!(
3397 bytes_a, bytes_b,
3398 "tagged framing must distinguish slot kinds at different positions"
3399 );
3400
3401 let seq_a = TokenBlockSequence::new_with_mm(tokens_a, &mm_a, 2, Some(salt)).unwrap();
3402 let seq_b = TokenBlockSequence::new_with_mm(tokens_b, &mm_b, 2, Some(salt)).unwrap();
3403 assert_ne!(
3404 seq_a.blocks()[0].block_hash(),
3405 seq_b.blocks()[0].block_hash()
3406 );
3407 }
3408
3409 #[test]
3413 fn tokens_mm_legacy_fallback_per_block() {
3414 let block_size: u32 = 4;
3415 let salt = Some(TEST_SALT_HASH);
3416 let raw = vec![1u32, 2, 3, 4, 5, 6, 7, 8];
3417 let mm = vec![TokenBlockMmInfo {
3419 mm_hash: 0xAB,
3420 offset: 4,
3421 length: 3,
3422 }];
3423 let seq_mm =
3424 TokenBlockSequence::new_with_mm(Tokens::from(raw.clone()), &mm, block_size, salt)
3425 .unwrap();
3426 let seq_plain = TokenBlockSequence::new(Tokens::from(raw), block_size, salt);
3427
3428 assert_eq!(
3430 seq_mm.blocks()[0].block_hash(),
3431 seq_plain.blocks()[0].block_hash()
3432 );
3433 assert_eq!(
3434 seq_mm.blocks()[0].sequence_hash(),
3435 seq_plain.blocks()[0].sequence_hash()
3436 );
3437 assert_ne!(
3439 seq_mm.blocks()[1].block_hash(),
3440 seq_plain.blocks()[1].block_hash()
3441 );
3442 }
3443
3444 #[test]
3447 fn tokens_mm_validation_overflow() {
3448 let bad = vec![TokenBlockMmInfo {
3449 mm_hash: 1,
3450 offset: usize::MAX - 2,
3451 length: 10,
3452 }];
3453 let err = validate_and_sort_mm_info(&bad, usize::MAX).expect_err("must reject overflow");
3454 assert!(matches!(err, MmInfoError::OffsetOverflow { .. }));
3455 }
3456
3457 #[test]
3460 fn tokens_mm_streaming_equals_batch() {
3461 let tokens = Tokens::from(vec![1u32, 2, 3, 0, 0, 0, 0, 6, 7]);
3463 let mm = vec![TokenBlockMmInfo {
3464 mm_hash: 0xAAu64,
3465 offset: 3,
3466 length: 4,
3467 }];
3468 let salt = Some(TEST_SALT_HASH);
3469 let batch = TokenBlockSequence::new_with_mm(tokens, &mm, 4, salt).unwrap();
3470
3471 let mut streamed = TokenBlockSequence::new(Tokens::default(), 4, salt);
3472 streamed.push_token(1).unwrap();
3473 streamed.push_token(2).unwrap();
3474 streamed.push_token(3).unwrap();
3475 streamed.push_mm_run(0xAAu64, 4).unwrap();
3476 streamed.push_token(6).unwrap();
3477 streamed.push_token(7).unwrap();
3478
3479 assert_eq!(streamed.blocks().len(), batch.blocks().len());
3480 for (a, b) in streamed.blocks().iter().zip(batch.blocks().iter()) {
3481 assert_eq!(a.block_hash(), b.block_hash(), "block_hash mismatch");
3482 assert_eq!(a.sequence_hash(), b.sequence_hash(), "seq_hash mismatch");
3483 assert_eq!(
3484 a.positional_lineage_hash(),
3485 b.positional_lineage_hash(),
3486 "PLH mismatch"
3487 );
3488 }
3489 assert_eq!(streamed.mm_runs(), batch.mm_runs());
3490 }
3491
3492 #[test]
3496 fn tokens_mm_multi_block_run() {
3497 let block_size: u32 = 8;
3498 let bs = block_size as usize;
3499 let mut tokens_a: Vec<Token> = vec![0u32; 2 * bs]; tokens_a.extend_from_slice(&[0u32, 0, 0, 0, 100, 101, 102, 103]); let tokens_a = Tokens::from(tokens_a);
3504 let mm = vec![TokenBlockMmInfo {
3505 mm_hash: 0xCAFEBABEu64,
3506 offset: 0,
3507 length: 20,
3508 }];
3509 let seq_a = TokenBlockSequence::new_with_mm(
3510 tokens_a.clone(),
3511 &mm,
3512 block_size,
3513 Some(TEST_SALT_HASH),
3514 )
3515 .unwrap();
3516 assert_eq!(seq_a.blocks().len(), 3);
3517
3518 let bh0 = seq_a.blocks()[0].block_hash();
3521 let bh1 = seq_a.blocks()[1].block_hash();
3522 assert_ne!(
3523 bh0, bh1,
3524 "fully-placeholder blocks at different run_offsets must hash differently"
3525 );
3526
3527 let seq_b =
3529 TokenBlockSequence::new_with_mm(tokens_a, &mm, block_size, Some(TEST_SALT_HASH))
3530 .unwrap();
3531 assert_eq!(
3532 seq_a.blocks()[0].block_hash(),
3533 seq_b.blocks()[0].block_hash()
3534 );
3535 assert_eq!(
3536 seq_a.blocks()[1].block_hash(),
3537 seq_b.blocks()[1].block_hash()
3538 );
3539 assert_eq!(
3540 seq_a.blocks()[2].block_hash(),
3541 seq_b.blocks()[2].block_hash()
3542 );
3543
3544 let mm_diff = vec![TokenBlockMmInfo {
3546 mm_hash: 0xDEADBEEFu64,
3547 offset: 0,
3548 length: 20,
3549 }];
3550 let mut tokens_c: Vec<Token> = vec![0u32; 2 * bs];
3551 tokens_c.extend_from_slice(&[0u32, 0, 0, 0, 100, 101, 102, 103]);
3552 let seq_c = TokenBlockSequence::new_with_mm(
3553 Tokens::from(tokens_c),
3554 &mm_diff,
3555 block_size,
3556 Some(TEST_SALT_HASH),
3557 )
3558 .unwrap();
3559 assert_ne!(
3560 seq_a.blocks()[0].block_hash(),
3561 seq_c.blocks()[0].block_hash()
3562 );
3563 }
3564
3565 #[test]
3567 fn tokens_mm_validation() {
3568 let tokens = Tokens::from(vec![0u32; 32]);
3569 let overlap = vec![
3571 TokenBlockMmInfo {
3572 mm_hash: 1,
3573 offset: 0,
3574 length: 5,
3575 },
3576 TokenBlockMmInfo {
3577 mm_hash: 2,
3578 offset: 4,
3579 length: 5,
3580 },
3581 ];
3582 let err = TokenBlockSequence::new_with_mm(tokens.clone(), &overlap, 4, None).unwrap_err();
3583 assert!(matches!(
3584 err,
3585 TokenBlockError::MmInfo(MmInfoError::Overlapping { .. })
3586 ));
3587
3588 let oob = vec![TokenBlockMmInfo {
3590 mm_hash: 1,
3591 offset: 30,
3592 length: 10,
3593 }];
3594 let err = TokenBlockSequence::new_with_mm(tokens.clone(), &oob, 4, None).unwrap_err();
3595 assert!(matches!(
3596 err,
3597 TokenBlockError::MmInfo(MmInfoError::OutOfBounds { .. })
3598 ));
3599
3600 let empty = vec![TokenBlockMmInfo {
3602 mm_hash: 1,
3603 offset: 0,
3604 length: 0,
3605 }];
3606 let err = TokenBlockSequence::new_with_mm(tokens, &empty, 4, None).unwrap_err();
3607 assert!(matches!(
3608 err,
3609 TokenBlockError::MmInfo(MmInfoError::EmptyRun)
3610 ));
3611
3612 let mut seq = TokenBlockSequence::new(Tokens::default(), 4, None);
3614 let err = seq.push_mm_run(0xAB, 0).unwrap_err();
3615 assert!(matches!(
3616 err,
3617 TokenBlockError::MmInfo(MmInfoError::EmptyRun)
3618 ));
3619
3620 let mut seq = TokenBlockSequence::new(Tokens::from(vec![1u32, 2, 3]), 4, None);
3622 seq.push_mm_run(0xAB, 2).unwrap();
3623 assert!(matches!(
3624 seq.truncate(0).unwrap_err(),
3625 TokenBlockError::MmRunsPresent
3626 ));
3627 assert!(matches!(
3628 seq.unwind(1).unwrap_err(),
3629 TokenBlockError::MmRunsPresent
3630 ));
3631 }
3632}