dynamo_tokens/
lib.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4#![deny(missing_docs)]
5
6//! Types and utilities for handling sequences of tokens, including block creation and hashing.
7
8use bytemuck::cast_slice;
9use derive_getters::Dissolve;
10use std::ops::Range;
11
12pub mod blocks;
13mod radix;
14pub use radix::PositionalRadixTree;
15
16/// Trait for hashes that include position information.
17pub trait PositionalHash {
18    /// Returns the position associated with the hash.
19    fn position(&self) -> u64;
20}
21
22/// A token is represented as a 32-bit unsigned integer.
23pub type Token = u32;
24
25/// A salt used for hashing, represented as a vector of bytes.
26/// This might encode model architecture, weights, PEFT info, etc.
27pub type Salt = Vec<u8>;
28
29/// A 64-bit hash of the salt, computed using [`compute_hash_v2`] with a seed of 0.
30/// Used as the initial seed for subsequent block hashes.
31pub type SaltHash = u64;
32
33/// A 64-bit hash computed only from the tokens within a single block.
34/// It uses [`compute_hash_v2`] with the [`SaltHash`] as the seed.
35pub type BlockHash = u64;
36
37/// A 64-bit sequence-aware hash.
38/// It combines the previous block's [`SequenceHash`] (or the [`SaltHash`] for the first block)
39/// with the current block's [`BlockHash`] using [`compute_hash_v2`] and the [`SaltHash`] as the seed.
40pub type SequenceHash = u64;
41
42/// Computes a hash of the data using the given seed.
43pub fn compute_hash_v2(data: &[u8], seed: u64) -> u64 {
44    xxhash_rust::xxh3::xxh3_64_with_seed(data, seed)
45}
46
47/// Custom serde codec that encodes a `u128` as a 16-byte big-endian byte sequence.
48///
49/// MessagePack (`rmp-serde`) has no native 128-bit integer type, so the default
50/// `u128` derive does not roundtrip reliably. Encoding as raw bytes is supported
51/// uniformly across msgpack, JSON, CBOR, etc.
52mod serde_bytes_u128 {
53    use serde::{Deserializer, Serializer};
54
55    pub fn serialize<S>(val: &u128, serializer: S) -> Result<S::Ok, S::Error>
56    where
57        S: Serializer,
58    {
59        serializer.serialize_bytes(&val.to_be_bytes())
60    }
61
62    pub fn deserialize<'de, D>(deserializer: D) -> Result<u128, D::Error>
63    where
64        D: Deserializer<'de>,
65    {
66        use serde::de::{self, SeqAccess, Visitor};
67        use std::fmt;
68
69        struct V;
70        impl<'de> Visitor<'de> for V {
71            type Value = [u8; 16];
72
73            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
74                f.write_str("16 bytes (msgpack bin) or a sequence of 16 u8 values")
75            }
76
77            fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<[u8; 16], E> {
78                v.try_into()
79                    .map_err(|_| E::invalid_length(v.len(), &"16 bytes"))
80            }
81
82            fn visit_borrowed_bytes<E: de::Error>(self, v: &'de [u8]) -> Result<[u8; 16], E> {
83                self.visit_bytes(v)
84            }
85
86            fn visit_byte_buf<E: de::Error>(self, v: Vec<u8>) -> Result<[u8; 16], E> {
87                self.visit_bytes(&v)
88            }
89
90            fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<[u8; 16], A::Error> {
91                let mut arr = [0u8; 16];
92                for (i, slot) in arr.iter_mut().enumerate() {
93                    *slot = seq
94                        .next_element()?
95                        .ok_or_else(|| de::Error::invalid_length(i, &"16 u8 elements"))?;
96                }
97                Ok(arr)
98            }
99        }
100
101        let arr = deserializer.deserialize_bytes(V)?;
102        Ok(u128::from_be_bytes(arr))
103    }
104}
105
106/// A 128-bit positional sequence hash combining traditional sequence hash with positional information.
107///
108/// Layout:
109/// - Lower 64 bits: Traditional SequenceHash
110/// - Upper 64 bits: 2-bit mode + position + LocalBlockHash (BlockHash)
111///
112/// Modes (automatically selected based on position):
113/// - Mode 00: 8-bit position (max 255) + 54-bit LBH
114/// - Mode 01: 16-bit position (max 65,535) + 46-bit LBH
115/// - Mode 10: 24-bit position (max 16,777,215) + 38-bit LBH
116/// - Mode 11: 31-bit position (max 2,147,483,647) + 31-bit LBH
117#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
118#[serde(transparent)]
119pub struct PositionalSequenceHash(#[serde(with = "serde_bytes_u128")] u128);
120
121impl PositionalSequenceHash {
122    /// Creates a new PositionalSequenceHash from components.
123    ///
124    /// The mode is automatically selected based on the position value to use the minimal
125    /// representation that can fit the position.
126    pub fn new(sequence_hash: SequenceHash, position: u64, local_block_hash: BlockHash) -> Self {
127        let mode = Self::select_mode(position);
128        let upper = Self::encode_upper(mode, position, local_block_hash);
129        let value = ((upper as u128) << 64) | (sequence_hash as u128);
130        PositionalSequenceHash(value)
131    }
132
133    /// Returns the sequence hash component (lower 64 bits).
134    pub fn sequence_hash(&self) -> SequenceHash {
135        (self.0 & 0xFFFF_FFFF_FFFF_FFFF) as u64
136    }
137
138    /// Returns the block position.
139    pub fn position(&self) -> u64 {
140        let (_, position, _) = self.decode_upper();
141        position
142    }
143
144    /// Returns the local block hash (BlockHash) component.
145    pub fn local_block_hash(&self) -> BlockHash {
146        let (_, _, lbh) = self.decode_upper();
147        lbh
148    }
149
150    /// Returns the mode used for encoding (0, 1, 2, or 3).
151    pub fn mode(&self) -> u8 {
152        let (mode, _, _) = self.decode_upper();
153        mode
154    }
155
156    /// Returns the inner 128-bit value.
157    #[inline(always)]
158    pub fn as_u128(&self) -> u128 {
159        self.0
160    }
161
162    /// Selects the minimal mode that can represent the given position.
163    fn select_mode(position: u64) -> u8 {
164        if position < (1u64 << 8) {
165            0 // Mode 00: 8-bit position
166        } else if position < (1u64 << 16) {
167            1 // Mode 01: 16-bit position
168        } else if position < (1u64 << 24) {
169            2 // Mode 10: 24-bit position
170        } else if position < (1u64 << 31) {
171            3 // Mode 11: 31-bit position
172        } else {
173            panic!(
174                "Position {} exceeds maximum supported value (2^31 - 1)",
175                position
176            );
177        }
178    }
179
180    /// Encodes the upper 64 bits from mode, position, and local block hash.
181    fn encode_upper(mode: u8, position: u64, local_block_hash: u64) -> u64 {
182        let (position_bits, lbh_bits) = match mode {
183            0 => (8, 54),  // 2 + 8 + 54 = 64
184            1 => (16, 46), // 2 + 16 + 46 = 64
185            2 => (24, 38), // 2 + 24 + 38 = 64
186            3 => (31, 31), // 2 + 31 + 31 = 64
187            _ => unreachable!(
188                "Invalid mode {} when encoding PositionalSequenceHash; mode must be 0, 1, 2, or 3",
189                mode
190            ),
191        };
192
193        // Create masks for extracting the relevant bits
194        let position_mask = (1u64 << position_bits) - 1;
195        let lbh_mask = (1u64 << lbh_bits) - 1;
196
197        // Extract and position components
198        let position_part = position & position_mask;
199        let lbh_part = local_block_hash & lbh_mask;
200
201        // Combine: [mode (2 bits)][position (X bits)][lbh (R bits)]
202        ((mode as u64) << 62) | (position_part << lbh_bits) | lbh_part
203    }
204
205    /// Decodes the upper 64 bits into (mode, position, local_block_hash).
206    fn decode_upper(&self) -> (u8, u64, u64) {
207        let upper = (self.0 >> 64) as u64;
208
209        // Extract mode from top 2 bits
210        let mode = (upper >> 62) as u8;
211
212        let (position_bits, lbh_bits) = match mode {
213            0 => (8, 54),
214            1 => (16, 46),
215            2 => (24, 38),
216            3 => (31, 31),
217            _ => unreachable!(
218                "Invalid mode {} in PositionalSequenceHash - value may be corrupted",
219                mode
220            ),
221        };
222
223        // Create masks
224        let lbh_mask = (1u64 << lbh_bits) - 1;
225        let position_mask = (1u64 << position_bits) - 1;
226
227        // Extract components
228        let lbh = upper & lbh_mask;
229        let position = (upper >> lbh_bits) & position_mask;
230
231        (mode, position, lbh)
232    }
233}
234
235impl std::fmt::Debug for PositionalSequenceHash {
236    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
237        f.debug_struct("PositionalSequenceHash")
238            .field("sequence_hash", &self.sequence_hash())
239            .field("local_block_hash", &self.local_block_hash())
240            .field("position", &self.position())
241            .finish()
242    }
243}
244
245/// A 128-bit positional lineage hash encoding parental lineage for tree traversal.
246///
247/// Layout (using full 128 bits):
248/// - Mode (2 bits): Determines position field size
249/// - Position (8/16/24 bits): Block position in sequence
250/// - Parent Fragment (variable bits): Fragment of parent's sequence hash
251/// - Current Fragment (variable bits): Fragment of current sequence hash
252///
253/// Modes (automatically selected based on position):
254/// - Mode 00: 8-bit position (max 255) + 59-bit parent + 59-bit current
255/// - Mode 01: 16-bit position (max 65,535) + 55-bit parent + 55-bit current
256/// - Mode 10: 24-bit position (max 16,777,215) + 51-bit parent + 51-bit current
257///
258/// This encoding enables backward traversal through the radix tree by matching
259/// parent fragments at position-1.
260#[derive(Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
261#[serde(transparent)]
262pub struct PositionalLineageHash(#[serde(with = "serde_bytes_u128")] u128);
263
264impl PositionalLineageHash {
265    /// Creates a new PositionalLineageHash from components.
266    ///
267    /// The mode is automatically selected based on the position value to use the minimal
268    /// representation that can fit the position.
269    ///
270    /// # Arguments
271    ///
272    /// * `current_seq_hash` - The sequence hash of the current block
273    /// * `parent_seq_hash` - The sequence hash of the parent block (None for root)
274    /// * `position` - The block position in the sequence
275    ///
276    /// # Panics
277    ///
278    /// Panics if position >= 2^24 (16,777,216).
279    pub fn new(
280        current_seq_hash: SequenceHash,
281        parent_seq_hash: Option<SequenceHash>,
282        position: u64,
283    ) -> Self {
284        if position >= (1u64 << 24) {
285            panic!(
286                "Position {} exceeds maximum supported value (2^24 - 1 = 16,777,215)",
287                position
288            );
289        }
290
291        let mode = Self::select_mode(position);
292        let (position_bits, parent_bits, current_bits) = Self::bit_layout(mode);
293
294        // CRITICAL: For cross-mode boundary matching, we need to align the current hash
295        // to the bits available at the next position. This ensures that when position+1
296        // stores our current hash as its parent, the fragments will match.
297        let next_mode = Self::select_mode(position + 1);
298        let (_, next_parent_bits, _) = Self::bit_layout(next_mode);
299
300        // Use the minimum of current_bits and next_parent_bits to ensure alignment
301        let aligned_current_bits = current_bits.min(next_parent_bits);
302
303        // Create masks
304        let position_mask = (1u128 << position_bits) - 1;
305        let parent_mask = (1u128 << parent_bits) - 1;
306        let current_mask = (1u128 << aligned_current_bits) - 1;
307
308        // Extract fragments (LSB-aligned for subset compatibility across modes)
309        let position_part = (position as u128) & position_mask;
310        let parent_part = (parent_seq_hash.unwrap_or(0) as u128) & parent_mask;
311        let current_part = (current_seq_hash as u128) & current_mask;
312
313        // Pack: [mode (2)][position (P)][parent (M)][current (N)]
314        // Note: We still allocate current_bits in the layout, but only use aligned_current_bits
315        let value = ((mode as u128) << 126)
316            | (position_part << (parent_bits + current_bits))
317            | (parent_part << current_bits)
318            | current_part;
319
320        PositionalLineageHash(value)
321    }
322
323    /// Returns the block position.
324    pub fn position(&self) -> u64 {
325        let mode = self.mode();
326        let (position_bits, parent_bits, current_bits) = Self::bit_layout(mode);
327        let position_mask = (1u128 << position_bits) - 1;
328        ((self.0 >> (parent_bits + current_bits)) & position_mask) as u64
329    }
330
331    /// Returns the current sequence hash fragment.
332    pub fn current_hash_fragment(&self) -> u64 {
333        let mode = self.mode();
334        let (_, _, current_bits) = Self::bit_layout(mode);
335        let current_mask = (1u128 << current_bits) - 1;
336        (self.0 & current_mask) as u64
337    }
338
339    /// Returns the parent sequence hash fragment.
340    pub fn parent_hash_fragment(&self) -> u64 {
341        let mode = self.mode();
342        let (_, parent_bits, current_bits) = Self::bit_layout(mode);
343        let parent_mask = (1u128 << parent_bits) - 1;
344        ((self.0 >> current_bits) & parent_mask) as u64
345    }
346
347    /// Returns the mode used for encoding (0, 1, or 2).
348    pub fn mode(&self) -> u8 {
349        (self.0 >> 126) as u8
350    }
351
352    /// Returns the inner 128-bit value.
353    #[inline(always)]
354    pub fn as_u128(&self) -> u128 {
355        self.0
356    }
357
358    /// Selects the minimal mode that can represent the given position.
359    fn select_mode(position: u64) -> u8 {
360        if position < (1u64 << 8) {
361            0 // Mode 00: 8-bit position
362        } else if position < (1u64 << 16) {
363            1 // Mode 01: 16-bit position
364        } else {
365            2 // Mode 10: 24-bit position
366        }
367    }
368
369    /// Returns the bit layout for a given mode: (position_bits, parent_bits, current_bits).
370    fn bit_layout(mode: u8) -> (u32, u32, u32) {
371        match mode {
372            0 => (8, 59, 59),  // 2 + 8 + 59 + 59 = 128
373            1 => (16, 55, 55), // 2 + 16 + 55 + 55 = 128
374            2 => (24, 51, 51), // 2 + 24 + 51 + 51 = 128
375            _ => unreachable!(
376                "Invalid mode {} in PositionalLineageHash; mode must be 0, 1, or 2",
377                mode
378            ),
379        }
380    }
381}
382
383impl PositionalLineageHash {
384    fn format_impl(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385        let position = self.position();
386        let current_hash = self.current_hash_fragment();
387        let current_hash_b58 = bs58::encode(current_hash.to_be_bytes()).into_string();
388
389        if position == 0 {
390            write!(f, "{}:{}", position, current_hash_b58)
391        } else {
392            let parent_hash = self.parent_hash_fragment();
393            let parent_hash_b58 = bs58::encode(parent_hash.to_be_bytes()).into_string();
394            write!(f, "{}:{}:{}", position, current_hash_b58, parent_hash_b58)
395        }
396    }
397}
398
399impl std::fmt::Debug for PositionalLineageHash {
400    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
401        self.format_impl(f)
402    }
403}
404
405impl std::fmt::Display for PositionalLineageHash {
406    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407        self.format_impl(f)
408    }
409}
410
411impl std::cmp::PartialOrd for PositionalLineageHash {
412    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
413        Some(self.cmp(other))
414    }
415}
416
417impl std::cmp::Ord for PositionalLineageHash {
418    /// Lexicographic order: [`Self::position`], then [`Self::current_hash_fragment`],
419    /// then the full packed [`Self::as_u128`] so the order is total and consistent with [`Eq`].
420    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
421        self.position()
422            .cmp(&other.position())
423            .then_with(|| {
424                self.current_hash_fragment()
425                    .cmp(&other.current_hash_fragment())
426            })
427            .then_with(|| self.0.cmp(&other.0))
428    }
429}
430
431/// A collection of tokens, represented as a `Vec<Token>`.
432///
433/// Provides convenience methods for conversion and manipulation.
434#[derive(Debug, Clone, Dissolve, Default, Eq)]
435pub struct Tokens(Vec<Token>);
436
437impl AsRef<[Token]> for Tokens {
438    fn as_ref(&self) -> &[Token] {
439        &self.0
440    }
441}
442
443impl std::ops::Deref for Tokens {
444    type Target = [Token];
445
446    fn deref(&self) -> &Self::Target {
447        &self.0
448    }
449}
450
451impl std::borrow::Borrow<[Token]> for Tokens {
452    fn borrow(&self) -> &[Token] {
453        &self.0
454    }
455}
456
457impl From<Vec<Token>> for Tokens {
458    fn from(tokens: Vec<Token>) -> Self {
459        Tokens(tokens)
460    }
461}
462
463impl From<&[Token]> for Tokens {
464    fn from(tokens: &[Token]) -> Self {
465        Tokens(tokens.to_vec())
466    }
467}
468
469impl From<Vec<usize>> for Tokens {
470    fn from(tokens: Vec<usize>) -> Self {
471        Tokens(
472            tokens
473                .into_iter()
474                .map(|t| t.try_into().expect("Token ID exceeds u32::MAX"))
475                .collect(),
476        )
477    }
478}
479
480impl From<Vec<i32>> for Tokens {
481    /// Converts `Vec<i32>` to `Tokens`, casting each `i32` to `u32`.
482    fn from(tokens: Vec<i32>) -> Self {
483        Tokens(tokens.into_iter().map(|t| t as u32).collect())
484    }
485}
486
487impl From<&[i32]> for Tokens {
488    /// Converts `&[i32]` to `Tokens`, casting each `i32` to `u32`.
489    fn from(tokens: &[i32]) -> Self {
490        Tokens(tokens.iter().map(|&t| t as u32).collect())
491    }
492}
493
494impl From<Tokens> for Vec<Token> {
495    fn from(tokens: Tokens) -> Self {
496        tokens.0
497    }
498}
499
500// PartialEq implementations for comparing Tokens with Vec<Token> and &[Token]
501// (Generated implementations are usually sufficient, but explicit ones can be clearer)
502impl PartialEq<Vec<Token>> for Tokens {
503    fn eq(&self, other: &Vec<Token>) -> bool {
504        self.0 == *other
505    }
506}
507
508impl PartialEq<Tokens> for Vec<Token> {
509    fn eq(&self, other: &Tokens) -> bool {
510        *self == other.0
511    }
512}
513
514impl PartialEq<[Token]> for Tokens {
515    fn eq(&self, other: &[Token]) -> bool {
516        self.0.as_slice() == other
517    }
518}
519
520impl PartialEq<Tokens> for &[Token] {
521    fn eq(&self, other: &Tokens) -> bool {
522        *self == other.0.as_slice()
523    }
524}
525
526impl PartialEq for Tokens {
527    fn eq(&self, other: &Self) -> bool {
528        self.0 == other.0
529    }
530}
531
532// Add PartialEq<&[T]> where T: Into<Token> + Copy could be more general,
533// but specifically implementing for &[Token] is sufficient for the tests.
534impl PartialEq<&[Token]> for Tokens {
535    fn eq(&self, other: &&[Token]) -> bool {
536        self.0.as_slice() == *other
537    }
538}
539
540impl Tokens {
541    fn with_capacity(capacity: usize) -> Self {
542        Tokens(Vec::with_capacity(capacity))
543    }
544
545    /// Consumes the [`Tokens`] object and creates a [`TokenBlockSequence`].
546    ///
547    /// The sequence is initialized with the provided tokens, splitting them into blocks
548    /// of the specified `block_size` using the given `salt_hash` (or 0 if `None`).
549    ///
550    /// # Arguments
551    ///
552    /// * `block_size` - The fixed size for each [`TokenBlock`].
553    /// * `salt_hash` - An optional [`SaltHash`] used as the base seed for hashing. Defaults to 0.
554    pub fn into_sequence(self, block_size: u32, salt_hash: Option<SaltHash>) -> TokenBlockSequence {
555        TokenBlockSequence::new(self, block_size, salt_hash)
556    }
557}
558
559/// Errors that can occur during [`PartialTokenBlock`] operations.
560#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
561pub enum TokenBlockError {
562    /// The operation could not be completed because the block is full.
563    #[error("TokenBlock is full")]
564    Full,
565
566    /// The operation requires a full block, but the block is incomplete.
567    #[error("TokenBlock is incomplete")]
568    Incomplete,
569
570    /// The operation could not be completed because the block is empty.
571    #[error("TokenBlock is empty")]
572    Empty,
573
574    /// The operation requires more tokens than are currently in the block.
575    #[error("TokenBlock has insufficient tokens")]
576    InsufficientTokens,
577}
578
579/// Represents a partially filled block of tokens within a sequence.
580///
581/// This structure accumulates tokens until it reaches the specified `block_size`,
582/// at which point it can be [`commit`](PartialTokenBlock::commit)ted into a full [`TokenBlock`].
583#[derive(Debug, PartialEq)] // No Clone: intended to be unique within a sequence
584pub struct PartialTokenBlock {
585    tokens: Tokens,
586    block_size: u32,
587    salt_hash: SaltHash,
588    parent_sequence_hash: Option<SequenceHash>,
589    position: usize, // The position this block will have when committed
590}
591
592impl PartialTokenBlock {
593    /// Creates the first partial block (root) for a new sequence.
594    ///
595    /// # Arguments
596    ///
597    /// * `block_size` - The fixed size for blocks in this sequence.
598    /// * `salt_hash` - The [`SaltHash`] for the sequence.
599    pub(crate) fn create_sequence_root(block_size: u32, salt_hash: SaltHash) -> Self {
600        Self {
601            tokens: Tokens::with_capacity(block_size as usize),
602            block_size,
603            salt_hash,
604            parent_sequence_hash: None, // Root has no parent
605            position: 0,                // First block is at position 0
606        }
607    }
608
609    /// Attempts to push multiple tokens onto the block from a [`Tokens`] object.
610    ///
611    /// Tokens are added until the block is full or all input tokens are consumed.
612    ///
613    /// # Arguments
614    ///
615    /// * `tokens` - The [`Tokens`] to push.
616    ///
617    /// # Returns
618    ///
619    /// A new [`Tokens`] object containing any tokens that did not fit,
620    /// if all tokens were added, the returned object will be empty.
621    pub(crate) fn push_tokens(&mut self, tokens: Tokens) -> Tokens {
622        let remaining_space = self.remaining();
623
624        if remaining_space == 0 {
625            return tokens; // Block is already full
626        }
627
628        if tokens.0.len() <= remaining_space {
629            // All tokens fit
630            self.tokens.0.extend(tokens.0);
631            Tokens::default() // No remaining tokens
632        } else {
633            // Only some tokens fit
634            let (to_add, remaining) = tokens.0.split_at(remaining_space);
635            self.tokens.0.extend_from_slice(to_add);
636            Tokens(remaining.to_vec()) // Return the leftover tokens
637        }
638    }
639
640    /// Attempts to push a single token onto the block.
641    ///
642    /// # Returns
643    ///
644    /// * `Ok(())` - If the token was successfully added.
645    /// * `Err(TokenBlockError::Full)` - If the block already contains `block_size` tokens.
646    pub(crate) fn push_token(&mut self, token: Token) -> Result<(), TokenBlockError> {
647        if self.tokens.0.len() >= self.block_size as usize {
648            return Err(TokenBlockError::Full);
649        }
650        self.tokens.0.push(token);
651        Ok(())
652    }
653
654    /// Attempts to remove the last `count` tokens from the block.
655    ///
656    /// # Arguments
657    ///
658    /// * `count` - The number of tokens to remove.
659    ///
660    /// # Returns
661    ///
662    /// * `Ok(())` - If the specified number of tokens were successfully removed.
663    /// * `Err(TokenBlockError::InsufficientTokens)` - If `count` is greater than the number of tokens in the block.
664    pub(crate) fn pop_tokens(&mut self, count: usize) -> Result<(), TokenBlockError> {
665        if self.tokens.0.len() < count {
666            return Err(TokenBlockError::InsufficientTokens);
667        }
668        self.tokens.0.truncate(self.tokens.0.len() - count);
669        Ok(())
670    }
671
672    /// Attempts to commit the current partial block into a full [`TokenBlock`].
673    ///
674    /// This operation consumes the tokens within the partial block.
675    /// After a successful commit, this `PartialTokenBlock` instance is reset
676    /// to represent the *next* partial block in the sequence, inheriting the
677    /// sequence hash from the block just committed.
678    ///
679    /// # Returns
680    ///
681    /// * `Ok(TokenBlock)` - The newly created full [`TokenBlock`].
682    /// * `Err(TokenBlockError::Incomplete)` - If the block does not contain exactly `block_size` tokens.
683    pub fn commit(&mut self) -> Result<TokenBlock, TokenBlockError> {
684        if self.tokens.0.len() != self.block_size as usize {
685            // Check for exact size match for committing
686            return Err(TokenBlockError::Incomplete);
687        }
688
689        // Take ownership of the tokens, leaving the internal tokens empty
690        let tokens = std::mem::replace(
691            &mut self.tokens,
692            Tokens::with_capacity(self.block_size as usize),
693        );
694
695        let chunk = TokenBlockChunk::new(tokens, self.salt_hash);
696        let block = TokenBlock::from_chunk(chunk, self.parent_sequence_hash, self.position);
697
698        // Reset self to be the next block in the sequence
699        self.parent_sequence_hash = Some(block.sequence_hash());
700        self.position += 1; // Increment position for the next block
701        // self.block_size and self.salt_hash remain the same
702
703        Ok(block)
704    }
705
706    /// Returns the number of additional tokens required to fill the block.
707    pub fn remaining(&self) -> usize {
708        // Use saturating_sub to prevent underflow if len somehow exceeds block_size
709        (self.block_size as usize).saturating_sub(self.tokens.0.len())
710    }
711
712    /// Returns the number of tokens currently in the block.
713    pub fn len(&self) -> usize {
714        self.tokens.0.len()
715    }
716
717    /// Returns `true` if the block contains no tokens.
718    pub fn is_empty(&self) -> bool {
719        self.tokens.0.is_empty()
720    }
721
722    /// Returns a reference to the tokens currently in the block.
723    pub fn tokens(&self) -> &Tokens {
724        &self.tokens
725    }
726}
727
728// Deref allows treating &PartialTokenBlock like &Tokens for read-only access.
729impl std::ops::Deref for PartialTokenBlock {
730    type Target = Tokens;
731
732    fn deref(&self) -> &Self::Target {
733        &self.tokens
734    }
735}
736
737/// An intermediate structure holding a chunk of tokens destined to become a [`TokenBlock`].
738///
739/// This calculates the [`BlockHash`] but does not compute the final [`SequenceHash`],
740/// allowing chunks to be processed independently (e.g., in parallel).
741#[derive(Debug)] // No Clone: temporary intermediate value
742struct TokenBlockChunk {
743    tokens: Tokens,
744    salt_hash: SaltHash,
745    block_hash: BlockHash,
746}
747
748impl TokenBlockChunk {
749    /// Creates a new chunk from [`Tokens`], calculating the [`BlockHash`].
750    fn new(tokens: Tokens, salt_hash: SaltHash) -> Self {
751        let block_hash = compute_hash_v2(cast_slice(&tokens), salt_hash);
752        Self {
753            tokens,
754            salt_hash,
755            block_hash,
756        }
757    }
758
759    /// Creates a new chunk from a slice of `&[Token]`, calculating the [`BlockHash`].
760    fn from_tokens(tokens: &[Token], salt_hash: SaltHash) -> Self {
761        let block_hash = compute_hash_v2(cast_slice(tokens), salt_hash);
762        Self {
763            tokens: tokens.into(), // Converts slice to owned Tokens
764            salt_hash,
765            block_hash,
766        }
767    }
768}
769
770/// Represents a completed, immutable block of tokens with associated hashes.
771///
772/// Contains exactly `block_size` tokens and includes the [`SaltHash`], [`BlockHash`],
773/// [`SequenceHash`], [`PositionalSequenceHash`], [`PositionalLineageHash`], and optionally the parent's [`SequenceHash`].
774#[derive(Debug, Clone, Default, PartialEq)] // Add PartialEq for tests
775pub struct TokenBlock {
776    tokens: Tokens,
777    salt_hash: SaltHash,
778    block_hash: BlockHash,
779    sequence_hash: SequenceHash,
780    parent_sequence_hash: Option<SequenceHash>,
781    positional_sequence_hash: PositionalSequenceHash,
782    positional_lineage_hash: PositionalLineageHash,
783}
784
785impl TokenBlock {
786    /// Creates a new [`PartialTokenBlock`] representing the block immediately following this one.
787    ///
788    /// The new partial block will have the correct `parent_sequence_hash` and `position` set.
789    pub fn next_block(&self) -> PartialTokenBlock {
790        PartialTokenBlock {
791            tokens: Tokens::with_capacity(self.tokens.len()),
792            block_size: self.tokens.len() as u32, // Should be == self.block_size
793            salt_hash: self.salt_hash,
794            parent_sequence_hash: Some(self.sequence_hash), // Link to this block
795            position: self.position() as usize + 1,         // Next position
796        }
797    }
798
799    /// Finalizes a [`TokenBlock`] from a [`TokenBlockChunk`], parent's sequence hash, and position.
800    ///
801    /// This computes the final [`SequenceHash`], [`PositionalSequenceHash`], and [`PositionalLineageHash`] for the block.
802    fn from_chunk(
803        chunk: TokenBlockChunk,
804        parent_sequence_hash: Option<SequenceHash>,
805        position: usize,
806    ) -> Self {
807        let sequence_hash = match parent_sequence_hash {
808            Some(parent) => {
809                // Combine parent sequence hash and current block hash
810                compute_hash_v2(cast_slice(&[parent, chunk.block_hash]), chunk.salt_hash)
811            }
812            None => {
813                // First block: sequence hash is just the block hash
814                chunk.block_hash
815            }
816        };
817
818        let positional_sequence_hash = PositionalSequenceHash::new(
819            sequence_hash,
820            position as u64,
821            chunk.block_hash, // LocalBlockHash is the same as BlockHash
822        );
823
824        let positional_lineage_hash =
825            PositionalLineageHash::new(sequence_hash, parent_sequence_hash, position as u64);
826
827        Self {
828            tokens: chunk.tokens,
829            salt_hash: chunk.salt_hash,
830            block_hash: chunk.block_hash,
831            sequence_hash,
832            parent_sequence_hash,
833            positional_sequence_hash,
834            positional_lineage_hash,
835        }
836    }
837
838    /// Returns a reference to the tokens in this block.
839    pub fn tokens(&self) -> &Tokens {
840        &self.tokens
841    }
842
843    /// Returns the salt hash used for this block's hashing.
844    pub fn salt_hash(&self) -> SaltHash {
845        self.salt_hash
846    }
847
848    /// Returns the hash of only the tokens within this block.
849    pub fn block_hash(&self) -> BlockHash {
850        self.block_hash
851    }
852
853    /// Returns the sequence-aware hash for this block.
854    pub fn sequence_hash(&self) -> SequenceHash {
855        self.sequence_hash
856    }
857
858    /// Returns the sequence hash of the preceding block, if any.
859    pub fn parent_sequence_hash(&self) -> Option<SequenceHash> {
860        self.parent_sequence_hash
861    }
862
863    /// Returns the number of tokens in the block.
864    pub fn block_size(&self) -> usize {
865        self.tokens.0.len()
866    }
867
868    /// Returns the positional sequence hash for this block.
869    pub fn positional_sequence_hash(&self) -> PositionalSequenceHash {
870        self.positional_sequence_hash
871    }
872
873    /// Returns the positional lineage hash for this block.
874    pub fn positional_lineage_hash(&self) -> PositionalLineageHash {
875        self.positional_lineage_hash
876    }
877
878    /// Returns the position of this block in the sequence.
879    pub fn position(&self) -> u64 {
880        self.positional_sequence_hash.position()
881    }
882}
883
884impl PositionalHash for PositionalSequenceHash {
885    fn position(&self) -> u64 {
886        self.position()
887    }
888}
889
890impl PositionalHash for PositionalLineageHash {
891    fn position(&self) -> u64 {
892        self.position()
893    }
894}
895
896/// Represents a sequence of tokens, segmented into fixed-size, hashed blocks.
897///
898/// This structure manages a series of completed [`TokenBlock`]s and one
899/// [`PartialTokenBlock`] for accumulating incoming tokens.
900/// It provides methods for appending tokens (`append`, `extend`), removing tokens
901/// (`pop`, `truncate`, `unwind`), and accessing sequence information.
902///
903/// Hashing incorporates an initial [`SaltHash`] to ensure uniqueness across different
904/// contexts (e.g., different models, PEFTs).
905///
906/// Key Hashes:
907/// - [`BlockHash`]: Hash of tokens within a single block (seeded by [`SaltHash`]).
908/// - [`SequenceHash`]: Hash combining the previous block's [`SequenceHash`] and the current
909///   block's [`BlockHash`] (also seeded by [`SaltHash`]).
910#[derive(Debug, PartialEq)]
911pub struct TokenBlockSequence {
912    blocks: Vec<TokenBlock>,
913    current_block: PartialTokenBlock,
914    salt_hash: SaltHash,
915    block_size: usize,
916}
917
918impl TokenBlockSequence {
919    /// Creates a new [`TokenBlockSequence`] from an initial set of tokens.
920    ///
921    /// The tokens are split into blocks of `block_size`. Any remaining tokens
922    /// form the initial `current_block`.
923    ///
924    /// # Arguments
925    ///
926    /// * `tokens` - The initial [`Tokens`] for the sequence.
927    /// * `block_size` - The fixed size for each [`TokenBlock`]. Must be greater than 0.
928    /// * `salt_hash` - An optional [`SaltHash`]. Defaults to 0 if `None`.
929    ///
930    /// # Panics
931    ///
932    /// Panics if `block_size` is 0.
933    pub fn new(tokens: Tokens, block_size: u32, salt_hash: Option<SaltHash>) -> Self {
934        assert!(block_size > 0, "block_size must be greater than 0");
935        let salt_hash = salt_hash.unwrap_or(0);
936        let (blocks, current_block) = Self::split_tokens(&tokens, block_size, salt_hash);
937
938        Self {
939            blocks,
940            current_block,
941            salt_hash,
942            block_size: block_size as usize,
943        }
944    }
945
946    /// Extends the sequence with the given tokens, potentially completing multiple blocks.
947    ///
948    /// This method processes all tokens from the input [`Tokens`] object.
949    /// If adding tokens causes one or more blocks to become full, they are committed
950    /// and added to the internal list of completed blocks.
951    ///
952    /// # Arguments
953    ///
954    /// * `tokens` - The [`Tokens`] object containing the tokens to extend the sequence with.
955    ///
956    /// # Returns
957    ///
958    /// * `Ok(Some(Range<usize>))` - The range of indices in the `blocks` vector corresponding
959    ///   to the blocks completed during this `extend` operation.
960    /// * `Ok(None)` - If no blocks were completed.
961    /// * `Err(TokenBlockError)` - If an internal error occurs during commit.
962    pub fn extend(&mut self, tokens: Tokens) -> Result<Option<Range<usize>>, TokenBlockError> {
963        let start_block_index = self.blocks.len();
964        let mut tokens_to_append = tokens;
965
966        while !tokens_to_append.is_empty() {
967            let remaining_in_current = self.current_block.remaining();
968
969            if remaining_in_current == 0 {
970                // Current block is full, commit it first
971                let new_block = self.current_block.commit()?;
972                self.blocks.push(new_block);
973                // Continue loop to add tokens to the *new* current_block
974            }
975
976            // Push as many tokens as possible into the current (potentially new) block
977            let available_tokens = tokens_to_append;
978            tokens_to_append = self.current_block.push_tokens(available_tokens);
979
980            // Check if the current block *became* full after pushing tokens
981            if self.current_block.remaining() == 0 {
982                // If it became full AND there are still more tokens to append,
983                // commit it now so the next loop iteration starts with a fresh block.
984                let new_block = self.current_block.commit()?;
985                self.blocks.push(new_block);
986            }
987        }
988
989        let end_block_index = self.blocks.len();
990        if start_block_index == end_block_index {
991            Ok(None) // No blocks were completed
992        } else {
993            Ok(Some(start_block_index..end_block_index))
994        }
995    }
996
997    /// Appends a single token to the sequence.
998    ///
999    /// If adding this token completes the current partial block, the block is committed,
1000    /// and the index of the newly completed block is returned.
1001    ///
1002    /// This method is equivalent to calling [`extend`] with a single-token [`Tokens`] object.
1003    ///
1004    /// # Arguments
1005    ///
1006    /// * `token` - The [`Token`] to append.
1007    ///
1008    /// # Returns
1009    ///
1010    /// * `Ok(Some(usize))` - The index of the block that was just completed.
1011    /// * `Ok(None)` - No block was completed by adding this token.
1012    /// * `Err(TokenBlockError)` - If an internal error occurs during processing.
1013    pub fn append(&mut self, token: Token) -> Result<Option<usize>, TokenBlockError> {
1014        if self.current_block.remaining() == 0 {
1015            let new_block = self.current_block.commit()?;
1016            self.blocks.push(new_block);
1017        }
1018
1019        self.current_block.push_token(token)?;
1020        if self.current_block.remaining() != 0 {
1021            return Ok(None);
1022        }
1023
1024        let completed_idx = self.blocks.len();
1025        let new_block = self.current_block.commit()?;
1026        self.blocks.push(new_block);
1027        Ok(Some(completed_idx))
1028    }
1029
1030    /// Shortens the sequence, keeping the first `len` tokens and removing the rest.
1031    ///
1032    /// If `len` is greater than the sequence's current length, this has no effect.
1033    ///
1034    /// This operation is analogous to `Vec::truncate`.
1035    /// It may involve removing tokens from the current partial block, removing entire
1036    /// completed blocks, and adjusting the current partial block
1037    /// to reflect the new end of the sequence.
1038    ///
1039    /// # Arguments
1040    ///
1041    /// * `len` - The number of tokens to keep.
1042    ///
1043    /// # Returns
1044    ///
1045    /// * `Ok(())` - If the sequence was successfully truncated.
1046    /// * `Err(TokenBlockError::InsufficientTokens)` - This error should ideally not occur if `len`
1047    ///   is correctly checked against `total_tokens`, but the underlying `pop_tokens` might return it.
1048    pub fn truncate(&mut self, len: usize) -> Result<(), TokenBlockError> {
1049        let current_total_len = self.total_tokens();
1050        if len >= current_total_len {
1051            return Ok(()); // Nothing to truncate
1052        }
1053
1054        let n = current_total_len - len; // Number of tokens to remove
1055
1056        // This inner block handles the actual removal logic based on `n` tokens to remove.
1057        {
1058            let current_len = self.current_block.len();
1059            // Avoid division by zero if block_size is somehow 0 (though asserted in new)
1060            let block_size = self.current_block.block_size.max(1);
1061
1062            if n <= current_len {
1063                // Only need to pop from the current partial block
1064                self.current_block.pop_tokens(n)?;
1065            } else {
1066                // Need to pop from full blocks as well
1067                let tokens_to_pop_from_blocks = n - current_len;
1068
1069                // Calculate how many blocks are affected (including the one partially popped)
1070                let num_blocks_to_affect = tokens_to_pop_from_blocks.div_ceil(block_size as usize);
1071
1072                // Check if we need to pop more blocks than available (should be prevented by initial len check)
1073                if num_blocks_to_affect > self.blocks.len() {
1074                    // This indicates an inconsistency between total_tokens() and internal state.
1075                    debug_assert!(
1076                        false,
1077                        "Truncate calculation error: trying to pop too many blocks."
1078                    );
1079                    return Err(TokenBlockError::InsufficientTokens);
1080                }
1081
1082                // Determine the index of the block that will be the source for the new partial block
1083                let source_block_index = self.blocks.len() - num_blocks_to_affect;
1084
1085                // Calculate how many tokens to keep from that source block
1086                let num_full_blocks_completely_popped = num_blocks_to_affect - 1;
1087                let num_tokens_to_pop_from_source_block = tokens_to_pop_from_blocks
1088                    - num_full_blocks_completely_popped * block_size as usize;
1089                let num_tokens_to_keep_in_new_partial =
1090                    (block_size as usize).saturating_sub(num_tokens_to_pop_from_source_block);
1091
1092                // Get the tokens for the new partial block
1093                let new_partial_tokens = if num_tokens_to_keep_in_new_partial > 0 {
1094                    self.blocks[source_block_index].tokens().as_ref()
1095                        [..num_tokens_to_keep_in_new_partial]
1096                        .to_vec()
1097                } else {
1098                    Vec::new()
1099                };
1100
1101                // Truncate the blocks vector to remove popped blocks
1102                self.blocks.truncate(source_block_index);
1103
1104                // Update the current_block state
1105                self.current_block.tokens = Tokens(new_partial_tokens);
1106                // Correctly set the parent hash based on the *new* last block
1107                self.current_block.parent_sequence_hash =
1108                    self.blocks.last().map(|b| b.sequence_hash());
1109                // Update position to match the number of complete blocks
1110                self.current_block.position = self.blocks.len();
1111                // salt_hash and block_size remain the same for current_block
1112            }
1113        }
1114        Ok(())
1115    }
1116
1117    /// Removes the last `count` tokens from the sequence.
1118    ///
1119    /// This is a convenience method that calculates the required length and calls [`truncate`].
1120    ///
1121    /// # Arguments
1122    ///
1123    /// * `count` - The number of tokens to remove from the end.
1124    ///
1125    /// # Returns
1126    ///
1127    /// * `Ok(())` - If the tokens were successfully removed.
1128    /// * `Err(TokenBlockError::InsufficientTokens)` - If `count` is greater than or equal to
1129    ///   the total number of tokens in the sequence.
1130    pub fn unwind(&mut self, count: usize) -> Result<(), TokenBlockError> {
1131        let current_total_len = self.total_tokens();
1132        if count > current_total_len {
1133            // Allow count == current_total_len, which truncates to 0.
1134            return Err(TokenBlockError::InsufficientTokens);
1135        }
1136
1137        // number of tokens remaining in the sequence after undoing the given count
1138        let len = current_total_len - count;
1139        self.truncate(len)
1140    }
1141
1142    /// Resets the sequence to the initial state.
1143    pub fn reset(&mut self) {
1144        self.blocks.clear();
1145        self.current_block =
1146            PartialTokenBlock::create_sequence_root(self.block_size as u32, self.salt_hash);
1147    }
1148
1149    /// Removes the last token from the sequence and returns it, or [`None`] if it is empty.
1150    ///
1151    /// This operation is analogous to `Vec::pop`.
1152    ///
1153    /// # Returns
1154    ///
1155    /// * `Some(Token)` - The last token, if the sequence was not empty.
1156    /// * `None` - If the sequence was empty.
1157    pub fn pop(&mut self) -> Option<Token> {
1158        let current_total_len = self.total_tokens();
1159        if current_total_len == 0 {
1160            return None;
1161        }
1162
1163        // Determine the last token. It must be in the current_block if current_block is not empty.
1164        // If current_block is empty, it must be the last token of the last full block.
1165        let last_token = if !self.current_block.tokens.is_empty() {
1166            // Last token is in the partial block
1167            *self
1168                .current_block
1169                .tokens
1170                .last()
1171                .expect("Current block checked for non-empty")
1172        } else {
1173            // Current block is empty, sequence is not. Must be in the last full block.
1174            let last_block = self
1175                .blocks
1176                .last()
1177                .expect("Sequence is not empty but has no blocks and empty current block?");
1178            *last_block
1179                .tokens()
1180                .last()
1181                .expect("Last block cannot be empty")
1182        };
1183
1184        // Truncate the sequence by one element.
1185        // We expect this to succeed since we know the length > 0.
1186        match self.truncate(current_total_len - 1) {
1187            Ok(_) => Some(last_token),
1188            Err(_) => {
1189                // This should be logically impossible if total_tokens() and truncate() are correct.
1190                // Panic in debug, return None in release as a fallback, though it indicates a bug.
1191                debug_assert!(
1192                    false,
1193                    "truncate failed unexpectedly after checking length in pop"
1194                );
1195                None
1196            }
1197        }
1198    }
1199
1200    /// Returns a slice containing all the completed [`TokenBlock`]s in the sequence.
1201    pub fn blocks(&self) -> &[TokenBlock] {
1202        &self.blocks
1203    }
1204
1205    /// Returns a reference to the last completed [`TokenBlock`] in the sequence, if any.
1206    pub fn last_complete_block(&self) -> Option<&TokenBlock> {
1207        self.blocks.last()
1208    }
1209
1210    /// Returns a reference to the current [`PartialTokenBlock`] where new tokens are added.
1211    pub fn current_block(&self) -> &PartialTokenBlock {
1212        &self.current_block
1213    }
1214
1215    /// Consumes the sequence and returns its parts: a `Vec` of completed blocks and the final partial block.
1216    pub fn into_parts(self) -> (Vec<TokenBlock>, PartialTokenBlock) {
1217        (self.blocks, self.current_block)
1218    }
1219
1220    /// Returns the block size used for this sequence.
1221    pub fn block_size(&self) -> usize {
1222        self.block_size
1223    }
1224
1225    /// Returns the [`SaltHash`] used for this sequence.
1226    pub fn salt_hash(&self) -> SaltHash {
1227        self.salt_hash
1228    }
1229
1230    /// Returns the total number of tokens in the sequence (sum of tokens in all completed blocks
1231    /// plus tokens in the current partial block).
1232    pub fn total_tokens(&self) -> usize {
1233        let block_size = self.current_block.block_size as usize;
1234        (self.blocks.len() * block_size) + self.current_block.len()
1235    }
1236
1237    /// Extract the token with the range
1238    pub fn tokens_at(&self, range: Range<usize>) -> Tokens {
1239        let total = self.total_tokens();
1240
1241        // Validate range - return empty tokens for invalid ranges
1242        if range.start > range.end || range.end > total {
1243            return Tokens::default();
1244        }
1245
1246        // Handle empty range
1247        if range.is_empty() {
1248            return Tokens::default();
1249        }
1250
1251        let mut result = Vec::with_capacity(range.len());
1252
1253        for i in range {
1254            if i < self.blocks.len() * self.block_size {
1255                // Token is in a completed block
1256                let block_index = i / self.block_size;
1257                let token_index = i % self.block_size;
1258                result.push(self.blocks[block_index].tokens()[token_index]);
1259            } else {
1260                // Token is in the current partial block
1261                let current_block_index = i - (self.blocks.len() * self.block_size);
1262                result.push(self.current_block.tokens()[current_block_index]);
1263            }
1264        }
1265
1266        Tokens::from(result)
1267    }
1268
1269    /// Splits a [`Tokens`] object into a vector of completed blocks and a final partial block.
1270    ///
1271    /// This is primarily used internally by [`TokenBlockSequence::new`] but can be used externally.
1272    ///
1273    /// # Arguments
1274    ///
1275    /// * `tokens` - The [`Tokens`] to split.
1276    /// * `block_size` - The size of each block.
1277    /// * `salt_hash` - The [`SaltHash`] to use for hashing.
1278    ///
1279    /// # Returns
1280    ///
1281    /// A tuple containing `(Vec<TokenBlock>, PartialTokenBlock)`.
1282    ///
1283    /// # Panics
1284    ///
1285    /// Panics if `block_size` is 0.
1286    pub fn split_tokens(
1287        tokens: &[Token],
1288        block_size: u32,
1289        salt_hash: u64,
1290    ) -> (Vec<TokenBlock>, PartialTokenBlock) {
1291        assert!(block_size > 0, "block_size must be greater than 0");
1292        let chunks: Vec<TokenBlockChunk> = tokens
1293            .as_ref()
1294            .chunks_exact(block_size as usize)
1295            .map(|chunk| TokenBlockChunk::from_tokens(chunk, salt_hash))
1296            .collect();
1297
1298        let mut result_blocks = Vec::with_capacity(chunks.len());
1299        let mut last_sequence_hash: Option<SequenceHash> = None;
1300
1301        // Sequentially combine chunks to compute sequence hashes
1302        for (position, chunk) in chunks.into_iter().enumerate() {
1303            let new_block = TokenBlock::from_chunk(chunk, last_sequence_hash, position);
1304            last_sequence_hash = Some(new_block.sequence_hash());
1305            result_blocks.push(new_block);
1306        }
1307
1308        // Handle any remaining tokens
1309        let remainder = tokens
1310            .as_ref()
1311            .chunks_exact(block_size as usize)
1312            .remainder();
1313
1314        let next_position = result_blocks.len(); // Position for the next block to be committed
1315
1316        let mut partial_tokens = Tokens::with_capacity(block_size as usize);
1317        partial_tokens.0.extend_from_slice(remainder);
1318
1319        let current_block = PartialTokenBlock {
1320            tokens: partial_tokens,
1321            block_size,
1322            salt_hash,
1323            // Parent hash is the sequence hash of the last *full* block computed
1324            parent_sequence_hash: last_sequence_hash,
1325            position: next_position,
1326        };
1327
1328        (result_blocks, current_block)
1329    }
1330
1331    /// Creates a new [`TokenBlockSequence`] from a slice of tokens.
1332    ///
1333    /// The tokens are split into blocks of `block_size`. Any remaining tokens
1334    /// form the initial `current_block`.
1335    ///
1336    /// # Arguments
1337    ///
1338    /// * `tokens` - The slice of tokens to create the sequence from.
1339    /// * `block_size` - The size of each block.
1340    /// * `salt_hash` - The [`SaltHash`] to use for hashing.
1341    pub fn from_slice(tokens: &[Token], block_size: u32, salt_hash: Option<SaltHash>) -> Self {
1342        assert!(block_size > 0, "block_size must be greater than 0");
1343        let salt_hash = salt_hash.unwrap_or(0);
1344        let (blocks, current_block) = Self::split_tokens(tokens, block_size, salt_hash);
1345
1346        Self {
1347            blocks,
1348            current_block,
1349            salt_hash,
1350            block_size: block_size as usize,
1351        }
1352    }
1353}
1354
1355#[cfg(test)]
1356mod tests {
1357    use super::*;
1358    use bytemuck::cast_slice;
1359
1360    // Helper to create a sequence for testing
1361    fn create_test_sequence(
1362        initial_tokens: &[Token],
1363        block_size: u32,
1364        salt_hash: Option<SaltHash>,
1365    ) -> TokenBlockSequence {
1366        TokenBlockSequence::new(Tokens::from(initial_tokens), block_size, salt_hash)
1367    }
1368
1369    // Helper to get expected hashes (replace with actual calculated values if needed)
1370    const TEST_SALT_HASH: SaltHash = 1337;
1371    const HASH_1_4: BlockHash = 14643705804678351452; // hash([1,2,3,4], 1337)
1372    const SEQ_HASH_1_4: SequenceHash = HASH_1_4;
1373    const HASH_5_8: BlockHash = 16777012769546811212; // hash([5,6,7,8], 1337)
1374    const SEQ_HASH_5_8: SequenceHash = 4945711292740353085; // hash([SEQ_HASH_1_4, HASH_5_8], 1337)
1375    const HASH_9_12: BlockHash = 483935686894639516; // hash([9,10,11,12], 1337)
1376    const SEQ_HASH_9_12: SequenceHash = 12583592247330656132; // hash([SEQ_HASH_5_8, HASH_9_12], 1337)
1377
1378    impl PartialTokenBlock {
1379        /// Attempts to remove the last token from the block.
1380        ///
1381        /// # Returns
1382        ///
1383        /// * `Ok(())` - If a token was successfully removed.
1384        /// * `Err(TokenBlockError::Empty)` - If the block was already empty.
1385        pub fn pop_token(&mut self) -> Result<(), TokenBlockError> {
1386            if self.tokens.0.is_empty() {
1387                return Err(TokenBlockError::Empty);
1388            }
1389            self.tokens.0.pop();
1390            Ok(())
1391        }
1392    }
1393
1394    #[test]
1395    fn test_validate_hash_constants() {
1396        let salt = TEST_SALT_HASH;
1397
1398        // Block 1: [1, 2, 3, 4]
1399        let tokens_1_4 = &[1u32, 2, 3, 4];
1400        let computed_hash_1_4 = compute_hash_v2(cast_slice(tokens_1_4), salt);
1401        assert_eq!(computed_hash_1_4, HASH_1_4, "Mismatch for HASH_1_4");
1402        // First block's sequence hash is its block hash
1403        assert_eq!(computed_hash_1_4, SEQ_HASH_1_4, "Mismatch for SEQ_HASH_1_4");
1404
1405        // Block 2: [5, 6, 7, 8]
1406        let tokens_5_8 = &[5u32, 6, 7, 8];
1407        let computed_hash_5_8 = compute_hash_v2(cast_slice(tokens_5_8), salt);
1408        assert_eq!(computed_hash_5_8, HASH_5_8, "Mismatch for HASH_5_8");
1409        let computed_seq_hash_5_8 = compute_hash_v2(cast_slice(&[SEQ_HASH_1_4, HASH_5_8]), salt);
1410        assert_eq!(
1411            computed_seq_hash_5_8, SEQ_HASH_5_8,
1412            "Mismatch for SEQ_HASH_5_8"
1413        );
1414
1415        // Block 3: [9, 10, 11, 12]
1416        let tokens_9_12 = &[9u32, 10, 11, 12];
1417        let computed_hash_9_12 = compute_hash_v2(cast_slice(tokens_9_12), salt);
1418        assert_eq!(computed_hash_9_12, HASH_9_12, "Mismatch for HASH_9_12");
1419        let computed_seq_hash_9_12 = compute_hash_v2(cast_slice(&[SEQ_HASH_5_8, HASH_9_12]), salt);
1420        assert_eq!(
1421            computed_seq_hash_9_12, SEQ_HASH_9_12,
1422            "Mismatch for SEQ_HASH_9_12"
1423        );
1424    }
1425
1426    #[test]
1427    fn test_positional_sequence_hash_encoding_decoding() {
1428        // Test Mode 0: position fits in 8 bits (< 256)
1429        let seq_hash_0 = 0x1234567890ABCDEF;
1430        let position_0 = 100;
1431        let lbh_0 = 0xFEDCBA9876543210;
1432        let psh_0 = PositionalSequenceHash::new(seq_hash_0, position_0, lbh_0);
1433
1434        assert_eq!(psh_0.mode(), 0, "Position 100 should use mode 0");
1435        assert_eq!(psh_0.sequence_hash(), seq_hash_0);
1436        assert_eq!(psh_0.position(), position_0);
1437        // LBH is truncated to 54 bits in mode 0
1438        assert_eq!(
1439            psh_0.local_block_hash(),
1440            lbh_0 & ((1u64 << 54) - 1),
1441            "LBH should be truncated to 54 bits"
1442        );
1443
1444        // Test Mode 1: position fits in 16 bits (256 <= pos < 65536)
1445        let position_1 = 1000;
1446        let psh_1 = PositionalSequenceHash::new(seq_hash_0, position_1, lbh_0);
1447
1448        assert_eq!(psh_1.mode(), 1, "Position 1000 should use mode 1");
1449        assert_eq!(psh_1.sequence_hash(), seq_hash_0);
1450        assert_eq!(psh_1.position(), position_1);
1451        // LBH is truncated to 46 bits in mode 1
1452        assert_eq!(
1453            psh_1.local_block_hash(),
1454            lbh_0 & ((1u64 << 46) - 1),
1455            "LBH should be truncated to 46 bits"
1456        );
1457
1458        // Test Mode 2: position fits in 24 bits (65536 <= pos < 16777216)
1459        let position_2 = 100_000;
1460        let psh_2 = PositionalSequenceHash::new(seq_hash_0, position_2, lbh_0);
1461
1462        assert_eq!(psh_2.mode(), 2, "Position 100,000 should use mode 2");
1463        assert_eq!(psh_2.sequence_hash(), seq_hash_0);
1464        assert_eq!(psh_2.position(), position_2);
1465        // LBH is truncated to 38 bits in mode 2
1466        assert_eq!(
1467            psh_2.local_block_hash(),
1468            lbh_0 & ((1u64 << 38) - 1),
1469            "LBH should be truncated to 38 bits"
1470        );
1471
1472        // Test Mode 3: position fits in 31 bits (16777216 <= pos < 2^31)
1473        let position_3 = 20_000_000;
1474        let psh_3 = PositionalSequenceHash::new(seq_hash_0, position_3, lbh_0);
1475
1476        assert_eq!(psh_3.mode(), 3, "Position 20,000,000 should use mode 3");
1477        assert_eq!(psh_3.sequence_hash(), seq_hash_0);
1478        assert_eq!(psh_3.position(), position_3);
1479        // LBH is truncated to 31 bits in mode 3
1480        assert_eq!(
1481            psh_3.local_block_hash(),
1482            lbh_0 & ((1u64 << 31) - 1),
1483            "LBH should be truncated to 31 bits"
1484        );
1485
1486        // Test edge case: position at boundary
1487        let position_255 = 255;
1488        let psh_255 = PositionalSequenceHash::new(seq_hash_0, position_255, lbh_0);
1489        assert_eq!(psh_255.mode(), 0, "Position 255 should use mode 0");
1490        assert_eq!(psh_255.position(), position_255);
1491
1492        let position_256 = 256;
1493        let psh_256 = PositionalSequenceHash::new(seq_hash_0, position_256, lbh_0);
1494        assert_eq!(psh_256.mode(), 1, "Position 256 should use mode 1");
1495        assert_eq!(psh_256.position(), position_256);
1496    }
1497
1498    #[test]
1499    fn test_positional_lineage_hash() {
1500        // Test Mode 0: position fits in 8 bits (< 256)
1501        let current_hash_0 = 0x1234567890ABCDEF;
1502        let parent_hash_0 = 0xFEDCBA9876543210;
1503        let position_0 = 100;
1504        let plh_0 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_0);
1505
1506        assert_eq!(plh_0.mode(), 0, "Position 100 should use mode 0");
1507        assert_eq!(plh_0.position(), position_0);
1508        // Current and parent are truncated to 59 bits in mode 0
1509        assert_eq!(
1510            plh_0.current_hash_fragment(),
1511            current_hash_0 & ((1u64 << 59) - 1),
1512            "Current hash should be truncated to 59 bits"
1513        );
1514        assert_eq!(
1515            plh_0.parent_hash_fragment(),
1516            parent_hash_0 & ((1u64 << 59) - 1),
1517            "Parent hash should be truncated to 59 bits"
1518        );
1519
1520        // Test Mode 1: position fits in 16 bits (256 <= pos < 65536)
1521        let position_1 = 1000;
1522        let plh_1 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_1);
1523
1524        assert_eq!(plh_1.mode(), 1, "Position 1000 should use mode 1");
1525        assert_eq!(plh_1.position(), position_1);
1526        // Current and parent are truncated to 55 bits in mode 1
1527        assert_eq!(
1528            plh_1.current_hash_fragment(),
1529            current_hash_0 & ((1u64 << 55) - 1),
1530            "Current hash should be truncated to 55 bits"
1531        );
1532        assert_eq!(
1533            plh_1.parent_hash_fragment(),
1534            parent_hash_0 & ((1u64 << 55) - 1),
1535            "Parent hash should be truncated to 55 bits"
1536        );
1537
1538        // Test Mode 2: position fits in 24 bits (65536 <= pos < 16777216)
1539        let position_2 = 100_000;
1540        let plh_2 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_2);
1541
1542        assert_eq!(plh_2.mode(), 2, "Position 100,000 should use mode 2");
1543        assert_eq!(plh_2.position(), position_2);
1544        // Current and parent are truncated to 51 bits in mode 2
1545        assert_eq!(
1546            plh_2.current_hash_fragment(),
1547            current_hash_0 & ((1u64 << 51) - 1),
1548            "Current hash should be truncated to 51 bits"
1549        );
1550        assert_eq!(
1551            plh_2.parent_hash_fragment(),
1552            parent_hash_0 & ((1u64 << 51) - 1),
1553            "Parent hash should be truncated to 51 bits"
1554        );
1555
1556        // Test edge cases: position at boundaries
1557        let position_255 = 255;
1558        let plh_255 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_255);
1559        assert_eq!(plh_255.mode(), 0, "Position 255 should use mode 0");
1560        assert_eq!(plh_255.position(), position_255);
1561
1562        let position_256 = 256;
1563        let plh_256 = PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_256);
1564        assert_eq!(plh_256.mode(), 1, "Position 256 should use mode 1");
1565        assert_eq!(plh_256.position(), position_256);
1566
1567        let position_65535 = 65535;
1568        let plh_65535 =
1569            PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65535);
1570        assert_eq!(plh_65535.mode(), 1, "Position 65535 should use mode 1");
1571        assert_eq!(plh_65535.position(), position_65535);
1572
1573        let position_65536 = 65536;
1574        let plh_65536 =
1575            PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_65536);
1576        assert_eq!(plh_65536.mode(), 2, "Position 65536 should use mode 2");
1577        assert_eq!(plh_65536.position(), position_65536);
1578
1579        // Test with None parent (root block)
1580        let plh_root = PositionalLineageHash::new(current_hash_0, None, 0);
1581        assert_eq!(plh_root.mode(), 0);
1582        assert_eq!(plh_root.position(), 0);
1583        assert_eq!(
1584            plh_root.parent_hash_fragment(),
1585            0,
1586            "Root should have zero parent hash"
1587        );
1588        assert_eq!(
1589            plh_root.current_hash_fragment(),
1590            current_hash_0 & ((1u64 << 59) - 1)
1591        );
1592
1593        // Test LSB alignment: verify that smaller mode fragments are subsets of larger mode fragments
1594        let position_small = 100; // Mode 0: 59 bits
1595        let position_large = 1000; // Mode 1: 55 bits
1596        let plh_small =
1597            PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_small);
1598        let plh_large =
1599            PositionalLineageHash::new(current_hash_0, Some(parent_hash_0), position_large);
1600
1601        // The 55-bit fragment from mode 1 should match the lower 55 bits of the 59-bit fragment from mode 0
1602        let mask_55 = (1u64 << 55) - 1;
1603        assert_eq!(
1604            plh_large.current_hash_fragment(),
1605            plh_small.current_hash_fragment() & mask_55,
1606            "LSB alignment: mode 1 fragment should be subset of mode 0 fragment"
1607        );
1608    }
1609
1610    #[test]
1611    #[should_panic(expected = "Position 16777216 exceeds maximum supported value")]
1612    fn test_positional_lineage_hash_panic_on_large_position() {
1613        let current_hash = 0x1234567890ABCDEF;
1614        let parent_hash = 0xFEDCBA9876543210;
1615        let position = 1u64 << 24; // 2^24 = 16,777,216
1616        let _ = PositionalLineageHash::new(current_hash, Some(parent_hash), position);
1617    }
1618
1619    #[test]
1620    fn test_positional_lineage_hash_mode_boundary_alignment() {
1621        // Test that hash fragments align correctly across mode boundaries
1622        // This is critical for backward traversal in the radix tree
1623
1624        let parent_hash = 0xFEDCBA9876543210;
1625        let current_hash_255 = 0x1234567890ABCDEF;
1626        let current_hash_256 = 0xABCDEF0123456789;
1627
1628        // Position 255: Mode 0 (last position before boundary)
1629        let plh_255 = PositionalLineageHash::new(current_hash_255, Some(parent_hash), 255);
1630        assert_eq!(plh_255.mode(), 0);
1631
1632        // Position 256: Mode 1 (first position after boundary)
1633        // This should store position 255's current_hash as its parent
1634        let plh_256 = PositionalLineageHash::new(current_hash_256, Some(current_hash_255), 256);
1635        assert_eq!(plh_256.mode(), 1);
1636
1637        // CRITICAL TEST: The parent fragment at position 256 should match
1638        // the current fragment at position 255, allowing backward traversal
1639        // Both should be truncated to 55 bits (the minimum available at the boundary)
1640        let mask_55 = (1u64 << 55) - 1;
1641        assert_eq!(
1642            plh_256.parent_hash_fragment(),
1643            plh_255.current_hash_fragment() & mask_55,
1644            "Mode boundary: position 256's parent fragment should match position 255's current fragment (55 bits)"
1645        );
1646
1647        // Verify that position 255's current fragment is already truncated to 55 bits
1648        // (not the full 59 bits that Mode 0 could theoretically support)
1649        assert_eq!(
1650            plh_255.current_hash_fragment(),
1651            current_hash_255 & mask_55,
1652            "Position 255 should pre-truncate current hash to 55 bits for next mode compatibility"
1653        );
1654
1655        // Test the other boundary: 65535 -> 65536 (Mode 1 -> Mode 2)
1656        let current_hash_65535 = 0x1111222233334444;
1657        let current_hash_65536 = 0x5555666677778888;
1658
1659        let plh_65535 = PositionalLineageHash::new(current_hash_65535, Some(parent_hash), 65535);
1660        assert_eq!(plh_65535.mode(), 1);
1661
1662        let plh_65536 =
1663            PositionalLineageHash::new(current_hash_65536, Some(current_hash_65535), 65536);
1664        assert_eq!(plh_65536.mode(), 2);
1665
1666        // Both should align to 51 bits (Mode 2's capacity)
1667        let mask_51 = (1u64 << 51) - 1;
1668        assert_eq!(
1669            plh_65536.parent_hash_fragment(),
1670            plh_65535.current_hash_fragment() & mask_51,
1671            "Mode boundary: position 65536's parent fragment should match position 65535's current fragment (51 bits)"
1672        );
1673
1674        assert_eq!(
1675            plh_65535.current_hash_fragment(),
1676            current_hash_65535 & mask_51,
1677            "Position 65535 should pre-truncate current hash to 51 bits for next mode compatibility"
1678        );
1679    }
1680
1681    #[test]
1682    fn test_tokens_from() {
1683        let vec_u32: Vec<u32> = vec![1, 2, 3];
1684        let tokens_u32: Tokens = vec_u32.clone().into();
1685        assert_eq!(tokens_u32.0, vec_u32);
1686
1687        let slice_u32: &[u32] = &[4, 5];
1688        let tokens_slice_u32: Tokens = slice_u32.into();
1689        assert_eq!(tokens_slice_u32.0, vec![4, 5]);
1690
1691        let vec_i32: Vec<i32> = vec![-1, 0, 1]; // Note: -1 becomes large u32
1692        let tokens_i32: Tokens = vec_i32.into();
1693        assert_eq!(tokens_i32.0, vec![u32::MAX, 0, 1]);
1694
1695        let slice_i32: &[i32] = &[100, 200];
1696        let tokens_slice_i32: Tokens = slice_i32.into();
1697        assert_eq!(tokens_slice_i32.0, vec![100, 200]);
1698
1699        let into_vec: Vec<u32> = tokens_slice_i32.into();
1700        assert_eq!(into_vec, vec![100, 200]);
1701    }
1702
1703    #[test]
1704    fn test_tokens_equality() {
1705        let tokens = Tokens::from(vec![1, 2, 3]);
1706        assert_eq!(tokens, vec![1, 2, 3]);
1707        assert_eq!(vec![1, 2, 3], tokens);
1708        assert_eq!(tokens, &[1, 2, 3][..]);
1709        assert_eq!(&[1, 2, 3][..], tokens);
1710        assert_eq!(tokens, Tokens::from(vec![1, 2, 3]));
1711        assert_ne!(tokens, Tokens::from(vec![1, 2, 4]));
1712    }
1713
1714    #[test]
1715    fn test_tokens_deref_asref() {
1716        let tokens = Tokens::from(vec![10, 20, 30]);
1717
1718        // Deref to &[Token]
1719        assert_eq!(tokens.len(), 3);
1720        assert_eq!(tokens[1], 20);
1721        let slice: &[Token] = &tokens;
1722        assert_eq!(slice, &[10, 20, 30]);
1723
1724        // AsRef<[Token]>
1725        let as_ref_slice: &[Token] = tokens.as_ref();
1726        assert_eq!(as_ref_slice, &[10, 20, 30]);
1727
1728        // Borrow<[Token]>
1729        let borrowed_slice: &[Token] = std::borrow::Borrow::borrow(&tokens);
1730        assert_eq!(borrowed_slice, &[10, 20, 30]);
1731    }
1732
1733    #[test]
1734    fn test_tokens_into_sequence() {
1735        let tokens = Tokens::from(vec![1, 2, 3, 4, 5]);
1736        let seq = tokens.into_sequence(3, Some(TEST_SALT_HASH));
1737        assert_eq!(seq.blocks().len(), 1);
1738        assert_eq!(seq.blocks[0].tokens().as_ref(), &[1, 2, 3]);
1739        assert_eq!(seq.current_block().tokens().as_ref(), &[4, 5]);
1740        assert_eq!(seq.salt_hash(), TEST_SALT_HASH);
1741    }
1742
1743    #[test]
1744    fn test_partial_block_ops() {
1745        let mut partial = PartialTokenBlock::create_sequence_root(3, TEST_SALT_HASH);
1746        assert_eq!(partial.len(), 0);
1747        assert_eq!(partial.remaining(), 3);
1748        assert!(partial.is_empty());
1749
1750        // Push tokens
1751        assert!(partial.push_token(1).is_ok());
1752        assert_eq!(partial.len(), 1);
1753        assert_eq!(partial.remaining(), 2);
1754        let remaining = partial.push_tokens(Tokens::from(vec![2, 3, 4]));
1755        assert_eq!(partial.len(), 3);
1756        assert_eq!(partial.remaining(), 0);
1757        assert_eq!(remaining.as_ref(), &[4]); // Token 4 didn't fit
1758        assert_eq!(partial.tokens().as_ref(), &[1, 2, 3]);
1759
1760        // Push when full
1761        assert_eq!(partial.push_token(5), Err(TokenBlockError::Full));
1762        let remaining_full = partial.push_tokens(Tokens::from(vec![5]));
1763        assert_eq!(remaining_full.as_ref(), &[5]);
1764
1765        // Pop tokens
1766        assert!(partial.pop_token().is_ok());
1767        assert_eq!(partial.len(), 2);
1768        assert_eq!(partial.tokens().as_ref(), &[1, 2]);
1769        assert!(partial.pop_tokens(2).is_ok());
1770        assert!(partial.is_empty());
1771
1772        // Pop when empty
1773        assert_eq!(partial.pop_token(), Err(TokenBlockError::Empty));
1774        assert_eq!(
1775            partial.pop_tokens(1),
1776            Err(TokenBlockError::InsufficientTokens)
1777        );
1778
1779        // Commit incomplete
1780        assert!(partial.push_token(10).is_ok());
1781        assert_eq!(partial.commit(), Err(TokenBlockError::Incomplete));
1782
1783        // Commit complete
1784        assert!(partial.push_token(11).is_ok());
1785        assert!(partial.push_token(12).is_ok());
1786        assert_eq!(partial.len(), 3);
1787        let commit_result = partial.commit();
1788        assert!(commit_result.is_ok());
1789        let committed_block = commit_result.unwrap();
1790        assert_eq!(committed_block.tokens().as_ref(), &[10, 11, 12]);
1791
1792        // Check state after commit (partial block is now the next one)
1793        assert!(partial.is_empty());
1794        assert_eq!(
1795            partial.parent_sequence_hash,
1796            Some(committed_block.sequence_hash())
1797        );
1798        assert_eq!(partial.block_size, 3);
1799    }
1800
1801    #[test]
1802    fn test_token_block_creation_and_hashes() {
1803        let salt = TEST_SALT_HASH;
1804        let tokens1 = Tokens::from(vec![1, 2, 3, 4]);
1805        let chunk1 = TokenBlockChunk::new(tokens1.clone(), salt);
1806        let block1 = TokenBlock::from_chunk(chunk1, None, 0);
1807
1808        assert_eq!(block1.tokens(), &tokens1);
1809        assert_eq!(block1.salt_hash(), salt);
1810        assert_eq!(block1.parent_sequence_hash(), None);
1811        assert_eq!(block1.block_hash(), HASH_1_4);
1812        assert_eq!(block1.sequence_hash(), SEQ_HASH_1_4); // First block seq_hash == block_hash
1813        assert_eq!(block1.position(), 0); // First block is at position 0
1814
1815        // Verify positional lineage hash for block 1
1816        let plh1 = block1.positional_lineage_hash();
1817        assert_eq!(plh1.position(), 0);
1818        assert_eq!(plh1.parent_hash_fragment(), 0); // Root has no parent
1819        assert_eq!(
1820            plh1.current_hash_fragment(),
1821            SEQ_HASH_1_4 & ((1u64 << 59) - 1)
1822        ); // Mode 0: 59 bits
1823
1824        let tokens2 = Tokens::from(vec![5, 6, 7, 8]);
1825        let chunk2 = TokenBlockChunk::new(tokens2.clone(), salt);
1826        let block2 = TokenBlock::from_chunk(chunk2, block1.parent_sequence_hash(), 1); // Incorrect parent
1827        // Sequence hash should differ if parent is wrong
1828        assert_ne!(block2.sequence_hash(), SEQ_HASH_5_8);
1829
1830        let chunk2_correct = TokenBlockChunk::new(tokens2.clone(), salt);
1831        let block2_correct =
1832            TokenBlock::from_chunk(chunk2_correct, Some(block1.sequence_hash()), 1);
1833
1834        assert_eq!(block2_correct.tokens(), &tokens2);
1835        assert_eq!(block2_correct.salt_hash(), salt);
1836        assert_eq!(
1837            block2_correct.parent_sequence_hash(),
1838            Some(block1.sequence_hash())
1839        );
1840        assert_eq!(block2_correct.block_hash(), HASH_5_8);
1841        assert_eq!(block2_correct.sequence_hash(), SEQ_HASH_5_8);
1842        assert_eq!(block2_correct.position(), 1); // Second block is at position 1
1843
1844        // Verify positional lineage hash for block 2
1845        let plh2 = block2_correct.positional_lineage_hash();
1846        assert_eq!(plh2.position(), 1);
1847        assert_eq!(
1848            plh2.parent_hash_fragment(),
1849            SEQ_HASH_1_4 & ((1u64 << 59) - 1)
1850        ); // Parent fragment matches block1's sequence hash
1851        assert_eq!(
1852            plh2.current_hash_fragment(),
1853            SEQ_HASH_5_8 & ((1u64 << 59) - 1)
1854        ); // Mode 0: 59 bits
1855    }
1856
1857    #[test]
1858    fn test_new_sequence() {
1859        // Empty initial tokens
1860        let seq_empty = create_test_sequence(&[], 4, Some(TEST_SALT_HASH));
1861        assert!(seq_empty.blocks().is_empty());
1862        assert!(seq_empty.current_block().is_empty());
1863        assert_eq!(seq_empty.total_tokens(), 0);
1864        assert_eq!(seq_empty.salt_hash(), TEST_SALT_HASH);
1865        assert_eq!(seq_empty.current_block().parent_sequence_hash, None);
1866
1867        // Less than one block
1868        let seq_partial = create_test_sequence(&[1, 2], 4, Some(TEST_SALT_HASH));
1869        assert!(seq_partial.blocks().is_empty());
1870        assert_eq!(seq_partial.current_block().tokens().as_ref(), &[1, 2]);
1871        assert_eq!(seq_partial.total_tokens(), 2);
1872        assert_eq!(seq_partial.current_block().parent_sequence_hash, None);
1873
1874        // Exactly one block
1875        let seq_one_block = create_test_sequence(&[1, 2, 3, 4], 4, Some(TEST_SALT_HASH));
1876        assert_eq!(seq_one_block.blocks().len(), 1);
1877        assert!(seq_one_block.current_block().is_empty());
1878        assert_eq!(seq_one_block.total_tokens(), 4);
1879        assert_eq!(seq_one_block.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
1880        assert_eq!(seq_one_block.blocks[0].sequence_hash(), SEQ_HASH_1_4);
1881        assert_eq!(
1882            seq_one_block.current_block().parent_sequence_hash,
1883            Some(SEQ_HASH_1_4)
1884        );
1885
1886        // More than one block
1887        let seq_multi = create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9], 4, Some(TEST_SALT_HASH));
1888        assert_eq!(seq_multi.blocks().len(), 2);
1889        assert_eq!(seq_multi.current_block().tokens().as_ref(), &[9]);
1890        assert_eq!(seq_multi.total_tokens(), 9);
1891        assert_eq!(seq_multi.blocks[0].sequence_hash(), SEQ_HASH_1_4);
1892        assert_eq!(seq_multi.blocks[1].sequence_hash(), SEQ_HASH_5_8);
1893        assert_eq!(
1894            seq_multi.current_block().parent_sequence_hash,
1895            Some(SEQ_HASH_5_8)
1896        );
1897
1898        // Test tokens_at across blocks and partial block
1899        assert_eq!(seq_multi.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]); // First complete block
1900        assert_eq!(seq_multi.tokens_at(4..8).as_ref(), &[5, 6, 7, 8]); // Second complete block
1901        assert_eq!(seq_multi.tokens_at(8..9).as_ref(), &[9]); // Current partial block
1902        assert_eq!(seq_multi.tokens_at(2..6).as_ref(), &[3, 4, 5, 6]); // Spanning blocks
1903        assert_eq!(seq_multi.tokens_at(6..9).as_ref(), &[7, 8, 9]); // Spanning to partial
1904        assert_eq!(seq_multi.tokens_at(5..5).as_ref(), &[0u32; 0]); // Empty range
1905        assert_eq!(seq_multi.tokens_at(10..15).as_ref(), &[0u32; 0]); // Out of bounds
1906
1907        // No salt hash
1908        let seq_no_salt = create_test_sequence(&[1, 2, 3, 4, 5], 4, None);
1909        assert_eq!(seq_no_salt.salt_hash(), 0);
1910        assert_eq!(seq_no_salt.blocks().len(), 1);
1911        assert_ne!(seq_no_salt.blocks[0].block_hash(), HASH_1_4); // Hash differs with salt 0
1912        assert_eq!(seq_no_salt.current_block().tokens().as_ref(), &[5]);
1913    }
1914
1915    #[test]
1916    #[should_panic]
1917    fn test_new_sequence_zero_block_size() {
1918        let _ = create_test_sequence(&[1], 0, None);
1919    }
1920
1921    #[test]
1922    fn test_append_single_token() {
1923        let mut sequence =
1924            create_test_sequence(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4, Some(TEST_SALT_HASH));
1925        assert_eq!(sequence.blocks().len(), 2);
1926        assert_eq!(sequence.current_block().tokens.len(), 2);
1927        assert_eq!(sequence.current_block().tokens, vec![9, 10]);
1928        assert_eq!(
1929            sequence.current_block().parent_sequence_hash,
1930            Some(SEQ_HASH_5_8)
1931        );
1932
1933        // Append token 11 - should not complete a block
1934        let completed_idx = sequence.append(11).unwrap();
1935        assert_eq!(completed_idx, None);
1936        assert_eq!(sequence.blocks().len(), 2);
1937        assert_eq!(sequence.current_block().tokens.as_ref(), &[9, 10, 11]);
1938
1939        // Append token 12 - should complete block 2 (index 2)
1940        // This will also commit block 2
1941        let completed_idx = sequence.append(12).unwrap();
1942        assert_eq!(completed_idx, Some(2));
1943        assert_eq!(sequence.blocks().len(), 3);
1944        assert_eq!(sequence.current_block.tokens.as_ref(), &[0u32; 0]);
1945        assert_eq!(sequence.current_block.remaining(), 4);
1946        assert_eq!(
1947            sequence.current_block().parent_sequence_hash,
1948            Some(SEQ_HASH_9_12)
1949        ); // Still linked to block 1
1950
1951        // Append token 13 - should not complete a block
1952        let completed_idx_13 = sequence.append(13).unwrap();
1953        assert_eq!(completed_idx_13, None);
1954        assert_eq!(sequence.blocks().len(), 3);
1955        assert_eq!(sequence.blocks[2].tokens().as_ref(), &[9, 10, 11, 12]);
1956        assert_eq!(sequence.blocks[2].sequence_hash(), SEQ_HASH_9_12);
1957        assert_eq!(sequence.current_block.tokens.as_ref(), &[13]); // New current block has 13
1958        assert_eq!(sequence.current_block.remaining(), 3);
1959        assert_eq!(
1960            sequence.current_block.parent_sequence_hash,
1961            Some(SEQ_HASH_9_12)
1962        ); // Linked to new block 2
1963    }
1964
1965    #[test]
1966    fn test_extend() {
1967        let block_size = 4;
1968        let salt_hash = Some(TEST_SALT_HASH);
1969
1970        // Case 1: Extend less than block size
1971        let mut seq1 = create_test_sequence(&[], block_size, salt_hash);
1972        let tokens1 = Tokens::from(vec![1, 2]);
1973        let completed1 = seq1.extend(tokens1).unwrap();
1974        assert_eq!(completed1, None); // No blocks completed
1975        assert_eq!(seq1.blocks.len(), 0);
1976        assert_eq!(seq1.current_block.tokens.as_ref(), &[1, 2]);
1977        assert_eq!(seq1.current_block.remaining(), 2);
1978        assert_eq!(seq1.current_block.parent_sequence_hash, None); // Still the root block
1979
1980        // Case 2: Extend exactly block size
1981        let mut seq2 = create_test_sequence(&[], block_size, salt_hash);
1982        let tokens2 = Tokens::from(vec![1, 2, 3, 4]);
1983        let completed2 = seq2.extend(tokens2).unwrap();
1984        assert_eq!(completed2, Some(0..1));
1985        assert_eq!(seq2.blocks.len(), 1);
1986        assert_eq!(seq2.current_block.tokens.as_ref(), &[0u32; 0]); // Current block is empty
1987        assert_eq!(seq2.current_block.remaining(), 4);
1988        assert_eq!(seq2.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); // Still the root block
1989
1990        // Case 3: Extend more than block size, less than two blocks
1991        let mut seq3 = create_test_sequence(&[], block_size, salt_hash);
1992        let tokens3 = Tokens::from(vec![1, 2, 3, 4, 5, 6]);
1993        let completed3 = seq3.extend(tokens3).unwrap();
1994        assert_eq!(completed3, Some(0..1)); // Block at index 0 completed
1995        assert_eq!(seq3.blocks.len(), 1);
1996        assert_eq!(seq3.current_block.tokens.as_ref(), &[5, 6]); // Partial block has remainder
1997        assert_eq!(seq3.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
1998        assert_eq!(seq3.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
1999        assert_eq!(seq3.current_block.remaining(), 2);
2000
2001        // Case 4: Extend exactly two blocks
2002        let mut seq4 = create_test_sequence(&[], block_size, salt_hash);
2003        let tokens4 = Tokens::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
2004        let completed4 = seq4.extend(tokens4).unwrap();
2005        assert_eq!(completed4, Some(0..2)); // Only block 0 is committed
2006        assert_eq!(seq4.blocks.len(), 2); // Only 1 block committed
2007        assert_eq!(seq4.current_block.tokens.as_ref(), &[0u32; 0]);
2008        assert_eq!(seq4.current_block.remaining(), 4);
2009        assert_eq!(seq4.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2010        assert_eq!(seq4.blocks[0].sequence_hash(), SEQ_HASH_1_4);
2011        assert_eq!(seq4.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8)); // Parent is the first block
2012
2013        // Case 5: Extend multiple times, completing blocks across calls
2014        let mut seq5 = create_test_sequence(&[], block_size, salt_hash);
2015        let tokens5a = Tokens::from(vec![1, 2]);
2016        let completed5a = seq5.extend(tokens5a).unwrap();
2017        assert_eq!(completed5a, None);
2018        assert_eq!(seq5.blocks.len(), 0);
2019        assert_eq!(seq5.current_block.tokens.as_ref(), &[1, 2]);
2020
2021        let tokens5b = Tokens::from(vec![3, 4, 5]);
2022        let completed5b = seq5.extend(tokens5b).unwrap();
2023        assert_eq!(completed5b, Some(0..1)); // Block at index 0 completed
2024        assert_eq!(seq5.blocks.len(), 1);
2025        assert_eq!(seq5.current_block.tokens.as_ref(), &[5]);
2026        assert_eq!(seq5.blocks[0].tokens().as_ref(), &[1, 2, 3, 4]);
2027        assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2028        assert_eq!(seq5.current_block.remaining(), 3);
2029
2030        let tokens5c = Tokens::from(vec![6, 7, 8, 9, 10]);
2031        let completed5c = seq5.extend(tokens5c).unwrap();
2032        assert_eq!(completed5c, Some(1..2)); // Block at index 1 completed
2033        assert_eq!(seq5.blocks.len(), 2);
2034        assert_eq!(seq5.current_block.tokens.as_ref(), &[9, 10]);
2035        assert_eq!(seq5.blocks[1].tokens().as_ref(), &[5, 6, 7, 8]);
2036        assert_eq!(seq5.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2037        assert_eq!(seq5.current_block.remaining(), 2);
2038
2039        // Case 6: Extend empty tokens
2040        let mut seq6 = create_test_sequence(&[1], block_size, salt_hash);
2041        let completed6 = seq6.extend(Tokens::default()).unwrap();
2042        assert_eq!(completed6, None);
2043        assert_eq!(seq6.blocks.len(), 0);
2044        assert_eq!(seq6.current_block.tokens.as_ref(), &[1]);
2045        assert_eq!(seq6.total_tokens(), 1);
2046
2047        // Case 7: Extend fills current exactly, no remainder
2048        let mut seq7 = create_test_sequence(&[1, 2], block_size, salt_hash);
2049        let tokens7 = Tokens::from(vec![3, 4]);
2050        let completed7 = seq7.extend(tokens7).unwrap();
2051        assert_eq!(completed7, Some(0..1)); // Block is full but not committed yet
2052        assert_eq!(seq7.blocks.len(), 1);
2053        assert_eq!(seq7.current_block.tokens.as_ref(), &[0u32; 0]); // Current block is full
2054        assert_eq!(seq7.current_block.remaining(), 4);
2055        assert_eq!(seq7.total_tokens(), 4);
2056        assert_eq!(seq7.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4)); // Still the root block
2057
2058        // Test tokens_at extraction
2059        assert_eq!(seq7.tokens_at(0..2).as_ref(), &[1, 2]);
2060        assert_eq!(seq7.tokens_at(1..3).as_ref(), &[2, 3]);
2061        assert_eq!(seq7.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
2062        assert_eq!(seq7.tokens_at(2..2).as_ref(), &[0u32; 0]); // Empty range
2063    }
2064
2065    #[test]
2066    fn test_truncate() {
2067        let block_size = 4;
2068        let salt_hash = Some(TEST_SALT_HASH);
2069        let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; // 10 tokens
2070
2071        // Case 1: Truncate within current block (len 9)
2072        let mut seq1 = create_test_sequence(initial_tokens, block_size, salt_hash);
2073        assert!(seq1.truncate(9).is_ok());
2074        assert_eq!(seq1.total_tokens(), 9);
2075        assert_eq!(seq1.blocks().len(), 2);
2076        assert_eq!(seq1.current_block().tokens.as_ref(), &[9]);
2077        assert_eq!(
2078            seq1.current_block().parent_sequence_hash,
2079            Some(SEQ_HASH_5_8)
2080        );
2081
2082        // Case 2: Truncate to exact block boundary (len 8)
2083        let mut seq2 = create_test_sequence(initial_tokens, block_size, salt_hash);
2084        assert!(seq2.truncate(8).is_ok());
2085        assert_eq!(seq2.total_tokens(), 8);
2086        assert_eq!(seq2.blocks().len(), 2);
2087        assert!(seq2.current_block().tokens.is_empty());
2088        assert_eq!(
2089            seq2.current_block().parent_sequence_hash,
2090            Some(SEQ_HASH_5_8)
2091        );
2092
2093        // Case 3: Truncate into last full block (len 7)
2094        let mut seq3 = create_test_sequence(initial_tokens, block_size, salt_hash);
2095        assert!(seq3.truncate(7).is_ok());
2096        assert_eq!(seq3.total_tokens(), 7);
2097        assert_eq!(seq3.blocks().len(), 1); // Block [5,6,7,8] removed conceptually
2098        assert_eq!(seq3.current_block().tokens.as_ref(), &[5, 6, 7]); // Kept 3 from [5,6,7,8]
2099        assert_eq!(
2100            seq3.current_block().parent_sequence_hash,
2101            Some(SEQ_HASH_1_4)
2102        ); // Parent is hash of [1,2,3,4]
2103        assert_eq!(seq3.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2104
2105        // Case 4: Truncate removing full block(s) exactly (len 4)
2106        let mut seq4 = create_test_sequence(initial_tokens, block_size, salt_hash);
2107        assert!(seq4.truncate(4).is_ok());
2108        assert_eq!(seq4.total_tokens(), 4);
2109        assert_eq!(seq4.blocks().len(), 1); // Block [5,6,7,8] removed
2110        assert!(seq4.current_block().tokens.is_empty()); // New partial based on block [1,2,3,4]
2111        assert_eq!(
2112            seq4.current_block().parent_sequence_hash,
2113            Some(SEQ_HASH_1_4)
2114        );
2115        assert_eq!(seq4.blocks()[0].tokens().as_ref(), &[1, 2, 3, 4]);
2116
2117        // Case 5: Truncate into first block (len 3)
2118        let mut seq5 = create_test_sequence(initial_tokens, block_size, salt_hash);
2119        assert!(seq5.truncate(3).is_ok());
2120        assert_eq!(seq5.total_tokens(), 3);
2121        assert!(seq5.blocks().is_empty()); // Both blocks removed conceptually
2122        assert_eq!(seq5.current_block().tokens.as_ref(), &[1, 2, 3]); // Kept 3 from [1,2,3,4]
2123        assert_eq!(seq5.current_block().parent_sequence_hash, None); // No parent
2124
2125        // Case 6: Truncate to zero length (len 0)
2126        let mut seq6 = create_test_sequence(initial_tokens, block_size, salt_hash);
2127        assert!(seq6.truncate(0).is_ok());
2128        assert_eq!(seq6.total_tokens(), 0);
2129        assert!(seq6.blocks().is_empty());
2130        assert!(seq6.current_block().tokens.is_empty());
2131        assert_eq!(seq6.current_block().parent_sequence_hash, None);
2132
2133        // Case 7: Truncate to length greater than current (len 11)
2134        let mut seq7 = create_test_sequence(initial_tokens, block_size, salt_hash);
2135        let original_state = (seq7.blocks.clone(), seq7.current_block.tokens.clone()); // Clone for state check
2136        assert!(seq7.truncate(11).is_ok()); // Should have no effect
2137        assert_eq!(seq7.total_tokens(), 10);
2138        assert_eq!(seq7.blocks, original_state.0);
2139        assert_eq!(seq7.current_block.tokens, original_state.1);
2140
2141        // Case 8: Truncate to current length (len 10)
2142        let mut seq8 = create_test_sequence(initial_tokens, block_size, salt_hash);
2143        let original_state = (seq8.blocks.clone(), seq8.current_block.tokens.clone());
2144        assert!(seq8.truncate(10).is_ok());
2145        assert_eq!(seq8.total_tokens(), 10);
2146        assert_eq!(seq8.blocks, original_state.0);
2147        assert_eq!(seq8.current_block.tokens, original_state.1);
2148
2149        // Case 9: Truncate an empty sequence to 0
2150        let mut seq9 = create_test_sequence(&[], block_size, salt_hash);
2151        assert!(seq9.truncate(0).is_ok());
2152        assert_eq!(seq9.total_tokens(), 0);
2153        assert!(seq9.blocks().is_empty());
2154        assert!(seq9.current_block().tokens.is_empty());
2155
2156        // Case 10: Truncate on exact block boundary when current is empty (len 4)
2157        let tokens10 = &[1, 2, 3, 4, 5, 6, 7, 8]; // 8 tokens
2158        let mut seq10 = create_test_sequence(tokens10, block_size, salt_hash);
2159        assert_eq!(seq10.total_tokens(), 8);
2160        assert!(seq10.current_block().is_empty());
2161        assert!(seq10.truncate(4).is_ok()); // Remove block [5, 6, 7, 8]
2162        assert_eq!(seq10.total_tokens(), 4);
2163        assert_eq!(seq10.blocks().len(), 1);
2164        assert!(seq10.current_block().tokens.is_empty());
2165        assert_eq!(
2166            seq10.current_block().parent_sequence_hash,
2167            Some(SEQ_HASH_1_4)
2168        );
2169
2170        // Case 11: Truncate into first block when current is empty (len 3)
2171        let tokens11 = &[1, 2, 3, 4, 5, 6, 7, 8]; // 8 tokens
2172        let mut seq11 = create_test_sequence(tokens11, block_size, salt_hash);
2173        assert!(seq11.truncate(3).is_ok()); // Pop block [5,6,7,8] + 1 from [1,2,3,4]
2174        assert_eq!(seq11.total_tokens(), 3);
2175        assert!(seq11.blocks().is_empty());
2176        assert_eq!(seq11.current_block().tokens.as_ref(), &[1, 2, 3]); // Kept 3 from [1,2,3,4]
2177        assert_eq!(seq11.current_block().parent_sequence_hash, None);
2178    }
2179
2180    #[test]
2181    fn test_unwind() {
2182        let block_size = 4;
2183        let salt_hash = Some(TEST_SALT_HASH);
2184        let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; // 10 tokens
2185
2186        // Unwind 0
2187        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2188        assert!(seq.unwind(0).is_ok());
2189        assert_eq!(seq.total_tokens(), 10);
2190
2191        // Unwind 1
2192        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2193        assert!(seq.unwind(1).is_ok());
2194        assert_eq!(seq.total_tokens(), 9);
2195        assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2196
2197        // Unwind 3 (crosses boundary)
2198        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2199        assert!(seq.unwind(3).is_ok());
2200        assert_eq!(seq.total_tokens(), 7);
2201        assert_eq!(seq.blocks.len(), 1);
2202        assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2203
2204        // Unwind all (10)
2205        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2206        assert!(seq.unwind(10).is_ok());
2207        assert_eq!(seq.total_tokens(), 0);
2208        assert!(seq.blocks.is_empty());
2209        assert!(seq.current_block.is_empty());
2210
2211        // Unwind more than available (11)
2212        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2213        assert_eq!(seq.unwind(11), Err(TokenBlockError::InsufficientTokens));
2214        assert_eq!(seq.total_tokens(), 10); // State unchanged
2215
2216        // Unwind from empty
2217        let mut seq_empty = create_test_sequence(&[], block_size, salt_hash);
2218        assert_eq!(
2219            seq_empty.unwind(1),
2220            Err(TokenBlockError::InsufficientTokens)
2221        );
2222    }
2223
2224    #[test]
2225    fn test_pop() {
2226        let block_size = 4;
2227        let salt_hash = Some(TEST_SALT_HASH);
2228        let initial_tokens = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; // 10 tokens
2229
2230        let mut seq = create_test_sequence(initial_tokens, block_size, salt_hash);
2231
2232        // Pop 10
2233        assert_eq!(seq.pop(), Some(10));
2234        assert_eq!(seq.total_tokens(), 9);
2235        assert_eq!(seq.current_block.tokens.as_ref(), &[9]);
2236        assert_eq!(seq.blocks.len(), 2);
2237
2238        // Pop 9
2239        assert_eq!(seq.pop(), Some(9));
2240        assert_eq!(seq.total_tokens(), 8);
2241        assert!(seq.current_block.is_empty());
2242        assert_eq!(seq.blocks.len(), 2);
2243        assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_5_8));
2244
2245        // Pop 8 (crosses boundary)
2246        assert_eq!(seq.pop(), Some(8));
2247        assert_eq!(seq.total_tokens(), 7);
2248        assert_eq!(seq.current_block.tokens.as_ref(), &[5, 6, 7]);
2249        assert_eq!(seq.blocks.len(), 1);
2250        assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2251
2252        // Pop remaining partial (7, 6, 5)
2253        assert_eq!(seq.pop(), Some(7));
2254        assert_eq!(seq.pop(), Some(6));
2255        assert_eq!(seq.pop(), Some(5));
2256        assert_eq!(seq.total_tokens(), 4);
2257        assert!(seq.current_block.is_empty());
2258        assert_eq!(seq.blocks.len(), 1);
2259        assert_eq!(seq.current_block.parent_sequence_hash, Some(SEQ_HASH_1_4));
2260
2261        // Pop 4 (crosses boundary)
2262        assert_eq!(seq.pop(), Some(4));
2263        assert_eq!(seq.total_tokens(), 3);
2264        assert_eq!(seq.current_block.tokens.as_ref(), &[1, 2, 3]);
2265        assert!(seq.blocks.is_empty());
2266        assert_eq!(seq.current_block.parent_sequence_hash, None);
2267
2268        // Pop 3, 2, 1
2269        assert_eq!(seq.pop(), Some(3));
2270        assert_eq!(seq.pop(), Some(2));
2271        assert_eq!(seq.pop(), Some(1));
2272        assert_eq!(seq.total_tokens(), 0);
2273        assert!(seq.current_block.is_empty());
2274        assert!(seq.blocks.is_empty());
2275
2276        // Pop from empty
2277        assert_eq!(seq.pop(), None);
2278        assert_eq!(seq.total_tokens(), 0);
2279    }
2280
2281    #[test]
2282    fn test_total_tokens() {
2283        let block_size = 3;
2284        let salt_hash = Some(TEST_SALT_HASH);
2285
2286        let mut seq = create_test_sequence(&[], block_size, salt_hash);
2287        assert_eq!(seq.total_tokens(), 0);
2288
2289        seq.extend(Tokens::from(vec![1, 2])).unwrap();
2290        assert_eq!(seq.total_tokens(), 2);
2291
2292        seq.append(3).unwrap(); // Completes block 0
2293        assert_eq!(seq.total_tokens(), 3);
2294
2295        seq.extend(Tokens::from(vec![4, 5, 6, 7])).unwrap(); // Completes block 1, partial [7]
2296        assert_eq!(seq.total_tokens(), 7);
2297
2298        seq.pop().unwrap(); // Removes 7
2299        assert_eq!(seq.total_tokens(), 6);
2300
2301        seq.truncate(4).unwrap(); // Keep [1,2,3,4]
2302        assert_eq!(seq.total_tokens(), 4);
2303
2304        seq.unwind(2).unwrap(); // Keep [1,2]
2305        assert_eq!(seq.total_tokens(), 2);
2306    }
2307
2308    #[test]
2309    fn test_push_tokens_partial_block() {
2310        let mut partial = PartialTokenBlock::create_sequence_root(4, 1337);
2311
2312        let tokens = Tokens(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2313
2314        let remaining = partial.push_tokens(tokens);
2315        assert_eq!(partial.tokens.len(), 4);
2316        assert_eq!(remaining.len(), 6);
2317    }
2318
2319    // ========== Additional tests for coverage improvement ==========
2320
2321    // === PositionalRadixTree Tests ===
2322
2323    #[test]
2324    fn test_positional_radix_tree_basic_operations() {
2325        use crate::PositionalRadixTree;
2326
2327        // Test new() and is_empty()
2328        let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2329        assert!(tree.is_empty());
2330        assert_eq!(tree.len(), 0);
2331
2332        // Test default()
2333        let tree2: PositionalRadixTree<i32> = PositionalRadixTree::default();
2334        assert!(tree2.is_empty());
2335
2336        // Test prefix() and insertion
2337        let psh1 = PositionalSequenceHash::new(0x1234, 0, 0xABCD);
2338        let psh2 = PositionalSequenceHash::new(0x5678, 0, 0xEF01);
2339        let psh3 = PositionalSequenceHash::new(0x9ABC, 1, 0x2345);
2340
2341        tree.prefix(&psh1).insert(psh1, "value1".to_string());
2342        assert!(!tree.is_empty());
2343        assert_eq!(tree.len(), 1);
2344
2345        tree.prefix(&psh2).insert(psh2, "value2".to_string());
2346        assert_eq!(tree.len(), 2);
2347
2348        tree.prefix(&psh3).insert(psh3, "value3".to_string());
2349        assert_eq!(tree.len(), 3);
2350
2351        // Test retrieval
2352        assert_eq!(
2353            tree.prefix(&psh1).get(&psh1).map(|v| v.clone()),
2354            Some("value1".to_string())
2355        );
2356    }
2357
2358    #[test]
2359    fn test_positional_radix_tree_with_lineage_hash() {
2360        use crate::PositionalRadixTree;
2361
2362        // Test generic usage with PositionalLineageHash
2363        let tree: PositionalRadixTree<u32, PositionalLineageHash> = PositionalRadixTree::new();
2364        assert!(tree.is_empty());
2365
2366        let plh1 = PositionalLineageHash::new(0x1234, None, 0);
2367        let plh2 = PositionalLineageHash::new(0x5678, Some(0x1234), 1);
2368
2369        tree.prefix(&plh1).insert(plh1, 100);
2370        tree.prefix(&plh2).insert(plh2, 200);
2371
2372        assert_eq!(tree.len(), 2);
2373        assert_eq!(tree.prefix(&plh1).get(&plh1).map(|v| *v), Some(100));
2374        assert_eq!(tree.prefix(&plh2).get(&plh2).map(|v| *v), Some(200));
2375    }
2376
2377    #[test]
2378    fn test_positional_radix_tree_position_lookup() {
2379        use crate::PositionalRadixTree;
2380
2381        let tree: PositionalRadixTree<String> = PositionalRadixTree::new();
2382
2383        // Insert at different positions
2384        let psh0 = PositionalSequenceHash::new(0x1111, 0, 0xAAAA);
2385        let psh1 = PositionalSequenceHash::new(0x2222, 1, 0xBBBB);
2386        let psh2 = PositionalSequenceHash::new(0x3333, 2, 0xCCCC);
2387
2388        tree.prefix(&psh0).insert(psh0, "pos0".to_string());
2389        tree.prefix(&psh1).insert(psh1, "pos1".to_string());
2390        tree.prefix(&psh2).insert(psh2, "pos2".to_string());
2391
2392        // Test position() method
2393        assert!(tree.position(0).is_some());
2394        assert!(tree.position(1).is_some());
2395        assert!(tree.position(2).is_some());
2396        assert!(tree.position(3).is_none()); // No entries at position 3
2397
2398        // Verify position lookup returns correct submap
2399        let pos0_map = tree.position(0).unwrap();
2400        assert_eq!(pos0_map.len(), 1);
2401    }
2402
2403    // === PositionalSequenceHash Additional Tests ===
2404
2405    #[test]
2406    fn test_positional_sequence_hash_mode_2_and_3() {
2407        // Mode 2: position fits in 24 bits (65536 <= pos < 16777216)
2408        let position_mode2 = 100_000u64;
2409        let seq_hash = 0x1234567890ABCDEF;
2410        let block_hash = 0xFEDCBA9876543210;
2411
2412        let psh_mode2 = PositionalSequenceHash::new(seq_hash, position_mode2, block_hash);
2413        assert_eq!(psh_mode2.mode(), 2, "Position 100,000 should use mode 2");
2414        assert_eq!(psh_mode2.position(), position_mode2);
2415        assert_eq!(psh_mode2.sequence_hash(), seq_hash);
2416        // Local block hash truncated to 38 bits in mode 2
2417        assert_eq!(
2418            psh_mode2.local_block_hash(),
2419            block_hash & ((1u64 << 38) - 1)
2420        );
2421
2422        // Mode 3: position fits in 31 bits (16777216 <= pos < 2147483648)
2423        let position_mode3 = 100_000_000u64;
2424        let psh_mode3 = PositionalSequenceHash::new(seq_hash, position_mode3, block_hash);
2425        assert_eq!(
2426            psh_mode3.mode(),
2427            3,
2428            "Position 100,000,000 should use mode 3"
2429        );
2430        assert_eq!(psh_mode3.position(), position_mode3);
2431        assert_eq!(psh_mode3.sequence_hash(), seq_hash);
2432        // Local block hash truncated to 31 bits in mode 3
2433        assert_eq!(
2434            psh_mode3.local_block_hash(),
2435            block_hash & ((1u64 << 31) - 1)
2436        );
2437    }
2438
2439    #[test]
2440    fn test_positional_sequence_hash_as_u128() {
2441        let psh = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2442        let raw = psh.as_u128();
2443
2444        // Verify we can reconstruct from raw value
2445        assert_eq!(raw & 0xFFFF_FFFF_FFFF_FFFF, 0x1234);
2446        assert!(raw > 0); // Non-zero
2447
2448        // Create another and compare
2449        let psh2 = PositionalSequenceHash::new(0x1234, 100, 0xABCD);
2450        assert_eq!(psh.as_u128(), psh2.as_u128());
2451    }
2452
2453    #[test]
2454    fn test_positional_sequence_hash_debug() {
2455        let psh = PositionalSequenceHash::new(0x1234567890ABCDEF, 42, 0xFEDCBA98);
2456        let debug_str = format!("{:?}", psh);
2457
2458        // Debug should contain field names and values
2459        assert!(debug_str.contains("PositionalSequenceHash"));
2460        assert!(debug_str.contains("sequence_hash"));
2461        assert!(debug_str.contains("local_block_hash"));
2462        assert!(debug_str.contains("position"));
2463    }
2464
2465    // === PositionalLineageHash Additional Tests ===
2466
2467    #[test]
2468    fn test_positional_lineage_hash_debug_and_display() {
2469        // Test position 0 (no parent shown)
2470        let plh_root = PositionalLineageHash::new(0x123456789ABCDEF0, None, 0);
2471        let debug_root = format!("{:?}", plh_root);
2472        let display_root = format!("{}", plh_root);
2473
2474        // Debug and Display should show position 0
2475        assert!(debug_root.starts_with("0:"));
2476        assert!(display_root.starts_with("0:"));
2477        // Position 0 should not show parent
2478        assert_eq!(debug_root.matches(':').count(), 1);
2479        assert_eq!(display_root.matches(':').count(), 1);
2480
2481        // Test position > 0 (parent shown)
2482        let plh_child = PositionalLineageHash::new(0xABCDEF0123456789, Some(0x123456789ABCDEF0), 5);
2483        let debug_child = format!("{:?}", plh_child);
2484        let display_child = format!("{}", plh_child);
2485
2486        // Should show position:current:parent
2487        assert!(debug_child.starts_with("5:"));
2488        assert!(display_child.starts_with("5:"));
2489        // Position > 0 should show parent (3 parts)
2490        assert_eq!(debug_child.matches(':').count(), 2);
2491        assert_eq!(display_child.matches(':').count(), 2);
2492    }
2493
2494    #[test]
2495    fn test_positional_lineage_hash_as_u128() {
2496        let plh = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
2497        let raw = plh.as_u128();
2498
2499        assert!(raw > 0);
2500
2501        // Create another with same params and compare
2502        let plh2 = PositionalLineageHash::new(0x1234, Some(0x5678), 10);
2503        assert_eq!(plh.as_u128(), plh2.as_u128());
2504
2505        // Different params should give different hash
2506        let plh3 = PositionalLineageHash::new(0x1234, Some(0x5678), 11);
2507        assert_ne!(plh.as_u128(), plh3.as_u128());
2508    }
2509
2510    #[test]
2511    fn test_positional_lineage_hash_ord_by_position_then_current_fragment() {
2512        let at_5_low = PositionalLineageHash::new(0x10, Some(0x1111), 5);
2513        let at_5_high = PositionalLineageHash::new(0x20, Some(0x1111), 5);
2514        assert!(
2515            at_5_low.current_hash_fragment() < at_5_high.current_hash_fragment(),
2516            "test assumes distinct current fragments at the same position"
2517        );
2518        assert!(at_5_low < at_5_high);
2519        assert!(at_5_high > at_5_low);
2520
2521        let at_3 = PositionalLineageHash::new(0x99, Some(0x2222), 3);
2522        assert!(at_3 < at_5_low);
2523        assert!(at_5_high < PositionalLineageHash::new(0x01, Some(0x3333), 6));
2524    }
2525
2526    #[test]
2527    fn test_positional_lineage_hash_ord_tiebreak_parent_via_packed_u128() {
2528        let same_pos_same_current = PositionalLineageHash::new(0x1234, Some(0x100), 10);
2529        let same_pos_same_current_other_parent =
2530            PositionalLineageHash::new(0x1234, Some(0x200), 10);
2531        assert_eq!(same_pos_same_current.position(), 10);
2532        assert_eq!(
2533            same_pos_same_current.position(),
2534            same_pos_same_current_other_parent.position()
2535        );
2536        assert_eq!(
2537            same_pos_same_current.current_hash_fragment(),
2538            same_pos_same_current_other_parent.current_hash_fragment()
2539        );
2540        assert_ne!(same_pos_same_current, same_pos_same_current_other_parent);
2541        assert_ne!(
2542            same_pos_same_current.cmp(&same_pos_same_current_other_parent),
2543            std::cmp::Ordering::Equal
2544        );
2545    }
2546
2547    #[test]
2548    fn test_positional_lineage_hash_vec_sort_matches_ord() {
2549        let a = PositionalLineageHash::new(0x30, None, 0);
2550        let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
2551        let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
2552        let mut v = vec![b, a, c];
2553        v.sort();
2554        assert_eq!(v, vec![a, b, c]);
2555    }
2556
2557    #[test]
2558    fn test_positional_lineage_hash_itertools_sorted() {
2559        use itertools::Itertools;
2560
2561        let a = PositionalLineageHash::new(0x30, None, 0);
2562        let b = PositionalLineageHash::new(0x10, Some(0x30), 2);
2563        let c = PositionalLineageHash::new(0x20, Some(0x30), 2);
2564        let sorted: Vec<_> = vec![b, a, c].into_iter().sorted().collect();
2565        assert_eq!(sorted, vec![a, b, c]);
2566    }
2567
2568    // === Tokens From Impls ===
2569
2570    #[test]
2571    fn test_tokens_from_vec_usize() {
2572        let usize_vec: Vec<usize> = vec![1, 2, 3, 4, 5];
2573        let tokens = Tokens::from(usize_vec);
2574
2575        assert_eq!(tokens.as_ref(), &[1u32, 2, 3, 4, 5]);
2576        assert_eq!(tokens.len(), 5);
2577    }
2578
2579    #[test]
2580    fn test_tokens_partial_eq_slice_ref() {
2581        let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2582        let slice: &[Token] = &[1, 2, 3, 4];
2583
2584        // Test PartialEq<&[Token]> for Tokens
2585        assert!(tokens == slice);
2586
2587        let different_slice: &[Token] = &[1, 2, 3, 5];
2588        assert!(tokens != different_slice);
2589    }
2590
2591    // === TokenBlock Accessors ===
2592
2593    #[test]
2594    fn test_token_block_accessors() {
2595        let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2596        let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2597
2598        let block = &seq.blocks()[0];
2599
2600        // Test block_size()
2601        assert_eq!(block.block_size(), 4);
2602
2603        // Test positional_sequence_hash()
2604        let psh = block.positional_sequence_hash();
2605        assert_eq!(psh.position(), 0);
2606
2607        // Test positional_lineage_hash()
2608        let plh = block.positional_lineage_hash();
2609        assert_eq!(plh.position(), 0);
2610        assert_eq!(plh.parent_hash_fragment(), 0); // Root has no parent
2611    }
2612
2613    #[test]
2614    fn test_positional_hash_trait_impls() {
2615        use crate::PositionalHash;
2616
2617        // Test PositionalHash for PositionalSequenceHash
2618        let psh = PositionalSequenceHash::new(0x1234, 42, 0xABCD);
2619        assert_eq!(PositionalHash::position(&psh), 42);
2620
2621        // Test PositionalHash for PositionalLineageHash
2622        let plh = PositionalLineageHash::new(0x1234, None, 99);
2623        assert_eq!(PositionalHash::position(&plh), 99);
2624    }
2625
2626    // === TokenBlockSequence Edge Cases ===
2627
2628    #[test]
2629    fn test_sequence_pop_from_full_block() {
2630        // Test pop when current partial block is empty (must pop from full block)
2631        let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
2632        let mut seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
2633
2634        // Current block should be empty, all tokens in completed blocks
2635        assert!(seq.current_block().is_empty());
2636        assert_eq!(seq.blocks().len(), 2);
2637        assert_eq!(seq.total_tokens(), 8);
2638
2639        // Pop should remove from last full block
2640        let popped = seq.pop();
2641        assert_eq!(popped, Some(8));
2642        assert_eq!(seq.total_tokens(), 7);
2643        assert_eq!(seq.blocks().len(), 1);
2644        assert_eq!(seq.current_block().tokens.as_ref(), &[5, 6, 7]);
2645    }
2646
2647    #[test]
2648    #[allow(clippy::reversed_empty_ranges)] // so we can explicitly test invalid ranges
2649    fn test_sequence_tokens_at_edge_cases() {
2650        let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
2651        let seq = TokenBlockSequence::new(tokens, 4, Some(TEST_SALT_HASH));
2652
2653        // Start > end (invalid range
2654        assert!(seq.tokens_at(3..2).is_empty());
2655
2656        // End > total (out of bounds)
2657        assert!(seq.tokens_at(0..10).is_empty());
2658
2659        // Valid edge case: exact boundaries
2660        assert_eq!(seq.tokens_at(0..4).as_ref(), &[1, 2, 3, 4]);
2661        assert_eq!(seq.tokens_at(4..5).as_ref(), &[5]);
2662    }
2663
2664    #[test]
2665    fn test_sequence_next_block() {
2666        let tokens = Tokens::from(vec![1u32, 2, 3, 4]);
2667        let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2668
2669        let block = &seq.blocks()[0];
2670        let next_partial = block.next_block();
2671
2672        // next_block should create a partial block linked to this block
2673        assert!(next_partial.is_empty());
2674        assert_eq!(next_partial.remaining(), 4);
2675        assert_eq!(
2676            next_partial.parent_sequence_hash,
2677            Some(block.sequence_hash())
2678        );
2679        assert_eq!(next_partial.position, 1);
2680    }
2681
2682    #[test]
2683    fn test_sequence_reset() {
2684        let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9]);
2685        let mut seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2686
2687        assert_eq!(seq.blocks().len(), 2);
2688        assert_eq!(seq.total_tokens(), 9);
2689
2690        seq.reset();
2691
2692        assert!(seq.blocks().is_empty());
2693        assert!(seq.current_block().is_empty());
2694        assert_eq!(seq.total_tokens(), 0);
2695        assert_eq!(seq.current_block().parent_sequence_hash, None);
2696    }
2697
2698    #[test]
2699    fn test_sequence_into_parts() {
2700        let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5]);
2701        let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2702
2703        let (blocks, partial) = seq.into_parts();
2704
2705        assert_eq!(blocks.len(), 1);
2706        assert_eq!(partial.tokens.as_ref(), &[5]);
2707    }
2708
2709    #[test]
2710    fn test_sequence_last_complete_block() {
2711        // Empty sequence
2712        let seq_empty = TokenBlockSequence::new(Tokens::default(), 4, None);
2713        assert!(seq_empty.last_complete_block().is_none());
2714
2715        // With blocks
2716        let tokens = Tokens::from(vec![1u32, 2, 3, 4, 5, 6, 7, 8]);
2717        let seq = TokenBlockSequence::new(tokens, 4, Some(1337));
2718        let last = seq.last_complete_block();
2719        assert!(last.is_some());
2720        assert_eq!(last.unwrap().tokens().as_ref(), &[5, 6, 7, 8]);
2721    }
2722
2723    #[test]
2724    fn test_positional_hashes_msgpack_roundtrip() {
2725        let psh = PositionalSequenceHash::new(0xDEAD_BEEF_CAFE_BABE, 12345, 0x0123_4567_89AB_CDEF);
2726        let bytes = rmp_serde::to_vec(&psh).expect("psh serialize");
2727        let decoded: PositionalSequenceHash =
2728            rmp_serde::from_slice(&bytes).expect("psh deserialize");
2729        assert_eq!(psh, decoded);
2730        assert_eq!(psh.as_u128(), decoded.as_u128());
2731
2732        let plh =
2733            PositionalLineageHash::new(0x1111_2222_3333_4444, Some(0x5555_6666_7777_8888), 256);
2734        let bytes = rmp_serde::to_vec(&plh).expect("plh serialize");
2735        let decoded: PositionalLineageHash =
2736            rmp_serde::from_slice(&bytes).expect("plh deserialize");
2737        assert_eq!(plh, decoded);
2738        assert_eq!(plh.as_u128(), decoded.as_u128());
2739
2740        // Vec roundtrip — exercises the codec inside a container.
2741        let vec = vec![psh, PositionalSequenceHash::default(), psh];
2742        let bytes = rmp_serde::to_vec(&vec).expect("vec serialize");
2743        let decoded: Vec<PositionalSequenceHash> =
2744            rmp_serde::from_slice(&bytes).expect("vec deserialize");
2745        assert_eq!(vec, decoded);
2746    }
2747
2748    #[test]
2749    fn test_positional_hashes_json_roundtrip() {
2750        // Confirm the byte-array codec also roundtrips through JSON (array of u8).
2751        let psh = PositionalSequenceHash::new(0xAAAA_BBBB_CCCC_DDDD, 7, 0xEEEE_FFFF_0000_1111);
2752        let json = serde_json::to_string(&psh).expect("psh json serialize");
2753        let decoded: PositionalSequenceHash =
2754            serde_json::from_str(&json).expect("psh json deserialize");
2755        assert_eq!(psh, decoded);
2756
2757        let plh = PositionalLineageHash::new(0x1234_5678, Some(0xABCD_EF01), 42);
2758        let json = serde_json::to_string(&plh).expect("plh json serialize");
2759        let decoded: PositionalLineageHash =
2760            serde_json::from_str(&json).expect("plh json deserialize");
2761        assert_eq!(plh, decoded);
2762    }
2763}
dynamo_tokens/lib.rs

dynamo_tokens/
lib.rs