Skip to main content

cqlite_core/storage/sstable/bti/
node.rs

1//! BTI trie node implementations
2//!
3//! This module defines the trie node structures and operations for the BTI format.
4//! Each node type is optimized for different branching patterns and storage efficiency.
5
6use crate::error::{Error, Result};
7use std::fmt;
8
9/// BTI-specific result type
10pub type BtiResult<T> = Result<T>;
11
12/// BTI-specific error types
13#[derive(Debug, Clone)]
14pub enum BtiError {
15    /// Parse error with details
16    Parse(String),
17    /// Invalid node structure
18    InvalidNodeStructure(String),
19    /// Navigation error during trie traversal
20    NavigationError(String),
21    /// Invalid node type
22    InvalidNodeType(u8),
23    /// Maximum depth exceeded
24    MaxDepthExceeded(usize),
25    /// Invalid byte-comparable key
26    InvalidByteComparableKey(String),
27    /// Corrupted trie structure
28    CorruptedTrie(String),
29    /// Missing component file
30    MissingComponent(String),
31}
32
33impl fmt::Display for BtiError {
34    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35        match self {
36            BtiError::Parse(msg) => write!(f, "BTI parse error: {}", msg),
37            BtiError::InvalidNodeStructure(msg) => write!(f, "Invalid BTI node structure: {}", msg),
38            BtiError::NavigationError(msg) => write!(f, "BTI navigation error: {}", msg),
39            BtiError::InvalidNodeType(node_type) => {
40                write!(f, "Invalid BTI trie node type: 0x{:02X}", node_type)
41            }
42            BtiError::MaxDepthExceeded(depth) => {
43                write!(f, "BTI trie depth exceeded maximum: {}", depth)
44            }
45            BtiError::InvalidByteComparableKey(key) => {
46                write!(f, "Invalid byte-comparable key: {}", key)
47            }
48            BtiError::CorruptedTrie(msg) => {
49                write!(f, "Corrupted BTI trie structure: {}", msg)
50            }
51            BtiError::MissingComponent(component) => {
52                write!(f, "Missing BTI component: {}", component)
53            }
54        }
55    }
56}
57
58impl std::error::Error for BtiError {}
59
60impl From<BtiError> for Error {
61    fn from(err: BtiError) -> Self {
62        Error::Parse(format!("BTI error: {}", err))
63    }
64}
65
66/// BTI node types corresponding to the four trie node variants
67#[derive(Debug, Clone, Copy, PartialEq, Eq)]
68pub enum BtiNodeType {
69    /// Payload-only node (leaf)
70    PayloadOnly,
71    /// Single child node
72    Single,
73    /// Sparse node with few children
74    Sparse,
75    /// Dense node with many consecutive children
76    Dense,
77}
78
79impl BtiNodeType {
80    /// Get expected children range for this node type
81    pub fn expected_children_range(&self) -> (usize, Option<usize>) {
82        match self {
83            BtiNodeType::PayloadOnly => (0, Some(0)),
84            BtiNodeType::Single => (1, Some(1)),
85            BtiNodeType::Sparse => (2, Some(256)), // Reasonable upper bound
86            BtiNodeType::Dense => (1, Some(256)),  // Full byte range
87        }
88    }
89}
90
91impl fmt::Display for BtiNodeType {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        match self {
94            BtiNodeType::PayloadOnly => write!(f, "PayloadOnly"),
95            BtiNodeType::Single => write!(f, "Single"),
96            BtiNodeType::Sparse => write!(f, "Sparse"),
97            BtiNodeType::Dense => write!(f, "Dense"),
98        }
99    }
100}
101
102/// Sized pointer for encoding distances between parent and child nodes
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct SizedPointer {
105    /// Distance from current position to target
106    pub distance: u64,
107    /// Size of the pointer encoding (1, 2, 4, or 8 bytes)
108    pub size: u8,
109}
110
111impl SizedPointer {
112    /// Create a new sized pointer
113    pub fn new(distance: u64) -> Self {
114        let size = if distance <= 0xFF {
115            1
116        } else if distance <= 0xFFFF {
117            2
118        } else if distance <= 0xFFFF_FFFF {
119            4
120        } else {
121            8
122        };
123
124        Self { distance, size }
125    }
126
127    /// Encode the pointer to bytes
128    pub fn to_bytes(&self) -> Vec<u8> {
129        match self.size {
130            1 => vec![self.distance as u8],
131            2 => (self.distance as u16).to_be_bytes().to_vec(),
132            4 => (self.distance as u32).to_be_bytes().to_vec(),
133            8 => self.distance.to_be_bytes().to_vec(),
134            _ => panic!("Invalid pointer size: {}", self.size),
135        }
136    }
137
138    /// Decode pointer from bytes
139    pub fn from_bytes(data: &[u8], size: u8) -> BtiResult<Self> {
140        let distance = match size {
141            1 if !data.is_empty() => data[0] as u64,
142            2 if data.len() >= 2 => u16::from_be_bytes([data[0], data[1]]) as u64,
143            4 if data.len() >= 4 => u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as u64,
144            8 if data.len() >= 8 => u64::from_be_bytes([
145                data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
146            ]),
147            _ => {
148                return Err(BtiError::Parse(format!(
149                    "Invalid pointer size {} or insufficient data",
150                    size
151                ))
152                .into());
153            }
154        };
155
156        Ok(Self { distance, size })
157    }
158}
159
160/// Trie node transition representing a path to a child node
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct Transition {
163    /// The byte value for this transition
164    pub byte: u8,
165    /// Pointer to the child node
166    pub child: SizedPointer,
167}
168
169impl Transition {
170    pub fn new(byte: u8, child: SizedPointer) -> Self {
171        Self { byte, child }
172    }
173}
174
175/// Payload reference for leaf nodes
176#[derive(Debug, Clone, PartialEq, Eq)]
177pub struct PayloadRef {
178    /// Offset to the payload data
179    pub offset: u64,
180    /// Length of the payload data
181    pub length: u32,
182    /// Optional checksum for validation
183    pub checksum: Option<u32>,
184}
185
186impl PayloadRef {
187    pub fn new(offset: u64, length: u32) -> Self {
188        Self {
189            offset,
190            length,
191            checksum: None,
192        }
193    }
194
195    pub fn with_checksum(mut self, checksum: u32) -> Self {
196        self.checksum = Some(checksum);
197        self
198    }
199}
200
201/// Base trie node structure
202#[derive(Debug, Clone)]
203pub struct BtiNode {
204    /// Type of this node
205    pub node_type: BtiNodeType,
206    /// Level in the trie (0 = leaf level)
207    pub level: u16,
208    /// Key prefix stored at this node (for optimization)
209    pub key_prefix: Vec<u8>,
210    /// Node-specific data
211    pub data: BtiNodeData,
212}
213
214/// Node-specific data based on node type
215#[derive(Debug, Clone)]
216pub enum BtiNodeData {
217    /// Payload-only node (leaf)
218    PayloadOnly { payload: PayloadRef },
219
220    /// Single child node
221    Single { transition: Transition },
222
223    /// Sparse node with few children
224    Sparse { transitions: Vec<Transition> },
225
226    /// Dense node with many consecutive children
227    Dense {
228        /// Starting byte value for the consecutive range
229        start_byte: u8,
230        /// Child pointers for the consecutive range
231        children: Vec<SizedPointer>,
232    },
233}
234
235impl BtiNode {
236    /// Create a payload-only node
237    pub fn payload_only(level: u16, key_prefix: Vec<u8>, payload: PayloadRef) -> Self {
238        Self {
239            node_type: BtiNodeType::PayloadOnly,
240            level,
241            key_prefix,
242            data: BtiNodeData::PayloadOnly { payload },
243        }
244    }
245
246    /// Create a single child node
247    pub fn single(level: u16, key_prefix: Vec<u8>, transition: Transition) -> Self {
248        Self {
249            node_type: BtiNodeType::Single,
250            level,
251            key_prefix,
252            data: BtiNodeData::Single { transition },
253        }
254    }
255
256    /// Create a sparse node
257    pub fn sparse(level: u16, key_prefix: Vec<u8>, mut transitions: Vec<Transition>) -> Self {
258        // Ensure transitions are sorted by byte value for binary search
259        transitions.sort_by_key(|t| t.byte);
260
261        Self {
262            node_type: BtiNodeType::Sparse,
263            level,
264            key_prefix,
265            data: BtiNodeData::Sparse { transitions },
266        }
267    }
268
269    /// Create a dense node
270    pub fn dense(
271        level: u16,
272        key_prefix: Vec<u8>,
273        start_byte: u8,
274        children: Vec<SizedPointer>,
275    ) -> Self {
276        Self {
277            node_type: BtiNodeType::Dense,
278            level,
279            key_prefix,
280            data: BtiNodeData::Dense {
281                start_byte,
282                children,
283            },
284        }
285    }
286
287    /// Find the child node pointer for a given byte
288    pub fn find_child(&self, byte: u8) -> Option<&SizedPointer> {
289        match &self.data {
290            BtiNodeData::PayloadOnly { .. } => None,
291
292            BtiNodeData::Single { transition } => {
293                if transition.byte == byte {
294                    Some(&transition.child)
295                } else {
296                    None
297                }
298            }
299
300            BtiNodeData::Sparse { transitions } => {
301                // Binary search on sorted transitions
302                transitions
303                    .binary_search_by_key(&byte, |t| t.byte)
304                    .ok()
305                    .map(|idx| &transitions[idx].child)
306            }
307
308            BtiNodeData::Dense {
309                start_byte,
310                children,
311            } => {
312                if byte >= *start_byte && (byte as usize) < (*start_byte as usize + children.len())
313                {
314                    let index = byte as usize - *start_byte as usize;
315                    children.get(index)
316                } else {
317                    None
318                }
319            }
320        }
321    }
322
323    /// Get all child transitions
324    pub fn get_transitions(&self) -> Vec<&Transition> {
325        match &self.data {
326            BtiNodeData::PayloadOnly { .. } => Vec::new(),
327            BtiNodeData::Single { transition } => vec![transition],
328            BtiNodeData::Sparse { transitions } => transitions.iter().collect(),
329            BtiNodeData::Dense {
330                start_byte: _,
331                children: _,
332            } => {
333                // Convert dense representation to transitions
334                // Note: This creates temporary Transition objects
335                // In practice, you'd want to avoid this allocation
336                Vec::new() // Simplified for this example
337            }
338        }
339    }
340
341    /// Get the payload reference if this is a leaf node
342    pub fn get_payload(&self) -> Option<&PayloadRef> {
343        match &self.data {
344            BtiNodeData::PayloadOnly { payload } => Some(payload),
345            _ => None,
346        }
347    }
348
349    /// Check if this node is a leaf (has payload)
350    pub fn is_leaf(&self) -> bool {
351        matches!(self.data, BtiNodeData::PayloadOnly { .. })
352    }
353
354    /// Get the number of children
355    pub fn child_count(&self) -> usize {
356        match &self.data {
357            BtiNodeData::PayloadOnly { .. } => 0,
358            BtiNodeData::Single { .. } => 1,
359            BtiNodeData::Sparse { transitions } => transitions.len(),
360            BtiNodeData::Dense { children, .. } => children.len(),
361        }
362    }
363
364    /// Validate node structure consistency
365    pub fn validate(&self) -> BtiResult<()> {
366        let expected_range = self.node_type.expected_children_range();
367        let child_count = self.child_count();
368
369        // Check child count is within expected range
370        if child_count < expected_range.0 {
371            return Err(BtiError::InvalidNodeStructure(format!(
372                "Node type {} has {} children, expected at least {}",
373                self.node_type, child_count, expected_range.0
374            ))
375            .into());
376        }
377
378        if let Some(max) = expected_range.1 {
379            if child_count > max {
380                return Err(BtiError::InvalidNodeStructure(format!(
381                    "Node type {} has {} children, expected at most {}",
382                    self.node_type, child_count, max
383                ))
384                .into());
385            }
386        }
387
388        // Type-specific validation
389        match &self.data {
390            BtiNodeData::Sparse { transitions } => {
391                // Check that transitions are sorted
392                for window in transitions.windows(2) {
393                    if window[0].byte >= window[1].byte {
394                        return Err(BtiError::InvalidNodeStructure(
395                            "Sparse node transitions not sorted".to_string(),
396                        )
397                        .into());
398                    }
399                }
400            }
401
402            BtiNodeData::Dense {
403                start_byte,
404                children,
405            } => {
406                // Check that we don't overflow byte range
407                let end_byte = *start_byte as usize + children.len();
408                if end_byte > 256 {
409                    return Err(BtiError::InvalidNodeStructure(
410                        "Dense node range overflows byte values".to_string(),
411                    )
412                    .into());
413                }
414            }
415
416            _ => {} // Other types don't need special validation
417        }
418
419        Ok(())
420    }
421}
422
423/// Trie navigation context for tracking path through the trie
424#[derive(Debug, Clone)]
425pub struct TrieNavigator {
426    /// Current position in the file
427    pub current_offset: u64,
428    /// Path taken through the trie (for debugging/backtracking)
429    pub path: Vec<u8>,
430    /// Nodes visited (for cycle detection)
431    pub visited_offsets: std::collections::HashSet<u64>,
432}
433
434impl TrieNavigator {
435    /// Create a new navigator at the root
436    pub fn new(root_offset: u64) -> Self {
437        Self {
438            current_offset: root_offset,
439            path: Vec::new(),
440            visited_offsets: std::collections::HashSet::new(),
441        }
442    }
443
444    /// Navigate to a child node
445    pub fn navigate_to_child(&mut self, byte: u8, child_pointer: &SizedPointer) -> BtiResult<()> {
446        let target_offset = self.current_offset + child_pointer.distance;
447
448        // Check for cycles
449        if self.visited_offsets.contains(&target_offset) {
450            return Err(
451                BtiError::NavigationError("Cycle detected in trie navigation".to_string()).into(),
452            );
453        }
454
455        self.visited_offsets.insert(self.current_offset);
456        self.current_offset = target_offset;
457        self.path.push(byte);
458
459        Ok(())
460    }
461
462    /// Get the current path as a key prefix
463    pub fn current_path(&self) -> &[u8] {
464        &self.path
465    }
466
467    /// Reset to navigate from root again
468    pub fn reset(&mut self, root_offset: u64) {
469        self.current_offset = root_offset;
470        self.path.clear();
471        self.visited_offsets.clear();
472    }
473}
474
475#[cfg(test)]
476mod tests {
477    use super::*;
478
479    #[test]
480    fn test_sized_pointer() {
481        let small = SizedPointer::new(100);
482        assert_eq!(small.size, 1);
483        assert_eq!(small.to_bytes(), vec![100]);
484
485        let large = SizedPointer::new(0x10000);
486        assert_eq!(large.size, 4);
487        assert_eq!(large.to_bytes(), vec![0x00, 0x01, 0x00, 0x00]);
488    }
489
490    #[test]
491    fn test_node_creation() {
492        let payload = PayloadRef::new(1000, 50);
493        let node = BtiNode::payload_only(0, b"test".to_vec(), payload);
494
495        assert_eq!(node.node_type, BtiNodeType::PayloadOnly);
496        assert_eq!(node.level, 0);
497        assert_eq!(node.key_prefix, b"test");
498        assert!(node.is_leaf());
499        assert_eq!(node.child_count(), 0);
500    }
501
502    #[test]
503    fn test_sparse_node_search() {
504        let transitions = vec![
505            Transition::new(b'a', SizedPointer::new(100)),
506            Transition::new(b'm', SizedPointer::new(200)),
507            Transition::new(b'z', SizedPointer::new(300)),
508        ];
509
510        let node = BtiNode::sparse(1, Vec::new(), transitions);
511
512        assert!(node.find_child(b'a').is_some());
513        assert!(node.find_child(b'm').is_some());
514        assert!(node.find_child(b'z').is_some());
515        assert!(node.find_child(b'b').is_none());
516
517        assert_eq!(node.child_count(), 3);
518    }
519
520    #[test]
521    fn test_dense_node_lookup() {
522        let children = vec![
523            SizedPointer::new(100),
524            SizedPointer::new(200),
525            SizedPointer::new(300),
526        ];
527
528        let node = BtiNode::dense(1, Vec::new(), b'a', children);
529
530        assert!(node.find_child(b'a').is_some());
531        assert!(node.find_child(b'b').is_some());
532        assert!(node.find_child(b'c').is_some());
533        assert!(node.find_child(b'd').is_none());
534        assert!(node.find_child(b'@').is_none()); // Before range
535    }
536
537    #[test]
538    fn test_node_validation() {
539        // Valid payload-only node
540        let payload_node = BtiNode::payload_only(0, Vec::new(), PayloadRef::new(0, 10));
541        assert!(payload_node.validate().is_ok());
542
543        // Invalid sparse node (not enough children)
544        let _invalid_sparse = BtiNode::sparse(
545            1,
546            Vec::new(),
547            vec![Transition::new(b'a', SizedPointer::new(100))],
548        );
549        // Note: This would be invalid in practice but our implementation
550        // doesn't enforce minimum children for sparse nodes in this test
551    }
552
553    #[test]
554    fn test_trie_navigator() {
555        let mut nav = TrieNavigator::new(1000);
556        assert_eq!(nav.current_offset, 1000);
557        assert_eq!(nav.current_path(), &[] as &[u8]);
558
559        let pointer = SizedPointer::new(100);
560        nav.navigate_to_child(b'a', &pointer).unwrap();
561
562        assert_eq!(nav.current_offset, 1100);
563        assert_eq!(nav.current_path(), b"a");
564    }
565}