Skip to main content

icydb_core/db/cursor/
string.rs

//! Module: cursor::string
//! Responsibility: external continuation cursor token string formatting.
//! Does not own: binary token wire encoding or continuation validation semantics.
//! Boundary: cursor-owned binary token bytes -> lowercase hex external token text.
5
6#[cfg(test)]
7use crate::db::cursor::{GroupedContinuationToken, TokenWireError};
8use crate::db::{codec::hex::encode_hex_lower, cursor::token::MAX_CURSOR_TOKEN_BYTES};
9
10// External cursor tokens are lowercase hex over binary cursor token bytes, so
11// the string limit must allow the full binary token budget after hex expansion.
12const MAX_CURSOR_TOKEN_HEX_LEN: usize = MAX_CURSOR_TOKEN_BYTES * 2;
13
14///
15/// CursorDecodeError
16///
17/// External continuation cursor string decode failures.
18///
19
20#[derive(Debug, Eq, thiserror::Error, PartialEq)]
21pub enum CursorDecodeError {
22    #[error("cursor token is empty")]
23    Empty,
24
25    #[error("cursor token exceeds max length: {len} hex chars (max {max})")]
26    TooLong { len: usize, max: usize },
27
28    #[error("cursor token must have an even number of hex characters")]
29    OddLength,
30
31    #[error("invalid hex character at position {position}")]
32    InvalidHex { position: usize },
33}
34
35/// Encode raw cursor bytes as a lowercase hex token.
36#[must_use]
37pub fn encode_cursor(bytes: &[u8]) -> String {
38    encode_hex_lower(bytes)
39}
40
/// Render one grouped continuation token as its external cursor token string.
///
/// The token is first serialized through its own `encode` method; the bytes
/// it yields are then formatted by [`encode_cursor`]. A failure reported by
/// `encode` is returned to the caller unchanged.
#[cfg(test)]
pub(in crate::db) fn encode_grouped_cursor_token(
    token: &GroupedContinuationToken,
) -> Result<String, TokenWireError> {
    let wire_bytes = token.encode()?;

    Ok(encode_cursor(wire_bytes.as_slice()))
}
50
51/// Decode a lowercase/uppercase hex cursor token into raw bytes.
52///
53/// The token may include surrounding whitespace, which is trimmed.
54pub fn decode_cursor(token: &str) -> Result<Vec<u8>, CursorDecodeError> {
55    // Phase 1: normalize input and enforce envelope-level bounds.
56    let token = token.trim();
57
58    if token.is_empty() {
59        return Err(CursorDecodeError::Empty);
60    }
61
62    if token.len() > MAX_CURSOR_TOKEN_HEX_LEN {
63        return Err(CursorDecodeError::TooLong {
64            len: token.len(),
65            max: MAX_CURSOR_TOKEN_HEX_LEN,
66        });
67    }
68
69    if !token.len().is_multiple_of(2) {
70        return Err(CursorDecodeError::OddLength);
71    }
72
73    // Phase 2: decode validated hex pairs into raw cursor bytes.
74    let mut out = Vec::with_capacity(token.len() / 2);
75    let bytes = token.as_bytes();
76
77    for idx in (0..bytes.len()).step_by(2) {
78        let hi = decode_hex_nibble(bytes[idx])
79            .ok_or(CursorDecodeError::InvalidHex { position: idx + 1 })?;
80
81        let lo = decode_hex_nibble(bytes[idx + 1])
82            .ok_or(CursorDecodeError::InvalidHex { position: idx + 2 })?;
83
84        out.push((hi << 4) | lo);
85    }
86
87    Ok(out)
88}
89
/// Map one ASCII hex digit to its 4-bit value.
///
/// Accepts `0-9`, `a-f`, and `A-F`; every other byte yields `None`.
const fn decode_hex_nibble(byte: u8) -> Option<u8> {
    if byte.is_ascii_digit() {
        return Some(byte - b'0');
    }

    // Fold `A-F` onto `a-f` so a single range test covers both cases. Bytes
    // outside `A-Z` are left unchanged by the fold.
    let folded = byte.to_ascii_lowercase();
    if matches!(folded, b'a'..=b'f') {
        Some(folded - b'a' + 10)
    } else {
        None
    }
}
98
99///
100/// TESTS
101///
102
#[cfg(test)]
mod tests {
    use super::{CursorDecodeError, MAX_CURSOR_TOKEN_HEX_LEN, decode_cursor, encode_cursor};

    // Blank input, whether literally empty or whitespace-only, maps to `Empty`.
    #[test]
    fn decode_cursor_rejects_empty_and_whitespace_tokens() {
        let cases = [
            ("", "empty token should be rejected"),
            ("   \n\t", "whitespace token should be rejected"),
        ];

        for (input, reason) in cases {
            let failure = decode_cursor(input).expect_err(reason);
            assert_eq!(failure, CursorDecodeError::Empty);
        }
    }

    // A token that cannot be split into whole hex pairs is rejected up front.
    #[test]
    fn decode_cursor_rejects_odd_length_tokens() {
        let failure = decode_cursor("abc").expect_err("odd-length token should be rejected");
        assert_eq!(failure, CursorDecodeError::OddLength);
    }

    // The limit is inclusive: exactly the maximum decodes, one more pair fails.
    #[test]
    fn decode_cursor_enforces_max_token_length() {
        let max_bytes = MAX_CURSOR_TOKEN_HEX_LEN / 2;

        let at_limit = "aa".repeat(max_bytes);
        let decoded = decode_cursor(&at_limit).expect("max-sized token should decode");
        assert_eq!(decoded.len(), max_bytes);

        let over_limit = format!("{at_limit}aa");
        let failure = decode_cursor(&over_limit).expect_err("oversized token should be rejected");
        let expected = CursorDecodeError::TooLong {
            len: MAX_CURSOR_TOKEN_HEX_LEN + 2,
            max: MAX_CURSOR_TOKEN_HEX_LEN,
        };
        assert_eq!(failure, expected);
    }

    // The reported position is 1-based: `x` is the second character of "0x".
    #[test]
    fn decode_cursor_rejects_invalid_hex_with_position() {
        let failure = decode_cursor("0x").expect_err("invalid hex nibble should be rejected");
        assert_eq!(failure, CursorDecodeError::InvalidHex { position: 2 });
    }

    // Upper- and lowercase digits may be mixed, and outer whitespace is ignored.
    #[test]
    fn decode_cursor_accepts_mixed_case_and_surrounding_whitespace() {
        let decoded = decode_cursor("  0aFf10  ").expect("mixed-case hex token should decode");
        assert_eq!(decoded, vec![0x0a, 0xff, 0x10]);
    }

    // Encoding produces lowercase hex, and decoding that text restores the bytes.
    #[test]
    fn encode_decode_cursor_round_trip_is_stable() {
        let original = vec![0x00, 0x01, 0x0a, 0xff];

        let text = encode_cursor(&original);
        assert_eq!(text, "00010aff");

        let restored = decode_cursor(&text).expect("encoded token should decode");
        assert_eq!(restored, original);
    }
}