Skip to main content

miden_standards/utils/
string.rs

//! Fixed-width UTF-8 string stored as N Words (7 bytes/felt, length-prefixed).
//!
//! [`FixedWidthString<N>`] is the generic building block for encoding arbitrary UTF-8 strings into
//! a fixed number of storage words. `N` must be at most 9: the length prefix is a single `u8`
//! (maximum value 255), and N=9 gives a capacity of 9×4×7−1 = 251 bytes, whereas N=10 would give
//! 279 bytes, which no longer fits in the prefix. The maximum storable string length is therefore
//! **251 bytes** (when N=9).
//!
//! ## Buffer layout (N × 4 × 7 bytes)
//!
//! ```text
//! Byte 0:          string length (u8)
//! Bytes 1..1+len:  UTF-8 content
//! Remaining:       zero-padded
//! ```
//!
//! Each 7-byte chunk is stored as a little-endian `u64` with the high byte always zero, so the
//! value is always < 2^56 and fits safely in a Goldilocks field element.
18
19use alloc::boxed::Box;
20use alloc::string::String;
21use alloc::vec::Vec;
22
23use miden_protocol::{Felt, WORD_SIZE, Word};
24
25// ENCODING CONSTANT
26// ================================================================================================
27
/// How many payload bytes each felt carries. Seven bytes are 56 bits, so a packed value is
/// always below 2^56 and therefore below the Goldilocks prime.
const BYTES_PER_FELT: usize = 7;
30
31// FIXED-WIDTH STRING
32// ================================================================================================
33
/// UTF-8 text that occupies exactly `N` Words when encoded (N×4 felts, 7 bytes per felt, with a
/// one-byte length prefix).
///
/// `N` may not exceed 9. At N=9 the buffer is 252 bytes long; one byte holds the length prefix,
/// which leaves room for a string of at most **251 bytes**. Wrapper types built on top of this
/// one are free to enforce a smaller limit.
///
/// Referencing `CAPACITY` with N=10 (or anything larger) fails at compile time:
///
/// ```compile_fail
/// # use miden_standards::utils::string::FixedWidthString;
/// let _ = FixedWidthString::<10>::CAPACITY; // assertion failed: N <= 9
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FixedWidthString<const N: usize>(Box<str>);
48
49impl<const N: usize> Default for FixedWidthString<N> {
50    fn default() -> Self {
51        Self("".into())
52    }
53}
54
/// Upper bound on the string length across all permitted widths: the capacity at N=9, i.e.
/// 9×4×7 bytes of buffer minus the single byte taken by the length prefix.
const MAX_PAYLOAD_BYTES: usize = 9 * 4 * 7 - 1;
57
58impl<const N: usize> FixedWidthString<N> {
59    /// Compile-time check: N must be at most 9 so that CAPACITY ≤ 251 and the length
60    /// fits in the u8 prefix. (Referenced by CAPACITY so the assert is always evaluated.)
61    const _CAPACITY_FITS_LENGTH_PREFIX: () = assert!(N <= 9);
62
63    /// Maximum bytes that can be stored (full capacity of the N words minus the length byte).
64    /// Never exceeds 251 because the length is encoded in a single u8 (bytes 0..=251).
65    pub const CAPACITY: usize =
66        N * 4 * BYTES_PER_FELT - 1 + (Self::_CAPACITY_FITS_LENGTH_PREFIX, 0).1;
67
68    /// Creates a [`FixedWidthString`] from a UTF-8 string, validating it fits within capacity.
69    pub fn new(value: &str) -> Result<Self, FixedWidthStringError> {
70        if value.len() > Self::CAPACITY {
71            return Err(FixedWidthStringError::TooLong {
72                actual: value.len(),
73                max: Self::CAPACITY,
74            });
75        }
76        Ok(Self(value.into()))
77    }
78
79    /// Returns the string content.
80    pub fn as_str(&self) -> &str {
81        &self.0
82    }
83
84    /// Encodes the string into `N` Words (7 bytes/felt, length-prefixed, zero-padded).
85    pub fn to_words(&self) -> Vec<Word> {
86        let n_felts = N * WORD_SIZE;
87        let buf_len = n_felts * BYTES_PER_FELT;
88        let bytes = self.0.as_bytes();
89        debug_assert!(bytes.len() < buf_len);
90
91        let mut buf = alloc::vec![0u8; buf_len];
92        buf[0] = bytes.len() as u8;
93        buf[1..1 + bytes.len()].copy_from_slice(bytes);
94
95        (0..N)
96            .map(|word_idx| {
97                let felts: [Felt; 4] = core::array::from_fn(|felt_idx| {
98                    let start = (word_idx * 4 + felt_idx) * BYTES_PER_FELT;
99                    let mut le_bytes = [0u8; 8];
100                    le_bytes[..BYTES_PER_FELT].copy_from_slice(&buf[start..start + BYTES_PER_FELT]);
101                    Felt::try_from(u64::from_le_bytes(le_bytes))
102                        .expect("7-byte LE value always fits in a Goldilocks felt")
103                });
104                Word::from(felts)
105            })
106            .collect()
107    }
108
109    /// Decodes a [`FixedWidthString`] from a slice of exactly `N` Words.
110    pub fn try_from_words(words: &[Word]) -> Result<Self, FixedWidthStringError> {
111        if words.len() != N {
112            return Err(FixedWidthStringError::InvalidLength { expected: N, got: words.len() });
113        }
114        let n_felts = N * WORD_SIZE;
115        let buf_len = n_felts * BYTES_PER_FELT;
116        let mut buf = alloc::vec![0u8; buf_len];
117
118        for (word_idx, word) in words.iter().enumerate() {
119            for (felt_idx, felt) in word.as_slice().iter().enumerate() {
120                let felt_value = felt.as_canonical_u64();
121                let le_bytes = felt_value.to_le_bytes();
122                if le_bytes[BYTES_PER_FELT] != 0 {
123                    return Err(FixedWidthStringError::InvalidPadding);
124                }
125                let start = (word_idx * 4 + felt_idx) * BYTES_PER_FELT;
126                buf[start..start + BYTES_PER_FELT].copy_from_slice(&le_bytes[..BYTES_PER_FELT]);
127            }
128        }
129
130        let len = buf[0] as usize;
131        if len > MAX_PAYLOAD_BYTES {
132            return Err(FixedWidthStringError::InvalidLengthPrefix);
133        }
134        if len + 1 > buf_len {
135            return Err(FixedWidthStringError::InvalidLengthPrefix);
136        }
137        String::from_utf8(buf[1..1 + len].to_vec())
138            .map_err(FixedWidthStringError::InvalidUtf8)
139            .map(|s| Self(s.into()))
140    }
141}
142
143// ERROR TYPE
144// ================================================================================================
145
146/// Error type for [`FixedWidthString`] construction and decoding.
147#[derive(Debug, Clone, thiserror::Error)]
148pub enum FixedWidthStringError {
149    /// String exceeds the maximum capacity for this word width.
150    #[error("string must be at most {max} bytes, got {actual}")]
151    TooLong { actual: usize, max: usize },
152    /// Decoded bytes are not valid UTF-8.
153    #[error("string is not valid UTF-8")]
154    InvalidUtf8(#[source] alloc::string::FromUtf8Error),
155    /// A felt's high byte (byte index 7 in LE) is non-zero, violating the 7-bytes-per-felt
156    /// invariant.
157    #[error("felt high byte is non-zero (invalid padding)")]
158    InvalidPadding,
159    /// The length prefix byte claims more bytes than the buffer can hold, or the length is >= 252.
160    #[error("length prefix is invalid or exceeds buffer capacity")]
161    InvalidLengthPrefix,
162    /// Slice length does not match the expected word count.
163    #[error("expected {expected} words, got {got}")]
164    InvalidLength { expected: usize, got: usize },
165}
166
167// TESTS
168// ================================================================================================
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a two-word input whose first felt is `first` and whose other felts are zero.
    fn two_words_with_first_felt(first: Felt) -> [Word; 2] {
        [
            Word::from([first, Felt::ZERO, Felt::ZERO, Felt::ZERO]),
            Word::from([Felt::ZERO, Felt::ZERO, Felt::ZERO, Felt::ZERO]),
        ]
    }

    #[test]
    fn empty_string_roundtrip() {
        let s: FixedWidthString<2> = FixedWidthString::new("").unwrap();
        let words = s.to_words();
        assert_eq!(words.len(), 2);
        let decoded = FixedWidthString::<2>::try_from_words(&words).unwrap();
        assert_eq!(decoded.as_str(), "");
    }

    #[test]
    fn ascii_roundtrip_2_words() {
        let s = FixedWidthString::<2>::new("hello").unwrap();
        let decoded = FixedWidthString::<2>::try_from_words(&s.to_words()).unwrap();
        assert_eq!(decoded.as_str(), "hello");
    }

    #[test]
    fn ascii_roundtrip_7_words() {
        let text = "A longer description that spans many felts";
        let s = FixedWidthString::<7>::new(text).unwrap();
        let decoded = FixedWidthString::<7>::try_from_words(&s.to_words()).unwrap();
        assert_eq!(decoded.as_str(), text);
    }

    #[test]
    fn utf8_multibyte_roundtrip() {
        // "café" — contains a 2-byte UTF-8 sequence
        let s = FixedWidthString::<2>::new("café").unwrap();
        let decoded = FixedWidthString::<2>::try_from_words(&s.to_words()).unwrap();
        assert_eq!(decoded.as_str(), "café");
    }

    #[test]
    fn exactly_at_capacity_accepted() {
        let cap = FixedWidthString::<2>::CAPACITY; // 2*4*7 - 1 = 55
        let s = "a".repeat(cap);
        assert!(FixedWidthString::<2>::new(&s).is_ok());
    }

    #[test]
    fn one_over_capacity_rejected() {
        let cap = FixedWidthString::<2>::CAPACITY;
        let s = "a".repeat(cap + 1);
        assert!(matches!(
            FixedWidthString::<2>::new(&s),
            Err(FixedWidthStringError::TooLong { .. })
        ));
    }

    #[test]
    fn capacity_7_words() {
        // 7*4*7 - 1 = 195
        assert_eq!(FixedWidthString::<7>::CAPACITY, 195);
        let s = "b".repeat(195);
        let fw = FixedWidthString::<7>::new(&s).unwrap();
        let decoded = FixedWidthString::<7>::try_from_words(&fw.to_words()).unwrap();
        assert_eq!(decoded.as_str(), s);
    }

    #[test]
    fn capacity_9_words_is_max() {
        // Max N is 9: 9*4*7 - 1 = 251 (one byte for length prefix).
        assert_eq!(FixedWidthString::<9>::CAPACITY, 251);
        let s = "x".repeat(251);
        let fw = FixedWidthString::<9>::new(&s).unwrap();
        let decoded = FixedWidthString::<9>::try_from_words(&fw.to_words()).unwrap();
        assert_eq!(decoded.as_str(), s);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn n10_would_exceed_length_prefix() {
        // N=10 would give 10*4*7 - 1 = 279 > 251, so it is disallowed. CAPACITY is defined so
        // that it depends on _CAPACITY_FITS_LENGTH_PREFIX; therefore referencing
        // FixedWidthString::<10>::CAPACITY fails at compile time with "assertion failed: N <=
        // 9". That compile-time failure is also tested by the `compile_fail` doctest in the
        // doc comment on [`FixedWidthString`].
        assert!(10 * 4 * BYTES_PER_FELT - 1 > MAX_PAYLOAD_BYTES);
    }

    #[test]
    fn to_words_returns_correct_count() {
        let s = FixedWidthString::<7>::new("test").unwrap();
        assert_eq!(s.to_words().len(), 7);
    }

    #[test]
    fn wrong_word_count_returns_error() {
        let s = FixedWidthString::<2>::new("hi").unwrap();
        let words = s.to_words();
        // pass only 1 word instead of 2
        assert!(matches!(
            FixedWidthString::<2>::try_from_words(&words[..1]),
            Err(FixedWidthStringError::InvalidLength { expected: 2, got: 1 })
        ));
    }

    #[test]
    fn length_prefix_overflow_returns_invalid_length_prefix() {
        // The length byte (first byte of first felt) is set to 0xFF, which exceeds the buffer
        // and triggers InvalidLengthPrefix. (This is the low byte of the felt, not the high byte.)
        let overflow_len = Felt::try_from(0xff_u64).unwrap();
        let words = two_words_with_first_felt(overflow_len);
        assert!(matches!(
            FixedWidthString::<2>::try_from_words(&words),
            Err(FixedWidthStringError::InvalidLengthPrefix)
        ));
    }

    #[test]
    fn length_prefix_one_past_capacity_returns_invalid_length_prefix() {
        // 56 is below the global 251-byte limit but one more than a 2-word buffer can hold
        // (capacity 55), so it exercises the per-width bound rather than the global one.
        assert_eq!(FixedWidthString::<2>::CAPACITY, 55);
        let just_too_long = Felt::try_from(56_u64).unwrap();
        let words = two_words_with_first_felt(just_too_long);
        assert!(matches!(
            FixedWidthString::<2>::try_from_words(&words),
            Err(FixedWidthStringError::InvalidLengthPrefix)
        ));
    }

    #[test]
    fn felt_with_high_byte_set_returns_invalid_padding() {
        // Construct words where one felt has its 8th byte (LE index 7) non-zero, violating the
        // 7-bytes-per-felt invariant. 2^63 is accepted by `Felt::try_from` but has bit 63 set.
        let high_byte_non_zero = Felt::try_from(2u64.pow(63)).unwrap();
        let words = [
            Word::from([Felt::ZERO, high_byte_non_zero, Felt::ZERO, Felt::ZERO]),
            Word::from([Felt::ZERO, Felt::ZERO, Felt::ZERO, Felt::ZERO]),
        ];
        assert!(matches!(
            FixedWidthString::<2>::try_from_words(&words),
            Err(FixedWidthStringError::InvalidPadding)
        ));
    }

    #[test]
    fn non_utf8_bytes_return_invalid_utf8() {
        // Encode raw bytes that are not valid UTF-8 (e.g. 0xFF byte in content).
        // Length byte = 1, content byte = 0xFF (invalid UTF-8 start byte).
        // Pack into first felt: LE bytes [1, 0xFF, 0, 0, 0, 0, 0] → u64 = 0x0000_0000_0000_ff01
        let raw: u64 = 0x0000_0000_0000_ff01;
        let bad_felt = Felt::try_from(raw).unwrap();
        let words = two_words_with_first_felt(bad_felt);
        assert!(matches!(
            FixedWidthString::<2>::try_from_words(&words),
            Err(FixedWidthStringError::InvalidUtf8(_))
        ));
    }

    #[test]
    fn default_is_empty_string() {
        let s: FixedWidthString<2> = FixedWidthString::default();
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn empty_string_encodes_to_7_empty_words() {
        // An empty FixedWidthString encodes to all-zero words because the length prefix is 0
        // and the rest of the buffer is zero-padded. This property is relied upon by
        // `TokenMetadata::storage_slots` to encode absent optional fields as empty word slices.
        let s = FixedWidthString::<7>::new("").unwrap();
        let words = s.to_words();
        assert_eq!(words.len(), 7);
        for word in &words {
            assert_eq!(*word, Word::default());
        }
    }

    #[test]
    fn empty_string_encodes_to_9_empty_words() {
        let s = FixedWidthString::<9>::new("").unwrap();
        let words = s.to_words();
        assert_eq!(words.len(), 9);
        for word in &words {
            assert_eq!(*word, Word::default());
        }
    }
}