Skip to main content

triblespace_core/inline/encodings/
shortstring.rs

1use crate::inline::Encodes;
2use crate::id::ExclusiveId;
3use crate::id::Id;
4use crate::id_hex;
5use crate::macros::entity;
6use crate::metadata;
7use crate::metadata::MetaDescribe;
8use crate::trible::Fragment;
9use crate::inline::TryFromInline;
10use crate::inline::TryToInline;
11use crate::inline::Inline;
12use crate::inline::InlineEncoding;
13
14use indxvec::Printing;
15use std::str::Utf8Error;
16
/// An error that occurs when converting a string to a short string.
///
/// The conversion fails when the string is too long or contains an interior NUL byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FromStrError {
    /// The string exceeds 32 bytes when encoded as UTF-8.
    TooLong,
    /// The string contains a NUL byte, which is used as the terminator.
    InteriorNul,
}

impl std::fmt::Display for FromStrError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::TooLong => "string exceeds 32 bytes when encoded as UTF-8",
            Self::InteriorNul => "string contains an interior NUL byte",
        };
        f.write_str(message)
    }
}

// Lets callers propagate the error with `?` into `Box<dyn Error>` and similar.
impl std::error::Error for FromStrError {}
26
/// Errors that can occur when validating a [`ShortString`] value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// Non-zero bytes appear after the first NUL.
    InteriorNul,
    /// The byte sequence before the terminator is not valid UTF-8.
    Utf8(Utf8Error),
}

impl std::fmt::Display for ValidationError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InteriorNul => f.write_str("non-zero bytes appear after the first NUL"),
            Self::Utf8(cause) => write!(f, "short string is not valid UTF-8: {}", cause),
        }
    }
}

impl std::error::Error for ValidationError {
    /// Exposes the underlying [`Utf8Error`] for the [`ValidationError::Utf8`] variant.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InteriorNul => None,
            Self::Utf8(cause) => Some(cause),
        }
    }
}
35
/// An inline encoding for a short string.
///
/// A short string is a UTF-8 encoded string with a maximum length of 32 bytes (inclusive).
/// The string is NUL-terminated:
/// if it is shorter than 32 bytes, the remaining bytes are zero;
/// if it is exactly 32 bytes, there is no zero terminator.
pub struct ShortString;
42
43impl MetaDescribe for ShortString {
44    fn describe() -> Fragment {
45        let id: Id = id_hex!("2D848DB0AF112DB226A6BF1A3640D019");
46        #[allow(unused_mut)]
47        let mut tribles = entity! {
48            ExclusiveId::force_ref(&id) @
49                metadata::name: "shortstring",
50                metadata::description: "UTF-8 string stored inline in 32 bytes with NUL termination and zero padding. Keeping the bytes inside the value makes the string sortable and queryable without an extra blob lookup.\n\nUse for short labels, enum-like names, and keys that must fit in the value boundary. For longer or variable text, store a LongString blob and reference it with a Handle.\n\nInterior NUL bytes are invalid and the maximum length is 32 bytes. The schema stores raw bytes, so it does not account for grapheme width or display columns.",
51                metadata::tag: metadata::KIND_INLINE_ENCODING,
52        };
53
54        #[cfg(feature = "wasm")]
55        {
56            tribles += entity! { ExclusiveId::force_ref(&id) @
57                metadata::value_formatter: wasm_formatter::SHORTSTRING_WASM,
58            };
59        }
60        tribles
61    }
62}
63
// Formatter for short string values, compiled only when the `wasm` feature is enabled.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes the text stored in a raw 32-byte short string value to `out`.
    //
    // Error codes returned on failure:
    // - `1`: writing to `out` failed.
    // - `2`: a non-zero byte follows the first NUL byte.
    // - `3`: the bytes before the first NUL byte are not valid UTF-8.
    //
    // NOTE(review): `SHORTSTRING_WASM` is presumably generated from this function by
    // `#[value_formatter]` — confirm against the macro.
    #[value_formatter]
    pub(crate) fn shortstring(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        // The text ends at the first NUL byte, or spans all 32 bytes when there is none.
        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());

        // Everything from the terminator onwards must be zero padding.
        if raw[len..].iter().any(|&b| b != 0) {
            return Err(2);
        }

        let text = core::str::from_utf8(&raw[..len]).map_err(|_| 3u32)?;
        out.write_str(text).map_err(|_| 1u32)?;
        Ok(())
    }
}
83
84impl InlineEncoding for ShortString {
85    type ValidationError = ValidationError;
86    type Encoding = Self;
87
88    fn validate(value: Inline<Self>) -> Result<Inline<Self>, Self::ValidationError> {
89        let raw = &value.raw;
90        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
91        // ensure all bytes after first NUL are zero
92        if raw[len..].iter().any(|&b| b != 0) {
93            return Err(ValidationError::InteriorNul);
94        }
95        std::str::from_utf8(&raw[..len]).map_err(ValidationError::Utf8)?;
96        Ok(value)
97    }
98}
99
100impl<'a> TryFromInline<'a, ShortString> for &'a str {
101    type Error = Utf8Error;
102
103    fn try_from_inline(v: &'a Inline<ShortString>) -> Result<&'a str, Self::Error> {
104        let len = v.raw.iter().position(|&b| b == 0).unwrap_or(v.raw.len());
105        #[cfg(kani)]
106        {
107            // Kani spends significant time unwinding the UTF-8 validation loop.
108            // Bounding `len` to 32 keeps the verifier from exploring unrealistic
109            // larger values, reducing runtime from minutes to seconds.
110            kani::assume(len <= 32);
111        }
112        std::str::from_utf8(&v.raw[..len])
113    }
114}
115
116impl<'a> TryFromInline<'a, ShortString> for String {
117    type Error = Utf8Error;
118
119    fn try_from_inline(v: &Inline<ShortString>) -> Result<Self, Self::Error> {
120        let s: &str = v.try_from_inline()?;
121        Ok(s.to_string())
122    }
123}
124
125impl TryToInline<ShortString> for &str {
126    type Error = FromStrError;
127
128    fn try_to_inline(self) -> Result<Inline<ShortString>, Self::Error> {
129        let bytes = self.as_bytes();
130        if bytes.len() > 32 {
131            return Err(FromStrError::TooLong);
132        }
133        if bytes.contains(&0) {
134            return Err(FromStrError::InteriorNul);
135        }
136
137        let mut data: [u8; 32] = [0; 32];
138        data[..bytes.len()].copy_from_slice(bytes);
139
140        Ok(Inline::new(data))
141    }
142}
143
144impl TryToInline<ShortString> for String {
145    type Error = FromStrError;
146
147    fn try_to_inline(self) -> Result<Inline<ShortString>, Self::Error> {
148        (&self[..]).try_to_inline()
149    }
150}
151
152impl Encodes<&str> for ShortString
153{
154    type Output = Inline<ShortString>;
155    fn encode(source: &str) -> Inline<ShortString> {
156        source.try_to_inline().unwrap()
157    }
158}
159
160impl Encodes<String> for ShortString
161{
162    type Output = Inline<ShortString>;
163    fn encode(source: String) -> Inline<ShortString> {
164        source.try_to_inline().unwrap()
165    }
166}
167
168impl Encodes<&String> for ShortString
169{
170    type Output = Inline<ShortString>;
171    fn encode(source: &String) -> Inline<ShortString> {
172        source.to_str().try_to_inline().unwrap()
173    }
174}