Skip to main content

triblespace_core/value/schemas/
shortstring.rs

1use crate::id::ExclusiveId;
2use crate::id::Id;
3use crate::id_hex;
4use crate::macros::entity;
5use crate::metadata;
6use crate::metadata::{ConstDescribe, ConstId};
7use crate::repo::BlobStore;
8use crate::trible::Fragment;
9use crate::value::schemas::hash::Blake3;
10use crate::value::ToValue;
11use crate::value::TryFromValue;
12use crate::value::TryToValue;
13use crate::value::Value;
14use crate::value::ValueSchema;
15
16use indxvec::Printing;
17use std::str::Utf8Error;
18
/// Error returned when a string cannot be stored as a [`ShortString`] value.
///
/// Produced by the `TryToValue<ShortString>` conversions for `&str` and
/// `String`. The length check runs before the NUL check, so an over-long input
/// reports [`FromStrError::TooLong`] even if it also contains a NUL byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FromStrError {
    /// The string exceeds 32 bytes when encoded as UTF-8.
    TooLong,
    /// The string contains a NUL byte, which is used as the terminator.
    InteriorNul,
}
28
/// Errors that can occur when validating a [`ShortString`] value.
///
/// Returned by the `ValueSchema::validate` implementation for `ShortString`
/// when the raw bytes do not form a well-formed short string.
#[derive(Debug)]
pub enum ValidationError {
    /// A non-zero byte appears after the first NUL, i.e. the padding that
    /// follows the terminator is not all zeros.
    InteriorNul,
    /// The byte sequence before the terminator is not valid UTF-8; carries the
    /// underlying [`Utf8Error`].
    Utf8(Utf8Error),
}
37
/// A value schema for a short string.
///
/// A short string is a UTF-8 encoded string with a maximum length of 32 bytes
/// (inclusive), stored inline in the value.
///
/// - If the string is shorter than 32 bytes, it is NUL-terminated and all
///   remaining bytes are zero.
/// - If the string is exactly 32 bytes, there is no terminator.
///
/// Because NUL acts as the terminator, the string itself must not contain a
/// NUL byte.
pub struct ShortString;
44
/// Gives the [`ShortString`] schema its constant identifier.
impl ConstId for ShortString {
    /// Identifier of the schema, written as a 32-digit hexadecimal literal.
    /// `describe` passes it to `entity!` through `ExclusiveId::force_ref`.
    const ID: Id = id_hex!("2D848DB0AF112DB226A6BF1A3640D019");
}
48
impl ConstDescribe for ShortString {
    /// Builds the metadata fragment that describes the `ShortString` schema.
    ///
    /// Stores a long-form description and the name `"shortstring"` in `blobs`
    /// (in that order), then attaches the values returned by `put` to the
    /// schema id together with the `metadata::KIND_VALUE_SCHEMA` tag. When the
    /// `wasm` feature is enabled, a `metadata::value_formatter` attribute for
    /// the stored `wasm_formatter::SHORTSTRING_WASM` blob is added as well.
    ///
    /// # Errors
    ///
    /// Returns `B::PutError` as soon as any `blobs.put` call fails.
    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
    where
        B: BlobStore<Blake3>,
    {
        // Copy the id into a local so it can be borrowed by `force_ref` below.
        let id = Self::ID;
        let description = blobs.put(
            "UTF-8 string stored inline in 32 bytes with NUL termination and zero padding. Keeping the bytes inside the value makes the string sortable and queryable without an extra blob lookup.\n\nUse for short labels, enum-like names, and keys that must fit in the value boundary. For longer or variable text, store a LongString blob and reference it with a Handle.\n\nInterior NUL bytes are invalid and the maximum length is 32 bytes. The schema stores raw bytes, so it does not account for grapheme width or display columns.",
        )?;
        let name = blobs.put("shortstring")?;
        // Base metadata, present regardless of enabled features.
        let tribles = entity! {
            ExclusiveId::force_ref(&id) @
                metadata::name: name,
                metadata::description: description,
                metadata::tag: metadata::KIND_VALUE_SCHEMA,
        };

        // With the `wasm` feature the fragment is shadowed by an extended copy
        // that also carries the formatter; without it `tribles` is returned
        // unchanged.
        #[cfg(feature = "wasm")]
        let tribles = {
            let mut tribles = tribles;
            tribles += entity! { ExclusiveId::force_ref(&id) @
                metadata::value_formatter: blobs.put(wasm_formatter::SHORTSTRING_WASM)?,
            };
            tribles
        };
        Ok(tribles)
    }
}
77
// Formatter for `ShortString` values, compiled only with the `wasm` feature.
//
// `describe` stores `wasm_formatter::SHORTSTRING_WASM` as the schema's
// `metadata::value_formatter`. That constant is not defined in the visible
// source; presumably `#[value_formatter]` generates it from the function
// below — TODO confirm against the macro.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes the text held in `raw` to `out`.
    //
    // Error codes returned in `Err`:
    // - 1: writing to `out` failed.
    // - 2: a non-zero byte follows the first NUL.
    // - 3: the bytes before the first NUL are not valid UTF-8.
    //
    // Plain `//` comments are used on purpose so that no `#[doc]` attributes
    // are handed to the attribute macro.
    #[value_formatter]
    pub(crate) fn shortstring(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        // Text ends at the first NUL, or spans all 32 bytes when there is none.
        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());

        // Everything from the terminator onwards must be zero padding.
        if raw[len..].iter().any(|&b| b != 0) {
            return Err(2);
        }

        let text = core::str::from_utf8(&raw[..len]).map_err(|_| 3u32)?;
        out.write_str(text).map_err(|_| 1u32)?;
        Ok(())
    }
}
97
98impl ValueSchema for ShortString {
99    type ValidationError = ValidationError;
100
101    fn validate(value: Value<Self>) -> Result<Value<Self>, Self::ValidationError> {
102        let raw = &value.raw;
103        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
104        // ensure all bytes after first NUL are zero
105        if raw[len..].iter().any(|&b| b != 0) {
106            return Err(ValidationError::InteriorNul);
107        }
108        std::str::from_utf8(&raw[..len]).map_err(ValidationError::Utf8)?;
109        Ok(value)
110    }
111}
112
113impl<'a> TryFromValue<'a, ShortString> for &'a str {
114    type Error = Utf8Error;
115
116    fn try_from_value(v: &'a Value<ShortString>) -> Result<&'a str, Self::Error> {
117        let len = v.raw.iter().position(|&b| b == 0).unwrap_or(v.raw.len());
118        #[cfg(kani)]
119        {
120            // Kani spends significant time unwinding the UTF-8 validation loop.
121            // Bounding `len` to 32 keeps the verifier from exploring unrealistic
122            // larger values, reducing runtime from minutes to seconds.
123            kani::assume(len <= 32);
124        }
125        std::str::from_utf8(&v.raw[..len])
126    }
127}
128
129impl<'a> TryFromValue<'a, ShortString> for String {
130    type Error = Utf8Error;
131
132    fn try_from_value(v: &Value<ShortString>) -> Result<Self, Self::Error> {
133        let s: &str = v.try_from_value()?;
134        Ok(s.to_string())
135    }
136}
137
138impl TryToValue<ShortString> for &str {
139    type Error = FromStrError;
140
141    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
142        let bytes = self.as_bytes();
143        if bytes.len() > 32 {
144            return Err(FromStrError::TooLong);
145        }
146        if bytes.contains(&0) {
147            return Err(FromStrError::InteriorNul);
148        }
149
150        let mut data: [u8; 32] = [0; 32];
151        data[..bytes.len()].copy_from_slice(bytes);
152
153        Ok(Value::new(data))
154    }
155}
156
157impl TryToValue<ShortString> for String {
158    type Error = FromStrError;
159
160    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
161        (&self[..]).try_to_value()
162    }
163}
164
165impl ToValue<ShortString> for &str {
166    fn to_value(self) -> Value<ShortString> {
167        self.try_to_value().unwrap()
168    }
169}
170
171impl ToValue<ShortString> for String {
172    fn to_value(self) -> Value<ShortString> {
173        self.try_to_value().unwrap()
174    }
175}
176
177impl ToValue<ShortString> for &String {
178    fn to_value(self) -> Value<ShortString> {
179        self.to_str().try_to_value().unwrap()
180    }
181}