Skip to main content

triblespace_core/value/schemas/
shortstring.rs

1use crate::id::ExclusiveId;
2use crate::id::Id;
3use crate::id_hex;
4use crate::macros::entity;
5use crate::metadata;
6use crate::metadata::ConstMetadata;
7use crate::repo::BlobStore;
8use crate::trible::TribleSet;
9use crate::value::schemas::hash::Blake3;
10use crate::value::FromValue;
11use crate::value::ToValue;
12use crate::value::TryFromValue;
13use crate::value::TryToValue;
14use crate::value::Value;
15use crate::value::ValueSchema;
16
17use indxvec::Printing;
18use std::str::Utf8Error;
19
20#[cfg(feature = "wasm")]
21use crate::blob::schemas::wasmcode::WasmCode;
/// Error returned when a string cannot be stored as a [`ShortString`].
///
/// A short string must fit in 32 bytes and must not contain a NUL byte,
/// because NUL is reserved for termination and padding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FromStrError {
    /// The input is longer than the 32 bytes a value can hold.
    TooLong,
    /// The input contains a NUL byte.
    InteriorNul,
}

impl std::fmt::Display for FromStrError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::TooLong => "string is longer than 32 bytes",
            Self::InteriorNul => "string contains an interior NUL byte",
        })
    }
}

// Implementing `Error` lets callers propagate this with `?` into
// `Box<dyn Error>` and similar error containers.
impl std::error::Error for FromStrError {}
29
/// Errors that can occur when validating a [`ShortString`] value.
#[derive(Debug)]
pub enum ValidationError {
    /// A non-zero byte was found after the first NUL byte.
    InteriorNul,
    /// The bytes before the first NUL are not valid UTF-8.
    Utf8(Utf8Error),
}

impl std::fmt::Display for ValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::InteriorNul => "non-zero byte found after the NUL terminator",
            Self::Utf8(_) => "short string is not valid UTF-8",
        })
    }
}

impl std::error::Error for ValidationError {
    /// Exposes the underlying [`Utf8Error`] for the `Utf8` variant.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InteriorNul => None,
            Self::Utf8(cause) => Some(cause),
        }
    }
}
36
/// Value schema for short, inline strings.
///
/// The text is UTF-8 and occupies at most 32 bytes (inclusive).
/// Strings shorter than 32 bytes are NUL-terminated and the remaining bytes
/// are zero; a string of exactly 32 bytes has no terminator.
pub struct ShortString;
43
44impl ConstMetadata for ShortString {
45    fn id() -> Id {
46        id_hex!("2D848DB0AF112DB226A6BF1A3640D019")
47    }
48
49    fn describe<B>(blobs: &mut B) -> Result<TribleSet, B::PutError>
50    where
51        B: BlobStore<Blake3>,
52    {
53        let id = Self::id();
54        let description = blobs.put(
55            "UTF-8 string stored inline in 32 bytes with NUL termination and zero padding. Keeping the bytes inside the value makes the string sortable and queryable without an extra blob lookup.\n\nUse for short labels, enum-like names, and keys that must fit in the value boundary. For longer or variable text, store a LongString blob and reference it with a Handle.\n\nInterior NUL bytes are invalid and the maximum length is 32 bytes. The schema stores raw bytes, so it does not account for grapheme width or display columns.",
56        )?;
57        let name = blobs.put("shortstring".to_string())?;
58        let tribles = entity! {
59            ExclusiveId::force_ref(&id) @
60                metadata::name: name,
61                metadata::description: description,
62                metadata::tag: metadata::KIND_VALUE_SCHEMA,
63        };
64
65        #[cfg(feature = "wasm")]
66        let tribles = {
67            let mut tribles = tribles;
68            tribles += entity! { ExclusiveId::force_ref(&id) @
69                metadata::value_formatter: blobs.put(wasm_formatter::SHORTSTRING_WASM)?,
70            };
71            tribles
72        };
73        Ok(tribles)
74    }
75}
76
/// Formatter for short string values, compiled only with the `wasm` feature.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    // Writes the text held in `raw` to `out`.
    //
    // Error codes:
    //   1 - writing to `out` failed
    //   2 - a non-zero byte follows the first NUL
    //   3 - the bytes before the first NUL are not valid UTF-8
    #[value_formatter]
    pub(crate) fn shortstring(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        // The text ends at the first NUL, or fills the whole value if none is present.
        let end = raw.iter().position(|&byte| byte == 0).unwrap_or(raw.len());
        let (text_bytes, padding) = raw.split_at(end);

        // Everything from the first NUL onwards must be zero padding.
        if !padding.iter().all(|&byte| byte == 0) {
            return Err(2);
        }

        match core::str::from_utf8(text_bytes) {
            Ok(text) => out.write_str(text).map_err(|_| 1u32),
            Err(_) => Err(3u32),
        }
    }
}
96
97impl ValueSchema for ShortString {
98    type ValidationError = ValidationError;
99
100    fn validate(value: Value<Self>) -> Result<Value<Self>, Self::ValidationError> {
101        let raw = &value.raw;
102        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
103        // ensure all bytes after first NUL are zero
104        if raw[len..].iter().any(|&b| b != 0) {
105            return Err(ValidationError::InteriorNul);
106        }
107        std::str::from_utf8(&raw[..len]).map_err(ValidationError::Utf8)?;
108        Ok(value)
109    }
110}
111
112impl<'a> TryFromValue<'a, ShortString> for &'a str {
113    type Error = Utf8Error;
114
115    fn try_from_value(v: &'a Value<ShortString>) -> Result<&'a str, Self::Error> {
116        let len = v.raw.iter().position(|&b| b == 0).unwrap_or(v.raw.len());
117        #[cfg(kani)]
118        {
119            // Kani spends significant time unwinding the UTF-8 validation loop.
120            // Bounding `len` to 32 keeps the verifier from exploring unrealistic
121            // larger values, reducing runtime from minutes to seconds.
122            kani::assume(len <= 32);
123        }
124        std::str::from_utf8(&v.raw[..len])
125    }
126}
127
128impl<'a> TryFromValue<'a, ShortString> for String {
129    type Error = Utf8Error;
130
131    fn try_from_value(v: &Value<ShortString>) -> Result<Self, Self::Error> {
132        let s: &str = v.try_from_value()?;
133        Ok(s.to_string())
134    }
135}
136
137impl<'a> FromValue<'a, ShortString> for &'a str {
138    fn from_value(v: &'a Value<ShortString>) -> Self {
139        v.try_from_value().unwrap()
140    }
141}
142
143impl<'a> FromValue<'a, ShortString> for String {
144    fn from_value(v: &'a Value<ShortString>) -> Self {
145        v.try_from_value().unwrap()
146    }
147}
148
149impl TryToValue<ShortString> for &str {
150    type Error = FromStrError;
151
152    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
153        let bytes = self.as_bytes();
154        if bytes.len() > 32 {
155            return Err(FromStrError::TooLong);
156        }
157        if bytes.contains(&0) {
158            return Err(FromStrError::InteriorNul);
159        }
160
161        let mut data: [u8; 32] = [0; 32];
162        data[..bytes.len()].copy_from_slice(bytes);
163
164        Ok(Value::new(data))
165    }
166}
167
168impl TryToValue<ShortString> for String {
169    type Error = FromStrError;
170
171    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
172        (&self[..]).try_to_value()
173    }
174}
175
176impl ToValue<ShortString> for &str {
177    fn to_value(self) -> Value<ShortString> {
178        self.try_to_value().unwrap()
179    }
180}
181
182impl ToValue<ShortString> for String {
183    fn to_value(self) -> Value<ShortString> {
184        self.try_to_value().unwrap()
185    }
186}
187
188impl ToValue<ShortString> for &String {
189    fn to_value(self) -> Value<ShortString> {
190        self.to_str().try_to_value().unwrap()
191    }
192}