// Source: triblespace_core/value/schemas/shortstring.rs

1use crate::id::ExclusiveId;
2use crate::id::Id;
3use crate::id_hex;
4use crate::macros::entity;
5use crate::metadata;
6use crate::metadata::{ConstDescribe, ConstId};
7use crate::repo::BlobStore;
8use crate::trible::Fragment;
9use crate::value::schemas::hash::Blake3;
10use crate::value::ToValue;
11use crate::value::TryFromValue;
12use crate::value::TryToValue;
13use crate::value::Value;
14use crate::value::ValueSchema;
15
16use indxvec::Printing;
17use std::str::Utf8Error;
18
/// Error returned when a string cannot be stored as a [`ShortString`].
///
/// The conversion rejects inputs that are longer than 32 bytes and inputs
/// that contain a NUL byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FromStrError {
    /// The input is longer than 32 bytes.
    TooLong,
    /// The input contains a NUL (`0x00`) byte.
    InteriorNul,
}

impl std::fmt::Display for FromStrError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::TooLong => "string is longer than 32 bytes",
            Self::InteriorNul => "string contains a NUL byte",
        })
    }
}

// Implementing `Error` lets callers box this type or propagate it with `?`
// into `Box<dyn std::error::Error>`.
impl std::error::Error for FromStrError {}
26
/// Errors that can occur when validating a [`ShortString`] value.
#[derive(Debug)]
pub enum ValidationError {
    /// A non-zero byte follows the first NUL byte, i.e. the padding after the
    /// terminator is not all zeros.
    InteriorNul,
    /// The bytes before the first NUL byte are not valid UTF-8.
    Utf8(Utf8Error),
}

impl std::fmt::Display for ValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::InteriorNul => "non-zero byte after the first NUL byte",
            // The decoding details are available through `Error::source`.
            Self::Utf8(_) => "short string is not valid UTF-8",
        })
    }
}

impl std::error::Error for ValidationError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InteriorNul => None,
            Self::Utf8(err) => Some(err),
        }
    }
}
33
/// Value schema for short strings stored inline in the value.
///
/// A short string is UTF-8 text of at most 32 bytes (inclusive).
/// A string shorter than 32 bytes is followed by a NUL terminator and every
/// remaining byte is zero; a string of exactly 32 bytes fills the value and
/// carries no terminator.
pub struct ShortString;
40
41impl ConstId for ShortString {
42    const ID: Id = id_hex!("2D848DB0AF112DB226A6BF1A3640D019");
43}
44
45impl ConstDescribe for ShortString {
46    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
47    where
48        B: BlobStore<Blake3>,
49    {
50        let id = Self::ID;
51        let description = blobs.put(
52            "UTF-8 string stored inline in 32 bytes with NUL termination and zero padding. Keeping the bytes inside the value makes the string sortable and queryable without an extra blob lookup.\n\nUse for short labels, enum-like names, and keys that must fit in the value boundary. For longer or variable text, store a LongString blob and reference it with a Handle.\n\nInterior NUL bytes are invalid and the maximum length is 32 bytes. The schema stores raw bytes, so it does not account for grapheme width or display columns.",
53        )?;
54        let name = blobs.put("shortstring")?;
55        let tribles = entity! {
56            ExclusiveId::force_ref(&id) @
57                metadata::name: name,
58                metadata::description: description,
59                metadata::tag: metadata::KIND_VALUE_SCHEMA,
60        };
61
62        #[cfg(feature = "wasm")]
63        let tribles = {
64            let mut tribles = tribles;
65            tribles += entity! { ExclusiveId::force_ref(&id) @
66                metadata::value_formatter: blobs.put(wasm_formatter::SHORTSTRING_WASM)?,
67            };
68            tribles
69        };
70        Ok(tribles)
71    }
72}
73
/// Formatter for [`ShortString`] values, only built with the `wasm` feature.
#[cfg(feature = "wasm")]
mod wasm_formatter {
    use core::fmt::Write;

    use triblespace_core_macros::value_formatter;

    /// Writes the text held in a raw 32-byte short string value to `out`.
    ///
    /// The text is everything before the first NUL byte, or all 32 bytes when
    /// there is none.
    ///
    /// # Errors
    ///
    /// * `1` - writing to `out` failed.
    /// * `2` - a non-zero byte follows the first NUL byte.
    /// * `3` - the text is not valid UTF-8.
    #[value_formatter]
    pub(crate) fn shortstring(raw: &[u8; 32], out: &mut impl Write) -> Result<(), u32> {
        let end = raw
            .iter()
            .position(|&byte| byte == 0)
            .unwrap_or(raw.len());
        let (payload, padding) = raw.split_at(end);

        // Everything from the terminator onwards has to be zero padding.
        if !padding.iter().all(|&byte| byte == 0) {
            return Err(2);
        }

        match core::str::from_utf8(payload) {
            Ok(text) => out.write_str(text).map_err(|_| 1u32),
            Err(_) => Err(3u32),
        }
    }
}
93
94impl ValueSchema for ShortString {
95    type ValidationError = ValidationError;
96
97    fn validate(value: Value<Self>) -> Result<Value<Self>, Self::ValidationError> {
98        let raw = &value.raw;
99        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
100        // ensure all bytes after first NUL are zero
101        if raw[len..].iter().any(|&b| b != 0) {
102            return Err(ValidationError::InteriorNul);
103        }
104        std::str::from_utf8(&raw[..len]).map_err(ValidationError::Utf8)?;
105        Ok(value)
106    }
107}
108
109impl<'a> TryFromValue<'a, ShortString> for &'a str {
110    type Error = Utf8Error;
111
112    fn try_from_value(v: &'a Value<ShortString>) -> Result<&'a str, Self::Error> {
113        let len = v.raw.iter().position(|&b| b == 0).unwrap_or(v.raw.len());
114        #[cfg(kani)]
115        {
116            // Kani spends significant time unwinding the UTF-8 validation loop.
117            // Bounding `len` to 32 keeps the verifier from exploring unrealistic
118            // larger values, reducing runtime from minutes to seconds.
119            kani::assume(len <= 32);
120        }
121        std::str::from_utf8(&v.raw[..len])
122    }
123}
124
125impl<'a> TryFromValue<'a, ShortString> for String {
126    type Error = Utf8Error;
127
128    fn try_from_value(v: &Value<ShortString>) -> Result<Self, Self::Error> {
129        let s: &str = v.try_from_value()?;
130        Ok(s.to_string())
131    }
132}
133
134impl TryToValue<ShortString> for &str {
135    type Error = FromStrError;
136
137    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
138        let bytes = self.as_bytes();
139        if bytes.len() > 32 {
140            return Err(FromStrError::TooLong);
141        }
142        if bytes.contains(&0) {
143            return Err(FromStrError::InteriorNul);
144        }
145
146        let mut data: [u8; 32] = [0; 32];
147        data[..bytes.len()].copy_from_slice(bytes);
148
149        Ok(Value::new(data))
150    }
151}
152
153impl TryToValue<ShortString> for String {
154    type Error = FromStrError;
155
156    fn try_to_value(self) -> Result<Value<ShortString>, Self::Error> {
157        (&self[..]).try_to_value()
158    }
159}
160
161impl ToValue<ShortString> for &str {
162    fn to_value(self) -> Value<ShortString> {
163        self.try_to_value().unwrap()
164    }
165}
166
167impl ToValue<ShortString> for String {
168    fn to_value(self) -> Value<ShortString> {
169        self.try_to_value().unwrap()
170    }
171}
172
173impl ToValue<ShortString> for &String {
174    fn to_value(self) -> Value<ShortString> {
175        self.to_str().try_to_value().unwrap()
176    }
177}