Skip to main content

selene_core/
db_string.rs

1//! Engine-owned database strings backed by shared string storage.
2//!
3//! `DbString` is an owned string newtype used for GQL string values, graph
4//! labels, property keys, aliases, and procedure-name segments. Cloning a
5//! `DbString` shares the same allocation, but construction never consults a
6//! process-global string pool. There is no interning table, specialized
7//! small-string storage, or distinct-string cardinality cap: [`db_string`]
8//! simply constructs an owned [`DbString`] after enforcing the per-string byte
9//! cap (`IL013`).
10//!
11//! The only construction guard is the `IL013` per-string byte limit
12//! ([`MAX_DB_STRING_BYTES`]); a string at or below it constructs an
13//! [`DbString`], a longer one raises [`CoreError::StringTooLong`] (GQLSTATUS
14//! `22G03`).
15
16use std::{borrow::Borrow, fmt, sync::Arc};
17
18use rkyv::{
19    Archive, Deserialize as RkyvDeserialize, Place, Serialize as RkyvSerialize, SerializeUnsized,
20    rancor::{Fallible, Source},
21    string::{ArchivedString, StringResolver},
22};
23use serde::{Deserialize, Deserializer, Serialize, Serializer};
24
25use crate::error::{CoreError, CoreResult};
26
/// Upper bound, in bytes, on the length of one database string.
///
/// ISO Annex B `IL013` fixes this at 2^32 - 1 bytes per inline string. Lengths
/// up to and including the bound are accepted; anything longer is rejected
/// with [`CoreError::StringTooLong`] (GQLSTATUS `22G03`), mirroring the `IL015`
/// constructed-value cardinality enforcement in `PropertyMap`.
pub const MAX_DB_STRING_BYTES: usize = u32::MAX as usize;

/// Report whether `byte_len` lies strictly above the `IL013` inline-string
/// limit. A length equal to [`MAX_DB_STRING_BYTES`] is still within the cap.
const fn string_cap_exceeded(byte_len: usize) -> bool {
    MAX_DB_STRING_BYTES < byte_len
}
39
40/// Reject strings whose byte length exceeds the `IL013` inline-string limit.
41fn ensure_within_string_cap(s: &str) -> CoreResult<()> {
42    if string_cap_exceeded(s.len()) {
43        return Err(CoreError::StringTooLong {
44            got: s.len(),
45            max: u32::MAX,
46        });
47    }
48    Ok(())
49}
50
/// Database string with owned, shareable storage.
///
/// A thin newtype over [`Arc<str>`]. The value owns its data and is `'static`
/// (it borrows nothing), so the multi-writer committer's
/// `assert_send_static::<SealedCommit>()` proof holds for free. The derived
/// comparison traits delegate to the inner string, which makes ordering
/// **lexicographic**: query-visible comparisons and `BTreeMap`/`BTreeSet`
/// iteration follow string content.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct DbString(Arc<str>);
61
62/// Construct an owned [`DbString`] from a string slice.
63///
64/// Construction allocates shared string storage guarded only by the `IL013`
65/// per-string byte cap; there is no global pool, specialized small-string
66/// storage, or distinct-string cardinality cap.
67///
68/// # Errors
69///
70/// Returns [`CoreError::StringTooLong`] if `s` exceeds
71/// [`MAX_DB_STRING_BYTES`] (IL013).
72pub fn db_string(s: &str) -> CoreResult<DbString> {
73    ensure_within_string_cap(s)?;
74    Ok(DbString(Arc::from(s)))
75}
76
77impl DbString {
78    /// Construct a [`DbString`] from an owned [`String`].
79    ///
80    /// This preserves the same `IL013` guard as [`db_string`] and moves the
81    /// owned string into shared storage.
82    ///
83    /// # Errors
84    ///
85    /// Returns [`CoreError::StringTooLong`] if `value` exceeds
86    /// [`MAX_DB_STRING_BYTES`] (IL013).
87    pub fn from_string(value: String) -> CoreResult<Self> {
88        ensure_within_string_cap(&value)?;
89        Ok(Self(Arc::from(value)))
90    }
91
92    /// Return this database string as a string slice.
93    #[must_use]
94    pub fn as_str(&self) -> &str {
95        &self.0
96    }
97
98    /// Consume this database string and return an owned [`String`].
99    ///
100    /// The returned string is copied out of the shared storage. Prefer
101    /// [`DbString::as_str`] when a borrowed view is enough.
102    #[must_use]
103    pub fn into_string(self) -> String {
104        self.0.to_string()
105    }
106}
107
108impl AsRef<str> for DbString {
109    fn as_ref(&self) -> &str {
110        self.as_str()
111    }
112}
113
114impl Borrow<str> for DbString {
115    fn borrow(&self) -> &str {
116        self.as_str()
117    }
118}
119
120impl TryFrom<String> for DbString {
121    type Error = CoreError;
122
123    fn try_from(value: String) -> Result<Self, Self::Error> {
124        Self::from_string(value)
125    }
126}
127
128impl TryFrom<&str> for DbString {
129    type Error = CoreError;
130
131    fn try_from(value: &str) -> Result<Self, Self::Error> {
132        db_string(value)
133    }
134}
135
136impl From<DbString> for String {
137    fn from(value: DbString) -> Self {
138        value.into_string()
139    }
140}
141
142impl Archive for DbString {
143    type Archived = ArchivedString;
144    type Resolver = StringResolver;
145
146    fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
147        ArchivedString::resolve_from_str(self.as_str(), resolver, out);
148    }
149}
150
151impl<S> RkyvSerialize<S> for DbString
152where
153    S: Fallible + ?Sized,
154    S::Error: Source,
155    str: SerializeUnsized<S>,
156{
157    fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
158        // Why: archive bytes mirror `String`/`ArchivedString` exactly so
159        // snapshots stay content-addressable and cold-start portable per spec
160        // 04 section 2 / D9.
161        ArchivedString::serialize_from_str(self.as_str(), serializer)
162    }
163}
164
165impl<D> RkyvDeserialize<DbString, D> for ArchivedString
166where
167    D: Fallible + ?Sized,
168    D::Error: Source,
169{
170    fn deserialize(&self, _deserializer: &mut D) -> Result<DbString, D::Error> {
171        // IL013 byte guard is retained on the decode path: an over-length
172        // archived string raises StringTooLong (22G03) via `db_string`.
173        match db_string(self.as_str()) {
174            Ok(value) => Ok(value),
175            Err(error) => {
176                rkyv::rancor::fail!(error);
177            }
178        }
179    }
180}
181
182impl fmt::Display for DbString {
183    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184        f.write_str(self.as_str())
185    }
186}
187
188impl Serialize for DbString {
189    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
190    where
191        S: Serializer,
192    {
193        // Byte-identical to `String`: emit the string content via
194        // `serialize_str`.
195        serializer.serialize_str(self.as_str())
196    }
197}
198
199impl<'de> Deserialize<'de> for DbString {
200    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
201    where
202        D: Deserializer<'de>,
203    {
204        // IL013 byte guard is retained on the decode path via `DbString::from_string`.
205        let value = String::deserialize(deserializer)?;
206        DbString::from_string(value).map_err(serde::de::Error::custom)
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn db_string_round_trip() {
        let key = db_string("alpha").expect("DB string construction succeeds");
        assert_eq!(key.as_str(), "alpha");
        assert_eq!(key.to_string(), "alpha");
    }

    #[test]
    fn same_string_constructs_equal_value() {
        assert_eq!(db_string("same").unwrap(), db_string("same").unwrap());
    }

    #[test]
    fn cloned_string_shares_storage() {
        let value = db_string("shared-storage").unwrap();
        let cloned = value.clone();
        assert_eq!(value, cloned);
        assert!(std::ptr::eq(
            value.as_str().as_ptr(),
            cloned.as_str().as_ptr()
        ));
    }

    #[test]
    fn separate_construction_is_not_interning() {
        let left = db_string("not-interned").unwrap();
        let right = db_string("not-interned").unwrap();
        assert_eq!(left, right);
        assert!(!std::ptr::eq(
            left.as_str().as_ptr(),
            right.as_str().as_ptr()
        ));
    }

    #[test]
    fn owned_string_constructs_without_changing_content() {
        let source = String::from("owned-alpha");
        let value = DbString::from_string(source).expect("owned DB string construction succeeds");
        assert_eq!(value.as_str(), "owned-alpha");
    }

    #[test]
    fn db_string_exposes_standard_string_traits() {
        let value = DbString::from_string(String::from("borrowed-view")).unwrap();
        assert_eq!(AsRef::<str>::as_ref(&value), "borrowed-view");
        assert_eq!(Borrow::<str>::borrow(&value), "borrowed-view");
        let round_tripped: String = value.into();
        assert_eq!(round_tripped, "borrowed-view");
    }

    #[test]
    fn distinct_strings_construct_distinct_values() {
        assert_ne!(db_string("left").unwrap(), db_string("right").unwrap());
    }

    #[test]
    fn empty_and_unicode_strings_construct() {
        assert_eq!(db_string("").unwrap().as_str(), "");
        assert_eq!(
            db_string("\u{03bb} graph").unwrap().as_str(),
            "\u{03bb} graph"
        );
    }

    #[test]
    fn db_string_is_arc_str_sized() {
        // `Arc<str>` is a fat pointer (data pointer + length). Express the
        // expectation in machine words so the test also holds on targets
        // whose pointers are not 8 bytes wide.
        assert_eq!(
            std::mem::size_of::<DbString>(),
            std::mem::size_of::<Arc<str>>()
        );
        assert_eq!(
            std::mem::size_of::<DbString>(),
            2 * std::mem::size_of::<usize>()
        );
    }

    #[test]
    fn db_string_ord_is_lexicographic() {
        let aaa = db_string("aaa").unwrap();
        let zzz = db_string("zzz").unwrap();
        assert!(aaa < zzz);
        assert_eq!(aaa.cmp(&zzz), aaa.as_str().cmp(zzz.as_str()));
    }

    #[test]
    fn string_cap_boundary_is_il013_byte_limit() {
        // CORE-12: IL013 enforces 2^32 - 1 bytes per inline string. A 4 GiB
        // allocation is infeasible in a test, so exercise the length predicate
        // at the exact boundary.
        assert_eq!(MAX_DB_STRING_BYTES, u32::MAX as usize);
        assert!(!string_cap_exceeded(MAX_DB_STRING_BYTES));
        assert!(!string_cap_exceeded(MAX_DB_STRING_BYTES - 1));
        // Where `usize` is 32 bits wide the cap equals `usize::MAX`, so no
        // over-cap length exists and `+ 1` would overflow. Probe the
        // over-cap side only when it is representable.
        if let Some(over_cap) = MAX_DB_STRING_BYTES.checked_add(1) {
            assert!(string_cap_exceeded(over_cap));
        }
    }

    #[test]
    fn over_length_string_raises_string_too_long_with_22g03() {
        // CORE-12: the producer maps an over-length string to StringTooLong /
        // GQLSTATUS 22G03, mirroring IL015's ConstructedValueTooLarge.
        let Some(over_cap) = MAX_DB_STRING_BYTES.checked_add(1) else {
            // 32-bit `usize`: an over-length string cannot be represented, so
            // there is no rejection path to exercise.
            return;
        };
        let err = ensure_within_string_cap_for_len(over_cap)
            .expect_err("over-length string is rejected");
        assert!(matches!(
            err,
            CoreError::StringTooLong {
                max,
                ..
            } if max == u32::MAX
        ));
        assert_eq!(err.gqlstatus(), "22G03");
    }

    #[test]
    fn within_length_string_constructs_normally() {
        // CORE-12: a sub-cap string still constructs and round-trips.
        let key = format!("core-12-within-cap-{}", std::process::id());
        let value = db_string(&key).expect("within-cap string fits DB string cap");
        assert_eq!(value.as_str(), key);
    }

    /// Test-only shim exercising the byte-cap producer at a synthetic length
    /// without allocating the multi-gigabyte string the real boundary needs.
    ///
    /// NOTE(review): this mirrors the body of `ensure_within_string_cap`
    /// rather than calling it, so the two must be kept in sync by hand.
    fn ensure_within_string_cap_for_len(byte_len: usize) -> CoreResult<()> {
        if string_cap_exceeded(byte_len) {
            Err(CoreError::StringTooLong {
                got: byte_len,
                max: u32::MAX,
            })
        } else {
            Ok(())
        }
    }

    #[test]
    fn rkyv_archives_resolved_string() {
        // Wire-stability guard: the newtype archives its string content as an
        // ArchivedString rather than exposing the in-memory Arc layout.
        let key = db_string("db_string.rkyv.portable").unwrap();
        let bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&key).unwrap();
        let archived =
            rkyv::access::<rkyv::Archived<DbString>, rkyv::rancor::Error>(&bytes).unwrap();
        assert_eq!(archived.as_str(), "db_string.rkyv.portable");
    }

    #[test]
    fn rkyv_round_trip_preserves_string() {
        // Wire-stability guard: round-trip through rkyv preserves content and
        // equality.
        let key = db_string("db_string.rkyv.round_trip").unwrap();
        let bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&key).unwrap();
        let decoded: DbString = rkyv::from_bytes::<DbString, rkyv::rancor::Error>(&bytes).unwrap();
        assert_eq!(decoded.as_str(), "db_string.rkyv.round_trip");
        assert_eq!(decoded, key);
    }
}