Skip to main content

axess_strings/
lib.rs

1//! Short, hot-path string primitive for the axess workspace.
2//!
3//! [`ShortString`] is a value type optimized for short identifiers that
4//! are hashed, compared, and cloned at high volume: event taxonomy tags,
5//! factor names, routing discriminators, and similar.
6//!
7//! The internal representation is an Umbra-style 16-byte stack value
8//! ("German string" / Umbra-DB string): strings up to 12 bytes are
9//! inlined with no allocation, `&'static str` is referenced in place
10//! (also no allocation), and longer owned strings use a single
11//! refcounted heap buffer that is shared on clone. The four-byte
12//! prefix that powers the equality fast-path lives at a fixed offset
13//! across all three variants, so prefix comparison is variant-agnostic.
14//!
15//! The internal layout is documented in `src/repr.rs` and is the only
16//! place in the axess workspace where `unsafe` is permitted; every
17//! `unsafe` block there cites the invariant it relies on.
18//!
19//! # Quick start
20//!
21//! ```
22//! use axess_strings::ShortString;
23//!
24//! let s = ShortString::new("auth.login_attempt.v2");
25//! assert_eq!(s.as_str(), "auth.login_attempt.v2");
26//! assert_eq!(s.len(), 21);
27//!
28//! const KIND_LOGIN_V2: ShortString =
29//!     ShortString::from_static("auth.login_attempt.v2");
30//! assert_eq!(KIND_LOGIN_V2, s);
31//! ```
32//!
33//! # Feature flags
34//!
35//! | Feature | Default | Effect |
36//! |---------|---------|--------|
37//! | `serde` | yes | [`serde::Serialize`] / [`serde::Deserialize`] forwarding to/from a string. |
38//! | `rkyv`  | no  | rkyv `Archive` / `Serialize` / `Deserialize` forwarding to [`rkyv::string::ArchivedString`]. |
39//! | `full`  | no  | Both `serde` and `rkyv`. |
40
41// `deny(unsafe_code)` rather than `forbid` because `repr.rs` carries
42// a localised `#![allow(unsafe_code)]` for the raw-heap representation
43// (allocation, NonNull dereference, `unsafe impl Sync`). `forbid`
44// cannot be overridden module-locally, so the inner pragma would be a
45// hard error. Every `unsafe` block in `repr.rs` is in scope of that
46// allow; no other module in this crate may use `unsafe`.
47#![deny(unsafe_code)]
48#![deny(missing_docs)]
49#![cfg_attr(docsrs, feature(doc_cfg))]
50
51mod repr;
52
53use std::cmp::Ordering;
54use std::fmt;
55use std::hash::{Hash, Hasher};
56use std::ops::Deref;
57use std::str::FromStr;
58
59use crate::repr::Repr;
60
/// Short, hot-path string primitive.
///
/// Umbra-style 16-byte stack representation:
///
/// - strings up to 12 bytes are inlined on the stack with no allocation
/// - strings constructed via [`ShortString::from_static`] reference
///   `&'static str` memory, also without allocation
/// - strings longer than 12 bytes that are not static use a single
///   refcounted heap allocation; cloning increments the refcount
///
/// The discriminator and the four-byte prefix used by the equality
/// fast-path live at the same offset across all three variants, so
/// length and prefix comparison are variant-agnostic.
///
/// # Equality and hashing
///
/// Two `ShortString` values are equal when their string contents are
/// equal, regardless of which variant backs each. Hash output depends
/// only on the string contents.
///
/// Ordering follows the same rule: values compare exactly as their
/// `&str` contents do.
#[derive(Clone)]
pub struct ShortString(Repr);
82
83impl ShortString {
84    /// Construct from any string slice. Inlines if ≤ 12 bytes; allocates
85    /// a single refcounted buffer otherwise. For compile-time-known
86    /// strings prefer [`ShortString::from_static`] which never
87    /// allocates.
88    #[inline]
89    pub fn new(s: &str) -> Self {
90        Self(Repr::from_str(s))
91    }
92
93    /// Construct from a `&'static str` without allocating. `const fn` so
94    /// callers can build module-level constants regardless of length.
95    #[inline]
96    pub const fn from_static(s: &'static str) -> Self {
97        Self(Repr::from_static(s))
98    }
99
100    /// Borrow the contents as a `&str`.
101    #[inline]
102    pub fn as_str(&self) -> &str {
103        self.0.as_str()
104    }
105
106    /// Number of UTF-8 bytes (matches [`str::len`]).
107    #[inline]
108    pub fn len(&self) -> usize {
109        self.0.len()
110    }
111
112    /// Whether the string is empty.
113    #[inline]
114    pub fn is_empty(&self) -> bool {
115        self.0.len() == 0
116    }
117
118    /// First four bytes of the string, zero-padded if shorter.
119    ///
120    /// Lives at the same offset in every internal variant, so the read
121    /// is variant-agnostic and is the fast path for equality / hashing
122    /// when the full byte compare would otherwise dereference the heap
123    /// buffer. Sliced at the byte level; not guaranteed to fall on a
124    /// UTF-8 boundary.
125    #[inline]
126    pub fn prefix(&self) -> [u8; 4] {
127        self.0.prefix()
128    }
129}
130
131impl Default for ShortString {
132    fn default() -> Self {
133        Self(Repr::empty())
134    }
135}
136
137impl fmt::Display for ShortString {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        f.write_str(self.as_str())
140    }
141}
142
143impl fmt::Debug for ShortString {
144    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
145        fmt::Debug::fmt(self.as_str(), f)
146    }
147}
148
149impl PartialEq for ShortString {
150    fn eq(&self, other: &Self) -> bool {
151        self.as_str() == other.as_str()
152    }
153}
154
// Marker impl: equality is defined on the string contents, which is
// reflexive, symmetric and transitive, so `Eq` holds.
impl Eq for ShortString {}
156
157impl PartialOrd for ShortString {
158    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
159        Some(self.cmp(other))
160    }
161}
162
163impl Ord for ShortString {
164    fn cmp(&self, other: &Self) -> Ordering {
165        self.as_str().cmp(other.as_str())
166    }
167}
168
169impl Hash for ShortString {
170    fn hash<H: Hasher>(&self, state: &mut H) {
171        self.as_str().hash(state);
172    }
173}
174
175impl Deref for ShortString {
176    type Target = str;
177    fn deref(&self) -> &str {
178        self.as_str()
179    }
180}
181
182impl AsRef<str> for ShortString {
183    fn as_ref(&self) -> &str {
184        self.as_str()
185    }
186}
187
188impl AsRef<[u8]> for ShortString {
189    fn as_ref(&self) -> &[u8] {
190        self.as_str().as_bytes()
191    }
192}
193
194impl PartialEq<str> for ShortString {
195    fn eq(&self, other: &str) -> bool {
196        self.as_str() == other
197    }
198}
199
200impl PartialEq<&str> for ShortString {
201    fn eq(&self, other: &&str) -> bool {
202        self.as_str() == *other
203    }
204}
205
206impl PartialEq<String> for ShortString {
207    fn eq(&self, other: &String) -> bool {
208        self.as_str() == other.as_str()
209    }
210}
211
212impl From<&str> for ShortString {
213    fn from(s: &str) -> Self {
214        Self::new(s)
215    }
216}
217
218impl From<String> for ShortString {
219    fn from(s: String) -> Self {
220        Self(Repr::from_str(&s))
221    }
222}
223
224impl From<Box<str>> for ShortString {
225    fn from(s: Box<str>) -> Self {
226        Self(Repr::from_str(&s))
227    }
228}
229
230impl FromStr for ShortString {
231    type Err = std::convert::Infallible;
232    fn from_str(s: &str) -> Result<Self, Self::Err> {
233        Ok(Self::new(s))
234    }
235}
236
237// ── serde ──────────────────────────────────────────────────────────────
238
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
/// Serializes as a plain string holding the contents.
impl serde::Serialize for ShortString {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let contents: &str = self.as_str();
        serializer.serialize_str(contents)
    }
}
246
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
/// Deserializes from a string in any of the shapes a deserializer may
/// hand over: transient borrow, owned `String`, or input-lifetime borrow.
impl<'de> serde::Deserialize<'de> for ShortString {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor that turns any string callback into a `ShortString`.
        struct ShortStringVisitor;

        impl<'de> serde::de::Visitor<'de> for ShortStringVisitor {
            type Value = ShortString;

            fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str("a string")
            }

            // Transient borrow: build from the slice.
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ok(ShortString::new(v))
            }

            // Owned buffer: go through `From<String>`.
            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ok(ShortString::from(v))
            }

            // Borrow tied to the input: still built with `new`, so the
            // value does not keep the input lifetime.
            fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ok(ShortString::new(v))
            }
        }

        deserializer.deserialize_str(ShortStringVisitor)
    }
}
273
274// ── rkyv ───────────────────────────────────────────────────────────────
275
#[cfg(feature = "rkyv")]
#[cfg_attr(docsrs, doc(cfg(feature = "rkyv")))]
const _: () = {
    // The anonymous const keeps the rkyv imports out of the crate's
    // namespace; the impls inside still apply crate-wide.
    use rkyv::{
        Archive, Place, Serialize,
        rancor::{Fallible, Source},
        ser::{Allocator, Writer},
        string::{ArchivedString, StringResolver},
    };

    // A `ShortString` archives as rkyv's own `ArchivedString`, so the
    // archived form carries nothing specific to the in-memory layout.
    impl Archive for ShortString {
        type Archived = ArchivedString;
        type Resolver = StringResolver;

        fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
            let contents: &str = self.as_str();
            ArchivedString::resolve_from_str(contents, resolver, out);
        }
    }

    // Serialization forwards the contents to `ArchivedString`'s helper.
    impl<Ser> Serialize<Ser> for ShortString
    where
        Ser: Allocator + Fallible + Writer + ?Sized,
        Ser::Error: Source,
    {
        fn serialize(&self, serializer: &mut Ser) -> Result<Self::Resolver, Ser::Error> {
            let contents: &str = self.as_str();
            ArchivedString::serialize_from_str(contents, serializer)
        }
    }

    // Deserialization builds a fresh `ShortString` from the archived
    // text; the deserializer argument is not used.
    impl<D> rkyv::Deserialize<ShortString, D> for ArchivedString
    where
        D: Fallible + ?Sized,
    {
        fn deserialize(&self, _deserializer: &mut D) -> Result<ShortString, D::Error> {
            let restored = ShortString::new(self.as_str());
            Ok(restored)
        }
    }
};
314
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::collections::hash_map::DefaultHasher;

    /// 21 bytes: longer than the documented 12-byte inline limit, so
    /// `ShortString::new` must use the refcounted heap variant for it.
    const LONG: &str = "auth.login_attempt.v2";

    /// Hash `value` with a fresh `DefaultHasher` and return the digest.
    fn hash_of<T: Hash + ?Sized>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn new_round_trips() {
        let s = ShortString::new("hello");
        assert_eq!(s.as_str(), "hello");
        assert_eq!(s.len(), 5);
        assert!(!s.is_empty());
    }

    #[test]
    fn from_static_round_trips() {
        const S: ShortString = ShortString::from_static(LONG);
        assert_eq!(S.as_str(), LONG);
        assert_eq!(S.len(), 21);
    }

    /// `from_static` of edge-case lengths (0..=4) must not panic. The
    /// `len > N` guards in `Repr::from_static` (see `repr.rs`) prevent
    /// indexing `bytes[N]` for strings shorter than that; mutating a
    /// guard to `>=` would index one byte past the input at the
    /// threshold length.
    ///
    /// Each length is exercised twice. The `const` items force
    /// compile-time evaluation, where a bad guard is a build error. The
    /// loop calls the same function at run time, so a flipped guard also
    /// shows up as an ordinary test panic, which mutation testing
    /// counts as caught rather than as a mutant that failed to build.
    #[test]
    fn from_static_handles_short_lengths_without_panic() {
        const E0: ShortString = ShortString::from_static("");
        const E1: ShortString = ShortString::from_static("a");
        const E2: ShortString = ShortString::from_static("ab");
        const E3: ShortString = ShortString::from_static("abc");
        const E4: ShortString = ShortString::from_static("abcd");
        assert_eq!(E0.as_str(), "");
        assert_eq!(E1.as_str(), "a");
        assert_eq!(E2.as_str(), "ab");
        assert_eq!(E3.as_str(), "abc");
        assert_eq!(E4.as_str(), "abcd");

        for text in ["", "a", "ab", "abc", "abcd"] {
            let s = ShortString::from_static(text);
            assert_eq!(s.as_str(), text);
            assert_eq!(s.len(), text.len());
        }
    }

    #[test]
    fn default_is_empty() {
        let s = ShortString::default();
        assert!(s.is_empty());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    /// Equality must not depend on which variant backs each side.
    #[test]
    fn equality_across_repr() {
        // 7 bytes: `new` inlines, `from_static` references in place.
        let inline = ShortString::new("kind.v1");
        let static_short = ShortString::from_static("kind.v1");
        assert_eq!(inline, static_short);

        // 21 bytes: `new` heap-allocates, `from_static` still references.
        let heap = ShortString::new(LONG);
        let static_long = ShortString::from_static(LONG);
        assert_eq!(heap, static_long);

        assert_ne!(heap, inline);
        assert_ne!(static_long, static_short);
    }

    /// Equal values must hash equally, whichever variant backs them.
    #[test]
    fn hash_matches_equality() {
        let inline = ShortString::new("kind.v1");
        let static_short = ShortString::from_static("kind.v1");
        assert_eq!(hash_of(&inline), hash_of(&static_short));

        let heap = ShortString::new(LONG);
        let static_long = ShortString::from_static(LONG);
        assert_eq!(hash_of(&heap), hash_of(&static_long));
    }

    /// A clone must stay readable after the original is dropped; for the
    /// heap variant the two share one refcounted buffer.
    #[test]
    fn clone_outlives_original() {
        let original = ShortString::new(LONG);
        let copy = original.clone();
        assert_eq!(copy, original);
        drop(original);
        assert_eq!(copy.as_str(), LONG);

        let short = ShortString::new("kind.v1");
        let short_copy = short.clone();
        drop(short);
        assert_eq!(short_copy.as_str(), "kind.v1");
    }

    #[test]
    fn ordering_is_lexicographic() {
        let mut v: Vec<ShortString> = ["banana", "apple", "cherry"]
            .into_iter()
            .map(ShortString::new)
            .collect();
        v.sort();
        assert_eq!(
            v.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
            vec!["apple", "banana", "cherry"]
        );
    }

    #[test]
    fn prefix_pads_short_strings() {
        assert_eq!(ShortString::new("ab").prefix(), *b"ab\0\0");
        assert_eq!(ShortString::new("abcd").prefix(), *b"abcd");
        assert_eq!(ShortString::new("abcdef").prefix(), *b"abcd");
        assert_eq!(ShortString::new("").prefix(), [0u8; 4]);
    }

    /// The prefix contract is documented as variant-agnostic, so the
    /// heap and static variants must report it the same way.
    #[test]
    fn prefix_is_variant_agnostic() {
        assert_eq!(ShortString::new(LONG).prefix(), *b"auth");
        assert_eq!(ShortString::from_static(LONG).prefix(), *b"auth");
        assert_eq!(ShortString::from_static("ab").prefix(), *b"ab\0\0");
        assert_eq!(ShortString::from_static("").prefix(), [0u8; 4]);
    }

    #[test]
    fn display_formats_as_string() {
        let s = ShortString::new("hello");
        assert_eq!(format!("{s}"), "hello");
    }

    #[test]
    fn debug_formats_as_quoted_string() {
        let s = ShortString::new("hello");
        assert_eq!(format!("{s:?}"), "\"hello\"");
    }

    #[test]
    fn from_str_round_trips() {
        let s: ShortString = "hello".parse().unwrap();
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn from_string_takes_ownership() {
        let owned = String::from("kind.v2");
        let s = ShortString::from(owned);
        assert_eq!(s.as_str(), "kind.v2");
    }

    #[test]
    fn from_box_takes_ownership() {
        let b: Box<str> = "kind.v3".into();
        let s = ShortString::from(b);
        assert_eq!(s.as_str(), "kind.v3");
    }

    #[test]
    fn deref_to_str_works() {
        let s = ShortString::new("hello");
        assert!(s.starts_with("hel"));
        assert_eq!(&s[1..4], "ell");
    }

    #[test]
    fn use_as_hashmap_key() {
        let mut map: HashMap<ShortString, u32> = HashMap::new();
        map.insert(ShortString::new("kind.v1"), 1);
        map.insert(ShortString::new(LONG), 2);
        assert_eq!(map.get(&ShortString::from_static("kind.v1")), Some(&1));
        assert_eq!(map.get(&ShortString::from_static(LONG)), Some(&2));
    }

    #[test]
    fn cross_str_equality() {
        let s = ShortString::new("hello");
        assert_eq!(s, "hello");
        assert_eq!(s, String::from("hello"));
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serde_json_round_trips() {
        let s = ShortString::new(LONG);
        let json = serde_json::to_string(&s).unwrap();
        assert_eq!(json, "\"auth.login_attempt.v2\"");
        let back: ShortString = serde_json::from_str(&json).unwrap();
        assert_eq!(back, s);
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serde_handles_empty_string() {
        let s = ShortString::default();
        let json = serde_json::to_string(&s).unwrap();
        assert_eq!(json, "\"\"");
        let back: ShortString = serde_json::from_str(&json).unwrap();
        assert_eq!(back, s);
        assert!(back.is_empty());
    }

    #[cfg(feature = "rkyv")]
    #[test]
    fn rkyv_round_trips() {
        use rkyv::{from_bytes, rancor::Error, to_bytes};
        let s = ShortString::new(LONG);
        let bytes = to_bytes::<Error>(&s).unwrap();
        let back: ShortString = from_bytes::<ShortString, Error>(&bytes).unwrap();
        assert_eq!(back, s);
    }

    /// Hash impl must delegate to `str`'s hash, not return a constant
    /// (which would silently lose entries from a HashMap with mixed
    /// `ShortString` keys). Pin by comparing against `str::hash` of
    /// the same content.
    #[test]
    fn hash_delegates_to_str_hash() {
        let payload = "kind.v1";
        let s = ShortString::new(payload);
        assert_eq!(
            hash_of(&s),
            hash_of(payload),
            "ShortString::hash must match str::hash of identical content"
        );
    }

    /// `AsRef<str>` returns the actual content, not a constant. Mutations
    /// `-> ""` and `-> "xyzzy"` change the borrowed payload.
    #[test]
    fn as_ref_str_returns_content() {
        let s = ShortString::new("kind.v1");
        let borrowed: &str = s.as_ref();
        assert_eq!(borrowed, "kind.v1");
    }

    /// `AsRef<[u8]>` returns the UTF-8 bytes of the content. The
    /// mutations leak constant Vecs (empty / `[0]` / `[1]`), all of
    /// which differ from real content bytes.
    #[test]
    fn as_ref_bytes_returns_utf8() {
        let s = ShortString::new("ab");
        let bytes: &[u8] = s.as_ref();
        assert_eq!(bytes, b"ab");
    }

    /// `PartialEq<str>` discriminates equal from unequal content.
    /// Body-mutations `true` / `false` would conflate the two.
    #[test]
    fn partial_eq_str_distinguishes_equal_from_unequal() {
        let s = ShortString::new("hello");
        assert!(<ShortString as PartialEq<str>>::eq(&s, "hello"));
        assert!(!<ShortString as PartialEq<str>>::eq(&s, "world"));
    }

    /// `PartialEq<&str>` body-mutation `-> true` would equate every
    /// `&str`. Pin with an unequal pair.
    #[test]
    fn partial_eq_amp_str_distinguishes_unequal() {
        let s = ShortString::new("hello");
        let other = "world";
        assert!(!<ShortString as PartialEq<&str>>::eq(&s, &other));
    }

    /// `PartialEq<String>` body-mutation `-> true` would equate every
    /// owned String. Pin with an unequal pair.
    #[test]
    fn partial_eq_string_distinguishes_unequal() {
        let s = ShortString::new("hello");
        let other = String::from("world");
        assert!(!<ShortString as PartialEq<String>>::eq(&s, &other));
    }

    /// `From<&str>` must construct a ShortString carrying the input;
    /// `-> Default::default()` would always yield an empty string.
    #[test]
    fn from_str_carries_input() {
        let s: ShortString = "kind.v9".into();
        assert_eq!(s.as_str(), "kind.v9");
    }
}
561
#[cfg(test)]
mod proptests {
    //! Property tests: on arbitrary strings, `ShortString` must behave
    //! exactly like the `str` it was built from.

    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Contents and byte length survive construction unchanged.
        #[test]
        fn round_trip_preserves_string(s in any::<String>()) {
            let short = ShortString::new(&s);
            prop_assert_eq!(short.as_str(), s.as_str());
            prop_assert_eq!(short.len(), s.len());
        }

        /// `==` on `ShortString` agrees with `==` on the source strings.
        #[test]
        fn equality_consistent_with_str(a in any::<String>(), b in any::<String>()) {
            let lhs = ShortString::new(&a);
            let rhs = ShortString::new(&b);
            let expected = a == b;
            prop_assert_eq!(lhs == rhs, expected);
        }

        /// `cmp` on `ShortString` agrees with `cmp` on the source strings.
        #[test]
        fn ordering_consistent_with_str(a in any::<String>(), b in any::<String>()) {
            let lhs = ShortString::new(&a);
            let rhs = ShortString::new(&b);
            let expected = a.cmp(&b);
            prop_assert_eq!(lhs.cmp(&rhs), expected);
        }

        /// Two values built from the same text hash identically.
        #[test]
        fn hash_consistent_with_eq(a in any::<String>()) {
            use std::collections::hash_map::DefaultHasher;
            let first = ShortString::new(&a);
            let second = ShortString::new(&a);
            let mut first_hasher = DefaultHasher::new();
            let mut second_hasher = DefaultHasher::new();
            first.hash(&mut first_hasher);
            second.hash(&mut second_hasher);
            prop_assert_eq!(first_hasher.finish(), second_hasher.finish());
        }

        /// JSON serialization followed by deserialization is lossless.
        #[cfg(feature = "serde")]
        #[test]
        fn serde_json_round_trips_arbitrary(s in any::<String>()) {
            let short = ShortString::new(&s);
            let encoded = serde_json::to_string(&short).unwrap();
            let decoded: ShortString = serde_json::from_str(&encoded).unwrap();
            prop_assert_eq!(decoded, short);
        }
    }
}