Skip to main content

relon_eval_api/
smol_str.rs

1//! Short-string optimization (SSO) for `Value::String`.
2//!
3//! # Why
4//!
5//! Tree-walker and compiled string paths spend a non-trivial slice of their
6//! hot path on `String` allocation + drop pairs that hold a few
7//! bytes of payload — dict keys, identifiers, short concat intermediates
8//! (`"a" + i.to_str()`), `type_name()` results, etc. Every one of those
9//! `String`s touches the global allocator twice (alloc on push / drop
10//! on free), pulls the heap header into cache, and adds a pointer-chase
11//! every time the evaluator reads the bytes.
12//!
13//! LuaJIT addresses the same shape with a `GCstr` short/long split
14//! (≤ 39 byte payload stays in the string-table directly, longer
15//! strings spill to a separate object). Relon's `Value` enum already
16//! reserves a 24-byte slot for the `String` variant (see
17//! `value::size_guard::value_enum_is_compact`), so the same idea fits
18//! natively — we keep the existing slot width and use it for either
19//! inline bytes (≤ 22 bytes) or a refcounted `Arc<str>` to the heap.
20//!
21//! # Layout
22//!
23//! ```text
24//! 24 bytes, 8-aligned:
25//!
26//!   Inline { len: u8, data: [u8; 22] }   ≤ 22 byte payload, no alloc
27//!   Heap   ( Arc<str> )                   long string, shared by clones
28//! ```
29//!
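//! The enum carries its own one-byte discriminant: `Inline` needs
//! 1 (tag) + 1 (`len`) + 22 (`data`) = 24 bytes, and `Heap` is the tag
//! plus a 16-byte, 8-aligned fat pointer, so the enum stays 24 bytes —
//! identical to the `String` it replaces. (The `NonNull` niche inside
//! `Arc<str>` cannot double as the discriminant here, because a 23-byte
//! inline payload in a 24-byte slot necessarily overlaps the pointer
//! word.) The 22-byte inline cap was picked to fill the 24-byte slot
//! after the tag and the inline-length byte; raising it would push the
//! `Value` enum past its 48-byte size guard.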
35//!
36//! # Semantics
37//!
38//! `SmolStr` is value-equal to `&str` / `String` byte-for-byte and
39//! implements `Deref<Target = str>` so existing pattern bindings
40//! (`Value::String(s) => s.len()` etc.) keep working unchanged. Cloning
41//! is `O(len/word)` for inline payloads (memcpy) and a single `Arc`
42//! refcount bump for heap payloads — both well under what a `String`
43//! clone costs (heap alloc + memcpy).
44//!
45//! Serde and `Display` formatting round-trip through `&str` so external
46//! shapes (JSON, error messages) stay identical to the pre-SSO baseline.
47
48// `unsafe` is allowed inside this module only — see the `as_str()`
49// SAFETY comment. The rest of `relon-eval-api` runs under `deny`.
50#![allow(unsafe_code)]
51
52use serde::{Deserialize, Deserializer, Serialize, Serializer};
53use std::borrow::Borrow;
54use std::cmp::Ordering;
55use std::fmt;
56use std::hash::{Hash, Hasher};
57use std::ops::Deref;
58use std::sync::Arc;
59
/// Max payload length (in bytes) that stays inline in the `Inline`
/// variant. Bumping this requires re-running
/// `value::size_guard::value_enum_is_compact` because the `Value` enum
/// width is governed by `Float (16 B)`, `SmolStr (24 B)`, and the boxed
/// heavy variants — `SmolStr` is the current widest slot.
pub const SMOL_STR_INLINE_CAP: usize = 22;

// The inline length is stored in a `u8`. Fail the build — instead of
// silently truncating in the `as u8` casts below — if the cap is ever
// raised past what a `u8` can represent.
const _: () = assert!(SMOL_STR_INLINE_CAP <= u8::MAX as usize);

/// Short-string-optimized string. Inlines ≤ [`SMOL_STR_INLINE_CAP`]
/// bytes directly in the value slot; longer payloads land on the heap
/// behind a refcounted `Arc<str>` so clones are O(1).
#[derive(Clone)]
pub struct SmolStr {
    repr: SmolStrRepr,
}

/// Private storage for [`SmolStr`].
///
/// Invariant relied on by `SmolStr::as_str`: for `Inline`,
/// `len <= SMOL_STR_INLINE_CAP` and `data[..len]` is valid UTF-8.
#[derive(Clone)]
enum SmolStrRepr {
    /// Payload stored in place; bytes past `len` are zero padding.
    Inline {
        len: u8,
        data: [u8; SMOL_STR_INLINE_CAP],
    },
    /// Payload stored behind a shared, refcounted allocation.
    Heap(Arc<str>),
}

impl SmolStr {
    /// Build an empty `SmolStr` without touching the allocator.
    #[inline]
    pub const fn new_empty() -> Self {
        Self {
            repr: SmolStrRepr::Inline {
                len: 0,
                data: [0u8; SMOL_STR_INLINE_CAP],
            },
        }
    }

    /// Wrap an already-filled inline buffer. Callers must guarantee that
    /// `data[..len]` is valid UTF-8 and `len <= SMOL_STR_INLINE_CAP`.
    #[inline]
    fn inline(len: u8, data: [u8; SMOL_STR_INLINE_CAP]) -> Self {
        debug_assert!((len as usize) <= SMOL_STR_INLINE_CAP);
        Self {
            repr: SmolStrRepr::Inline { len, data },
        }
    }

    /// Wrap an existing `Arc<str>` as a heap-backed `SmolStr`.
    #[inline]
    fn heap(arc: Arc<str>) -> Self {
        Self {
            repr: SmolStrRepr::Heap(arc),
        }
    }

    /// Borrow the payload as a `&str` slice. Cheap (no copies) in both
    /// `Inline` and `Heap` modes.
    #[inline]
    pub fn as_str(&self) -> &str {
        match &self.repr {
            SmolStrRepr::Inline { len, data } => {
                let slice = &data[..*len as usize];
                // SAFETY: the representation is private and every
                // constructor either copies whole `str` values back to
                // back (`from_borrowed`, `concat`, `concat_many`),
                // validates the bytes (`try_build_inline`), or stores an
                // empty prefix (`new_empty`), so `data[..len]` is always
                // valid UTF-8.
                unsafe { std::str::from_utf8_unchecked(slice) }
            }
            SmolStrRepr::Heap(arc) => arc,
        }
    }

    /// Byte length of the payload (matching `str::len`).
    #[inline]
    pub fn len(&self) -> usize {
        match &self.repr {
            SmolStrRepr::Inline { len, .. } => *len as usize,
            SmolStrRepr::Heap(arc) => arc.len(),
        }
    }

    /// `true` iff the payload is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when the payload is stored inline (no heap
    /// allocation). Useful for SSO-aware diagnostics + tests.
    #[inline]
    pub fn is_inline(&self) -> bool {
        matches!(&self.repr, SmolStrRepr::Inline { .. })
    }

    /// Returns `true` iff every byte in the payload is ASCII
    /// (`< 0x80`).
    ///
    /// # Why this exists
    ///
    /// The tree-walker case-fold helpers (`upper` / `lower` / `title`
    /// in `relon-evaluator::stdlib`) accept an `AsciiHint` so they can
    /// skip the per-call scan inside `fold_string_with_ascii_hint`.
    /// Without a `SmolStr`-side oracle every surface call had to pass
    /// `AsciiHint::Unknown` and let the fold engine pay the scan cost.
    /// Wiring `is_ascii()` into the helpers lets them surface
    /// `AllAscii` / `KnownNonAscii` and route through the preclassified
    /// fast path (the `preclassified_*` rows in
    /// `crates/relon-bench/benches/ascii_case_fold.rs`).
    ///
    /// # Cost
    ///
    /// * **Inline**: one `[u8]::is_ascii()` scan over at most
    ///   [`SMOL_STR_INLINE_CAP`] bytes.
    /// * **Heap**: delegates to `str::is_ascii()` over the full payload,
    ///   so the cost is linear in the string length. A future revision
    ///   could cache the answer beside the `Arc<str>`; for now the
    ///   on-demand scan keeps the slot layout (and its 24-byte size)
    ///   untouched.
    #[inline]
    pub fn is_ascii(&self) -> bool {
        match &self.repr {
            // Only the first `len` bytes are payload; the zero padding
            // behind them must not be consulted.
            SmolStrRepr::Inline { len, data } => data[..*len as usize].is_ascii(),
            SmolStrRepr::Heap(arc) => arc.is_ascii(),
        }
    }

    /// Build a `SmolStr` from any `&str`. ≤ [`SMOL_STR_INLINE_CAP`]
    /// bytes land inline; longer payloads allocate one `Arc<str>`.
    ///
    /// Named `from_borrowed` to avoid shadowing the `FromStr` trait
    /// method (clippy::should_implement_trait); the trait impl
    /// forwards to this helper so `"x".parse::<SmolStr>()` keeps
    /// working too.
    #[inline]
    pub fn from_borrowed(s: &str) -> Self {
        let bytes = s.as_bytes();
        if bytes.len() <= SMOL_STR_INLINE_CAP {
            // Zero-init the whole buffer so the tail past `len` is
            // deterministic; readers only ever look at `data[..len]`.
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            data[..bytes.len()].copy_from_slice(bytes);
            // Lossless: `bytes.len() <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(bytes.len() as u8, data)
        } else {
            Self::heap(Arc::from(s))
        }
    }

    /// Consume a `String`. ≤ [`SMOL_STR_INLINE_CAP`] bytes are copied
    /// into the inline slot and the original heap buffer is dropped.
    ///
    /// Longer payloads are converted with `Arc::from(String)`. Note that
    /// this conversion allocates the shared `Arc` block and copies the
    /// bytes into it before freeing the `String` buffer — the refcounts
    /// live in the same allocation as the payload, so the `String`'s
    /// buffer cannot be adopted as-is.
    #[inline]
    pub fn from_string(s: String) -> Self {
        if s.len() <= SMOL_STR_INLINE_CAP {
            // `s` (and its heap buffer) is dropped when this arm returns.
            Self::from_borrowed(&s)
        } else {
            Self::heap(Arc::from(s))
        }
    }

    /// Concatenate two `&str` slices into a single `SmolStr` without
    /// going through `format!`.
    ///
    /// * If `a.len() + b.len() <= SMOL_STR_INLINE_CAP` the result lands
    ///   in the inline slot — zero allocations on the path.
    /// * Otherwise the bytes are assembled in one pre-sized `String`
    ///   and converted to an `Arc<str>` (see the heap-path comment in
    ///   the body for the cost of that conversion).
    ///
    /// This is the hot path the evaluator's `Operator::Add` rule on
    /// `Value::String(a) + Value::String(b)` (W3-style concat) goes
    /// through; eliminating the `format!` indirection drops the
    /// short-string concat row by ~3x in the bench.
    #[inline]
    pub fn concat(a: &str, b: &str) -> Self {
        // Cannot overflow: each `str` is at most `isize::MAX` bytes, so
        // the sum of two lengths always fits in `usize`.
        let total = a.len() + b.len();
        if total <= SMOL_STR_INLINE_CAP {
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            data[..a.len()].copy_from_slice(a.as_bytes());
            data[a.len()..total].copy_from_slice(b.as_bytes());
            // Lossless: `total <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(total as u8, data)
        } else {
            // Heap fallback: assemble the payload in one exactly-sized
            // `String`, then convert. `Arc::from(String)` allocates the
            // `Arc` block and copies the bytes into it, so this path
            // costs two allocations and two copies in total.
            let mut buf = String::with_capacity(total);
            buf.push_str(a);
            buf.push_str(b);
            Self::heap(Arc::from(buf))
        }
    }

    /// Concatenate N `&str` slices into a single `SmolStr`. Compared to
    /// the recursive `concat(concat(a, b), c)` shape this drops the
    /// intermediate `Arc<str>` allocations (and their refcount drops)
    /// entirely — useful when the evaluator detects a left-leaning `+`
    /// chain on `Value::String` operands
    /// (e.g. `"prefix" + name + ": " + value`).
    ///
    /// * Pre-scans the total length once (with overflow checking).
    /// * Inline fast path when `total <= SMOL_STR_INLINE_CAP`: no
    ///   allocator hit, the slices are copied into the 22-byte slot.
    /// * Heap fallback fills one `String::with_capacity(total)` and
    ///   converts it with `Arc::from(String)`, which allocates the
    ///   `Arc` block and copies the bytes once more — a fixed two
    ///   allocations regardless of arity.
    ///
    /// Degenerate inputs:
    ///
    /// * Zero slices -> empty inline payload.
    /// * One slice -> identical semantics to `from_borrowed`.
    /// * Two slices -> identical semantics to `concat`. Kept as a single
    ///   entry point so the evaluator can pick `concat_many` whenever the
    ///   chain length is `>= 2` without dispatching on arity.
    ///
    /// # Panics
    ///
    /// Panics if the summed length of all slices overflows `usize`
    /// (only reachable when the same slice is repeated many times; such
    /// a payload could not be allocated anyway).
    #[inline]
    pub fn concat_many(slices: &[&str]) -> Self {
        // Checked sum: entries may alias the same backing string, so the
        // total is not bounded by addressable memory. A wrapped total
        // would pick the wrong branch below and fail with a confusing
        // out-of-range slice panic.
        let total = slices
            .iter()
            .try_fold(0usize, |acc, s| acc.checked_add(s.len()))
            .expect("SmolStr::concat_many: total length overflows usize");
        if total <= SMOL_STR_INLINE_CAP {
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            let mut offset = 0usize;
            for s in slices {
                let bytes = s.as_bytes();
                data[offset..offset + bytes.len()].copy_from_slice(bytes);
                offset += bytes.len();
            }
            // Lossless: `total <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(total as u8, data)
        } else {
            let mut buf = String::with_capacity(total);
            for s in slices {
                buf.push_str(s);
            }
            Self::heap(Arc::from(buf))
        }
    }

    /// Materialise an owned `String` copy of the payload. Allocates for
    /// inline and heap variants alike — call sites that only need a
    /// borrow should prefer [`SmolStr::as_str`] / `Deref`.
    #[inline]
    pub fn into_string(self) -> String {
        // `Arc::try_unwrap` requires `T: Sized`, so an `Arc<str>`
        // payload cannot be taken out of its allocation; we always
        // copy. The hot evaluator paths read through
        // [`SmolStr::as_str`]; only a handful of compatibility shims
        // call `into_string` (host boundary, JSON projector).
        self.as_str().to_owned()
    }

    /// Build an inline `SmolStr` by writing UTF-8 bytes directly into
    /// the 22-byte inline slot via the caller-supplied writer.
    ///
    /// `out_len` is the number of bytes the writer will emit; the call
    /// returns `None` immediately if `out_len > SMOL_STR_INLINE_CAP`,
    /// letting the caller fall through to its heap-path implementation
    /// without paying for the writer invocation. When the inline path
    /// is taken the writer receives a zero-filled `&mut [u8]` of length
    /// `out_len` pointing into the inline buffer. The resulting byte
    /// prefix is validated before construction; invalid UTF-8 returns
    /// `None`. This keeps the unchecked `as_str()` conversion behind the
    /// private representation sound while preserving the allocation-free
    /// fast path for ASCII case-fold helpers.
    #[inline]
    pub fn try_build_inline<F>(out_len: usize, write: F) -> Option<Self>
    where
        F: FnOnce(&mut [u8]),
    {
        if out_len > SMOL_STR_INLINE_CAP {
            return None;
        }
        let mut data = [0u8; SMOL_STR_INLINE_CAP];
        // Hand the writer exactly the slice it must fill; the tail past
        // `out_len` stays zeroed.
        write(&mut data[..out_len]);
        // Reject anything that is not UTF-8 so `as_str()` stays sound.
        std::str::from_utf8(&data[..out_len]).ok()?;
        // Lossless: `out_len <= SMOL_STR_INLINE_CAP <= u8::MAX`.
        Some(Self::inline(out_len as u8, data))
    }
}
355
356impl Default for SmolStr {
357    #[inline]
358    fn default() -> Self {
359        SmolStr::new_empty()
360    }
361}
362
363impl Deref for SmolStr {
364    type Target = str;
365
366    #[inline]
367    fn deref(&self) -> &str {
368        self.as_str()
369    }
370}
371
372impl AsRef<str> for SmolStr {
373    #[inline]
374    fn as_ref(&self) -> &str {
375        self.as_str()
376    }
377}
378
379impl Borrow<str> for SmolStr {
380    #[inline]
381    fn borrow(&self) -> &str {
382        self.as_str()
383    }
384}
385
386impl From<&str> for SmolStr {
387    #[inline]
388    fn from(s: &str) -> Self {
389        SmolStr::from_borrowed(s)
390    }
391}
392
393impl std::str::FromStr for SmolStr {
394    type Err = std::convert::Infallible;
395
396    #[inline]
397    fn from_str(s: &str) -> Result<Self, Self::Err> {
398        Ok(SmolStr::from_borrowed(s))
399    }
400}
401
402impl From<String> for SmolStr {
403    #[inline]
404    fn from(s: String) -> Self {
405        SmolStr::from_string(s)
406    }
407}
408
409impl From<&String> for SmolStr {
410    #[inline]
411    fn from(s: &String) -> Self {
412        SmolStr::from_borrowed(s.as_str())
413    }
414}
415
416impl From<SmolStr> for String {
417    #[inline]
418    fn from(s: SmolStr) -> Self {
419        s.into_string()
420    }
421}
422
423impl From<&SmolStr> for String {
424    #[inline]
425    fn from(s: &SmolStr) -> Self {
426        s.as_str().to_owned()
427    }
428}
429
430impl fmt::Debug for SmolStr {
431    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
432        fmt::Debug::fmt(self.as_str(), f)
433    }
434}
435
436impl fmt::Display for SmolStr {
437    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438        fmt::Display::fmt(self.as_str(), f)
439    }
440}
441
442impl PartialEq for SmolStr {
443    #[inline]
444    fn eq(&self, other: &Self) -> bool {
445        self.as_str() == other.as_str()
446    }
447}
448
449impl Eq for SmolStr {}
450
451impl PartialEq<str> for SmolStr {
452    #[inline]
453    fn eq(&self, other: &str) -> bool {
454        self.as_str() == other
455    }
456}
457
458impl PartialEq<&str> for SmolStr {
459    #[inline]
460    fn eq(&self, other: &&str) -> bool {
461        self.as_str() == *other
462    }
463}
464
465impl PartialEq<String> for SmolStr {
466    #[inline]
467    fn eq(&self, other: &String) -> bool {
468        self.as_str() == other.as_str()
469    }
470}
471
472impl PartialEq<SmolStr> for str {
473    #[inline]
474    fn eq(&self, other: &SmolStr) -> bool {
475        self == other.as_str()
476    }
477}
478
479impl PartialEq<SmolStr> for &str {
480    #[inline]
481    fn eq(&self, other: &SmolStr) -> bool {
482        *self == other.as_str()
483    }
484}
485
486impl PartialEq<SmolStr> for String {
487    #[inline]
488    fn eq(&self, other: &SmolStr) -> bool {
489        self.as_str() == other.as_str()
490    }
491}
492
493impl PartialOrd for SmolStr {
494    #[inline]
495    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
496        Some(self.cmp(other))
497    }
498}
499
500impl Ord for SmolStr {
501    #[inline]
502    fn cmp(&self, other: &Self) -> Ordering {
503        self.as_str().cmp(other.as_str())
504    }
505}
506
507impl Hash for SmolStr {
508    #[inline]
509    fn hash<H: Hasher>(&self, state: &mut H) {
510        // Hash the &str representation so SmolStr / &str / String hash
511        // to the same value when their payloads match — preserves the
512        // ability to look up Dict keys by &str across types.
513        self.as_str().hash(state)
514    }
515}
516
517impl Serialize for SmolStr {
518    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
519    where
520        S: Serializer,
521    {
522        serializer.serialize_str(self.as_str())
523    }
524}
525
526impl<'de> Deserialize<'de> for SmolStr {
527    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
528    where
529        D: Deserializer<'de>,
530    {
531        let s = String::deserialize(deserializer)?;
532        Ok(SmolStr::from_string(s))
533    }
534}
535
/// Unit tests for `SmolStr`: inline/heap boundary behaviour, clone
/// sharing, serde round trips, cross-type equality, ordering and
/// hashing, the concat helpers, `try_build_inline`, and `is_ascii`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_is_inline() {
        let s = SmolStr::new_empty();
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn short_payload_stays_inline() {
        let s = SmolStr::from_borrowed("hello");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
        assert_eq!(s.len(), 5);
    }

    #[test]
    fn cap_boundary_inline() {
        // Exactly cap bytes -> still inline.
        let payload = "a".repeat(SMOL_STR_INLINE_CAP);
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
        assert_eq!(s.as_str(), payload);
    }

    #[test]
    fn one_past_cap_goes_heap() {
        let payload = "a".repeat(SMOL_STR_INLINE_CAP + 1);
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP + 1);
        assert_eq!(s.as_str(), payload);
    }

    #[test]
    fn from_string_respects_cap_boundary() {
        // Same inline/heap split as `from_borrowed`, via the owning
        // constructor.
        let at_cap = "a".repeat(SMOL_STR_INLINE_CAP);
        let inline = SmolStr::from_string(at_cap.clone());
        assert!(inline.is_inline());
        assert_eq!(inline.as_str(), at_cap);

        let past_cap = "a".repeat(SMOL_STR_INLINE_CAP + 1);
        let heap = SmolStr::from_string(past_cap.clone());
        assert!(!heap.is_inline());
        assert_eq!(heap.as_str(), past_cap);
    }

    #[test]
    fn into_string_round_trips_both_reprs() {
        let short = SmolStr::from_borrowed("short");
        assert_eq!(short.into_string(), "short");

        let payload = "x".repeat(40);
        let long = SmolStr::from_borrowed(&payload);
        assert_eq!(long.into_string(), payload);
    }

    #[test]
    fn clone_inline_does_not_alloc_heap() {
        let s = SmolStr::from_borrowed("short");
        let c = s.clone();
        assert!(c.is_inline());
        assert_eq!(s, c);
    }

    #[test]
    fn clone_heap_shares_arc() {
        let s = SmolStr::from_borrowed(&"x".repeat(40));
        let c = s.clone();
        match (&s.repr, &c.repr) {
            (SmolStrRepr::Heap(a), SmolStrRepr::Heap(b)) => {
                assert!(
                    Arc::ptr_eq(a, b),
                    "Heap clone should share the same Arc allocation"
                );
            }
            _ => panic!("expected both heap variants"),
        }
    }

    #[test]
    fn round_trip_serde() {
        let s = SmolStr::from_borrowed("hello world");
        let json = serde_json::to_string(&s).unwrap();
        assert_eq!(json, "\"hello world\"");
        let back: SmolStr = serde_json::from_str(&json).unwrap();
        assert_eq!(back, s);
    }

    #[test]
    fn round_trip_serde_heap_payload() {
        // Past-cap payloads must survive the round trip and land back
        // on the heap representation.
        let s = SmolStr::from_borrowed(&"z".repeat(40));
        let json = serde_json::to_string(&s).unwrap();
        let back: SmolStr = serde_json::from_str(&json).unwrap();
        assert!(!back.is_inline());
        assert_eq!(back, s);
    }

    #[test]
    fn eq_against_str_and_string() {
        let s = SmolStr::from_borrowed("k");
        assert_eq!(s, "k");
        assert_eq!(s, *"k");
        assert_eq!(s, String::from("k"));
        assert_eq!(String::from("k"), s);
    }

    #[test]
    fn dict_lookup_by_str_key() {
        // `Hash` + `Borrow<str>` + `Eq` must agree with `str` so maps
        // keyed by `SmolStr` can be probed with a plain `&str`.
        use std::collections::HashMap;

        let mut map: HashMap<SmolStr, i32> = HashMap::new();
        map.insert(SmolStr::from("short"), 1);
        map.insert(SmolStr::from("y".repeat(40)), 2);
        assert_eq!(map.get("short"), Some(&1));
        assert_eq!(map.get("y".repeat(40).as_str()), Some(&2));
        assert_eq!(map.get("missing"), None);
    }

    #[test]
    fn ordering_matches_str_across_reprs() {
        // An inline value and a heap value order by payload only.
        let short = SmolStr::from("b");
        let long = SmolStr::from("a".repeat(40));
        assert!(long < short);
        assert_eq!(short.cmp(&long), "b".cmp("a".repeat(40).as_str()));
    }

    #[test]
    fn size_is_24_bytes() {
        // Match `String` exactly so `Value` enum width does not grow.
        assert_eq!(std::mem::size_of::<SmolStr>(), 24);
    }

    #[test]
    fn concat_two_slices_inline_and_heap() {
        let short = SmolStr::concat("foo", "bar");
        assert!(short.is_inline());
        assert_eq!(short.as_str(), "foobar");

        // 22 + 1 bytes -> one past the cap, must spill to the heap.
        let long = SmolStr::concat(&"a".repeat(SMOL_STR_INLINE_CAP), "b");
        assert!(!long.is_inline());
        assert_eq!(long.len(), SMOL_STR_INLINE_CAP + 1);
        assert!(long.as_str().ends_with('b'));
    }

    #[test]
    fn concat_many_empty_is_empty_inline() {
        let s = SmolStr::concat_many(&[]);
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn concat_many_single_slice_matches_from_borrowed() {
        let s = SmolStr::concat_many(&["hello"]);
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn concat_many_inline_path() {
        // 4 chunks of 5 bytes = 20 bytes, still inline.
        let s = SmolStr::concat_many(&["aaaaa", "bbbbb", "ccccc", "ddddd"]);
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "aaaaabbbbbcccccddddd");
        assert_eq!(s.len(), 20);
    }

    #[test]
    fn concat_many_at_cap_inline() {
        // 22 bytes exactly -> still inline.
        let s = SmolStr::concat_many(&["a".repeat(11).as_str(), "b".repeat(11).as_str()]);
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
    }

    #[test]
    fn concat_many_heap_path() {
        // 4 chunks of 8 = 32 bytes, past cap -> heap.
        let s = SmolStr::concat_many(&["aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd"]);
        assert!(!s.is_inline());
        assert_eq!(s.as_str(), "aaaaaaaabbbbbbbbccccccccdddddddd");
        assert_eq!(s.len(), 32);
    }

    #[test]
    fn try_build_inline_fills_inline_slot() {
        // Writer fills the slice byte-by-byte with the lower-case of
        // each ASCII letter — exercises the to_lower fast path shape
        // the stdlib helpers now use.
        let src = b"HELLO";
        let s = SmolStr::try_build_inline(src.len(), |out| {
            for (i, b) in src.iter().enumerate() {
                out[i] = b.to_ascii_lowercase();
            }
        })
        .expect("inline path should accept 5-byte payload");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn try_build_inline_at_cap_inline() {
        // Exactly 22 bytes — boundary of the inline slot.
        let s =
            SmolStr::try_build_inline(SMOL_STR_INLINE_CAP, |out| out.fill(b'x')).expect("22 fits");
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
    }

    #[test]
    fn try_build_inline_overflow_returns_none() {
        // 23 bytes — past the cap. Writer must not be invoked; we
        // assert via a panicking closure to catch a hypothetical
        // regression.
        let s = SmolStr::try_build_inline(SMOL_STR_INLINE_CAP + 1, |_out| {
            panic!("writer must not run when out_len exceeds cap");
        });
        assert!(s.is_none());
    }

    #[test]
    fn try_build_inline_rejects_invalid_utf8() {
        let s = SmolStr::try_build_inline(1, |out| out[0] = 0xff);
        assert!(s.is_none());
    }

    #[test]
    fn try_build_inline_zero_length_is_empty() {
        let s = SmolStr::try_build_inline(0, |_out| { /* nothing */ })
            .expect("zero-length always inline");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn is_ascii_inline_empty() {
        // Empty payload is vacuously ASCII.
        let s = SmolStr::new_empty();
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_pure_ascii() {
        let s = SmolStr::from_borrowed("hello");
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_with_high_byte() {
        // 'caf' + U+00E9 (encoded as 0xC3 0xA9). Built from raw bytes
        // so the source file stays pure-ASCII while the SmolStr
        // payload contains a byte >= 0x80, forcing `is_ascii()` to
        // false.
        let raw = vec![b'c', b'a', b'f', 0xC3, 0xA9];
        let payload = String::from_utf8(raw).expect("valid UTF-8");
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert!(!s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_at_cap_boundary() {
        // 22-byte ASCII payload sits exactly at the inline cap.
        let payload = "a".repeat(SMOL_STR_INLINE_CAP);
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_heap_pure_ascii() {
        let payload = "x".repeat(SMOL_STR_INLINE_CAP + 8);
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_heap_with_non_ascii() {
        // Heap-sized payload (> 22 bytes) that contains a non-ASCII
        // codepoint near the end — exercises the heap-path delegation
        // to `str::is_ascii`. We append U+00E9 (encoded as 0xC3 0xA9
        // raw bytes) so the source file stays pure-ASCII while the
        // runtime payload contains a byte >= 0x80.
        let mut payload = "x".repeat(SMOL_STR_INLINE_CAP).into_bytes();
        payload.extend_from_slice(&[b'y', b'y', b'z', 0xC3, 0xA9]);
        let payload = String::from_utf8(payload).expect("valid UTF-8");
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert!(!s.is_ascii());
    }

    #[test]
    fn concat_many_matches_nested_concat() {
        // Result must be byte-identical to the recursive shape so the
        // evaluator can swap in `concat_many` without changing user-
        // visible string values.
        let leaves = ["foo_", "bar_", "baz_", "qux_"];
        let nested = {
            let mut acc = SmolStr::new_empty();
            for leaf in leaves.iter() {
                acc = SmolStr::concat(acc.as_str(), leaf);
            }
            acc
        };
        let folded = SmolStr::concat_many(&leaves);
        assert_eq!(nested.as_str(), folded.as_str());
    }
}