relon_eval_api/smol_str.rs
1//! Short-string optimization (SSO) for `Value::String`.
2//!
3//! # Why
4//!
5//! Tree-walker and compiled string paths spend a non-trivial slice of their
6//! hot path on `String` allocation + drop pairs that hold a few
7//! bytes of payload — dict keys, identifiers, short concat intermediates
8//! (`"a" + i.to_str()`), `type_name()` results, etc. Every one of those
9//! `String`s touches the global allocator twice (alloc on push / drop
10//! on free), pulls the heap header into cache, and adds a pointer-chase
11//! every time the evaluator reads the bytes.
12//!
13//! LuaJIT addresses the same shape with a `GCstr` short/long split
14//! (≤ 39 byte payload stays in the string-table directly, longer
15//! strings spill to a separate object). Relon's `Value` enum already
16//! reserves a 24-byte slot for the `String` variant (see
17//! `value::size_guard::value_enum_is_compact`), so the same idea fits
18//! natively — we keep the existing slot width and use it for either
19//! inline bytes (≤ 22 bytes) or a refcounted `Arc<str>` to the heap.
20//!
21//! # Layout
22//!
23//! ```text
24//! 24 bytes, 8-aligned:
25//!
26//! Inline { len: u8, data: [u8; 22] } ≤ 22 byte payload, no alloc
27//! Heap ( Arc<str> ) long string, shared by clones
28//! ```
29//!
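//! The `Inline` variant needs 23 bytes (1 length byte + 22 data bytes) and
//! every bit pattern of those bytes is valid, so the enum discriminant
//! occupies the one remaining byte: 1 + 1 + 22 = 24 bytes at 8-byte
//! alignment — identical in size to the `String` it replaces (pinned by the
//! `size_is_24_bytes` test). The null-pointer niche of `Arc<str>` cannot
//! carry the discriminant here, because the inline bytes overlap the pointer
//! slot. The 22-byte inline cap was picked to fill the 24-byte slot exactly,
//! with one byte for the inline length and one for the discriminant; raising
//! it would push the `Value` enum past its 48-byte size guard.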
35//!
36//! # Semantics
37//!
38//! `SmolStr` is value-equal to `&str` / `String` byte-for-byte and
39//! implements `Deref<Target = str>` so existing pattern bindings
40//! (`Value::String(s) => s.len()` etc.) keep working unchanged. Cloning
41//! is `O(len/word)` for inline payloads (memcpy) and a single `Arc`
42//! refcount bump for heap payloads — both well under what a `String`
43//! clone costs (heap alloc + memcpy).
44//!
45//! Serde and `Display` formatting round-trip through `&str` so external
46//! shapes (JSON, error messages) stay identical to the pre-SSO baseline.
47
48// `unsafe` is allowed inside this module only — see the `as_str()`
49// SAFETY comment. The rest of `relon-eval-api` runs under `deny`.
50#![allow(unsafe_code)]
51
52use serde::{Deserialize, Deserializer, Serialize, Serializer};
53use std::borrow::Borrow;
54use std::cmp::Ordering;
55use std::fmt;
56use std::hash::{Hash, Hasher};
57use std::ops::Deref;
58use std::sync::Arc;
59
/// Max payload length that stays inline in the `Inline` variant. Bumping
/// this requires re-running `value::size_guard::value_enum_is_compact`
/// because the `Value` enum width is governed by `Float (16 B)`,
/// `SmolStr (24 B)`, and the boxed heavy variants — `SmolStr` is the
/// current widest slot.
///
/// The inline length is stored in a `u8`, so the cap must never exceed
/// `u8::MAX`; the assertion below enforces that at compile time.
pub const SMOL_STR_INLINE_CAP: usize = 22;

// Every `len as u8` cast in this module is only lossless while the cap fits
// in a byte. Fail the build (instead of silently truncating) if that changes.
const _: () = assert!(SMOL_STR_INLINE_CAP <= u8::MAX as usize);

/// Short-string-optimized string. Inlines ≤ [`SMOL_STR_INLINE_CAP`]
/// bytes directly in the value slot; longer payloads land on the heap
/// behind a refcounted `Arc<str>` so clones are O(1).
#[derive(Clone)]
pub struct SmolStr {
    repr: SmolStrRepr,
}

/// Private storage of a [`SmolStr`].
///
/// Invariant (relied upon by the `unsafe` block in `SmolStr::as_str`):
/// for `Inline`, `len <= SMOL_STR_INLINE_CAP` and `data[..len]` is valid
/// UTF-8. Bytes past `len` are zero-filled and never read.
#[derive(Clone)]
enum SmolStrRepr {
    Inline {
        len: u8,
        data: [u8; SMOL_STR_INLINE_CAP],
    },
    Heap(Arc<str>),
}

impl SmolStr {
    /// Build an empty `SmolStr` without touching the allocator.
    #[inline]
    pub const fn new_empty() -> Self {
        Self {
            repr: SmolStrRepr::Inline {
                len: 0,
                data: [0u8; SMOL_STR_INLINE_CAP],
            },
        }
    }

    /// Wrap an already-filled inline buffer. Callers must guarantee that
    /// `len <= SMOL_STR_INLINE_CAP` and that `data[..len]` is valid UTF-8.
    #[inline]
    fn inline(len: u8, data: [u8; SMOL_STR_INLINE_CAP]) -> Self {
        debug_assert!((len as usize) <= SMOL_STR_INLINE_CAP);
        Self {
            repr: SmolStrRepr::Inline { len, data },
        }
    }

    /// Wrap an existing `Arc<str>` as the heap representation.
    #[inline]
    fn heap(arc: Arc<str>) -> Self {
        Self {
            repr: SmolStrRepr::Heap(arc),
        }
    }

    /// Borrow the payload as a `&str` slice. Cheap (no copies) in both
    /// `Inline` and `Heap` modes.
    #[inline]
    pub fn as_str(&self) -> &str {
        match &self.repr {
            SmolStrRepr::Inline { len, data } => {
                let slice = &data[..*len as usize];
                // SAFETY: the representation is private and every
                // constructor validates or copies from an existing
                // `str`, so `data[..len]` is always UTF-8.
                unsafe { std::str::from_utf8_unchecked(slice) }
            }
            SmolStrRepr::Heap(arc) => arc,
        }
    }

    /// Byte length of the payload (matching `str::len`).
    #[inline]
    pub fn len(&self) -> usize {
        match &self.repr {
            SmolStrRepr::Inline { len, .. } => *len as usize,
            SmolStrRepr::Heap(arc) => arc.len(),
        }
    }

    /// `true` iff the payload is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when the payload is stored inline (no heap
    /// allocation). Useful for SSO-aware diagnostics + tests.
    #[inline]
    pub fn is_inline(&self) -> bool {
        matches!(&self.repr, SmolStrRepr::Inline { .. })
    }

    /// Returns `true` iff every byte in the payload is ASCII
    /// (`< 0x80`).
    ///
    /// # Why this exists
    ///
    /// The tree-walker case-fold helpers (`upper` / `lower` / `title`
    /// in `relon-evaluator::stdlib`) accept an `AsciiHint` so they can
    /// skip the per-call SIMD scan inside
    /// `fold_string_with_ascii_hint`. Without a `SmolStr`-side oracle
    /// every surface call had to pass `AsciiHint::Unknown` and let the
    /// fold engine pay the scan cost — even when the caller's value
    /// container had the bytes right there. Wiring `is_ascii()` into
    /// the helpers lets them surface `AllAscii` / `KnownNonAscii` and
    /// route through the preclassified fast path documented in
    /// `crates/relon-bench/benches/ascii_case_fold.rs` (the
    /// `preclassified_*` rows in `bench ascii_case_fold`).
    ///
    /// # Cost
    ///
    /// * **Inline** (`len ≤ SMOL_STR_INLINE_CAP = 22`): a scan over at
    ///   most 22 bytes via `[u8]::is_ascii()`, i.e. a small, bounded
    ///   amount of work that never leaves the value slot.
    /// * **Heap** (`Arc<str>`): delegates to `str::is_ascii()`, which
    ///   scans the full payload (O(len)). A future revision can cache
    ///   the bit beside the `Arc<str>` pointer so heap payloads become
    ///   an O(1) load too; for now the on-demand scan keeps the slot
    ///   layout — and therefore the 24-byte size — unchanged.
    #[inline]
    pub fn is_ascii(&self) -> bool {
        match &self.repr {
            // Inline: scan the (≤ 22-byte) data prefix directly. Even
            // on a non-SIMD target this is a tight loop bounded by the
            // inline cap.
            SmolStrRepr::Inline { len, data } => data[..*len as usize].is_ascii(),
            // Heap: delegate to `str::is_ascii`. See the "Cost" section
            // of this method's docs for the follow-up cache work.
            SmolStrRepr::Heap(arc) => arc.is_ascii(),
        }
    }

    /// Build a `SmolStr` from any `&str`. ≤ [`SMOL_STR_INLINE_CAP`]
    /// bytes land inline; longer payloads allocate one `Arc<str>`.
    ///
    /// Named `from_borrowed` to avoid shadowing the `FromStr` trait
    /// method (clippy::should_implement_trait); the trait impl below
    /// forwards to this helper so `"x".parse::<SmolStr>()` keeps
    /// working too.
    #[inline]
    pub fn from_borrowed(s: &str) -> Self {
        let bytes = s.as_bytes();
        if bytes.len() <= SMOL_STR_INLINE_CAP {
            // Zero-init the tail unconditionally so `as_str()` only
            // needs to look at `len` (no per-byte sentinel scan) and the
            // unused bytes are always in a defined state.
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            data[..bytes.len()].copy_from_slice(bytes);
            // Lossless: `bytes.len() <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(bytes.len() as u8, data)
        } else {
            Self::heap(Arc::from(s))
        }
    }

    /// Consume a `String`. ≤ [`SMOL_STR_INLINE_CAP`] bytes copy into the
    /// inline slot and drop the original heap buffer.
    ///
    /// Longer payloads are converted with `Arc::<str>::from(String)`.
    /// Note that this conversion does **not** reuse the `String`'s
    /// buffer: an `Arc` stores its reference counts in front of the
    /// payload, so the standard library allocates a fresh
    /// reference-counted buffer, copies the bytes into it, and frees the
    /// original allocation.
    #[inline]
    pub fn from_string(s: String) -> Self {
        if s.len() <= SMOL_STR_INLINE_CAP {
            // `s` (and its heap buffer) is dropped when this function
            // returns, after the bytes were copied inline.
            SmolStr::from_borrowed(s.as_str())
        } else {
            Self::heap(Arc::from(s))
        }
    }

    /// Concatenate two `&str` slices into a single `SmolStr` without
    /// going through a `format!` call.
    ///
    /// * If `a.len() + b.len() <= SMOL_STR_INLINE_CAP` the result lands
    ///   in the inline slot — zero allocations on the path.
    /// * Otherwise both slices are pushed into one pre-sized `String`,
    ///   which is then converted into the `Arc<str>` heap payload (the
    ///   conversion allocates the `Arc` buffer and copies once more).
    ///
    /// This is the hot path the evaluator's `Operator::Add` rule on
    /// `Value::String(a) + Value::String(b)` (W3-style concat) goes
    /// through; eliminating the `format!` indirection drops the
    /// short-string concat row by ~3x in the bench.
    #[inline]
    pub fn concat(a: &str, b: &str) -> Self {
        // Cannot overflow: each `str` is at most `isize::MAX` bytes long,
        // so the sum of two lengths always fits in a `usize`.
        let total = a.len() + b.len();
        if total <= SMOL_STR_INLINE_CAP {
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            data[..a.len()].copy_from_slice(a.as_bytes());
            data[a.len()..total].copy_from_slice(b.as_bytes());
            // Lossless: `total <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(total as u8, data)
        } else {
            // Heap fallback: pre-size a `String` so pushing both slices
            // never reallocates, then convert it into an `Arc<str>`.
            // `Arc::from(String)` allocates the Arc payload and copies
            // the bytes; the temporary `String` buffer is freed after.
            let mut buf = String::with_capacity(total);
            buf.push_str(a);
            buf.push_str(b);
            Self::heap(Arc::from(buf))
        }
    }

    /// Concatenate N `&str` slices into a single `SmolStr` with an
    /// allocation count that does not grow with the arity. Compared to
    /// the recursive `concat(concat(a, b), c)` shape this drops the
    /// intermediate `Arc<str>` allocations (and their refcount drops)
    /// entirely — useful when the evaluator detects a left-leaning `+`
    /// chain on `Value::String` operands
    /// (e.g. `"prefix" + name + ": " + value`).
    ///
    /// * Pre-scans the total length once.
    /// * Inline-fast-path when `total <= SMOL_STR_INLINE_CAP`: no
    ///   allocator hit, single byte-fill into the 22-byte slot.
    /// * Heap fallback allocates one `String::with_capacity(total)`,
    ///   pushes each slice in order, then converts the buffer with
    ///   `Arc::from(String)`, which allocates the final `Arc<str>` and
    ///   copies the bytes into it.
    ///
    /// Degenerate inputs:
    ///
    /// * Zero slices -> empty inline payload.
    /// * One slice -> identical semantics to `from_borrowed`.
    /// * Two slices -> identical semantics to `concat`. Kept as a single
    ///   entry point so the evaluator can pick `concat_many` whenever the
    ///   chain length is `>= 2` without dispatching on arity.
    ///
    /// # Panics
    ///
    /// Panics if the combined length of all slices overflows `usize`.
    /// That requires the same memory to be referenced many times over and
    /// is not expected from evaluator call sites, but a clear panic beats
    /// a silently wrapped length in release builds.
    #[inline]
    pub fn concat_many(slices: &[&str]) -> Self {
        // Checked sum: slices may alias each other, so their combined
        // length is not bounded by the address space the way a single
        // `str` is.
        let total = slices
            .iter()
            .try_fold(0usize, |acc, s| acc.checked_add(s.len()))
            .expect("SmolStr::concat_many: combined length overflows usize");
        if total <= SMOL_STR_INLINE_CAP {
            let mut data = [0u8; SMOL_STR_INLINE_CAP];
            let mut offset = 0usize;
            for s in slices {
                let bytes = s.as_bytes();
                data[offset..offset + bytes.len()].copy_from_slice(bytes);
                offset += bytes.len();
            }
            // Lossless: `total <= SMOL_STR_INLINE_CAP <= u8::MAX`.
            Self::inline(total as u8, data)
        } else {
            let mut buf = String::with_capacity(total);
            for s in slices {
                buf.push_str(s);
            }
            Self::heap(Arc::from(buf))
        }
    }

    /// Materialise an owned `String` copy of the payload. Allocates for
    /// inline and heap variants alike — call sites that only need a
    /// borrow should prefer [`SmolStr::as_str`] / `Deref`.
    #[inline]
    pub fn into_string(self) -> String {
        // `Arc::try_unwrap` requires a `Sized` payload, so it cannot be
        // used to steal the buffer out of an `Arc<str>`; we always copy.
        // The hot evaluator paths read through [`SmolStr::as_str`]; only
        // a handful of compatibility shims call `into_string` (host
        // boundary, JSON projector).
        self.as_str().to_owned()
    }

    /// Build an inline `SmolStr` by writing UTF-8 bytes directly into
    /// the 22-byte inline slot via the caller-supplied writer.
    ///
    /// `out_len` is the number of bytes the writer will emit; the call
    /// returns `None` immediately if `out_len > SMOL_STR_INLINE_CAP`,
    /// letting the caller fall through to its heap-path implementation
    /// without paying for the writer invocation. When the inline path
    /// is taken the caller receives a `&mut [u8]` of length `out_len`
    /// pointing into the (zero-initialised) inline buffer. The resulting
    /// byte prefix is validated before construction; invalid UTF-8
    /// returns `None`. This keeps the unchecked `as_str()` conversion
    /// behind the private representation sound while preserving the
    /// allocation-free fast path for ASCII case-fold helpers.
    #[inline]
    pub fn try_build_inline<F>(out_len: usize, write: F) -> Option<Self>
    where
        F: FnOnce(&mut [u8]),
    {
        if out_len > SMOL_STR_INLINE_CAP {
            return None;
        }
        let mut data = [0u8; SMOL_STR_INLINE_CAP];
        // Hand the writer the exact slice it must fill; the tail bytes
        // past `out_len` stay zero, matching the `from_borrowed` path.
        write(&mut data[..out_len]);
        // The writer is arbitrary caller code, so its output must be
        // checked before it is allowed behind `from_utf8_unchecked`.
        std::str::from_utf8(&data[..out_len]).ok()?;
        // Lossless: `out_len <= SMOL_STR_INLINE_CAP <= u8::MAX`.
        Some(Self::inline(out_len as u8, data))
    }
}
355
356impl Default for SmolStr {
357 #[inline]
358 fn default() -> Self {
359 SmolStr::new_empty()
360 }
361}
362
363impl Deref for SmolStr {
364 type Target = str;
365
366 #[inline]
367 fn deref(&self) -> &str {
368 self.as_str()
369 }
370}
371
372impl AsRef<str> for SmolStr {
373 #[inline]
374 fn as_ref(&self) -> &str {
375 self.as_str()
376 }
377}
378
379impl Borrow<str> for SmolStr {
380 #[inline]
381 fn borrow(&self) -> &str {
382 self.as_str()
383 }
384}
385
386impl From<&str> for SmolStr {
387 #[inline]
388 fn from(s: &str) -> Self {
389 SmolStr::from_borrowed(s)
390 }
391}
392
393impl std::str::FromStr for SmolStr {
394 type Err = std::convert::Infallible;
395
396 #[inline]
397 fn from_str(s: &str) -> Result<Self, Self::Err> {
398 Ok(SmolStr::from_borrowed(s))
399 }
400}
401
402impl From<String> for SmolStr {
403 #[inline]
404 fn from(s: String) -> Self {
405 SmolStr::from_string(s)
406 }
407}
408
409impl From<&String> for SmolStr {
410 #[inline]
411 fn from(s: &String) -> Self {
412 SmolStr::from_borrowed(s.as_str())
413 }
414}
415
416impl From<SmolStr> for String {
417 #[inline]
418 fn from(s: SmolStr) -> Self {
419 s.into_string()
420 }
421}
422
423impl From<&SmolStr> for String {
424 #[inline]
425 fn from(s: &SmolStr) -> Self {
426 s.as_str().to_owned()
427 }
428}
429
430impl fmt::Debug for SmolStr {
431 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
432 fmt::Debug::fmt(self.as_str(), f)
433 }
434}
435
436impl fmt::Display for SmolStr {
437 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438 fmt::Display::fmt(self.as_str(), f)
439 }
440}
441
442impl PartialEq for SmolStr {
443 #[inline]
444 fn eq(&self, other: &Self) -> bool {
445 self.as_str() == other.as_str()
446 }
447}
448
// Marker impl: `PartialEq` above compares `str` payloads, which is a total
// equivalence relation, so `Eq` holds.
impl Eq for SmolStr {}
450
451impl PartialEq<str> for SmolStr {
452 #[inline]
453 fn eq(&self, other: &str) -> bool {
454 self.as_str() == other
455 }
456}
457
458impl PartialEq<&str> for SmolStr {
459 #[inline]
460 fn eq(&self, other: &&str) -> bool {
461 self.as_str() == *other
462 }
463}
464
465impl PartialEq<String> for SmolStr {
466 #[inline]
467 fn eq(&self, other: &String) -> bool {
468 self.as_str() == other.as_str()
469 }
470}
471
472impl PartialEq<SmolStr> for str {
473 #[inline]
474 fn eq(&self, other: &SmolStr) -> bool {
475 self == other.as_str()
476 }
477}
478
479impl PartialEq<SmolStr> for &str {
480 #[inline]
481 fn eq(&self, other: &SmolStr) -> bool {
482 *self == other.as_str()
483 }
484}
485
486impl PartialEq<SmolStr> for String {
487 #[inline]
488 fn eq(&self, other: &SmolStr) -> bool {
489 self.as_str() == other.as_str()
490 }
491}
492
493impl PartialOrd for SmolStr {
494 #[inline]
495 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
496 Some(self.cmp(other))
497 }
498}
499
500impl Ord for SmolStr {
501 #[inline]
502 fn cmp(&self, other: &Self) -> Ordering {
503 self.as_str().cmp(other.as_str())
504 }
505}
506
507impl Hash for SmolStr {
508 #[inline]
509 fn hash<H: Hasher>(&self, state: &mut H) {
510 // Hash the &str representation so SmolStr / &str / String hash
511 // to the same value when their payloads match — preserves the
512 // ability to look up Dict keys by &str across types.
513 self.as_str().hash(state)
514 }
515}
516
517impl Serialize for SmolStr {
518 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
519 where
520 S: Serializer,
521 {
522 serializer.serialize_str(self.as_str())
523 }
524}
525
526impl<'de> Deserialize<'de> for SmolStr {
527 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
528 where
529 D: Deserializer<'de>,
530 {
531 let s = String::deserialize(deserializer)?;
532 Ok(SmolStr::from_string(s))
533 }
534}
535
#[cfg(test)]
mod tests {
    //! Unit tests for `SmolStr`: representation selection at the inline
    //! cap, clone sharing, serde round-trip, cross-type equality/hash,
    //! the concat helpers, `try_build_inline`, and `is_ascii`.

    use super::*;

    #[test]
    fn empty_is_inline() {
        let s = SmolStr::new_empty();
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn short_payload_stays_inline() {
        let s = SmolStr::from_borrowed("hello");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
        assert_eq!(s.len(), 5);
    }

    #[test]
    fn cap_boundary_inline() {
        // Exactly cap bytes -> still inline.
        let payload = "a".repeat(SMOL_STR_INLINE_CAP);
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
        assert_eq!(s.as_str(), payload);
    }

    #[test]
    fn one_past_cap_goes_heap() {
        let payload = "a".repeat(SMOL_STR_INLINE_CAP + 1);
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP + 1);
        assert_eq!(s.as_str(), payload);
    }

    #[test]
    fn clone_inline_does_not_alloc_heap() {
        let s = SmolStr::from_borrowed("short");
        let c = s.clone();
        assert!(c.is_inline());
        assert_eq!(s, c);
    }

    #[test]
    fn clone_heap_shares_arc() {
        let s = SmolStr::from_borrowed(&"x".repeat(40));
        let c = s.clone();
        match (&s.repr, &c.repr) {
            (SmolStrRepr::Heap(a), SmolStrRepr::Heap(b)) => {
                assert!(
                    Arc::ptr_eq(a, b),
                    "Heap clone should share the same Arc allocation"
                );
            }
            _ => panic!("expected both heap variants"),
        }
    }

    #[test]
    fn round_trip_serde() {
        let s = SmolStr::from_borrowed("hello world");
        let json = serde_json::to_string(&s).unwrap();
        assert_eq!(json, "\"hello world\"");
        let back: SmolStr = serde_json::from_str(&json).unwrap();
        assert_eq!(back, s);
    }

    #[test]
    fn eq_against_str_and_string() {
        let s = SmolStr::from_borrowed("k");
        assert_eq!(s, "k");
        assert_eq!(s, *"k");
        assert_eq!(s, String::from("k"));
        assert_eq!(String::from("k"), s);
    }

    #[test]
    fn hash_matches_str_hash() {
        // `Borrow<str>` is only sound for map lookups if `SmolStr` hashes
        // exactly like the `str` it wraps, in both representations.
        use std::collections::hash_map::DefaultHasher;

        fn hash_of<T: Hash + ?Sized>(value: &T) -> u64 {
            let mut hasher = DefaultHasher::new();
            value.hash(&mut hasher);
            hasher.finish()
        }

        let short = "key";
        let long = "k".repeat(SMOL_STR_INLINE_CAP + 10);
        assert_eq!(hash_of(&SmolStr::from_borrowed(short)), hash_of(short));
        assert_eq!(hash_of(&SmolStr::from_borrowed(&long)), hash_of(long.as_str()));
    }

    #[test]
    fn size_is_24_bytes() {
        // Match `String` exactly so `Value` enum width does not grow.
        assert_eq!(std::mem::size_of::<SmolStr>(), 24);
    }

    #[test]
    fn from_string_picks_representation_by_length() {
        let short = SmolStr::from_string(String::from("short"));
        assert!(short.is_inline());
        assert_eq!(short.as_str(), "short");

        let payload = "z".repeat(SMOL_STR_INLINE_CAP + 1);
        let long = SmolStr::from_string(payload.clone());
        assert!(!long.is_inline());
        assert_eq!(long.as_str(), payload);
    }

    #[test]
    fn into_string_round_trips_both_representations() {
        assert_eq!(SmolStr::new_empty().into_string(), "");
        assert_eq!(SmolStr::from_borrowed("short").into_string(), "short");
        let payload = "q".repeat(40);
        assert_eq!(SmolStr::from_borrowed(&payload).into_string(), payload);
    }

    #[test]
    fn concat_at_cap_inline() {
        // 11 + 11 = 22 bytes -> still inline.
        let s = SmolStr::concat(&"a".repeat(11), &"b".repeat(11));
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
        assert_eq!(s.as_str(), "aaaaaaaaaaabbbbbbbbbbb");
    }

    #[test]
    fn concat_past_cap_goes_heap() {
        // 11 + 12 = 23 bytes -> one past the cap, heap.
        let s = SmolStr::concat(&"a".repeat(11), &"b".repeat(12));
        assert!(!s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP + 1);
        assert_eq!(s.as_str(), "aaaaaaaaaaabbbbbbbbbbbb");
    }

    #[test]
    fn concat_many_empty_is_empty_inline() {
        let s = SmolStr::concat_many(&[]);
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn concat_many_single_slice_matches_from_borrowed() {
        let s = SmolStr::concat_many(&["hello"]);
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn concat_many_inline_path() {
        // 4 chunks of 5 bytes = 20 bytes, still inline.
        let s = SmolStr::concat_many(&["aaaaa", "bbbbb", "ccccc", "ddddd"]);
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "aaaaabbbbbcccccddddd");
        assert_eq!(s.len(), 20);
    }

    #[test]
    fn concat_many_at_cap_inline() {
        // 22 bytes exactly -> still inline.
        let s = SmolStr::concat_many(&["a".repeat(11).as_str(), "b".repeat(11).as_str()]);
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
    }

    #[test]
    fn concat_many_heap_path() {
        // 4 chunks of 8 = 32 bytes, past cap -> heap.
        let s = SmolStr::concat_many(&["aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd"]);
        assert!(!s.is_inline());
        assert_eq!(s.as_str(), "aaaaaaaabbbbbbbbccccccccdddddddd");
        assert_eq!(s.len(), 32);
    }

    #[test]
    fn try_build_inline_fills_inline_slot() {
        // Writer fills the slice byte-by-byte with the lower-case of
        // each ASCII letter — exercises the to_lower fast path shape
        // the stdlib helpers now use.
        let src = b"HELLO";
        let s = SmolStr::try_build_inline(src.len(), |out| {
            for (i, b) in src.iter().enumerate() {
                out[i] = b.to_ascii_lowercase();
            }
        })
        .expect("inline path should accept 5-byte payload");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn try_build_inline_at_cap_inline() {
        // Exactly 22 bytes — boundary of the inline slot.
        let s =
            SmolStr::try_build_inline(SMOL_STR_INLINE_CAP, |out| out.fill(b'x')).expect("22 fits");
        assert!(s.is_inline());
        assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
    }

    #[test]
    fn try_build_inline_overflow_returns_none() {
        // 23 bytes — past the cap. Writer must not be invoked; we
        // assert via a panicking closure to catch a hypothetical
        // regression.
        let s = SmolStr::try_build_inline(SMOL_STR_INLINE_CAP + 1, |_out| {
            panic!("writer must not run when out_len exceeds cap");
        });
        assert!(s.is_none());
    }

    #[test]
    fn try_build_inline_rejects_invalid_utf8() {
        let s = SmolStr::try_build_inline(1, |out| out[0] = 0xff);
        assert!(s.is_none());
    }

    #[test]
    fn try_build_inline_zero_length_is_empty() {
        let s = SmolStr::try_build_inline(0, |_out| { /* nothing */ })
            .expect("zero-length always inline");
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn is_ascii_inline_empty() {
        // Empty payload is vacuously ASCII.
        let s = SmolStr::new_empty();
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_pure_ascii() {
        let s = SmolStr::from_borrowed("hello");
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_with_high_byte() {
        // 'caf' + U+00E9 (encoded as 0xC3 0xA9). Built from raw bytes
        // so the source file stays pure-ASCII while the SmolStr
        // payload contains a byte >= 0x80, forcing `is_ascii()` to
        // false.
        let raw = vec![b'c', b'a', b'f', 0xC3, 0xA9];
        let payload = String::from_utf8(raw).expect("valid UTF-8");
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert!(!s.is_ascii());
    }

    #[test]
    fn is_ascii_inline_at_cap_boundary() {
        // 22-byte ASCII payload sits exactly at the inline cap.
        let payload = "a".repeat(SMOL_STR_INLINE_CAP);
        let s = SmolStr::from_borrowed(&payload);
        assert!(s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_heap_pure_ascii() {
        let payload = "x".repeat(SMOL_STR_INLINE_CAP + 8);
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert!(s.is_ascii());
    }

    #[test]
    fn is_ascii_heap_with_non_ascii() {
        // Heap-sized payload (> 22 bytes) that contains a non-ASCII
        // codepoint near the end — exercises the heap-path delegation
        // to `str::is_ascii`. We append U+00E9 (encoded as 0xC3 0xA9
        // raw bytes) so the source file stays pure-ASCII while the
        // runtime payload contains a byte >= 0x80.
        let mut payload = "x".repeat(SMOL_STR_INLINE_CAP).into_bytes();
        payload.extend_from_slice(&[b'y', b'y', b'z', 0xC3, 0xA9]);
        let payload = String::from_utf8(payload).expect("valid UTF-8");
        let s = SmolStr::from_borrowed(&payload);
        assert!(!s.is_inline());
        assert!(!s.is_ascii());
    }

    #[test]
    fn concat_many_matches_nested_concat() {
        // Result must be byte-identical to the recursive shape so the
        // evaluator can swap in `concat_many` without changing user-
        // visible string values.
        let leaves = ["foo_", "bar_", "baz_", "qux_"];
        let nested = {
            let mut acc = SmolStr::new_empty();
            for leaf in leaves.iter() {
                acc = SmolStr::concat(acc.as_str(), leaf);
            }
            acc
        };
        let folded = SmolStr::concat_many(&leaves);
        assert_eq!(nested.as_str(), folded.as_str());
    }
}
789}