Skip to main content

surrealdb_strand/
lib.rs

1//! A small-string-optimised immutable string type used throughout the value layer.
2//!
3//! [`Strand`] backs string-shaped keys and values — `Value::String`, `Object` keys, `TableName`,
4//! `RecordIdKey::String` — and trades the mutability of `String` for three complementary storage
5//! strategies selected at construction time:
6//!
//! * `Inline` — short strings (≤ [`INLINE_CAP`] bytes) stored directly inside the `Strand` value
//!   itself (wherever that value lives), with no separate heap allocation.
9//! * `Static` — a `&'static str` wrapper for compile-time known values. Construction is `const`,
10//!   clone is a pointer copy, and drop is a no-op. Ideal for long literals (table names, reserved
11//!   keywords, response keys) that would otherwise allocate. Build one with [`Strand::new_static`].
12//! * `Boxed` — dynamic long strings held in a `Box<str>`: one allocation per string, no atomic ops
13//!   on construction or drop. Clone does `malloc + memcpy`, so reach for [`Strand::new_static`]
14//!   whenever the value is known at compile time.
15//!
16//! ## Layout
17//!
18//! `Strand` uses a custom 24-byte union layout. The first 16 bytes (on 64-bit) always form a
19//! valid `&str` fat pointer for `Static` and `Boxed` strings. For `Inline` strings, the string
20//! data is stored inline. The 24th byte (index 23) is used as a tag:
21//! * `0..=23`: The string is `Inline`, and the tag is the length.
22//! * `254`: The string is `Static`.
23//! * `255`: The string is `Boxed`.
24//!
25//! This layout allows `as_str()` to be completely branchless, significantly improving the
26//! performance of equality and ordering comparisons for all variants.
27
28use std::borrow::Borrow;
29use std::cmp::Ordering;
30use std::fmt::{Debug, Display};
31use std::hash::{Hash, Hasher};
32use std::mem::ManuallyDrop;
33use std::ops::Deref;
34
35use revision::{DeserializeRevisioned, Error, Revisioned, SerializeRevisioned};
36use serde::de::{self, Visitor};
37use serde::{Deserialize, Deserializer, Serialize, Serializer};
38
/// Maximum byte length of a string that can be stored inline.
///
/// The inline buffer is 24 bytes wide and its last byte (index 23) is the tag/length byte,
/// which leaves 23 bytes for string data.
pub const INLINE_CAP: usize = 23;

/// Tag for static strings.
const TAG_STATIC: u8 = 254;
/// Tag for boxed strings.
const TAG_BOXED: u8 = 255;

/// Length of the padding bytes in the heap data.
///
/// `HeapData` is laid out as pointer, length, padding, tag. Sizing the padding as
/// `23 - 2 * size_of::<usize>()` puts the one-byte tag at byte offset 23 whatever the pointer
/// width, i.e. at the same offset as `inline[23]` in the other union view.
const HEAP_PAD_LEN: usize = 23 - 2 * std::mem::size_of::<usize>();
49
/// Heap data for boxed strings.
///
/// The same pointer/length view is also used for `Static` strings (see `Strand::new_static`);
/// the `tag` field tells the two apart. `#[repr(C)]` fixes the field order so that `tag` is
/// the final byte of the struct.
#[derive(Clone, Copy)]
#[repr(C)]
struct HeapData {
	/// Pointer to the first byte of the string data.
	ptr: *const u8,
	/// Length of the string data in bytes.
	len: usize,
	/// Filler (written as zeros by the constructors) that pushes `tag` out to byte offset 23.
	_pad: [u8; HEAP_PAD_LEN],
	/// `TAG_STATIC` or `TAG_BOXED`.
	tag: u8,
}
59
/// Union for inline and heap data.
///
/// Which view is valid is decided by byte 23, which both views expose: as `inline[23]` and as
/// `HeapData::tag`.
#[repr(C)]
union StrandData {
	/// Inline view: string bytes start at index 0; index 23 holds the tag, which for inline
	/// strings is the byte length.
	inline: [u8; 24],
	/// Pointer/length view used by `Static` and `Boxed` strings.
	heap: ManuallyDrop<HeapData>,
}
66
/// Immutable string with inline small-string optimisation.
///
/// See the [module docs](self) for the design rationale.
#[repr(transparent)]
pub struct Strand {
	/// Raw 24-byte storage; interpreted according to the tag in its last byte.
	data: StrandData,
}
74
// SAFETY: `Strand` only misses the auto traits because `HeapData` holds a raw `*const u8`.
// In the constructors visible in this file that pointer comes either from a `&'static str` or
// from a `Box<str>` whose ownership the `Strand` took over, and no `&self` method writes
// through it, so moving a `Strand` across threads is like moving a `Box<str>`/`&'static str`.
unsafe impl Send for Strand {}
// SAFETY: the `&self` accessors in this file only read the stored bytes (no interior
// mutability is visible here), so sharing a `&Strand` is like sharing a `&str`.
unsafe impl Sync for Strand {}
77
78impl Strand {
79	/// Create a new [`Strand`] from any string-like input.
80	#[inline]
81	pub fn new(s: impl AsRef<str>) -> Self {
82		let s = s.as_ref();
83		if s.len() <= INLINE_CAP {
84			Self::new_inline(s)
85		} else {
86			Self::from(Box::from(s))
87		}
88	}
89
90	#[inline]
91	fn new_inline(s: &str) -> Self {
92		debug_assert!(s.len() <= INLINE_CAP);
93		let mut inline = [0u8; 24];
94		// SAFETY: We checked the length above.
95		unsafe {
96			std::ptr::copy_nonoverlapping(s.as_ptr(), inline.as_mut_ptr(), s.len());
97		}
98		inline[23] = s.len() as u8;
99		Self {
100			data: StrandData {
101				inline,
102			},
103		}
104	}
105
106	/// Wrap a `&'static str` as a [`Strand`] without allocating.
107	///
108	/// This never allocates or copies; the returned `Strand` holds
109	/// the caller's fat pointer directly, and cloning it is a
110	/// bitwise copy. Callable in `const` context, so compile-time
111	/// `Strand` constants of arbitrary length are fine:
112	///
113	/// ```
114	/// # use surrealdb_strand::Strand;
115	/// const KIND: Strand = Strand::new_static("geometry<multipolygon>");
116	/// ```
117	#[inline(always)]
118	pub const fn new_static(text: &'static str) -> Self {
119		Self {
120			data: StrandData {
121				heap: ManuallyDrop::new(HeapData {
122					ptr: text.as_ptr(),
123					len: text.len(),
124					_pad: [0; HEAP_PAD_LEN],
125					tag: TAG_STATIC,
126				}),
127			},
128		}
129	}
130
131	/// Format a value directly into an inline `Strand`, bypassing any heap allocation.
132	///
133	/// If the formatted string exceeds `INLINE_CAP` bytes, this falls back to a heap-allocated
134	/// `Boxed` string. This is ideal for constructing short, dynamic strings (like keys or
135	/// identifiers) where the length is known or highly likely to be small.
136	pub fn from_display(d: impl Display) -> Self {
137		use std::fmt::Write;
138		// Custom writer for the inline buffer
139		struct StrandWriter {
140			inline: [u8; 24],
141			len: usize,
142			overflow: Option<String>,
143		}
144		// Implement the inline buffer writer
145		impl Write for StrandWriter {
146			fn write_str(&mut self, s: &str) -> std::fmt::Result {
147				// Check if we have overflowed already
148				if let Some(overflow) = &mut self.overflow {
149					overflow.push_str(s);
150					return Ok(());
151				}
152				// Get the string bytes
153				let bytes = s.as_bytes();
154				// Calculate the end length
155				let end = self.len + bytes.len();
156				// Check if it fits in the inline buffer
157				if end <= INLINE_CAP {
158					// It fits in the inline buffer
159					unsafe {
160						std::ptr::copy_nonoverlapping(
161							bytes.as_ptr(),
162							self.inline.as_mut_ptr().add(self.len),
163							bytes.len(),
164						);
165					}
166					self.len = end;
167				} else {
168					// It overflows! Convert what we have so far into a String, then append the new
169					// string.
170					let valid_utf8 =
171						unsafe { std::str::from_utf8_unchecked(&self.inline[..self.len]) };
172					let mut overflow = String::with_capacity(end);
173					overflow.push_str(valid_utf8);
174					overflow.push_str(s);
175					self.overflow = Some(overflow);
176				}
177				Ok(())
178			}
179		}
180		// Create a new writer
181		let mut writer = StrandWriter {
182			inline: [0u8; 24],
183			len: 0,
184			overflow: None,
185		};
186		// Write the displayable value into our custom writer
187		write!(&mut writer, "{}", d).expect("writing to StrandWriter should never fail");
188		// Check if we have an overflow
189		if let Some(overflow) = writer.overflow {
190			// It was too long, return a Boxed strand
191			Self::from(overflow)
192		} else {
193			// It fit perfectly! Set the tag/length and return the inline strand
194			writer.inline[23] = writer.len as u8;
195			Self {
196				data: StrandData {
197					inline: writer.inline,
198				},
199			}
200		}
201	}
202
203	/// Whether this string is stored inline (no heap allocation).
204	#[inline]
205	pub fn is_inline(&self) -> bool {
206		unsafe { self.data.inline[23] <= INLINE_CAP as u8 }
207	}
208
209	/// Whether this string wraps a `&'static str` (no allocation).
210	#[inline]
211	pub fn is_static(&self) -> bool {
212		unsafe { self.data.inline[23] == TAG_STATIC }
213	}
214
215	/// Whether this string is heap-allocated in a `Box<str>`.
216	#[inline]
217	pub fn is_boxed(&self) -> bool {
218		unsafe { self.data.inline[23] == TAG_BOXED }
219	}
220
221	/// Access the underlying string slice.
222	#[inline(always)]
223	pub fn as_str(&self) -> &str {
224		// SAFETY: The tag byte is strictly controlled during construction.
225		// It is either the length of an inline string (0..=23), TAG_STATIC (254),
226		// or TAG_BOXED (255). This allows for LLVM optimizations.
227		unsafe {
228			// Get the tag byte.
229			let tag = self.data.inline[23];
230			// Tell the compiler that tags between 24 and 253 are impossible.
231			if tag > INLINE_CAP as u8 && tag != TAG_STATIC && tag != TAG_BOXED {
232				std::hint::unreachable_unchecked();
233			}
234			// Check if the string is inline.
235			let is_inline = tag <= INLINE_CAP as u8;
236			// Get the length of the string.
237			let len = if is_inline {
238				tag as usize
239			} else {
240				self.data.heap.len
241			};
242			// Get the pointer to the string.
243			let ptr = if is_inline {
244				self.data.inline.as_ptr()
245			} else {
246				self.data.heap.ptr
247			};
248			// Return the string.
249			std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len))
250		}
251	}
252
253	/// Byte length of the string.
254	#[inline]
255	pub fn len(&self) -> usize {
256		self.as_str().len()
257	}
258
259	/// Whether the string is empty.
260	#[inline]
261	pub fn is_empty(&self) -> bool {
262		self.as_str().is_empty()
263	}
264
265	/// Convert into an owned `String`, copying the bytes.
266	#[inline]
267	pub fn into_string(self) -> String {
268		self.as_str().to_owned()
269	}
270}
271
272// -----------------------------------------------------------------------
273// Drop
274// -----------------------------------------------------------------------
275
276impl Drop for Strand {
277	#[inline]
278	fn drop(&mut self) {
279		// SAFETY: We only drop the inner Box<str> if the tag indicates it is Boxed.
280		// The pointer and length are guaranteed to be valid because they were created
281		// from a valid Box<str> in the `From<Box<str>>` implementation.
282		unsafe {
283			if self.data.inline[23] == TAG_BOXED {
284				let ptr = self.data.heap.ptr as *mut u8;
285				let len = self.data.heap.len;
286				let slice = std::ptr::slice_from_raw_parts_mut(ptr, len);
287				let _ = Box::from_raw(slice as *mut str);
288			}
289		}
290	}
291}
292
293// -----------------------------------------------------------------------
294// Clone
295// -----------------------------------------------------------------------
296
297impl Clone for Strand {
298	#[inline]
299	fn clone(&self) -> Self {
300		// SAFETY: We explicitly check the tag to see if it is Boxed.
301		// If it is, we perform a deep copy by allocating a new Box<str>.
302		// If it is Inline or Static, we can safely perform a bitwise copy because
303		// neither variant owns any heap allocations that need to be duplicated.
304		unsafe {
305			let tag = self.data.inline[23];
306			if tag == TAG_BOXED {
307				#[cold]
308				#[inline(never)]
309				fn cold_clone(s: &str) -> Strand {
310					Strand::from(Box::from(s))
311				}
312				cold_clone(self.as_str())
313			} else {
314				// For Inline and Static, it's just a bitwise copy
315				std::ptr::read(self as *const Strand)
316			}
317		}
318	}
319}
320
321// -----------------------------------------------------------------------
322// Default / Deref / AsRef / Borrow
323// -----------------------------------------------------------------------
324
325impl Default for Strand {
326	#[inline]
327	fn default() -> Self {
328		Self {
329			data: StrandData {
330				inline: [0u8; 24],
331			},
332		}
333	}
334}
335
336impl Deref for Strand {
337	type Target = str;
338	#[inline]
339	fn deref(&self) -> &str {
340		self.as_str()
341	}
342}
343
344impl AsRef<str> for Strand {
345	#[inline]
346	fn as_ref(&self) -> &str {
347		self.as_str()
348	}
349}
350
351impl Borrow<str> for Strand {
352	#[inline]
353	fn borrow(&self) -> &str {
354		self.as_str()
355	}
356}
357
358// -----------------------------------------------------------------------
359// Construction conversions
360// -----------------------------------------------------------------------
361
362impl From<&str> for Strand {
363	#[inline]
364	fn from(s: &str) -> Self {
365		Self::new(s)
366	}
367}
368
369impl From<String> for Strand {
370	#[inline]
371	fn from(s: String) -> Self {
372		if s.len() <= INLINE_CAP {
373			Self::new_inline(&s)
374		} else {
375			Self::from(s.into_boxed_str())
376		}
377	}
378}
379
380impl From<&String> for Strand {
381	#[inline]
382	fn from(s: &String) -> Self {
383		Self::new(s.as_str())
384	}
385}
386
387impl From<Box<str>> for Strand {
388	#[inline]
389	fn from(s: Box<str>) -> Self {
390		if s.len() <= INLINE_CAP {
391			Self::new_inline(&s)
392		} else {
393			let ptr = s.as_ptr();
394			let len = s.len();
395			std::mem::forget(s);
396			Self {
397				data: StrandData {
398					heap: ManuallyDrop::new(HeapData {
399						ptr,
400						len,
401						_pad: [0; HEAP_PAD_LEN],
402						tag: TAG_BOXED,
403					}),
404				},
405			}
406		}
407	}
408}
409
410impl From<Strand> for String {
411	#[inline]
412	fn from(s: Strand) -> String {
413		s.as_str().to_owned()
414	}
415}
416
417impl From<&Strand> for String {
418	#[inline]
419	fn from(s: &Strand) -> String {
420		s.as_str().to_owned()
421	}
422}
423
424// -----------------------------------------------------------------------
425// Equality / ordering / hashing
426// -----------------------------------------------------------------------
427
// Marker impl: `Strand` equality (see `PartialEq` below) is plain string equality, which is a
// total equivalence relation.
impl Eq for Strand {}
429
430impl PartialEq for Strand {
431	#[inline]
432	fn eq(&self, other: &Self) -> bool {
433		// SAFETY: We only compare the arrays directly if both tags are <= INLINE_CAP.
434		// When an inline string is created in `new_inline`, the entire 24-byte array
435		// is zero-initialized before the string data is copied into it.
436		// Therefore, any unused padding bytes are guaranteed to be zero, making a
437		// direct byte-for-byte comparison of the full 24-byte array safe and correct.
438		unsafe {
439			// Get the strand tags
440			let tag_a = self.data.inline[23];
441			let tag_b = other.data.inline[23];
442			// Fast path: Both are Inline strings
443			if tag_a <= INLINE_CAP as u8 && tag_b <= INLINE_CAP as u8 {
444				// We can compare the 24-byte arrays directly
445				return self.data.inline == other.data.inline;
446			}
447		}
448		// Slow path: Types are different
449		self.as_str() == other.as_str()
450	}
451}
452
453impl PartialEq<str> for Strand {
454	#[inline]
455	fn eq(&self, other: &str) -> bool {
456		self.as_str() == other
457	}
458}
459
460impl PartialEq<&str> for Strand {
461	#[inline]
462	fn eq(&self, other: &&str) -> bool {
463		self.as_str() == *other
464	}
465}
466
467impl PartialEq<String> for Strand {
468	#[inline]
469	fn eq(&self, other: &String) -> bool {
470		self.as_str() == other.as_str()
471	}
472}
473
474impl Ord for Strand {
475	#[inline]
476	fn cmp(&self, other: &Self) -> Ordering {
477		// SAFETY: We only extract slices from the inline array if both tags are <= INLINE_CAP.
478		// We use the tag as the exact length of the valid string data, ensuring we don't
479		// compare any unused padding bytes which could interfere with lexicographical ordering.
480		unsafe {
481			// Get the strand tags
482			let tag_a = self.data.inline[23];
483			let tag_b = other.data.inline[23];
484			// Fast path: Both are Inline strings
485			if tag_a <= INLINE_CAP as u8 && tag_b <= INLINE_CAP as u8 {
486				// Get the lengths of the strings
487				let len_a = tag_a as usize;
488				let len_b = tag_b as usize;
489				// For ordering, we must compare the valid bytes exactly because
490				// the padding bytes might interfere with lexicographical ordering.
491				let slice_a = std::slice::from_raw_parts(self.data.inline.as_ptr(), len_a);
492				let slice_b = std::slice::from_raw_parts(other.data.inline.as_ptr(), len_b);
493				// Compare the strings
494				return slice_a.cmp(slice_b);
495			}
496		}
497		// Slow path: Types are different
498		self.as_str().cmp(other.as_str())
499	}
500}
501
502impl PartialOrd for Strand {
503	#[inline]
504	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
505		Some(self.cmp(other))
506	}
507}
508
509impl PartialOrd<str> for Strand {
510	#[inline]
511	fn partial_cmp(&self, other: &str) -> Option<Ordering> {
512		self.as_str().partial_cmp(other)
513	}
514}
515
516impl PartialOrd<String> for Strand {
517	#[inline]
518	fn partial_cmp(&self, other: &String) -> Option<Ordering> {
519		self.as_str().partial_cmp(other.as_str())
520	}
521}
522
523impl Hash for Strand {
524	#[inline]
525	fn hash<H: Hasher>(&self, state: &mut H) {
526		self.as_str().hash(state)
527	}
528}
529
530impl Debug for Strand {
531	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
532		Debug::fmt(self.as_str(), f)
533	}
534}
535
536impl Display for Strand {
537	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
538		Display::fmt(self.as_str(), f)
539	}
540}
541
542// -----------------------------------------------------------------------
543// serde
544// -----------------------------------------------------------------------
545
546impl Serialize for Strand {
547	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
548	where
549		S: Serializer,
550	{
551		serializer.serialize_str(self.as_str())
552	}
553}
554
555impl<'de> Deserialize<'de> for Strand {
556	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
557	where
558		D: Deserializer<'de>,
559	{
560		struct StrandVisitor;
561
562		impl<'de> Visitor<'de> for StrandVisitor {
563			type Value = Strand;
564
565			fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
566				f.write_str("a string")
567			}
568
569			fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
570				Ok(Strand::from(v))
571			}
572
573			fn visit_string<E: de::Error>(self, v: String) -> Result<Self::Value, E> {
574				Ok(Strand::from(v))
575			}
576		}
577
578		deserializer.deserialize_str(StrandVisitor)
579	}
580}
581
582// -----------------------------------------------------------------------
583// revision
584// -----------------------------------------------------------------------
585
586impl Revisioned for Strand {
587	#[inline]
588	fn revision() -> u16 {
589		1
590	}
591}
592
593impl SerializeRevisioned for Strand {
594	#[inline]
595	fn serialize_revisioned<W: std::io::Write>(&self, writer: &mut W) -> Result<(), Error> {
596		self.as_str().serialize_revisioned(writer)
597	}
598}
599
600impl DeserializeRevisioned for Strand {
601	#[inline]
602	fn deserialize_revisioned<R: std::io::Read>(reader: &mut R) -> Result<Self, Error> {
603		let len = usize::deserialize_revisioned(reader)?;
604		if len == 0 {
605			return Ok(Self::default());
606		}
607		if len <= INLINE_CAP {
608			let mut inline = [0u8; 24];
609			reader.read_exact(&mut inline[..len]).map_err(Error::Io)?;
610			std::str::from_utf8(&inline[..len]).map_err(Error::Utf8Error)?;
611			inline[23] = len as u8;
612			return Ok(Strand {
613				data: StrandData {
614					inline,
615				},
616			});
617		}
618		let mut buf = vec![0u8; len];
619		reader.read_exact(&mut buf).map_err(Error::Io)?;
620		let s = String::from_utf8(buf).map_err(|e| Error::Utf8Error(e.utf8_error()))?;
621		Ok(Strand::from(s.into_boxed_str()))
622	}
623}
624
// Skipping a `Strand` is delegated to `String`'s implementation.
// NOTE(review): this relies on `Strand` and `String` sharing one revisioned encoding; the
// `revisioned_wire_matches_string` test in this file checks that byte-identity.
impl revision::SkipRevisioned for Strand {
	/// Skip one encoded value by forwarding to `String`'s `skip_revisioned`.
	#[inline]
	fn skip_revisioned<R: std::io::Read>(reader: &mut R) -> Result<(), Error> {
		<String as revision::SkipRevisioned>::skip_revisioned(reader)
	}

	/// Slice-reader variant; forwards to `String`'s `skip_revisioned_slice`.
	#[inline]
	fn skip_revisioned_slice(reader: &mut revision::SliceReader<'_>) -> Result<(), Error> {
		<String as revision::SkipRevisioned>::skip_revisioned_slice(reader)
	}
}
636
impl revision::WalkRevisioned for Strand {
	/// A `Strand` is walked with `revision::LeafWalker`.
	type Walker<'r, R: revision::BorrowedReader + 'r> = revision::LeafWalker<'r, Strand, R>;

	/// Wrap `reader` in a `LeafWalker`; the constructor call itself cannot fail here.
	#[inline]
	fn walk_revisioned<'r, R: revision::BorrowedReader>(
		reader: &'r mut R,
	) -> Result<Self::Walker<'r, R>, Error> {
		Ok(revision::LeafWalker::new(reader))
	}
}
647
// Marker impl with no methods.
// NOTE(review): presumably declares that `Strand` is encoded as a length prefix followed by
// raw bytes — confirm against the `revision` crate's documentation for this trait.
impl revision::LengthPrefixedBytes for Strand {}
649
650// -----------------------------------------------------------------------
651// storekey
652// -----------------------------------------------------------------------
653
654impl<F> storekey::Encode<F> for Strand {
655	#[inline]
656	fn encode<W: std::io::Write>(
657		&self,
658		writer: &mut storekey::Writer<W>,
659	) -> Result<(), storekey::EncodeError> {
660		<str as storekey::Encode<F>>::encode(self.as_str(), writer)
661	}
662}
663
664impl<'de, F> storekey::BorrowDecode<'de, F> for Strand {
665	#[inline]
666	fn borrow_decode(
667		reader: &mut storekey::BorrowReader<'de>,
668	) -> Result<Self, storekey::DecodeError> {
669		let cow = reader.read_str_cow()?;
670		let s: &str = &cow;
671		Ok(if s.len() <= INLINE_CAP {
672			Self::new_inline(s)
673		} else {
674			Self::from(Box::from(s))
675		})
676	}
677}
678
679impl<F> storekey::Decode<F> for Strand {
680	#[inline]
681	fn decode<R: std::io::BufRead>(
682		reader: &mut storekey::Reader<R>,
683	) -> Result<Self, storekey::DecodeError> {
684		let bytes = reader.read_vec()?;
685		let s = std::str::from_utf8(&bytes).map_err(|_| storekey::DecodeError::Utf8)?;
686		Ok(if s.len() <= INLINE_CAP {
687			Self::new_inline(s)
688		} else {
689			Self::from(Box::from(s))
690		})
691	}
692}
693
694// -----------------------------------------------------------------------
695// arbitrary
696// -----------------------------------------------------------------------
697
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for Strand {
	/// Generate an arbitrary `&str` and copy it into a `Strand`.
	#[inline]
	fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
		let text: &'a str = arbitrary::Arbitrary::arbitrary(u)?;
		Ok(Strand::new(text))
	}

	/// Same size hint as `&str`, since that is what is generated.
	#[inline]
	fn size_hint(depth: usize) -> (usize, Option<usize>) {
		<&str as arbitrary::Arbitrary<'a>>::size_hint(depth)
	}
}
711
712#[cfg(test)]
713mod tests {
714	use super::*;
715
	/// Short sample (5 bytes), well under `INLINE_CAP`.
	const SHORT: &str = "hello";
	/// 23 bytes — the inline boundary, exactly fills the inline buffer.
	const AT_CAP: &str = "abcdefghijklmnopqrstuvw";
	/// Longer than `INLINE_CAP`, so a `Strand` built from it at runtime is `Boxed`.
	const LONG: &str = "this string is intentionally much longer than twenty three bytes so it must live on the heap";
720
721	// --- layout ---------------------------------------------------------
722
723	#[test]
724	fn stack_size_is_24_bytes() {
725		use std::mem::size_of;
726		// Note: Strand is always 24 bytes.
727		// String is 24 bytes on 64-bit
728		// String is 12 bytes on 32-bit.
729		assert_eq!(size_of::<Strand>(), 24);
730	}
731
732	// --- basic construction --------------------------------------------
733
734	#[test]
735	fn short_strings_are_inline() {
736		let s = Strand::from(SHORT);
737		assert!(s.is_inline());
738		assert_eq!(s.as_str(), SHORT);
739	}
740
741	#[test]
742	fn long_strings_are_boxed() {
743		let s = Strand::from(LONG);
744		assert!(!s.is_inline());
745		assert!(!s.is_static());
746		assert!(s.is_boxed());
747		assert_eq!(s.as_str(), LONG);
748	}
749
750	#[test]
751	fn inline_boundary() {
752		assert_eq!(AT_CAP.len(), INLINE_CAP);
753		assert!(Strand::from(AT_CAP).is_inline());
754
755		let over: String = "a".repeat(INLINE_CAP + 1);
756		assert!(!Strand::from(over.as_str()).is_inline());
757	}
758
759	#[test]
760	fn empty_is_inline() {
761		let s = Strand::from("");
762		assert!(s.is_inline());
763		assert!(s.is_empty());
764		assert_eq!(s.as_str(), "");
765	}
766
767	#[test]
768	fn default_is_empty_inline() {
769		let s = Strand::default();
770		assert!(s.is_inline());
771		assert!(s.is_empty());
772	}
773
774	// --- clone & drop --------------------------------------------------
775
776	#[test]
777	fn inline_clone_is_independent_copy() {
778		let s = Strand::from(SHORT);
779		let t = s.clone();
780		assert_eq!(s.as_str(), t.as_str());
781		// Both are inline; drop one and the other must still be valid.
782		drop(s);
783		assert_eq!(t.as_str(), SHORT);
784	}
785
786	#[test]
787	fn boxed_clone_is_deep_copy() {
788		let s = Strand::from(LONG);
789		assert!(s.is_boxed());
790		let t = s.clone();
791		assert_eq!(s.as_str(), t.as_str());
792		// Each `Boxed` clone owns its own allocation; the byte
793		// pointers must differ even though the contents are equal.
794		assert_ne!(s.as_str().as_ptr(), t.as_str().as_ptr());
795		// Drop the original — the clone must still be fully valid.
796		drop(s);
797		assert_eq!(t.as_str(), LONG);
798	}
799
800	// --- static --------------------------------------------------------
801
802	#[test]
803	fn new_static_is_static() {
804		let s = Strand::new_static("foo");
805		assert!(s.is_static());
806		assert!(!s.is_inline());
807		assert!(!s.is_boxed());
808		assert_eq!(s.as_str(), "foo");
809	}
810
811	#[test]
812	fn new_static_long_is_still_static() {
813		// Longer than `INLINE_CAP`: the key benefit of `Static` is
814		// that it skips the allocation regardless of length.
815		let s = Strand::new_static(LONG);
816		assert!(s.len() > INLINE_CAP);
817		assert!(s.is_static());
818		assert!(!s.is_boxed());
819		assert_eq!(s.as_str(), LONG);
820	}
821
822	#[test]
823	fn new_static_is_const() {
824		// The whole point of the variant: `const` construction,
825		// including for strings longer than `INLINE_CAP`.
826		const SHORT_STATIC: Strand = Strand::new_static("foo");
827		const LONG_STATIC: Strand =
828			Strand::new_static("this literal is longer than INLINE_CAP but costs nothing");
829		assert_eq!(SHORT_STATIC.as_str(), "foo");
830		assert!(LONG_STATIC.as_str().len() > INLINE_CAP);
831	}
832
833	#[test]
834	fn static_clone_is_pointer_copy() {
835		let original = "some compile-time string";
836		let s = Strand::new_static(original);
837		let t = s.clone();
838		// Both clones must point at the exact same backing bytes as
839		// the original literal — no allocation, no copy.
840		assert_eq!(s.as_str().as_ptr(), original.as_ptr());
841		assert_eq!(t.as_str().as_ptr(), original.as_ptr());
842	}
843
844	// --- semantics -----------------------------------------------------
845
846	#[test]
847	fn cross_repr_equality() {
848		// All three variants holding the same bytes must compare
849		// equal to each other.
850		let inline = Strand::from("abc");
851		let stat = Strand::new_static("abc");
852		let boxed = Strand::from("a".repeat(INLINE_CAP + 1));
853		let boxed2 = Strand::from(boxed.as_str());
854		assert!(inline.is_inline());
855		assert!(stat.is_static());
856		assert!(boxed.is_boxed());
857		assert_eq!(inline, stat);
858		assert_eq!(stat, inline);
859		assert_eq!(boxed, boxed2);
860	}
861
862	#[test]
863	fn ord_is_lexicographic() {
864		let a = Strand::from("apple");
865		let b = Strand::from("banana");
866		assert!(a < b);
867	}
868
869	#[test]
870	fn hashing_works_as_map_key() {
871		use std::collections::HashMap;
872		let mut m = HashMap::new();
873		m.insert(Strand::from("k"), 1);
874		assert_eq!(m.get("k"), Some(&1));
875	}
876
877	#[test]
878	fn roundtrip_revisioned() {
879		let s = Strand::from("round trip");
880		let mut bytes = Vec::new();
881		s.serialize_revisioned(&mut bytes).unwrap();
882		let back = Strand::deserialize_revisioned(&mut bytes.as_slice()).unwrap();
883		assert_eq!(s, back);
884	}
885
886	#[test]
887	fn roundtrip_long_heap() {
888		let s = Strand::from(LONG);
889		let mut bytes = Vec::new();
890		s.serialize_revisioned(&mut bytes).unwrap();
891		let back = Strand::deserialize_revisioned(&mut bytes.as_slice()).unwrap();
892		assert_eq!(s, back);
893		assert!(!back.is_inline());
894	}
895
896	// --- revisioned edge cases ----------------------------------------
897	//
898	// These exercise the in-place `Box<str>` decode path and the
899	// inline-buffer decode path, which bypass `String::deserialize_revisioned`
900	// entirely and must cover the same cases the generic `String` impl
901	// used to.
902
903	fn roundtrip_revisioned_for(s: &str, expect_inline: bool) {
904		let strand = Strand::from(s);
905		let mut bytes = Vec::new();
906		strand.serialize_revisioned(&mut bytes).unwrap();
907		let back = Strand::deserialize_revisioned(&mut bytes.as_slice()).unwrap();
908		assert_eq!(back.as_str(), s);
909		assert_eq!(back.is_inline(), expect_inline);
910	}
911
912	#[test]
913	fn roundtrip_revisioned_empty() {
914		roundtrip_revisioned_for("", true);
915	}
916
917	#[test]
918	fn roundtrip_revisioned_at_inline_cap() {
919		roundtrip_revisioned_for(AT_CAP, true);
920		assert_eq!(AT_CAP.len(), INLINE_CAP);
921	}
922
923	#[test]
924	fn roundtrip_revisioned_one_over_inline_cap() {
925		let over: String = "a".repeat(INLINE_CAP + 1);
926		roundtrip_revisioned_for(&over, false);
927	}
928
929	/// Multi-byte UTF-8 that straddles the inline/heap boundary — the
930	/// in-place decode must not be fooled by a codepoint whose UTF-8
931	/// length is not 1, and must validate UTF-8 even when the bytes
932	/// are written directly into a freshly allocated `Vec<u8>`.
933	#[test]
934	fn roundtrip_revisioned_utf8_heap() {
935		let s = "δοκιμή αξιολόγησης κειμένου με πολυβυτικούς χαρακτήρες";
936		assert!(s.len() > INLINE_CAP);
937		roundtrip_revisioned_for(s, false);
938	}
939
940	#[test]
941	fn deserialize_revisioned_rejects_invalid_utf8() {
942		// Manually craft a payload that claims 2 bytes of payload but
943		// provides invalid UTF-8 (a lone continuation byte).
944		let mut bytes = Vec::new();
945		2usize.serialize_revisioned(&mut bytes).unwrap();
946		bytes.push(0xFF);
947		bytes.push(0xFE);
948		assert!(Strand::deserialize_revisioned(&mut bytes.as_slice()).is_err());
949	}
950
951	// --- storekey round-trips -----------------------------------------
952
953	fn roundtrip_storekey_for(s: &str, expect_inline: bool) {
954		use storekey::{BorrowDecode, Decode, Encode};
955
956		let strand = Strand::from(s);
957		// Encode via storekey.
958		let mut buf = Vec::new();
959		let mut w = storekey::Writer::new(&mut buf);
960		<Strand as Encode<()>>::encode(&strand, &mut w).unwrap();
961
962		// BorrowDecode path.
963		{
964			let mut r = storekey::BorrowReader::new(&buf);
965			let back = <Strand as BorrowDecode<'_, ()>>::borrow_decode(&mut r).unwrap();
966			assert_eq!(back.as_str(), s);
967			assert_eq!(back.is_inline(), expect_inline);
968		}
969
970		// Streaming Decode path.
971		{
972			let mut r = storekey::Reader::new(buf.as_slice());
973			let back = <Strand as Decode<()>>::decode(&mut r).unwrap();
974			assert_eq!(back.as_str(), s);
975			assert_eq!(back.is_inline(), expect_inline);
976		}
977	}
978
979	#[test]
980	fn roundtrip_storekey_empty() {
981		roundtrip_storekey_for("", true);
982	}
983
984	#[test]
985	fn roundtrip_storekey_short() {
986		roundtrip_storekey_for(SHORT, true);
987	}
988
989	#[test]
990	fn roundtrip_storekey_at_inline_cap() {
991		roundtrip_storekey_for(AT_CAP, true);
992	}
993
994	#[test]
995	fn roundtrip_storekey_long() {
996		roundtrip_storekey_for(LONG, false);
997	}
998
999	/// Values that contain `0x00` and `0x01` bytes exercise the
1000	/// escape-aware decoder branch in `BorrowReader::read_str_cow`
1001	/// (which returns `Cow::Owned` instead of `Cow::Borrowed`).
1002	#[test]
1003	fn roundtrip_storekey_with_escape_bytes() {
1004		roundtrip_storekey_for("abc\0def\x01ghi", true);
1005		let long_with_escapes: String = format!("{}\0{}", "x".repeat(30), "y".repeat(30));
1006		roundtrip_storekey_for(&long_with_escapes, false);
1007	}
1008
1009	// --- wire-format compatibility with `String` ----------------------
1010	//
1011	// `Strand` is a drop-in replacement for `String` at both the
1012	// `revision` (on-disk, document/change-feed) and `storekey`
1013	// (index-key) layers. The entire value of the small-string
1014	// optimisation hinges on that being invisible from the wire
1015	// format's perspective — any byte-level divergence between
1016	// `Strand::serialize` and `String::serialize` for the same input
1017	// would silently break on-disk data on upgrade. These tests assert
1018	// both byte-identity and cross-type decode compatibility for
1019	// every edge case the earlier roundtrip tests touch.
1020
1021	/// Inputs that exercise every interesting structural case:
1022	/// - empty (length-prefix only, no payload);
1023	/// - one byte below, equal to, and one byte above `INLINE_CAP` (the inline/heap boundary only
1024	///   the `Strand` impl cares about; from the wire's perspective it should be invisible);
1025	/// - a long ASCII string (typical `LONG` payload);
1026	/// - multi-byte UTF-8 whose byte length straddles `INLINE_CAP` (guards against off-by-one in
1027	///   the boundary check or a codepoint-vs-byte confusion);
1028	/// - strings containing `0x00` and `0x01` bytes, which trigger the escape-aware branch in
1029	///   `storekey`'s writer/reader.
1030	fn wire_format_cases() -> Vec<String> {
1031		let at_cap_minus_one: String = "a".repeat(INLINE_CAP - 1);
1032		let at_cap: String = "a".repeat(INLINE_CAP);
1033		let at_cap_plus_one: String = "a".repeat(INLINE_CAP + 1);
1034		let utf8_heap = "δοκιμή αξιολόγησης κειμένου με πολυβυτικούς χαρακτήρες".to_owned();
1035		let escape_short = "abc\0def\x01ghi".to_owned();
1036		let escape_long = format!("{}\0{}", "x".repeat(30), "y".repeat(30));
1037		vec![
1038			String::new(),
1039			SHORT.to_owned(),
1040			at_cap_minus_one,
1041			at_cap,
1042			at_cap_plus_one,
1043			LONG.to_owned(),
1044			utf8_heap,
1045			escape_short,
1046			escape_long,
1047		]
1048	}
1049
1050	/// For every fixture, assert that `Strand` produces byte-identical
1051	/// `revisioned` output to `String`, and that both types can decode
1052	/// each other's bytes back to the original value.
1053	#[test]
1054	fn revisioned_wire_matches_string() {
1055		for input in wire_format_cases() {
1056			let strand = Strand::from(input.as_str());
1057
1058			let mut strand_bytes = Vec::new();
1059			strand.serialize_revisioned(&mut strand_bytes).unwrap();
1060
1061			let mut string_bytes = Vec::new();
1062			input.serialize_revisioned(&mut string_bytes).unwrap();
1063
1064			// (1) Byte-identical output.
1065			assert_eq!(
1066				strand_bytes, string_bytes,
1067				"Strand and String must produce identical revisioned bytes for {:?}",
1068				input
1069			);
1070
1071			// (2) `Strand` can decode bytes produced by `String`.
1072			let from_string_bytes =
1073				Strand::deserialize_revisioned(&mut string_bytes.as_slice()).unwrap();
1074			assert_eq!(from_string_bytes.as_str(), input);
1075
1076			// (3) `String` can decode bytes produced by `Strand`.
1077			let from_strand_bytes =
1078				String::deserialize_revisioned(&mut strand_bytes.as_slice()).unwrap();
1079			assert_eq!(from_strand_bytes, input);
1080		}
1081	}
1082
1083	/// [`revision::LengthPrefixedBytes`] enables [`revision::LeafWalker::with_bytes`] on
1084	/// slice-backed readers; payload bytes must match UTF-8 encoding of the strand.
1085	#[test]
1086	fn revision_leaf_walker_with_bytes_matches_strand_utf8() {
1087		use revision::{SerializeRevisioned, WalkRevisioned};
1088		let s = Strand::from("hello ρ");
1089		let mut buf = Vec::new();
1090		s.serialize_revisioned(&mut buf).unwrap();
1091		let mut r = buf.as_slice();
1092		let w = Strand::walk_revisioned(&mut r).unwrap();
1093		w.with_bytes(|bytes| assert_eq!(bytes, s.as_str().as_bytes())).unwrap();
1094	}
1095
1096	/// Same assertions for the `storekey` encoding, covering both the
1097	/// borrowed and streaming decode paths plus the cross-type decode.
1098	#[test]
1099	fn storekey_wire_matches_string() {
1100		use storekey::{BorrowDecode, Decode, Encode};
1101
1102		for input in wire_format_cases() {
1103			let strand = Strand::from(input.as_str());
1104
1105			let mut strand_bytes = Vec::new();
1106			{
1107				let mut w = storekey::Writer::new(&mut strand_bytes);
1108				<Strand as Encode<()>>::encode(&strand, &mut w).unwrap();
1109			}
1110
1111			let mut string_bytes = Vec::new();
1112			{
1113				let mut w = storekey::Writer::new(&mut string_bytes);
1114				<String as Encode<()>>::encode(&input, &mut w).unwrap();
1115			}
1116
1117			// (1) Byte-identical output — escape-aware encoder must
1118			// treat `Strand` exactly like `String`, so `0x00`/`0x01`
1119			// bytes come out escaped the same way.
1120			assert_eq!(
1121				strand_bytes, string_bytes,
1122				"Strand and String must produce identical storekey bytes for {:?}",
1123				input
1124			);
1125
1126			// (2) `Strand` (both BorrowDecode and Decode) can decode
1127			// bytes produced by `String`.
1128			{
1129				let mut r = storekey::BorrowReader::new(&string_bytes);
1130				let back = <Strand as BorrowDecode<'_, ()>>::borrow_decode(&mut r).unwrap();
1131				assert_eq!(back.as_str(), input);
1132			}
1133			{
1134				let mut r = storekey::Reader::new(string_bytes.as_slice());
1135				let back = <Strand as Decode<()>>::decode(&mut r).unwrap();
1136				assert_eq!(back.as_str(), input);
1137			}
1138
1139			// (3) `String` can decode bytes produced by `Strand`.
1140			{
1141				let mut r = storekey::BorrowReader::new(&strand_bytes);
1142				let back = <String as BorrowDecode<'_, ()>>::borrow_decode(&mut r).unwrap();
1143				assert_eq!(back, input);
1144			}
1145			{
1146				let mut r = storekey::Reader::new(strand_bytes.as_slice());
1147				let back = <String as Decode<()>>::decode(&mut r).unwrap();
1148				assert_eq!(back, input);
1149			}
1150		}
1151	}
1152}