arcstr/arc_str.rs
1#![allow(
2// We follow libstd's lead and prefer to define both.
3 clippy::partialeq_ne_impl,
4// This is a really annoying clippy lint, since it's required for so many cases...
5 clippy::cast_ptr_alignment,
6// For macros
7 clippy::redundant_slicing,
8)]
9use core::alloc::Layout;
10use core::mem::{align_of, size_of, MaybeUninit};
11use core::ptr::NonNull;
12#[cfg(not(all(loom, test)))]
13pub(crate) use core::sync::atomic::{AtomicUsize, Ordering};
14#[cfg(all(loom, test))]
15pub(crate) use loom::sync::atomic::{AtomicUsize, Ordering};
16
17#[cfg(feature = "substr")]
18use crate::Substr;
19use alloc::borrow::Cow;
20use alloc::boxed::Box;
21use alloc::string::String;
22
/// A better atomically-reference counted string type.
///
/// ## Benefits of `ArcStr` over `Arc<str>`
///
/// - It's possible to create a const `ArcStr` from a literal via the
///   [`arcstr::literal!`][crate::literal] macro. This is probably the killer
///   feature, to be honest.
///
///   These "static" `ArcStr`s are zero cost, take no heap allocation, and don't
///   even need to perform atomic reads/writes when being cloned or dropped (nor
///   at any other time).
///
///   They even get stored in the read-only memory of your executable, which can
///   be beneficial for performance and memory usage. (In theory your linker may
///   even dedupe these for you, but usually not)
///
/// - `ArcStr`s from `arcstr::literal!` can be turned into `&'static str` safely
///   at any time using [`ArcStr::as_static`]. (This returns an Option, which is
///   `None` if the `ArcStr` was not static)
///
/// - This should be unsurprising given the literal functionality, but
///   [`ArcStr::new`] is able to be a `const` function.
///
/// - `ArcStr` is thin, e.g. only a single pointer. Great for cases where you
///   want to keep the data structure lightweight or need to do some FFI stuff
///   with it.
///
/// - `ArcStr` is totally immutable. No need to lose sleep because you're afraid
///   of code which thinks it has a right to mutate your `Arc`s just because it
///   holds the only reference...
///
/// - Reference counting operations are lower overhead because we don't support
///   `Weak` references. This can be a drawback for some use cases, but improves
///   performance for the common case of no-weak-refs.
///
/// ## What does "zero-cost literals" mean?
///
/// In a few places I call the literal arcstrs "zero-cost". There is no overhead
/// for most accesses (aside from stuff like `as_static`, which obviously
/// requires it), and it imposes an extra branch in both `clone` and `drop`.
///
/// This branch in `clone`/`drop` is not on the result of an atomic load, and is
/// just a normal memory read. This is actually what allows literal/static
/// `ArcStr`s to avoid needing to perform any atomic operations in those
/// functions, which seems likely to more than cover the cost.
///
/// (Additionally, it's almost certain that in the future we'll be able to
/// reduce the synchronization required for atomic instructions. This is due to
/// our guarantee of immutability and lack of support for `Weak`.)
///
/// # Usage
///
/// ## As a `const`
///
/// The big unique feature of `ArcStr` is the ability to create static/const
/// `ArcStr`s. (See [the macro](crate::literal) docs or the [feature
/// overview][feats])
///
/// [feats]: index.html#feature-overview
///
/// ```
/// # use arcstr::ArcStr;
/// const WOW: ArcStr = arcstr::literal!("cool robot!");
/// assert_eq!(WOW, "cool robot!");
/// ```
///
/// ## As a `str`
///
/// (This is not unique to `ArcStr`, but is a frequent source of confusion I've
/// seen): `ArcStr` implements `Deref<Target = str>`, and so all functions and
/// methods from `str` work on it, even though we don't expose them on `ArcStr`
/// directly.
///
/// ```
/// # use arcstr::ArcStr;
/// let s = ArcStr::from("something");
/// // These go through `Deref`, so they work even though
/// // there is no `ArcStr::eq_ignore_ascii_case` function
/// assert!(s.eq_ignore_ascii_case("SOMETHING"));
/// ```
///
/// Additionally, `&ArcStr` can be passed to any function which accepts `&str`.
/// For example:
///
/// ```
/// # use arcstr::ArcStr;
/// fn accepts_str(s: &str) {
/// # let _ = s;
///    // s...
/// }
///
/// let test_str: ArcStr = "test".into();
/// // This works even though `&test_str` is normally an `&ArcStr`
/// accepts_str(&test_str);
///
/// // Of course, this works for functionality from the standard library as well.
/// let test_but_loud = ArcStr::from("TEST");
/// assert!(test_str.eq_ignore_ascii_case(&test_but_loud));
/// ```

// NOTE: `repr(transparent)` means an `ArcStr` is ABI-identical to the single
// `NonNull<ThinInner>` pointer it wraps (which also gives `Option<ArcStr>` the
// null-pointer niche for free).
#[repr(transparent)]
pub struct ArcStr(NonNull<ThinInner>);
125
// SAFETY: `ArcStr` is an immutable shared string; the only shared mutable
// state is the reference count, which is accessed via atomic operations
// (`AtomicUsize`), so it is sound to share and send across threads.
unsafe impl Sync for ArcStr {}
unsafe impl Send for ArcStr {}
128
129impl ArcStr {
    /// Construct a new empty string.
    ///
    /// This is a `const fn`, so it can be used to initialize `const`s and
    /// `static`s. No allocation is performed.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// let s = ArcStr::new();
    /// assert_eq!(s, "");
    /// ```
    #[inline]
    pub const fn new() -> Self {
        // `EMPTY` is the shared empty-string constant, so this is free.
        EMPTY
    }
143
144 /// Attempt to copy the provided string into a newly allocated `ArcStr`, but
145 /// return `None` if we cannot allocate the required memory.
146 ///
147 /// # Examples
148 ///
149 /// ```
150 /// # use arcstr::ArcStr;
151 ///
152 /// # fn do_stuff_with(s: ArcStr) {}
153 ///
154 /// let some_big_str = "please pretend this is a very long string";
155 /// if let Some(s) = ArcStr::try_alloc(some_big_str) {
156 /// do_stuff_with(s);
157 /// } else {
158 /// // Complain about allocation failure, somehow.
159 /// }
160 /// ```
161 #[inline]
162 pub fn try_alloc(copy_from: &str) -> Option<Self> {
163 if let Ok(inner) = ThinInner::try_allocate(copy_from, false) {
164 Some(Self(inner))
165 } else {
166 None
167 }
168 }
169
170 /// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the
171 /// provided callback to fully initialize the provided buffer with valid
172 /// UTF-8 text.
173 ///
174 /// This function returns `None` if memory allocation fails, see
175 /// [`ArcStr::init_with_unchecked`] for a version which calls
176 /// [`handle_alloc_error`](alloc::alloc::handle_alloc_error).
177 ///
178 /// # Safety
179 /// The provided `initializer` callback must fully initialize the provided
180 /// buffer with valid UTF-8 text.
181 ///
182 /// # Examples
183 ///
184 /// ```
185 /// # use arcstr::ArcStr;
186 /// # use core::mem::MaybeUninit;
187 /// let arcstr = unsafe {
188 /// ArcStr::try_init_with_unchecked(10, |s: &mut [MaybeUninit<u8>]| {
189 /// s.fill(MaybeUninit::new(b'a'));
190 /// }).unwrap()
191 /// };
192 /// assert_eq!(arcstr, "aaaaaaaaaa")
193 /// ```
194 #[inline]
195 pub unsafe fn try_init_with_unchecked<F>(n: usize, initializer: F) -> Option<Self>
196 where
197 F: FnOnce(&mut [MaybeUninit<u8>]),
198 {
199 if let Ok(inner) = ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) {
200 Some(Self(inner))
201 } else {
202 None
203 }
204 }
205
    /// Allocate memory for an [`ArcStr`] of length `n`, and use the provided
    /// callback to fully initialize the provided buffer with valid UTF-8 text.
    ///
    /// This function calls
    /// [`handle_alloc_error`](alloc::alloc::handle_alloc_error) if memory
    /// allocation fails, see [`ArcStr::try_init_with_unchecked`] for a version
    /// which returns `None`
    ///
    /// # Panics
    /// Panics if the computed capacity overflows.
    ///
    /// # Safety
    /// The provided `initializer` callback must fully initialize the provided
    /// buffer with valid UTF-8 text.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// # use core::mem::MaybeUninit;
    /// let arcstr = unsafe {
    ///     ArcStr::init_with_unchecked(10, |s: &mut [MaybeUninit<u8>]| {
    ///         s.fill(MaybeUninit::new(b'a'));
    ///     })
    /// };
    /// assert_eq!(arcstr, "aaaaaaaaaa")
    /// ```
    #[inline]
    pub unsafe fn init_with_unchecked<F>(n: usize, initializer: F) -> Self
    where
        F: FnOnce(&mut [MaybeUninit<u8>]),
    {
        match ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) {
            Ok(inner) => Self(inner),
            // `Err(None)` indicates the total allocation size overflowed.
            Err(None) => panic!("capacity overflow"),
            // `Err(Some(layout))` indicates the allocator failed for `layout`.
            Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout),
        }
    }
241
    /// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the
    /// provided callback to initialize the provided (initially-zeroed) buffer
    /// with valid UTF-8 text.
    ///
    /// Note: This function is provided with a zeroed buffer, and performs UTF-8
    /// validation after calling the initializer. While both of these are fast
    /// operations, some high-performance use cases will be better off using
    /// [`ArcStr::try_init_with_unchecked`] as the building block.
    ///
    /// # Errors
    /// The provided `initializer` callback must initialize the provided buffer
    /// with valid UTF-8 text, or a UTF-8 error will be returned.
    ///
    /// # Panics
    /// Panics if the computed capacity overflows; calls
    /// [`handle_alloc_error`](alloc::alloc::handle_alloc_error) on allocator
    /// failure.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    ///
    /// let s = ArcStr::init_with(5, |slice| {
    ///     slice
    ///         .iter_mut()
    ///         .zip(b'0'..b'5')
    ///         .for_each(|(db, sb)| *db = sb);
    /// }).unwrap();
    /// assert_eq!(s, "01234");
    /// ```
    #[inline]
    pub fn init_with<F>(n: usize, initializer: F) -> Result<Self, core::str::Utf8Error>
    where
        F: FnOnce(&mut [u8]),
    {
        // Validation happens inside the wrapper closure; the error (if any) is
        // smuggled out through this capture.
        let mut failed = None::<core::str::Utf8Error>;
        let wrapper = |zeroed_slice: &mut [MaybeUninit<u8>]| {
            debug_assert_eq!(n, zeroed_slice.len());
            // Safety: we pass `AllocInit::Zero`, so this is actually initialized
            let slice = unsafe {
                core::slice::from_raw_parts_mut(zeroed_slice.as_mut_ptr().cast::<u8>(), n)
            };
            initializer(slice);
            if let Err(e) = core::str::from_utf8(slice) {
                failed = Some(e);
            }
        };
        match unsafe { ThinInner::try_allocate_with(n, false, AllocInit::Zero, wrapper) } {
            Ok(inner) => {
                // Ensure we clean up the allocation even on error: wrapping in
                // `Self` first means its `Drop` frees the buffer if we return
                // the `Utf8Error` below.
                let this = Self(inner);
                if let Some(e) = failed {
                    Err(e)
                } else {
                    Ok(this)
                }
            }
            Err(None) => panic!("capacity overflow"),
            Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout),
        }
    }
299
300 /// Extract a string slice containing our data.
301 ///
302 /// Note: This is an equivalent to our `Deref` implementation, but can be
303 /// more readable than `&*s` in the cases where a manual invocation of
304 /// `Deref` would be required.
305 ///
306 /// # Examples
307 // TODO: find a better example where `&*` would have been required.
308 /// ```
309 /// # use arcstr::ArcStr;
310 /// let s = ArcStr::from("abc");
311 /// assert_eq!(s.as_str(), "abc");
312 /// ```
313 #[inline]
314 pub fn as_str(&self) -> &str {
315 self
316 }
317
    /// Returns the length of this `ArcStr` in bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// let a = ArcStr::from("foo");
    /// assert_eq!(a.len(), 3);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        // The length shares a word with the is-static flag; extract just the
        // integer part.
        self.get_inner_len_flag().uint_part()
    }
331
    /// Load the packed (length, is-static-flag) word from our header.
    #[inline]
    fn get_inner_len_flag(&self) -> PackedFlagUint {
        // SAFETY: `self.0` always points to a valid `ThinInner` header.
        unsafe { ThinInner::get_len_flag(self.0.as_ptr()) }
    }
336
337 /// Returns true if this `ArcStr` is empty.
338 ///
339 /// # Examples
340 ///
341 /// ```
342 /// # use arcstr::ArcStr;
343 /// assert!(!ArcStr::from("foo").is_empty());
344 /// assert!(ArcStr::new().is_empty());
345 /// ```
346 #[inline]
347 pub fn is_empty(&self) -> bool {
348 self.len() == 0
349 }
350
351 /// Convert us to a `std::string::String`.
352 ///
353 /// This is provided as an inherent method to avoid needing to route through
354 /// the `Display` machinery, but is equivalent to `ToString::to_string`.
355 ///
356 /// # Examples
357 ///
358 /// ```
359 /// # use arcstr::ArcStr;
360 /// let s = ArcStr::from("abc");
361 /// assert_eq!(s.to_string(), "abc");
362 /// ```
363 #[inline]
364 #[allow(clippy::inherent_to_string_shadow_display)]
365 pub fn to_string(&self) -> String {
366 #[cfg(not(feature = "std"))]
367 use alloc::borrow::ToOwned;
368 self.as_str().to_owned()
369 }
370
    /// Extract a byte slice containing the string's data.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// let foobar = ArcStr::from("foobar");
    /// assert_eq!(foobar.as_bytes(), b"foobar");
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        let len = self.len();
        let p = self.0.as_ptr();
        unsafe {
            // The string bytes live inline in the allocation, `OFFSET_DATA`
            // bytes past the header start; the debug assert cross-checks the
            // manual offset arithmetic against the field address.
            let data = p.cast::<u8>().add(OFFSET_DATA);
            debug_assert_eq!(core::ptr::addr_of!((*p).data).cast::<u8>(), data);
            core::slice::from_raw_parts(data, len)
        }
    }
390
391 /// Return the raw pointer this `ArcStr` wraps, for advanced use cases.
392 ///
393 /// Note that in addition to the `NonNull` constraint expressed in the type
394 /// signature, we also guarantee the pointer has an alignment of at least 8
395 /// bytes, even on platforms where a lower alignment would be acceptable.
396 ///
397 /// # Examples
398 ///
399 /// ```
400 /// # use arcstr::ArcStr;
401 /// let s = ArcStr::from("abcd");
402 /// let p = ArcStr::into_raw(s);
403 /// // Some time later...
404 /// let s = unsafe { ArcStr::from_raw(p) };
405 /// assert_eq!(s, "abcd");
406 /// ```
407 #[inline]
408 pub fn into_raw(this: Self) -> NonNull<()> {
409 let p = this.0;
410 core::mem::forget(this);
411 p.cast()
412 }
413
    /// The opposite version of [`Self::into_raw`]. Still intended only for
    /// advanced use cases.
    ///
    /// # Safety
    ///
    /// This function must be used on a valid pointer returned from
    /// [`ArcStr::into_raw`]. Additionally, you must ensure that a given `ArcStr`
    /// instance is only dropped once.
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// let s = ArcStr::from("abcd");
    /// let p = ArcStr::into_raw(s);
    /// // Some time later...
    /// let s = unsafe { ArcStr::from_raw(p) };
    /// assert_eq!(s, "abcd");
    /// ```
    #[inline]
    pub unsafe fn from_raw(ptr: NonNull<()>) -> Self {
        // Re-adopt the pointer produced by `into_raw`; the cast restores the
        // `ThinInner` pointee type erased there.
        Self(ptr.cast())
    }
437
438 /// Returns true if the two `ArcStr`s point to the same allocation.
439 ///
440 /// Note that functions like `PartialEq` check this already, so there's
441 /// no performance benefit to doing something like `ArcStr::ptr_eq(&a1, &a2) || (a1 == a2)`.
442 ///
443 /// Caveat: `const`s aren't guaranteed to only occur in an executable a
444 /// single time, and so this may be non-deterministic for `ArcStr` defined
445 /// in a `const` with [`arcstr::literal!`][crate::literal], unless one
446 /// was created by a `clone()` on the other.
447 ///
448 /// # Examples
449 ///
450 /// ```
451 /// use arcstr::ArcStr;
452 ///
453 /// let foobar = ArcStr::from("foobar");
454 /// let same_foobar = foobar.clone();
455 /// let other_foobar = ArcStr::from("foobar");
456 /// assert!(ArcStr::ptr_eq(&foobar, &same_foobar));
457 /// assert!(!ArcStr::ptr_eq(&foobar, &other_foobar));
458 ///
459 /// const YET_AGAIN_A_DIFFERENT_FOOBAR: ArcStr = arcstr::literal!("foobar");
460 /// let strange_new_foobar = YET_AGAIN_A_DIFFERENT_FOOBAR.clone();
461 /// let wild_blue_foobar = strange_new_foobar.clone();
462 /// assert!(ArcStr::ptr_eq(&strange_new_foobar, &wild_blue_foobar));
463 /// ```
464 #[inline]
465 pub fn ptr_eq(lhs: &Self, rhs: &Self) -> bool {
466 core::ptr::eq(lhs.0.as_ptr(), rhs.0.as_ptr())
467 }
468
    /// Returns the number of references that exist to this `ArcStr`. If this is
    /// a static `ArcStr` (For example, one from
    /// [`arcstr::literal!`][crate::literal]), returns `None`.
    ///
    /// Despite the difference in return type, this is named to match the method
    /// from the stdlib's Arc:
    /// [`Arc::strong_count`][alloc::sync::Arc::strong_count].
    ///
    /// If you aren't sure how to handle static `ArcStr` in the context of this
    /// return value, `ArcStr::strong_count(&s).unwrap_or(usize::MAX)` is
    /// frequently reasonable.
    ///
    /// # Safety
    ///
    /// This method by itself is safe, but using it correctly requires extra
    /// care. Another thread can change the strong count at any time, including
    /// potentially between calling this method and acting on the result.
    ///
    /// However, it may never change from `None` to `Some` or from `Some` to
    /// `None` for a given `ArcStr` — whether or not it is static is determined
    /// at construction, and never changes.
    ///
    /// # Examples
    ///
    /// ### Dynamic ArcStr
    /// ```
    /// # use arcstr::ArcStr;
    /// let foobar = ArcStr::from("foobar");
    /// assert_eq!(Some(1), ArcStr::strong_count(&foobar));
    /// let also_foobar = ArcStr::clone(&foobar);
    /// assert_eq!(Some(2), ArcStr::strong_count(&foobar));
    /// assert_eq!(Some(2), ArcStr::strong_count(&also_foobar));
    /// ```
    ///
    /// ### Static ArcStr
    /// ```
    /// # use arcstr::ArcStr;
    /// let baz = arcstr::literal!("baz");
    /// assert_eq!(None, ArcStr::strong_count(&baz));
    /// // Similarly:
    /// assert_eq!(None, ArcStr::strong_count(&ArcStr::default()));
    /// ```
    #[inline]
    pub fn strong_count(this: &Self) -> Option<usize> {
        // `load_count_flag` returns `None` for strings that were static from
        // creation (len-flag set), so they are never counted.
        let cf = Self::load_count_flag(this, Ordering::Acquire)?;
        // A set flag in the count word means the string was made static at
        // runtime (see `leak`/`become_static`), so the count is meaningless.
        if cf.flag_part() {
            None
        } else {
            Some(cf.uint_part())
        }
    }
520
    /// Load the packed (count, is-static-flag) word without checking whether
    /// `this` is backed by static memory.
    ///
    /// Safety: Unsafe to use if `this` is stored in static memory (check
    /// `Self::has_static_lenflag`)
    #[inline]
    unsafe fn load_count_flag_raw(this: &Self, ord_if_needed: Ordering) -> PackedFlagUint {
        PackedFlagUint::from_encoded((*this.0.as_ptr()).count_flag.load(ord_if_needed))
    }
527
528 #[inline]
529 fn load_count_flag(this: &Self, ord_if_needed: Ordering) -> Option<PackedFlagUint> {
530 if Self::has_static_lenflag(this) {
531 None
532 } else {
533 let count_and_flag = PackedFlagUint::from_encoded(unsafe {
534 (*this.0.as_ptr()).count_flag.load(ord_if_needed)
535 });
536 Some(count_and_flag)
537 }
538 }
539
    /// Convert the `ArcStr` into a "static" `ArcStr`, even if it was originally
    /// created from runtime values. The `&'static str` is returned.
    ///
    /// This is useful if you want to use [`ArcStr::as_static`] or
    /// [`ArcStr::is_static`] on a value only known at runtime.
    ///
    /// If the `ArcStr` is already static, then this is a noop.
    ///
    /// # Caveats
    /// Calling this function on an ArcStr will cause us to never free it, thus
    /// leaking its memory. Doing this excessively can lead to problems.
    ///
    /// # Examples
    /// ```no_run
    /// # // This isn't run because it needs a leakcheck suppression,
    /// # // which I can't seem to make work in CI (no symbols for
    /// # // doctests?). Instead, we test this in tests/arc_str.rs
    /// # use arcstr::ArcStr;
    /// let s = ArcStr::from("foobar");
    /// assert!(!ArcStr::is_static(&s));
    /// assert!(ArcStr::as_static(&s).is_none());
    ///
    /// let leaked: &'static str = s.leak();
    /// assert_eq!(leaked, s);
    /// assert!(ArcStr::is_static(&s));
    /// assert_eq!(ArcStr::as_static(&s), Some("foobar"));
    /// ```
    #[inline]
    pub fn leak(&self) -> &'static str {
        // Already static from creation: nothing to do.
        if Self::has_static_lenflag(self) {
            return unsafe { Self::to_static_unchecked(self) };
        }
        let is_static_count = unsafe {
            // Not sure about ordering, maybe relaxed would be fine.
            Self::load_count_flag_raw(self, Ordering::Acquire)
        };
        // Already leaked by a previous `leak` call: also nothing to do.
        if is_static_count.flag_part() {
            return unsafe { Self::to_static_unchecked(self) };
        }
        // Otherwise mark the allocation static. Uniqueness (count == 1) lets
        // `become_static` take a cheaper non-atomic path.
        unsafe { Self::become_static(self, is_static_count.uint_part() == 1) };
        debug_assert!(Self::is_static(self));
        unsafe { Self::to_static_unchecked(self) }
    }
583
    /// Flip a heap-allocated string into the "static" state so it is never
    /// freed (used by `leak`).
    ///
    /// # Safety
    /// - `this` must not be backed by actual static memory.
    /// - If `is_unique` is true, the caller asserts no other reference exists,
    ///   since this path overwrites the count word non-atomically.
    unsafe fn become_static(this: &Self, is_unique: bool) {
        if is_unique {
            // Sole owner: plain writes are fine; no other thread can observe
            // them. Set count word to (flag=true, count=1)...
            core::ptr::addr_of_mut!((*this.0.as_ptr()).count_flag).write(AtomicUsize::new(
                PackedFlagUint::new_raw(true, 1).encoded_value(),
            ));
            // ...and also set the static bit in the length word, which only a
            // unique owner may touch.
            let lenp = core::ptr::addr_of_mut!((*this.0.as_ptr()).len_flag);
            debug_assert!(!lenp.read().flag_part());
            lenp.write(lenp.read().with_flag(true));
        } else {
            // Shared: we can only set the static bit in the count word, and
            // must do it atomically.
            let flag_bit = PackedFlagUint::new_raw(true, 0).encoded_value();
            let atomic_count_flag = &*core::ptr::addr_of!((*this.0.as_ptr()).count_flag);
            atomic_count_flag.fetch_or(flag_bit, Ordering::Release);
        }
    }
598
    /// Extend the lifetime of our data to `'static`.
    ///
    /// Safety: caller must ensure `this` is actually static (or has been
    /// leaked), i.e. its backing memory is never freed.
    #[inline]
    unsafe fn to_static_unchecked(this: &Self) -> &'static str {
        &*Self::str_ptr(this)
    }
603
    /// Raw fat pointer to our byte data (no lifetime attached — callers pick
    /// the lifetime, see `to_static_unchecked`).
    #[inline]
    fn bytes_ptr(this: &Self) -> *const [u8] {
        let len = this.get_inner_len_flag().uint_part();
        unsafe {
            let p: *const ThinInner = this.0.as_ptr();
            // Data lives `OFFSET_DATA` bytes after the header; the debug
            // assert cross-checks against the actual field address.
            let data = p.cast::<u8>().add(OFFSET_DATA);
            debug_assert_eq!(core::ptr::addr_of!((*p).data).cast::<u8>(), data,);
            core::ptr::slice_from_raw_parts(data, len)
        }
    }
614
    /// Raw pointer to our data as `*const str` (the bytes are always valid
    /// UTF-8 by construction).
    #[inline]
    fn str_ptr(this: &Self) -> *const str {
        Self::bytes_ptr(this) as *const str
    }
619
    /// Returns true if `this` is a "static" ArcStr. For example, if it was
    /// created from a call to [`arcstr::literal!`][crate::literal]),
    /// returned by `ArcStr::new`, etc.
    ///
    /// Static `ArcStr`s can be converted to `&'static str` for free using
    /// [`ArcStr::as_static`], without leaking memory — they're static constants
    /// in the program (somewhere).
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// const STATIC: ArcStr = arcstr::literal!("Electricity!");
    /// assert!(ArcStr::is_static(&STATIC));
    ///
    /// let still_static = arcstr::literal!("Shocking!");
    /// assert!(ArcStr::is_static(&still_static));
    /// assert!(
    ///     ArcStr::is_static(&still_static.clone()),
    ///     "Cloned statics are still static"
    /// );
    ///
    /// let nonstatic = ArcStr::from("Grounded...");
    /// assert!(!ArcStr::is_static(&nonstatic));
    /// ```
    #[inline]
    pub fn is_static(this: &Self) -> bool {
        // We align this to 16 bytes and keep the `is_static` flags in the same
        // place. In theory this means that if `cfg(target_feature = "avx")`
        // (where aligned 16byte loads are atomic), the compiler *could*
        // implement this function using the equivalent of:
        // ```
        // let vec = _mm_load_si128(self.0.as_ptr().cast());
        // let mask = _mm_movemask_pd(_mm_srli_epi64(vac, 63));
        // mask != 0
        // ```
        // and that's all; one load, no branching. (I don't think it *does*, but
        // I haven't checked so I'll be optimistic and keep the `#[repr(align)]`
        // -- hey, maybe the CPU can peephole-optimize it).
        //
        // That said, unless I did it in asm, *I* can't implement it that way,
        // since Rust's semantics don't allow me to make that change
        // optimization on my own (that load isn't considered atomic, for
        // example).
        //
        // A string is static either from creation (len-flag) or because it was
        // leaked at runtime (count-flag); check both.
        this.get_inner_len_flag().flag_part()
            || unsafe { Self::load_count_flag_raw(this, Ordering::Relaxed).flag_part() }
    }
667
    /// This is true for any `ArcStr` that has been static from the time when it
    /// was created. It's cheaper than `has_static_rcflag` (no atomic load).
    #[inline]
    fn has_static_lenflag(this: &Self) -> bool {
        this.get_inner_len_flag().flag_part()
    }
674
    /// Returns true if `this` is a "static"/`"literal"` ArcStr. For example, if
    /// it was created from a call to [`literal!`][crate::literal]), returned by
    /// `ArcStr::new`, etc.
    ///
    /// Static `ArcStr`s can be converted to `&'static str` for free using
    /// [`ArcStr::as_static`], without leaking memory — they're static constants
    /// in the program (somewhere).
    ///
    /// # Examples
    ///
    /// ```
    /// # use arcstr::ArcStr;
    /// const STATIC: ArcStr = arcstr::literal!("Electricity!");
    /// assert_eq!(ArcStr::as_static(&STATIC), Some("Electricity!"));
    ///
    /// // Note that they don't have to be consts, just made using `literal!`:
    /// let still_static = arcstr::literal!("Shocking!");
    /// assert_eq!(ArcStr::as_static(&still_static), Some("Shocking!"));
    /// // Cloning a static still produces a static.
    /// assert_eq!(ArcStr::as_static(&still_static.clone()), Some("Shocking!"));
    ///
    /// // But it won't work for strings from other sources.
    /// let nonstatic = ArcStr::from("Grounded...");
    /// assert_eq!(ArcStr::as_static(&nonstatic), None);
    /// ```
    #[inline]
    pub fn as_static(this: &Self) -> Option<&'static str> {
        if Self::is_static(this) {
            // We know static strings live forever, so they can have a static lifetime.
            Some(unsafe { &*(this.as_str() as *const str) })
        } else {
            None
        }
    }
709
    // Not public API. Exists so the `arcstr::literal` macro can call it.
    //
    // Safety: `ptr` must be a `StaticArcStrInner` whose layout is compatible
    // with `ThinInner` (the macro guarantees this).
    #[inline]
    #[doc(hidden)]
    pub const unsafe fn _private_new_from_static_data<B>(
        ptr: &'static StaticArcStrInner<B>,
    ) -> Self {
        Self(NonNull::new_unchecked(ptr as *const _ as *mut ThinInner))
    }
718
    /// `feature = "substr"` Returns a substr of `self` over the given range.
    ///
    /// # Examples
    ///
    /// ```
    /// use arcstr::{ArcStr, Substr};
    ///
    /// let a = ArcStr::from("abcde");
    /// let b: Substr = a.substr(2..);
    ///
    /// assert_eq!(b, "cde");
    /// ```
    ///
    /// # Panics
    /// If any of the following are untrue, we panic
    /// - `range.start() <= range.end()`
    /// - `range.end() <= self.len()`
    /// - `self.is_char_boundary(start) && self.is_char_boundary(end)`
    /// - These can be conveniently verified in advance using
    ///   `self.get(start..end).is_some()` if needed.
    #[cfg(feature = "substr")]
    #[inline]
    pub fn substr(&self, range: impl core::ops::RangeBounds<usize>) -> Substr {
        // All range validation/panicking happens in `Substr::from_parts`.
        Substr::from_parts(self, range)
    }
744
745 /// `feature = "substr"` Returns a [`Substr`] of self over the given `&str`.
746 ///
747 /// It is not rare to end up with a `&str` which holds a view into a
748 /// `ArcStr`'s backing data. A common case is when using functionality that
749 /// takes and returns `&str` and are entirely unaware of `arcstr`, for
750 /// example: `str::trim()`.
751 ///
752 /// This function allows you to reconstruct a [`Substr`] from a `&str` which
753 /// is a view into this `ArcStr`'s backing string.
754 ///
755 /// # Examples
756 ///
757 /// ```
758 /// use arcstr::{ArcStr, Substr};
759 /// let text = ArcStr::from(" abc");
760 /// let trimmed = text.trim();
761 /// let substr: Substr = text.substr_from(trimmed);
762 /// assert_eq!(substr, "abc");
763 /// // for illustration
764 /// assert!(ArcStr::ptr_eq(substr.parent(), &text));
765 /// assert_eq!(substr.range(), 3..6);
766 /// ```
767 ///
768 /// # Panics
769 ///
770 /// Panics if `substr` isn't a view into our memory.
771 ///
772 /// Also panics if `substr` is a view into our memory but is >= `u32::MAX`
773 /// bytes away from our start, if we're a 64-bit machine and
774 /// `substr-usize-indices` is not enabled.
775 #[cfg(feature = "substr")]
776 pub fn substr_from(&self, substr: &str) -> Substr {
777 if substr.is_empty() {
778 return Substr::new();
779 }
780
781 let self_start = self.as_ptr() as usize;
782 let self_end = self_start + self.len();
783
784 let substr_start = substr.as_ptr() as usize;
785 let substr_end = substr_start + substr.len();
786 if substr_start < self_start || substr_end > self_end {
787 out_of_range(self, &substr);
788 }
789
790 let index = substr_start - self_start;
791 let end = index + substr.len();
792 self.substr(index..end)
793 }
794
795 /// `feature = "substr"` If possible, returns a [`Substr`] of self over the
796 /// given `&str`.
797 ///
798 /// This is a fallible version of [`ArcStr::substr_from`].
799 ///
800 /// It is not rare to end up with a `&str` which holds a view into a
801 /// `ArcStr`'s backing data. A common case is when using functionality that
802 /// takes and returns `&str` and are entirely unaware of `arcstr`, for
803 /// example: `str::trim()`.
804 ///
805 /// This function allows you to reconstruct a [`Substr`] from a `&str` which
806 /// is a view into this `ArcStr`'s backing string.
807 ///
808 /// # Examples
809 ///
810 /// ```
811 /// use arcstr::{ArcStr, Substr};
812 /// let text = ArcStr::from(" abc");
813 /// let trimmed = text.trim();
814 /// let substr: Option<Substr> = text.try_substr_from(trimmed);
815 /// assert_eq!(substr.unwrap(), "abc");
816 /// // `&str`s not derived from `self` will return None.
817 /// let not_substr = text.try_substr_from("abc");
818 /// assert!(not_substr.is_none());
819 /// ```
820 ///
821 /// # Panics
822 ///
823 /// Panics if `substr` is a view into our memory but is >= `u32::MAX` bytes
824 /// away from our start, if we're a 64-bit machine and
825 /// `substr-usize-indices` is not enabled.
826 #[cfg(feature = "substr")]
827 pub fn try_substr_from(&self, substr: &str) -> Option<Substr> {
828 if substr.is_empty() {
829 return Some(Substr::new());
830 }
831
832 let self_start = self.as_ptr() as usize;
833 let self_end = self_start + self.len();
834
835 let substr_start = substr.as_ptr() as usize;
836 let substr_end = substr_start + substr.len();
837 if substr_start < self_start || substr_end > self_end {
838 return None;
839 }
840
841 let index = substr_start - self_start;
842 let end = index + substr.len();
843 debug_assert!(self.get(index..end).is_some());
844 Some(self.substr(index..end))
845 }
846
847 /// `feature = "substr"` Compute a derived `&str` a function of `&str` =>
848 /// `&str`, and produce a Substr of the result if possible.
849 ///
850 /// The function may return either a derived string, or any empty string.
851 ///
852 /// This function is mainly a wrapper around [`ArcStr::try_substr_from`]. If
853 /// you're coming to `arcstr` from the `shared_string` crate, this is the
854 /// moral equivalent of the `slice_with` function.
855 ///
856 /// # Examples
857 ///
858 /// ```
859 /// use arcstr::{ArcStr, Substr};
860 /// let text = ArcStr::from(" abc");
861 /// let trimmed: Option<Substr> = text.try_substr_using(str::trim);
862 /// assert_eq!(trimmed.unwrap(), "abc");
863 /// let other = text.try_substr_using(|_s| "different string!");
864 /// assert_eq!(other, None);
865 /// // As a special case, this is allowed.
866 /// let empty = text.try_substr_using(|_s| "");
867 /// assert_eq!(empty.unwrap(), "");
868 /// ```
869 #[cfg(feature = "substr")]
870 pub fn try_substr_using(&self, f: impl FnOnce(&str) -> &str) -> Option<Substr> {
871 self.try_substr_from(f(self.as_str()))
872 }
873
874 /// `feature = "substr"` Compute a derived `&str` a function of `&str` =>
875 /// `&str`, and produce a Substr of the result.
876 ///
877 /// The function may return either a derived string, or any empty string.
878 /// Returning anything else will result in a panic.
879 ///
880 /// This function is mainly a wrapper around [`ArcStr::try_substr_from`]. If
881 /// you're coming to `arcstr` from the `shared_string` crate, this is the
882 /// likely closest to the `slice_with_unchecked` function, but this panics
883 /// instead of UB on dodginess.
884 ///
885 /// # Examples
886 ///
887 /// ```
888 /// use arcstr::{ArcStr, Substr};
889 /// let text = ArcStr::from(" abc");
890 /// let trimmed: Substr = text.substr_using(str::trim);
891 /// assert_eq!(trimmed, "abc");
892 /// // As a special case, this is allowed.
893 /// let empty = text.substr_using(|_s| "");
894 /// assert_eq!(empty, "");
895 /// ```
896 #[cfg(feature = "substr")]
897 pub fn substr_using(&self, f: impl FnOnce(&str) -> &str) -> Substr {
898 self.substr_from(f(self.as_str()))
899 }
900
    /// Creates an `ArcStr` by repeating the source string `n` times
    ///
    /// # Errors
    ///
    /// This function returns an error if the capacity overflows or allocation
    /// fails.
    ///
    /// # Examples
    ///
    /// ```
    /// use arcstr::ArcStr;
    ///
    /// let source = "A";
    /// let repeated = ArcStr::try_repeat(source, 10);
    /// assert_eq!(repeated.unwrap(), "AAAAAAAAAA");
    /// ```
    pub fn try_repeat(source: &str, n: usize) -> Option<Self> {
        // If the source string is empty or the user asked for zero repetitions,
        // return an empty string
        if source.is_empty() || n == 0 {
            return Some(Self::new());
        }

        // Calculate the capacity for the allocated string
        let capacity = source.len().checked_mul(n)?;
        let inner =
            ThinInner::try_allocate_maybe_uninit(capacity, false, AllocInit::Uninit).ok()?;

        unsafe {
            let mut data_ptr = ThinInner::data_ptr(inner);
            let data_end = data_ptr.add(capacity);

            // Copy `source` into the allocated string `n` times. The result is
            // valid UTF-8 because it is a concatenation of valid UTF-8 strings.
            while data_ptr < data_end {
                core::ptr::copy_nonoverlapping(source.as_ptr(), data_ptr, source.len());
                data_ptr = data_ptr.add(source.len());
            }
        }

        Some(Self(inner))
    }
942
943 /// Creates an `ArcStr` by repeating the source string `n` times
944 ///
945 /// # Panics
946 ///
947 /// This function panics if the capacity overflows, see
948 /// [`try_repeat`](ArcStr::try_repeat) if this is undesirable.
949 ///
950 /// # Examples
951 ///
952 /// Basic usage:
953 /// ```
954 /// use arcstr::ArcStr;
955 ///
956 /// let source = "A";
957 /// let repeated = ArcStr::repeat(source, 10);
958 /// assert_eq!(repeated, "AAAAAAAAAA");
959 /// ```
960 ///
961 /// A panic upon overflow:
962 /// ```should_panic
963 /// # use arcstr::ArcStr;
964 ///
965 /// // this will panic at runtime
966 /// let huge = ArcStr::repeat("A", usize::MAX);
967 /// ```
968 pub fn repeat(source: &str, n: usize) -> Self {
969 Self::try_repeat(source, n).expect("capacity overflow")
970 }
971}
972
// Out-of-line panic path for `substr_from`-style helpers: the provided `&str`
// does not point into the `ArcStr`'s buffer. Cold/never-inline keeps the happy
// path small.
#[cold]
#[inline(never)]
#[cfg(feature = "substr")]
fn out_of_range(arc: &ArcStr, substr: &&str) -> ! {
    let outer = arc.as_ptr();
    let inner = substr.as_ptr();
    // `wrapping_add` because these one-past-the-end pointers are only used for
    // display, never dereferenced.
    panic!(
        "ArcStr over ({:p}..{:p}) does not contain substr over ({:p}..{:p})",
        outer,
        outer.wrapping_add(arc.len()),
        inner,
        inner.wrapping_add(substr.len()),
    );
}
986
impl Clone for ArcStr {
    #[inline]
    fn clone(&self) -> Self {
        // Static `ArcStr`s (e.g. from `literal!`) carry no live refcount, so
        // cloning them is just copying the pointer — no atomics at all.
        if !Self::is_static(self) {
            // From libstd's impl:
            //
            // > Using a relaxed ordering is alright here, as knowledge of the
            // > original reference prevents other threads from erroneously deleting
            // > the object.
            //
            // See: https://doc.rust-lang.org/src/alloc/sync.rs.html#1073
            let n: PackedFlagUint = PackedFlagUint::from_encoded(unsafe {
                // Bump the count part by 1 (flag bit untouched); `n` is the
                // pre-increment value.
                let step = PackedFlagUint::FALSE_ONE.encoded_value();
                (*self.0.as_ptr())
                    .count_flag
                    .fetch_add(step, Ordering::Relaxed)
            });
            // Protect against aggressive leaking of Arcs causing us to
            // overflow the count into the flag bit: once the count passes
            // `RC_MAX` (and the static flag isn't already set), set the
            // static flag, which permanently pins (deliberately leaks) the
            // allocation instead of risking overflow UB.
            if n.uint_part() > RC_MAX && !n.flag_part() {
                let val = PackedFlagUint::new_raw(true, 0).encoded_value();
                unsafe {
                    (*self.0.as_ptr())
                        .count_flag
                        .fetch_or(val, Ordering::Release)
                };
                // NOTE(review): libstd's `Arc` aborts on refcount overflow
                // instead; leaking here is a no_std-friendly alternative.
            }
        }
        Self(self.0)
    }
}
1020const RC_MAX: usize = PackedFlagUint::UINT_PART_MAX / 2;
1021
impl Drop for ArcStr {
    #[inline]
    fn drop(&mut self) {
        // Static strings are never freed and have no refcount to decrement.
        if Self::is_static(self) {
            return;
        }
        unsafe {
            let this = self.0.as_ptr();
            // Decrement the count part by 1. `Release` ensures our reads of
            // the string data happen-before the deallocation performed by
            // whichever thread drops the last reference.
            let enc = PackedFlagUint::from_encoded(
                (*this)
                    .count_flag
                    .fetch_sub(PackedFlagUint::FALSE_ONE.encoded_value(), Ordering::Release),
            );
            // `fetch_sub` returns the *previous* value: seeing exactly
            // `FALSE_ONE` (flag clear, count == 1) means we just released the
            // last reference and must free the allocation.
            if enc == PackedFlagUint::FALSE_ONE {
                // This `Acquire` load synchronizes-with the `Release`
                // decrements from other threads, so all of their accesses
                // happen-before the destroy (same pattern as libstd's `Arc`).
                let _ = (*this).count_flag.load(Ordering::Acquire);
                ThinInner::destroy_cold(this)
            }
        }
    }
}
// Caveat on the `static`/`strong` fields: "is_static" indicates if we're
// located in static data (as with the empty string). is_static being false means
// we are a normal arc-ed string.
1046//
// While `ArcStr` claims to hold a pointer to a `ThinInner`, for the static case
// we actually are using a pointer to a `StaticArcStrInner<[u8; N]>`. These have
// almost identical layouts, except the static one contains an explicit trailing
// array, and does not have an `AtomicUsize`. The issue is: We kind of want the
// static ones to not have any interior mutability, so that `const`s can use
// them, and so that they may be stored in read-only memory.
1053//
1054// We do this by keeping a flag in `len_flag` flag to indicate which case we're
1055// in, and maintaining the invariant that if we're a `StaticArcStrInner` **we
1056// may never access `.strong` in any way or produce a `&ThinInner` pointing to
1057// our data**.
1058//
// This is more subtle than you might think, since AFAIK we're not legally
1060// allowed to create an `&ThinInner` until we're 100% sure it's nonstatic, and
1061// prior to determining it, we are forced to work from entirely behind a raw
1062// pointer...
1063//
1064// That said, a bit of this hoop jumping might be not required in the future,
1065// but for now what we're doing works and is apparently sound:
1066// https://github.com/rust-lang/unsafe-code-guidelines/issues/246
// Header of a heap-allocated `ArcStr`: the string bytes are stored inline,
// immediately after these fields (see `OFFSET_DATA`). `repr(C)` pins the field
// order so the offset constants below and the `StaticArcStrInner` pun hold.
#[repr(C, align(8))]
struct ThinInner {
    // Both of these are `PackedFlagUint`s that store `is_static` as the flag.
    //
    // The reason it's not just stored in len is because an ArcStr may become
    // static after creation (via `ArcStr::leak`) and we don't need to do an
    // atomic load to access the length (and not only because it would mess with
    // optimization).
    //
    // The reason it's not just stored in the count is because it may be UB to
    // do atomic loads from read-only memory. This is also the reason it's not
    // stored in a separate atomic, and why doing an atomic load to access the
    // length wouldn't be acceptable even if compilers were really good.
    len_flag: PackedFlagUint,
    count_flag: AtomicUsize,
    // Zero-sized marker for the inline string bytes; real data lives directly
    // after the header (accessed via `ThinInner::data_ptr`).
    data: [u8; 0],
}
1084
// Byte offsets of the `ThinInner` fields, relied on by the raw-pointer
// arithmetic below. `repr(C)` guarantees this layout; the tests at the bottom
// of the file verify them against `offset_of!`.
const OFFSET_LENFLAGS: usize = 0;
const OFFSET_COUNTFLAGS: usize = size_of::<PackedFlagUint>();
const OFFSET_DATA: usize = OFFSET_COUNTFLAGS + size_of::<AtomicUsize>();
1088
// Not public API, exists for macros.
//
// Layout-compatible stand-in for `ThinInner` used by `literal!` for
// const/static strings: same leading fields, but as plain `usize`s (no
// interior mutability, so the value may live in read-only memory), with the
// string bytes stored inline in `data`.
#[repr(C, align(8))]
#[doc(hidden)]
pub struct StaticArcStrInner<Buf> {
    pub len_flag: usize,
    pub count_flag: usize,
    pub data: Buf,
}
1097
1098impl<Buf> StaticArcStrInner<Buf> {
1099 #[doc(hidden)]
1100 pub const STATIC_COUNT_VALUE: usize = PackedFlagUint::new_raw(true, 1).encoded_value();
1101 #[doc(hidden)]
1102 #[inline]
1103 pub const fn encode_len(v: usize) -> Option<usize> {
1104 match PackedFlagUint::new(true, v) {
1105 Some(v) => Some(v.encoded_value()),
1106 None => None,
1107 }
1108 }
1109}
1110
// Compile-time layout assertions: `StaticArcStrInner` and `ThinInner` must
// agree in size/alignment for the type pun to be sound, and `AtomicUsize` /
// `PackedFlagUint` must be exactly usize-shaped for the offset math above.
const _: [(); size_of::<StaticArcStrInner<[u8; 0]>>()] = [(); 2 * size_of::<usize>()];
const _: [(); align_of::<StaticArcStrInner<[u8; 0]>>()] = [(); 8];

const _: [(); size_of::<StaticArcStrInner<[u8; 2 * size_of::<usize>()]>>()] =
    [(); 4 * size_of::<usize>()];
const _: [(); align_of::<StaticArcStrInner<[u8; 2 * size_of::<usize>()]>>()] = [(); 8];

const _: [(); size_of::<ThinInner>()] = [(); 2 * size_of::<usize>()];
const _: [(); align_of::<ThinInner>()] = [(); 8];

const _: [(); align_of::<AtomicUsize>()] = [(); align_of::<usize>()];
const _: [(); align_of::<AtomicUsize>()] = [(); size_of::<usize>()];
const _: [(); size_of::<AtomicUsize>()] = [(); size_of::<usize>()];

const _: [(); align_of::<PackedFlagUint>()] = [(); align_of::<usize>()];
const _: [(); size_of::<PackedFlagUint>()] = [(); size_of::<usize>()];
1127
/// A `usize` with a `bool` packed into the low bit and an unsigned integer in
/// the remaining high bits. Used for both `ThinInner::len_flag` (flag =
/// is_static, uint = length) and `ThinInner::count_flag` (flag = is_static,
/// uint = refcount).
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
struct PackedFlagUint(usize);
impl PackedFlagUint {
    /// Largest value representable in the uint part (all bits but the flag).
    const UINT_PART_MAX: usize = (1 << (usize::BITS - 1)) - 1;
    /// Encodes `false` as the flag and `1` as the uint. Used for a few things,
    /// such as the amount we `fetch_add` by for refcounting, and so on.
    const FALSE_ONE: Self = Self::new_raw(false, 1);

    /// Checked constructor: `None` if `uint_part` doesn't fit.
    #[inline]
    const fn new(flag_part: bool, uint_part: usize) -> Option<Self> {
        if uint_part > Self::UINT_PART_MAX {
            None
        } else {
            Some(Self::new_raw(flag_part, uint_part))
        }
    }

    /// Unchecked constructor: if `uint_part > UINT_PART_MAX` its top bit is
    /// silently shifted out.
    #[inline(always)]
    const fn new_raw(flag_part: bool, uint_part: usize) -> Self {
        Self(flag_part as usize | (uint_part << 1))
    }

    /// The integer payload (length or refcount).
    #[inline(always)]
    const fn uint_part(self) -> usize {
        self.0 >> 1
    }

    /// The `is_static` flag stored in the low bit.
    #[inline(always)]
    const fn flag_part(self) -> bool {
        (self.0 & 1) != 0
    }

    /// Reinterpret a raw encoded `usize` (e.g. one read from the atomic).
    #[inline(always)]
    const fn from_encoded(v: usize) -> Self {
        Self(v)
    }

    /// The raw encoded `usize` (e.g. to store into the atomic).
    #[inline(always)]
    const fn encoded_value(self) -> usize {
        self.0
    }

    /// Returns a copy of `self` with the flag part set to `v`, preserving the
    /// uint part.
    #[inline(always)]
    #[must_use]
    const fn with_flag(self, v: bool) -> Self {
        // Fix: clear the old flag bit before OR-ing in the new one. The
        // previous `v as usize | self.0` could set the flag but never clear
        // it, making `with_flag(false)` a silent no-op on a set flag.
        Self((self.0 & !1) | v as usize)
    }
}
1177
1178const EMPTY: ArcStr = literal!("");
1179
impl ThinInner {
    /// Infallible version of [`Self::try_allocate`]: panics on length
    /// overflow, and calls `handle_alloc_error` on allocator failure.
    #[inline]
    fn allocate(data: &str, initially_static: bool) -> NonNull<Self> {
        match Self::try_allocate(data, initially_static) {
            Ok(v) => v,
            Err(None) => alloc_overflow(),
            Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout),
        }
    }

    /// Pointer to the first byte of the inline string data (located
    /// `OFFSET_DATA` bytes past the header).
    #[inline]
    fn data_ptr(this: NonNull<Self>) -> *mut u8 {
        unsafe { this.as_ptr().cast::<u8>().add(OFFSET_DATA) }
    }

    /// Allocates a `ThinInner` where the data segment is uninitialized or
    /// zeroed.
    ///
    /// Returns `Err(Some(layout))` if we failed to allocate that layout, and
    /// `Err(None)` for integer overflow when computing layout
    fn try_allocate_maybe_uninit(
        capacity: usize,
        initially_static: bool,
        init_how: AllocInit,
    ) -> Result<NonNull<Self>, Option<Layout>> {
        const ALIGN: usize = align_of::<ThinInner>();

        debug_assert_ne!(capacity, 0);
        // Reject capacities where `capacity + OFFSET_DATA` (rounded up to
        // ALIGN) could exceed `isize::MAX`, which `Layout` requires.
        if capacity >= (isize::MAX as usize) - (OFFSET_DATA + ALIGN) {
            return Err(None);
        }

        debug_assert!(Layout::from_size_align(capacity + OFFSET_DATA, ALIGN).is_ok());
        // Safety: size validated just above; ALIGN is a power of two.
        let layout = unsafe { Layout::from_size_align_unchecked(capacity + OFFSET_DATA, ALIGN) };
        let ptr = match init_how {
            AllocInit::Uninit => unsafe { alloc::alloc::alloc(layout) as *mut ThinInner },
            AllocInit::Zero => unsafe { alloc::alloc::alloc_zeroed(layout) as *mut ThinInner },
        };
        if ptr.is_null() {
            return Err(Some(layout));
        }

        // we actually already checked this above (the `isize::MAX` bound
        // implies the length fits in the uint part)...
        debug_assert!(PackedFlagUint::new(initially_static, capacity).is_some());

        let len_flag = PackedFlagUint::new_raw(initially_static, capacity);
        debug_assert_eq!(len_flag.uint_part(), capacity);
        debug_assert_eq!(len_flag.flag_part(), initially_static);

        unsafe {
            // Initialize the header fields through raw pointers — we must not
            // form a `&mut ThinInner` while the fields are uninitialized.
            core::ptr::addr_of_mut!((*ptr).len_flag).write(len_flag);

            // Fresh allocations start with a refcount of 1 (the handle we
            // return), carrying the same static flag as `len_flag`.
            let initial_count_flag = PackedFlagUint::new_raw(initially_static, 1);
            let count_flag: AtomicUsize = AtomicUsize::new(initial_count_flag.encoded_value());
            core::ptr::addr_of_mut!((*ptr).count_flag).write(count_flag);

            debug_assert_eq!(
                (ptr as *const u8).wrapping_add(OFFSET_DATA),
                (*ptr).data.as_ptr(),
            );

            Ok(NonNull::new_unchecked(ptr))
        }
    }

    // returns `Err(Some(l))` if we failed to allocate that layout, and
    // `Err(None)` for integer overflow when computing layout.
    #[inline]
    fn try_allocate(data: &str, initially_static: bool) -> Result<NonNull<Self>, Option<Layout>> {
        // Safety: we initialize the whole buffer by copying `data` into it.
        unsafe {
            // Allocate enough space to hold the given string
            Self::try_allocate_with(
                data.len(),
                initially_static,
                AllocInit::Uninit,
                // Copy the given string into the allocation
                |uninit_slice| {
                    debug_assert_eq!(uninit_slice.len(), data.len());
                    core::ptr::copy_nonoverlapping(
                        data.as_ptr(),
                        uninit_slice.as_mut_ptr().cast::<u8>(),
                        data.len(),
                    )
                },
            )
        }
    }

    /// Safety: caller must fully initialize the provided buffer with valid
    /// UTF-8 in the `initializer` function (well, you at least need to handle
    /// it before giving it back to the user).
    #[inline]
    unsafe fn try_allocate_with(
        len: usize,
        initially_static: bool,
        init_style: AllocInit,
        initializer: impl FnOnce(&mut [core::mem::MaybeUninit<u8>]),
    ) -> Result<NonNull<Self>, Option<Layout>> {
        // Allocate enough space to hold `len` bytes of string data
        let this = Self::try_allocate_maybe_uninit(len, initially_static, init_style)?;

        // Hand the (possibly uninitialized) data segment to the caller's
        // closure to fill in.
        initializer(core::slice::from_raw_parts_mut(
            Self::data_ptr(this).cast::<MaybeUninit<u8>>(),
            len,
        ));

        Ok(this)
    }

    /// Reads `len_flag` from a possibly-static inner without ever producing a
    /// reference — safe for both `ThinInner` and `StaticArcStrInner`, which
    /// share this leading field (see the caveat comment above `ThinInner`).
    #[inline]
    unsafe fn get_len_flag(p: *const ThinInner) -> PackedFlagUint {
        debug_assert_eq!(OFFSET_LENFLAGS, 0);
        *p.cast()
    }

    /// Frees a (non-static) allocation. Cold: only runs when the very last
    /// `ArcStr` referencing it is dropped.
    #[cold]
    unsafe fn destroy_cold(p: *mut ThinInner) {
        let lf = Self::get_len_flag(p);
        let (is_static, len) = (lf.flag_part(), lf.uint_part());
        debug_assert!(!is_static);
        // Reconstruct the exact layout used by `try_allocate_maybe_uninit`.
        let layout = {
            let size = len + OFFSET_DATA;
            let align = align_of::<ThinInner>();
            Layout::from_size_align_unchecked(size, align)
        };
        alloc::alloc::dealloc(p as *mut _, layout);
    }
}
1309
/// How the data segment of a fresh allocation should be initialized.
#[derive(Clone, Copy, PartialEq)]
enum AllocInit {
    /// Leave the data bytes uninitialized; the caller will fill them.
    Uninit,
    /// Request zeroed memory from the allocator.
    Zero,
}
1315
/// Panic for lengths whose `Layout` computation would overflow. Out-of-line
/// and cold to keep callers' happy paths small.
#[inline(never)]
#[cold]
fn alloc_overflow() -> ! {
    panic!("overflow during Layout computation")
}
1321
1322impl From<&str> for ArcStr {
1323 #[inline]
1324 fn from(s: &str) -> Self {
1325 if s.is_empty() {
1326 Self::new()
1327 } else {
1328 Self(ThinInner::allocate(s, false))
1329 }
1330 }
1331}
1332
1333impl core::ops::Deref for ArcStr {
1334 type Target = str;
1335 #[inline]
1336 fn deref(&self) -> &str {
1337 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
1338 }
1339}
1340
1341impl Default for ArcStr {
1342 #[inline]
1343 fn default() -> Self {
1344 Self::new()
1345 }
1346}
1347
1348impl From<String> for ArcStr {
1349 #[inline]
1350 fn from(v: String) -> Self {
1351 v.as_str().into()
1352 }
1353}
1354
1355impl From<&mut str> for ArcStr {
1356 #[inline]
1357 fn from(s: &mut str) -> Self {
1358 let s: &str = s;
1359 Self::from(s)
1360 }
1361}
1362
1363impl From<Box<str>> for ArcStr {
1364 #[inline]
1365 fn from(s: Box<str>) -> Self {
1366 Self::from(&s[..])
1367 }
1368}
1369impl From<ArcStr> for Box<str> {
1370 #[inline]
1371 fn from(s: ArcStr) -> Self {
1372 s.as_str().into()
1373 }
1374}
1375impl From<ArcStr> for alloc::rc::Rc<str> {
1376 #[inline]
1377 fn from(s: ArcStr) -> Self {
1378 s.as_str().into()
1379 }
1380}
1381impl From<ArcStr> for alloc::sync::Arc<str> {
1382 #[inline]
1383 fn from(s: ArcStr) -> Self {
1384 s.as_str().into()
1385 }
1386}
1387impl From<alloc::rc::Rc<str>> for ArcStr {
1388 #[inline]
1389 fn from(s: alloc::rc::Rc<str>) -> Self {
1390 Self::from(&*s)
1391 }
1392}
1393impl From<alloc::sync::Arc<str>> for ArcStr {
1394 #[inline]
1395 fn from(s: alloc::sync::Arc<str>) -> Self {
1396 Self::from(&*s)
1397 }
1398}
1399impl<'a> From<Cow<'a, str>> for ArcStr {
1400 #[inline]
1401 fn from(s: Cow<'a, str>) -> Self {
1402 Self::from(&*s)
1403 }
1404}
1405impl<'a> From<&'a ArcStr> for Cow<'a, str> {
1406 #[inline]
1407 fn from(s: &'a ArcStr) -> Self {
1408 Cow::Borrowed(s)
1409 }
1410}
1411
1412impl<'a> From<ArcStr> for Cow<'a, str> {
1413 #[inline]
1414 fn from(s: ArcStr) -> Self {
1415 if let Some(st) = ArcStr::as_static(&s) {
1416 Cow::Borrowed(st)
1417 } else {
1418 Cow::Owned(s.to_string())
1419 }
1420 }
1421}
1422
1423impl From<&String> for ArcStr {
1424 #[inline]
1425 fn from(s: &String) -> Self {
1426 Self::from(s.as_str())
1427 }
1428}
1429impl From<&ArcStr> for ArcStr {
1430 #[inline]
1431 fn from(s: &ArcStr) -> Self {
1432 s.clone()
1433 }
1434}
1435
1436impl core::fmt::Debug for ArcStr {
1437 #[inline]
1438 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1439 core::fmt::Debug::fmt(self.as_str(), f)
1440 }
1441}
1442
1443impl core::fmt::Display for ArcStr {
1444 #[inline]
1445 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1446 core::fmt::Display::fmt(self.as_str(), f)
1447 }
1448}
1449
1450impl PartialEq for ArcStr {
1451 #[inline]
1452 fn eq(&self, o: &Self) -> bool {
1453 ArcStr::ptr_eq(self, o) || PartialEq::eq(self.as_str(), o.as_str())
1454 }
1455 #[inline]
1456 fn ne(&self, o: &Self) -> bool {
1457 !ArcStr::ptr_eq(self, o) && PartialEq::ne(self.as_str(), o.as_str())
1458 }
1459}
1460
1461impl Eq for ArcStr {}
1462
// Generates symmetric `PartialEq` impls between `ArcStr` and the listed string
// types, always comparing the underlying `str` slices.
macro_rules! impl_peq {
    (@one $a:ty, $b:ty) => {
        #[allow(clippy::extra_unused_lifetimes)]
        impl<'a> PartialEq<$b> for $a {
            #[inline]
            fn eq(&self, s: &$b) -> bool {
                PartialEq::eq(&self[..], &s[..])
            }
            #[inline]
            fn ne(&self, s: &$b) -> bool {
                PartialEq::ne(&self[..], &s[..])
            }
        }
    };
    // Each pair expands to both directions: `$a == $b` and `$b == $a`.
    ($(($a:ty, $b:ty),)+) => {$(
        impl_peq!(@one $a, $b);
        impl_peq!(@one $b, $a);
    )+};
}
1482
// `ArcStr` compares equal (in both directions) with the usual owned and
// borrowed string shapes.
impl_peq! {
    (ArcStr, str),
    (ArcStr, &'a str),
    (ArcStr, String),
    (ArcStr, Cow<'a, str>),
    (ArcStr, Box<str>),
    (ArcStr, alloc::sync::Arc<str>),
    (ArcStr, alloc::rc::Rc<str>),
    (ArcStr, alloc::sync::Arc<String>),
    (ArcStr, alloc::rc::Rc<String>),
}
1494
1495impl PartialOrd for ArcStr {
1496 #[inline]
1497 fn partial_cmp(&self, s: &Self) -> Option<core::cmp::Ordering> {
1498 Some(self.as_str().cmp(s.as_str()))
1499 }
1500}
1501
1502impl Ord for ArcStr {
1503 #[inline]
1504 fn cmp(&self, s: &Self) -> core::cmp::Ordering {
1505 self.as_str().cmp(s.as_str())
1506 }
1507}
1508
1509impl core::hash::Hash for ArcStr {
1510 #[inline]
1511 fn hash<H: core::hash::Hasher>(&self, h: &mut H) {
1512 self.as_str().hash(h)
1513 }
1514}
1515
// Generates `Index` impls for each listed range type, all slicing the
// underlying `str`.
macro_rules! impl_index {
    ($($IdxT:ty,)*) => {$(
        impl core::ops::Index<$IdxT> for ArcStr {
            type Output = str;
            #[inline]
            fn index(&self, i: $IdxT) -> &Self::Output {
                &self.as_str()[i]
            }
        }
    )*};
}
1527
// `ArcStr` supports all the standard range-slicing syntaxes `str` does.
impl_index! {
    core::ops::RangeFull,
    core::ops::Range<usize>,
    core::ops::RangeFrom<usize>,
    core::ops::RangeTo<usize>,
    core::ops::RangeInclusive<usize>,
    core::ops::RangeToInclusive<usize>,
}
1536
1537impl AsRef<str> for ArcStr {
1538 #[inline]
1539 fn as_ref(&self) -> &str {
1540 self
1541 }
1542}
1543
1544impl AsRef<[u8]> for ArcStr {
1545 #[inline]
1546 fn as_ref(&self) -> &[u8] {
1547 self.as_bytes()
1548 }
1549}
1550
1551impl core::borrow::Borrow<str> for ArcStr {
1552 #[inline]
1553 fn borrow(&self) -> &str {
1554 self
1555 }
1556}
1557
1558impl core::str::FromStr for ArcStr {
1559 type Err = core::convert::Infallible;
1560 #[inline]
1561 fn from_str(s: &str) -> Result<Self, Self::Err> {
1562 Ok(Self::from(s))
1563 }
1564}
1565
#[cfg(test)]
#[cfg(not(msrv))] // core::mem::offset_of! isn't stable in our MSRV
mod test {
    use super::*;

    // Verifies that `StaticArcStrInner<Buf>` can be type-punned as a
    // `ThinInner` header for a given buffer type: alignment is at least 8, and
    // every field lands at the offsets the raw-pointer math (`OFFSET_*`)
    // assumes.
    fn sasi_layout_check<Buf>() {
        assert!(align_of::<StaticArcStrInner<Buf>>() >= 8);
        assert_eq!(
            core::mem::offset_of!(StaticArcStrInner<Buf>, count_flag),
            OFFSET_COUNTFLAGS
        );
        assert_eq!(
            core::mem::offset_of!(StaticArcStrInner<Buf>, len_flag),
            OFFSET_LENFLAGS
        );
        assert_eq!(
            core::mem::offset_of!(StaticArcStrInner<Buf>, data),
            OFFSET_DATA
        );
        assert_eq!(
            core::mem::offset_of!(ThinInner, count_flag),
            core::mem::offset_of!(StaticArcStrInner::<Buf>, count_flag),
        );
        assert_eq!(
            core::mem::offset_of!(ThinInner, len_flag),
            core::mem::offset_of!(StaticArcStrInner::<Buf>, len_flag),
        );
        assert_eq!(
            core::mem::offset_of!(ThinInner, data),
            core::mem::offset_of!(StaticArcStrInner::<Buf>, data),
        );
    }

    #[test]
    fn verify_type_pun_offsets_sasi_big_bufs() {
        // `ThinInner` itself must match the hand-computed offset constants.
        assert_eq!(
            core::mem::offset_of!(ThinInner, count_flag),
            OFFSET_COUNTFLAGS,
        );
        assert_eq!(core::mem::offset_of!(ThinInner, len_flag), OFFSET_LENFLAGS);
        assert_eq!(core::mem::offset_of!(ThinInner, data), OFFSET_DATA);

        assert!(align_of::<ThinInner>() >= 8);

        // Exercise a spread of buffer sizes, including ones just below/above
        // alignment boundaries.
        sasi_layout_check::<[u8; 0]>();
        sasi_layout_check::<[u8; 1]>();
        sasi_layout_check::<[u8; 2]>();
        sasi_layout_check::<[u8; 3]>();
        sasi_layout_check::<[u8; 4]>();
        sasi_layout_check::<[u8; 5]>();
        sasi_layout_check::<[u8; 15]>();
        sasi_layout_check::<[u8; 16]>();
        sasi_layout_check::<[u8; 64]>();
        sasi_layout_check::<[u8; 128]>();
        sasi_layout_check::<[u8; 1024]>();
        sasi_layout_check::<[u8; 4095]>();
        sasi_layout_check::<[u8; 4096]>();
    }
}
1625
// Concurrency tests run under `loom`, which exhaustively explores thread
// interleavings of the atomic operations in `clone`/`drop`/`leak`.
#[cfg(all(test, loom))]
mod loomtest {
    use super::ArcStr;
    use loom::sync::Arc;
    use loom::thread;

    // Two threads cloning the same `ArcStr` concurrently: every clone must
    // still point at the same allocation (i.e. no clone ever reallocates).
    #[test]
    fn cloning_threads() {
        loom::model(|| {
            let a = ArcStr::from("abcdefgh");
            let addr = a.as_ptr() as usize;

            let a1 = Arc::new(a);
            let a2 = a1.clone();

            let t1 = thread::spawn(move || {
                let b: ArcStr = (*a1).clone();
                assert_eq!(b.as_ptr() as usize, addr);
            });
            let t2 = thread::spawn(move || {
                let b: ArcStr = (*a2).clone();
                assert_eq!(b.as_ptr() as usize, addr);
            });

            t1.join().unwrap();
            t2.join().unwrap();
        });
    }

    // Two threads racing to drop the last references; exercises the
    // `fetch_sub`/`Acquire` hand-off in `Drop` (loom's leak checker catches
    // double-free or missed-free under any interleaving).
    #[test]
    fn drop_timing() {
        loom::model(|| {
            let a1 = alloc::vec![
                ArcStr::from("s1"),
                ArcStr::from("s2"),
                ArcStr::from("s3"),
                ArcStr::from("s4"),
            ];
            let a2 = a1.clone();

            let t1 = thread::spawn(move || {
                let mut a1 = a1;
                while let Some(s) = a1.pop() {
                    assert!(s.starts_with("s"));
                }
            });
            let t2 = thread::spawn(move || {
                let mut a2 = a2;
                while let Some(s) = a2.pop() {
                    assert!(s.starts_with("s"));
                }
            });

            t1.join().unwrap();
            t2.join().unwrap();
        });
    }

    // One thread drops while another leaks the same string: the leaked
    // `&'static str` must remain valid regardless of interleaving.
    #[test]
    fn leak_drop() {
        loom::model(|| {
            let a1 = ArcStr::from("foo");
            let a2 = a1.clone();

            let t1 = thread::spawn(move || {
                drop(a1);
            });
            let t2 = thread::spawn(move || a2.leak());
            t1.join().unwrap();
            let leaked: &'static str = t2.join().unwrap();
            assert_eq!(leaked, "foo");
        });
    }
}
1697}