cowstr/
rcstring.rs

#![allow(clippy::inline_always)]

use core::mem::{align_of, size_of};
use std::alloc;
use std::alloc::Layout;
use std::ptr::{addr_of_mut, NonNull};

use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering};

use crate::Error;

/// Wraps raw memory as a reference-counted string.
///
/// Rust currently has no support for representing/reallocating DSTs that contain a
/// header. To solve this problem we store the raw allocated `*mut u8` pointing to our
/// allocation and cast it opportunistically into the desired data/header parts. This is
/// safe because `RcString` constructors ensure that objects are always properly
/// initialized. This approach fixes the stacked-borrows bugs Miri reported in an earlier
/// implementation.
///
/// A `RcString` must be explicitly deallocated, otherwise it will leak.
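///
/// Memory layout (a sketch; the `NonNull<u8>` stored in `RcString` points at the start of
/// the allocation):
///
/// ```text
/// +--------------------------------------+--------------------------------------+
/// | RcStringHeader                       | string data (capacity bytes)         |
/// | refcount, len, capacity, guard_active| [0..len) initialized UTF-8,          |
/// |                                      | [len..capacity) uninitialized        |
/// +--------------------------------------+--------------------------------------+
/// ```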
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct RcString(NonNull<u8>);

// # Safety
//
// RcString is for the most part immutable when shared. Mutations can happen only when the
// refcount is one or the unsafe try_push* APIs are used.
//
//  - Reference counting is atomic.
//  - `try_push()` is Sync by using a ThreadCell and only incrementing the `.len` atomically
//    once the data is in place.
unsafe impl Send for RcString {}
unsafe impl Sync for RcString {}

// ctor/dtor/allocation
impl RcString {
    /// Creates a `RcString` from a raw pointer and layout.
    /// Calls `alloc::handle_alloc_error()` when the pointer is null.
    #[inline]
    unsafe fn from_ptr(allocated: *mut u8, layout: Layout) -> Self {
        if allocated.is_null() {
            alloc::handle_alloc_error(layout);
        }
        Self(NonNull::new_unchecked(allocated))
    }

    /// Allocates a new empty `RcString` with at least the given capacity.
    pub(crate) fn allocate(capacity: usize) -> Self {
        unsafe {
            // Safety: uses the const RCSTRING_ALIGN, which is defined as the align_of the header.
            let layout = Layout::from_size_align_unchecked(
                capacity
                    .checked_add(RCSTRING_HEADER_SIZE)
                    .expect("Capacity overflow"),
                RCSTRING_ALIGN,
            );

            let allocated = Self::from_ptr(alloc::alloc(layout), layout);

            // Safety: `allocated` is uninitialized; it becomes initialized below.
            // This is not a problem since we only access it via addr_of_mut/ptr.write().
            let rcstring = allocated.0.cast::<RcStringHeader>().as_ptr();
            addr_of_mut!((*rcstring).refcount).write(AtomicUsize::new(1usize));
            addr_of_mut!((*rcstring).len).write(AtomicUsize::new(0));
            addr_of_mut!((*rcstring).capacity).write(layout.size() - RCSTRING_HEADER_SIZE);
            addr_of_mut!((*rcstring).guard_active).write(AtomicBool::new(false));

            // everything safe and sound now
            allocated
        }
    }

    /// Grows an `RcString` to have free space for at least `reserve` bytes.
    ///
    /// # Safety
    ///
    /// Must be called with a reference count of 1.
    pub(crate) unsafe fn grow(mut self, reserve: usize, tryreserve: bool) -> Result<Self, Error> {
        // Safety: Internal interface, the caller has to ensure that the refcount is one.
        // The same holds for all debug_asserts further down.
        debug_assert_eq!(self.strong_count(), 1);
        let layout = self.current_layout();
        let min_new_size = (self.len_relaxed() + RCSTRING_HEADER_SIZE)
            .checked_add(reserve)
            .expect("Capacity overflow");

        let new_size = DEFAULT_ALLOCATION_STRATEGY
            .grow(layout.size(), min_new_size)
            .expect("Capacity overflow");

        // really grow
        if new_size > layout.size() {
            let ptr = alloc::realloc(self.0.as_ptr(), layout, new_size);
            if tryreserve && ptr.is_null() {
                return Err(Error::TryReserveError);
            }
            self = Self::from_ptr(ptr, layout);
            self.header_mut().capacity = new_size - RCSTRING_HEADER_SIZE;
        }

        Ok(self)
    }

    /// Shrinks an `RcString` to at minimum `new_capacity` or its current length, whichever is
    /// larger.
    ///
    /// # Safety
    ///
    /// Must be called with a reference count of 1.
    pub(crate) unsafe fn shrink(mut self, new_capacity: usize) -> Self {
        debug_assert_eq!(self.strong_count(), 1);

        let layout = self.current_layout();

        let new_size = DEFAULT_ALLOCATION_STRATEGY
            .align(self.len_relaxed().max(new_capacity) + RCSTRING_HEADER_SIZE);

        // really shrink, or keep the old allocation when shrinking fails
        if new_size < layout.size() {
            let ptr = alloc::realloc(self.0.as_ptr(), layout, new_size);
            if !ptr.is_null() {
                let mut new_self = Self::from_ptr(ptr, layout);
                new_self.header_mut().capacity = new_size - RCSTRING_HEADER_SIZE;
                self = new_self;
            }
        }

        self
    }

    /// Creates a new `RcString` by copying from a `&[u8]` with some spare capacity.
    ///
    /// # Safety
    ///
    /// The source must be valid utf-8.
    unsafe fn from_bytes_unchecked(source: &[u8], reserve: usize) -> Self {
        let mut allocated = Self::allocate(
            source
                .len()
                .checked_add(reserve)
                .expect("Capacity overflow"),
        );

        std::ptr::copy_nonoverlapping(source.as_ptr(), allocated.data_mut(), source.len());
        allocated.len_set_release(source.len());

        allocated
    }

    /// Creates a new `RcString` by copying from a `&str` with some spare capacity.
    #[inline]
    pub(crate) fn from_str(source: &str, reserve: usize) -> Self {
        // Safety: source is a valid utf-8 string
        unsafe { Self::from_bytes_unchecked(source.as_bytes(), reserve) }
    }

    /// Unconditionally deallocates a `RcString`.
    ///
    /// # Safety
    ///
    /// Must only be called with a valid `RcString` pointer whose refcount is zero.
    // The side effect here is that the memory becomes freed, which we can't easily test
    #[mutants::skip]
    pub(crate) unsafe fn dealloc(self) {
        debug_assert_eq!(self.strong_count(), 0);
        let layout = self.current_layout();
        alloc::dealloc(self.0.as_ptr(), layout);
    }

    /// Returns the current layout.
    #[inline]
    fn current_layout(self) -> Layout {
        unsafe {
            // Safety: &self is a valid RcString
            Layout::from_size_align_unchecked(
                self.header().capacity + RCSTRING_HEADER_SIZE,
                RCSTRING_ALIGN,
            )
        }
    }
}

// push etc
impl RcString {
    /// Appends the given `char` to the end of this `RcString`.
    ///
    /// # Safety
    ///
    /// Must have enough spare capacity available. The capacity has to be extended before
    /// calling this.
    #[inline]
    pub(crate) unsafe fn push(&mut self, ch: char) {
        let mut buf = [0u8; 4];
        let bytes = ch.encode_utf8(&mut buf).as_bytes();

        debug_assert!(self.spare_capacity() >= bytes.len());

        std::ptr::copy_nonoverlapping(
            bytes.as_ptr(),
            self.data_mut().add(self.header().len_relaxed()),
            bytes.len(),
        );
        self.len_add_release(bytes.len());
    }

    /// Tries to append the given `char` to the end of this `RcString`.
    /// This locks the string for appending and will fail when there is not enough spare capacity.
    ///
    /// # Safety
    ///
    /// * Must only be called from a `CowStrPushGuard` (or a single owner); calling it
    ///   concurrently appends in random order (but safely).
    /// * Violates the immutability promise: all `CowStr`s cloned from this see the mutation.
    pub(crate) unsafe fn try_push(&self, ch: char) -> Result<&str, Error> {
        let mut buf = [0u8; 4];
        let bytes = ch.encode_utf8(&mut buf).as_bytes();

        if self.spare_capacity() >= bytes.len() {
            std::ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                // can be safely cast to *mut here because we write into the uninitialized
                // threadcell protected part
                self.data().cast_mut().add(self.header().len_relaxed()),
                bytes.len(),
            );

            let start = self.len_acquire();
            self.len_add_release(bytes.len());
            Ok(&self.as_str()[start..start + bytes.len()])
        } else {
            Err(Error::Capacity)
        }
    }

    /// Appends the given `str` to the end of this `RcString`.
    ///
    /// # Safety
    ///
    /// Must have enough spare capacity available. The capacity has to be extended before
    /// calling this.
    #[inline]
    pub(crate) unsafe fn push_str(&mut self, s: &str) {
        debug_assert!(self.spare_capacity() >= s.len());

        std::ptr::copy_nonoverlapping(
            s.as_ptr(),
            self.data_mut().add(self.header().len_relaxed()),
            s.len(),
        );
        self.len_add_release(s.len());
    }

    /// Tries to append the given `str` to the end of this `RcString`. Self should be
    /// acquired by the current thread; fails when there is not enough spare capacity.
    ///
    /// # Safety
    ///
    /// * Must only be called from a `CowStrPushGuard` (or a single owner); calling it
    ///   concurrently appends in random order (but safely).
    /// * Violates the immutability promise: all `CowStr`s cloned from this see the mutation.
    pub(crate) unsafe fn try_push_str(&self, s: &str) -> Result<&str, Error> {
        if self.spare_capacity() >= s.len() {
            std::ptr::copy_nonoverlapping(
                s.as_ptr(),
                // can be safely cast to *mut here because we write into the uninitialized
                // threadcell protected part
                self.data().cast_mut().add(self.header().len_relaxed()),
                s.len(),
            );
            let start = self.len_acquire();
            self.len_add_release(s.len());
            Ok(&self.as_str()[start..start + s.len()])
        } else {
            Err(Error::Capacity)
        }
    }

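    // Guard protocol (a sketch of how the helpers below are meant to interact with the
    // `try_push*` methods above): a `CowStrPushGuard` first wins `guard_active` via the
    // Acquire compare_exchange in `acquire_guard()`, appends through `try_push*` (bytes are
    // written into the uninitialized tail before `len` is published with Release), and
    // finally clears the flag with the Release store in `release_guard()` so the next
    // guard's Acquire observes all previously published data.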
    // helper only used by the Guard
    #[inline]
    pub(crate) fn acquire_guard(self) -> bool {
        self.header()
            .guard_active
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_ok()
    }

    // helper only used by the Guard
    #[inline]
    pub(crate) fn release_guard(self) {
        self.header().guard_active.store(false, Ordering::Release);
    }

    // helper only used by Debug
    #[inline]
    #[mutants::skip]
    pub(crate) fn has_guard(self) -> bool {
        self.header().guard_active.load(Ordering::Relaxed)
    }

    /// Removes the last character and returns it.
    #[inline]
    pub(crate) unsafe fn pop(&mut self) -> Option<char> {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);
        let ch = self.as_str().chars().next_back()?;
        self.len_sub_release(ch.len_utf8());
        Some(ch)
    }

    /// Truncates the string to zero length. Keeps the capacity.
    #[inline]
    pub(crate) unsafe fn clear(&mut self) {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);
        self.len_set_release(0);
    }

    /// Truncates the string to the supplied length. Keeps the capacity.
    #[inline]
    pub(crate) unsafe fn truncate(&mut self, new_len: usize) {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);
        // Safety: caller checks already.
        debug_assert!(self.as_str().is_char_boundary(new_len));
        self.len_set_release(new_len);
    }

    /// Removes the character at byte index `idx` and returns it.
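    /// (e.g. removing index 1 from "abc" shifts the tail left, yields "ac" and returns 'b')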
    pub(crate) unsafe fn remove(&mut self, idx: usize) -> char {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);

        let Some(ch) = self.as_str()[idx..].chars().next() else {
            panic!("cannot remove a char from the end of a string")
        };

        let next = idx + ch.len_utf8();
        let len = self.len_relaxed();

        let data = self.data_mut();
        std::ptr::copy(data.add(next), data.add(idx), len - next);
        self.len_sub_release(next - idx);

        ch
    }

    /// Replaces a range of the `RcString` with another `str`.
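    /// (e.g. replacing range `0..5` of "hello world" with "hey" shifts the tail " world"
    /// left and yields "hey world"; the caller must ensure sufficient capacity)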
    pub(crate) unsafe fn replace_range<R>(&mut self, range: R, replace_with: &str)
    where
        R: std::slice::SliceIndex<str, Output = str>,
    {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);

        // PLANNED: this impl is a hack, make it more rusty
        let removed_slice = &self.as_str()[range];
        let old_len = self.as_str().len();
        let new_len = old_len + replace_with.len() - removed_slice.len();
        debug_assert!(self.capacity() >= new_len);

        let removed_start: usize = removed_slice
            .as_ptr()
            .offset_from(self.as_str().as_ptr())
            .try_into()
            .unwrap_unchecked();

        let tail_start = removed_start + removed_slice.len();
        let insert_end = removed_start + replace_with.len();

        let tail_len = old_len - tail_start;

        self.len_set_release(new_len);
        let data = self.data_mut();

        // shift the tail
        std::ptr::copy(data.add(tail_start), data.add(insert_end), tail_len);
        // copy replace_with
        std::ptr::copy_nonoverlapping(
            replace_with.as_ptr(),
            data.add(removed_start),
            replace_with.len(),
        );
    }
}

// conversions
impl RcString {
    /// Returns self as `&str`.
    #[inline]
    pub(crate) fn as_str(&self) -> &str {
        // Safety: RcString always holds a valid utf-8 string
        unsafe {
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
                self.data(),
                self.len_acquire(),
            ))
        }
    }

    /// Returns self as `&mut str`.
    ///
    /// # Safety
    ///
    /// Must be called with a refcount of one.
    #[inline]
    pub(crate) unsafe fn as_mut_str(&mut self) -> &mut str {
        // Safety: caller must only call this on a mutable object.
        debug_assert_eq!(self.strong_count(), 1);
        // Safety: RcString always holds a valid utf-8 string
        std::str::from_utf8_unchecked_mut(std::slice::from_raw_parts_mut(
            self.data_mut(),
            self.len_acquire(),
        ))
    }
}

// header accessors
//
// # Safety
//
// A RcString always wraps a properly aligned and initialized block of memory. We need to view
// it as a header followed by capacity*bytes, where the bytes above header.len are not yet
// initialized. Miri's stacked borrows won't let us make up objects within this block easily.
// However, casting this opportunistically whenever needed is sound and safe.
//
// NOTE: I would be glad if someone can point out a less hacky way to implement such kinds of DSTs.
impl RcString {
    #[inline(always)]
    fn header(&self) -> &RcStringHeader {
        unsafe { std::mem::transmute::<_, &RcStringHeader>(self.0) }
    }
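    // A sketch of the same view without `transmute`, should the NOTE above ever be
    // revisited: `unsafe { self.0.cast::<RcStringHeader>().as_ref() }` reinterprets the
    // pointer to the start of the allocation as a reference to the header.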

    #[inline(always)]
    fn header_mut(&mut self) -> &mut RcStringHeader {
        unsafe { std::mem::transmute::<_, &mut RcStringHeader>(self.0) }
    }

    #[inline(always)]
    const fn data(&self) -> *const u8 {
        unsafe { self.0.as_ptr().add(RCSTRING_HEADER_SIZE) }
    }

    #[inline(always)]
    fn data_mut(&mut self) -> *mut u8 {
        unsafe { self.0.as_ptr().add(RCSTRING_HEADER_SIZE) }
    }

    #[inline(always)]
    pub(crate) fn spare_capacity(self) -> usize {
        self.header().spare_capacity()
    }

    #[inline(always)]
    pub(crate) fn capacity(self) -> usize {
        self.header().capacity()
    }

    #[inline(always)]
    pub(crate) fn strong_count(self) -> usize {
        self.header().strong_count()
    }

    #[inline(always)]
    pub(crate) fn increment_strong_count(self) {
        self.header().increment_strong_count();
    }

    #[inline(always)]
    #[must_use = "Would leak when 'true' is ignored"]
    pub(crate) fn decrement_strong_count(self) -> bool {
        self.header().decrement_strong_count()
    }

    /// Get len from the atomic or Cell with Relaxed semantics.
    #[inline(always)]
    #[mutants::skip]
    pub(crate) fn len_relaxed(self) -> usize {
        self.header().len_relaxed()
    }

    /// Get len from the atomic or Cell with Acquire semantics.
    #[inline(always)]
    fn len_acquire(self) -> usize {
        self.header().len_acquire()
    }

    /// Add 'n' to the length with Release semantics.
    ///
    /// # Safety
    ///
    /// The new len must comply with valid utf-8 encoding.
    #[inline(always)]
    unsafe fn len_add_release(self, n: usize) {
        self.header().len_add_release(n);
    }

    /// Subtract 'n' from the length with Release semantics.
    ///
    /// # Safety
    ///
    /// The new len must comply with valid utf-8 encoding.
    #[inline(always)]
    unsafe fn len_sub_release(self, n: usize) {
        self.header().len_sub_release(n);
    }

    /// Sets a new length with Release semantics.
    ///
    /// # Safety
    ///
    /// The new len must comply with valid utf-8 encoding.
    #[inline(always)]
    unsafe fn len_set_release(self, n: usize) {
        self.header().len_set_release(n);
    }
}

impl std::fmt::Debug for RcString {
    #[mutants::skip]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        f.debug_struct("RcString")
            .field("header", self.header())
            .field("str", &self.as_str())
            .finish()
    }
}

#[derive(Debug)]
#[repr(C, align(32))]
pub(crate) struct RcStringHeader {
    refcount: AtomicUsize,
    len: AtomicUsize,
    capacity: usize,
    guard_active: AtomicBool,
}

/// The size of the header of a `CowStr` allocated memory block. This is mostly an
/// implementation detail, but it is exported to give the user more control over memory
/// allocations. `CowStr::with_capacity(65536 - cowstr::RCSTRING_HEADER_SIZE)` will allocate a
/// `CowStr` that uses exactly 64 KiB of memory.
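///
/// For example (a sketch using the constructor mentioned above):
///
/// ```
/// // The backing allocation is exactly 64 KiB, header included.
/// let _s = cowstr::CowStr::with_capacity(65536 - cowstr::RCSTRING_HEADER_SIZE);
/// ```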
pub const RCSTRING_HEADER_SIZE: usize = size_of::<RcStringHeader>();

/// The alignment/size granularity of a `CowStr` allocated memory block. This is mostly an
/// implementation detail. To avoid tiny allocations and memory fragmentation, allocated
/// lengths are a multiple of this alignment. Note that an allocation includes a header.
pub const RCSTRING_ALIGN: usize = align_of::<RcStringHeader>();

impl RcStringHeader {
    /// Get len from the atomic or Cell with Relaxed semantics.
    #[inline(always)]
    fn len_relaxed(&self) -> usize {
        self.len.load(Ordering::Relaxed)
    }

    /// Get len from the atomic or Cell with Acquire semantics.
    #[inline(always)]
    fn len_acquire(&self) -> usize {
        self.len.load(Ordering::Acquire)
    }

    /// Add 'n' to the length with Release semantics.
    #[inline(always)]
    unsafe fn len_add_release(&self, n: usize) {
        self.len.fetch_add(n, Ordering::Release);
    }

    /// Subtract 'n' from the length with Release semantics.
    #[inline(always)]
    unsafe fn len_sub_release(&self, n: usize) {
        self.len.fetch_sub(n, Ordering::Release);
    }

    /// Sets a new length with Release semantics.
    #[inline(always)]
    unsafe fn len_set_release(&self, n: usize) {
        self.len.store(n, Ordering::Release);
    }

    /// Returns the number of bytes that can be pushed without reallocation. This uses an
    /// Acquire load of `len`, allowing any further access to `len` to be Relaxed.
    #[inline]
    pub(crate) fn spare_capacity(&self) -> usize {
        self.capacity - self.len_acquire()
    }

    /// Returns the capacity of the allocation.
    #[inline]
    pub(crate) const fn capacity(&self) -> usize {
        self.capacity
    }

    #[inline]
    pub(crate) fn strong_count(&self) -> usize {
        self.refcount.load(Ordering::Relaxed)
    }

    // NOTE: incrementing the strong count can always be Relaxed because for any existing
    //       object the count is:
    //
    //        * '1': we own the object (mutably), no one else can decrement the strong count
    //        * '>1': the object is immutable/shared and may be concurrently decremented, but
    //          since we own a reference here it can't be dropped and we are going to
    //          increment it to at least 2.
    pub(crate) fn increment_strong_count(&self) {
        self.refcount.fetch_add(1, Ordering::Relaxed);
    }

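    // NOTE: decrementing uses a Release fetch_sub so that all prior writes to the string
    //       happen-before the decrement; the thread that observes the count reaching zero
    //       issues an Acquire fence before the caller may deallocate, making those writes
    //       visible to it (the same pattern `std::sync::Arc` uses).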
    #[must_use = "Would leak when 'true' is ignored"]
    pub(crate) fn decrement_strong_count(&self) -> bool {
        if self.refcount.fetch_sub(1, Ordering::Release) == 1 {
            fence(Ordering::Acquire);
            true
        } else {
            false
        }
    }
}

const KB: usize = 1024;
const MB: usize = KB * 1024;
const GB: usize = MB * 1024;

/// The allocation strategy used when resizing an allocation.
struct AllocationStrategy {
    /// Minimum allocation size returned.
    pub min_allocation: usize,
    /// First cutoff: below this, allocations are doubled; between this and `grow_const` they are increased by 50%.
    pub grow_half: usize,
    /// Second cutoff: above this, allocations grow by a constant `grow_const / 2`.
    pub grow_const: usize,
}

const DEFAULT_ALLOCATION_STRATEGY: AllocationStrategy = AllocationStrategy {
    min_allocation: 32,
    grow_half: 8 * MB,
    grow_const: /* 1 */ GB,
};
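// Worked example (a sketch of the numbers above): a 1 MB allocation that needs to grow
// doubles to 2 MB; a 16 MB allocation (above `grow_half`) grows by 50% to 24 MB; a 2 GB
// allocation (above `grow_const`) grows by the constant `grow_const / 2` = 512 MB to 2.5 GB.
// Results are then rounded by `align()` below.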

impl AllocationStrategy {
    /// Returns the new size that shall be allocated: either the size given by the allocation
    /// strategy or, when that is not sufficient, the `minimum_needed` size, rounded via
    /// `align()` to the `min_allocation` granularity.
    #[inline]
    fn grow(&self, old_size: usize, minimum_needed: usize) -> Option<usize> {
        Some(
            self.align(
                if old_size < self.min_allocation {
                    self.min_allocation
                } else if old_size < self.grow_half {
                    old_size.checked_mul(2)?
                } else if old_size < self.grow_const {
                    old_size.checked_add(old_size / 2)?
                } else {
                    old_size.checked_add(self.grow_const / 2)?
                }
                .max(minimum_needed),
            ),
        )
    }

    /// Rounds/aligns the size to the `min_allocation` granularity.
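    /// (a sketch of the effect with `min_allocation = 32`: this variant maps 33 to 63 and
    /// 64 to 95, i.e. the result always has the low bits set)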
    #[cfg(not(feature = "nightly_int_roundings"))]
    #[inline]
    #[mutants::skip]
    const fn align(&self, size: usize) -> usize {
        size | (self.min_allocation - 1)
    }

    /// Rounds/aligns the size up to a multiple of `min_allocation` bytes.
    #[cfg(feature = "nightly_int_roundings")]
    #[inline]
    #[mutants::skip]
    fn align(&self, size: usize) -> usize {
        size.next_multiple_of(self.min_allocation)
    }
}
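
// A minimal test sketch exercising the internal API under the documented safety contracts:
// a freshly allocated `RcString` starts with a refcount of one, so it may be mutated and
// must be deallocated explicitly once the count drops to zero.
#[cfg(test)]
mod rcstring_sketch_tests {
    use super::*;

    #[test]
    fn from_str_roundtrip_and_dealloc() {
        let mut s = RcString::from_str("hello", 8);
        assert_eq!(s.as_str(), "hello");
        assert!(s.spare_capacity() >= 8);

        // Safety: refcount is 1 and there is spare capacity for the pushed char.
        unsafe { s.push('!') };
        assert_eq!(s.as_str(), "hello!");

        // Safety: dropping the only reference; dealloc once the count hits zero.
        if s.decrement_strong_count() {
            unsafe { s.dealloc() };
        }
    }

    #[test]
    fn allocation_strategy_growth() {
        let strategy = &DEFAULT_ALLOCATION_STRATEGY;
        // Sizes below `min_allocation` are clamped to `min_allocation` (then aligned).
        assert!(strategy.grow(0, 1).unwrap() >= strategy.min_allocation);
        // Sizes below `grow_half` are (at least) doubled.
        assert!(strategy.grow(1024, 1025).unwrap() >= 2048);
    }
}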