rkyv/string/
repr.rs

1//! An archived string representation that supports inlining short strings.
2
3use core::{
4    marker::PhantomPinned,
5    mem,
6    ptr::{self, copy_nonoverlapping, write_bytes},
7    slice, str,
8};
9
10use munge::munge;
11use rancor::{Panic, ResultExt as _, Source};
12
13use crate::{
14    primitive::{ArchivedIsize, ArchivedUsize, FixedIsize, FixedUsize},
15    seal::Seal,
16    Place, Portable,
17};
18
19#[derive(Clone, Copy, Portable)]
20#[rkyv(crate)]
21#[repr(C)]
22struct OutOfLineRepr {
23    len: ArchivedUsize,
24    offset: ArchivedIsize,
25    _phantom: PhantomPinned,
26}
27
28/// The maximum number of bytes that can be inlined.
29pub const INLINE_CAPACITY: usize = mem::size_of::<OutOfLineRepr>();
30/// The maximum number of bytes that can be out-of-line.
31pub const OUT_OF_LINE_CAPACITY: usize = (1 << (FixedUsize::BITS - 2)) - 1;
32
33#[derive(Clone, Copy, Portable)]
34#[rkyv(crate)]
35#[repr(C)]
36struct InlineRepr {
37    bytes: [u8; INLINE_CAPACITY],
38}
39
40/// An archived string representation that can inline short strings.
41#[derive(Portable)]
42#[rkyv(crate)]
43#[repr(C)]
44pub union ArchivedStringRepr {
45    out_of_line: OutOfLineRepr,
46    inline: InlineRepr,
47}
48
49impl ArchivedStringRepr {
50    /// Returns whether the representation is inline.
51    #[inline]
52    pub fn is_inline(&self) -> bool {
53        unsafe { self.inline.bytes[0] & 0xc0 != 0x80 }
54    }
55
56    /// Returns the offset of the representation.
57    ///
58    /// # Safety
59    ///
60    /// The internal representation must be out-of-line.
61    #[inline]
62    pub unsafe fn out_of_line_offset(&self) -> isize {
63        // SAFETY: The caller has guaranteed that the internal representation is
64        // out-of-line
65        unsafe { self.out_of_line.offset.to_native() as isize }
66    }
67
68    /// Returns a pointer to the bytes of the string.
69    #[inline]
70    pub fn as_ptr(&self) -> *const u8 {
71        if self.is_inline() {
72            unsafe { self.inline.bytes.as_ptr() }
73        } else {
74            unsafe {
75                (self as *const Self)
76                    .cast::<u8>()
77                    .offset(self.out_of_line_offset())
78            }
79        }
80    }
81
82    /// Returns a mutable pointer to the bytes of the string.
83    #[inline]
84    pub fn as_mut_ptr(this: Seal<'_, Self>) -> *mut u8 {
85        let this = unsafe { this.unseal_unchecked() };
86        if this.is_inline() {
87            unsafe { this.inline.bytes.as_mut_ptr() }
88        } else {
89            unsafe {
90                (this as *mut Self)
91                    .cast::<u8>()
92                    .offset(this.out_of_line_offset())
93            }
94        }
95    }
96
97    /// Returns the length of the string.
98    #[inline]
99    pub fn len(&self) -> usize {
100        if self.is_inline() {
101            unsafe {
102                self.inline
103                    .bytes
104                    .iter()
105                    .position(|b| *b == 0xff)
106                    .unwrap_or(INLINE_CAPACITY)
107            }
108        } else {
109            let len = unsafe { self.out_of_line.len.to_native() };
110            // Little-endian: remove the 7th and 8th bits
111            #[cfg(not(feature = "big_endian"))]
112            let len = (len & 0b0011_1111) | ((len & !0xff) >> 2);
113            // Big-endian: remove the top two bits
114            #[cfg(feature = "big_endian")]
115            let len = len & (FixedUsize::MAX >> 2);
116            len as usize
117        }
118    }
119
120    /// Returns whether the string is empty.
121    #[inline]
122    pub fn is_empty(&self) -> bool {
123        self.len() == 0
124    }
125
126    /// Returns a pointer to the string as a `str`.
127    #[inline]
128    pub fn as_str_ptr(&self) -> *const str {
129        ptr_meta::from_raw_parts(self.as_ptr().cast(), self.len())
130    }
131
132    /// Returns a slice of the bytes of the string.
133    #[inline]
134    pub fn as_bytes(&self) -> &[u8] {
135        unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) }
136    }
137
138    /// Returns a mutable slice of the bytes of the string.
139    #[inline]
140    pub fn as_bytes_seal(this: Seal<'_, Self>) -> Seal<'_, [u8]> {
141        let len = this.len();
142        let slice =
143            unsafe { slice::from_raw_parts_mut(Self::as_mut_ptr(this), len) };
144        Seal::new(slice)
145    }
146
147    /// Returns a reference to the string as a `str`.
148    #[inline]
149    pub fn as_str(&self) -> &str {
150        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
151    }
152
153    /// Returns a mutable reference to the string as a `str`.
154    #[inline]
155    pub fn as_str_seal(this: Seal<'_, Self>) -> Seal<'_, str> {
156        let bytes =
157            unsafe { Seal::unseal_unchecked(Self::as_bytes_seal(this)) };
158        Seal::new(unsafe { str::from_utf8_unchecked_mut(bytes) })
159    }
160
161    /// Emplaces a new inline representation for the given `str`.
162    ///
163    /// This function is guaranteed not to write any uninitialized bytes to
164    /// `out`.
165    ///
166    /// # Safety
167    ///
168    /// - The length of `value` must be less than or equal to
169    ///   [`INLINE_CAPACITY`].
170    /// - `out` must point to a valid location to write the inline
171    ///   representation.
172    #[inline]
173    pub unsafe fn emplace_inline(value: &str, out: *mut Self) {
174        debug_assert!(value.len() <= INLINE_CAPACITY);
175
176        // SAFETY: The caller has guaranteed that `out` points to a
177        // dereferenceable location.
178        let out_bytes = unsafe { ptr::addr_of_mut!((*out).inline.bytes) };
179
180        // SAFETY: The caller has guaranteed that the length of `value` is less
181        // than or equal to `INLINE_CAPACITY`. We know that `out_bytes` is a
182        // valid pointer to bytes because it is a subfield of `out` which the
183        // caller has guaranteed points to a valid location.
184        unsafe {
185            write_bytes(out_bytes, 0xff, 1);
186            copy_nonoverlapping(
187                value.as_bytes().as_ptr(),
188                out_bytes.cast(),
189                value.len(),
190            );
191        }
192    }
193
194    /// Emplaces a new out-of-line representation for the given `str`.
195    ///
196    /// # Safety
197    ///
198    /// The length of `str` must be greater than [`INLINE_CAPACITY`] and less
199    /// than or equal to [`OUT_OF_LINE_CAPACITY`].
200    pub unsafe fn try_emplace_out_of_line<E: Source>(
201        value: &str,
202        target: usize,
203        out: Place<Self>,
204    ) -> Result<(), E> {
205        let (len, offset) = unsafe {
206            munge! {
207                let ArchivedStringRepr {
208                    out_of_line: OutOfLineRepr { len, offset, _phantom: _ }
209                } = out;
210            }
211            (len, offset)
212        };
213
214        let l = value.len() as FixedUsize;
215        // Little-endian: insert 10 as the 7th and 8th bits
216        #[cfg(not(feature = "big_endian"))]
217        let l = (l & 0b0011_1111) | 0b1000_0000 | ((l & !0b0011_1111) << 2);
218        // Big-endian: set the top two bits to 10
219        #[cfg(feature = "big_endian")]
220        let l = l & (FixedUsize::MAX >> 2) | (1 << FixedUsize::BITS - 1);
221        len.write(ArchivedUsize::from_native(l));
222
223        let off = crate::rel_ptr::signed_offset(out.pos(), target)?;
224        offset.write(ArchivedIsize::from_native(off as FixedIsize));
225
226        Ok(())
227    }
228
229    /// Emplaces a new out-of-line representation for the given `str`.
230    ///
231    /// # Panics
232    ///
233    /// - The offset calculated for the repr does not fit in an `isize`
234    /// - The offset calculated for the repr exceeds the offset storage
235    ///
236    /// # Safety
237    ///
238    /// The length of `str` must be greater than [`INLINE_CAPACITY`] and less
239    /// than or equal to [`OUT_OF_LINE_CAPACITY`].
240    #[inline]
241    pub unsafe fn emplace_out_of_line(
242        value: &str,
243        target: usize,
244        out: Place<Self>,
245    ) {
246        // SAFETY: The safety conditions for `emplace_out_of_line()` are the
247        // same as the safety conditions for `try_emplace_out_of_line()`.
248        unsafe {
249            Self::try_emplace_out_of_line::<Panic>(value, target, out)
250                .always_ok()
251        }
252    }
253}
254
#[cfg(feature = "bytecheck")]
const _: () = {
    use core::{error::Error, fmt};

    use bytecheck::{rancor::Fallible, CheckBytes};
    use rancor::fail;

    /// An error resulting from an invalid string representation.
    ///
    /// Strings that are out-of-line must have a length greater than
    /// [`INLINE_CAPACITY`]; anything that short is expected to be inline.
    #[derive(Debug)]
    pub struct CheckStringReprError;

    impl fmt::Display for CheckStringReprError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str(
                "String representation was out-of-line but the length was too \
                 short",
            )
        }
    }

    impl Error for CheckStringReprError {}

    // Only the representation itself is checked here: the context is unused
    // and the string bytes behind an out-of-line offset are not inspected.
    unsafe impl<C> CheckBytes<C> for ArchivedStringRepr
    where
        C: Fallible + ?Sized,
        C::Error: Source,
    {
        unsafe fn check_bytes(
            value: *const Self,
            _: &mut C,
        ) -> Result<(), C::Error> {
            // SAFETY: The fields of `ArchivedStringRepr` are always valid for
            // every bit pattern.
            let repr = unsafe { &*value };

            // Every bit pattern is an acceptable inline representation.
            if repr.is_inline() {
                return Ok(());
            }

            // An out-of-line length this small should have been inlined.
            if repr.len() <= INLINE_CAPACITY {
                fail!(CheckStringReprError);
            }

            Ok(())
        }
    }
};