istring/
small.rs

1use core::{fmt, slice, str, convert, mem, cmp};
2use core::clone::Clone;
3use core::ops::{self, Index};
4use core::borrow::Borrow;
5use alloc::{string::String, vec::Vec};
6use alloc::boxed::Box;
7use crate::FromUtf8Error;
8
9#[cfg(feature="ts")]
10use alloc::{borrow::ToOwned, string::ToString, format};
11
/// Top bit of the one-byte inline length field; used as the tag that marks
/// the inline variant (the remaining seven bits carry the length).
const IS_INLINE: u8 = 1 << 7;
/// Mask selecting the length bits of the inline length byte, i.e. everything
/// except [`IS_INLINE`].
const LEN_MASK: u8 = !IS_INLINE;

/// Number of payload bytes in the inline buffer: two machine words minus the
/// single length byte.
#[cfg(target_pointer_width = "64")]
const INLINE_CAPACITY: usize = 15;
/// Number of payload bytes in the inline buffer: two machine words minus the
/// single length byte.
#[cfg(target_pointer_width = "32")]
const INLINE_CAPACITY: usize = 7;

/// Largest `usize` whose most significant bit is clear.
///
/// Defined once for every pointer width so that the lint override below
/// covers all targets (previously it was attached to the 64-bit definition
/// only).
// In the visible part of this file the constant is only referenced by the
// layout test, hence the `unused` override for non-test builds.
#[allow(unused)]
const MAX_CAPACITY: usize = usize::MAX >> 1;
25
26// use the MSB of heap.len to encode the variant,
27// which is also the MSB of inline.len
28#[cfg(target_endian = "little")]
29#[derive(Copy, Clone)]
30#[repr(C)]
31pub struct Inline {
32    pub data:   [u8; INLINE_CAPACITY],
33    pub len:    u8
34}
/// Heap representation used on little-endian targets: a raw pointer followed
/// by the length word.
///
/// `len` comes last so that its most significant byte is the final byte of
/// the struct under `#[repr(C)]`.
#[cfg(target_endian = "little")]
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Heap {
    /// Start of the byte buffer.
    pub ptr: *mut u8,
    /// Number of bytes in the buffer.
    pub len: usize,
}
42
/// Inline representation used on big-endian targets.
///
/// With `#[repr(C)]` the fields are laid out in declaration order, so `len`
/// is the first byte of the struct. On big endian that byte overlays the
/// most significant byte of [`Heap::len`], which is declared first there.
#[cfg(target_endian = "big")]
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Inline {
    /// Length byte; its top bit doubles as the variant tag.
    pub len: u8,
    /// Inline byte storage.
    pub data: [u8; INLINE_CAPACITY],
}
50
/// Heap representation used on big-endian targets: the length word followed
/// by a raw pointer.
///
/// `len` comes first so that its most significant byte is the first byte of
/// the struct under `#[repr(C)]`.
#[cfg(target_endian = "big")]
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Heap {
    /// Number of bytes in the buffer.
    pub len: usize,
    /// Start of the byte buffer.
    pub ptr: *mut u8,
}
58
59union SmallBytesUnion {
60    inline: Inline,
61    heap:   Heap
62}
63
64#[cfg_attr(feature="ts", derive(ts_rs::TS), ts(type="string"))]
65pub struct SmallBytes {
66    union: SmallBytesUnion,
67}
68unsafe impl Send for SmallBytes {}
69unsafe impl Sync for SmallBytes {}
70
71#[derive(Clone)]
72#[cfg_attr(feature="size", derive(datasize::DataSize))]
73#[cfg_attr(feature="ts", derive(ts_rs::TS), ts(as="String"))]
74
75pub struct SmallString {
76    bytes: SmallBytes,
77}
78
/// `rkyv` integration: a `SmallString` is archived exactly like a string
/// slice, using `ArchivedString` as its archived form.
#[cfg(feature = "rkyv")]
mod rkyv_impl {
    use super::SmallString;
    use rancor::{Fallible, Source};
    use rkyv::{
        string::{ArchivedString, StringResolver},
        Archive, Deserialize, DeserializeUnsized, Place, Serialize, SerializeUnsized,
    };

    impl Archive for SmallString {
        type Archived = ArchivedString;
        type Resolver = StringResolver;

        /// Writes the archived form of the string contents into `out`.
        #[inline]
        fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
            ArchivedString::resolve_from_str(self.as_str(), resolver, out);
        }
    }

    impl<S> Serialize<S> for SmallString
    where
        S: Fallible + ?Sized,
        S::Error: Source,
        str: SerializeUnsized<S>,
    {
        /// Serializes the string contents and returns the resolver for them.
        #[inline]
        fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
            let contents = self.as_str();
            ArchivedString::serialize_from_str(contents, serializer)
        }
    }

    impl<D> Deserialize<SmallString, D> for ArchivedString
    where
        D: Fallible + ?Sized,
        str: DeserializeUnsized<str, D>,
    {
        /// Rebuilds a `SmallString` by copying the archived contents; the
        /// deserializer itself is not consulted.
        #[inline]
        fn deserialize(&self, _: &mut D) -> Result<SmallString, D::Error> {
            Ok(SmallString::from(self.as_str()))
        }
    }

    impl PartialEq<SmallString> for ArchivedString {
        /// Compares the two values as string slices.
        #[inline]
        fn eq(&self, other: &SmallString) -> bool {
            self.as_str() == other.as_str()
        }
    }

    impl PartialEq<ArchivedString> for SmallString {
        /// Compares the two values as string slices.
        #[inline]
        fn eq(&self, other: &ArchivedString) -> bool {
            other.as_str() == self.as_str()
        }
    }
}
132
/// Verifies that the tag bit of the inline length byte lands on the most
/// significant bit of `Heap::len`.
#[test]
fn test_layout() {
    let tagged = Inline { data: [0; INLINE_CAPACITY], len: IS_INLINE };
    let overlay = SmallBytesUnion { inline: tagged };
    // SAFETY: `Heap` consists of a raw pointer and a `usize`, for which every
    // initialized bit pattern is a valid value.
    let Heap { len, .. } = unsafe { overlay.heap };
    assert_eq!(len, MAX_CAPACITY + 1);
}
139
/// Copies `s` into a newly allocated boxed slice.
#[inline(always)]
fn box_slice(s: &[u8]) -> Box<[u8]> {
    s.into()
}
/// Releases ownership of a boxed slice and returns its data pointer and
/// length.
///
/// The allocation is not freed; the caller becomes responsible for it (in
/// this file it is handed back to `box_slice_from_raw_parts`).
///
/// `Box::into_raw` is used instead of taking `as_mut_ptr()` and then moving
/// the box into `mem::forget`: moving the box after the pointer was derived
/// may invalidate that pointer under the aliasing rules, whereas `into_raw`
/// is the documented way to give up ownership.
#[inline(always)]
fn box_slice_into_raw_parts(s: Box<[u8]>) -> (*mut u8, usize) {
    let len = s.len();
    // A `*mut [u8]` can be cast to a thin pointer to its first element.
    let ptr = Box::into_raw(s) as *mut u8;
    (ptr, len)
}
/// Rebuilds a boxed slice from a data pointer and a length.
///
/// # Safety
///
/// `ptr` and `len` must describe an allocation that was released from a
/// `Box<[u8]>` (as `box_slice_into_raw_parts` does) and that has not been
/// reclaimed since; the returned box takes ownership of it.
#[inline(always)]
unsafe fn box_slice_from_raw_parts(ptr: *mut u8, len: usize) -> Box<[u8]> {
    // SAFETY: guaranteed by the caller, see the function-level contract.
    let raw: *mut [u8] = unsafe { slice::from_raw_parts_mut(ptr, len) };
    // SAFETY: `raw` designates the allocation the caller hands over.
    unsafe { Box::from_raw(raw) }
}
156
157impl SmallBytes {
158    #[inline(always)]
159    pub fn new() -> SmallBytes {
160        unsafe {
161            SmallBytes::from_inline(
162                Inline { data: [0; INLINE_CAPACITY], len: 0 },
163            )
164        }
165    }
166}
167impl<'a> From<&'a [u8]> for SmallBytes {
168    #[inline]
169    fn from(s: &[u8]) -> SmallBytes {
170        let len = s.len();
171        unsafe {
172            if len > INLINE_CAPACITY {
173                let s = box_slice(s);
174                let (ptr, len) = box_slice_into_raw_parts(s);
175                SmallBytes::from_heap(
176                    Heap {
177                        ptr,
178                        len
179                    },
180                )
181            } else {
182                let mut data = [0; INLINE_CAPACITY];
183                data[.. len].copy_from_slice(s);
184                SmallBytes::from_inline(
185                    Inline { data, len: len as u8 },
186                )
187            }
188        }
189    }
190}
191
192impl SmallString {
193    #[inline(always)]
194    pub fn new() -> SmallString {
195        SmallString {
196            bytes: SmallBytes::new()
197        }
198    }
199    pub fn from_utf8(bytes: SmallBytes) -> Result<SmallString, FromUtf8Error<SmallBytes>> {
200        match str::from_utf8(bytes.as_slice()) {
201            Ok(_) => Ok(SmallString { bytes }),
202            Err(error) => Err(FromUtf8Error {
203                bytes,
204                error
205            })
206        }
207    }
208}
209impl Drop for SmallBytes {
210    #[inline]
211    fn drop(&mut self) {
212        if !self.is_inline() {
213            unsafe {
214                box_slice_from_raw_parts(self.union.heap.ptr, self.union.heap.len);
215            }
216        }
217    }
218}
219impl<'a> convert::From<&'a str> for SmallString {
220    #[inline]
221    fn from(s: &'a str) -> SmallString {
222        SmallString {
223            bytes: SmallBytes::from(s.as_bytes())
224        }
225    }
226}
227impl convert::From<Vec<u8>> for SmallBytes {
228    #[inline]
229    fn from(s: Vec<u8>) -> SmallBytes {
230        let len = s.len();
231        if len <= INLINE_CAPACITY {
232            return SmallBytes::from(s.as_slice());
233        }
234
235        unsafe {
236            let s = s.into_boxed_slice();
237            let (ptr, len) = box_slice_into_raw_parts(s);
238            let heap = Heap {
239                ptr,
240                len,
241            };
242
243            SmallBytes::from_heap(
244                heap,
245            )
246        }
247    }
248}
249impl convert::From<String> for SmallString {
250    #[inline]
251    fn from(s: String) -> SmallString {
252        SmallString {
253            bytes: SmallBytes::from(s.into_bytes())
254        }
255    }
256}
257impl Into<Vec<u8>> for SmallBytes {
258    #[inline]
259    fn into(self) -> Vec<u8> {
260        let len = self.len();
261        if self.is_inline() {
262            self.as_slice().into()
263        } else {
264            unsafe {
265                let s = box_slice_from_raw_parts(self.union.heap.ptr, len);
266                // the SmallString must not drop
267                mem::forget(self);
268
269                Vec::from(s)
270            }
271        }
272    }
273}
274impl Into<String> for SmallString {
275    #[inline]
276    fn into(self) -> String {
277        unsafe {
278            String::from_utf8_unchecked(self.bytes.into())
279        }
280    }
281}
282impl Clone for SmallBytes {
283    #[inline]
284    fn clone(&self) -> SmallBytes {
285        unsafe {
286            if self.is_inline() {
287                // simple case
288                SmallBytes {
289                    union: SmallBytesUnion { inline: self.union.inline },
290                }
291            } else {
292                let len = self.len();
293                let bytes = slice::from_raw_parts(self.union.heap.ptr, len);
294                let (ptr, len) = box_slice_into_raw_parts(box_slice(bytes));
295                SmallBytes::from_heap(
296                    Heap {
297                        ptr,
298                        len
299                    },
300                )
301            }
302        }
303    }
304}
305impl FromIterator<char> for SmallString {
306    fn from_iter<T: IntoIterator<Item=char>>(iter: T) -> Self {
307        let mut buf = [0; INLINE_CAPACITY];
308        let mut pos = 0;
309        let mut iter = iter.into_iter();
310        while let Some(c) = iter.next() {
311            if pos + c.len_utf8() > INLINE_CAPACITY {
312                let mut s = String::with_capacity(32);
313                s.push_str(unsafe { str::from_utf8_unchecked(&buf[..pos]) });
314                s.push(c);
315                s.extend(iter);
316                return s.into();
317            }
318            pos += c.encode_utf8(&mut buf[pos..]).len();
319        }
320        let bytes = unsafe { SmallBytes::from_inline(
321            Inline { data: buf, len: pos as u8 },
322        ) };
323        SmallString { bytes }
324    }
325}
326impl From<char> for SmallString {
327    fn from(c: char) -> SmallString {
328        let mut buf = [0; INLINE_CAPACITY];
329        let len = c.encode_utf8(&mut buf).len();
330        let bytes = unsafe { SmallBytes::from_inline(
331            Inline { data: buf, len: len as u8 },
332        ) };
333        SmallString { bytes }
334    }
335}
336
337
/// Heap-usage accounting for `SmallBytes`.
///
/// `DataSize` reports heap memory only; the bytes of the value itself
/// (including the inline buffer) are accounted for via `mem::size_of` by
/// whoever owns the value. The previous implementation added
/// `size_of::<Self>()` to every estimate and therefore over-reported each
/// value by its own size.
#[cfg(feature="size")]
impl datasize::DataSize for SmallBytes {
    const IS_DYNAMIC: bool = true;
    /// Lower bound of the heap usage: inline values own no heap memory.
    const STATIC_HEAP_SIZE: usize = 0;

    /// Returns 0 for inline values and the buffer length for heap values.
    fn estimate_heap_size(&self) -> usize {
        if self.is_inline() {
            0
        } else {
            self.len()
        }
    }
}
351
// Expand the crate's shared macros for both types. The macro definitions are
// not part of this file; methods used above but not defined here (`as_str`,
// `as_slice`, `len`, `is_inline`, `from_inline`, `from_heap`) presumably come
// from these expansions — confirm against the macro source.
// NOTE(review): `SmallStringUnion` is not declared anywhere in this file;
// verify that `define_common_string!` does not actually use its second
// argument as a type.
352define_common_string!(SmallString, SmallStringUnion);
353define_common_bytes!(SmallBytes, SmallBytesUnion);