stringlet/
methods.rs

1//! Many implementations to make Stringlet easy to use.
2
3use crate::*;
4
/// Marker byte for the UTF-8 trick behind the length encoding.
///
/// A byte with its two highest bits set is never a valid last byte of UTF-8. Such a byte in the
/// last position therefore signals that the full length is not in use, and its lower bits hold
/// the length of the unused tail. At full length there is no tagged last byte, so only 64 lengths
/// need to be encoded, which is where this crate’s length limit comes from.
///
/// To keep equality tests simple, all unused bytes must carry the same value. Counting from the
/// end, i.e. storing the length of the unused tail, makes the branchless implementation of
/// `len()` more efficient.
///
/// If you change the semantics, `option_env!("STRINGLET_RAW_DEBUG")` is your friend.
pub(crate) const TAG: u8 = 0b1100_0000;
17
18impl<const SIZE: usize, const FIXED: bool, const LEN: usize, const ALIGN: u8>
19    StringletBase<SIZE, FIXED, LEN, ALIGN>
20where
21    Self: Config<SIZE, FIXED, LEN, ALIGN>,
22{
23    pub const fn from_str(str: &str) -> Result<Self, ()> {
24        if Self::fits(str.len()) {
25            // SAFETY we checked the length
26            Ok(unsafe { Self::from_str_unchecked(str) })
27        } else {
28            Err(())
29        }
30    }
31
32    /// # Safety
33    /// It is the callers responsibility to ensure that the size fits
34    pub const unsafe fn from_str_unchecked(str: &str) -> Self {
35        // SAFETY len() is up to the caller
36        unsafe { Self::from_utf8_unchecked(str.as_bytes()) }
37    }
38
39    #[doc(hidden)]
40    #[inline]
41    pub const fn _from_macro(str: &str) -> Self {
42        if Self::fits(str.len()) {
43            // SAFETY checked the length and got UTF-8
44            unsafe { Self::from_str_unchecked(str) }
45        } else if FIXED {
46            panic!("stringlet!(...): parameter too short or too long.")
47        } else {
48            panic!("stringlet!(var|slim ...): parameter too long.")
49        }
50    }
51
52    pub fn from_utf8_bytes(str: [u8; SIZE]) -> Result<Self, std::str::Utf8Error> {
53        str::from_utf8(&str)?;
54        // SAFETY always short enough and just checked for UTF-8 error
55        Ok(unsafe { Self::from_utf8_bytes_unchecked(str) })
56    }
57
58    /// # Safety
59    /// It is the callers responsibility to ensure that the content is UTF-8.
60    pub const unsafe fn from_utf8_bytes_unchecked(str: [u8; SIZE]) -> Self {
61        Self {
62            _align: [],
63            str,
64            len: [if LEN > 0 { str.len() as _ } else { 0 }; LEN],
65        }
66    }
67
68    pub fn from_utf8(bytes: &[u8]) -> Result<Self, std::str::Utf8Error> {
69        // todo return an Error, e.g. std::array::TryFromSliceError
70        assert!(
71            Self::fits(bytes.len()),
72            "{}::from_utf8(): cannot store {} characters",
73            std::any::type_name::<Self>(),
74            bytes.len()
75        );
76        str::from_utf8(bytes)?;
77        // SAFETY we checked the length and utf8ness
78        Ok(unsafe { Self::from_utf8_unchecked(bytes) })
79    }
80
81    /// # Safety
82    /// It is the callers responsibility to ensure that the size fits and the content is UTF-8.
83    pub const unsafe fn from_utf8_unchecked(bytes: &[u8]) -> Self {
84        let bytes_len = bytes.len();
85
86        let mut str_uninit = core::mem::MaybeUninit::uninit();
87        let str = str_uninit.as_mut_ptr() as *mut u8;
88
89        Self {
90            _align: [],
91            // SAFETY we only write to uninit via pointer methods before Rust sees the value
92            str: unsafe {
93                core::ptr::copy_nonoverlapping(bytes.as_ptr(), str, bytes_len);
94                if !FIXED {
95                    let tail = if LEN == 1 {
96                        TAG
97                    } else {
98                        TAG | (SIZE - bytes_len) as u8
99                    };
100                    str.add(bytes_len).write_bytes(tail, SIZE - bytes_len);
101                }
102                str_uninit.assume_init()
103            },
104            len: [bytes_len as _; _],
105        }
106    }
107
108    /* Once we add appending
109    pub const fn capacity(&self) -> usize {
110        SIZE
111    } */
112
113    #[inline(always)]
114    pub const fn len(&self) -> usize {
115        if FIXED || SIZE == 0 {
116            return SIZE;
117        } else if LEN == 1 {
118            // For VarStringlet look no further
119            return self.len[0] as _;
120        }
121
122        // Only SlimStringlet after here
123        let last = self.last();
124        if SIZE == 1 {
125            // iff single byte is untagged we have 1
126            (last < TAG) as _
127        } else if SIZE == 64 {
128            // 64 is special as we only store 6 bits, where 0b00_0000 means SIZE-0b100_0000
129            SIZE - (last == TAG) as usize * SIZE - (last > TAG) as usize * (last ^ TAG) as usize
130        } else {
131            // branchless: if last is UTF-8, SIZE, else extract tail len from low bits of last
132            SIZE - (last > TAG) as usize * (last ^ TAG) as usize
133        }
134    }
135
136    #[inline(always)]
137    pub const fn is_empty(&self) -> bool {
138        if SIZE == 0 {
139            // trivially
140            true
141        } else if FIXED {
142            // and already checked SIZE > 0
143            false
144        } else if LEN == 1 {
145            // For VarStringlet look no further
146            self.len[0] == 0
147        } else if SIZE == 64 {
148            // Special case as we have 65 lengths but only 6 bits.
149            self.last() == TAG
150        } else {
151            self.last() == TAG | SIZE as u8
152        }
153    }
154
155    #[inline(always)]
156    pub const fn as_bytes(&self) -> &[u8] {
157        if FIXED {
158            &self.str
159        } else {
160            // No [..self.len()] in const yet, asm differs in debug but same as slice in release
161            self.str.split_at(self.len()).0
162        }
163    }
164
165    #[inline(always)]
166    pub const fn as_str(&self) -> &str {
167        // SAFETY: str up to len() is guaranteed to to be initialized with valid UTF-8
168        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
169    }
170
171    /// Name without StringletBase details in 3 parts, on which you must call .join("")
172    pub(crate) const fn type_name() -> [&'static str; 3] {
173        let name = ["SlimStringlet", "VarStringlet"][LEN];
174        let log2 = ALIGN.trailing_zeros() as usize;
175        let align = ["", "2", "4", "8", "16", "32", "64"][log2];
176        let size = [
177            "<0>", "<1>", "<2>", "<3>", "<4>", "<5>", "<6>", "<7>", "<8>", "<9>", "<10>", "<11>",
178            "<12>", "<13>", "<14>", "<15>", "", "<17>", "<18>", "<19>", "<20>", "<21>", "<22>",
179            "<23>", "<24>", "<25>", "<26>", "<27>", "<28>", "<29>", "<30>", "<31>", "<32>", "<33>",
180            "<34>", "<35>", "<36>", "<37>", "<38>", "<39>", "<40>", "<41>", "<42>", "<43>", "<44>",
181            "<45>", "<46>", "<47>", "<48>", "<49>", "<50>", "<51>", "<52>", "<53>", "<54>", "<55>",
182            "<56>", "<57>", "<58>", "<59>", "<60>", "<61>", "<62>", "<63>", "<64>",
183        ][SIZE];
184
185        if FIXED {
186            // Skip "Slim". No [4..] in const yet
187            [name.split_at(4).1, align, size]
188        } else {
189            [name, align, size]
190        }
191    }
192
193    #[inline(always)]
194    pub(crate) const fn fits(len: usize) -> bool {
195        if FIXED { len == SIZE } else { len <= SIZE }
196    }
197
198    #[inline(always)]
199    pub(crate) const fn last(&self) -> u8 {
200        debug_assert!(SIZE != 0, "unchecked call");
201        self.str[SIZE - 1]
202    }
203}
204
// Compile-fail checks, only built when running doctests. The functions merely carry the doc
// comments that hold the tests.
#[cfg(doctest)]
mod doctests {
    /**
    A `VarStringlet` size of 256 must not compile.

    ```compile_fail
    let _x: stringlet::VarStringlet<256>;
    ```
    */
    fn test_var_stringlet_256_compile_fail() {}

    /**
    A `SlimStringlet` size of 65 must not compile.

    ```compile_fail
    let _x: stringlet::SlimStringlet<65>;
    ```
    */
    fn test_slim_stringlet_65_compile_fail() {}

    /**
    A fixed `StringletBase` with `LEN` 1 must not compile.

    The arguments follow the parameter order `SIZE, FIXED, LEN, ALIGN`, so that the snippet fails
    because of the configuration and not because of a malformed argument list.

    ```compile_fail
    # use stringlet::StringletBase;
    let _x: StringletBase<0, true, 1, 1>;
    ```
    */
    fn test_fixed_1_compile_fail() {}
}
229
230#[cfg(test)]
231mod tests {
232    use core::convert::Into;
233
234    use super::*;
235
236    #[test]
237    fn test_big() {
238        let _f: Stringlet<1024>;
239        let _v: VarStringlet<255>;
240        let _s: SlimStringlet<64>;
241    }
242
243    #[test]
244    fn test_as_str() {
245        let f: Stringlet<7> = "A123456".into();
246        assert_eq!(f.as_str(), "A123456");
247        let v: VarStringlet = "A123456".into();
248        assert_eq!(v.as_str(), "A123456");
249        let s: SlimStringlet = "A123456".into();
250        assert_eq!(s.as_str(), "A123456");
251    }
252
253    #[test]
254    fn test_const() {
255        const ABCD: Stringlet<4> =
256            unsafe { Stringlet::<4>::from_utf8_bytes_unchecked([b'A', b'b', b'c', b'd']) };
257        assert_eq!(&ABCD, "Abcd");
258        const A123456: Stringlet<7> = stringlet!("A123456");
259        assert_eq!(&A123456, "A123456");
260    }
261
262    fn test_all_lengths<const SIZE: usize>()
263    where
264        Stringlet<SIZE>: Config<SIZE>,
265        VarStringlet<SIZE>: Config<SIZE, false, 1>,
266        SlimStringlet<SIZE>: Config<SIZE, false>,
267    {
268        const STR64: &str = "0123456789_123456789_123456789_123456789_123456789_123456789_123";
269        for len in 0..=SIZE {
270            let str: VarStringlet<SIZE> = (&STR64[..len]).into();
271            assert_eq!(str.is_empty(), len == 0);
272            assert_eq!(str.len(), len);
273            let str: SlimStringlet<SIZE> = (&STR64[..len]).into();
274            assert_eq!(str.is_empty(), len == 0);
275            assert_eq!(str.len(), len);
276        }
277        let fixed: Stringlet<SIZE> = (&STR64[..SIZE]).into();
278        assert_eq!(fixed.is_empty(), SIZE == 0);
279        assert_eq!(fixed.len(), SIZE);
280    }
281    #[test]
282    fn test_len() {
283        macro_rules! test_all_lengths {
284            ($($size:literal),+) => {
285                $(test_all_lengths::<$size>();)+
286            };
287        }
288        test_all_lengths![
289            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
290            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
291            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
292        ];
293    }
294
295    #[test]
296    fn test_empty() {
297        assert!(stringlet!("").is_empty());
298        assert!(!stringlet!("a").is_empty());
299        assert!(!stringlet!("ab").is_empty());
300
301        assert!(stringlet!(var: "").is_empty());
302        assert!(stringlet!(var 1: "").is_empty());
303        assert!(stringlet!(var 2: "").is_empty());
304        assert!(!stringlet!(var: "a").is_empty());
305        assert!(!stringlet!(var: "ab").is_empty());
306
307        assert!(stringlet!(slim: "").is_empty());
308        assert!(stringlet!(slim 1: "").is_empty());
309        assert!(stringlet!(slim 2: "").is_empty());
310        assert!(!stringlet!(slim: "a").is_empty());
311        assert!(!stringlet!(slim: "ab").is_empty());
312    }
313
314    #[test]
315    fn test_all_type_names() {
316        macro_rules! test_it {
317            (1 $ty:ty) => {
318                assert_eq!(stringify!($ty).replace(' ', ""), <$ty>::type_name().join(""));
319            };
320            ([$($size:literal),+] $ty:tt) => {
321                $(
322                    test_it!(1 $ty<$size>);
323                )+
324            };
325            ($ty:tt) => {
326                test_it!(1 $ty); // special case default 16
327                test_it!([
328                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23,
329                    24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
330                    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
331                ] $ty);
332            };
333        }
334        test_it!(Stringlet);
335        test_it!(VarStringlet);
336        test_it!(SlimStringlet);
337        test_it!(Stringlet2);
338        test_it!(VarStringlet4);
339        test_it!(SlimStringlet8);
340        test_it!(Stringlet16);
341        test_it!(VarStringlet32);
342        test_it!(SlimStringlet64);
343    }
344}