zerocopy_str/
lib.rs

1//! This library is to be used to represent urls, extensions, and filepaths compactly.
2//! These use cases call for strings which are typically under 256 bytes.
3//! As such, we use a smaller length to avoid needing to ser/de a 4 or 8 byte length type.
4
5use shadow_nft_common::array_from_fn;
6
/// A borrowed string that can be written to, and read back from, a byte buffer without copying
/// the string data on the read side.
///
/// The serialized form is a `LenType` length prefix followed by the string's UTF-8 bytes.
/// The type is a thin `Copy` wrapper around a `&str`, so equality and hashing are those of the
/// wrapped string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ZeroCopyStr<'a> {
    /// The wrapped string slice; borrows either the caller's `&str` or the serialized buffer.
    inner: &'a str,
}
11
/// Integer type of the length prefix stored in front of every serialized string.
///
/// One byte keeps the encoding compact; it also caps a serialized string at `LenType::MAX`
/// (255) bytes.
type LenType = u8;

/// Number of bytes occupied by the serialized length prefix.
const LEN_TYPE_SIZE: usize = ::core::mem::size_of::<LenType>();
14
15impl<'a> ZeroCopyStr<'a> {
16    /// Writes the contents of a `&str` within the buffer provided.
17    /// Requires space for the `str` byte length + LEN_TYPE_SIZE length.
18    ///
19    /// We also return a new reference pointing just after the end of the `str` written.
20    ///
21    /// Panics if buffer is not large enough.
22    pub fn write_to(str: &str, bytes: &'a mut [u8]) -> (ZeroCopyStr<'a>, &'a mut [u8]) {
23        // Calculate required buffer size
24        let str_byte_len = str.as_bytes().len();
25        let required_size = str.as_bytes().len() + LEN_TYPE_SIZE;
26        if str_byte_len > LenType::MAX as usize {
27            panic!("the string provided is too large");
28        }
29        if bytes.len() < required_size {
30            panic!("buffer provided for initialization is not large enough");
31        }
32
33        // Write str length
34        bytes[..LEN_TYPE_SIZE].copy_from_slice(&(str.as_bytes().len() as LenType).to_le_bytes());
35
36        // Write str to buffer
37        bytes[LEN_TYPE_SIZE..LEN_TYPE_SIZE + str_byte_len].copy_from_slice(str.as_bytes());
38
39        // Get reference to buffer after write location
40        let (len_inner, rest) = bytes.split_at_mut(required_size);
41
42        // Build the inner &str
43        // Note that the inner &str is not the original reference.
44        // This is a reference to where the bytes were written.
45        let (_len, inner_bytes) = len_inner.split_at(LEN_TYPE_SIZE);
46        let inner = ::core::str::from_utf8(inner_bytes)
47            .expect("the user should have passed in a valid &str");
48
49        // Return the `ZeroCopyStr` and new mut slice
50        (ZeroCopyStr { inner }, rest)
51    }
52
53    /// Writes the contents of a `&str` within the buffer provided.
54    /// Requires space for the `str` byte length + LEN_TYPE_SIZE length.
55    ///
56    /// We also update the position of the reference to just after the end of the `str`.
57    ///
58    /// Panics if buffer is not large enough.
59    pub fn write_to_update<'o>(str: &str, bytes: &'o mut &'a mut [u8]) -> ZeroCopyStr<'a> {
60        // Calculate required buffer size
61        let str_byte_len = str.as_bytes().len();
62        let required_size = str.as_bytes().len() + LEN_TYPE_SIZE;
63        if str_byte_len > LenType::MAX as usize {
64            panic!("the string provided is too large");
65        }
66        if bytes.len() < required_size {
67            panic!("buffer provided for initialization is not large enough");
68        }
69
70        // Write str length
71        bytes[..LEN_TYPE_SIZE].copy_from_slice(&(str.as_bytes().len() as LenType).to_le_bytes());
72
73        // Write str to buffer
74        bytes[LEN_TYPE_SIZE..LEN_TYPE_SIZE + str_byte_len].copy_from_slice(str.as_bytes());
75
76        // Update pointer
77        let (len_inner, rest) = unsafe {
78            ::core::mem::transmute::<(&'o mut [u8], &'o mut [u8]), (&'a mut [u8], &'a mut [u8])>(
79                bytes.split_at_mut(required_size),
80            )
81        };
82        *bytes = rest;
83
84        // Build the inner &str
85        // Note that the inner &str is not the original reference.
86        // This is a reference to where the bytes were written.
87        let (_len, inner_bytes) = len_inner.split_at(LEN_TYPE_SIZE);
88        let inner = ::core::str::from_utf8(inner_bytes)
89            .expect("the user should have passed in a valid &str");
90
91        // Return the `ZeroCopyStr`
92        ZeroCopyStr { inner }
93    }
94
95    /// Zero-copy deserializes bytes into `ZeroCopyStr`. Update the reference to point
96    ///
97    /// We also update the position of the reference to just after the end of the `str`.
98    ///
99    /// Panics if the buffer does not hold the proper data.
100    pub fn read_from(bytes: &mut &'a [u8]) -> ZeroCopyStr<'a> {
101        // Verify bytes at least hold length
102        if bytes.len() < LEN_TYPE_SIZE {
103            panic!("invalid buffer")
104        }
105
106        // Get str length in bytes
107        let str_byte_len = LenType::from_le_bytes(array_from_fn::from_fn(|i| bytes[i]));
108
109        // Read str. panics if:
110        // 1) out-of-bounds if buffer does not contain enough bytes
111        // 2) not valid utf8
112        let inner =
113            ::core::str::from_utf8(&bytes[LEN_TYPE_SIZE..LEN_TYPE_SIZE + str_byte_len as usize])
114                .expect("invalid utf8");
115
116        // Update pointer
117        *bytes = &bytes[LEN_TYPE_SIZE + str_byte_len as usize..];
118
119        // Return `ZeroCopyStr`
120        ZeroCopyStr { inner }
121    }
122
123    /// Total serialized size: str bytes + len bytes
124    pub fn serialized_size(&self) -> usize {
125        self.inner.as_bytes().len() + LEN_TYPE_SIZE
126    }
127
128    /// Returns a view of the bytes of the inner `&str`.
129    ///
130    /// Note this does not include the bytes of the str length
131    pub fn as_bytes(&self) -> &[u8] {
132        self.inner.as_bytes()
133    }
134
135    /// Returns a view of the inner `&str`
136    pub fn as_str(&self) -> &str {
137        self.inner
138    }
139
140    /// Serializes the str into a vec, in the format specified in this crate.
141    pub fn to_vec(&self) -> Vec<u8> {
142        let mut buf = vec![0; self.serialized_size()];
143        // # SAFETY:
144        //
145        // The compiler requires the outer lifetime (i.e. the one pointing to the `as_mut_slice`)
146        // which is the lifetime of the scope of the method invoked to be as long as the lifetime of
147        // the slice which lives for the scope of this function. However, since we are only using this
148        // `&'outer mut` to write to the vector, this is irrelevant so we can transmute the outer lifetime.
149        let mut buf_cursor = unsafe { ::core::mem::transmute(buf.as_mut_slice()) };
150        Self::write_to_update(self.as_str(), &mut buf_cursor);
151        buf
152    }
153}
154
155impl<'a> From<&'a str> for ZeroCopyStr<'a> {
156    fn from(value: &'a str) -> Self {
157        Self { inner: value }
158    }
159}
160
161impl<'a> PartialEq<&str> for ZeroCopyStr<'a> {
162    fn eq(&self, other: &&str) -> bool {
163        self.inner.eq(*other)
164    }
165}
166
#[cfg(test)]
mod tests {
    use crate::{LenType, ZeroCopyStr, LEN_TYPE_SIZE};

    // This was the first immutable file stored on sdrive
    // 88 bytes in length
    const SHADOWY_STR: &str =
        "https://shdw-drive.genesysgo.net/2EC2FnYfstrscZDzQcCEgN3hSn1A5wc1pQNKp5DPfCVo/momma.html";

    // Aquaman
    // 125 bytes in length
    const WEAVY_STR: &str =
        "https://4w5qopogxy735ydfrvlfjycvuyho5a2am3g5wtektb3kqmnspl7a.arweave.net/5bsHPca-P77gZY1WVOBVpg7ug0BmzdtMiph2qDGyev4/7500.png";

    // Empty and full strings (0 and 255 bytes), (full is just A repeated)
    const A: u8 = 0x41;
    const EMPTY_STR: &str = "";
    // SAFETY: the array holds only the ASCII byte 0x41 (`A`), which is valid UTF-8.
    const FULL_STR: &str =
        unsafe { core::str::from_utf8_unchecked(&[A; LenType::MAX as usize]) };

    /// Writes with `write_to`, then reads the same bytes back with `read_from`.
    #[test_case::test_case(SHADOWY_STR; "shadowy file")]
    #[test_case::test_case(WEAVY_STR; "the king aquaman")]
    #[test_case::test_case(EMPTY_STR; "empty str")]
    #[test_case::test_case(FULL_STR; "full str")]
    fn test_round_trip(str: &str) {
        const BUFFER_LEN: usize = 1024;
        let mut buffer = vec![0; BUFFER_LEN];
        let expected_bytes_written = str.len() + LEN_TYPE_SIZE;

        // Write str and get back
        // 1) ZeroCopyStr pointing to the new copy
        // 2) the unused tail of the buffer
        // Both borrow `buffer` mutably, so keep them in a scope of their own.
        let remaining_len = {
            let (zcs, rest) = ZeroCopyStr::write_to(str, &mut buffer);

            // Check str written
            assert_eq!(zcs, str);
            rest.len()
        };

        // Check the returned tail starts right after the written record
        assert_eq!(BUFFER_LEN, remaining_len + expected_bytes_written);

        // Read the record back and check the read cursor advanced by the same amount
        let mut reader: &[u8] = &buffer;
        let read_back = ZeroCopyStr::read_from(&mut reader);
        assert_eq!(read_back, str);
        assert_eq!(reader.len(), remaining_len);

        // `to_vec` must produce exactly the bytes that were written to the buffer
        assert_eq!(read_back.to_vec(), &buffer[..expected_bytes_written]);
    }

    /// Writes with `write_to_update`, then reads the same bytes back with `read_from`.
    #[test_case::test_case(SHADOWY_STR; "shadowy file")]
    #[test_case::test_case(WEAVY_STR; "the king aquaman")]
    #[test_case::test_case(EMPTY_STR; "empty str")]
    #[test_case::test_case(FULL_STR; "full str")]
    fn test_round_trip_update(str: &str) {
        const BUFFER_LEN: usize = 1024;
        let mut buffer = vec![0; BUFFER_LEN];
        let expected_bytes_written = str.len() + LEN_TYPE_SIZE;

        // Write str through a cursor, which is advanced in place
        let remaining_len = {
            let mut cursor = buffer.as_mut_slice();
            let zcs = ZeroCopyStr::write_to_update(str, &mut cursor);

            // Check str written
            assert_eq!(zcs, str);
            cursor.len()
        };

        // Check the write cursor was advanced past the written record
        assert_eq!(BUFFER_LEN, remaining_len + expected_bytes_written);

        // Read the record back and check the read cursor advanced by the same amount
        let mut reader: &[u8] = &buffer;
        let read_back = ZeroCopyStr::read_from(&mut reader);
        assert_eq!(read_back, str);
        assert_eq!(reader.len(), remaining_len);
    }
}
231}