jstring/
raw_string.rs

1use core::fmt;
2use core::mem;
3use core::ops::{Deref, DerefMut};
4use core::ptr::NonNull;
5use core::slice;
6
/// Byte string with a small-string optimisation, loosely modelled on Java's immutable strings.
///
/// Short contents are stored inline ("interned") inside the struct itself; longer contents live
/// in a heap allocation. There is no spare capacity, so replacing the contents always builds a
/// new string in O(n) time. No amortization here.
///
/// # Representation
///
/// `data` never holds a pointer directly. It holds `usize::to_be(value)`, which means the least
/// significant byte of `value` is always the *last* byte of the struct in memory, regardless of
/// the target's endianness. `read_ptr` decodes `value` again.
///
/// * `value` is odd: the string is interned. The low byte of `value` is `(len << 1) | 1` and the
///   contents occupy the first `len` bytes of the struct (all of the `len` field and, if needed,
///   the leading bytes of `data`). At most `max_intern_len()` bytes fit.
/// * `value` is even: the string is on the heap. `value` is the address of an allocation made
///   with alignment 2 and the `len` field holds its size in bytes.
///
/// `#[repr(C)]` pins the field order that the inline layout depends on.
#[repr(C)]
pub struct RawJavaString {
    len: usize,
    data: NonNull<u8>,
}

impl RawJavaString {
    /// Returns the maximum length of an interned string on the target architecture.
    ///
    /// The struct is two words wide and its final byte is reserved for the tag.
    #[inline(always)]
    pub const fn max_intern_len() -> usize {
        mem::size_of::<usize>() * 2 - 1
    }

    /// Returns whether or not this string is interned (stored inline, without a heap buffer).
    #[inline(always)]
    pub fn is_interned(&self) -> bool {
        // Heap buffers are allocated with alignment 2, so an odd decoded value can only be the
        // inline tag.
        (self.read_ptr() as usize) % 2 == 1
    }

    /// Decodes the big-endian value stored in `data`.
    ///
    /// For a heap string this is the buffer pointer; for an interned string it is not a real
    /// pointer, only a value whose low byte carries the tag and length.
    #[inline(always)]
    pub fn read_ptr(&self) -> *mut u8 {
        usize::from_be(self.data.as_ptr() as usize) as *mut u8
    }

    /// Stores `ptr` into `data` in big-endian form.
    ///
    /// NOTE(review): this is a safe public function that can break the representation
    /// invariants (e.g. storing an even value that was not returned by the allocator makes
    /// `Drop` free it). Callers must only store values consistent with the layout described on
    /// the type.
    ///
    /// # Panics
    ///
    /// Panics if `ptr` is null.
    #[inline(always)]
    pub fn write_ptr(&mut self, ptr: *mut u8) {
        self.data = NonNull::new(usize::to_be(ptr as usize) as *mut u8)
            .expect("Wrote null to JavaString pointer.");
    }

    /// Stores `ptr` into `data` in big-endian form without checking for null.
    ///
    /// # Safety
    ///
    /// `ptr` must be non-null, and the stored value must be consistent with the representation
    /// invariants of the type (odd inline tag, or an even pointer to a live allocation of
    /// `self.len` bytes with alignment 2).
    #[inline(always)]
    pub unsafe fn write_ptr_unchecked(&mut self, ptr: *mut u8) {
        // SAFETY: the caller guarantees `ptr` is non-null; byte-swapping a non-zero value
        // cannot produce zero.
        self.data = unsafe { NonNull::new_unchecked(usize::to_be(ptr as usize) as *mut u8) };
    }

    /// Length encoded in the inline tag byte. Only meaningful when the string is interned.
    #[inline(always)]
    fn interned_len(&self) -> usize {
        // Truncating to `u8` keeps exactly the tag byte; dropping bit 0 leaves the length.
        ((self.read_ptr() as usize as u8) >> 1) as usize
    }

    /// Returns the length of this string in bytes.
    #[inline(always)]
    pub fn len(&self) -> usize {
        if self.is_interned() {
            self.interned_len()
        } else {
            self.len
        }
    }

    /// Returns `true` if this string contains no bytes.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the current memory layout of this object. If None, then we're looking
    /// at an interned string.
    #[inline(always)]
    fn get_memory_layout(&self) -> Option<alloc::alloc::Layout> {
        if self.is_interned() {
            None
        } else {
            // SAFETY: a heap string was allocated with exactly this size and alignment, and
            // the layout was validated at that point.
            Some(unsafe { alloc::alloc::Layout::from_size_align_unchecked(self.len, 2) })
        }
    }

    /// Returns the contents of this string as a byte slice.
    pub fn get_bytes(&self) -> &[u8] {
        let (ptr, len) = if self.is_interned() {
            // Derive the pointer from the whole struct, not from the `len` field: the inline
            // bytes may extend past that field into `data`.
            (self as *const Self as *const u8, self.interned_len())
        } else {
            (self.read_ptr() as *const u8, self.len)
        };

        // SAFETY: inline contents are at most `max_intern_len()` initialized bytes inside
        // `*self`; heap contents are `self.len` initialized bytes owned by this string. Both
        // stay valid and unmodified for as long as `self` is shared-borrowed.
        unsafe { slice::from_raw_parts(ptr, len) }
    }

    /// Returns the contents of this string as a mutable byte slice.
    ///
    /// The slice never covers the inline tag byte, so writes through it cannot change the
    /// length or the interned/heap state.
    #[inline]
    pub fn get_bytes_mut(&mut self) -> &mut [u8] {
        let (ptr, len) = if self.is_interned() {
            let len = self.interned_len();
            (self as *mut Self as *mut u8, len)
        } else {
            (self.read_ptr(), self.len)
        };

        // SAFETY: same bounds as `get_bytes`, and the pointer is derived from a unique borrow
        // (or is the uniquely owned heap buffer), so handing out `&mut` is sound. Casting the
        // shared slice from `get_bytes` to `&mut` would be undefined behaviour.
        unsafe { slice::from_raw_parts_mut(ptr, len) }
    }

    /// Creates a new, empty, RawJavaString.
    ///
    /// The result is interned: the tag value `1` encodes length zero.
    pub const fn new() -> Self {
        Self {
            len: 0,
            // SAFETY: `1` is non-zero and byte-swapping preserves that.
            data: unsafe { NonNull::new_unchecked(usize::to_be(1) as *mut u8) },
        }
    }

    /// Builds a new string from raw bytes.
    ///
    /// Complexity is O(n) in the length of `bytes`.
    pub fn from_bytes(bytes: impl Deref<Target = [u8]>) -> Self {
        let bytes_list: &[_] = &[bytes];
        Self::from_bytes_array_inline(bytes_list)
    }

    /// Builds a new string by concatenating every slice in `bytes_list`, in order.
    ///
    /// Complexity is O(n) in the sum of the lengths of the elements of `bytes_list`.
    pub fn from_bytes_array(bytes_list: impl Deref<Target = [impl Deref<Target = [u8]>]>) -> Self {
        Self::from_bytes_array_inline(bytes_list)
    }

    /// Builds a new string by concatenating every slice in `bytes_list`, in order.
    ///
    /// Complexity is O(n) in the sum of the lengths of the elements of `bytes_list`.
    ///
    /// # Panics
    ///
    /// Panics if the total length overflows `usize` or is too large to allocate, or if an
    /// element reports a different length on its second dereference. Aborts through
    /// `handle_alloc_error` if the allocator fails.
    #[inline(always)]
    fn from_bytes_array_inline(
        bytes_list: impl Deref<Target = [impl Deref<Target = [u8]>]>,
    ) -> Self {
        use alloc::alloc::{alloc, handle_alloc_error, Layout};

        // A wrapped sum would under-allocate, so overflow must be a hard error.
        let total = bytes_list
            .iter()
            .try_fold(0usize, |sum, bytes| sum.checked_add(bytes.len()))
            .expect("RawJavaString length overflows usize");

        let mut new = Self::new();

        // Fully set up `new` first, then derive the destination pointer, so nothing touches
        // `new` between taking the raw pointer and finishing the copy.
        let destination: *mut u8 = if total <= Self::max_intern_len() {
            let tag = (total << 1) + 1;
            // SAFETY: `tag` is odd, hence non-null, and encodes an inline string of `total`
            // bytes, which fits because `total <= max_intern_len()`.
            unsafe { new.write_ptr_unchecked(tag as *mut u8) };
            &mut new as *mut Self as *mut u8
        } else {
            let layout =
                Layout::from_size_align(total, 2).expect("RawJavaString is too large to allocate");
            // SAFETY: `total > max_intern_len()`, so the layout has non-zero size.
            let buffer = unsafe { alloc(layout) };
            if buffer.is_null() {
                handle_alloc_error(layout);
            }
            new.len = total;
            // SAFETY: `buffer` is non-null, 2-byte aligned (even), and is a live allocation of
            // `new.len` bytes.
            unsafe { new.write_ptr_unchecked(buffer) };
            buffer
        };

        let mut written = 0usize;
        for bytes in bytes_list.iter() {
            let chunk: &[u8] = &**bytes;
            // `Deref` is arbitrary user code and may not return the same slice it did while
            // the total was being computed; never write past what was reserved.
            assert!(
                chunk.len() <= total - written,
                "slice length changed while building RawJavaString"
            );
            // SAFETY: the destination range `[written, written + chunk.len())` lies within the
            // `total` reserved bytes (checked above), the source is a valid slice, and a
            // freshly created string cannot overlap borrowed input.
            unsafe {
                core::ptr::copy_nonoverlapping(
                    chunk.as_ptr(),
                    destination.add(written),
                    chunk.len(),
                );
            }
            written += chunk.len();
        }
        // A short write would leave uninitialized heap bytes visible through `get_bytes`.
        assert!(
            written == total,
            "slice length changed while building RawJavaString"
        );

        new
    }

    /// Overwrites what was previously in this buffer with the contents of bytes.
    ///
    /// The old value is dropped and replaced by a freshly built string.
    ///
    /// Complexity is O(n) in the length of `bytes`.
    #[inline(always)]
    pub fn set_bytes(&mut self, bytes: impl Deref<Target = [u8]>) {
        *self = Self::from_bytes(bytes);
    }
}
163
164impl Drop for RawJavaString {
165    fn drop(&mut self) {
166        #[cfg(test)]
167        println!("Dropping");
168        if !self.is_interned() {
169            #[cfg(test)]
170            println!("Dropping non-interned string");
171            use alloc::alloc::{dealloc, Layout};
172            unsafe {
173                dealloc(
174                    self.read_ptr(),
175                    Layout::from_size_align_unchecked(self.len(), 2),
176                );
177            }
178        }
179    }
180}
181
182impl Clone for RawJavaString {
183    #[inline(always)]
184    fn clone(&self) -> Self {
185        Self::from_bytes(self.get_bytes())
186    }
187}
188
189impl fmt::Debug for RawJavaString {
190    fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
191        write!(formatter, "{:?}", &self.get_bytes())
192    }
193}
194
195impl Deref for RawJavaString {
196    type Target = [u8];
197    #[inline(always)]
198    fn deref(&self) -> &[u8] {
199        self.get_bytes()
200    }
201}
202
203impl DerefMut for RawJavaString {
204    #[inline(always)]
205    fn deref_mut(&mut self) -> &mut [u8] {
206        self.get_bytes_mut()
207    }
208}
209
#[cfg(test)]
mod tests {

    use super::*;

    /// An empty string must be representable without allocating.
    #[test]
    fn new_does_not_use_heap() {
        let string = RawJavaString::new();
        assert!(string.is_interned(), "Empty RawJavaString isn't interned!");
    }

    /// `data` is a `NonNull`, so `Option` should fit in the same two words.
    #[test]
    fn option_size() {
        assert!(
            mem::size_of::<Option<RawJavaString>>() == 2 * mem::size_of::<usize>(),
            "Size of Option<JavaString> is incorrect!"
        );
    }

    /// The struct is exactly two machine words wide.
    #[test]
    fn size() {
        assert!(
            mem::size_of::<RawJavaString>() == 2 * mem::size_of::<usize>(),
            "Size of JavaString is incorrect!"
        );
    }

    /// A short input is stored inline and round-trips unchanged.
    #[test]
    fn from_bytes() {
        let bytes: &[u8] = &[12, 3, 2, 1];
        let string = RawJavaString::from_bytes(bytes);
        assert!(string.is_interned(), "String should be interned but isn't.");

        assert!(
            bytes == string.get_bytes(),
            "String should have value `{:?}`, but instead has value `{:?}`",
            bytes,
            string
        );
    }

    /// A long all-zero input goes to the heap and round-trips unchanged.
    #[test]
    fn from_bytes_large_with_nulls() {
        let bytes: &[u8] = &[0; 127];

        let string = RawJavaString::from_bytes(bytes);
        assert!(
            !string.is_interned(),
            "String shouldn't be interned but is."
        );

        assert!(
            bytes == string.get_bytes(),
            "String should have value `{:?}`, but instead has value `{:?}`",
            bytes,
            string
        );
    }

    /// Inputs of exactly the maximum inline length stay inline, including after `set_bytes`.
    #[test]
    fn large_interned() {
        let bytes: &[u8] = &[0; RawJavaString::max_intern_len()];
        let bytes_2: &[u8] = &[1; RawJavaString::max_intern_len()];

        let mut string = RawJavaString::from_bytes(bytes);
        assert!(string.is_interned(), "String should be interned but isn't.");

        assert!(
            bytes == string.get_bytes(),
            "String should have value `{:?}`, but instead has value `{:?}`",
            bytes,
            string
        );

        string.set_bytes(bytes_2);
        assert!(string.is_interned(), "String should be interned but isn't.");

        // Report the value actually being compared against (`bytes_2`, not `bytes`).
        assert!(
            bytes_2 == string.get_bytes(),
            "String should have value `{:?}`, but instead has value `{:?}`",
            bytes_2,
            string
        );
    }

    /// A long non-zero input goes to the heap and round-trips unchanged.
    #[test]
    fn from_bytes_large() {
        let bytes: &[u8] = &[1; 255];

        let string = RawJavaString::from_bytes(bytes);
        assert!(
            !string.is_interned(),
            "String shouldn't be interned but is."
        );

        assert!(
            bytes == string.get_bytes(),
            "String should have value `{:?}`, but instead has value `{:?}`",
            bytes,
            string
        );
    }
}