cesu8_str/java/
string.rs

1use super::JavaStr;
2use crate::internal::{InternalStr, InternalString};
3use crate::{validate_cesu8_internal, FromVecError};
4
5use core::borrow::Borrow;
6use core::ops::{Deref, DerefMut};
7
8use alloc::boxed::Box;
9use alloc::collections::TryReserveError;
10use alloc::vec::Vec;
11
12/// A Java CESU-8 encoded, growable string.
13#[repr(transparent)]
14#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
15pub struct JavaString {
16    internal: InternalString,
17}
18
19impl JavaString {
20    /// Creates a new empty `JavaString`.
21    ///
22    /// Given that the `JavaString` is empty, this will not allocate any
23    /// initial buffer. While that means that this initial operations is very
24    /// inexpensive, it may cause excessive allocation later when you add data.
25    /// If you have an idea of how much data the `JavaString` will hold,
26    /// consider the [`with_capacity`] method to prevent excessive
27    /// re-allocation.
28    ///
29    /// [`with_capacity`]: Self::with_capacity
30    #[inline]
31    #[must_use]
32    pub const fn new() -> JavaString {
33        JavaString {
34            internal: InternalString::new(),
35        }
36    }
37
38    /// Creates a new empty `JavaString` with at least the specified
39    /// capacity.
40    ///
41    /// `JavaString`s have an internal buffer to hold their data. The
42    /// capacity is at length of that buffer, and can be queried with the
43    /// [`capacity`] method. This method creates an empty `JavaString`, but
44    /// one with an initial buffer that can hold at least `capacity` bytes. This
45    /// is useful when you may be appending a bunch of data to the
46    /// `JavaString`, reducing the number of reallocations it needs to do.
47    ///
48    /// [`capacity`]: Self::capacity
49    ///
50    /// If the given capacity is `0`, no allocation will occur, and this method
51    /// is identical to the [`new`] method.
52    ///
53    /// [`new`]: Self::new
54    #[inline]
55    #[must_use]
56    pub fn with_capacity(capacity: usize) -> JavaString {
57        JavaString {
58            internal: InternalString::with_capacity(capacity),
59        }
60    }
61
62    /// Converts a vector of bytes into a `JavaString`.
63    ///
64    /// A string ([`JavaString`]) is made of bytes ([`u8`]), and a vector of
65    /// bytes ([`Vec<u8>`]) is made of bytes, so this function converts between
66    /// the two. Not all byte slices are valid `JavaString`s, however:
67    /// `JavaString` requires that it is valid Java CESU-8. `from_java_cesu8`
68    /// checks to ensure that the bytes are valid Java CESU-8, and then does
69    /// the conversion.
70    ///
71    /// If you are sure that the byte slice is valid Java CESU-8, and you don't
72    /// want to incur the overhead of the validity check, there is an unsafe
73    /// version of this function, [`from_java_cesu8_unchecked`], which has
74    /// the same behavior but skips the check.
75    ///
76    /// This method will take care to not to copy the vector, for efficiency's
77    /// sake.
78    ///
79    /// If you need a [`&JavaStr`] instead of a `JavaString`, consider
80    /// [`JavaStr::from_java_cesu8`].
81    ///
82    /// The inverse of this method is [`into_bytes`].
83    ///
84    /// [`from_java_cesu8_unchecked`]: Self::from_java_cesu8_unchecked
85    /// [`&JavaStr`]: JavaStr
86    /// [`into_bytes`]: Self::into_bytes
87    ///
88    /// # Errors
89    ///
90    /// Returns [`Err`] if the slice is not Java CESU-8 with the index and
91    /// length of the invalid byte. The vector you moved in is also
92    /// included.
93    #[inline]
94    pub fn from_java_cesu8(vec: Vec<u8>) -> Result<JavaString, FromVecError> {
95        match validate_cesu8_internal::<true>(&vec) {
96            Ok(()) => unsafe {
97                Ok(JavaString {
98                    internal: InternalString::from_unchecked(vec),
99                })
100            },
101            Err(e) => Err(FromVecError { bytes: vec, error: e}),
102        }
103    }
104
105    /// Creates a new `JavaString` from a length, capacity, and pointer.
106    ///
107    /// # Safety
108    ///
109    /// This is highly unsafe, due to the number of invariants that aren't
110    /// checked:
111    /// * The memory at `buf` needs to have been previously allocationed by the
112    ///   same allocator the standard library uess, with a required alignment of
113    ///   exactly 1.
114    /// * `length` needs to be less than or equal to `capacity`.
115    /// * `capacity` needs to be the correct value.
116    /// * The first `length` bytes at `buf` need to be valid Java CESU-8.
117    ///
118    /// Violating these may cause problems like correcting the allocator's
119    /// internal data structures. For example, it is normally **not** safe to
120    /// build a `JavaString` from a pointer to a C `char` array containing
121    /// Java CESU-8 _unless_ you are certain that array was originally allocated
122    /// by the Rust standard library's allocator.
123    ///
124    /// The ownership of `buf` is effectively transferred to the
125    /// `JavaString` which may then deallocate, reallocate, or change the
126    /// contents of memory pointed to by the pointer at will. Ensure that
127    /// nothing elese uses the pointer after calling this function.
128    #[inline]
129    #[must_use]
130    pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> JavaString {
131        unsafe {
132            JavaString {
133                internal: InternalString::from_raw_parts(buf, length, capacity),
134            }
135        }
136    }
137
138    /// Converts a vector of bytes to a `JavaString` without checking that
139    /// the string contains valid Java CESU-8.
140    ///
141    /// # Safety
142    ///
143    /// This function is unsafe because it does not check that the bytes passed
144    /// to it are valid Java CESU-8. If this constraint is violated, it may
145    /// cause memory unsafety issues with future users of the `JavaString`.
146    #[inline]
147    #[must_use]
148    pub const unsafe fn from_java_cesu8_unchecked(bytes: Vec<u8>) -> JavaString {
149        JavaString {
150            internal: InternalString::from_unchecked(bytes),
151        }
152    }
153
154    /// Converts an internal string to a `JavaString` without checking that the
155    /// string contains valid Java CESU-8.
156    ///
157    /// # Safety
158    ///
159    /// This function is unsafe because it does not check that the
160    /// `InternalString` passed to it contains valid Java CESU-8. If this
161    /// constraint is violated, it may cause memory unsafety issues with future
162    /// users of the `JavaString`.
163    #[inline]
164    #[must_use]
165    pub(crate) const unsafe fn from_internal_unchecked(internal: InternalString) -> JavaString {
166        JavaString { internal }
167    }
168
169    /// Converts a `JavaString` into a byte vector.
170    ///
171    /// This consumes the `JavaString`, so we do not need to copy its contents.
172    #[inline]
173    #[must_use]
174    pub fn into_bytes(self) -> Vec<u8> {
175        self.internal.into_bytes()
176    }
177
178    /// Extracts a string slice containing the entire `JavaString`.
179    #[inline]
180    #[must_use]
181    pub fn as_str(&self) -> &JavaStr {
182        self
183    }
184
185    /// Converts a `JavaString` into a mutable string slice.
186    #[inline]
187    #[must_use]
188    pub fn as_mut_str(&mut self) -> &mut JavaStr {
189        self
190    }
191
192    /// Appends a given string slice onto the end of this `JavaString`.
193    #[inline]
194    pub fn push_str(&mut self, str: &JavaStr) {
195        self.internal.push_str(&str.internal);
196    }
197
198    /// Returns this `JavaString`'s capacity, in bytes.
199    #[inline]
200    #[must_use]
201    pub fn capacity(&self) -> usize {
202        self.internal.capacity()
203    }
204
205    /// Reserves capacity for at least `additional` bytes more than the current
206    /// length. The allocator may reserve more space to speculatively avoid
207    /// frequent allocations. After calling `reserve`, capacity will be greater
208    /// than or equal to `self.len() + additional`. Does nothing if the capacity
209    /// is already sufficient.
210    ///
211    /// # Panics
212    ///
213    /// Panics if the new capacity overflows [`usize`].
214    #[inline]
215    pub fn reserve(&mut self, additional: usize) {
216        self.internal.reserve(additional);
217    }
218
219    /// Reserves the minimum capacity for at least `additional` bytes more than
220    /// the current length. Unlike [`reserve`], this will not deliberately
221    /// over-allocate to speculatively avoid allocations. After calling reserve
222    /// `reserve_excat`, capacity will be greater than or equal to `self.len() +
223    /// additional`. Does nothing if the capacity is already sufficient.
224    ///
225    /// [`reserve`]: Self::reserve
226    ///
227    /// # Panics
228    ///
229    /// Panics if the new capacity overflows [`usize`].
230    #[inline]
231    pub fn reserve_exact(&mut self, additional: usize) {
232        self.internal.reserve_exact(additional);
233    }
234
235    /// Tries to reserve capacity for at least `additional` bytes more than the
236    /// current length. The allocator may reserve more space to speculatively
237    /// avoid frequent allocations. After calling `try_reserve`, capacity will
238    /// be greater than or equal to `self.len() + additional` if it returns
239    /// `OK(())`. Does nothing if capacity is already sufficient. This method
240    /// preserves the contents even if an error occurs.
241    ///
242    /// # Errors
243    ///
244    /// If the capacity overflows, or the allocator reports a failure, then an
245    /// error is returned.
246    #[inline]
247    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
248        self.internal.try_reserve(additional)
249    }
250
251    /// Tries to reserve the minimum capacity for at least `additional` bytes
252    /// more than current length. Unlike [`try_reserve`], this will not
253    /// deliberately over-allocate to speculatively avoid frequent allocations.
254    /// After calling `try_reserve_exact`, capacity will be greater than or
255    /// equal `self.len() + additional` if it returns `Ok(())`. Does nothing if
256    /// the capacity is already sufficient.
257    ///
258    /// Not that the allocator may give the collection more space than it
259    /// requests. Therefore, capacity can not be relied upon to be precisely
260    /// minimal. Prefer [`try_reserve`] if future insertions are expected.
261    ///
262    /// [`try_reserve`]: JavaString::try_reserve
263    ///
264    /// # Errors
265    ///
266    /// If the capacity overflows, or the allocator reports a failure, then an
267    /// error is returned.
268    #[inline]
269    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
270        self.internal.try_reserve_exact(additional)
271    }
272
273    /// Shrinks the capacity of this `JavaString` to match its length.
274    #[inline]
275    pub fn shrink_to_fit(&mut self) {
276        self.internal.shrink_to_fit();
277    }
278
279    /// Shrinks the capacity of this `JavaString` with a lower bound.
280    ///
281    /// The capacity will remaing at least as large as both the length and the
282    /// supplied value.
283    ///
284    /// If the current capacity is less than the lower limit, this is a no-op.
285    #[inline]
286    pub fn shrink_to(&mut self, min_capacity: usize) {
287        self.internal.shrink_to(min_capacity);
288    }
289
290    /// Appends the given [`char`] to the end of this `JavaString`.
291    #[inline]
292    pub fn push(&mut self, c: char) {
293        self.internal.push::<true>(c);
294    }
295
296    /// Returns a byte slice of this `JavaString`'s contents.
297    #[inline]
298    #[must_use]
299    pub fn as_bytes(&self) -> &[u8] {
300        self.internal.as_bytes()
301    }
302
303    /// Shortens this `JavaString` to the specified length.
304    ///
305    /// If `new_len` is greater than the string's current length, this has no
306    /// effect.
307    ///
308    /// Note that this method has no effect on the allocated capacity of the
309    /// string.
310    ///
311    /// # Panics
312    ///
313    /// Panics if `new_len` does not lie on a [`char`] boundary.
314    #[inline]
315    pub fn truncate(&mut self, new_len: usize) {
316        self.internal.truncate(new_len);
317    }
318
319    /// Removes the last character from the string buffer and returns it.
320    ///
321    /// Returns [`None`] if this `JavaString` is empty.
322    #[inline]
323    #[must_use]
324    pub fn pop(&mut self) -> Option<char> {
325        self.internal.pop()
326    }
327
328    /// Removes a [`char`] from this `JavaString` at a byte position and
329    /// returns it.
330    ///
331    /// This is an *O*(*n*) operation, as it requires copy every element in the
332    /// buffer.
333    ///
334    /// # Panics
335    ///
336    /// Panics if `idx` is large than or equal to the `JavaString`'s length, or
337    /// if it does not lie on a [`char`] boundary.
338    #[inline]
339    pub fn remove(&mut self, idx: usize) -> char {
340        self.internal.remove(idx)
341    }
342
343    /// Inserts a character into this `JavaString` at a byte position.
344    ///
345    /// This is an *O*(*n*) operation as it requires copying every element in
346    /// the buffer.
347    ///
348    /// # Panics
349    ///
350    /// Panics if `idx` is larger than the `JavaString`'s length, or if it does
351    /// not lie on a [`char`] boundary.
352    #[inline]
353    pub fn insert(&mut self, idx: usize, c: char) {
354        self.internal.insert::<true>(idx, c);
355    }
356
357    /// Returns a mutable reference to the contents of this `JavaString`.
358    ///     
359    /// # Safety
360    ///
361    /// This function is unsafe because the returned `&mut Vec` allows writing
362    /// bytes which are not valid Java Java CESU-8. If this constraint is
363    /// violated, using the original `JavaString` after dropping the `&mut Vec`
364    /// may violate memory safety, as `JavaString`s are expected to always
365    /// contains valid Java Java CESU-8.
366    #[inline]
367    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
368        self.internal.as_mut_vec()
369    }
370
371    /// Returns the length of this `JavaString`, in bytes, nor [`char`]s or
372    /// graphemes. In other words, it might not be what a human considers the
373    /// length of the string.
374    #[inline]
375    #[must_use]
376    pub fn len(&self) -> usize {
377        self.internal.len()
378    }
379
380    /// Returns `true` if this `JavaString` has a length of zero, and `false`
381    /// otherwise.
382    #[inline]
383    #[must_use]
384    pub fn is_empty(&self) -> bool {
385        self.internal.is_empty()
386    }
387
388    /// Truncates this `JavaString`, removing all contents.
389    ///
390    /// While this means the `JavaString` will have a length of zero, it does
391    /// not touch its capacity.
392    #[inline]
393    pub fn clear(&mut self) {
394        self.internal.clear();
395    }
396
397    /// Converts this `JavaString` into a <code>[Box]<[JavaStr]</code>.
398    ///
399    /// This will drop any excess capacity.
400    #[inline]
401    #[must_use]
402    pub fn into_boxed_str(self) -> Box<JavaStr> {
403        let x = self.internal.into_boxed_str();
404        unsafe { JavaStr::from_boxed_internal_unchecked(x) }
405    }
406
407    /// Consumes and leaks the `JavaString`, returning a mutable reference to
408    /// the contents, `&'a mut InternalStr`.
409    ///
410    /// The caller has free choice over the returned lifetime, including
411    /// `'static`. Indeed, this function is ideally used for data that lives fro
412    /// the remainder of the program's life, as dropping the returned reference
413    /// will cause a memory leak.
414    ///
415    /// It does not reallocate or shrink the `JavaString`, so the leaked
416    /// allocation may include unused capacity that is not part of the returned
417    /// slice. If you don't want that, call [`into_boxed_str`], and then
418    /// [`Box::leak`].
419    ///
420    /// [`into_boxed_str`]: Self::into_boxed_str
421    #[inline]
422    #[must_use]
423    pub fn leak<'a>(self) -> &'a mut JavaStr {
424        let str = self.internal.leak();
425        unsafe { &mut *(str as *mut InternalStr as *mut JavaStr) }
426    }
427}
428
429impl Default for JavaString {
430    fn default() -> Self {
431        Self::new()
432    }
433}
434
435impl Borrow<JavaStr> for JavaString {
436    fn borrow(&self) -> &JavaStr {
437        self
438    }
439}
440
441impl Deref for JavaString {
442    type Target = JavaStr;
443
444    #[inline]
445    fn deref(&self) -> &Self::Target {
446        unsafe { JavaStr::from_java_cesu8_unchecked(self.internal.as_bytes()) }
447    }
448}
449
450impl DerefMut for JavaString {
451    #[inline]
452    fn deref_mut(&mut self) -> &mut Self::Target {
453        unsafe { JavaStr::from_java_cesu8_unchecked_mut(self.internal.as_bytes_mut()) }
454    }
455}
456
457impl core::fmt::Debug for JavaString {
458    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
459        core::fmt::Debug::fmt(&self.internal, f)
460    }
461}
462
463impl core::fmt::Display for JavaString {
464    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
465        core::fmt::Display::fmt(&self.internal, f)
466    }
467}