cesu8_str/java/
str.rs

1use crate::internal::InternalStr;
2use crate::{validate_cesu8_internal, EncodingError};
3
4use super::iter::{JavaCharIndices, JavaChars};
5
6use core::ops::RangeBounds;
7
8#[cfg(feature = "alloc")]
9use super::JavaString;
10
11#[cfg(feature = "alloc")]
12use alloc::borrow::ToOwned;
13#[cfg(feature = "alloc")]
14use alloc::boxed::Box;
15
16/// A Java CESU-8 encoded string slice.
17#[repr(transparent)]
18#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
19pub struct JavaStr {
20    pub(crate) internal: InternalStr,
21}
22
23impl JavaStr {
24    /// Converts a slice of bytes to a `JavaStr`.
25    ///
26    /// A Java CESU-8 string slice ([`JavaStr`]) is made of bytes ([`u8`]), and
27    /// a byte slice ([`[u8]`][byteslice]) is made of bytes, so this
28    /// function converts betwen the two. Not all byte slices are valid
29    /// string slices, however `JavaStr` requires that it is valid Java
30    /// CESU-8. [`from_java_cesu8`] checks to ensure that the bytes are
31    /// valid Java CESU-8, and then does the conversion.
32    ///
33    /// [byteslice]: slice
34    /// [`from_java_cesu8`]: Self::from_java_cesu8
35    ///
36    /// If you are sure that the byte slice is valid Java CESU-8, and you don't
37    /// want to incur the overhead of the validity check, there is an unsafe
38    /// version of this function [`from_java_cesu8_unchecked`], which has the
39    /// same behavior but skips the check.
40    ///
41    /// [`from_java_cesu8_unchecked`]: Self::from_java_cesu8_unchecked
42    ///
43    /// # Errors
44    ///
45    /// Returns [`Err`] if the slice is not Java CESU-8 with
46    /// a description as to why the provided slice is
47    /// not Java CESU-8.
48    #[inline]
49    pub const fn from_java_cesu8(v: &[u8]) -> Result<&JavaStr, EncodingError> {
50        match validate_cesu8_internal::<true>(v) {
51            Ok(()) => Ok(unsafe { JavaStr::from_java_cesu8_unchecked(v) }),
52            Err(e) => Err(e),
53        }
54    }
55
56    /// Converts a mutable slice of bytes to a mutable `JavaStr`.
57    ///
58    /// A Java CESU-8 string slice ([`JavaStr`]) is made of bytes ([`u8`]), and
59    /// a byte slice ([`[u8]`][byteslice]) is made of bytes, so this
60    /// function converts betwen the two. Not all byte slices are valid
61    /// string slices, however `JavaStr` requires that it is valid Java
62    /// CESU-8. [`from_java_cesu8`] checks to ensure that the bytes are
63    /// valid Java CESU-8, and then does the conversion.
64    ///
65    /// [byteslice]: slice
66    /// [`from_java_cesu8`]: Self::from_java_cesu8
67    ///
68    /// If you are sure that the byte slice is valid Java CESU-8, and you don't
69    /// want to incur the overhead of the validity check, there is an unsafe
70    /// version of this function [`from_java_cesu8_unchecked_mut`], which has
71    /// the same behavior but skips the check.
72    ///
73    /// [`from_java_cesu8_unchecked_mut`]: Self::from_java_cesu8_unchecked_mut
74    ///
75    /// # Errors
76    ///
77    /// Returns [`Err`] if the slice is not Java CESU-8 with
78    /// a description as to why the provided slice is
79    /// not Java CESU-8.
80    #[inline]
81    pub fn from_java_cesu8_mut(v: &mut [u8]) -> Result<&mut JavaStr, EncodingError> {
82        match validate_cesu8_internal::<true>(v) {
83            Ok(()) => Ok(unsafe { JavaStr::from_java_cesu8_unchecked_mut(v) }),
84            Err(e) => Err(e),
85        }
86    }
87
88    /// Converts a slice of bytes to a `JavaStr` without checking that the
89    /// string contains valid Java CESU-8.
90    ///
91    /// See the safe version, [`from_java_cesu8`], for more details.
92    ///
93    /// [`from_java_cesu8`]: Self::from_java_cesu8
94    ///
95    /// # Safety
96    ///
97    /// The bytes passed in must be valid Java CESU-8.
98    #[inline]
99    #[must_use]
100    pub const unsafe fn from_java_cesu8_unchecked(v: &[u8]) -> &JavaStr {
101        unsafe { &*(v as *const [u8] as *const JavaStr) }
102    }
103
104    /// Converts a mutable slice of bytes to a mutable `JavaStr` without
105    /// checking that the string contains valid Java CESU-8.
106    ///
107    /// See the safe version, [`from_java_cesu8_mut`], for more details.
108    ///
109    /// [`from_java_cesu8_mut`]: Self::from_java_cesu8_mut
110    ///
111    /// # Safety
112    ///
113    /// The bytes passed in must be valid Java CESU-8.
114    #[inline]
115    #[must_use]
116    pub unsafe fn from_java_cesu8_unchecked_mut(v: &mut [u8]) -> &mut JavaStr {
117        unsafe { &mut *(v as *mut [u8] as *mut JavaStr) }
118    }
119
120    /// Converts a boxed slice of bytes to a boxed string slice without checking
121    /// that the string contains valid Java CESU-8.
122    ///
123    /// # Safety
124    ///
125    /// The bytes passed in must be valid Java CESU-8.
126    #[cfg(feature = "alloc")]
127    #[inline]
128    #[must_use]
129    pub unsafe fn from_boxed_java_cesu8_unchecked(v: Box<[u8]>) -> Box<JavaStr> {
130        unsafe { Box::from_raw(Box::into_raw(v) as *mut JavaStr) }
131    }
132
133    /// Converts an `InternalStr` to a `JavaStr` without checking that the
134    /// string contains valid Java CESU-8.
135    ///
136    /// # Safety
137    ///
138    /// The string passed in must be valid Java CESU-8.
139    #[inline]
140    #[must_use]
141    pub(crate) const unsafe fn from_internal_unchecked(v: &InternalStr) -> &JavaStr {
142        unsafe { &*(v as *const InternalStr as *const JavaStr) }
143    }
144
145    /// Converts a mutable `InternalStr` to a mutable `JavaStr` without
146    /// checking that the string contains valid Java CESU-8.
147    ///
148    /// # Safety
149    ///
150    /// The string passed in must be valid Java CESU-8.
151    #[inline]
152    #[must_use]
153    pub(crate) unsafe fn from_internal_unchecked_mut(v: &mut InternalStr) -> &mut JavaStr {
154        unsafe { &mut *(v as *mut InternalStr as *mut JavaStr) }
155    }
156
157    /// Converts an `InternalStr` into a boxed string slice without checking
158    /// that the string contains valid Java CESU-8.
159    ///
160    /// # Safety
161    ///
162    /// The string passed in must be valid Java CESU-8.
163    #[cfg(feature = "alloc")]
164    #[inline]
165    #[must_use]
166    pub(crate) unsafe fn from_boxed_internal_unchecked(v: Box<InternalStr>) -> Box<JavaStr> {
167        unsafe { Box::from_raw(Box::into_raw(v) as *mut JavaStr) }
168    }
169
170    /// Returns the length of `self`.
171    ///
172    /// This length is in bytes, not [`char`]s or graphemes. In other words, it
173    /// might not be what a human considers the length of the string.
174    #[inline]
175    #[must_use]
176    pub const fn len(&self) -> usize {
177        self.internal.len()
178    }
179
180    /// Returns `true` if `self` has a length of zero bytes.
181    #[inline]
182    #[must_use]
183    pub const fn is_empty(&self) -> bool {
184        self.internal.is_empty()
185    }
186
187    /// Checks that the `index`-th byte is the first byte in a Java CESU-8 code
188    /// point sequence or the end of the string.
189    ///
190    /// The start and end of the string (when `index == self.len()`) are
191    /// considered to be boundaries.
192    ///
193    /// Returns `false` if `index is greater than self.len()`.
194    #[inline]
195    #[must_use]
196    pub fn is_char_boundary(&self, index: usize) -> bool {
197        self.internal.is_char_boundary(index)
198    }
199
200    /// Converts a string slice to a byte slice.
201    #[inline]
202    #[must_use]
203    pub const fn as_bytes(&self) -> &[u8] {
204        self.internal.as_bytes()
205    }
206
207    /// Converts a mutable string slice to a mutable byte slice.
208    ///
209    /// # Safety
210    ///
211    /// The caller must ensure that the content of the slice is valid Java
212    /// CESU-8 before the borrow ends and the underlying `JavaStr` is used.
213    ///
214    /// Use of a `JavaStr` whose contents are not valid Java CESU-8 is undefined
215    /// behavior.
216    #[inline]
217    #[must_use]
218    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
219        self.internal.as_bytes_mut()
220    }
221
222    /// Converts a string slice to a raw pointer.
223    ///
224    /// As string slices are a slice of bytes, the raw pointer points to a
225    /// [`u8`]. This pointer will be pointing to the first bytes of the string
226    /// slice.
227    ///
228    /// The caller must ensure that the returned pointer is never written to. If
229    /// you need to mutate the contents of the string slice, use [`as_mut_ptr`].
230    ///
231    /// [`as_mut_ptr`]: Self::as_mut_ptr
232    #[inline]
233    #[must_use]
234    pub const fn as_ptr(&self) -> *const u8 {
235        self.internal.as_ptr()
236    }
237
238    /// Converts a mutable string slice to a raw pointer.
239    ///
240    /// As string slices are a slice of bytes, the raw pointer points to a
241    /// [`u8`]. This pointer will be pointing to the first byte of the string
242    /// slice.
243    ///
244    /// It is your responsibility to make sure that the string slice only gets
245    /// modified in a way that it remains valid Java CESU-8.
246    #[inline]
247    #[must_use]
248    pub fn as_mut_ptr(&mut self) -> *mut u8 {
249        self.internal.as_mut_ptr()
250    }
251
252    /// Returns a subslice of `JavaStr`.
253    ///
254    /// This is the non-panicking alternative to indexing the `str`. Returns
255    /// [`None`] whenever equivalent indexing operations would panic.
256    #[inline]
257    #[must_use]
258    pub fn get<I: RangeBounds<usize>>(&self, index: I) -> Option<&JavaStr> {
259        self.internal
260            .get(index)
261            .map(|internal| unsafe { JavaStr::from_internal_unchecked(internal) })
262    }
263
264    /// Returns a mutable subslice of `JavaStr`.
265    ///
266    /// This is the non-panicking alternative to indexing the `JavaStr`.
267    /// Returns [`None`] whenver equivalent indexing operations would panic.
268    #[inline]
269    #[must_use]
270    pub fn get_mut<I: RangeBounds<usize>>(&mut self, index: I) -> Option<&mut JavaStr> {
271        self.internal
272            .get_mut(index)
273            .map(|internal| unsafe { JavaStr::from_internal_unchecked_mut(internal) })
274    }
275
276    /// Returns an unchecked subslice of `JavaStr`.
277    ///
278    /// This is the unchecked alternative to indexing the `JavaStr`.
279    ///
280    /// # Safety
281    ///
282    /// Callers of this function are responsible for ensuring that:
283    /// * The starting index does not exceed the ending index;
284    /// * The indices are within the bounds of the original slice;
285    /// * The indices fall on Java CESU-8 sequence boundaries.
286    ///
287    /// Failing that, the returned string slice may reference invalid memory or
288    /// violate the invariants communicated by the `JavaStr` type.
289    #[inline]
290    #[must_use]
291    pub unsafe fn get_unchecked<I: RangeBounds<usize>>(&self, index: I) -> &JavaStr {
292        unsafe { JavaStr::from_internal_unchecked(self.internal.get_unchecked(index)) }
293    }
294
295    /// Returns a mutable, unchecked subslice of `JavaStr`.
296    ///
297    /// This the unchecked alternative to indexing the `JavaStr`.
298    ///
299    /// # Safety
300    ///
301    /// Callers of this function are responsible for ensuring that:
302    /// * The starting index does not exceed the ending index;
303    /// * The indices are within the bounds of the original slice;
304    /// * The indices fall on Java CESU-8 sequence boundaries.
305    ///
306    /// Failing that, the returned string slice may reference invalid memory or
307    /// violate the invariants communicated by the `JavaStr` type.
308    #[inline]
309    #[must_use]
310    pub unsafe fn get_unchecked_mut<I: RangeBounds<usize>>(&mut self, index: I) -> &mut JavaStr {
311        unsafe { JavaStr::from_internal_unchecked_mut(self.internal.get_unchecked_mut(index)) }
312    }
313
314    /// Divide one string slice into two at an index.
315    ///
316    /// The argument, `mid`, should be a byte offset from the start of the
317    /// string. It must also be on the boundary of a Java CESU-8 character.
318    ///
319    /// The two slices returned go from the string of the string slice to `mid`,
320    /// and from `mid` to the end of the string slice.
321    ///
322    /// To get mutable string slices instead, see the [`split_at_mut`] method.
323    ///
324    /// [`split_at_mut`]: Self::split_at_mut
325    #[inline]
326    #[must_use]
327    pub fn split_at(&self, mid: usize) -> (&JavaStr, &JavaStr) {
328        let (left, right) = self.internal.split_at(mid);
329        unsafe {
330            (
331                JavaStr::from_internal_unchecked(left),
332                JavaStr::from_internal_unchecked(right),
333            )
334        }
335    }
336
337    /// Divide one mutable string slice into two at an index.
338    ///
339    /// The argument, `mid`, should be a byte offset from the start of the
340    /// string. It must also be on the boundary of a Java CESU-8 character.
341    ///
342    /// The two slices returned go from the string of the string slice to `mid`,
343    /// and from `mid` to the end of the string slice.
344    ///
345    /// To get immutable string slices instead, see the [`split_at`] method.
346    ///
347    /// [`split_at`]: Self::split_at
348    #[inline]
349    #[must_use]
350    pub fn split_at_mut(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) {
351        let (left, right) = self.internal.split_at_mut(mid);
352        unsafe {
353            (
354                JavaStr::from_internal_unchecked_mut(left),
355                JavaStr::from_internal_unchecked_mut(right),
356            )
357        }
358    }
359
360    /// Divide one string slice into two at an index.
361    ///
362    /// The argument, `mid`, should be a valid byte offset from the start of the
363    /// string. It must also be on the boundary of a Java CESU-8 code point. The
364    /// method returns `None` if that's not the case.
365    ///
366    /// The two slices returned go from the start of the string slice to `mid`,
367    /// and from `mid` to the end of the string slice.
368    ///
369    /// To get mutable string slices instead, see the [`split_at_mut_checked`]
370    /// method.
371    ///
372    /// [`split_at_mut_checked`]: Self::split_at_mut_checked
373    #[inline]
374    #[must_use]
375    pub fn split_at_checked(&self, mid: usize) -> Option<(&JavaStr, &JavaStr)> {
376        let (left, right) = self.internal.split_at_checked(mid)?;
377        Some(unsafe {
378            (
379                JavaStr::from_internal_unchecked(left),
380                JavaStr::from_internal_unchecked(right),
381            )
382        })
383    }
384
385    /// Divide one mutable string slice into two at an index.
386    ///
387    /// The argument, `mid`, should be a valid byte offset from the start of the
388    /// string. It must also be on the boundary of a Java CESU-8 code point. The
389    /// method returns `None` if that's not the case.
390    ///
391    /// The two slices returned go from the start of the string slice to `mid`,
392    /// and from `mid` to the end of the string slice.
393    ///
394    /// To get immutable string slices instead, see the [`split_at_checked`]
395    /// method.
396    ///
397    /// [`split_at_checked`]: Self::split_at_checked
398    #[inline]
399    #[must_use]
400    pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut JavaStr, &mut JavaStr)> {
401        let (left, right) = self.internal.split_at_mut_checked(mid)?;
402        Some(unsafe {
403            (
404                JavaStr::from_internal_unchecked_mut(left),
405                JavaStr::from_internal_unchecked_mut(right),
406            )
407        })
408    }
409
410    /// Divide a string into two at an index.
411    ///
412    /// The two slices returned go from the start of the string slice to `mid`,
413    /// and from `mid` to the end of the string slice.
414    ///
415    /// To get mutable string slices instead, see the [`split_at_mut_unchecked`]
416    /// method.
417    ///
418    /// [`split_at_mut_unchecked`]: Self::split_at_mut_unchecked
419    ///
420    /// # Safety
421    ///
422    /// The caller must ensure that `mid` is a valid byte offset from the start
423    /// of the string and falls on the boundary of a Java CESU-8 character.
424    #[inline]
425    #[must_use]
426    pub unsafe fn split_at_unchecked(&self, mid: usize) -> (&JavaStr, &JavaStr) {
427        let (left, right) = self.internal.split_at_unchecked(mid);
428        unsafe {
429            (
430                JavaStr::from_internal_unchecked(left),
431                JavaStr::from_internal_unchecked(right),
432            )
433        }
434    }
435
436    /// Divide a mutable string into two at an index.
437    ///
438    /// The two slices returned go from the start of the string slice to `mid`,
439    /// and from `mid` to the end of the string slice.
440    ///
441    /// To get immutable string slices instead, see the [`split_at_unchecked`]
442    /// method.
443    ///
444    /// [`split_at_unchecked`]: Self::split_at_unchecked
445    ///
446    /// # Safety
447    ///
448    /// The caller must ensure that `mid` is a valid byte offset from the start
449    /// of the string and falls on the boundary of a Java CESU-8 character.
450    #[inline]
451    #[must_use]
452    pub unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) {
453        let (left, right) = self.internal.split_at_mut_unchecked(mid);
454        unsafe {
455            (
456                JavaStr::from_internal_unchecked_mut(left),
457                JavaStr::from_internal_unchecked_mut(right),
458            )
459        }
460    }
461
462    /// Returns an iterator over the [`char`]s of a string slice.
463    ///
464    /// As an `JavaStr` consists of valid Java CESU-8, we can iterate through a
465    /// string by [`char`]. This method returns such an iterator.
466    ///
467    /// It's important to remember that [`char`] represents a Unicode Scalar
468    /// Value, and might not match your idea of what a 'character' is. Iteration
469    /// over grapheme clusters may be what you actually want. This functionality
470    /// is not provided by this crate.
471    #[inline]
472    pub fn chars(&self) -> JavaChars {
473        JavaChars {
474            iter: self.internal.chars(),
475        }
476    }
477
478    /// Returns an iterator over the [`char`]s of a string slice, and their
479    /// positions.
480    ///
481    /// As an `JavaStr` consists of valid Java CESU-8, we can iterate through a
482    /// string by [`char`]. This method returns an iterator of both these
483    /// [`char`]s, as well as their byte positions.
484    ///
485    /// The iterator yields tuples. The position is first,
486    /// the [`char`] is second.
487    #[inline]
488    pub fn char_indices(&self) -> JavaCharIndices {
489        JavaCharIndices {
490            iter: self.internal.char_indices(),
491        }
492    }
493
494    /// Checks if all characters in this string are within the ASCII range.
495    #[inline]
496    #[must_use]
497    pub const fn is_ascii(&self) -> bool {
498        self.internal.is_ascii()
499    }
500}
501
/// Produces an owned [`JavaString`] by copying the bytes of the slice.
#[cfg(feature = "alloc")]
impl ToOwned for JavaStr {
    type Owned = JavaString;

    #[inline]
    fn to_owned(&self) -> Self::Owned {
        let bytes = self.as_bytes().to_vec();
        // SAFETY: `bytes` is a copy of the contents of a `JavaStr`, which by
        // the type's invariant are valid Java CESU-8.
        unsafe { JavaString::from_java_cesu8_unchecked(bytes) }
    }
}
512
513impl AsRef<[u8]> for JavaStr {
514    #[inline]
515    fn as_ref(&self) -> &[u8] {
516        self.as_bytes()
517    }
518}
519
520impl core::fmt::Debug for JavaStr {
521    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
522        core::fmt::Debug::fmt(&self.internal, f)
523    }
524}
525
526impl core::fmt::Display for JavaStr {
527    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
528        core::fmt::Display::fmt(&self.internal, f)
529    }
530}