cesu8_str/java/str.rs
1use crate::internal::InternalStr;
2use crate::{validate_cesu8_internal, EncodingError};
3
4use super::iter::{JavaCharIndices, JavaChars};
5
6use core::ops::RangeBounds;
7
8#[cfg(feature = "alloc")]
9use super::JavaString;
10
11#[cfg(feature = "alloc")]
12use alloc::borrow::ToOwned;
13#[cfg(feature = "alloc")]
14use alloc::boxed::Box;
15
/// A Java CESU-8 encoded string slice.
///
/// `JavaStr` is a thin wrapper around the crate-internal `InternalStr`. The
/// derived comparison and hashing traits simply defer to the implementations
/// on that inner value.
// `#[repr(transparent)]` gives `JavaStr` the same layout as its only field.
// The `from_*_unchecked` constructors in this file depend on that when they
// cast pointers to `InternalStr` (and to `[u8]`) into pointers to `JavaStr`.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct JavaStr {
    /// The wrapped string data. Every safe constructor in this file only
    /// builds a `JavaStr` after the contents are known (or promised by the
    /// caller) to be valid Java CESU-8.
    pub(crate) internal: InternalStr,
}
22
23impl JavaStr {
24 /// Converts a slice of bytes to a `JavaStr`.
25 ///
26 /// A Java CESU-8 string slice ([`JavaStr`]) is made of bytes ([`u8`]), and
27 /// a byte slice ([`[u8]`][byteslice]) is made of bytes, so this
28 /// function converts betwen the two. Not all byte slices are valid
29 /// string slices, however `JavaStr` requires that it is valid Java
30 /// CESU-8. [`from_java_cesu8`] checks to ensure that the bytes are
31 /// valid Java CESU-8, and then does the conversion.
32 ///
33 /// [byteslice]: slice
34 /// [`from_java_cesu8`]: Self::from_java_cesu8
35 ///
36 /// If you are sure that the byte slice is valid Java CESU-8, and you don't
37 /// want to incur the overhead of the validity check, there is an unsafe
38 /// version of this function [`from_java_cesu8_unchecked`], which has the
39 /// same behavior but skips the check.
40 ///
41 /// [`from_java_cesu8_unchecked`]: Self::from_java_cesu8_unchecked
42 ///
43 /// # Errors
44 ///
45 /// Returns [`Err`] if the slice is not Java CESU-8 with
46 /// a description as to why the provided slice is
47 /// not Java CESU-8.
48 #[inline]
49 pub const fn from_java_cesu8(v: &[u8]) -> Result<&JavaStr, EncodingError> {
50 match validate_cesu8_internal::<true>(v) {
51 Ok(()) => Ok(unsafe { JavaStr::from_java_cesu8_unchecked(v) }),
52 Err(e) => Err(e),
53 }
54 }
55
56 /// Converts a mutable slice of bytes to a mutable `JavaStr`.
57 ///
58 /// A Java CESU-8 string slice ([`JavaStr`]) is made of bytes ([`u8`]), and
59 /// a byte slice ([`[u8]`][byteslice]) is made of bytes, so this
60 /// function converts betwen the two. Not all byte slices are valid
61 /// string slices, however `JavaStr` requires that it is valid Java
62 /// CESU-8. [`from_java_cesu8`] checks to ensure that the bytes are
63 /// valid Java CESU-8, and then does the conversion.
64 ///
65 /// [byteslice]: slice
66 /// [`from_java_cesu8`]: Self::from_java_cesu8
67 ///
68 /// If you are sure that the byte slice is valid Java CESU-8, and you don't
69 /// want to incur the overhead of the validity check, there is an unsafe
70 /// version of this function [`from_java_cesu8_unchecked_mut`], which has
71 /// the same behavior but skips the check.
72 ///
73 /// [`from_java_cesu8_unchecked_mut`]: Self::from_java_cesu8_unchecked_mut
74 ///
75 /// # Errors
76 ///
77 /// Returns [`Err`] if the slice is not Java CESU-8 with
78 /// a description as to why the provided slice is
79 /// not Java CESU-8.
80 #[inline]
81 pub fn from_java_cesu8_mut(v: &mut [u8]) -> Result<&mut JavaStr, EncodingError> {
82 match validate_cesu8_internal::<true>(v) {
83 Ok(()) => Ok(unsafe { JavaStr::from_java_cesu8_unchecked_mut(v) }),
84 Err(e) => Err(e),
85 }
86 }
87
88 /// Converts a slice of bytes to a `JavaStr` without checking that the
89 /// string contains valid Java CESU-8.
90 ///
91 /// See the safe version, [`from_java_cesu8`], for more details.
92 ///
93 /// [`from_java_cesu8`]: Self::from_java_cesu8
94 ///
95 /// # Safety
96 ///
97 /// The bytes passed in must be valid Java CESU-8.
98 #[inline]
99 #[must_use]
100 pub const unsafe fn from_java_cesu8_unchecked(v: &[u8]) -> &JavaStr {
101 unsafe { &*(v as *const [u8] as *const JavaStr) }
102 }
103
104 /// Converts a mutable slice of bytes to a mutable `JavaStr` without
105 /// checking that the string contains valid Java CESU-8.
106 ///
107 /// See the safe version, [`from_java_cesu8_mut`], for more details.
108 ///
109 /// [`from_java_cesu8_mut`]: Self::from_java_cesu8_mut
110 ///
111 /// # Safety
112 ///
113 /// The bytes passed in must be valid Java CESU-8.
114 #[inline]
115 #[must_use]
116 pub unsafe fn from_java_cesu8_unchecked_mut(v: &mut [u8]) -> &mut JavaStr {
117 unsafe { &mut *(v as *mut [u8] as *mut JavaStr) }
118 }
119
/// Turns a boxed slice of bytes into a boxed `JavaStr` without checking
/// that the bytes are valid Java CESU-8.
///
/// Ownership of the allocation moves into the returned box.
///
/// # Safety
///
/// The bytes passed in must be valid Java CESU-8.
#[cfg(feature = "alloc")]
#[inline]
#[must_use]
pub unsafe fn from_boxed_java_cesu8_unchecked(v: Box<[u8]>) -> Box<JavaStr> {
    let raw = Box::into_raw(v) as *mut JavaStr;
    // SAFETY: `raw` was just released by `Box::into_raw`, so rebuilding a
    // box from it is the only owner of the allocation. `JavaStr` is
    // `#[repr(transparent)]` over `InternalStr`, which is assumed to share
    // the layout of `[u8]` (NOTE(review): confirm against `InternalStr`).
    // The caller guarantees that the bytes are valid Java CESU-8.
    unsafe { Box::from_raw(raw) }
}
132
133 /// Converts an `InternalStr` to a `JavaStr` without checking that the
134 /// string contains valid Java CESU-8.
135 ///
136 /// # Safety
137 ///
138 /// The string passed in must be valid Java CESU-8.
139 #[inline]
140 #[must_use]
141 pub(crate) const unsafe fn from_internal_unchecked(v: &InternalStr) -> &JavaStr {
142 unsafe { &*(v as *const InternalStr as *const JavaStr) }
143 }
144
145 /// Converts a mutable `InternalStr` to a mutable `JavaStr` without
146 /// checking that the string contains valid Java CESU-8.
147 ///
148 /// # Safety
149 ///
150 /// The string passed in must be valid Java CESU-8.
151 #[inline]
152 #[must_use]
153 pub(crate) unsafe fn from_internal_unchecked_mut(v: &mut InternalStr) -> &mut JavaStr {
154 unsafe { &mut *(v as *mut InternalStr as *mut JavaStr) }
155 }
156
/// Turns a boxed `InternalStr` into a boxed `JavaStr` without checking
/// that it holds valid Java CESU-8.
///
/// Ownership of the allocation moves into the returned box.
///
/// # Safety
///
/// The string passed in must be valid Java CESU-8.
#[cfg(feature = "alloc")]
#[inline]
#[must_use]
pub(crate) unsafe fn from_boxed_internal_unchecked(v: Box<InternalStr>) -> Box<JavaStr> {
    let raw = Box::into_raw(v) as *mut JavaStr;
    // SAFETY: `raw` was just released by `Box::into_raw`, and `JavaStr` is
    // a `#[repr(transparent)]` wrapper around `InternalStr`, so the
    // allocation has the right layout for a `Box<JavaStr>`. The caller
    // guarantees the contents are valid Java CESU-8.
    unsafe { Box::from_raw(raw) }
}
169
170 /// Returns the length of `self`.
171 ///
172 /// This length is in bytes, not [`char`]s or graphemes. In other words, it
173 /// might not be what a human considers the length of the string.
174 #[inline]
175 #[must_use]
176 pub const fn len(&self) -> usize {
177 self.internal.len()
178 }
179
180 /// Returns `true` if `self` has a length of zero bytes.
181 #[inline]
182 #[must_use]
183 pub const fn is_empty(&self) -> bool {
184 self.internal.is_empty()
185 }
186
187 /// Checks that the `index`-th byte is the first byte in a Java CESU-8 code
188 /// point sequence or the end of the string.
189 ///
190 /// The start and end of the string (when `index == self.len()`) are
191 /// considered to be boundaries.
192 ///
193 /// Returns `false` if `index is greater than self.len()`.
194 #[inline]
195 #[must_use]
196 pub fn is_char_boundary(&self, index: usize) -> bool {
197 self.internal.is_char_boundary(index)
198 }
199
200 /// Converts a string slice to a byte slice.
201 #[inline]
202 #[must_use]
203 pub const fn as_bytes(&self) -> &[u8] {
204 self.internal.as_bytes()
205 }
206
207 /// Converts a mutable string slice to a mutable byte slice.
208 ///
209 /// # Safety
210 ///
211 /// The caller must ensure that the content of the slice is valid Java
212 /// CESU-8 before the borrow ends and the underlying `JavaStr` is used.
213 ///
214 /// Use of a `JavaStr` whose contents are not valid Java CESU-8 is undefined
215 /// behavior.
216 #[inline]
217 #[must_use]
218 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
219 self.internal.as_bytes_mut()
220 }
221
222 /// Converts a string slice to a raw pointer.
223 ///
224 /// As string slices are a slice of bytes, the raw pointer points to a
225 /// [`u8`]. This pointer will be pointing to the first bytes of the string
226 /// slice.
227 ///
228 /// The caller must ensure that the returned pointer is never written to. If
229 /// you need to mutate the contents of the string slice, use [`as_mut_ptr`].
230 ///
231 /// [`as_mut_ptr`]: Self::as_mut_ptr
232 #[inline]
233 #[must_use]
234 pub const fn as_ptr(&self) -> *const u8 {
235 self.internal.as_ptr()
236 }
237
238 /// Converts a mutable string slice to a raw pointer.
239 ///
240 /// As string slices are a slice of bytes, the raw pointer points to a
241 /// [`u8`]. This pointer will be pointing to the first byte of the string
242 /// slice.
243 ///
244 /// It is your responsibility to make sure that the string slice only gets
245 /// modified in a way that it remains valid Java CESU-8.
246 #[inline]
247 #[must_use]
248 pub fn as_mut_ptr(&mut self) -> *mut u8 {
249 self.internal.as_mut_ptr()
250 }
251
252 /// Returns a subslice of `JavaStr`.
253 ///
254 /// This is the non-panicking alternative to indexing the `str`. Returns
255 /// [`None`] whenever equivalent indexing operations would panic.
256 #[inline]
257 #[must_use]
258 pub fn get<I: RangeBounds<usize>>(&self, index: I) -> Option<&JavaStr> {
259 self.internal
260 .get(index)
261 .map(|internal| unsafe { JavaStr::from_internal_unchecked(internal) })
262 }
263
264 /// Returns a mutable subslice of `JavaStr`.
265 ///
266 /// This is the non-panicking alternative to indexing the `JavaStr`.
267 /// Returns [`None`] whenver equivalent indexing operations would panic.
268 #[inline]
269 #[must_use]
270 pub fn get_mut<I: RangeBounds<usize>>(&mut self, index: I) -> Option<&mut JavaStr> {
271 self.internal
272 .get_mut(index)
273 .map(|internal| unsafe { JavaStr::from_internal_unchecked_mut(internal) })
274 }
275
276 /// Returns an unchecked subslice of `JavaStr`.
277 ///
278 /// This is the unchecked alternative to indexing the `JavaStr`.
279 ///
280 /// # Safety
281 ///
282 /// Callers of this function are responsible for ensuring that:
283 /// * The starting index does not exceed the ending index;
284 /// * The indices are within the bounds of the original slice;
285 /// * The indices fall on Java CESU-8 sequence boundaries.
286 ///
287 /// Failing that, the returned string slice may reference invalid memory or
288 /// violate the invariants communicated by the `JavaStr` type.
289 #[inline]
290 #[must_use]
291 pub unsafe fn get_unchecked<I: RangeBounds<usize>>(&self, index: I) -> &JavaStr {
292 unsafe { JavaStr::from_internal_unchecked(self.internal.get_unchecked(index)) }
293 }
294
295 /// Returns a mutable, unchecked subslice of `JavaStr`.
296 ///
297 /// This the unchecked alternative to indexing the `JavaStr`.
298 ///
299 /// # Safety
300 ///
301 /// Callers of this function are responsible for ensuring that:
302 /// * The starting index does not exceed the ending index;
303 /// * The indices are within the bounds of the original slice;
304 /// * The indices fall on Java CESU-8 sequence boundaries.
305 ///
306 /// Failing that, the returned string slice may reference invalid memory or
307 /// violate the invariants communicated by the `JavaStr` type.
308 #[inline]
309 #[must_use]
310 pub unsafe fn get_unchecked_mut<I: RangeBounds<usize>>(&mut self, index: I) -> &mut JavaStr {
311 unsafe { JavaStr::from_internal_unchecked_mut(self.internal.get_unchecked_mut(index)) }
312 }
313
314 /// Divide one string slice into two at an index.
315 ///
316 /// The argument, `mid`, should be a byte offset from the start of the
317 /// string. It must also be on the boundary of a Java CESU-8 character.
318 ///
319 /// The two slices returned go from the string of the string slice to `mid`,
320 /// and from `mid` to the end of the string slice.
321 ///
322 /// To get mutable string slices instead, see the [`split_at_mut`] method.
323 ///
324 /// [`split_at_mut`]: Self::split_at_mut
325 #[inline]
326 #[must_use]
327 pub fn split_at(&self, mid: usize) -> (&JavaStr, &JavaStr) {
328 let (left, right) = self.internal.split_at(mid);
329 unsafe {
330 (
331 JavaStr::from_internal_unchecked(left),
332 JavaStr::from_internal_unchecked(right),
333 )
334 }
335 }
336
337 /// Divide one mutable string slice into two at an index.
338 ///
339 /// The argument, `mid`, should be a byte offset from the start of the
340 /// string. It must also be on the boundary of a Java CESU-8 character.
341 ///
342 /// The two slices returned go from the string of the string slice to `mid`,
343 /// and from `mid` to the end of the string slice.
344 ///
345 /// To get immutable string slices instead, see the [`split_at`] method.
346 ///
347 /// [`split_at`]: Self::split_at
348 #[inline]
349 #[must_use]
350 pub fn split_at_mut(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) {
351 let (left, right) = self.internal.split_at_mut(mid);
352 unsafe {
353 (
354 JavaStr::from_internal_unchecked_mut(left),
355 JavaStr::from_internal_unchecked_mut(right),
356 )
357 }
358 }
359
360 /// Divide one string slice into two at an index.
361 ///
362 /// The argument, `mid`, should be a valid byte offset from the start of the
363 /// string. It must also be on the boundary of a Java CESU-8 code point. The
364 /// method returns `None` if that's not the case.
365 ///
366 /// The two slices returned go from the start of the string slice to `mid`,
367 /// and from `mid` to the end of the string slice.
368 ///
369 /// To get mutable string slices instead, see the [`split_at_mut_checked`]
370 /// method.
371 ///
372 /// [`split_at_mut_checked`]: Self::split_at_mut_checked
373 #[inline]
374 #[must_use]
375 pub fn split_at_checked(&self, mid: usize) -> Option<(&JavaStr, &JavaStr)> {
376 let (left, right) = self.internal.split_at_checked(mid)?;
377 Some(unsafe {
378 (
379 JavaStr::from_internal_unchecked(left),
380 JavaStr::from_internal_unchecked(right),
381 )
382 })
383 }
384
385 /// Divide one mutable string slice into two at an index.
386 ///
387 /// The argument, `mid`, should be a valid byte offset from the start of the
388 /// string. It must also be on the boundary of a Java CESU-8 code point. The
389 /// method returns `None` if that's not the case.
390 ///
391 /// The two slices returned go from the start of the string slice to `mid`,
392 /// and from `mid` to the end of the string slice.
393 ///
394 /// To get immutable string slices instead, see the [`split_at_checked`]
395 /// method.
396 ///
397 /// [`split_at_checked`]: Self::split_at_checked
398 #[inline]
399 #[must_use]
400 pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut JavaStr, &mut JavaStr)> {
401 let (left, right) = self.internal.split_at_mut_checked(mid)?;
402 Some(unsafe {
403 (
404 JavaStr::from_internal_unchecked_mut(left),
405 JavaStr::from_internal_unchecked_mut(right),
406 )
407 })
408 }
409
410 /// Divide a string into two at an index.
411 ///
412 /// The two slices returned go from the start of the string slice to `mid`,
413 /// and from `mid` to the end of the string slice.
414 ///
415 /// To get mutable string slices instead, see the [`split_at_mut_unchecked`]
416 /// method.
417 ///
418 /// [`split_at_mut_unchecked`]: Self::split_at_mut_unchecked
419 ///
420 /// # Safety
421 ///
422 /// The caller must ensure that `mid` is a valid byte offset from the start
423 /// of the string and falls on the boundary of a Java CESU-8 character.
424 #[inline]
425 #[must_use]
426 pub unsafe fn split_at_unchecked(&self, mid: usize) -> (&JavaStr, &JavaStr) {
427 let (left, right) = self.internal.split_at_unchecked(mid);
428 unsafe {
429 (
430 JavaStr::from_internal_unchecked(left),
431 JavaStr::from_internal_unchecked(right),
432 )
433 }
434 }
435
436 /// Divide a mutable string into two at an index.
437 ///
438 /// The two slices returned go from the start of the string slice to `mid`,
439 /// and from `mid` to the end of the string slice.
440 ///
441 /// To get immutable string slices instead, see the [`split_at_unchecked`]
442 /// method.
443 ///
444 /// [`split_at_unchecked`]: Self::split_at_unchecked
445 ///
446 /// # Safety
447 ///
448 /// The caller must ensure that `mid` is a valid byte offset from the start
449 /// of the string and falls on the boundary of a Java CESU-8 character.
450 #[inline]
451 #[must_use]
452 pub unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) {
453 let (left, right) = self.internal.split_at_mut_unchecked(mid);
454 unsafe {
455 (
456 JavaStr::from_internal_unchecked_mut(left),
457 JavaStr::from_internal_unchecked_mut(right),
458 )
459 }
460 }
461
462 /// Returns an iterator over the [`char`]s of a string slice.
463 ///
464 /// As an `JavaStr` consists of valid Java CESU-8, we can iterate through a
465 /// string by [`char`]. This method returns such an iterator.
466 ///
467 /// It's important to remember that [`char`] represents a Unicode Scalar
468 /// Value, and might not match your idea of what a 'character' is. Iteration
469 /// over grapheme clusters may be what you actually want. This functionality
470 /// is not provided by this crate.
471 #[inline]
472 pub fn chars(&self) -> JavaChars {
473 JavaChars {
474 iter: self.internal.chars(),
475 }
476 }
477
478 /// Returns an iterator over the [`char`]s of a string slice, and their
479 /// positions.
480 ///
481 /// As an `JavaStr` consists of valid Java CESU-8, we can iterate through a
482 /// string by [`char`]. This method returns an iterator of both these
483 /// [`char`]s, as well as their byte positions.
484 ///
485 /// The iterator yields tuples. The position is first,
486 /// the [`char`] is second.
487 #[inline]
488 pub fn char_indices(&self) -> JavaCharIndices {
489 JavaCharIndices {
490 iter: self.internal.char_indices(),
491 }
492 }
493
494 /// Checks if all characters in this string are within the ASCII range.
495 #[inline]
496 #[must_use]
497 pub const fn is_ascii(&self) -> bool {
498 self.internal.is_ascii()
499 }
500}
501
#[cfg(feature = "alloc")]
impl ToOwned for JavaStr {
    type Owned = JavaString;

    /// Copies the bytes of this string slice into a freshly allocated
    /// [`JavaString`].
    #[inline]
    fn to_owned(&self) -> Self::Owned {
        let bytes = self.as_bytes().to_vec();
        // SAFETY: the bytes are an unmodified copy of a `JavaStr`, whose
        // contents are valid Java CESU-8 by the type's invariant.
        unsafe { JavaString::from_java_cesu8_unchecked(bytes) }
    }
}
512
513impl AsRef<[u8]> for JavaStr {
514 #[inline]
515 fn as_ref(&self) -> &[u8] {
516 self.as_bytes()
517 }
518}
519
520impl core::fmt::Debug for JavaStr {
521 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
522 core::fmt::Debug::fmt(&self.internal, f)
523 }
524}
525
526impl core::fmt::Display for JavaStr {
527 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
528 core::fmt::Display::fmt(&self.internal, f)
529 }
530}