cesu8_str/java/string.rs
1use super::JavaStr;
2use crate::internal::{InternalStr, InternalString};
3use crate::{validate_cesu8_internal, FromVecError};
4
5use core::borrow::Borrow;
6use core::ops::{Deref, DerefMut};
7
8use alloc::boxed::Box;
9use alloc::collections::TryReserveError;
10use alloc::vec::Vec;
11
12/// A Java CESU-8 encoded, growable string.
13#[repr(transparent)]
14#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
15pub struct JavaString {
16 internal: InternalString,
17}
18
19impl JavaString {
20 /// Creates a new empty `JavaString`.
21 ///
22 /// Given that the `JavaString` is empty, this will not allocate any
23 /// initial buffer. While that means that this initial operations is very
24 /// inexpensive, it may cause excessive allocation later when you add data.
25 /// If you have an idea of how much data the `JavaString` will hold,
26 /// consider the [`with_capacity`] method to prevent excessive
27 /// re-allocation.
28 ///
29 /// [`with_capacity`]: Self::with_capacity
30 #[inline]
31 #[must_use]
32 pub const fn new() -> JavaString {
33 JavaString {
34 internal: InternalString::new(),
35 }
36 }
37
38 /// Creates a new empty `JavaString` with at least the specified
39 /// capacity.
40 ///
41 /// `JavaString`s have an internal buffer to hold their data. The
42 /// capacity is at length of that buffer, and can be queried with the
43 /// [`capacity`] method. This method creates an empty `JavaString`, but
44 /// one with an initial buffer that can hold at least `capacity` bytes. This
45 /// is useful when you may be appending a bunch of data to the
46 /// `JavaString`, reducing the number of reallocations it needs to do.
47 ///
48 /// [`capacity`]: Self::capacity
49 ///
50 /// If the given capacity is `0`, no allocation will occur, and this method
51 /// is identical to the [`new`] method.
52 ///
53 /// [`new`]: Self::new
54 #[inline]
55 #[must_use]
56 pub fn with_capacity(capacity: usize) -> JavaString {
57 JavaString {
58 internal: InternalString::with_capacity(capacity),
59 }
60 }
61
62 /// Converts a vector of bytes into a `JavaString`.
63 ///
64 /// A string ([`JavaString`]) is made of bytes ([`u8`]), and a vector of
65 /// bytes ([`Vec<u8>`]) is made of bytes, so this function converts between
66 /// the two. Not all byte slices are valid `JavaString`s, however:
67 /// `JavaString` requires that it is valid Java CESU-8. `from_java_cesu8`
68 /// checks to ensure that the bytes are valid Java CESU-8, and then does
69 /// the conversion.
70 ///
71 /// If you are sure that the byte slice is valid Java CESU-8, and you don't
72 /// want to incur the overhead of the validity check, there is an unsafe
73 /// version of this function, [`from_java_cesu8_unchecked`], which has
74 /// the same behavior but skips the check.
75 ///
76 /// This method will take care to not to copy the vector, for efficiency's
77 /// sake.
78 ///
79 /// If you need a [`&JavaStr`] instead of a `JavaString`, consider
80 /// [`JavaStr::from_java_cesu8`].
81 ///
82 /// The inverse of this method is [`into_bytes`].
83 ///
84 /// [`from_java_cesu8_unchecked`]: Self::from_java_cesu8_unchecked
85 /// [`&JavaStr`]: JavaStr
86 /// [`into_bytes`]: Self::into_bytes
87 ///
88 /// # Errors
89 ///
90 /// Returns [`Err`] if the slice is not Java CESU-8 with the index and
91 /// length of the invalid byte. The vector you moved in is also
92 /// included.
93 #[inline]
94 pub fn from_java_cesu8(vec: Vec<u8>) -> Result<JavaString, FromVecError> {
95 match validate_cesu8_internal::<true>(&vec) {
96 Ok(()) => unsafe {
97 Ok(JavaString {
98 internal: InternalString::from_unchecked(vec),
99 })
100 },
101 Err(e) => Err(FromVecError { bytes: vec, error: e}),
102 }
103 }
104
105 /// Creates a new `JavaString` from a length, capacity, and pointer.
106 ///
107 /// # Safety
108 ///
109 /// This is highly unsafe, due to the number of invariants that aren't
110 /// checked:
111 /// * The memory at `buf` needs to have been previously allocationed by the
112 /// same allocator the standard library uess, with a required alignment of
113 /// exactly 1.
114 /// * `length` needs to be less than or equal to `capacity`.
115 /// * `capacity` needs to be the correct value.
116 /// * The first `length` bytes at `buf` need to be valid Java CESU-8.
117 ///
118 /// Violating these may cause problems like correcting the allocator's
119 /// internal data structures. For example, it is normally **not** safe to
120 /// build a `JavaString` from a pointer to a C `char` array containing
121 /// Java CESU-8 _unless_ you are certain that array was originally allocated
122 /// by the Rust standard library's allocator.
123 ///
124 /// The ownership of `buf` is effectively transferred to the
125 /// `JavaString` which may then deallocate, reallocate, or change the
126 /// contents of memory pointed to by the pointer at will. Ensure that
127 /// nothing elese uses the pointer after calling this function.
128 #[inline]
129 #[must_use]
130 pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> JavaString {
131 unsafe {
132 JavaString {
133 internal: InternalString::from_raw_parts(buf, length, capacity),
134 }
135 }
136 }
137
138 /// Converts a vector of bytes to a `JavaString` without checking that
139 /// the string contains valid Java CESU-8.
140 ///
141 /// # Safety
142 ///
143 /// This function is unsafe because it does not check that the bytes passed
144 /// to it are valid Java CESU-8. If this constraint is violated, it may
145 /// cause memory unsafety issues with future users of the `JavaString`.
146 #[inline]
147 #[must_use]
148 pub const unsafe fn from_java_cesu8_unchecked(bytes: Vec<u8>) -> JavaString {
149 JavaString {
150 internal: InternalString::from_unchecked(bytes),
151 }
152 }
153
154 /// Converts an internal string to a `JavaString` without checking that the
155 /// string contains valid Java CESU-8.
156 ///
157 /// # Safety
158 ///
159 /// This function is unsafe because it does not check that the
160 /// `InternalString` passed to it contains valid Java CESU-8. If this
161 /// constraint is violated, it may cause memory unsafety issues with future
162 /// users of the `JavaString`.
163 #[inline]
164 #[must_use]
165 pub(crate) const unsafe fn from_internal_unchecked(internal: InternalString) -> JavaString {
166 JavaString { internal }
167 }
168
169 /// Converts a `JavaString` into a byte vector.
170 ///
171 /// This consumes the `JavaString`, so we do not need to copy its contents.
172 #[inline]
173 #[must_use]
174 pub fn into_bytes(self) -> Vec<u8> {
175 self.internal.into_bytes()
176 }
177
178 /// Extracts a string slice containing the entire `JavaString`.
179 #[inline]
180 #[must_use]
181 pub fn as_str(&self) -> &JavaStr {
182 self
183 }
184
185 /// Converts a `JavaString` into a mutable string slice.
186 #[inline]
187 #[must_use]
188 pub fn as_mut_str(&mut self) -> &mut JavaStr {
189 self
190 }
191
192 /// Appends a given string slice onto the end of this `JavaString`.
193 #[inline]
194 pub fn push_str(&mut self, str: &JavaStr) {
195 self.internal.push_str(&str.internal);
196 }
197
198 /// Returns this `JavaString`'s capacity, in bytes.
199 #[inline]
200 #[must_use]
201 pub fn capacity(&self) -> usize {
202 self.internal.capacity()
203 }
204
205 /// Reserves capacity for at least `additional` bytes more than the current
206 /// length. The allocator may reserve more space to speculatively avoid
207 /// frequent allocations. After calling `reserve`, capacity will be greater
208 /// than or equal to `self.len() + additional`. Does nothing if the capacity
209 /// is already sufficient.
210 ///
211 /// # Panics
212 ///
213 /// Panics if the new capacity overflows [`usize`].
214 #[inline]
215 pub fn reserve(&mut self, additional: usize) {
216 self.internal.reserve(additional);
217 }
218
219 /// Reserves the minimum capacity for at least `additional` bytes more than
220 /// the current length. Unlike [`reserve`], this will not deliberately
221 /// over-allocate to speculatively avoid allocations. After calling reserve
222 /// `reserve_excat`, capacity will be greater than or equal to `self.len() +
223 /// additional`. Does nothing if the capacity is already sufficient.
224 ///
225 /// [`reserve`]: Self::reserve
226 ///
227 /// # Panics
228 ///
229 /// Panics if the new capacity overflows [`usize`].
230 #[inline]
231 pub fn reserve_exact(&mut self, additional: usize) {
232 self.internal.reserve_exact(additional);
233 }
234
235 /// Tries to reserve capacity for at least `additional` bytes more than the
236 /// current length. The allocator may reserve more space to speculatively
237 /// avoid frequent allocations. After calling `try_reserve`, capacity will
238 /// be greater than or equal to `self.len() + additional` if it returns
239 /// `OK(())`. Does nothing if capacity is already sufficient. This method
240 /// preserves the contents even if an error occurs.
241 ///
242 /// # Errors
243 ///
244 /// If the capacity overflows, or the allocator reports a failure, then an
245 /// error is returned.
246 #[inline]
247 pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
248 self.internal.try_reserve(additional)
249 }
250
251 /// Tries to reserve the minimum capacity for at least `additional` bytes
252 /// more than current length. Unlike [`try_reserve`], this will not
253 /// deliberately over-allocate to speculatively avoid frequent allocations.
254 /// After calling `try_reserve_exact`, capacity will be greater than or
255 /// equal `self.len() + additional` if it returns `Ok(())`. Does nothing if
256 /// the capacity is already sufficient.
257 ///
258 /// Not that the allocator may give the collection more space than it
259 /// requests. Therefore, capacity can not be relied upon to be precisely
260 /// minimal. Prefer [`try_reserve`] if future insertions are expected.
261 ///
262 /// [`try_reserve`]: JavaString::try_reserve
263 ///
264 /// # Errors
265 ///
266 /// If the capacity overflows, or the allocator reports a failure, then an
267 /// error is returned.
268 #[inline]
269 pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
270 self.internal.try_reserve_exact(additional)
271 }
272
273 /// Shrinks the capacity of this `JavaString` to match its length.
274 #[inline]
275 pub fn shrink_to_fit(&mut self) {
276 self.internal.shrink_to_fit();
277 }
278
279 /// Shrinks the capacity of this `JavaString` with a lower bound.
280 ///
281 /// The capacity will remaing at least as large as both the length and the
282 /// supplied value.
283 ///
284 /// If the current capacity is less than the lower limit, this is a no-op.
285 #[inline]
286 pub fn shrink_to(&mut self, min_capacity: usize) {
287 self.internal.shrink_to(min_capacity);
288 }
289
290 /// Appends the given [`char`] to the end of this `JavaString`.
291 #[inline]
292 pub fn push(&mut self, c: char) {
293 self.internal.push::<true>(c);
294 }
295
296 /// Returns a byte slice of this `JavaString`'s contents.
297 #[inline]
298 #[must_use]
299 pub fn as_bytes(&self) -> &[u8] {
300 self.internal.as_bytes()
301 }
302
303 /// Shortens this `JavaString` to the specified length.
304 ///
305 /// If `new_len` is greater than the string's current length, this has no
306 /// effect.
307 ///
308 /// Note that this method has no effect on the allocated capacity of the
309 /// string.
310 ///
311 /// # Panics
312 ///
313 /// Panics if `new_len` does not lie on a [`char`] boundary.
314 #[inline]
315 pub fn truncate(&mut self, new_len: usize) {
316 self.internal.truncate(new_len);
317 }
318
319 /// Removes the last character from the string buffer and returns it.
320 ///
321 /// Returns [`None`] if this `JavaString` is empty.
322 #[inline]
323 #[must_use]
324 pub fn pop(&mut self) -> Option<char> {
325 self.internal.pop()
326 }
327
328 /// Removes a [`char`] from this `JavaString` at a byte position and
329 /// returns it.
330 ///
331 /// This is an *O*(*n*) operation, as it requires copy every element in the
332 /// buffer.
333 ///
334 /// # Panics
335 ///
336 /// Panics if `idx` is large than or equal to the `JavaString`'s length, or
337 /// if it does not lie on a [`char`] boundary.
338 #[inline]
339 pub fn remove(&mut self, idx: usize) -> char {
340 self.internal.remove(idx)
341 }
342
343 /// Inserts a character into this `JavaString` at a byte position.
344 ///
345 /// This is an *O*(*n*) operation as it requires copying every element in
346 /// the buffer.
347 ///
348 /// # Panics
349 ///
350 /// Panics if `idx` is larger than the `JavaString`'s length, or if it does
351 /// not lie on a [`char`] boundary.
352 #[inline]
353 pub fn insert(&mut self, idx: usize, c: char) {
354 self.internal.insert::<true>(idx, c);
355 }
356
357 /// Returns a mutable reference to the contents of this `JavaString`.
358 ///
359 /// # Safety
360 ///
361 /// This function is unsafe because the returned `&mut Vec` allows writing
362 /// bytes which are not valid Java Java CESU-8. If this constraint is
363 /// violated, using the original `JavaString` after dropping the `&mut Vec`
364 /// may violate memory safety, as `JavaString`s are expected to always
365 /// contains valid Java Java CESU-8.
366 #[inline]
367 pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
368 self.internal.as_mut_vec()
369 }
370
371 /// Returns the length of this `JavaString`, in bytes, nor [`char`]s or
372 /// graphemes. In other words, it might not be what a human considers the
373 /// length of the string.
374 #[inline]
375 #[must_use]
376 pub fn len(&self) -> usize {
377 self.internal.len()
378 }
379
380 /// Returns `true` if this `JavaString` has a length of zero, and `false`
381 /// otherwise.
382 #[inline]
383 #[must_use]
384 pub fn is_empty(&self) -> bool {
385 self.internal.is_empty()
386 }
387
388 /// Truncates this `JavaString`, removing all contents.
389 ///
390 /// While this means the `JavaString` will have a length of zero, it does
391 /// not touch its capacity.
392 #[inline]
393 pub fn clear(&mut self) {
394 self.internal.clear();
395 }
396
397 /// Converts this `JavaString` into a <code>[Box]<[JavaStr]</code>.
398 ///
399 /// This will drop any excess capacity.
400 #[inline]
401 #[must_use]
402 pub fn into_boxed_str(self) -> Box<JavaStr> {
403 let x = self.internal.into_boxed_str();
404 unsafe { JavaStr::from_boxed_internal_unchecked(x) }
405 }
406
407 /// Consumes and leaks the `JavaString`, returning a mutable reference to
408 /// the contents, `&'a mut InternalStr`.
409 ///
410 /// The caller has free choice over the returned lifetime, including
411 /// `'static`. Indeed, this function is ideally used for data that lives fro
412 /// the remainder of the program's life, as dropping the returned reference
413 /// will cause a memory leak.
414 ///
415 /// It does not reallocate or shrink the `JavaString`, so the leaked
416 /// allocation may include unused capacity that is not part of the returned
417 /// slice. If you don't want that, call [`into_boxed_str`], and then
418 /// [`Box::leak`].
419 ///
420 /// [`into_boxed_str`]: Self::into_boxed_str
421 #[inline]
422 #[must_use]
423 pub fn leak<'a>(self) -> &'a mut JavaStr {
424 let str = self.internal.leak();
425 unsafe { &mut *(str as *mut InternalStr as *mut JavaStr) }
426 }
427}
428
429impl Default for JavaString {
430 fn default() -> Self {
431 Self::new()
432 }
433}
434
435impl Borrow<JavaStr> for JavaString {
436 fn borrow(&self) -> &JavaStr {
437 self
438 }
439}
440
441impl Deref for JavaString {
442 type Target = JavaStr;
443
444 #[inline]
445 fn deref(&self) -> &Self::Target {
446 unsafe { JavaStr::from_java_cesu8_unchecked(self.internal.as_bytes()) }
447 }
448}
449
450impl DerefMut for JavaString {
451 #[inline]
452 fn deref_mut(&mut self) -> &mut Self::Target {
453 unsafe { JavaStr::from_java_cesu8_unchecked_mut(self.internal.as_bytes_mut()) }
454 }
455}
456
457impl core::fmt::Debug for JavaString {
458 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
459 core::fmt::Debug::fmt(&self.internal, f)
460 }
461}
462
463impl core::fmt::Display for JavaString {
464 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
465 core::fmt::Display::fmt(&self.internal, f)
466 }
467}