cheetah_string/cheetah_string.rs
1use core::fmt;
2use core::str::Utf8Error;
3use std::borrow::{Borrow, Cow};
4use std::cmp::Ordering;
5use std::fmt::Display;
6use std::hash::Hash;
7use std::ops::Deref;
8use std::str::FromStr;
9use std::sync::Arc;
10
11#[derive(Clone)]
12#[repr(transparent)]
13pub struct CheetahString {
14 pub(super) inner: InnerString,
15}
16
17impl Default for CheetahString {
18 fn default() -> Self {
19 CheetahString {
20 inner: InnerString::Inline {
21 len: 0,
22 data: [0; INLINE_CAPACITY],
23 },
24 }
25 }
26}
27
28impl From<String> for CheetahString {
29 #[inline]
30 fn from(s: String) -> Self {
31 CheetahString::from_string(s)
32 }
33}
34
35impl From<Arc<String>> for CheetahString {
36 #[inline]
37 fn from(s: Arc<String>) -> Self {
38 CheetahString::from_arc_string(s)
39 }
40}
41
42impl<'a> From<&'a str> for CheetahString {
43 #[inline]
44 fn from(s: &'a str) -> Self {
45 CheetahString::from_slice(s)
46 }
47}
48
49/// # Safety Warning
50///
51/// This implementation uses `unsafe` code and may cause undefined behavior
52/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_bytes()`
53/// for safe UTF-8 validation.
54///
55/// This implementation will be deprecated in a future version.
56impl From<&[u8]> for CheetahString {
57 #[inline]
58 fn from(b: &[u8]) -> Self {
59 // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
60 // This will be deprecated in favor of try_from_bytes in the next version.
61 CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(b) })
62 }
63}
64
65impl FromStr for CheetahString {
66 type Err = std::string::ParseError;
67 #[inline]
68 fn from_str(s: &str) -> Result<Self, Self::Err> {
69 Ok(CheetahString::from_slice(s))
70 }
71}
72
73/// # Safety Warning
74///
75/// This implementation uses `unsafe` code and may cause undefined behavior
76/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_vec()`
77/// for safe UTF-8 validation.
78///
79/// This implementation will be deprecated in a future version.
80impl From<Vec<u8>> for CheetahString {
81 #[inline]
82 fn from(v: Vec<u8>) -> Self {
83 // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
84 // This will be deprecated in favor of try_from_vec in the next version.
85 CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(&v) })
86 }
87}
88
89impl From<Cow<'static, str>> for CheetahString {
90 #[inline]
91 fn from(cow: Cow<'static, str>) -> Self {
92 match cow {
93 Cow::Borrowed(s) => CheetahString::from_static_str(s),
94 Cow::Owned(s) => CheetahString::from_string(s),
95 }
96 }
97}
98
99impl From<Cow<'_, String>> for CheetahString {
100 #[inline]
101 fn from(cow: Cow<'_, String>) -> Self {
102 match cow {
103 Cow::Borrowed(s) => CheetahString::from_slice(s),
104 Cow::Owned(s) => CheetahString::from_string(s),
105 }
106 }
107}
108
109impl From<char> for CheetahString {
110 /// Allocates an owned [`CheetahString`] from a single character.
111 ///
112 /// # Example
113 /// ```rust
114 /// use cheetah_string::CheetahString;
115 /// let c: char = 'a';
116 /// let s: CheetahString = CheetahString::from(c);
117 /// assert_eq!("a", &s[..]);
118 /// ```
119 #[inline]
120 fn from(c: char) -> Self {
121 CheetahString::from_string(c.to_string())
122 }
123}
124
125impl<'a> FromIterator<&'a char> for CheetahString {
126 #[inline]
127 fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> CheetahString {
128 let mut buf = String::new();
129 buf.extend(iter);
130 CheetahString::from_string(buf)
131 }
132}
133
134impl<'a> FromIterator<&'a str> for CheetahString {
135 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> CheetahString {
136 let mut buf = String::new();
137 buf.extend(iter);
138 CheetahString::from_string(buf)
139 }
140}
141
142impl FromIterator<String> for CheetahString {
143 #[inline]
144 fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
145 let mut buf = String::new();
146 buf.extend(iter);
147 CheetahString::from_string(buf)
148 }
149}
150
151impl<'a> FromIterator<&'a String> for CheetahString {
152 #[inline]
153 fn from_iter<T: IntoIterator<Item = &'a String>>(iter: T) -> Self {
154 let mut buf = String::new();
155 buf.extend(iter.into_iter().map(|s| s.as_str()));
156 CheetahString::from_string(buf)
157 }
158}
159
#[cfg(feature = "bytes")]
impl From<bytes::Bytes> for CheetahString {
    /// Wraps the buffer via `CheetahString::from_bytes`.
    #[inline]
    fn from(buffer: bytes::Bytes) -> Self {
        Self::from_bytes(buffer)
    }
}
167
168impl From<&CheetahString> for CheetahString {
169 #[inline]
170 fn from(s: &CheetahString) -> Self {
171 s.clone()
172 }
173}
174
175impl From<CheetahString> for String {
176 #[inline]
177 fn from(s: CheetahString) -> Self {
178 match s {
179 CheetahString {
180 inner: InnerString::Inline { len, data },
181 } => {
182 // SAFETY: Inline strings are always valid UTF-8
183 unsafe { String::from_utf8_unchecked(data[..len as usize].to_vec()) }
184 }
185 CheetahString {
186 inner: InnerString::StaticStr(s),
187 } => s.to_string(),
188 CheetahString {
189 inner: InnerString::ArcStr(s),
190 } => s.to_string(),
191 CheetahString {
192 inner: InnerString::ArcString(s),
193 } => s.as_ref().clone(),
194 CheetahString {
195 inner: InnerString::ArcVecString(s),
196 } => {
197 // SAFETY: ArcVecString should only be created from valid UTF-8 sources
198 unsafe { String::from_utf8_unchecked(s.to_vec()) }
199 }
200 #[cfg(feature = "bytes")]
201 CheetahString {
202 inner: InnerString::Bytes(b),
203 } => {
204 // SAFETY: Bytes variant should only be created from valid UTF-8 sources
205 unsafe { String::from_utf8_unchecked(b.to_vec()) }
206 }
207 }
208 }
209}
210
211impl Deref for CheetahString {
212 type Target = str;
213
214 #[inline]
215 fn deref(&self) -> &Self::Target {
216 self.as_str()
217 }
218}
219
220impl AsRef<str> for CheetahString {
221 #[inline]
222 fn as_ref(&self) -> &str {
223 self.as_str()
224 }
225}
226
227impl AsRef<[u8]> for CheetahString {
228 #[inline]
229 fn as_ref(&self) -> &[u8] {
230 self.as_bytes()
231 }
232}
233
234impl AsRef<CheetahString> for CheetahString {
235 #[inline]
236 fn as_ref(&self) -> &CheetahString {
237 self
238 }
239}
240
241impl From<&String> for CheetahString {
242 #[inline]
243 fn from(s: &String) -> Self {
244 CheetahString::from_slice(s)
245 }
246}
247
248impl CheetahString {
249 #[inline]
250 pub const fn empty() -> Self {
251 CheetahString {
252 inner: InnerString::Inline {
253 len: 0,
254 data: [0; INLINE_CAPACITY],
255 },
256 }
257 }
258
259 #[inline]
260 pub fn new() -> Self {
261 CheetahString::default()
262 }
263
264 #[inline]
265 pub const fn from_static_str(s: &'static str) -> Self {
266 CheetahString {
267 inner: InnerString::StaticStr(s),
268 }
269 }
270
271 #[inline]
272 pub fn from_vec(s: Vec<u8>) -> Self {
273 CheetahString {
274 inner: InnerString::ArcVecString(Arc::new(s)),
275 }
276 }
277
278 /// Creates a `CheetahString` from a byte vector with UTF-8 validation.
279 ///
280 /// # Errors
281 ///
282 /// Returns an error if the bytes are not valid UTF-8.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use cheetah_string::CheetahString;
288 ///
289 /// let bytes = vec![104, 101, 108, 108, 111]; // "hello"
290 /// let s = CheetahString::try_from_vec(bytes).unwrap();
291 /// assert_eq!(s, "hello");
292 ///
293 /// let invalid = vec![0xFF, 0xFE];
294 /// assert!(CheetahString::try_from_vec(invalid).is_err());
295 /// ```
296 pub fn try_from_vec(v: Vec<u8>) -> Result<Self, Utf8Error> {
297 // Validate UTF-8
298 std::str::from_utf8(&v)?;
299 Ok(CheetahString {
300 inner: InnerString::ArcVecString(Arc::new(v)),
301 })
302 }
303
304 /// Creates a `CheetahString` from a byte slice with UTF-8 validation.
305 ///
306 /// # Errors
307 ///
308 /// Returns an error if the bytes are not valid UTF-8.
309 ///
310 /// # Examples
311 ///
312 /// ```
313 /// use cheetah_string::CheetahString;
314 ///
315 /// let bytes = b"hello";
316 /// let s = CheetahString::try_from_bytes(bytes).unwrap();
317 /// assert_eq!(s, "hello");
318 ///
319 /// let invalid = &[0xFF, 0xFE];
320 /// assert!(CheetahString::try_from_bytes(invalid).is_err());
321 /// ```
322 pub fn try_from_bytes(b: &[u8]) -> Result<Self, Utf8Error> {
323 let s = std::str::from_utf8(b)?;
324 Ok(CheetahString::from_slice(s))
325 }
326
327 #[inline]
328 pub fn from_arc_vec(s: Arc<Vec<u8>>) -> Self {
329 CheetahString {
330 inner: InnerString::ArcVecString(s),
331 }
332 }
333
334 #[inline]
335 pub fn from_slice(s: &str) -> Self {
336 if s.len() <= INLINE_CAPACITY {
337 // Use inline storage for short strings
338 let mut data = [0u8; INLINE_CAPACITY];
339 data[..s.len()].copy_from_slice(s.as_bytes());
340 CheetahString {
341 inner: InnerString::Inline {
342 len: s.len() as u8,
343 data,
344 },
345 }
346 } else {
347 // Use Arc for long strings
348 CheetahString {
349 inner: InnerString::ArcString(Arc::new(s.to_owned())),
350 }
351 }
352 }
353
354 #[inline]
355 pub fn from_string(s: String) -> Self {
356 if s.len() <= INLINE_CAPACITY {
357 // Use inline storage for short strings
358 let mut data = [0u8; INLINE_CAPACITY];
359 data[..s.len()].copy_from_slice(s.as_bytes());
360 CheetahString {
361 inner: InnerString::Inline {
362 len: s.len() as u8,
363 data,
364 },
365 }
366 } else {
367 // Use Arc<str> for long strings to avoid double allocation
368 let arc_str: Arc<str> = s.into_boxed_str().into();
369 CheetahString {
370 inner: InnerString::ArcStr(arc_str),
371 }
372 }
373 }
374 #[inline]
375 pub fn from_arc_string(s: Arc<String>) -> Self {
376 CheetahString {
377 inner: InnerString::ArcString(s),
378 }
379 }
380
381 #[inline]
382 #[cfg(feature = "bytes")]
383 pub fn from_bytes(b: bytes::Bytes) -> Self {
384 CheetahString {
385 inner: InnerString::Bytes(b),
386 }
387 }
388
389 #[inline]
390 pub fn as_str(&self) -> &str {
391 match &self.inner {
392 InnerString::Inline { len, data } => {
393 // SAFETY: Inline strings are only created from valid UTF-8 sources.
394 // The data is always valid UTF-8 up to len bytes.
395 unsafe { std::str::from_utf8_unchecked(&data[..*len as usize]) }
396 }
397 InnerString::StaticStr(s) => s,
398 InnerString::ArcStr(s) => s.as_ref(),
399 InnerString::ArcString(s) => s.as_str(),
400 InnerString::ArcVecString(s) => {
401 // SAFETY: ArcVecString is only created from validated UTF-8 sources.
402 // All constructors ensure this invariant is maintained.
403 unsafe { std::str::from_utf8_unchecked(s.as_ref()) }
404 }
405 #[cfg(feature = "bytes")]
406 InnerString::Bytes(b) => {
407 // SAFETY: Bytes variant is only created from validated UTF-8 sources.
408 // The from_bytes constructor ensures this invariant.
409 unsafe { std::str::from_utf8_unchecked(b.as_ref()) }
410 }
411 }
412 }
413
414 #[inline]
415 pub fn as_bytes(&self) -> &[u8] {
416 match &self.inner {
417 InnerString::Inline { len, data } => &data[..*len as usize],
418 InnerString::StaticStr(s) => s.as_bytes(),
419 InnerString::ArcStr(s) => s.as_bytes(),
420 InnerString::ArcString(s) => s.as_bytes(),
421 InnerString::ArcVecString(s) => s.as_ref(),
422 #[cfg(feature = "bytes")]
423 InnerString::Bytes(b) => b.as_ref(),
424 }
425 }
426
427 #[inline]
428 pub fn len(&self) -> usize {
429 match &self.inner {
430 InnerString::Inline { len, .. } => *len as usize,
431 InnerString::StaticStr(s) => s.len(),
432 InnerString::ArcStr(s) => s.len(),
433 InnerString::ArcString(s) => s.len(),
434 InnerString::ArcVecString(s) => s.len(),
435 #[cfg(feature = "bytes")]
436 InnerString::Bytes(b) => b.len(),
437 }
438 }
439
440 #[inline]
441 pub fn is_empty(&self) -> bool {
442 match &self.inner {
443 InnerString::Inline { len, .. } => *len == 0,
444 InnerString::StaticStr(s) => s.is_empty(),
445 InnerString::ArcStr(s) => s.is_empty(),
446 InnerString::ArcString(s) => s.is_empty(),
447 InnerString::ArcVecString(s) => s.is_empty(),
448 #[cfg(feature = "bytes")]
449 InnerString::Bytes(b) => b.is_empty(),
450 }
451 }
452
453 // Query methods - delegate to &str
454
455 /// Returns `true` if the string starts with the given pattern.
456 ///
457 /// When the `simd` feature is enabled, this method uses SIMD instructions
458 /// for improved performance on longer patterns.
459 ///
460 /// # Examples
461 ///
462 /// ```
463 /// use cheetah_string::CheetahString;
464 ///
465 /// let s = CheetahString::from("hello world");
466 /// assert!(s.starts_with("hello"));
467 /// assert!(!s.starts_with("world"));
468 /// assert!(s.starts_with('h'));
469 /// ```
470 #[inline]
471 pub fn starts_with<P: StrPattern>(&self, pat: P) -> bool {
472 match pat.as_str_pattern() {
473 StrPatternImpl::Char(c) => self.as_str().starts_with(c),
474 StrPatternImpl::Str(s) => {
475 #[cfg(feature = "simd")]
476 {
477 crate::simd::starts_with_bytes(self.as_bytes(), s.as_bytes())
478 }
479 #[cfg(not(feature = "simd"))]
480 {
481 self.as_str().starts_with(s)
482 }
483 }
484 }
485 }
486
487 /// Returns `true` if the string starts with the given character.
488 ///
489 /// # Examples
490 ///
491 /// ```
492 /// use cheetah_string::CheetahString;
493 ///
494 /// let s = CheetahString::from("hello world");
495 /// assert!(s.starts_with_char('h'));
496 /// assert!(!s.starts_with_char('w'));
497 /// ```
498 #[inline]
499 pub fn starts_with_char(&self, pat: char) -> bool {
500 self.as_str().starts_with(pat)
501 }
502
503 /// Returns `true` if the string ends with the given pattern.
504 ///
505 /// When the `simd` feature is enabled, this method uses SIMD instructions
506 /// for improved performance on longer patterns.
507 ///
508 /// # Examples
509 ///
510 /// ```
511 /// use cheetah_string::CheetahString;
512 ///
513 /// let s = CheetahString::from("hello world");
514 /// assert!(s.ends_with("world"));
515 /// assert!(!s.ends_with("hello"));
516 /// assert!(s.ends_with('d'));
517 /// ```
518 #[inline]
519 pub fn ends_with<P: StrPattern>(&self, pat: P) -> bool {
520 match pat.as_str_pattern() {
521 StrPatternImpl::Char(c) => self.as_str().ends_with(c),
522 StrPatternImpl::Str(s) => {
523 #[cfg(feature = "simd")]
524 {
525 crate::simd::ends_with_bytes(self.as_bytes(), s.as_bytes())
526 }
527 #[cfg(not(feature = "simd"))]
528 {
529 self.as_str().ends_with(s)
530 }
531 }
532 }
533 }
534
535 /// Returns `true` if the string ends with the given character.
536 ///
537 /// # Examples
538 ///
539 /// ```
540 /// use cheetah_string::CheetahString;
541 ///
542 /// let s = CheetahString::from("hello world");
543 /// assert!(s.ends_with_char('d'));
544 /// assert!(!s.ends_with_char('h'));
545 /// ```
546 #[inline]
547 pub fn ends_with_char(&self, pat: char) -> bool {
548 self.as_str().ends_with(pat)
549 }
550
551 /// Returns `true` if the string contains the given pattern.
552 ///
553 /// When the `simd` feature is enabled, this method uses SIMD instructions
554 /// for improved performance on longer patterns.
555 ///
556 /// # Examples
557 ///
558 /// ```
559 /// use cheetah_string::CheetahString;
560 ///
561 /// let s = CheetahString::from("hello world");
562 /// assert!(s.contains("llo"));
563 /// assert!(!s.contains("xyz"));
564 /// assert!(s.contains('o'));
565 /// ```
566 #[inline]
567 pub fn contains<P: StrPattern>(&self, pat: P) -> bool {
568 match pat.as_str_pattern() {
569 StrPatternImpl::Char(c) => self.as_str().contains(c),
570 StrPatternImpl::Str(s) => {
571 #[cfg(feature = "simd")]
572 {
573 crate::simd::find_bytes(self.as_bytes(), s.as_bytes()).is_some()
574 }
575 #[cfg(not(feature = "simd"))]
576 {
577 self.as_str().contains(s)
578 }
579 }
580 }
581 }
582
583 /// Returns `true` if the string contains the given character.
584 ///
585 /// # Examples
586 ///
587 /// ```
588 /// use cheetah_string::CheetahString;
589 ///
590 /// let s = CheetahString::from("hello world");
591 /// assert!(s.contains_char('o'));
592 /// assert!(!s.contains_char('x'));
593 /// ```
594 #[inline]
595 pub fn contains_char(&self, pat: char) -> bool {
596 self.as_str().contains(pat)
597 }
598
599 /// Returns the byte index of the first occurrence of the pattern, or `None` if not found.
600 ///
601 /// When the `simd` feature is enabled, this method uses SIMD instructions
602 /// for improved performance on longer patterns.
603 ///
604 /// # Examples
605 ///
606 /// ```
607 /// use cheetah_string::CheetahString;
608 ///
609 /// let s = CheetahString::from("hello world");
610 /// assert_eq!(s.find("world"), Some(6));
611 /// assert_eq!(s.find("xyz"), None);
612 /// ```
613 #[inline]
614 pub fn find<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
615 let pat = pat.as_ref();
616 #[cfg(feature = "simd")]
617 {
618 crate::simd::find_bytes(self.as_bytes(), pat.as_bytes())
619 }
620 #[cfg(not(feature = "simd"))]
621 {
622 self.as_str().find(pat)
623 }
624 }
625
626 /// Returns the byte index of the last occurrence of the pattern, or `None` if not found.
627 ///
628 /// # Examples
629 ///
630 /// ```
631 /// use cheetah_string::CheetahString;
632 ///
633 /// let s = CheetahString::from("hello hello");
634 /// assert_eq!(s.rfind("hello"), Some(6));
635 /// ```
636 #[inline]
637 pub fn rfind<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
638 self.as_str().rfind(pat.as_ref())
639 }
640
641 /// Returns a string slice with leading and trailing whitespace removed.
642 ///
643 /// # Examples
644 ///
645 /// ```
646 /// use cheetah_string::CheetahString;
647 ///
648 /// let s = CheetahString::from(" hello ");
649 /// assert_eq!(s.trim(), "hello");
650 /// ```
651 #[inline]
652 pub fn trim(&self) -> &str {
653 self.as_str().trim()
654 }
655
656 /// Returns a string slice with leading whitespace removed.
657 ///
658 /// # Examples
659 ///
660 /// ```
661 /// use cheetah_string::CheetahString;
662 ///
663 /// let s = CheetahString::from(" hello");
664 /// assert_eq!(s.trim_start(), "hello");
665 /// ```
666 #[inline]
667 pub fn trim_start(&self) -> &str {
668 self.as_str().trim_start()
669 }
670
671 /// Returns a string slice with trailing whitespace removed.
672 ///
673 /// # Examples
674 ///
675 /// ```
676 /// use cheetah_string::CheetahString;
677 ///
678 /// let s = CheetahString::from("hello ");
679 /// assert_eq!(s.trim_end(), "hello");
680 /// ```
681 #[inline]
682 pub fn trim_end(&self) -> &str {
683 self.as_str().trim_end()
684 }
685
686 /// Splits the string by the given pattern.
687 ///
688 /// # Examples
689 ///
690 /// ```
691 /// use cheetah_string::CheetahString;
692 ///
693 /// let s = CheetahString::from("a,b,c");
694 /// let parts: Vec<&str> = s.split(",").collect();
695 /// assert_eq!(parts, vec!["a", "b", "c"]);
696 /// let parts2: Vec<&str> = s.split(',').collect();
697 /// assert_eq!(parts2, vec!["a", "b", "c"]);
698 /// ```
699 #[inline]
700 pub fn split<'a, P>(&'a self, pat: P) -> SplitWrapper<'a>
701 where
702 P: SplitPattern<'a>,
703 {
704 pat.split_str(self.as_str())
705 }
706
707 /// Returns an iterator over the lines of the string.
708 ///
709 /// # Examples
710 ///
711 /// ```
712 /// use cheetah_string::CheetahString;
713 ///
714 /// let s = CheetahString::from("line1\nline2\nline3");
715 /// let lines: Vec<&str> = s.lines().collect();
716 /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
717 /// ```
718 #[inline]
719 pub fn lines(&self) -> impl Iterator<Item = &str> {
720 self.as_str().lines()
721 }
722
723 /// Returns an iterator over the characters of the string.
724 ///
725 /// # Examples
726 ///
727 /// ```
728 /// use cheetah_string::CheetahString;
729 ///
730 /// let s = CheetahString::from("hello");
731 /// let chars: Vec<char> = s.chars().collect();
732 /// assert_eq!(chars, vec!['h', 'e', 'l', 'l', 'o']);
733 /// let reversed: Vec<char> = s.chars().rev().collect();
734 /// assert_eq!(reversed, vec!['o', 'l', 'l', 'e', 'h']);
735 /// ```
736 #[inline]
737 pub fn chars(&self) -> std::str::Chars<'_> {
738 self.as_str().chars()
739 }
740
741 // Transformation methods - create new CheetahString
742
743 /// Returns a new `CheetahString` with all characters converted to uppercase.
744 ///
745 /// # Examples
746 ///
747 /// ```
748 /// use cheetah_string::CheetahString;
749 ///
750 /// let s = CheetahString::from("hello");
751 /// assert_eq!(s.to_uppercase(), "HELLO");
752 /// ```
753 #[inline]
754 pub fn to_uppercase(&self) -> CheetahString {
755 CheetahString::from_string(self.as_str().to_uppercase())
756 }
757
758 /// Returns a new `CheetahString` with all characters converted to lowercase.
759 ///
760 /// # Examples
761 ///
762 /// ```
763 /// use cheetah_string::CheetahString;
764 ///
765 /// let s = CheetahString::from("HELLO");
766 /// assert_eq!(s.to_lowercase(), "hello");
767 /// ```
768 #[inline]
769 pub fn to_lowercase(&self) -> CheetahString {
770 CheetahString::from_string(self.as_str().to_lowercase())
771 }
772
773 /// Replaces all occurrences of a pattern with another string.
774 ///
775 /// # Examples
776 ///
777 /// ```
778 /// use cheetah_string::CheetahString;
779 ///
780 /// let s = CheetahString::from("hello world");
781 /// assert_eq!(s.replace("world", "rust"), "hello rust");
782 /// ```
783 #[inline]
784 pub fn replace<P: AsRef<str>>(&self, from: P, to: &str) -> CheetahString {
785 CheetahString::from_string(self.as_str().replace(from.as_ref(), to))
786 }
787
788 /// Returns a new `CheetahString` with the specified range replaced.
789 ///
790 /// # Examples
791 ///
792 /// ```
793 /// use cheetah_string::CheetahString;
794 ///
795 /// let s = CheetahString::from("hello world");
796 /// assert_eq!(s.replacen("l", "L", 1), "heLlo world");
797 /// ```
798 #[inline]
799 pub fn replacen<P: AsRef<str>>(&self, from: P, to: &str, count: usize) -> CheetahString {
800 CheetahString::from_string(self.as_str().replacen(from.as_ref(), to, count))
801 }
802
803 /// Returns a substring as a new `CheetahString`.
804 ///
805 /// # Panics
806 ///
807 /// Panics if the indices are not on valid UTF-8 character boundaries.
808 ///
809 /// # Examples
810 ///
811 /// ```
812 /// use cheetah_string::CheetahString;
813 ///
814 /// let s = CheetahString::from("hello world");
815 /// assert_eq!(s.substring(0, 5), "hello");
816 /// assert_eq!(s.substring(6, 11), "world");
817 /// ```
818 #[inline]
819 pub fn substring(&self, start: usize, end: usize) -> CheetahString {
820 CheetahString::from_slice(&self.as_str()[start..end])
821 }
822
823 /// Repeats the string `n` times.
824 ///
825 /// # Examples
826 ///
827 /// ```
828 /// use cheetah_string::CheetahString;
829 ///
830 /// let s = CheetahString::from("abc");
831 /// assert_eq!(s.repeat(3), "abcabcabc");
832 /// ```
833 #[inline]
834 pub fn repeat(&self, n: usize) -> CheetahString {
835 CheetahString::from_string(self.as_str().repeat(n))
836 }
837
838 // Incremental building methods
839
840 /// Creates a new `CheetahString` with the specified capacity.
841 ///
842 /// The string will be able to hold at least `capacity` bytes without reallocating.
843 /// If `capacity` is less than or equal to the inline capacity (23 bytes),
844 /// an empty inline string is returned.
845 ///
846 /// # Examples
847 ///
848 /// ```
849 /// use cheetah_string::CheetahString;
850 ///
851 /// let mut s = CheetahString::with_capacity(100);
852 /// s.push_str("hello");
853 /// assert_eq!(s, "hello");
854 /// ```
855 #[inline]
856 pub fn with_capacity(capacity: usize) -> Self {
857 if capacity <= INLINE_CAPACITY {
858 CheetahString::empty()
859 } else {
860 CheetahString::from_string(String::with_capacity(capacity))
861 }
862 }
863
864 /// Appends a string slice to the end of this `CheetahString`.
865 ///
866 /// This method is optimized for incremental building and will:
867 /// - Mutate inline storage when possible
868 /// - Mutate unique Arc<String> in-place when available
869 /// - Only allocate when necessary
870 ///
871 /// # Examples
872 ///
873 /// ```
874 /// use cheetah_string::CheetahString;
875 ///
876 /// let mut s = CheetahString::from("Hello");
877 /// s.push_str(" ");
878 /// s.push_str("World");
879 /// assert_eq!(s, "Hello World");
880 /// ```
881 #[inline]
882 pub fn push_str(&mut self, string: &str) {
883 *self += string;
884 }
885
886 /// Reserves capacity for at least `additional` more bytes.
887 ///
888 /// This method will modify the internal representation if needed to ensure
889 /// that the string can hold at least `additional` more bytes without reallocating.
890 ///
891 /// # Examples
892 ///
893 /// ```
894 /// use cheetah_string::CheetahString;
895 ///
896 /// let mut s = CheetahString::from("hello");
897 /// s.reserve(100);
898 /// s.push_str(" world");
899 /// ```
900 #[inline]
901 pub fn reserve(&mut self, additional: usize) {
902 let new_len = self.len() + additional;
903
904 // If it still fits inline, nothing to do
905 if new_len <= INLINE_CAPACITY {
906 return;
907 }
908
909 match &mut self.inner {
910 InnerString::Inline { .. } => {
911 // Convert inline to Arc<String> with capacity
912 let mut s = String::with_capacity(new_len);
913 s.push_str(self.as_str());
914 *self = CheetahString {
915 inner: InnerString::ArcString(Arc::new(s)),
916 };
917 }
918 InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
919 // Reserve in the unique Arc<String>
920 if let Some(s) = Arc::get_mut(arc) {
921 s.reserve(additional);
922 }
923 }
924 InnerString::StaticStr(_) | InnerString::ArcStr(_) => {
925 // Convert to Arc<String> with capacity
926 let mut s = String::with_capacity(new_len);
927 s.push_str(self.as_str());
928 *self = CheetahString {
929 inner: InnerString::ArcString(Arc::new(s)),
930 };
931 }
932 _ => {
933 // For shared Arc or other types, convert if needed
934 if Arc::strong_count(match &self.inner {
935 InnerString::ArcString(arc) => arc,
936 _ => return,
937 }) > 1
938 {
939 let mut s = String::with_capacity(new_len);
940 s.push_str(self.as_str());
941 *self = CheetahString {
942 inner: InnerString::ArcString(Arc::new(s)),
943 };
944 }
945 }
946 }
947 }
948}
949
950impl PartialEq for CheetahString {
951 #[inline]
952 fn eq(&self, other: &Self) -> bool {
953 #[cfg(feature = "simd")]
954 {
955 crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
956 }
957 #[cfg(not(feature = "simd"))]
958 {
959 self.as_str() == other.as_str()
960 }
961 }
962}
963
964impl PartialEq<str> for CheetahString {
965 #[inline]
966 fn eq(&self, other: &str) -> bool {
967 #[cfg(feature = "simd")]
968 {
969 crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
970 }
971 #[cfg(not(feature = "simd"))]
972 {
973 self.as_str() == other
974 }
975 }
976}
977
978impl PartialEq<String> for CheetahString {
979 #[inline]
980 fn eq(&self, other: &String) -> bool {
981 #[cfg(feature = "simd")]
982 {
983 crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
984 }
985 #[cfg(not(feature = "simd"))]
986 {
987 self.as_str() == other.as_str()
988 }
989 }
990}
991
992impl PartialEq<Vec<u8>> for CheetahString {
993 #[inline]
994 fn eq(&self, other: &Vec<u8>) -> bool {
995 self.as_bytes() == other.as_slice()
996 }
997}
998
999impl<'a> PartialEq<&'a str> for CheetahString {
1000 #[inline]
1001 fn eq(&self, other: &&'a str) -> bool {
1002 self.as_str() == *other
1003 }
1004}
1005
1006impl PartialEq<CheetahString> for str {
1007 #[inline]
1008 fn eq(&self, other: &CheetahString) -> bool {
1009 self == other.as_str()
1010 }
1011}
1012
1013impl PartialEq<CheetahString> for String {
1014 #[inline]
1015 fn eq(&self, other: &CheetahString) -> bool {
1016 self.as_str() == other.as_str()
1017 }
1018}
1019
1020impl PartialEq<CheetahString> for &str {
1021 #[inline]
1022 fn eq(&self, other: &CheetahString) -> bool {
1023 *self == other.as_str()
1024 }
1025}
1026
1027impl Eq for CheetahString {}
1028
1029impl PartialOrd for CheetahString {
1030 #[inline]
1031 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1032 Some(self.cmp(other))
1033 }
1034}
1035
1036impl Ord for CheetahString {
1037 #[inline]
1038 fn cmp(&self, other: &Self) -> Ordering {
1039 self.as_str().cmp(other.as_str())
1040 }
1041}
1042
1043impl Hash for CheetahString {
1044 #[inline]
1045 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1046 self.as_str().hash(state);
1047 }
1048}
1049
1050impl Display for CheetahString {
1051 #[inline]
1052 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1053 self.as_str().fmt(f)
1054 }
1055}
1056
1057impl std::fmt::Debug for CheetahString {
1058 #[inline]
1059 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1060 fmt::Debug::fmt(self.as_str(), f)
1061 }
1062}
1063
1064impl Borrow<str> for CheetahString {
1065 #[inline]
1066 fn borrow(&self) -> &str {
1067 self.as_str()
1068 }
1069}
1070
1071// Add trait implementations for string concatenation
1072
1073impl std::ops::Add<&str> for CheetahString {
1074 type Output = CheetahString;
1075
1076 /// Concatenates a `CheetahString` with a string slice.
1077 ///
1078 /// # Examples
1079 ///
1080 /// ```
1081 /// use cheetah_string::CheetahString;
1082 ///
1083 /// let s = CheetahString::from("Hello");
1084 /// let result = s + " World";
1085 /// assert_eq!(result, "Hello World");
1086 /// ```
1087 #[inline]
1088 fn add(self, rhs: &str) -> Self::Output {
1089 let total_len = self.len() + rhs.len();
1090
1091 // Fast path: result fits in inline storage
1092 if total_len <= INLINE_CAPACITY {
1093 let mut data = [0u8; INLINE_CAPACITY];
1094 let self_bytes = self.as_bytes();
1095 data[..self_bytes.len()].copy_from_slice(self_bytes);
1096 data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1097 return CheetahString {
1098 inner: InnerString::Inline {
1099 len: total_len as u8,
1100 data,
1101 },
1102 };
1103 }
1104
1105 // Slow path: allocate for long result
1106 let mut result = String::with_capacity(total_len);
1107 result.push_str(self.as_str());
1108 result.push_str(rhs);
1109 CheetahString::from_string(result)
1110 }
1111}
1112
1113impl std::ops::Add<&CheetahString> for CheetahString {
1114 type Output = CheetahString;
1115
1116 /// Concatenates two `CheetahString` values.
1117 ///
1118 /// # Examples
1119 ///
1120 /// ```
1121 /// use cheetah_string::CheetahString;
1122 ///
1123 /// let s1 = CheetahString::from("Hello");
1124 /// let s2 = CheetahString::from(" World");
1125 /// let result = s1 + &s2;
1126 /// assert_eq!(result, "Hello World");
1127 /// ```
1128 #[inline]
1129 fn add(self, rhs: &CheetahString) -> Self::Output {
1130 let total_len = self.len() + rhs.len();
1131
1132 // Fast path: result fits in inline storage
1133 if total_len <= INLINE_CAPACITY {
1134 let mut data = [0u8; INLINE_CAPACITY];
1135 let self_bytes = self.as_bytes();
1136 data[..self_bytes.len()].copy_from_slice(self_bytes);
1137 data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1138 return CheetahString {
1139 inner: InnerString::Inline {
1140 len: total_len as u8,
1141 data,
1142 },
1143 };
1144 }
1145
1146 // Slow path: allocate for long result
1147 let mut result = String::with_capacity(total_len);
1148 result.push_str(self.as_str());
1149 result.push_str(rhs.as_str());
1150 CheetahString::from_string(result)
1151 }
1152}
1153
1154impl std::ops::Add<String> for CheetahString {
1155 type Output = CheetahString;
1156
1157 /// Concatenates a `CheetahString` with a `String`.
1158 ///
1159 /// # Examples
1160 ///
1161 /// ```
1162 /// use cheetah_string::CheetahString;
1163 ///
1164 /// let s = CheetahString::from("Hello");
1165 /// let result = s + String::from(" World");
1166 /// assert_eq!(result, "Hello World");
1167 /// ```
1168 #[inline]
1169 fn add(self, rhs: String) -> Self::Output {
1170 let total_len = self.len() + rhs.len();
1171
1172 // Fast path: result fits in inline storage
1173 if total_len <= INLINE_CAPACITY {
1174 let mut data = [0u8; INLINE_CAPACITY];
1175 let self_bytes = self.as_bytes();
1176 data[..self_bytes.len()].copy_from_slice(self_bytes);
1177 data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1178 return CheetahString {
1179 inner: InnerString::Inline {
1180 len: total_len as u8,
1181 data,
1182 },
1183 };
1184 }
1185
1186 // Slow path: allocate for long result
1187 let mut result = String::with_capacity(total_len);
1188 result.push_str(self.as_str());
1189 result.push_str(&rhs);
1190 CheetahString::from_string(result)
1191 }
1192}
1193
1194impl std::ops::AddAssign<&str> for CheetahString {
1195 /// Appends a string slice to a `CheetahString`.
1196 ///
1197 /// # Examples
1198 ///
1199 /// ```
1200 /// use cheetah_string::CheetahString;
1201 ///
1202 /// let mut s = CheetahString::from("Hello");
1203 /// s += " World";
1204 /// assert_eq!(s, "Hello World");
1205 /// ```
1206 #[inline]
1207 fn add_assign(&mut self, rhs: &str) {
1208 let total_len = self.len() + rhs.len();
1209
1210 match &mut self.inner {
1211 // Fast path 1: Both self and result fit in inline storage
1212 InnerString::Inline { len, data } if total_len <= INLINE_CAPACITY => {
1213 // Mutate inline buffer directly
1214 data[*len as usize..total_len].copy_from_slice(rhs.as_bytes());
1215 *len = total_len as u8;
1216 return;
1217 }
1218 // Fast path 2: Self is unique Arc<String>, mutate in-place
1219 InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
1220 // SAFETY: strong_count == 1 guarantees exclusive access
1221 if let Some(s) = Arc::get_mut(arc) {
1222 s.push_str(rhs);
1223 return;
1224 }
1225 }
1226 _ => {}
1227 }
1228
1229 // Slow path: allocate new string
1230 let mut result = String::with_capacity(total_len);
1231 result.push_str(self.as_str());
1232 result.push_str(rhs);
1233 *self = CheetahString::from_string(result);
1234 }
1235}
1236
1237impl std::ops::AddAssign<&CheetahString> for CheetahString {
1238 /// Appends a `CheetahString` to another `CheetahString`.
1239 ///
1240 /// # Examples
1241 ///
1242 /// ```
1243 /// use cheetah_string::CheetahString;
1244 ///
1245 /// let mut s1 = CheetahString::from("Hello");
1246 /// let s2 = CheetahString::from(" World");
1247 /// s1 += &s2;
1248 /// assert_eq!(s1, "Hello World");
1249 /// ```
1250 #[inline]
1251 fn add_assign(&mut self, rhs: &CheetahString) {
1252 let total_len = self.len() + rhs.len();
1253
1254 match &mut self.inner {
1255 // Fast path 1: Both self and result fit in inline storage
1256 InnerString::Inline { len, data } if total_len <= INLINE_CAPACITY => {
1257 // Mutate inline buffer directly
1258 data[*len as usize..total_len].copy_from_slice(rhs.as_bytes());
1259 *len = total_len as u8;
1260 return;
1261 }
1262 // Fast path 2: Self is unique Arc<String>, mutate in-place
1263 InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
1264 // SAFETY: strong_count == 1 guarantees exclusive access
1265 if let Some(s) = Arc::get_mut(arc) {
1266 s.push_str(rhs.as_str());
1267 return;
1268 }
1269 }
1270 _ => {}
1271 }
1272
1273 // Slow path: allocate new string
1274 let mut result = String::with_capacity(total_len);
1275 result.push_str(self.as_str());
1276 result.push_str(rhs.as_str());
1277 *self = CheetahString::from_string(result);
1278 }
1279}
1280
/// Largest number of bytes a string may occupy while still being stored inline.
///
/// Combined with the one-byte length of `InnerString::Inline`, the inline
/// payload occupies 24 bytes (23 data bytes + 1 length byte).
const INLINE_CAPACITY: usize = 23;
1283
1284/// The `InnerString` enum represents different types of string storage.
1285///
1286/// This enum uses Small String Optimization (SSO) to avoid heap allocations for short strings.
1287///
1288/// Variants:
1289///
1290/// * `Inline` - Inline storage for strings <= 23 bytes (zero heap allocations).
1291/// * `StaticStr(&'static str)` - A static string slice (zero heap allocations).
1292/// * `ArcStr(Arc<str>)` - A reference-counted string slice (single heap allocation, optimized).
1293/// * `ArcString(Arc<String>)` - A reference-counted string (for backwards compatibility).
1294/// * `ArcVecString(Arc<Vec<u8>>)` - A reference-counted byte vector.
1295/// * `Bytes(bytes::Bytes)` - A byte buffer (available when the "bytes" feature is enabled).
1296#[derive(Clone)]
1297pub(super) enum InnerString {
1298 /// Inline storage for short strings (up to 23 bytes).
1299 /// Stores the length and data directly without heap allocation.
1300 Inline {
1301 len: u8,
1302 data: [u8; INLINE_CAPACITY],
1303 },
1304 /// Static string slice with 'static lifetime.
1305 StaticStr(&'static str),
1306 /// Reference-counted string slice (single heap allocation).
1307 /// Preferred over ArcString for long strings created from owned data.
1308 ArcStr(Arc<str>),
1309 /// Reference-counted heap-allocated string.
1310 /// Kept for backwards compatibility and when Arc<String> is explicitly provided.
1311 ArcString(Arc<String>),
1312 /// Reference-counted heap-allocated byte vector.
1313 ArcVecString(Arc<Vec<u8>>),
1314 /// Bytes type integration (requires "bytes" feature).
1315 #[cfg(feature = "bytes")]
1316 Bytes(bytes::Bytes),
1317}
1318
// Sealed-trait plumbing. The marker traits below live in a private module, so
// the public pattern traits that require them (`StrPattern`, `SplitPattern`)
// can only be implemented for the types listed here. This is what lets
// starts_with/ends_with/contains accept both `&str` and `char`.
mod private {
    /// Marker for the types accepted as a `StrPattern`.
    pub trait Sealed {}
    impl Sealed for char {}
    impl Sealed for &str {}
    impl Sealed for &String {}

    /// Marker for the types accepted as a `SplitPattern`.
    pub trait SplitSealed {}
    impl SplitSealed for char {}
    impl SplitSealed for &str {}
}
1330
1331/// A pattern that can be used with `starts_with` and `ends_with` methods.
1332pub trait StrPattern: private::Sealed {
1333 #[doc(hidden)]
1334 fn as_str_pattern(&self) -> StrPatternImpl<'_>;
1335}
1336
/// Concrete form of a `StrPattern`: either a single character or a borrowed
/// string slice.
#[doc(hidden)]
pub enum StrPatternImpl<'a> {
    /// Pattern consisting of one `char`.
    Char(char),
    /// Pattern consisting of a string slice borrowed for `'a`.
    Str(&'a str),
}
1342
1343impl StrPattern for char {
1344 fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1345 StrPatternImpl::Char(*self)
1346 }
1347}
1348
1349impl StrPattern for &str {
1350 fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1351 StrPatternImpl::Str(self)
1352 }
1353}
1354
1355impl StrPattern for &String {
1356 fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1357 StrPatternImpl::Str(self.as_str())
1358 }
1359}
1360
1361/// A pattern that can be used with `split` method.
1362pub trait SplitPattern<'a>: private::SplitSealed {
1363 #[doc(hidden)]
1364 fn split_str(self, s: &'a str) -> SplitWrapper<'a>;
1365}
1366
1367impl SplitPattern<'_> for char {
1368 fn split_str(self, s: &str) -> SplitWrapper<'_> {
1369 SplitWrapper::Char(s.split(self))
1370 }
1371}
1372
1373impl<'a> SplitPattern<'a> for &'a str {
1374 fn split_str(self, s: &'a str) -> SplitWrapper<'a> {
1375 let empty_pattern_state = if self.is_empty() {
1376 Some(EmptyPatternState {
1377 chars: s.char_indices(),
1378 original: s,
1379 started: false,
1380 })
1381 } else {
1382 None
1383 };
1384
1385 SplitWrapper::Str(SplitStr {
1386 string: s,
1387 pattern: self,
1388 finished: false,
1389 empty_pattern_state,
1390 })
1391 }
1392}
1393
1394/// Helper struct for splitting strings by a string pattern
1395pub struct SplitStr<'a> {
1396 string: &'a str,
1397 pattern: &'a str,
1398 finished: bool,
1399 /// For empty pattern, we need to iterate over chars
1400 empty_pattern_state: Option<EmptyPatternState<'a>>,
1401}
1402
/// Bookkeeping used by `SplitStr` when splitting on an empty pattern.
#[derive(Clone)]
struct EmptyPatternState<'a> {
    /// Cursor over the characters of `original` together with their byte offsets.
    chars: std::str::CharIndices<'a>,
    /// The full input string the yielded pieces are sliced from.
    original: &'a str,
    /// Whether the leading empty piece has already been yielded.
    started: bool,
}
1409
1410impl<'a> Iterator for SplitStr<'a> {
1411 type Item = &'a str;
1412
1413 fn next(&mut self) -> Option<Self::Item> {
1414 if self.finished {
1415 return None;
1416 }
1417
1418 // Handle empty pattern case (split between every character)
1419 if self.pattern.is_empty() {
1420 if let Some(ref mut state) = self.empty_pattern_state {
1421 if !state.started {
1422 state.started = true;
1423 // First element is always empty string before first char
1424 return Some("");
1425 }
1426
1427 match state.chars.next() {
1428 Some((pos, ch)) => {
1429 let char_end = pos + ch.len_utf8();
1430 let result = &state.original[pos..char_end];
1431 Some(result)
1432 }
1433 None => {
1434 self.finished = true;
1435 // Last element is empty string after last char
1436 Some("")
1437 }
1438 }
1439 } else {
1440 unreachable!("empty_pattern_state should be Some for empty pattern")
1441 }
1442 } else {
1443 // Normal case: non-empty pattern
1444 match self.string.find(self.pattern) {
1445 Some(pos) => {
1446 let result = &self.string[..pos];
1447 self.string = &self.string[pos + self.pattern.len()..];
1448 Some(result)
1449 }
1450 None => {
1451 self.finished = true;
1452 Some(self.string)
1453 }
1454 }
1455 }
1456 }
1457}
1458
1459/// Wrapper for split iterator that supports both char and str patterns
1460pub enum SplitWrapper<'a> {
1461 #[doc(hidden)]
1462 Char(std::str::Split<'a, char>),
1463 #[doc(hidden)]
1464 Str(SplitStr<'a>),
1465}
1466
1467impl<'a> Iterator for SplitWrapper<'a> {
1468 type Item = &'a str;
1469
1470 fn next(&mut self) -> Option<Self::Item> {
1471 match self {
1472 SplitWrapper::Char(iter) => iter.next(),
1473 SplitWrapper::Str(iter) => iter.next(),
1474 }
1475 }
1476}
1477
1478impl<'a> DoubleEndedIterator for SplitWrapper<'a> {
1479 fn next_back(&mut self) -> Option<Self::Item> {
1480 match self {
1481 SplitWrapper::Char(iter) => iter.next_back(),
1482 SplitWrapper::Str(_) => {
1483 // String pattern split doesn't support reverse iteration
1484 // This is consistent with std::str::Split<&str>
1485 panic!("split with string pattern does not support reverse iteration")
1486 }
1487 }
1488 }
1489}