cheetah_string/
cheetah_string.rs

1use core::fmt;
2use core::str::Utf8Error;
3use std::borrow::{Borrow, Cow};
4use std::cmp::Ordering;
5use std::fmt::Display;
6use std::hash::Hash;
7use std::ops::Deref;
8use std::str::FromStr;
9use std::sync::Arc;
10
/// String type whose contents live in one of several storage representations.
///
/// The representation in use is selected by the wrapped `InnerString` value.
#[derive(Clone)]
#[repr(transparent)]
pub struct CheetahString {
    // Storage variant that actually holds the string data.
    pub(super) inner: InnerString,
}
16
17impl Default for CheetahString {
18    fn default() -> Self {
19        CheetahString {
20            inner: InnerString::Inline {
21                len: 0,
22                data: [0; INLINE_CAPACITY],
23            },
24        }
25    }
26}
27
28impl From<String> for CheetahString {
29    #[inline]
30    fn from(s: String) -> Self {
31        CheetahString::from_string(s)
32    }
33}
34
35impl From<Arc<String>> for CheetahString {
36    #[inline]
37    fn from(s: Arc<String>) -> Self {
38        CheetahString::from_arc_string(s)
39    }
40}
41
42impl<'a> From<&'a str> for CheetahString {
43    #[inline]
44    fn from(s: &'a str) -> Self {
45        CheetahString::from_slice(s)
46    }
47}
48
49/// # Safety Warning
50///
51/// This implementation uses `unsafe` code and may cause undefined behavior
52/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_bytes()`
53/// for safe UTF-8 validation.
54///
55/// This implementation will be deprecated in a future version.
56impl From<&[u8]> for CheetahString {
57    #[inline]
58    fn from(b: &[u8]) -> Self {
59        // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
60        // This will be deprecated in favor of try_from_bytes in the next version.
61        CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(b) })
62    }
63}
64
65impl FromStr for CheetahString {
66    type Err = std::string::ParseError;
67    #[inline]
68    fn from_str(s: &str) -> Result<Self, Self::Err> {
69        Ok(CheetahString::from_slice(s))
70    }
71}
72
73/// # Safety Warning
74///
75/// This implementation uses `unsafe` code and may cause undefined behavior
76/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_vec()`
77/// for safe UTF-8 validation.
78///
79/// This implementation will be deprecated in a future version.
80impl From<Vec<u8>> for CheetahString {
81    #[inline]
82    fn from(v: Vec<u8>) -> Self {
83        // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
84        // This will be deprecated in favor of try_from_vec in the next version.
85        CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(&v) })
86    }
87}
88
89impl From<Cow<'static, str>> for CheetahString {
90    #[inline]
91    fn from(cow: Cow<'static, str>) -> Self {
92        match cow {
93            Cow::Borrowed(s) => CheetahString::from_static_str(s),
94            Cow::Owned(s) => CheetahString::from_string(s),
95        }
96    }
97}
98
99impl From<Cow<'_, String>> for CheetahString {
100    #[inline]
101    fn from(cow: Cow<'_, String>) -> Self {
102        match cow {
103            Cow::Borrowed(s) => CheetahString::from_slice(s),
104            Cow::Owned(s) => CheetahString::from_string(s),
105        }
106    }
107}
108
109impl From<char> for CheetahString {
110    /// Allocates an owned [`CheetahString`] from a single character.
111    ///
112    /// # Example
113    /// ```rust
114    /// use cheetah_string::CheetahString;
115    /// let c: char = 'a';
116    /// let s: CheetahString = CheetahString::from(c);
117    /// assert_eq!("a", &s[..]);
118    /// ```
119    #[inline]
120    fn from(c: char) -> Self {
121        CheetahString::from_string(c.to_string())
122    }
123}
124
125impl<'a> FromIterator<&'a char> for CheetahString {
126    #[inline]
127    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> CheetahString {
128        let mut buf = String::new();
129        buf.extend(iter);
130        CheetahString::from_string(buf)
131    }
132}
133
134impl<'a> FromIterator<&'a str> for CheetahString {
135    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> CheetahString {
136        let mut buf = String::new();
137        buf.extend(iter);
138        CheetahString::from_string(buf)
139    }
140}
141
142impl FromIterator<String> for CheetahString {
143    #[inline]
144    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
145        let mut buf = String::new();
146        buf.extend(iter);
147        CheetahString::from_string(buf)
148    }
149}
150
151impl<'a> FromIterator<&'a String> for CheetahString {
152    #[inline]
153    fn from_iter<T: IntoIterator<Item = &'a String>>(iter: T) -> Self {
154        let mut buf = String::new();
155        buf.extend(iter.into_iter().map(|s| s.as_str()));
156        CheetahString::from_string(buf)
157    }
158}
159
#[cfg(feature = "bytes")]
impl From<bytes::Bytes> for CheetahString {
    /// Wraps a `Bytes` buffer by delegating to `CheetahString::from_bytes`.
    #[inline]
    fn from(buffer: bytes::Bytes) -> Self {
        Self::from_bytes(buffer)
    }
}
167
168impl From<&CheetahString> for CheetahString {
169    #[inline]
170    fn from(s: &CheetahString) -> Self {
171        s.clone()
172    }
173}
174
175impl From<CheetahString> for String {
176    #[inline]
177    fn from(s: CheetahString) -> Self {
178        match s {
179            CheetahString {
180                inner: InnerString::Inline { len, data },
181            } => {
182                // SAFETY: Inline strings are always valid UTF-8
183                unsafe { String::from_utf8_unchecked(data[..len as usize].to_vec()) }
184            }
185            CheetahString {
186                inner: InnerString::StaticStr(s),
187            } => s.to_string(),
188            CheetahString {
189                inner: InnerString::ArcStr(s),
190            } => s.to_string(),
191            CheetahString {
192                inner: InnerString::ArcString(s),
193            } => s.as_ref().clone(),
194            CheetahString {
195                inner: InnerString::ArcVecString(s),
196            } => {
197                // SAFETY: ArcVecString should only be created from valid UTF-8 sources
198                unsafe { String::from_utf8_unchecked(s.to_vec()) }
199            }
200            #[cfg(feature = "bytes")]
201            CheetahString {
202                inner: InnerString::Bytes(b),
203            } => {
204                // SAFETY: Bytes variant should only be created from valid UTF-8 sources
205                unsafe { String::from_utf8_unchecked(b.to_vec()) }
206            }
207        }
208    }
209}
210
211impl Deref for CheetahString {
212    type Target = str;
213
214    #[inline]
215    fn deref(&self) -> &Self::Target {
216        self.as_str()
217    }
218}
219
220impl AsRef<str> for CheetahString {
221    #[inline]
222    fn as_ref(&self) -> &str {
223        self.as_str()
224    }
225}
226
227impl AsRef<[u8]> for CheetahString {
228    #[inline]
229    fn as_ref(&self) -> &[u8] {
230        self.as_bytes()
231    }
232}
233
234impl AsRef<CheetahString> for CheetahString {
235    #[inline]
236    fn as_ref(&self) -> &CheetahString {
237        self
238    }
239}
240
241impl From<&String> for CheetahString {
242    #[inline]
243    fn from(s: &String) -> Self {
244        CheetahString::from_slice(s)
245    }
246}
247
248impl CheetahString {
249    #[inline]
250    pub const fn empty() -> Self {
251        CheetahString {
252            inner: InnerString::Inline {
253                len: 0,
254                data: [0; INLINE_CAPACITY],
255            },
256        }
257    }
258
259    #[inline]
260    pub fn new() -> Self {
261        CheetahString::default()
262    }
263
264    #[inline]
265    pub const fn from_static_str(s: &'static str) -> Self {
266        CheetahString {
267            inner: InnerString::StaticStr(s),
268        }
269    }
270
271    #[inline]
272    pub fn from_vec(s: Vec<u8>) -> Self {
273        CheetahString {
274            inner: InnerString::ArcVecString(Arc::new(s)),
275        }
276    }
277
278    /// Creates a `CheetahString` from a byte vector with UTF-8 validation.
279    ///
280    /// # Errors
281    ///
282    /// Returns an error if the bytes are not valid UTF-8.
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// use cheetah_string::CheetahString;
288    ///
289    /// let bytes = vec![104, 101, 108, 108, 111]; // "hello"
290    /// let s = CheetahString::try_from_vec(bytes).unwrap();
291    /// assert_eq!(s, "hello");
292    ///
293    /// let invalid = vec![0xFF, 0xFE];
294    /// assert!(CheetahString::try_from_vec(invalid).is_err());
295    /// ```
296    pub fn try_from_vec(v: Vec<u8>) -> Result<Self, Utf8Error> {
297        // Validate UTF-8
298        std::str::from_utf8(&v)?;
299        Ok(CheetahString {
300            inner: InnerString::ArcVecString(Arc::new(v)),
301        })
302    }
303
304    /// Creates a `CheetahString` from a byte slice with UTF-8 validation.
305    ///
306    /// # Errors
307    ///
308    /// Returns an error if the bytes are not valid UTF-8.
309    ///
310    /// # Examples
311    ///
312    /// ```
313    /// use cheetah_string::CheetahString;
314    ///
315    /// let bytes = b"hello";
316    /// let s = CheetahString::try_from_bytes(bytes).unwrap();
317    /// assert_eq!(s, "hello");
318    ///
319    /// let invalid = &[0xFF, 0xFE];
320    /// assert!(CheetahString::try_from_bytes(invalid).is_err());
321    /// ```
322    pub fn try_from_bytes(b: &[u8]) -> Result<Self, Utf8Error> {
323        let s = std::str::from_utf8(b)?;
324        Ok(CheetahString::from_slice(s))
325    }
326
327    #[inline]
328    pub fn from_arc_vec(s: Arc<Vec<u8>>) -> Self {
329        CheetahString {
330            inner: InnerString::ArcVecString(s),
331        }
332    }
333
334    #[inline]
335    pub fn from_slice(s: &str) -> Self {
336        if s.len() <= INLINE_CAPACITY {
337            // Use inline storage for short strings
338            let mut data = [0u8; INLINE_CAPACITY];
339            data[..s.len()].copy_from_slice(s.as_bytes());
340            CheetahString {
341                inner: InnerString::Inline {
342                    len: s.len() as u8,
343                    data,
344                },
345            }
346        } else {
347            // Use Arc for long strings
348            CheetahString {
349                inner: InnerString::ArcString(Arc::new(s.to_owned())),
350            }
351        }
352    }
353
354    #[inline]
355    pub fn from_string(s: String) -> Self {
356        if s.len() <= INLINE_CAPACITY {
357            // Use inline storage for short strings
358            let mut data = [0u8; INLINE_CAPACITY];
359            data[..s.len()].copy_from_slice(s.as_bytes());
360            CheetahString {
361                inner: InnerString::Inline {
362                    len: s.len() as u8,
363                    data,
364                },
365            }
366        } else {
367            // Use Arc<str> for long strings to avoid double allocation
368            let arc_str: Arc<str> = s.into_boxed_str().into();
369            CheetahString {
370                inner: InnerString::ArcStr(arc_str),
371            }
372        }
373    }
374    #[inline]
375    pub fn from_arc_string(s: Arc<String>) -> Self {
376        CheetahString {
377            inner: InnerString::ArcString(s),
378        }
379    }
380
381    #[inline]
382    #[cfg(feature = "bytes")]
383    pub fn from_bytes(b: bytes::Bytes) -> Self {
384        CheetahString {
385            inner: InnerString::Bytes(b),
386        }
387    }
388
389    #[inline]
390    pub fn as_str(&self) -> &str {
391        match &self.inner {
392            InnerString::Inline { len, data } => {
393                // SAFETY: Inline strings are only created from valid UTF-8 sources.
394                // The data is always valid UTF-8 up to len bytes.
395                unsafe { std::str::from_utf8_unchecked(&data[..*len as usize]) }
396            }
397            InnerString::StaticStr(s) => s,
398            InnerString::ArcStr(s) => s.as_ref(),
399            InnerString::ArcString(s) => s.as_str(),
400            InnerString::ArcVecString(s) => {
401                // SAFETY: ArcVecString is only created from validated UTF-8 sources.
402                // All constructors ensure this invariant is maintained.
403                unsafe { std::str::from_utf8_unchecked(s.as_ref()) }
404            }
405            #[cfg(feature = "bytes")]
406            InnerString::Bytes(b) => {
407                // SAFETY: Bytes variant is only created from validated UTF-8 sources.
408                // The from_bytes constructor ensures this invariant.
409                unsafe { std::str::from_utf8_unchecked(b.as_ref()) }
410            }
411        }
412    }
413
414    #[inline]
415    pub fn as_bytes(&self) -> &[u8] {
416        match &self.inner {
417            InnerString::Inline { len, data } => &data[..*len as usize],
418            InnerString::StaticStr(s) => s.as_bytes(),
419            InnerString::ArcStr(s) => s.as_bytes(),
420            InnerString::ArcString(s) => s.as_bytes(),
421            InnerString::ArcVecString(s) => s.as_ref(),
422            #[cfg(feature = "bytes")]
423            InnerString::Bytes(b) => b.as_ref(),
424        }
425    }
426
427    #[inline]
428    pub fn len(&self) -> usize {
429        match &self.inner {
430            InnerString::Inline { len, .. } => *len as usize,
431            InnerString::StaticStr(s) => s.len(),
432            InnerString::ArcStr(s) => s.len(),
433            InnerString::ArcString(s) => s.len(),
434            InnerString::ArcVecString(s) => s.len(),
435            #[cfg(feature = "bytes")]
436            InnerString::Bytes(b) => b.len(),
437        }
438    }
439
440    #[inline]
441    pub fn is_empty(&self) -> bool {
442        match &self.inner {
443            InnerString::Inline { len, .. } => *len == 0,
444            InnerString::StaticStr(s) => s.is_empty(),
445            InnerString::ArcStr(s) => s.is_empty(),
446            InnerString::ArcString(s) => s.is_empty(),
447            InnerString::ArcVecString(s) => s.is_empty(),
448            #[cfg(feature = "bytes")]
449            InnerString::Bytes(b) => b.is_empty(),
450        }
451    }
452
453    // Query methods - delegate to &str
454
455    /// Returns `true` if the string starts with the given pattern.
456    ///
457    /// When the `simd` feature is enabled, this method uses SIMD instructions
458    /// for improved performance on longer patterns.
459    ///
460    /// # Examples
461    ///
462    /// ```
463    /// use cheetah_string::CheetahString;
464    ///
465    /// let s = CheetahString::from("hello world");
466    /// assert!(s.starts_with("hello"));
467    /// assert!(!s.starts_with("world"));
468    /// assert!(s.starts_with('h'));
469    /// ```
470    #[inline]
471    pub fn starts_with<P: StrPattern>(&self, pat: P) -> bool {
472        match pat.as_str_pattern() {
473            StrPatternImpl::Char(c) => self.as_str().starts_with(c),
474            StrPatternImpl::Str(s) => {
475                #[cfg(all(feature = "simd", target_arch = "x86_64"))]
476                {
477                    crate::simd::starts_with_bytes(self.as_bytes(), s.as_bytes())
478                }
479                #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
480                {
481                    self.as_str().starts_with(s)
482                }
483            }
484        }
485    }
486
487    /// Returns `true` if the string starts with the given character.
488    ///
489    /// # Examples
490    ///
491    /// ```
492    /// use cheetah_string::CheetahString;
493    ///
494    /// let s = CheetahString::from("hello world");
495    /// assert!(s.starts_with_char('h'));
496    /// assert!(!s.starts_with_char('w'));
497    /// ```
498    #[inline]
499    pub fn starts_with_char(&self, pat: char) -> bool {
500        self.as_str().starts_with(pat)
501    }
502
503    /// Returns `true` if the string ends with the given pattern.
504    ///
505    /// When the `simd` feature is enabled, this method uses SIMD instructions
506    /// for improved performance on longer patterns.
507    ///
508    /// # Examples
509    ///
510    /// ```
511    /// use cheetah_string::CheetahString;
512    ///
513    /// let s = CheetahString::from("hello world");
514    /// assert!(s.ends_with("world"));
515    /// assert!(!s.ends_with("hello"));
516    /// assert!(s.ends_with('d'));
517    /// ```
518    #[inline]
519    pub fn ends_with<P: StrPattern>(&self, pat: P) -> bool {
520        match pat.as_str_pattern() {
521            StrPatternImpl::Char(c) => self.as_str().ends_with(c),
522            StrPatternImpl::Str(s) => {
523                #[cfg(all(feature = "simd", target_arch = "x86_64"))]
524                {
525                    crate::simd::ends_with_bytes(self.as_bytes(), s.as_bytes())
526                }
527                #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
528                {
529                    self.as_str().ends_with(s)
530                }
531            }
532        }
533    }
534
535    /// Returns `true` if the string ends with the given character.
536    ///
537    /// # Examples
538    ///
539    /// ```
540    /// use cheetah_string::CheetahString;
541    ///
542    /// let s = CheetahString::from("hello world");
543    /// assert!(s.ends_with_char('d'));
544    /// assert!(!s.ends_with_char('h'));
545    /// ```
546    #[inline]
547    pub fn ends_with_char(&self, pat: char) -> bool {
548        self.as_str().ends_with(pat)
549    }
550
551    /// Returns `true` if the string contains the given pattern.
552    ///
553    /// When the `simd` feature is enabled, this method uses SIMD instructions
554    /// for improved performance on longer patterns.
555    ///
556    /// # Examples
557    ///
558    /// ```
559    /// use cheetah_string::CheetahString;
560    ///
561    /// let s = CheetahString::from("hello world");
562    /// assert!(s.contains("llo"));
563    /// assert!(!s.contains("xyz"));
564    /// assert!(s.contains('o'));
565    /// ```
566    #[inline]
567    pub fn contains<P: StrPattern>(&self, pat: P) -> bool {
568        match pat.as_str_pattern() {
569            StrPatternImpl::Char(c) => self.as_str().contains(c),
570            StrPatternImpl::Str(s) => {
571                #[cfg(all(feature = "simd", target_arch = "x86_64"))]
572                {
573                    crate::simd::find_bytes(self.as_bytes(), s.as_bytes()).is_some()
574                }
575                #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
576                {
577                    self.as_str().contains(s)
578                }
579            }
580        }
581    }
582
583    /// Returns `true` if the string contains the given character.
584    ///
585    /// # Examples
586    ///
587    /// ```
588    /// use cheetah_string::CheetahString;
589    ///
590    /// let s = CheetahString::from("hello world");
591    /// assert!(s.contains_char('o'));
592    /// assert!(!s.contains_char('x'));
593    /// ```
594    #[inline]
595    pub fn contains_char(&self, pat: char) -> bool {
596        self.as_str().contains(pat)
597    }
598
599    /// Returns the byte index of the first occurrence of the pattern, or `None` if not found.
600    ///
601    /// When the `simd` feature is enabled, this method uses SIMD instructions
602    /// for improved performance on longer patterns.
603    ///
604    /// # Examples
605    ///
606    /// ```
607    /// use cheetah_string::CheetahString;
608    ///
609    /// let s = CheetahString::from("hello world");
610    /// assert_eq!(s.find("world"), Some(6));
611    /// assert_eq!(s.find("xyz"), None);
612    /// ```
613    #[inline]
614    pub fn find<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
615        let pat = pat.as_ref();
616        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
617        {
618            crate::simd::find_bytes(self.as_bytes(), pat.as_bytes())
619        }
620        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
621        {
622            self.as_str().find(pat)
623        }
624    }
625
626    /// Returns the byte index of the last occurrence of the pattern, or `None` if not found.
627    ///
628    /// # Examples
629    ///
630    /// ```
631    /// use cheetah_string::CheetahString;
632    ///
633    /// let s = CheetahString::from("hello hello");
634    /// assert_eq!(s.rfind("hello"), Some(6));
635    /// ```
636    #[inline]
637    pub fn rfind<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
638        self.as_str().rfind(pat.as_ref())
639    }
640
641    /// Returns a string slice with leading and trailing whitespace removed.
642    ///
643    /// # Examples
644    ///
645    /// ```
646    /// use cheetah_string::CheetahString;
647    ///
648    /// let s = CheetahString::from("  hello  ");
649    /// assert_eq!(s.trim(), "hello");
650    /// ```
651    #[inline]
652    pub fn trim(&self) -> &str {
653        self.as_str().trim()
654    }
655
656    /// Returns a string slice with leading whitespace removed.
657    ///
658    /// # Examples
659    ///
660    /// ```
661    /// use cheetah_string::CheetahString;
662    ///
663    /// let s = CheetahString::from("  hello");
664    /// assert_eq!(s.trim_start(), "hello");
665    /// ```
666    #[inline]
667    pub fn trim_start(&self) -> &str {
668        self.as_str().trim_start()
669    }
670
671    /// Returns a string slice with trailing whitespace removed.
672    ///
673    /// # Examples
674    ///
675    /// ```
676    /// use cheetah_string::CheetahString;
677    ///
678    /// let s = CheetahString::from("hello  ");
679    /// assert_eq!(s.trim_end(), "hello");
680    /// ```
681    #[inline]
682    pub fn trim_end(&self) -> &str {
683        self.as_str().trim_end()
684    }
685
686    /// Splits the string by the given pattern.
687    ///
688    /// # Examples
689    ///
690    /// ```
691    /// use cheetah_string::CheetahString;
692    ///
693    /// let s = CheetahString::from("a,b,c");
694    /// let parts: Vec<&str> = s.split(",").collect();
695    /// assert_eq!(parts, vec!["a", "b", "c"]);
696    /// let parts2: Vec<&str> = s.split(',').collect();
697    /// assert_eq!(parts2, vec!["a", "b", "c"]);
698    /// ```
699    #[inline]
700    pub fn split<'a, P>(&'a self, pat: P) -> SplitWrapper<'a>
701    where
702        P: SplitPattern<'a>,
703    {
704        pat.split_str(self.as_str())
705    }
706
707    /// Returns an iterator over the lines of the string.
708    ///
709    /// # Examples
710    ///
711    /// ```
712    /// use cheetah_string::CheetahString;
713    ///
714    /// let s = CheetahString::from("line1\nline2\nline3");
715    /// let lines: Vec<&str> = s.lines().collect();
716    /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
717    /// ```
718    #[inline]
719    pub fn lines(&self) -> impl Iterator<Item = &str> {
720        self.as_str().lines()
721    }
722
723    /// Returns an iterator over the characters of the string.
724    ///
725    /// # Examples
726    ///
727    /// ```
728    /// use cheetah_string::CheetahString;
729    ///
730    /// let s = CheetahString::from("hello");
731    /// let chars: Vec<char> = s.chars().collect();
732    /// assert_eq!(chars, vec!['h', 'e', 'l', 'l', 'o']);
733    /// let reversed: Vec<char> = s.chars().rev().collect();
734    /// assert_eq!(reversed, vec!['o', 'l', 'l', 'e', 'h']);
735    /// ```
736    #[inline]
737    pub fn chars(&self) -> std::str::Chars<'_> {
738        self.as_str().chars()
739    }
740
741    // Transformation methods - create new CheetahString
742
743    /// Returns a new `CheetahString` with all characters converted to uppercase.
744    ///
745    /// # Examples
746    ///
747    /// ```
748    /// use cheetah_string::CheetahString;
749    ///
750    /// let s = CheetahString::from("hello");
751    /// assert_eq!(s.to_uppercase(), "HELLO");
752    /// ```
753    #[inline]
754    pub fn to_uppercase(&self) -> CheetahString {
755        CheetahString::from_string(self.as_str().to_uppercase())
756    }
757
758    /// Returns a new `CheetahString` with all characters converted to lowercase.
759    ///
760    /// # Examples
761    ///
762    /// ```
763    /// use cheetah_string::CheetahString;
764    ///
765    /// let s = CheetahString::from("HELLO");
766    /// assert_eq!(s.to_lowercase(), "hello");
767    /// ```
768    #[inline]
769    pub fn to_lowercase(&self) -> CheetahString {
770        CheetahString::from_string(self.as_str().to_lowercase())
771    }
772
773    /// Replaces all occurrences of a pattern with another string.
774    ///
775    /// # Examples
776    ///
777    /// ```
778    /// use cheetah_string::CheetahString;
779    ///
780    /// let s = CheetahString::from("hello world");
781    /// assert_eq!(s.replace("world", "rust"), "hello rust");
782    /// ```
783    #[inline]
784    pub fn replace<P: AsRef<str>>(&self, from: P, to: &str) -> CheetahString {
785        CheetahString::from_string(self.as_str().replace(from.as_ref(), to))
786    }
787
788    /// Returns a new `CheetahString` with the specified range replaced.
789    ///
790    /// # Examples
791    ///
792    /// ```
793    /// use cheetah_string::CheetahString;
794    ///
795    /// let s = CheetahString::from("hello world");
796    /// assert_eq!(s.replacen("l", "L", 1), "heLlo world");
797    /// ```
798    #[inline]
799    pub fn replacen<P: AsRef<str>>(&self, from: P, to: &str, count: usize) -> CheetahString {
800        CheetahString::from_string(self.as_str().replacen(from.as_ref(), to, count))
801    }
802
803    /// Returns a substring as a new `CheetahString`.
804    ///
805    /// # Panics
806    ///
807    /// Panics if the indices are not on valid UTF-8 character boundaries.
808    ///
809    /// # Examples
810    ///
811    /// ```
812    /// use cheetah_string::CheetahString;
813    ///
814    /// let s = CheetahString::from("hello world");
815    /// assert_eq!(s.substring(0, 5), "hello");
816    /// assert_eq!(s.substring(6, 11), "world");
817    /// ```
818    #[inline]
819    pub fn substring(&self, start: usize, end: usize) -> CheetahString {
820        CheetahString::from_slice(&self.as_str()[start..end])
821    }
822
823    /// Repeats the string `n` times.
824    ///
825    /// # Examples
826    ///
827    /// ```
828    /// use cheetah_string::CheetahString;
829    ///
830    /// let s = CheetahString::from("abc");
831    /// assert_eq!(s.repeat(3), "abcabcabc");
832    /// ```
833    #[inline]
834    pub fn repeat(&self, n: usize) -> CheetahString {
835        CheetahString::from_string(self.as_str().repeat(n))
836    }
837
838    // Incremental building methods
839
840    /// Creates a new `CheetahString` with the specified capacity.
841    ///
842    /// The string will be able to hold at least `capacity` bytes without reallocating.
843    /// If `capacity` is less than or equal to the inline capacity (23 bytes),
844    /// an empty inline string is returned.
845    ///
846    /// # Examples
847    ///
848    /// ```
849    /// use cheetah_string::CheetahString;
850    ///
851    /// let mut s = CheetahString::with_capacity(100);
852    /// s.push_str("hello");
853    /// assert_eq!(s, "hello");
854    /// ```
855    #[inline]
856    pub fn with_capacity(capacity: usize) -> Self {
857        if capacity <= INLINE_CAPACITY {
858            CheetahString::empty()
859        } else {
860            CheetahString::from_string(String::with_capacity(capacity))
861        }
862    }
863
864    /// Appends a string slice to the end of this `CheetahString`.
865    ///
866    /// This method is optimized for incremental building and will:
867    /// - Mutate inline storage when possible
868    /// - Mutate unique Arc<String> in-place when available
869    /// - Only allocate when necessary
870    ///
871    /// # Examples
872    ///
873    /// ```
874    /// use cheetah_string::CheetahString;
875    ///
876    /// let mut s = CheetahString::from("Hello");
877    /// s.push_str(" ");
878    /// s.push_str("World");
879    /// assert_eq!(s, "Hello World");
880    /// ```
    #[inline]
    pub fn push_str(&mut self, string: &str) {
        // Appending is implemented by the `+=` operator for `&str`, which is
        // defined outside this block.
        *self += string;
    }
885
886    /// Reserves capacity for at least `additional` more bytes.
887    ///
888    /// This method will modify the internal representation if needed to ensure
889    /// that the string can hold at least `additional` more bytes without reallocating.
890    ///
891    /// # Examples
892    ///
893    /// ```
894    /// use cheetah_string::CheetahString;
895    ///
896    /// let mut s = CheetahString::from("hello");
897    /// s.reserve(100);
898    /// s.push_str(" world");
899    /// ```
900    #[inline]
901    pub fn reserve(&mut self, additional: usize) {
902        let new_len = self.len() + additional;
903
904        // If it still fits inline, nothing to do
905        if new_len <= INLINE_CAPACITY {
906            return;
907        }
908
909        match &mut self.inner {
910            InnerString::Inline { .. } => {
911                // Convert inline to Arc<String> with capacity
912                let mut s = String::with_capacity(new_len);
913                s.push_str(self.as_str());
914                *self = CheetahString {
915                    inner: InnerString::ArcString(Arc::new(s)),
916                };
917            }
918            InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
919                // Reserve in the unique Arc<String>
920                if let Some(s) = Arc::get_mut(arc) {
921                    s.reserve(additional);
922                }
923            }
924            InnerString::StaticStr(_) | InnerString::ArcStr(_) => {
925                // Convert to Arc<String> with capacity
926                let mut s = String::with_capacity(new_len);
927                s.push_str(self.as_str());
928                *self = CheetahString {
929                    inner: InnerString::ArcString(Arc::new(s)),
930                };
931            }
932            _ => {
933                // For shared Arc or other types, convert if needed
934                if Arc::strong_count(match &self.inner {
935                    InnerString::ArcString(arc) => arc,
936                    _ => return,
937                }) > 1
938                {
939                    let mut s = String::with_capacity(new_len);
940                    s.push_str(self.as_str());
941                    *self = CheetahString {
942                        inner: InnerString::ArcString(Arc::new(s)),
943                    };
944                }
945            }
946        }
947    }
948}
949
950impl PartialEq for CheetahString {
951    #[inline]
952    fn eq(&self, other: &Self) -> bool {
953        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
954        {
955            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
956        }
957        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
958        {
959            self.as_str() == other.as_str()
960        }
961    }
962}
963
964impl PartialEq<str> for CheetahString {
965    #[inline]
966    fn eq(&self, other: &str) -> bool {
967        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
968        {
969            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
970        }
971        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
972        {
973            self.as_str() == other
974        }
975    }
976}
977
978impl PartialEq<String> for CheetahString {
979    #[inline]
980    fn eq(&self, other: &String) -> bool {
981        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
982        {
983            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
984        }
985        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
986        {
987            self.as_str() == other.as_str()
988        }
989    }
990}
991
992impl PartialEq<Vec<u8>> for CheetahString {
993    #[inline]
994    fn eq(&self, other: &Vec<u8>) -> bool {
995        self.as_bytes() == other.as_slice()
996    }
997}
998
999impl<'a> PartialEq<&'a str> for CheetahString {
1000    #[inline]
1001    fn eq(&self, other: &&'a str) -> bool {
1002        self.as_str() == *other
1003    }
1004}
1005
1006impl PartialEq<CheetahString> for str {
1007    #[inline]
1008    fn eq(&self, other: &CheetahString) -> bool {
1009        self == other.as_str()
1010    }
1011}
1012
1013impl PartialEq<CheetahString> for String {
1014    #[inline]
1015    fn eq(&self, other: &CheetahString) -> bool {
1016        self.as_str() == other.as_str()
1017    }
1018}
1019
1020impl PartialEq<CheetahString> for &str {
1021    #[inline]
1022    fn eq(&self, other: &CheetahString) -> bool {
1023        *self == other.as_str()
1024    }
1025}
1026
// Marker impl: `CheetahString` equality is a full equivalence relation, which
// lets the type be used where `Eq` is required (e.g. as a hash-map key).
impl Eq for CheetahString {}
1028
1029impl PartialOrd for CheetahString {
1030    #[inline]
1031    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1032        Some(self.cmp(other))
1033    }
1034}
1035
1036impl Ord for CheetahString {
1037    #[inline]
1038    fn cmp(&self, other: &Self) -> Ordering {
1039        self.as_str().cmp(other.as_str())
1040    }
1041}
1042
1043impl Hash for CheetahString {
1044    #[inline]
1045    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1046        self.as_str().hash(state);
1047    }
1048}
1049
1050impl Display for CheetahString {
1051    #[inline]
1052    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1053        self.as_str().fmt(f)
1054    }
1055}
1056
1057impl std::fmt::Debug for CheetahString {
1058    #[inline]
1059    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1060        fmt::Debug::fmt(self.as_str(), f)
1061    }
1062}
1063
1064impl Borrow<str> for CheetahString {
1065    #[inline]
1066    fn borrow(&self) -> &str {
1067        self.as_str()
1068    }
1069}
1070
1071// Add trait implementations for string concatenation
1072
1073impl std::ops::Add<&str> for CheetahString {
1074    type Output = CheetahString;
1075
1076    /// Concatenates a `CheetahString` with a string slice.
1077    ///
1078    /// # Examples
1079    ///
1080    /// ```
1081    /// use cheetah_string::CheetahString;
1082    ///
1083    /// let s = CheetahString::from("Hello");
1084    /// let result = s + " World";
1085    /// assert_eq!(result, "Hello World");
1086    /// ```
1087    #[inline]
1088    fn add(self, rhs: &str) -> Self::Output {
1089        let total_len = self.len() + rhs.len();
1090
1091        // Fast path: result fits in inline storage
1092        if total_len <= INLINE_CAPACITY {
1093            let mut data = [0u8; INLINE_CAPACITY];
1094            let self_bytes = self.as_bytes();
1095            data[..self_bytes.len()].copy_from_slice(self_bytes);
1096            data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1097            return CheetahString {
1098                inner: InnerString::Inline {
1099                    len: total_len as u8,
1100                    data,
1101                },
1102            };
1103        }
1104
1105        // Slow path: allocate for long result
1106        let mut result = String::with_capacity(total_len);
1107        result.push_str(self.as_str());
1108        result.push_str(rhs);
1109        CheetahString::from_string(result)
1110    }
1111}
1112
1113impl std::ops::Add<&CheetahString> for CheetahString {
1114    type Output = CheetahString;
1115
1116    /// Concatenates two `CheetahString` values.
1117    ///
1118    /// # Examples
1119    ///
1120    /// ```
1121    /// use cheetah_string::CheetahString;
1122    ///
1123    /// let s1 = CheetahString::from("Hello");
1124    /// let s2 = CheetahString::from(" World");
1125    /// let result = s1 + &s2;
1126    /// assert_eq!(result, "Hello World");
1127    /// ```
1128    #[inline]
1129    fn add(self, rhs: &CheetahString) -> Self::Output {
1130        let total_len = self.len() + rhs.len();
1131
1132        // Fast path: result fits in inline storage
1133        if total_len <= INLINE_CAPACITY {
1134            let mut data = [0u8; INLINE_CAPACITY];
1135            let self_bytes = self.as_bytes();
1136            data[..self_bytes.len()].copy_from_slice(self_bytes);
1137            data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1138            return CheetahString {
1139                inner: InnerString::Inline {
1140                    len: total_len as u8,
1141                    data,
1142                },
1143            };
1144        }
1145
1146        // Slow path: allocate for long result
1147        let mut result = String::with_capacity(total_len);
1148        result.push_str(self.as_str());
1149        result.push_str(rhs.as_str());
1150        CheetahString::from_string(result)
1151    }
1152}
1153
1154impl std::ops::Add<String> for CheetahString {
1155    type Output = CheetahString;
1156
1157    /// Concatenates a `CheetahString` with a `String`.
1158    ///
1159    /// # Examples
1160    ///
1161    /// ```
1162    /// use cheetah_string::CheetahString;
1163    ///
1164    /// let s = CheetahString::from("Hello");
1165    /// let result = s + String::from(" World");
1166    /// assert_eq!(result, "Hello World");
1167    /// ```
1168    #[inline]
1169    fn add(self, rhs: String) -> Self::Output {
1170        let total_len = self.len() + rhs.len();
1171
1172        // Fast path: result fits in inline storage
1173        if total_len <= INLINE_CAPACITY {
1174            let mut data = [0u8; INLINE_CAPACITY];
1175            let self_bytes = self.as_bytes();
1176            data[..self_bytes.len()].copy_from_slice(self_bytes);
1177            data[self_bytes.len()..total_len].copy_from_slice(rhs.as_bytes());
1178            return CheetahString {
1179                inner: InnerString::Inline {
1180                    len: total_len as u8,
1181                    data,
1182                },
1183            };
1184        }
1185
1186        // Slow path: allocate for long result
1187        let mut result = String::with_capacity(total_len);
1188        result.push_str(self.as_str());
1189        result.push_str(&rhs);
1190        CheetahString::from_string(result)
1191    }
1192}
1193
1194impl std::ops::AddAssign<&str> for CheetahString {
1195    /// Appends a string slice to a `CheetahString`.
1196    ///
1197    /// # Examples
1198    ///
1199    /// ```
1200    /// use cheetah_string::CheetahString;
1201    ///
1202    /// let mut s = CheetahString::from("Hello");
1203    /// s += " World";
1204    /// assert_eq!(s, "Hello World");
1205    /// ```
1206    #[inline]
1207    fn add_assign(&mut self, rhs: &str) {
1208        let total_len = self.len() + rhs.len();
1209
1210        match &mut self.inner {
1211            // Fast path 1: Both self and result fit in inline storage
1212            InnerString::Inline { len, data } if total_len <= INLINE_CAPACITY => {
1213                // Mutate inline buffer directly
1214                data[*len as usize..total_len].copy_from_slice(rhs.as_bytes());
1215                *len = total_len as u8;
1216                return;
1217            }
1218            // Fast path 2: Self is unique Arc<String>, mutate in-place
1219            InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
1220                // SAFETY: strong_count == 1 guarantees exclusive access
1221                if let Some(s) = Arc::get_mut(arc) {
1222                    s.push_str(rhs);
1223                    return;
1224                }
1225            }
1226            _ => {}
1227        }
1228
1229        // Slow path: allocate new string
1230        let mut result = String::with_capacity(total_len);
1231        result.push_str(self.as_str());
1232        result.push_str(rhs);
1233        *self = CheetahString::from_string(result);
1234    }
1235}
1236
1237impl std::ops::AddAssign<&CheetahString> for CheetahString {
1238    /// Appends a `CheetahString` to another `CheetahString`.
1239    ///
1240    /// # Examples
1241    ///
1242    /// ```
1243    /// use cheetah_string::CheetahString;
1244    ///
1245    /// let mut s1 = CheetahString::from("Hello");
1246    /// let s2 = CheetahString::from(" World");
1247    /// s1 += &s2;
1248    /// assert_eq!(s1, "Hello World");
1249    /// ```
1250    #[inline]
1251    fn add_assign(&mut self, rhs: &CheetahString) {
1252        let total_len = self.len() + rhs.len();
1253
1254        match &mut self.inner {
1255            // Fast path 1: Both self and result fit in inline storage
1256            InnerString::Inline { len, data } if total_len <= INLINE_CAPACITY => {
1257                // Mutate inline buffer directly
1258                data[*len as usize..total_len].copy_from_slice(rhs.as_bytes());
1259                *len = total_len as u8;
1260                return;
1261            }
1262            // Fast path 2: Self is unique Arc<String>, mutate in-place
1263            InnerString::ArcString(arc) if Arc::strong_count(arc) == 1 => {
1264                // SAFETY: strong_count == 1 guarantees exclusive access
1265                if let Some(s) = Arc::get_mut(arc) {
1266                    s.push_str(rhs.as_str());
1267                    return;
1268                }
1269            }
1270            _ => {}
1271        }
1272
1273        // Slow path: allocate new string
1274        let mut result = String::with_capacity(total_len);
1275        result.push_str(self.as_str());
1276        result.push_str(rhs.as_str());
1277        *self = CheetahString::from_string(result);
1278    }
1279}
1280
/// Maximum number of bytes that can be stored inline, without a heap allocation.
///
/// This is the length of the `data` array in `InnerString::Inline`. Together with
/// the one-byte `len` field the inline payload is 23 + 1 = 24 bytes.
const INLINE_CAPACITY: usize = 23;
1283
/// The `InnerString` enum represents the different ways a `CheetahString` can store its data.
///
/// This enum uses Small String Optimization (SSO) to avoid heap allocations for short strings.
///
/// Variants:
///
/// * `Inline` - Inline storage for strings <= 23 bytes (zero heap allocations).
/// * `StaticStr(&'static str)` - A static string slice (zero heap allocations).
/// * `ArcStr(Arc<str>)` - A reference-counted string slice (single heap allocation, optimized).
/// * `ArcString(Arc<String>)` - A reference-counted string (for backwards compatibility).
/// * `ArcVecString(Arc<Vec<u8>>)` - A reference-counted byte vector.
/// * `Bytes(bytes::Bytes)` - A byte buffer (available when the "bytes" feature is enabled).
#[derive(Clone)]
pub(super) enum InnerString {
    /// Inline storage for short strings (up to `INLINE_CAPACITY` = 23 bytes).
    /// Stores the length and data directly without heap allocation.
    Inline {
        // Number of leading bytes of `data` that are in use.
        len: u8,
        // Fixed-size buffer holding the string bytes; the rest is zero-filled by the constructors in this file.
        data: [u8; INLINE_CAPACITY],
    },
    /// Static string slice with 'static lifetime.
    StaticStr(&'static str),
    /// Reference-counted string slice (single heap allocation).
    /// Preferred over ArcString for long strings created from owned data.
    ArcStr(Arc<str>),
    /// Reference-counted heap-allocated string.
    /// Kept for backwards compatibility and when Arc<String> is explicitly provided.
    ArcString(Arc<String>),
    /// Reference-counted heap-allocated byte vector.
    // NOTE(review): presumably expected to contain valid UTF-8 — confirm against the constructors.
    ArcVecString(Arc<Vec<u8>>),
    /// Bytes type integration (requires "bytes" feature).
    #[cfg(feature = "bytes")]
    Bytes(bytes::Bytes),
}
1318
// Sealed trait pattern to support both &str and char in starts_with/ends_with/contains.
// The module itself is private, so the marker traits below cannot be named
// (and therefore cannot be implemented) outside this file's module.
mod private {
    // Supertrait of `StrPattern`; implemented for the accepted pattern types.
    pub trait Sealed {}
    impl Sealed for char {}
    impl Sealed for &str {}
    impl Sealed for &String {}

    // Supertrait of `SplitPattern`; implemented for the accepted split patterns.
    pub trait SplitSealed {}
    impl SplitSealed for char {}
    impl SplitSealed for &str {}
}
1330
/// A pattern that can be used with `starts_with` and `ends_with` methods.
///
/// Implemented in this file for `char`, `&str` and `&String`. The trait is
/// sealed through `private::Sealed`, so it cannot be implemented elsewhere.
pub trait StrPattern: private::Sealed {
    /// Converts the pattern into the internal `StrPatternImpl` representation.
    #[doc(hidden)]
    fn as_str_pattern(&self) -> StrPatternImpl<'_>;
}
1336
/// Internal representation of a `StrPattern`: either a single character or a
/// borrowed string slice.
#[doc(hidden)]
pub enum StrPatternImpl<'a> {
    /// A single-character pattern.
    Char(char),
    /// A string-slice pattern borrowed for `'a`.
    Str(&'a str),
}
1342
1343impl StrPattern for char {
1344    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1345        StrPatternImpl::Char(*self)
1346    }
1347}
1348
1349impl StrPattern for &str {
1350    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1351        StrPatternImpl::Str(self)
1352    }
1353}
1354
1355impl StrPattern for &String {
1356    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1357        StrPatternImpl::Str(self.as_str())
1358    }
1359}
1360
/// A pattern that can be used with `split` method.
///
/// Implemented in this file for `char` and `&str`. The trait is sealed through
/// `private::SplitSealed`, so it cannot be implemented elsewhere.
pub trait SplitPattern<'a>: private::SplitSealed {
    /// Consumes the pattern and returns an iterator over the pieces of `s`
    /// separated by it.
    #[doc(hidden)]
    fn split_str(self, s: &'a str) -> SplitWrapper<'a>;
}
1366
1367impl SplitPattern<'_> for char {
1368    fn split_str(self, s: &str) -> SplitWrapper<'_> {
1369        SplitWrapper::Char(s.split(self))
1370    }
1371}
1372
1373impl<'a> SplitPattern<'a> for &'a str {
1374    fn split_str(self, s: &'a str) -> SplitWrapper<'a> {
1375        let empty_pattern_state = if self.is_empty() {
1376            Some(EmptyPatternState {
1377                chars: s.char_indices(),
1378                original: s,
1379                started: false,
1380            })
1381        } else {
1382            None
1383        };
1384
1385        SplitWrapper::Str(SplitStr {
1386            string: s,
1387            pattern: self,
1388            finished: false,
1389            empty_pattern_state,
1390        })
1391    }
1392}
1393
/// Helper struct for splitting strings by a string pattern.
///
/// Created by the `SplitPattern` impl for `&str`; yields the pieces of the
/// input through its `Iterator` implementation.
pub struct SplitStr<'a> {
    // The part of the input that has not been split yet (non-empty pattern case).
    string: &'a str,
    // The separator being searched for.
    pattern: &'a str,
    // Set once the final piece has been yielded; `next` returns `None` afterwards.
    finished: bool,
    /// For empty pattern, we need to iterate over chars.
    /// `Some` exactly when `pattern` is empty (as constructed by `split_str`).
    empty_pattern_state: Option<EmptyPatternState<'a>>,
}
1402
// Iteration state used by `SplitStr` when the separator is the empty string.
#[derive(Clone)]
struct EmptyPatternState<'a> {
    // Byte offsets and characters of `original`, consumed one per yielded piece.
    chars: std::str::CharIndices<'a>,
    // The full input string; yielded character pieces are sliced from it.
    original: &'a str,
    // Whether the leading empty piece has already been yielded.
    started: bool,
}
1409
1410impl<'a> Iterator for SplitStr<'a> {
1411    type Item = &'a str;
1412
1413    fn next(&mut self) -> Option<Self::Item> {
1414        if self.finished {
1415            return None;
1416        }
1417
1418        // Handle empty pattern case (split between every character)
1419        if self.pattern.is_empty() {
1420            if let Some(ref mut state) = self.empty_pattern_state {
1421                if !state.started {
1422                    state.started = true;
1423                    // First element is always empty string before first char
1424                    return Some("");
1425                }
1426
1427                match state.chars.next() {
1428                    Some((pos, ch)) => {
1429                        let char_end = pos + ch.len_utf8();
1430                        let result = &state.original[pos..char_end];
1431                        Some(result)
1432                    }
1433                    None => {
1434                        self.finished = true;
1435                        // Last element is empty string after last char
1436                        Some("")
1437                    }
1438                }
1439            } else {
1440                unreachable!("empty_pattern_state should be Some for empty pattern")
1441            }
1442        } else {
1443            // Normal case: non-empty pattern
1444            match self.string.find(self.pattern) {
1445                Some(pos) => {
1446                    let result = &self.string[..pos];
1447                    self.string = &self.string[pos + self.pattern.len()..];
1448                    Some(result)
1449                }
1450                None => {
1451                    self.finished = true;
1452                    Some(self.string)
1453                }
1454            }
1455        }
1456    }
1457}
1458
/// Wrapper for split iterator that supports both char and str patterns.
///
/// Returned by `SplitPattern::split_str`; iterate it to obtain the pieces.
pub enum SplitWrapper<'a> {
    /// Split on a `char`, backed by the standard library's `str::split`.
    #[doc(hidden)]
    Char(std::str::Split<'a, char>),
    /// Split on a `&str`, backed by this file's `SplitStr`.
    #[doc(hidden)]
    Str(SplitStr<'a>),
}
1466
1467impl<'a> Iterator for SplitWrapper<'a> {
1468    type Item = &'a str;
1469
1470    fn next(&mut self) -> Option<Self::Item> {
1471        match self {
1472            SplitWrapper::Char(iter) => iter.next(),
1473            SplitWrapper::Str(iter) => iter.next(),
1474        }
1475    }
1476}
1477
1478impl<'a> DoubleEndedIterator for SplitWrapper<'a> {
1479    fn next_back(&mut self) -> Option<Self::Item> {
1480        match self {
1481            SplitWrapper::Char(iter) => iter.next_back(),
1482            SplitWrapper::Str(_) => {
1483                // String pattern split doesn't support reverse iteration
1484                // This is consistent with std::str::Split<&str>
1485                panic!("split with string pattern does not support reverse iteration")
1486            }
1487        }
1488    }
1489}