Skip to main content

cheetah_string/
cheetah_string.rs

1use alloc::borrow::Cow;
2use alloc::string::{ParseError, String, ToString};
3use alloc::sync::Arc;
4use alloc::vec::Vec;
5use core::borrow::Borrow;
6use core::cmp::Ordering;
7use core::fmt::{self, Display};
8use core::hash::{Hash, Hasher};
9use core::ops::{Add, AddAssign, Deref};
10use core::str::{self, FromStr, Utf8Error};
11
/// A string type that wraps a single `InnerString` representation.
///
/// Within this file the representation is built as inline bytes, a
/// `&'static str`, a shared `Arc<str>`, or an owned `String`.
#[derive(Clone)]
#[repr(transparent)]
pub struct CheetahString {
    // Sole field; `#[repr(transparent)]` gives the struct the same layout as this field.
    pub(super) inner: InnerString,
}
17
18impl Default for CheetahString {
19    fn default() -> Self {
20        CheetahString {
21            inner: InnerString::Inline {
22                len: 0,
23                data: [0; INLINE_CAPACITY],
24            },
25        }
26    }
27}
28
29impl From<String> for CheetahString {
30    #[inline]
31    fn from(s: String) -> Self {
32        CheetahString::from_string(s)
33    }
34}
35
36impl From<Arc<String>> for CheetahString {
37    #[inline]
38    fn from(s: Arc<String>) -> Self {
39        CheetahString::from_arc_string(s)
40    }
41}
42
43impl<'a> From<&'a str> for CheetahString {
44    #[inline]
45    fn from(s: &'a str) -> Self {
46        CheetahString::from_slice(s)
47    }
48}
49
50impl<'a> TryFrom<&'a [u8]> for CheetahString {
51    type Error = Utf8Error;
52
53    #[inline]
54    fn try_from(b: &'a [u8]) -> Result<Self, Self::Error> {
55        CheetahString::try_from_bytes(b)
56    }
57}
58
59impl FromStr for CheetahString {
60    type Err = ParseError;
61    #[inline]
62    fn from_str(s: &str) -> Result<Self, Self::Err> {
63        Ok(CheetahString::from_slice(s))
64    }
65}
66
67impl TryFrom<Vec<u8>> for CheetahString {
68    type Error = Utf8Error;
69
70    #[inline]
71    fn try_from(v: Vec<u8>) -> Result<Self, Self::Error> {
72        CheetahString::try_from_vec(v)
73    }
74}
75
76impl From<Cow<'static, str>> for CheetahString {
77    #[inline]
78    fn from(cow: Cow<'static, str>) -> Self {
79        match cow {
80            Cow::Borrowed(s) => CheetahString::from_static_str(s),
81            Cow::Owned(s) => CheetahString::from_string(s),
82        }
83    }
84}
85
86impl From<Cow<'_, String>> for CheetahString {
87    #[inline]
88    fn from(cow: Cow<'_, String>) -> Self {
89        match cow {
90            Cow::Borrowed(s) => CheetahString::from_slice(s),
91            Cow::Owned(s) => CheetahString::from_string(s),
92        }
93    }
94}
95
96impl From<char> for CheetahString {
97    /// Allocates an owned [`CheetahString`] from a single character.
98    ///
99    /// # Example
100    /// ```rust
101    /// use cheetah_string::CheetahString;
102    /// let c: char = 'a';
103    /// let s: CheetahString = CheetahString::from(c);
104    /// assert_eq!("a", &s[..]);
105    /// ```
106    #[inline]
107    fn from(c: char) -> Self {
108        CheetahString::from_string(c.to_string())
109    }
110}
111
112impl<'a> FromIterator<&'a char> for CheetahString {
113    #[inline]
114    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> CheetahString {
115        let mut buf = String::new();
116        buf.extend(iter);
117        CheetahString::from_string(buf)
118    }
119}
120
121impl<'a> FromIterator<&'a str> for CheetahString {
122    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> CheetahString {
123        let mut buf = String::new();
124        buf.extend(iter);
125        CheetahString::from_string(buf)
126    }
127}
128
129impl FromIterator<String> for CheetahString {
130    #[inline]
131    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
132        let mut buf = String::new();
133        buf.extend(iter);
134        CheetahString::from_string(buf)
135    }
136}
137
138impl<'a> FromIterator<&'a String> for CheetahString {
139    #[inline]
140    fn from_iter<T: IntoIterator<Item = &'a String>>(iter: T) -> Self {
141        let mut buf = String::new();
142        buf.extend(iter.into_iter().map(|s| s.as_str()));
143        CheetahString::from_string(buf)
144    }
145}
146
#[cfg(feature = "bytes")]
impl TryFrom<bytes::Bytes> for CheetahString {
    type Error = Utf8Error;

    /// Validates the buffer as UTF-8 and converts it via
    /// `CheetahString::try_from_bytes_buf`.
    #[inline]
    fn try_from(b: bytes::Bytes) -> Result<Self, Self::Error> {
        Self::try_from_bytes_buf(b)
    }
}
156
157impl From<&CheetahString> for CheetahString {
158    #[inline]
159    fn from(s: &CheetahString) -> Self {
160        s.clone()
161    }
162}
163
164impl From<CheetahString> for String {
165    #[inline]
166    fn from(s: CheetahString) -> Self {
167        match s {
168            CheetahString {
169                inner: InnerString::Inline { len, data },
170            } => {
171                // SAFETY: Inline strings are always valid UTF-8
172                unsafe { String::from_utf8_unchecked(data[..len as usize].to_vec()) }
173            }
174            CheetahString {
175                inner: InnerString::Static(s),
176            } => s.to_string(),
177            CheetahString {
178                inner: InnerString::Shared(s),
179            } => s.to_string(),
180            CheetahString {
181                inner: InnerString::Owned(s),
182            } => s,
183        }
184    }
185}
186
187impl Deref for CheetahString {
188    type Target = str;
189
190    #[inline]
191    fn deref(&self) -> &Self::Target {
192        self.as_str()
193    }
194}
195
196impl AsRef<str> for CheetahString {
197    #[inline]
198    fn as_ref(&self) -> &str {
199        self.as_str()
200    }
201}
202
203impl AsRef<[u8]> for CheetahString {
204    #[inline]
205    fn as_ref(&self) -> &[u8] {
206        self.as_bytes()
207    }
208}
209
210impl AsRef<CheetahString> for CheetahString {
211    #[inline]
212    fn as_ref(&self) -> &CheetahString {
213        self
214    }
215}
216
217impl From<&String> for CheetahString {
218    #[inline]
219    fn from(s: &String) -> Self {
220        CheetahString::from_slice(s)
221    }
222}
223
224impl CheetahString {
225    #[inline]
226    pub const fn empty() -> Self {
227        CheetahString {
228            inner: InnerString::Inline {
229                len: 0,
230                data: [0; INLINE_CAPACITY],
231            },
232        }
233    }
234
235    #[inline]
236    pub fn new() -> Self {
237        CheetahString::default()
238    }
239
240    #[inline]
241    pub const fn from_static_str(s: &'static str) -> Self {
242        CheetahString {
243            inner: InnerString::Static(s),
244        }
245    }
246
247    #[deprecated(
248        since = "1.1.0",
249        note = "use try_from_vec for checked construction or from_utf8_unchecked_vec for an explicit unsafe constructor"
250    )]
251    pub fn from_vec(s: Vec<u8>) -> Self {
252        CheetahString::try_from_vec(s).expect(
253            "CheetahString::from_vec requires valid UTF-8; use try_from_vec for fallible construction",
254        )
255    }
256
257    /// Creates a `CheetahString` from a byte vector without validating UTF-8.
258    ///
259    /// # Safety
260    ///
261    /// The caller must guarantee that `s` contains valid UTF-8 for the entire
262    /// lifetime of the returned `CheetahString`.
263    #[inline]
264    pub unsafe fn from_utf8_unchecked_vec(s: Vec<u8>) -> Self {
265        CheetahString::from_validated_vec_unchecked(s)
266    }
267
268    #[inline]
269    fn from_validated_vec_unchecked(s: Vec<u8>) -> Self {
270        if s.len() <= INLINE_CAPACITY {
271            let mut data = [0u8; INLINE_CAPACITY];
272            data[..s.len()].copy_from_slice(&s);
273            CheetahString {
274                inner: InnerString::Inline {
275                    len: s.len() as u8,
276                    data,
277                },
278            }
279        } else {
280            // SAFETY: Callers validate UTF-8 before reaching this helper.
281            CheetahString::from_builder_string(unsafe { String::from_utf8_unchecked(s) })
282        }
283    }
284
285    /// Creates a `CheetahString` from a byte vector with UTF-8 validation.
286    ///
287    /// # Errors
288    ///
289    /// Returns an error if the bytes are not valid UTF-8.
290    ///
291    /// # Examples
292    ///
293    /// ```
294    /// use cheetah_string::CheetahString;
295    ///
296    /// let bytes = vec![104, 101, 108, 108, 111]; // "hello"
297    /// let s = CheetahString::try_from_vec(bytes).unwrap();
298    /// assert_eq!(s, "hello");
299    ///
300    /// let invalid = vec![0xFF, 0xFE];
301    /// assert!(CheetahString::try_from_vec(invalid).is_err());
302    /// ```
303    pub fn try_from_vec(v: Vec<u8>) -> Result<Self, Utf8Error> {
304        str::from_utf8(&v)?;
305        Ok(CheetahString::from_validated_vec_unchecked(v))
306    }
307
308    /// Creates a `CheetahString` from a byte slice with UTF-8 validation.
309    ///
310    /// # Errors
311    ///
312    /// Returns an error if the bytes are not valid UTF-8.
313    ///
314    /// # Examples
315    ///
316    /// ```
317    /// use cheetah_string::CheetahString;
318    ///
319    /// let bytes = b"hello";
320    /// let s = CheetahString::try_from_bytes(bytes).unwrap();
321    /// assert_eq!(s, "hello");
322    ///
323    /// let invalid = &[0xFF, 0xFE];
324    /// assert!(CheetahString::try_from_bytes(invalid).is_err());
325    /// ```
326    pub fn try_from_bytes(b: &[u8]) -> Result<Self, Utf8Error> {
327        let s = str::from_utf8(b)?;
328        Ok(CheetahString::from_slice(s))
329    }
330
331    /// Creates a `CheetahString` from a byte slice without validating UTF-8.
332    ///
333    /// # Safety
334    ///
335    /// The caller must guarantee that `b` contains valid UTF-8.
336    #[inline]
337    pub unsafe fn from_utf8_unchecked_bytes(b: &[u8]) -> Self {
338        // SAFETY: The caller guarantees that `b` contains valid UTF-8.
339        CheetahString::from_slice(unsafe { str::from_utf8_unchecked(b) })
340    }
341
342    /// Creates a `CheetahString` from a shared byte vector with UTF-8 validation.
343    ///
344    /// # Errors
345    ///
346    /// Returns an error if the bytes are not valid UTF-8.
347    #[inline]
348    pub fn try_from_arc_vec(s: Arc<Vec<u8>>) -> Result<Self, Utf8Error> {
349        match Arc::try_unwrap(s) {
350            Ok(v) => CheetahString::try_from_vec(v),
351            Err(s) => {
352                let s = str::from_utf8(s.as_slice())?;
353                Ok(CheetahString::from_slice(s))
354            }
355        }
356    }
357
358    #[deprecated(
359        since = "1.1.0",
360        note = "use try_from_arc_vec for checked construction or from_utf8_unchecked_arc_vec for an explicit unsafe constructor"
361    )]
362    #[inline]
363    pub fn from_arc_vec(s: Arc<Vec<u8>>) -> Self {
364        CheetahString::try_from_arc_vec(s).expect(
365            "CheetahString::from_arc_vec requires valid UTF-8; use try_from_arc_vec for fallible construction",
366        )
367    }
368
369    /// Creates a `CheetahString` from a shared byte vector without validating UTF-8.
370    ///
371    /// # Safety
372    ///
373    /// The caller must guarantee that `s` contains valid UTF-8.
374    #[inline]
375    pub unsafe fn from_utf8_unchecked_arc_vec(s: Arc<Vec<u8>>) -> Self {
376        CheetahString::from_validated_arc_vec_unchecked(s)
377    }
378
379    #[inline]
380    fn from_validated_arc_vec_unchecked(s: Arc<Vec<u8>>) -> Self {
381        match Arc::try_unwrap(s) {
382            Ok(v) => CheetahString::from_validated_vec_unchecked(v),
383            Err(s) => {
384                // SAFETY: Callers validate UTF-8 before reaching this helper.
385                unsafe { CheetahString::from_utf8_unchecked_bytes(s.as_slice()) }
386            }
387        }
388    }
389
390    #[inline]
391    pub fn from_slice(s: &str) -> Self {
392        if s.len() <= INLINE_CAPACITY {
393            // Use inline storage for short strings
394            let mut data = [0u8; INLINE_CAPACITY];
395            data[..s.len()].copy_from_slice(s.as_bytes());
396            CheetahString {
397                inner: InnerString::Inline {
398                    len: s.len() as u8,
399                    data,
400                },
401            }
402        } else {
403            // Use Arc<str> for long borrowed strings to avoid the extra String header.
404            let arc_str: Arc<str> = Arc::from(s);
405            CheetahString {
406                inner: InnerString::Shared(arc_str),
407            }
408        }
409    }
410
411    #[inline]
412    pub fn from_string(s: String) -> Self {
413        if s.len() <= INLINE_CAPACITY {
414            // Use inline storage for short strings
415            let mut data = [0u8; INLINE_CAPACITY];
416            data[..s.len()].copy_from_slice(s.as_bytes());
417            CheetahString {
418                inner: InnerString::Inline {
419                    len: s.len() as u8,
420                    data,
421                },
422            }
423        } else {
424            // Use Arc<str> for long strings to avoid double allocation
425            let arc_str: Arc<str> = s.into_boxed_str().into();
426            CheetahString {
427                inner: InnerString::Shared(arc_str),
428            }
429        }
430    }
431
432    #[inline]
433    fn from_builder_string(s: String) -> Self {
434        if s.len() <= INLINE_CAPACITY && s.capacity() <= INLINE_CAPACITY {
435            let mut data = [0u8; INLINE_CAPACITY];
436            data[..s.len()].copy_from_slice(s.as_bytes());
437            CheetahString {
438                inner: InnerString::Inline {
439                    len: s.len() as u8,
440                    data,
441                },
442            }
443        } else {
444            CheetahString {
445                inner: InnerString::Owned(s),
446            }
447        }
448    }
449
450    #[inline]
451    pub fn from_arc_string(s: Arc<String>) -> Self {
452        match Arc::try_unwrap(s) {
453            Ok(s) => CheetahString::from_builder_string(s),
454            Err(s) => CheetahString::from_slice(s.as_str()),
455        }
456    }
457
458    #[inline]
459    #[cfg(feature = "bytes")]
460    #[deprecated(
461        since = "1.1.0",
462        note = "use try_from_bytes_buf for checked construction or from_utf8_unchecked_bytes_buf for an explicit unsafe constructor"
463    )]
464    pub fn from_bytes(b: bytes::Bytes) -> Self {
465        CheetahString::try_from_bytes_buf(b).expect(
466            "CheetahString::from_bytes requires valid UTF-8; use try_from_bytes_buf for fallible construction",
467        )
468    }
469
470    #[inline]
471    #[cfg(feature = "bytes")]
472    pub fn try_from_bytes_buf(b: bytes::Bytes) -> Result<Self, Utf8Error> {
473        str::from_utf8(b.as_ref())?;
474        Ok(CheetahString::from_validated_bytes_unchecked(b))
475    }
476
477    /// Creates a `CheetahString` from `bytes::Bytes` without validating UTF-8.
478    ///
479    /// # Safety
480    ///
481    /// The caller must guarantee that `b` contains valid UTF-8.
482    #[inline]
483    #[cfg(feature = "bytes")]
484    pub unsafe fn from_utf8_unchecked_bytes_buf(b: bytes::Bytes) -> Self {
485        CheetahString::from_validated_bytes_unchecked(b)
486    }
487
488    #[inline]
489    #[cfg(feature = "bytes")]
490    fn from_validated_bytes_unchecked(b: bytes::Bytes) -> Self {
491        // SAFETY: Callers validate UTF-8 before reaching this helper.
492        unsafe { CheetahString::from_utf8_unchecked_bytes(b.as_ref()) }
493    }
494
495    #[inline]
496    pub fn as_str(&self) -> &str {
497        match &self.inner {
498            InnerString::Inline { len, data } => {
499                // SAFETY: Inline strings are only created from valid UTF-8 sources.
500                // The data is always valid UTF-8 up to len bytes.
501                unsafe { str::from_utf8_unchecked(&data[..*len as usize]) }
502            }
503            InnerString::Static(s) => s,
504            InnerString::Shared(s) => s.as_ref(),
505            InnerString::Owned(s) => s.as_str(),
506        }
507    }
508
509    #[inline]
510    pub fn as_bytes(&self) -> &[u8] {
511        match &self.inner {
512            InnerString::Inline { len, data } => &data[..*len as usize],
513            InnerString::Static(s) => s.as_bytes(),
514            InnerString::Shared(s) => s.as_bytes(),
515            InnerString::Owned(s) => s.as_bytes(),
516        }
517    }
518
519    #[inline]
520    pub fn len(&self) -> usize {
521        match &self.inner {
522            InnerString::Inline { len, .. } => *len as usize,
523            InnerString::Static(s) => s.len(),
524            InnerString::Shared(s) => s.len(),
525            InnerString::Owned(s) => s.len(),
526        }
527    }
528
529    #[inline]
530    pub fn is_empty(&self) -> bool {
531        match &self.inner {
532            InnerString::Inline { len, .. } => *len == 0,
533            InnerString::Static(s) => s.is_empty(),
534            InnerString::Shared(s) => s.is_empty(),
535            InnerString::Owned(s) => s.is_empty(),
536        }
537    }
538
539    // Query methods - delegate to &str
540
541    /// Returns `true` if the string starts with the given pattern.
542    ///
543    /// When the `simd` feature is enabled, this method uses SIMD instructions
544    /// for improved performance on longer patterns.
545    ///
546    /// # Examples
547    ///
548    /// ```
549    /// use cheetah_string::CheetahString;
550    ///
551    /// let s = CheetahString::from("hello world");
552    /// assert!(s.starts_with("hello"));
553    /// assert!(!s.starts_with("world"));
554    /// assert!(s.starts_with('h'));
555    /// ```
556    #[inline]
557    pub fn starts_with<P: StrPattern>(&self, pat: P) -> bool {
558        match pat.as_str_pattern() {
559            StrPatternImpl::Char(c) => self.as_str().starts_with(c),
560            StrPatternImpl::Str(s) => {
561                #[cfg(all(feature = "simd", target_arch = "x86_64"))]
562                {
563                    if s.len() >= crate::simd::SIMD_THRESHOLD {
564                        return crate::simd::starts_with_bytes(self.as_bytes(), s.as_bytes());
565                    }
566                }
567
568                self.as_str().starts_with(s)
569            }
570        }
571    }
572
573    /// Returns `true` if the string starts with the given character.
574    ///
575    /// # Examples
576    ///
577    /// ```
578    /// use cheetah_string::CheetahString;
579    ///
580    /// let s = CheetahString::from("hello world");
581    /// assert!(s.starts_with_char('h'));
582    /// assert!(!s.starts_with_char('w'));
583    /// ```
584    #[inline]
585    pub fn starts_with_char(&self, pat: char) -> bool {
586        self.as_str().starts_with(pat)
587    }
588
589    /// Returns `true` if the string ends with the given pattern.
590    ///
591    /// When the `simd` feature is enabled, this method uses SIMD instructions
592    /// for improved performance on longer patterns.
593    ///
594    /// # Examples
595    ///
596    /// ```
597    /// use cheetah_string::CheetahString;
598    ///
599    /// let s = CheetahString::from("hello world");
600    /// assert!(s.ends_with("world"));
601    /// assert!(!s.ends_with("hello"));
602    /// assert!(s.ends_with('d'));
603    /// ```
604    #[inline]
605    pub fn ends_with<P: StrPattern>(&self, pat: P) -> bool {
606        match pat.as_str_pattern() {
607            StrPatternImpl::Char(c) => self.as_str().ends_with(c),
608            StrPatternImpl::Str(s) => {
609                #[cfg(all(feature = "simd", target_arch = "x86_64"))]
610                {
611                    if s.len() >= crate::simd::SIMD_THRESHOLD {
612                        return crate::simd::ends_with_bytes(self.as_bytes(), s.as_bytes());
613                    }
614                }
615
616                self.as_str().ends_with(s)
617            }
618        }
619    }
620
621    /// Returns `true` if the string ends with the given character.
622    ///
623    /// # Examples
624    ///
625    /// ```
626    /// use cheetah_string::CheetahString;
627    ///
628    /// let s = CheetahString::from("hello world");
629    /// assert!(s.ends_with_char('d'));
630    /// assert!(!s.ends_with_char('h'));
631    /// ```
632    #[inline]
633    pub fn ends_with_char(&self, pat: char) -> bool {
634        self.as_str().ends_with(pat)
635    }
636
637    /// Returns `true` if the string contains the given pattern.
638    ///
639    /// When the `simd` feature is enabled, this method uses SIMD instructions
640    /// for improved performance on longer patterns.
641    ///
642    /// # Examples
643    ///
644    /// ```
645    /// use cheetah_string::CheetahString;
646    ///
647    /// let s = CheetahString::from("hello world");
648    /// assert!(s.contains("llo"));
649    /// assert!(!s.contains("xyz"));
650    /// assert!(s.contains('o'));
651    /// ```
652    #[inline]
653    pub fn contains<P: StrPattern>(&self, pat: P) -> bool {
654        match pat.as_str_pattern() {
655            StrPatternImpl::Char(c) => self.as_str().contains(c),
656            StrPatternImpl::Str(s) => {
657                crate::search::find_bytes(self.as_bytes(), s.as_bytes()).is_some()
658            }
659        }
660    }
661
662    /// Returns `true` if the string contains the given character.
663    ///
664    /// # Examples
665    ///
666    /// ```
667    /// use cheetah_string::CheetahString;
668    ///
669    /// let s = CheetahString::from("hello world");
670    /// assert!(s.contains_char('o'));
671    /// assert!(!s.contains_char('x'));
672    /// ```
673    #[inline]
674    pub fn contains_char(&self, pat: char) -> bool {
675        self.as_str().contains(pat)
676    }
677
678    /// Returns the byte index of the first occurrence of the pattern, or `None` if not found.
679    ///
680    /// When the `simd` feature is enabled, this method uses SIMD instructions
681    /// for improved performance on longer patterns.
682    ///
683    /// # Examples
684    ///
685    /// ```
686    /// use cheetah_string::CheetahString;
687    ///
688    /// let s = CheetahString::from("hello world");
689    /// assert_eq!(s.find("world"), Some(6));
690    /// assert_eq!(s.find("xyz"), None);
691    /// ```
692    #[inline]
693    pub fn find<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
694        let pat = pat.as_ref();
695        crate::search::find_bytes(self.as_bytes(), pat.as_bytes())
696    }
697
698    /// Returns the byte index of the last occurrence of the pattern, or `None` if not found.
699    ///
700    /// # Examples
701    ///
702    /// ```
703    /// use cheetah_string::CheetahString;
704    ///
705    /// let s = CheetahString::from("hello hello");
706    /// assert_eq!(s.rfind("hello"), Some(6));
707    /// ```
708    #[inline]
709    pub fn rfind<P: AsRef<str>>(&self, pat: P) -> Option<usize> {
710        crate::search::rfind_bytes(self.as_bytes(), pat.as_ref().as_bytes())
711    }
712
713    /// Returns a string slice with leading and trailing whitespace removed.
714    ///
715    /// # Examples
716    ///
717    /// ```
718    /// use cheetah_string::CheetahString;
719    ///
720    /// let s = CheetahString::from("  hello  ");
721    /// assert_eq!(s.trim(), "hello");
722    /// ```
723    #[inline]
724    pub fn trim(&self) -> &str {
725        self.as_str().trim()
726    }
727
728    /// Returns a string slice with leading whitespace removed.
729    ///
730    /// # Examples
731    ///
732    /// ```
733    /// use cheetah_string::CheetahString;
734    ///
735    /// let s = CheetahString::from("  hello");
736    /// assert_eq!(s.trim_start(), "hello");
737    /// ```
738    #[inline]
739    pub fn trim_start(&self) -> &str {
740        self.as_str().trim_start()
741    }
742
743    /// Returns a string slice with trailing whitespace removed.
744    ///
745    /// # Examples
746    ///
747    /// ```
748    /// use cheetah_string::CheetahString;
749    ///
750    /// let s = CheetahString::from("hello  ");
751    /// assert_eq!(s.trim_end(), "hello");
752    /// ```
753    #[inline]
754    pub fn trim_end(&self) -> &str {
755        self.as_str().trim_end()
756    }
757
758    /// Splits the string by the given pattern.
759    ///
760    /// # Examples
761    ///
762    /// ```
763    /// use cheetah_string::CheetahString;
764    ///
765    /// let s = CheetahString::from("a,b,c");
766    /// let parts: Vec<&str> = s.split(",").collect();
767    /// assert_eq!(parts, vec!["a", "b", "c"]);
768    /// let parts2: Vec<&str> = s.split(',').collect();
769    /// assert_eq!(parts2, vec!["a", "b", "c"]);
770    /// ```
771    #[inline]
772    pub fn split<'a, P>(&'a self, pat: P) -> SplitWrapper<'a>
773    where
774        P: SplitPattern<'a>,
775    {
776        pat.split_str(self.as_str())
777    }
778
779    /// Returns an iterator over the lines of the string.
780    ///
781    /// # Examples
782    ///
783    /// ```
784    /// use cheetah_string::CheetahString;
785    ///
786    /// let s = CheetahString::from("line1\nline2\nline3");
787    /// let lines: Vec<&str> = s.lines().collect();
788    /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
789    /// ```
790    #[inline]
791    pub fn lines(&self) -> impl Iterator<Item = &str> {
792        self.as_str().lines()
793    }
794
795    /// Returns an iterator over the characters of the string.
796    ///
797    /// # Examples
798    ///
799    /// ```
800    /// use cheetah_string::CheetahString;
801    ///
802    /// let s = CheetahString::from("hello");
803    /// let chars: Vec<char> = s.chars().collect();
804    /// assert_eq!(chars, vec!['h', 'e', 'l', 'l', 'o']);
805    /// let reversed: Vec<char> = s.chars().rev().collect();
806    /// assert_eq!(reversed, vec!['o', 'l', 'l', 'e', 'h']);
807    /// ```
808    #[inline]
809    pub fn chars(&self) -> str::Chars<'_> {
810        self.as_str().chars()
811    }
812
813    // Transformation methods - create new CheetahString
814
815    /// Returns a new `CheetahString` with all characters converted to uppercase.
816    ///
817    /// # Examples
818    ///
819    /// ```
820    /// use cheetah_string::CheetahString;
821    ///
822    /// let s = CheetahString::from("hello");
823    /// assert_eq!(s.to_uppercase(), "HELLO");
824    /// ```
825    #[inline]
826    pub fn to_uppercase(&self) -> CheetahString {
827        CheetahString::from_string(self.as_str().to_uppercase())
828    }
829
830    /// Returns a new `CheetahString` with all characters converted to lowercase.
831    ///
832    /// # Examples
833    ///
834    /// ```
835    /// use cheetah_string::CheetahString;
836    ///
837    /// let s = CheetahString::from("HELLO");
838    /// assert_eq!(s.to_lowercase(), "hello");
839    /// ```
840    #[inline]
841    pub fn to_lowercase(&self) -> CheetahString {
842        CheetahString::from_string(self.as_str().to_lowercase())
843    }
844
845    /// Replaces all occurrences of a pattern with another string.
846    ///
847    /// # Examples
848    ///
849    /// ```
850    /// use cheetah_string::CheetahString;
851    ///
852    /// let s = CheetahString::from("hello world");
853    /// assert_eq!(s.replace("world", "rust"), "hello rust");
854    /// ```
855    #[inline]
856    pub fn replace<P: AsRef<str>>(&self, from: P, to: &str) -> CheetahString {
857        CheetahString::from_string(self.as_str().replace(from.as_ref(), to))
858    }
859
860    /// Returns a new `CheetahString` with the specified range replaced.
861    ///
862    /// # Examples
863    ///
864    /// ```
865    /// use cheetah_string::CheetahString;
866    ///
867    /// let s = CheetahString::from("hello world");
868    /// assert_eq!(s.replacen("l", "L", 1), "heLlo world");
869    /// ```
870    #[inline]
871    pub fn replacen<P: AsRef<str>>(&self, from: P, to: &str, count: usize) -> CheetahString {
872        CheetahString::from_string(self.as_str().replacen(from.as_ref(), to, count))
873    }
874
875    /// Returns a substring as a new `CheetahString`.
876    ///
877    /// # Panics
878    ///
879    /// Panics if the indices are not on valid UTF-8 character boundaries.
880    ///
881    /// # Examples
882    ///
883    /// ```
884    /// use cheetah_string::CheetahString;
885    ///
886    /// let s = CheetahString::from("hello world");
887    /// assert_eq!(s.substring(0, 5), "hello");
888    /// assert_eq!(s.substring(6, 11), "world");
889    /// ```
890    #[inline]
891    pub fn substring(&self, start: usize, end: usize) -> CheetahString {
892        CheetahString::from_slice(&self.as_str()[start..end])
893    }
894
895    /// Repeats the string `n` times.
896    ///
897    /// # Examples
898    ///
899    /// ```
900    /// use cheetah_string::CheetahString;
901    ///
902    /// let s = CheetahString::from("abc");
903    /// assert_eq!(s.repeat(3), "abcabcabc");
904    /// ```
905    #[inline]
906    pub fn repeat(&self, n: usize) -> CheetahString {
907        CheetahString::from_string(self.as_str().repeat(n))
908    }
909
910    // Incremental building methods
911
912    /// Creates a new `CheetahString` with the specified capacity.
913    ///
914    /// The string will be able to hold at least `capacity` bytes without reallocating.
915    /// If `capacity` is less than or equal to the inline capacity (23 bytes),
916    /// an empty inline string is returned.
917    ///
918    /// # Examples
919    ///
920    /// ```
921    /// use cheetah_string::CheetahString;
922    ///
923    /// let mut s = CheetahString::with_capacity(100);
924    /// s.push_str("hello");
925    /// assert_eq!(s, "hello");
926    /// ```
927    #[inline]
928    pub fn with_capacity(capacity: usize) -> Self {
929        if capacity <= INLINE_CAPACITY {
930            CheetahString::empty()
931        } else {
932            CheetahString::from_builder_string(String::with_capacity(capacity))
933        }
934    }
935
936    #[inline]
937    fn push_str_internal(&mut self, string: &str) {
938        if string.is_empty() {
939            return;
940        }
941
942        match &mut self.inner {
943            InnerString::Inline { len, data } => {
944                let total_len = *len as usize + string.len();
945                if total_len <= INLINE_CAPACITY {
946                    data[*len as usize..total_len].copy_from_slice(string.as_bytes());
947                    *len = total_len as u8;
948                    return;
949                }
950            }
951            InnerString::Owned(s) => {
952                s.push_str(string);
953                return;
954            }
955            _ => {}
956        }
957
958        let total_len = self.len() + string.len();
959        let mut result = String::with_capacity(total_len);
960        result.push_str(self.as_str());
961        result.push_str(string);
962        *self = CheetahString::from_builder_string(result);
963    }
964
965    /// Appends a string slice to the end of this `CheetahString`.
966    ///
967    /// This method is optimized for incremental building and will:
968    /// - Mutate inline storage when possible
969    /// - Mutate owned heap storage in-place when capacity allows
970    /// - Only allocate when necessary
971    ///
972    /// # Examples
973    ///
974    /// ```
975    /// use cheetah_string::CheetahString;
976    ///
977    /// let mut s = CheetahString::from("Hello");
978    /// s.push_str(" ");
979    /// s.push_str("World");
980    /// assert_eq!(s, "Hello World");
981    /// ```
982    #[inline]
983    pub fn push_str(&mut self, string: &str) {
984        self.push_str_internal(string);
985    }
986
987    /// Reserves capacity for at least `additional` more bytes.
988    ///
989    /// This method will modify the internal representation if needed to ensure
990    /// that the string can hold at least `additional` more bytes without reallocating.
991    ///
992    /// # Examples
993    ///
994    /// ```
995    /// use cheetah_string::CheetahString;
996    ///
997    /// let mut s = CheetahString::from("hello");
998    /// s.reserve(100);
999    /// s.push_str(" world");
1000    /// ```
1001    #[inline]
1002    pub fn reserve(&mut self, additional: usize) {
1003        if additional == 0 {
1004            return;
1005        }
1006
1007        match &mut self.inner {
1008            InnerString::Inline { len, .. } if *len as usize + additional <= INLINE_CAPACITY => {
1009                return;
1010            }
1011            InnerString::Inline { .. } => {}
1012            InnerString::Owned(s) => {
1013                s.reserve(additional);
1014                return;
1015            }
1016            _ => {}
1017        }
1018
1019        let new_len = self.len() + additional;
1020        let mut s = String::with_capacity(new_len);
1021        s.push_str(self.as_str());
1022        *self = CheetahString::from_builder_string(s);
1023    }
1024}
1025
1026impl PartialEq for CheetahString {
1027    #[inline]
1028    fn eq(&self, other: &Self) -> bool {
1029        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
1030        {
1031            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
1032        }
1033        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
1034        {
1035            self.as_str() == other.as_str()
1036        }
1037    }
1038}
1039
1040impl PartialEq<str> for CheetahString {
1041    #[inline]
1042    fn eq(&self, other: &str) -> bool {
1043        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
1044        {
1045            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
1046        }
1047        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
1048        {
1049            self.as_str() == other
1050        }
1051    }
1052}
1053
1054impl PartialEq<String> for CheetahString {
1055    #[inline]
1056    fn eq(&self, other: &String) -> bool {
1057        #[cfg(all(feature = "simd", target_arch = "x86_64"))]
1058        {
1059            crate::simd::eq_bytes(self.as_bytes(), other.as_bytes())
1060        }
1061        #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
1062        {
1063            self.as_str() == other.as_str()
1064        }
1065    }
1066}
1067
1068impl PartialEq<Vec<u8>> for CheetahString {
1069    #[inline]
1070    fn eq(&self, other: &Vec<u8>) -> bool {
1071        self.as_bytes() == other.as_slice()
1072    }
1073}
1074
1075impl<'a> PartialEq<&'a str> for CheetahString {
1076    #[inline]
1077    fn eq(&self, other: &&'a str) -> bool {
1078        self.as_str() == *other
1079    }
1080}
1081
1082impl PartialEq<CheetahString> for str {
1083    #[inline]
1084    fn eq(&self, other: &CheetahString) -> bool {
1085        self == other.as_str()
1086    }
1087}
1088
1089impl PartialEq<CheetahString> for String {
1090    #[inline]
1091    fn eq(&self, other: &CheetahString) -> bool {
1092        self.as_str() == other.as_str()
1093    }
1094}
1095
1096impl PartialEq<CheetahString> for &str {
1097    #[inline]
1098    fn eq(&self, other: &CheetahString) -> bool {
1099        *self == other.as_str()
1100    }
1101}
1102
// Marker impl: `Eq` adds no methods. It declares that the `PartialEq` impl
// for `CheetahString` is a full equivalence relation.
impl Eq for CheetahString {}
1104
1105impl PartialOrd for CheetahString {
1106    #[inline]
1107    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1108        Some(self.cmp(other))
1109    }
1110}
1111
1112impl Ord for CheetahString {
1113    #[inline]
1114    fn cmp(&self, other: &Self) -> Ordering {
1115        self.as_str().cmp(other.as_str())
1116    }
1117}
1118
1119impl Hash for CheetahString {
1120    #[inline]
1121    fn hash<H: Hasher>(&self, state: &mut H) {
1122        self.as_str().hash(state);
1123    }
1124}
1125
1126impl Display for CheetahString {
1127    #[inline]
1128    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1129        self.as_str().fmt(f)
1130    }
1131}
1132
1133impl fmt::Debug for CheetahString {
1134    #[inline]
1135    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1136        fmt::Debug::fmt(self.as_str(), f)
1137    }
1138}
1139
1140impl Borrow<str> for CheetahString {
1141    #[inline]
1142    fn borrow(&self) -> &str {
1143        self.as_str()
1144    }
1145}
1146
// `Add` / `AddAssign` trait implementations for string concatenation
1148
1149impl Add<&str> for CheetahString {
1150    type Output = CheetahString;
1151
1152    /// Concatenates a `CheetahString` with a string slice.
1153    ///
1154    /// # Examples
1155    ///
1156    /// ```
1157    /// use cheetah_string::CheetahString;
1158    ///
1159    /// let s = CheetahString::from("Hello");
1160    /// let result = s + " World";
1161    /// assert_eq!(result, "Hello World");
1162    /// ```
1163    #[inline]
1164    fn add(mut self, rhs: &str) -> Self::Output {
1165        self.push_str_internal(rhs);
1166        self
1167    }
1168}
1169
1170impl Add<&CheetahString> for CheetahString {
1171    type Output = CheetahString;
1172
1173    /// Concatenates two `CheetahString` values.
1174    ///
1175    /// # Examples
1176    ///
1177    /// ```
1178    /// use cheetah_string::CheetahString;
1179    ///
1180    /// let s1 = CheetahString::from("Hello");
1181    /// let s2 = CheetahString::from(" World");
1182    /// let result = s1 + &s2;
1183    /// assert_eq!(result, "Hello World");
1184    /// ```
1185    #[inline]
1186    fn add(mut self, rhs: &CheetahString) -> Self::Output {
1187        self.push_str_internal(rhs.as_str());
1188        self
1189    }
1190}
1191
1192impl Add<String> for CheetahString {
1193    type Output = CheetahString;
1194
1195    /// Concatenates a `CheetahString` with a `String`.
1196    ///
1197    /// # Examples
1198    ///
1199    /// ```
1200    /// use cheetah_string::CheetahString;
1201    ///
1202    /// let s = CheetahString::from("Hello");
1203    /// let result = s + String::from(" World");
1204    /// assert_eq!(result, "Hello World");
1205    /// ```
1206    #[inline]
1207    fn add(mut self, rhs: String) -> Self::Output {
1208        if self.is_empty() {
1209            return CheetahString::from_string(rhs);
1210        }
1211
1212        self.push_str_internal(&rhs);
1213        self
1214    }
1215}
1216
1217impl AddAssign<&str> for CheetahString {
1218    /// Appends a string slice to a `CheetahString`.
1219    ///
1220    /// # Examples
1221    ///
1222    /// ```
1223    /// use cheetah_string::CheetahString;
1224    ///
1225    /// let mut s = CheetahString::from("Hello");
1226    /// s += " World";
1227    /// assert_eq!(s, "Hello World");
1228    /// ```
1229    #[inline]
1230    fn add_assign(&mut self, rhs: &str) {
1231        self.push_str_internal(rhs);
1232    }
1233}
1234
1235impl AddAssign<&CheetahString> for CheetahString {
1236    /// Appends a `CheetahString` to another `CheetahString`.
1237    ///
1238    /// # Examples
1239    ///
1240    /// ```
1241    /// use cheetah_string::CheetahString;
1242    ///
1243    /// let mut s1 = CheetahString::from("Hello");
1244    /// let s2 = CheetahString::from(" World");
1245    /// s1 += &s2;
1246    /// assert_eq!(s1, "Hello World");
1247    /// ```
1248    #[inline]
1249    fn add_assign(&mut self, rhs: &CheetahString) {
1250        self.push_str_internal(rhs.as_str());
1251    }
1252}
1253
/// Maximum capacity for inline string storage (23 bytes + 1 byte for length = 24 bytes total)
///
/// This is the array length of the `data` field of `InnerString::Inline`, and
/// the limit the append and reserve paths check before leaving inline storage.
const INLINE_CAPACITY: usize = 23;
1256
/// The `InnerString` enum represents different types of string storage.
///
/// This enum uses Small String Optimization (SSO) to avoid heap allocations for short strings.
///
/// Variants:
///
/// * `Inline` - Inline storage for strings <= 23 bytes (zero heap allocations).
/// * `Static(&'static str)` - A static string slice (zero heap allocations).
/// * `Shared(Arc<str>)` - A reference-counted string slice (single heap allocation, optimized).
/// * `Owned(String)` - An owned heap string used for builder-style mutation.
#[derive(Clone)]
pub(super) enum InnerString {
    /// Inline storage for short strings (up to 23 bytes).
    /// Stores the length and data directly without heap allocation.
    Inline {
        // Number of bytes of `data` currently in use.
        len: u8,
        // Backing buffer; the append path writes string bytes at `len..` and
        // then advances `len`. NOTE(review): `data[..len]` is presumably
        // always valid UTF-8 — confirm against the accessors.
        data: [u8; INLINE_CAPACITY],
    },
    /// Static string slice with 'static lifetime.
    Static(&'static str),
    /// Reference-counted string slice (single heap allocation).
    /// Preferred for long immutable strings created from owned or borrowed data.
    Shared(Arc<str>),
    /// Owned heap-allocated string used when exclusive mutability matters.
    Owned(String),
}
1283
// Sealed trait pattern to support both &str and char in starts_with/ends_with/contains
//
// The marker traits live in a private module, so downstream crates cannot
// name them and therefore cannot implement the public pattern traits that
// use them as supertraits.
mod private {
    use alloc::string::String;

    /// Supertrait of `StrPattern`; implemented for `char`, `&str` and `&String`.
    pub trait Sealed {}
    impl Sealed for char {}
    impl Sealed for &str {}
    impl Sealed for &String {}

    /// Supertrait of `SplitPattern`; implemented for `char` and `&str`.
    pub trait SplitSealed {}
    impl SplitSealed for char {}
    impl SplitSealed for &str {}
}
1297
/// A pattern that can be used with `starts_with` and `ends_with` methods.
///
/// The trait is sealed through `private::Sealed`; this file implements it for
/// `char`, `&str` and `&String`.
pub trait StrPattern: private::Sealed {
    /// Converts the pattern into the `StrPatternImpl` form (a `char` or a
    /// borrowed `str`).
    #[doc(hidden)]
    fn as_str_pattern(&self) -> StrPatternImpl<'_>;
}
1303
/// Normalized form of a `StrPattern`: either a single character or a borrowed
/// string slice.
#[doc(hidden)]
pub enum StrPatternImpl<'a> {
    /// Pattern given as a `char`.
    Char(char),
    /// Pattern given as a string slice (`&str` or `&String`).
    Str(&'a str),
}
1309
1310impl StrPattern for char {
1311    #[inline]
1312    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1313        StrPatternImpl::Char(*self)
1314    }
1315}
1316
1317impl StrPattern for &str {
1318    #[inline]
1319    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1320        StrPatternImpl::Str(self)
1321    }
1322}
1323
1324impl StrPattern for &String {
1325    #[inline]
1326    fn as_str_pattern(&self) -> StrPatternImpl<'_> {
1327        StrPatternImpl::Str(self.as_str())
1328    }
1329}
1330
/// A pattern that can be used with `split` method.
///
/// The trait is sealed through `private::SplitSealed`; this file implements
/// it for `char` and `&str`.
pub trait SplitPattern<'a>: private::SplitSealed {
    /// Splits `s` by this pattern and returns the resulting iterator wrapped
    /// in a `SplitWrapper`.
    #[doc(hidden)]
    fn split_str(self, s: &'a str) -> SplitWrapper<'a>;
}
1336
1337impl SplitPattern<'_> for char {
1338    fn split_str(self, s: &str) -> SplitWrapper<'_> {
1339        SplitWrapper::Char(s.split(self))
1340    }
1341}
1342
1343impl<'a> SplitPattern<'a> for &'a str {
1344    fn split_str(self, s: &'a str) -> SplitWrapper<'a> {
1345        let inner = match single_char_pattern(self) {
1346            Some(ch) => SplitStrInner::Char(s.split(ch)),
1347            None => SplitStrInner::Str(s.split(self)),
1348        };
1349
1350        SplitWrapper::Str(SplitStr(inner))
1351    }
1352}
1353
/// Helper struct for splitting strings by a string pattern.
///
/// Wraps either a `&str`-pattern split or, when the pattern is exactly one
/// character long, the equivalent `char`-pattern split.
pub struct SplitStr<'a>(SplitStrInner<'a>);

/// Concrete std iterator behind `SplitStr`.
enum SplitStrInner<'a> {
    /// Split driven by a string pattern.
    Str(str::Split<'a, &'a str>),
    /// Split driven by the single character the string pattern consists of.
    Char(str::Split<'a, char>),
}

/// Returns the only character of `pattern`, or `None` when `pattern` is empty
/// or contains more than one character.
#[inline]
fn single_char_pattern(pattern: &str) -> Option<char> {
    let mut chars = pattern.chars();

    match (chars.next(), chars.next()) {
        (Some(ch), None) => Some(ch),
        _ => None,
    }
}

impl<'a> Iterator for SplitStr<'a> {
    type Item = &'a str;

    /// Yields the next piece from whichever std split iterator is wrapped.
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.0 {
            SplitStrInner::Str(iter) => iter.next(),
            SplitStrInner::Char(iter) => iter.next(),
        }
    }
}

/// Wrapper for split iterator that supports both char and str patterns
pub enum SplitWrapper<'a> {
    /// Split produced by a `char` pattern.
    #[doc(hidden)]
    Char(str::Split<'a, char>),
    /// Split produced by a `&str` pattern.
    #[doc(hidden)]
    Str(SplitStr<'a>),
}

impl<'a> Iterator for SplitWrapper<'a> {
    type Item = &'a str;

    /// Yields the next piece from the front of the string.
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            SplitWrapper::Char(iter) => iter.next(),
            SplitWrapper::Str(iter) => iter.next(),
        }
    }
}

/// Reverse iteration over the split pieces.
///
/// Supported whenever the wrapped iterator is a `char` split: for `char`
/// patterns, and for string patterns that consist of exactly one character.
///
/// # Panics
///
/// `next_back` panics for string patterns that are empty or longer than one
/// character. std's `Split<'_, &str>` does not implement
/// `DoubleEndedIterator`, so there is nothing to delegate to.
impl<'a> DoubleEndedIterator for SplitWrapper<'a> {
    fn next_back(&mut self) -> Option<Self::Item> {
        match self {
            SplitWrapper::Char(iter) => iter.next_back(),
            // A one-character string pattern is backed by a `char` split,
            // which is double-ended, so reverse iteration works here too.
            SplitWrapper::Str(SplitStr(SplitStrInner::Char(iter))) => iter.next_back(),
            SplitWrapper::Str(SplitStr(SplitStrInner::Str(_))) => {
                // std offers no `next_back` for `Split<&str>`; keep the
                // established panic for real string patterns.
                panic!("split with string pattern does not support reverse iteration")
            }
        }
    }
}
1416
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{format, vec};

    // Requesting more than the inline limit up front must produce an owned
    // heap buffer with at least the requested capacity.
    #[test]
    fn with_capacity_above_inline_uses_heap_storage() {
        let s = CheetahString::with_capacity(INLINE_CAPACITY + 8);

        if let InnerString::Owned(inner) = &s.inner {
            assert!(inner.capacity() >= INLINE_CAPACITY + 8);
        } else {
            panic!(
                "expected heap-backed storage from with_capacity, got {:?}",
                core::mem::discriminant(&s.inner)
            );
        }
    }

    // Growing an inline string past the inline limit must switch it to owned
    // heap storage that holds the full concatenation.
    #[test]
    fn push_str_promotes_builder_growth_to_owned_storage() {
        let suffix = "a".repeat(INLINE_CAPACITY);
        let expected = format!("hello{suffix}");
        let mut s = CheetahString::from("hello");

        s.push_str(&suffix);

        if let InnerString::Owned(inner) = &s.inner {
            assert_eq!(inner.as_str(), expected.as_str());
            assert!(inner.capacity() >= expected.len());
        } else {
            panic!(
                "expected owned heap storage after builder growth, got {:?}",
                core::mem::discriminant(&s.inner)
            );
        }
    }

    // A borrowed slice one byte over the inline limit must be stored as a
    // shared, reference-counted string.
    #[test]
    fn long_borrowed_str_uses_shared_storage() {
        let value = "a".repeat(INLINE_CAPACITY + 1);
        let s = CheetahString::from_slice(&value);

        if let InnerString::Shared(inner) = &s.inner {
            assert_eq!(inner.as_ref(), value.as_str());
        } else {
            panic!(
                "expected Shared for long borrowed input, got {:?}",
                core::mem::discriminant(&s.inner)
            );
        }
    }

    // A short, valid UTF-8 byte vector must end up in inline storage.
    #[test]
    fn try_from_vec_short_input_uses_inline_storage() {
        let s = CheetahString::try_from_vec(b"hello".to_vec()).expect("valid utf-8");

        if let InnerString::Inline { len, data } = &s.inner {
            assert_eq!(*len as usize, 5);
            assert_eq!(&data[..5], b"hello");
        } else {
            panic!(
                "expected inline storage for short validated Vec<u8>, got {:?}",
                core::mem::discriminant(&s.inner)
            );
        }
    }

    // A byte vector longer than the inline limit must be kept as an owned
    // heap string.
    #[test]
    fn long_vec_conversion_uses_owned_storage() {
        let value = "a".repeat(INLINE_CAPACITY + 1).into_bytes();
        let s = CheetahString::try_from_vec(value).expect("valid utf-8");

        if let InnerString::Owned(inner) = &s.inner {
            assert_eq!(inner.len(), INLINE_CAPACITY + 1);
            assert_eq!(inner.as_bytes(), vec![b'a'; INLINE_CAPACITY + 1].as_slice());
        } else {
            panic!(
                "expected Owned for long Vec<u8> conversion, got {:?}",
                core::mem::discriminant(&s.inner)
            );
        }
    }
}