eztd_core/string/
mod.rs

1mod bytes;
2mod inline;
3mod shared;
4
5use std::cmp::Ordering;
6use std::fmt;
7use std::hash::{Hash, Hasher};
8use std::iter::FromIterator;
9use std::ops;
10
11use shared::SharedString;
12
/// Number of bytes subtracted from the shared representation's size when
/// sizing the inline buffer.
// NOTE(review): presumably this is the `StringInner` discriminant byte — confirm.
const TAG_SIZE: usize = 1;
/// Const parameter handed to the inline representation: the size of a
/// `SharedString` minus `TAG_SIZE`. `From<&str>` stores inputs of at most this
/// many bytes inline.
const CAPACITY: usize = std::mem::size_of::<SharedString>() - TAG_SIZE;
/// Inline string representation instantiated with `CAPACITY`.
type InlineString = inline::InlineString<CAPACITY>;
/// The standard library's `String`, aliased because this module declares its
/// own type named `String`.
type StdString = std::string::String;
17
18pub use bytes::Bytes;
19
/// String type whose contents live in one of the `StringInner`
/// representations (empty, inline, or shared).
///
/// The methods in this module take `&self` and hand back new `String` values
/// rather than mutating in place.
#[derive(Clone)]
pub struct String(StringInner);
22
/// Storage backing a `String`.
#[derive(Clone)]
enum StringInner {
    /// The empty string; carries no data.
    Empty,
    /// Contents held in an `InlineString`; `From<&str>` picks this for
    /// non-empty input of at most `CAPACITY` bytes.
    Inline(InlineString),
    /// Contents held in a `SharedString`; `From<&str>` picks this for input
    /// longer than `CAPACITY` bytes.
    Shared(SharedString),
}
29
30impl String {
31    /// Creates a new empty `String`.
32    ///
33    /// Given that the `String` is empty, this will not allocate any initial
34    /// buffer. While that means that this initial operation is very
35    /// inexpensive, it may cause excessive allocation later when you add
36    /// data.
37    ///
38    /// # Examples
39    ///
40    /// Basic usage:
41    ///
42    /// ```
43    /// let s = eztd_core::String::new();
44    /// ```
45    #[inline]
46    pub const fn new() -> Self {
47        Self(StringInner::Empty)
48    }
49
50    /// Returns the length of this `String`, in bytes, not [`char`]s or
51    /// graphemes. In other words, it may not be what a human considers the
52    /// length of the string.
53    ///
54    /// # Examples
55    ///
56    /// Basic usage:
57    ///
58    /// ```
59    /// let a = eztd_core::String::from("foo");
60    /// assert_eq!(a.byte_len(), 3);
61    /// ```
62    #[inline]
63    pub fn byte_len(&self) -> usize {
64        self.as_str().len()
65    }
66
67    /// Returns `true` if `self` has a length of zero bytes.
68    ///
69    /// # Examples
70    ///
71    /// Basic usage:
72    ///
73    /// ```
74    /// let s = eztd_core::String::from("");
75    /// assert!(s.is_empty());
76    ///
77    /// let s = eztd_core::String::from("not empty");
78    /// assert!(!s.is_empty());
79    /// ```
80    #[inline]
81    pub fn is_empty(&self) -> bool {
82        self.as_str().is_empty()
83    }
84
85    /// Returns the length of this `String`, in bytes, not [`char`]s or
86    /// graphemes. In other words, it may not be what a human considers the
87    /// length of the string.
88    ///
89    /// # Examples
90    ///
91    /// Basic usage:
92    ///
93    /// ```
94    /// let a = eztd_core::String::from("foo");
95    /// assert_eq!(a.char_len(), 3);
96    /// ```
97    #[inline]
98    pub fn char_len(&self) -> usize {
99        self.as_str().chars().count()
100    }
101
102    #[inline]
103    #[deprecated = "Use either `byte_len` or `char_len` to be more explicit on meaning"]
104    pub fn len(&self) -> usize {
105        self.byte_len()
106    }
107
108    /// Returns a subslice of `String`.
109    ///
110    /// This is the non-panicking alternative to indexing the `String`. Returns
111    /// [`None`] whenever equivalent indexing operation would panic.
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// let v = eztd_core::String::from("Hello World");
117    ///
118    /// assert_eq!(Some(eztd_core::String::from("Hell")), v.get(0..4));
119    /// ```
120    #[inline]
121    pub fn get(&self, range: impl std::ops::RangeBounds<usize>) -> Option<Self> {
122        match self.coerce_range(range) {
123            Some(range) => self.as_str().get(range).map(|s| self.own_str(s)),
124            None => Some(String::new()),
125        }
126    }
127
128    /// Divide one string slice into two at an index.
129    ///
130    /// The argument, `mid`, should be a byte offset from the start of the
131    /// string. It must also be on the boundary of a UTF-8 code point.
132    ///
133    /// The two slices returned go from the start of the string slice to `mid`,
134    /// and from `mid` to the end of the string slice.
135    ///
136    /// To get mutable string slices instead, see the [`split_at_mut`]
137    /// method.
138    ///
139    /// [`split_at_mut`]: str::split_at_mut
140    ///
141    /// # Panics
142    ///
143    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
144    /// past the end of the last code point of the string slice.
145    ///
146    /// # Examples
147    ///
148    /// Basic usage:
149    ///
150    /// ```
151    /// let s = eztd_core::String::from("Per Martin");
152    ///
153    /// let (first, last) = s.split_at(3);
154    ///
155    /// assert_eq!("Per", first);
156    /// assert_eq!(" Martin", last);
157    /// ```
158    #[inline]
159    pub fn split_at(&self, mid: usize) -> (Self, Self) {
160        let (left, right) = self.as_str().split_at(mid);
161        (self.own_str(left), self.own_str(right))
162    }
163
164    /// An iterator over the bytes of a string slice.
165    ///
166    /// As a string slice consists of a sequence of bytes, we can iterate
167    /// through a string slice by byte. This method returns such an iterator.
168    ///
169    /// # Examples
170    ///
171    /// Basic usage:
172    ///
173    /// ```
174    /// let mut bytes = eztd_core::String::from("bors").bytes();
175    ///
176    /// assert_eq!(Some(b'b'), bytes.next());
177    /// assert_eq!(Some(b'o'), bytes.next());
178    /// assert_eq!(Some(b'r'), bytes.next());
179    /// assert_eq!(Some(b's'), bytes.next());
180    ///
181    /// assert_eq!(None, bytes.next());
182    /// ```
183    #[inline]
184    pub fn bytes(&self) -> Bytes {
185        Bytes::new(self.clone())
186    }
187
188    /// Returns a string slice with leading whitespace removed.
189    ///
190    /// 'Whitespace' is defined according to the terms of the Unicode Derived
191    /// Core Property `White_Space`.
192    ///
193    /// # Text directionality
194    ///
195    /// A string is a sequence of bytes. `start` in this context means the first
196    /// position of that byte string; for a left-to-right language like English or
197    /// Russian, this will be left side, and for right-to-left languages like
198    /// Arabic or Hebrew, this will be the right side.
199    ///
200    /// # Examples
201    ///
202    /// Basic usage:
203    ///
204    /// ```
205    /// let s = " Hello\tworld\t";
206    /// assert_eq!("Hello\tworld\t", s.trim_start());
207    /// ```
208    #[inline]
209    #[must_use = "this returns the trimmed string as a new string, \
210                  without modifying the original"]
211    pub fn trim_start(&self) -> Self {
212        self.own_str(self.as_str().trim_start())
213    }
214
215    /// Appends a given string onto the end of this `String`.
216    ///
217    /// # Examples
218    ///
219    /// Basic usage:
220    ///
221    /// ```
222    /// let s = eztd_core::String::from("foo");
223    ///
224    /// let s = s.join_str("bar");
225    /// assert_eq!("foobar", s);
226    ///
227    /// let baz = eztd_core::String::from("baz");
228    /// let s = s.join_str(baz);
229    ///
230    /// assert_eq!("foobarbaz", s);
231    /// ```
232    #[inline]
233    #[must_use = "this returns the trimmed string as a new string, \
234                  without modifying the original"]
235    pub fn join_str(&self, string: impl AsRef<str>) -> Self {
236        let mut buffer = StdString::from(self.as_str());
237        buffer.push_str(string.as_ref());
238        Self::from(buffer.as_str())
239    }
240
241    /// Appends the given [`char`] to the end of this `String`.
242    ///
243    /// # Examples
244    ///
245    /// Basic usage:
246    ///
247    /// ```
248    /// let s = eztd_core::String::from("abc");
249    ///
250    /// let s = s.join_char('1').join_char('2').join_char('3');
251    ///
252    /// assert_eq!("abc123", s);
253    /// ```
254    #[inline]
255    #[must_use = "this returns the trimmed string as a new string, \
256                  without modifying the original"]
257    pub fn join_char(&self, ch: char) -> Self {
258        let mut buffer = StdString::from(self.as_str());
259        buffer.push(ch);
260        Self::from(buffer.as_str())
261    }
262
263    /// Shrinks the capacity of this `String` to match its length.
264    ///
265    /// # Examples
266    ///
267    /// Basic usage:
268    ///
269    /// ```
270    /// let s = eztd_core::String::from("foo");
271    ///
272    /// let s = s.shrink_to_fit();
273    /// ```
274    #[inline]
275    #[must_use = "this returns the trimmed string as a new string, \
276                  without modifying the original"]
277    pub fn shrink_to_fit(&self) -> String {
278        String::from(self.as_str())
279    }
280
281    fn own_str(&self, subset: &str) -> Self {
282        if subset.is_empty() {
283            String::new()
284        } else {
285            match &self.0 {
286                StringInner::Empty => String::new(),
287                StringInner::Inline(s) => s.own_str(subset).into(),
288                StringInner::Shared(s) => s.own_str(subset).into(),
289            }
290        }
291    }
292
293    fn coerce_range(
294        &self,
295        range: impl std::ops::RangeBounds<usize>,
296    ) -> Option<std::ops::RangeInclusive<usize>> {
297        let len = self.byte_len();
298        if len == 0 {
299            return None;
300        }
301
302        let range_start = match range.start_bound() {
303            std::ops::Bound::Included(s) => *s,
304            std::ops::Bound::Excluded(s) => {
305                if *s == usize::MAX {
306                    return None;
307                } else {
308                    s + 1
309                }
310            }
311            std::ops::Bound::Unbounded => 0,
312        };
313        let range_end = match range.end_bound() {
314            std::ops::Bound::Included(s) => *s,
315            std::ops::Bound::Excluded(s) => {
316                if *s == 0 {
317                    return None;
318                } else {
319                    s - 1
320                }
321            }
322            std::ops::Bound::Unbounded => usize::MAX,
323        }
324        .min(len - 1);
325
326        if len <= range_start || range_end < range_start {
327            None
328        } else {
329            Some(range_start..=range_end)
330        }
331    }
332}
333
334/// Transitional Python API
335impl String {
336    #[deprecated = "In Rust, we refer to this as `trim_start`"]
337    pub fn lstrip(&self) -> Self {
338        self.trim_start()
339    }
340}
341
342/// Interop
343impl String {
344    /// Extracts a string slice containing the entire `String`.
345    ///
346    /// # Examples
347    ///
348    /// Basic usage:
349    ///
350    /// ```
351    /// let s = eztd_core::String::from("foo");
352    ///
353    /// assert_eq!("foo", s.as_str());
354    /// ```
355    #[inline]
356    pub fn as_str(&self) -> &str {
357        match &self.0 {
358            StringInner::Empty => "",
359            StringInner::Inline(s) => s.as_str(),
360            StringInner::Shared(s) => s.as_str(),
361        }
362    }
363}
364
/// Locates `subset` inside `s` and returns its `(start, end)` byte offsets,
/// with `end` exclusive.
///
/// `subset` must be a slice borrowed from `s` itself (the same memory), not
/// merely text that compares equal. That precondition is checked only in
/// debug builds. The offsets are computed with plain address arithmetic, so
/// violating it in a release build yields meaningless offsets rather than
/// undefined behavior.
pub(crate) fn calculate_subset(s: &str, subset: &str) -> (usize, usize) {
    let base = s.as_ptr() as usize;
    let subset_base = subset.as_ptr() as usize;
    debug_assert!(base <= subset_base);
    debug_assert!(subset_base + subset.len() <= base + s.len());

    // Wrapping ops keep release builds well-defined even if the precondition
    // was violated; for a genuine sub-slice they never wrap.
    let start = subset_base.wrapping_sub(base);
    let end = start.wrapping_add(subset.len());
    (start, end)
}
380
381impl Default for String {
382    #[inline]
383    fn default() -> Self {
384        Self::new()
385    }
386}
387
388impl From<InlineString> for String {
389    #[inline]
390    fn from(other: InlineString) -> Self {
391        Self(StringInner::Inline(other))
392    }
393}
394
395impl From<SharedString> for String {
396    #[inline]
397    fn from(other: SharedString) -> Self {
398        Self(StringInner::Shared(other))
399    }
400}
401
402impl<'s> From<&'s str> for String {
403    #[inline]
404    fn from(other: &'s str) -> Self {
405        match other.len() {
406            0 => String::new(),
407            len if len <= CAPACITY => InlineString::from(other).into(),
408            _ => SharedString::from(other).into(),
409        }
410    }
411}
412
413impl From<StdString> for String {
414    #[inline]
415    fn from(other: StdString) -> Self {
416        other.as_str().into()
417    }
418}
419
420impl From<char> for String {
421    #[inline]
422    fn from(other: char) -> Self {
423        Self::new().join_char(other)
424    }
425}
426
427impl std::str::FromStr for String {
428    type Err = core::convert::Infallible;
429    #[inline]
430    fn from_str(s: &str) -> Result<String, Self::Err> {
431        Ok(String::from(s))
432    }
433}
434
435impl<'s> From<&'s StdString> for String {
436    #[inline]
437    fn from(other: &'s StdString) -> Self {
438        other.as_str().into()
439    }
440}
441
442impl FromIterator<char> for String {
443    fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> String {
444        let s = StdString::from_iter(iter);
445        String::from(&s)
446    }
447}
448
449impl<'a> FromIterator<&'a char> for String {
450    fn from_iter<I: IntoIterator<Item = &'a char>>(iter: I) -> String {
451        let s = StdString::from_iter(iter);
452        String::from(&s)
453    }
454}
455
456impl<'a> FromIterator<&'a str> for String {
457    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> String {
458        let s = StdString::from_iter(iter);
459        String::from(&s)
460    }
461}
462
463impl FromIterator<StdString> for String {
464    fn from_iter<I: IntoIterator<Item = StdString>>(iter: I) -> String {
465        let s = StdString::from_iter(iter);
466        String::from(&s)
467    }
468}
469
470/// Implements the `+` operator for concatenating two strings.
471///
472/// This consumes the `String` on the left-hand side and re-uses its buffer (growing it if
473/// necessary). This is done to avoid allocating a new `String` and copying the entire contents on
474/// every operation, which would lead to *O*(*n*^2) running time when building an *n*-byte string by
475/// repeated concatenation.
476///
477/// The string on the right-hand side is only borrowed; its contents are copied into the returned
478/// `String`.
479///
480/// # Examples
481///
482/// Concatenating two `String`s takes the first by value and borrows the second:
483///
484/// ```
485/// let a = eztd_core::String::from("hello");
486/// let b = eztd_core::String::from(" world");
487/// let c = &a + &b + "foo";
488/// ```
489impl<'s, S: AsRef<str>> std::ops::Add<S> for &'s String {
490    type Output = String;
491
492    #[inline]
493    fn add(self, other: S) -> String {
494        let other = other.as_ref();
495        self.join_str(other)
496    }
497}
498impl<S: AsRef<str>> std::ops::Add<S> for String {
499    type Output = String;
500
501    #[inline]
502    fn add(self, other: S) -> String {
503        let other = other.as_ref();
504        self.join_str(other)
505    }
506}
507
508// TODO: Determine policy
509// - Should we index by bytes or chars?
510// - Should we do python-style negative numbers?
511impl ops::Index<ops::Range<usize>> for String {
512    type Output = str;
513
514    #[inline]
515    fn index(&self, index: ops::Range<usize>) -> &str {
516        self.coerce_range(index)
517            .map(|index| &self.as_str()[index])
518            .unwrap_or_default()
519    }
520}
521impl ops::Index<ops::RangeTo<usize>> for String {
522    type Output = str;
523
524    #[inline]
525    fn index(&self, index: ops::RangeTo<usize>) -> &str {
526        self.coerce_range(index)
527            .map(|index| &self.as_str()[index])
528            .unwrap_or_default()
529    }
530}
531impl ops::Index<ops::RangeFrom<usize>> for String {
532    type Output = str;
533
534    #[inline]
535    fn index(&self, index: ops::RangeFrom<usize>) -> &str {
536        self.coerce_range(index)
537            .map(|index| &self.as_str()[index])
538            .unwrap_or_default()
539    }
540}
541impl ops::Index<ops::RangeFull> for String {
542    type Output = str;
543
544    #[inline]
545    fn index(&self, index: ops::RangeFull) -> &str {
546        self.coerce_range(index)
547            .map(|index| &self.as_str()[index])
548            .unwrap_or_default()
549    }
550}
551impl ops::Index<ops::RangeInclusive<usize>> for String {
552    type Output = str;
553
554    #[inline]
555    fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
556        self.coerce_range(index)
557            .map(|index| &self.as_str()[index])
558            .unwrap_or_default()
559    }
560}
561impl ops::Index<ops::RangeToInclusive<usize>> for String {
562    type Output = str;
563
564    #[inline]
565    fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
566        self.coerce_range(index)
567            .map(|index| &self.as_str()[index])
568            .unwrap_or_default()
569    }
570}
571
572impl fmt::Display for String {
573    #[inline]
574    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
575        fmt::Display::fmt(self.as_str(), f)
576    }
577}
578
579impl fmt::Debug for String {
580    #[inline]
581    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
582        fmt::Debug::fmt(self.as_str(), f)
583    }
584}
585
586impl PartialEq for String {
587    fn eq(&self, other: &Self) -> bool {
588        self.as_str() == other.as_str()
589    }
590}
591
592impl Eq for String {}
593
594macro_rules! impl_eq {
595    ($lhs:ty, $rhs: ty) => {
596        #[allow(unused_lifetimes)]
597        impl<'a, 'b> PartialEq<$rhs> for $lhs {
598            #[inline]
599            fn eq(&self, other: &$rhs) -> bool {
600                PartialEq::eq(&self[..], &other[..])
601            }
602        }
603
604        #[allow(unused_lifetimes)]
605        impl<'a, 'b> PartialEq<$lhs> for $rhs {
606            #[inline]
607            fn eq(&self, other: &$lhs) -> bool {
608                PartialEq::eq(&self[..], &other[..])
609            }
610        }
611    };
612}
613
614impl_eq! { String, str }
615impl_eq! { String, &'a str }
616impl_eq! { String, StdString }
617impl_eq! { String, &'a StdString }
618
619impl PartialOrd for String {
620    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
621        Some(self.cmp(other))
622    }
623}
624
625impl Ord for String {
626    fn cmp(&self, other: &Self) -> Ordering {
627        self.as_str().cmp(other.as_str())
628    }
629}
630
631impl Hash for String {
632    fn hash<H: Hasher>(&self, state: &mut H) {
633        self.as_str().hash(state);
634    }
635}
636
637impl AsRef<str> for String {
638    #[inline]
639    fn as_ref(&self) -> &str {
640        self.as_str()
641    }
642}
643
#[cfg(test)]
mod test_coerce_range {
    use super::*;

    /// Runs `coerce_range` on a fresh `String` built from `text`.
    fn coerce(
        text: &str,
        range: impl std::ops::RangeBounds<usize>,
    ) -> Option<std::ops::RangeInclusive<usize>> {
        String::from(text).coerce_range(range)
    }

    /// Every range on an empty string selects nothing.
    #[test]
    fn empty() {
        let fixture = "";
        let outside = 10;

        // Half-open and full ranges.
        assert_eq!(coerce(fixture, ..), None);
        assert_eq!(coerce(fixture, 0..), None);
        assert_eq!(coerce(fixture, outside..), None);
        assert_eq!(coerce(fixture, ..outside), None);
        assert_eq!(coerce(fixture, ..0), None);

        // Exclusive ranges.
        assert_eq!(coerce(fixture, 0..0), None);
        assert_eq!(coerce(fixture, 0..outside), None);
        assert_eq!(coerce(fixture, outside..0), None);

        // Inclusive ranges.
        assert_eq!(coerce(fixture, 0..=0), None);
        assert_eq!(coerce(fixture, 0..=outside), None);
        assert_eq!(coerce(fixture, outside..=0), None);
    }

    /// On a non-empty string, ends are clamped and empty selections are `None`.
    #[test]
    fn non_empty() {
        let fixture = "Hello";
        let inside = 3;
        assert!(inside < fixture.len());
        let outside = 10;
        assert!(fixture.len() < outside);

        // Half-open and full ranges.
        assert_eq!(coerce(fixture, ..), Some(0..=4));
        assert_eq!(coerce(fixture, 0..), Some(0..=4));
        assert_eq!(coerce(fixture, inside..), Some(3..=4));
        assert_eq!(coerce(fixture, outside..), None);
        assert_eq!(coerce(fixture, ..inside), Some(0..=2));
        assert_eq!(coerce(fixture, ..outside), Some(0..=4));
        assert_eq!(coerce(fixture, ..0), None);

        // Exclusive ranges.
        assert_eq!(coerce(fixture, 0..0), None);
        assert_eq!(coerce(fixture, 0..inside), Some(0..=2));
        assert_eq!(coerce(fixture, 0..outside), Some(0..=4));
        assert_eq!(coerce(fixture, inside..outside), Some(3..=4));
        assert_eq!(coerce(fixture, inside..0), None);
        assert_eq!(coerce(fixture, outside..0), None);
        assert_eq!(coerce(fixture, outside..inside), None);

        // Inclusive ranges.
        assert_eq!(coerce(fixture, 0..=0), Some(0..=0));
        assert_eq!(coerce(fixture, 0..=inside), Some(0..=3));
        assert_eq!(coerce(fixture, 0..=outside), Some(0..=4));
        assert_eq!(coerce(fixture, inside..=outside), Some(3..=4));
        assert_eq!(coerce(fixture, inside..=0), None);
        assert_eq!(coerce(fixture, outside..=0), None);
        assert_eq!(coerce(fixture, outside..=inside), None);
    }
}