cow_utils/
lib.rs

1// Disable no_std in doctests on stable Rust.
2// See https://github.com/RReverser/cow-utils-rs/pull/1#issuecomment-586973518.
3#![cfg_attr(any(not(doctest), feature = "nightly"), no_std)]
4#![cfg_attr(feature = "nightly", feature(pattern))]
5
6extern crate alloc as std;
7
8use std::{
9    borrow::{Cow, ToOwned},
10    string::String,
11};
12
/// A stable stand-in for the part of [`std::str::pattern::Pattern`]
/// (currently unstable) that this crate relies on.
///
/// Without the `"nightly"` feature it is implemented for the same standard
/// types as [`std::str::pattern::Pattern`]. With the `"nightly"` feature
/// enabled on a nightly toolchain, any custom type implementing
/// [`std::str::pattern::Pattern`] is supported as well.
pub trait Pattern<'s> {
    /// Iterator over `(byte offset, matched slice)` pairs.
    ///
    /// For the implementations provided by this crate this is always
    /// [`std::str::MatchIndices<'s, Self>`](std::str::MatchIndices). It can't
    /// be spelled out here because doing so requires
    /// `Self: `[`std::str::pattern::Pattern`], a bound that can't be written
    /// on stable Rust.
    type MatchIndices: Iterator<Item = (usize, &'s str)>;

    /// Searches `s` for this pattern, like [`str::match_indices`] does.
    fn match_indices_in(self, s: &'s str) -> Self::MatchIndices;
}
31
// Generates `impl Pattern<'s> for $target` by forwarding to
// `str::match_indices`. The optional `where ...` tail is pasted verbatim into
// the impl's generic parameter list, right after `'s`.
macro_rules! impl_pattern {
    ($target:ty $(where $($generics:tt)*)?) => {
        impl<'s $(, $($generics)*)?> Pattern<'s> for $target {
            type MatchIndices = std::str::MatchIndices<'s, Self>;

            fn match_indices_in(self, s: &'s str) -> Self::MatchIndices {
                str::match_indices(s, self)
            }
        }
    };
}
43
44#[cfg(not(feature = "nightly"))]
45const _: () = {
46    impl_pattern!(char);
47    impl_pattern!(&str);
48    impl_pattern!(&String);
49    impl_pattern!(&[char]);
50    impl_pattern!(&&str);
51    impl_pattern!(F where F: FnMut(char) -> bool);
52};
53
// With the "nightly" feature, forward every type that implements the real
// (unstable) pattern trait instead of enumerating types by hand.
//
// The unstable `Pattern` trait no longer takes a lifetime parameter on
// current nightly toolchains (the lifetime moved onto its `Searcher`
// associated type), so the bound is written without `<'s>`. Nightlies that
// predate that change need the old `Pattern<'s>` spelling instead.
#[cfg(feature = "nightly")]
impl_pattern!(P where P: std::str::pattern::Pattern);
56
57/// Some [`str`] methods perform destructive transformations and so
58/// return [`String`] even when no modification is necessary.
59///
60/// This helper trait provides drop-in variants of such methods, but
61/// instead avoids allocations when no modification is necessary.
62///
63/// For now only implemented for [`&str`](str) and returns
64/// [`Cow<str>`](std::borrow::Cow), but in the future might be extended
65/// to other types.
66pub trait CowUtils<'s> {
67    type Output;
68
69    /// Replaces all matches of a pattern with another string.
70    fn cow_replace(self, pattern: impl Pattern<'s>, to: &str) -> Self::Output;
71    /// Replaces first N matches of a pattern with another string.
72    fn cow_replacen(self, from: impl Pattern<'s>, to: &str, count: usize) -> Self::Output;
73    /// Returns a copy of this string where each character is mapped to its
74    /// ASCII lower case equivalent.
75    fn cow_to_ascii_lowercase(self) -> Self::Output;
76    /// Returns the lowercase equivalent of this string slice.
77    fn cow_to_lowercase(self) -> Self::Output;
78    /// Returns a copy of this string where each character is mapped to its
79    /// ASCII upper case equivalent.
80    fn cow_to_ascii_uppercase(self) -> Self::Output;
81    /// Returns the uppercase equivalent of this string slice.
82    fn cow_to_uppercase(self) -> Self::Output;
83}
84
/// Replaces every match reported by `match_indices` with `to`, borrowing
/// from `src` instead of allocating whenever that is possible.
///
/// `match_indices` yields `(byte offset, matched slice)` pairs. The text
/// between consecutive matches is appended to the result, followed by `to`
/// when `to` is non-empty, and the text after the last match is appended at
/// the end.
///
/// # Panics
///
/// Panics if a reported offset lies outside `src`, does not fall on a `char`
/// boundary, or precedes the end of the previous match. The crate's
/// `Pattern` trait is safe to implement, so offsets coming from a
/// third-party implementation cannot be trusted; they are therefore
/// validated by ordinary slicing rather than used unchecked.
///
/// # Safety
///
/// This function has no preconditions: every slice operation is checked.
/// The `unsafe` qualifier is kept only so that the existing call sites,
/// which wrap the call in `unsafe { .. }`, stay unchanged.
unsafe fn cow_replace<'s>(
    src: &'s str,
    match_indices: impl Iterator<Item = (usize, &'s str)>,
    to: &str,
) -> Cow<'s, str> {
    let mut result = Cow::default();
    // Byte offset in `src` just past the previously replaced match.
    let mut last_start = 0;
    for (index, matched) in match_indices {
        // Checked slicing: a bogus offset panics instead of reading out of
        // bounds or splitting a UTF-8 sequence.
        result += &src[last_start..index];
        if !to.is_empty() {
            // Only force an owned buffer when there is something to insert.
            result.to_mut().push_str(to);
        }
        last_start = index + matched.len();
    }
    result += &src[last_start..];
    result
}
102
103impl<'s> CowUtils<'s> for &'s str {
104    type Output = Cow<'s, str>;
105
106    /// This is similar to [`str::replace`](https://doc.rust-lang.org/std/primitive.str.html#method.replace), but returns
107    /// a slice of the original string when possible:
108    /// ```
109    /// # use cow_utils::CowUtils;
110    /// # use assert_matches::assert_matches;
111    /// # use std::borrow::Cow;
112    /// assert_matches!("abc".cow_replace("def", "ghi"), Cow::Borrowed("abc"));
113    /// assert_matches!("$$str$$".cow_replace("$", ""), Cow::Borrowed("str"));
114    /// assert_matches!("aaaaa".cow_replace("a", ""), Cow::Borrowed(""));
115    /// assert_matches!("abc".cow_replace("b", "d"), Cow::Owned(s) if s == "adc");
116    /// assert_matches!("$a$b$".cow_replace("$", ""), Cow::Owned(s) if s == "ab");
117    /// ```
118    fn cow_replace(self, pattern: impl Pattern<'s>, to: &str) -> Self::Output {
119        unsafe { cow_replace(self, pattern.match_indices_in(self), to) }
120    }
121
122    /// This is similar to [`str::replacen`](https://doc.rust-lang.org/std/primitive.str.html#method.replacen), but returns
123    /// a slice of the original string when possible:
124    /// ```
125    /// # use cow_utils::CowUtils;
126    /// # use assert_matches::assert_matches;
127    /// # use std::borrow::Cow;
128    /// assert_matches!("abc".cow_replacen("def", "ghi", 10), Cow::Borrowed("abc"));
129    /// assert_matches!("$$str$$".cow_replacen("$", "", 2), Cow::Borrowed("str$$"));
130    /// assert_matches!("$a$b$".cow_replacen("$", "", 1), Cow::Borrowed("a$b$"));
131    /// assert_matches!("aaaaa".cow_replacen("a", "", 10), Cow::Borrowed(""));
132    /// assert_matches!("aaaaa".cow_replacen("a", "b", 0), Cow::Borrowed("aaaaa"));
133    /// assert_matches!("abc".cow_replacen("b", "d", 1), Cow::Owned(s) if s == "adc");
134    /// ```
135    fn cow_replacen(self, pattern: impl Pattern<'s>, to: &str, count: usize) -> Self::Output {
136        unsafe { cow_replace(self, pattern.match_indices_in(self).take(count), to) }
137    }
138
139    /// This is similar to [`str::to_ascii_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_lowercase), but returns
140    /// original slice when possible:
141    /// ```
142    /// # use cow_utils::CowUtils;
143    /// # use assert_matches::assert_matches;
144    /// # use std::borrow::Cow;
145    /// assert_matches!("abcd123".cow_to_ascii_lowercase(), Cow::Borrowed("abcd123"));
146    /// assert_matches!("ὀδυσσεύς".cow_to_ascii_lowercase(), Cow::Borrowed("ὀδυσσεύς"));
147    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_ascii_lowercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
148    /// assert_matches!("AbCd".cow_to_ascii_lowercase(), Cow::Owned(s) if s == "abcd");
149    /// ```
150    fn cow_to_ascii_lowercase(self) -> Self::Output {
151        match self.as_bytes().iter().position(u8::is_ascii_uppercase) {
152            Some(pos) => {
153                let mut output = self.to_owned();
154                // We already know position of the first uppercase char,
155                // so no need to rescan the part before it.
156                unsafe { output.get_unchecked_mut(pos..) }.make_ascii_lowercase();
157                Cow::Owned(output)
158            }
159            None => Cow::Borrowed(self),
160        }
161    }
162
163    /// This is similar to [`str::to_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_lowercase), but returns
164    /// original slice when possible:
165    /// ```
166    /// # use cow_utils::CowUtils;
167    /// # use assert_matches::assert_matches;
168    /// # use std::borrow::Cow;
169    /// assert_matches!("abcd123".cow_to_lowercase(), Cow::Borrowed("abcd123"));
170    /// assert_matches!("ὀδυσσεύς".cow_to_lowercase(), Cow::Borrowed("ὀδυσσεύς"));
171    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_lowercase(), Cow::Owned(s) if s == "ὀδυσσεύς");
172    /// assert_matches!("AbCd".cow_to_lowercase(), Cow::Owned(s) if s == "abcd");
173    /// assert_matches!("ᾈ".cow_to_lowercase(), Cow::Owned(s) if s == "ᾀ");
174    /// ```
175    fn cow_to_lowercase(self) -> Self::Output {
176        // `str::to_lowercase` has a tricky edgecase with handling of Σ.
177        // We could optimise this by duplicating some code from stdlib,
178        // but it wouldn't be particularly clean, so for now just check
179        // if the string contains any uppercase char and let
180        // `str::to_lowercase` rescan it again.
181        if self.chars().any(changes_when_lowercased) {
182            Cow::Owned(self.to_lowercase())
183        } else {
184            Cow::Borrowed(self)
185        }
186    }
187
188    /// This is similar to [`str::to_ascii_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_uppercase), but returns
189    /// original slice when possible:
190    /// ```
191    /// # use cow_utils::CowUtils;
192    /// # use assert_matches::assert_matches;
193    /// # use std::borrow::Cow;
194    /// assert_matches!("ABCD123".cow_to_ascii_uppercase(), Cow::Borrowed("ABCD123"));
195    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_ascii_uppercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
196    /// assert_matches!("ὀδυσσεύς".cow_to_ascii_uppercase(), Cow::Borrowed("ὀδυσσεύς"));
197    /// assert_matches!("AbCd".cow_to_ascii_uppercase(), Cow::Owned(s) if s == "ABCD");
198    /// ```
199    fn cow_to_ascii_uppercase(self) -> Self::Output {
200        match self.as_bytes().iter().position(u8::is_ascii_lowercase) {
201            Some(pos) => {
202                let mut output = self.to_owned();
203                // We already know position of the first lowercase char,
204                // so no need to rescan the part before it.
205                unsafe { output.get_unchecked_mut(pos..) }.make_ascii_uppercase();
206                Cow::Owned(output)
207            }
208            None => Cow::Borrowed(self),
209        }
210    }
211
212    /// This is similar to [`str::to_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_uppercase), but returns
213    /// original slice when possible:
214    /// ```
215    /// # use cow_utils::CowUtils;
216    /// # use assert_matches::assert_matches;
217    /// # use std::borrow::Cow;
218    /// assert_matches!("ABCD123".cow_to_uppercase(), Cow::Borrowed("ABCD123"));
219    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_uppercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
220    /// assert_matches!("ὀδυσσεύς".cow_to_uppercase(), Cow::Owned(s) if s == "ὈΔΥΣΣΕΎΣ");
221    /// assert_matches!("AbCd".cow_to_uppercase(), Cow::Owned(s) if s == "ABCD");
222    /// assert_matches!("ᾈ".cow_to_uppercase(), Cow::Owned(s) if s == "ἈΙ");
223    /// ```
224    fn cow_to_uppercase(self) -> Self::Output {
225        match self.find(changes_when_uppercased) {
226            Some(pos) => {
227                let mut output = String::with_capacity(self.len());
228                // We already know position of the first lowercase char,
229                // so no need to rescan the part before it - just copy it.
230                output.push_str(unsafe { self.get_unchecked(..pos) });
231                output.extend(
232                    unsafe { self.get_unchecked(pos..) }
233                        .chars()
234                        .flat_map(char::to_uppercase),
235                );
236                Cow::Owned(output)
237            }
238            None => Cow::Borrowed(self),
239        }
240    }
241}
242
/// Returns `true` unless lowercasing `c` yields exactly the single char `c`.
fn changes_when_lowercased(c: char) -> bool {
    let mut mapped = c.to_lowercase();
    // Changed if the first produced char differs, or if more than one char
    // is produced.
    mapped.next() != Some(c) || mapped.next().is_some()
}
246
/// Returns `true` unless uppercasing `c` yields exactly the single char `c`.
fn changes_when_uppercased(c: char) -> bool {
    let mut mapped = c.to_uppercase();
    // Changed if the first produced char differs, or if more than one char
    // is produced (e.g. a char that expands when uppercased).
    mapped.next() != Some(c) || mapped.next().is_some()
}