str_utils/
escape_characters.rs

1use alloc::{borrow::Cow, str::from_utf8_unchecked, vec::Vec};
2
/// Extension trait that adds the `escape_characters` and `escape_ascii_characters` methods to
/// `&str` and `Cow<str>`.
///
/// Typical use cases include preparing strings for SQL `LIKE` queries or other contexts where certain characters need to be escaped.
///
/// Both methods consume `self` and return a `Cow<'a, str>`. The implementations in this file
/// hand the input back unchanged when `escaped_characters` is empty or when nothing in the
/// string needs escaping.
pub trait EscapeCharacters<'a> {
    /// Escapes all occurrences of the specified characters within a string.
    ///
    /// This function scans the input string for any character that matches one of
    /// the `escaped_characters` or the `escape_character` itself, and prefixes
    /// those characters with the provided `escape_character`.
    ///
    /// For example, with `escape_character = '\'` and `escaped_characters = &['%', '_']`,
    /// the input `50%_off` becomes `50\%\_off`.
    fn escape_characters(self, escape_character: char, escaped_characters: &[char])
        -> Cow<'a, str>;

    /// Escapes ASCII characters within a UTF-8 string.
    ///
    /// Similar to [`EscapeCharacters::escape_characters`], but operates directly on bytes instead of Unicode scalar values.
    /// This version is optimized for ASCII-only escaping and avoids unnecessary Unicode conversions.
    ///
    /// `escape_character` must be an ASCII byte (`< 0x80`): the `&str` implementation copies it
    /// byte-for-byte into the output buffer, which is then turned into a `String` without
    /// UTF-8 validation.
    fn escape_ascii_characters(
        self,
        escape_character: u8,
        escaped_characters: &[u8],
    ) -> Cow<'a, str>;
}
25
26impl<'a> EscapeCharacters<'a> for &'a str {
27    fn escape_characters(
28        self,
29        escape_character: char,
30        escaped_characters: &[char],
31    ) -> Cow<'a, str> {
32        let s = self;
33
34        if escaped_characters.is_empty() {
35            return Cow::Borrowed(s);
36        }
37
38        let mut p = 0;
39
40        let mut chars = s.chars();
41
42        let need_escape = |c: char| {
43            c == escape_character
44                || escaped_characters.iter().any(|escaped_character| c.eq(escaped_character))
45        };
46
47        let first_c = loop {
48            let c = if let Some(c) = chars.next() {
49                c
50            } else {
51                return Cow::Borrowed(s);
52            };
53
54            if need_escape(c) {
55                break c;
56            }
57
58            p += c.len_utf8();
59        };
60
61        let mut new_s = String::with_capacity(s.len() + 1);
62
63        new_s.push_str(unsafe { from_utf8_unchecked(&s.as_bytes()[0..p]) });
64        new_s.push(escape_character);
65        new_s.push(first_c);
66
67        for c in chars {
68            if need_escape(c) {
69                new_s.push(escape_character);
70            }
71
72            new_s.push(c);
73        }
74
75        Cow::Owned(new_s)
76    }
77
78    fn escape_ascii_characters(
79        self,
80        escape_character: u8,
81        escaped_characters: &[u8],
82    ) -> Cow<'a, str> {
83        let s = self;
84
85        if escaped_characters.is_empty() {
86            return Cow::Borrowed(s);
87        }
88
89        let bytes = s.as_bytes();
90
91        let length = bytes.len();
92
93        let mut p = 0;
94
95        let need_escape = |b: u8| {
96            b == escape_character
97                || escaped_characters.iter().any(|escaped_character| b.eq(escaped_character))
98        };
99
100        loop {
101            if p == length {
102                return Cow::Borrowed(s);
103            }
104
105            let e = bytes[p];
106
107            let width = unsafe { utf8_width::get_width_assume_valid(e) };
108
109            if width == 1 && need_escape(e) {
110                break;
111            }
112
113            p += width;
114        }
115
116        let mut new_v = Vec::with_capacity(bytes.len() + 1);
117
118        new_v.extend_from_slice(&bytes[..p]);
119        new_v.push(escape_character);
120
121        let mut start = p;
122
123        p += 1;
124
125        loop {
126            if p == length {
127                break;
128            }
129
130            let e = bytes[p];
131
132            let width = unsafe { utf8_width::get_width_assume_valid(e) };
133
134            if width == 1 && need_escape(e) {
135                new_v.extend_from_slice(&bytes[start..p]);
136                start = p + 1;
137
138                new_v.push(escape_character);
139                new_v.push(e);
140            }
141
142            p += width;
143        }
144
145        new_v.extend_from_slice(&bytes[start..p]);
146
147        Cow::Owned(unsafe { String::from_utf8_unchecked(new_v) })
148    }
149}
150
151impl<'a> EscapeCharacters<'a> for Cow<'a, str> {
152    #[inline]
153    fn escape_characters(
154        self,
155        escape_character: char,
156        escaped_characters: &[char],
157    ) -> Cow<'a, str> {
158        match self {
159            Cow::Borrowed(s) => s.escape_characters(escape_character, escaped_characters),
160            Cow::Owned(s) => {
161                match s.escape_characters(escape_character, escaped_characters) {
162                    Cow::Borrowed(_) => {
163                        // it changes nothing
164                        // if there were any characters that needed to be escaped, it had to be `Cow::Owned`
165                        Cow::Owned(s)
166                    },
167                    Cow::Owned(s) => Cow::Owned(s),
168                }
169            },
170        }
171    }
172
173    #[inline]
174    fn escape_ascii_characters(
175        self,
176        escape_character: u8,
177        escaped_characters: &[u8],
178    ) -> Cow<'a, str> {
179        match self {
180            Cow::Borrowed(s) => s.escape_ascii_characters(escape_character, escaped_characters),
181            Cow::Owned(s) => {
182                match s.escape_ascii_characters(escape_character, escaped_characters) {
183                    Cow::Borrowed(_) => {
184                        // it changes nothing
185                        // if there were any characters that needed to be escaped, it had to be `Cow::Owned`
186                        Cow::Owned(s)
187                    },
188                    Cow::Owned(s) => Cow::Owned(s),
189                }
190            },
191        }
192    }
193}