iri_string/parser/
str.rs

1//! Functions for common string operations.
2
3pub(crate) use self::maybe_pct_encoded::{
4    process_percent_encoded_best_effort, PctEncodedFragments,
5};
6
7mod maybe_pct_encoded;
8
/// Strips a one-byte opening delimiter and a one-byte closing delimiter from `s`.
///
/// Returns `Some(inner)` when `s` is at least two bytes long, starts with `open`, and ends with
/// `close`. Returns `None` otherwise; in particular a one-byte string is never treated as
/// wrapped, even when `open == close`.
///
/// `open` and `close` are expected to be ASCII bytes. With a non-ASCII delimiter the slice below
/// could start or end inside a multi-byte character and panic.
#[must_use]
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
    let bytes = s.as_bytes();
    let len = bytes.len();
    // Two distinct bytes are required: one for each delimiter.
    if len < 2 {
        return None;
    }
    if bytes[0] != open || bytes[len - 1] != close {
        return None;
    }
    Some(&s[1..len - 1])
}
22
/// Scans `haystack` from the front and returns whichever of the two needles occurs first.
///
/// Returns `None` if neither `needle1` nor `needle2` appears in `haystack`.
///
/// This is the fallback used when the `memchr` feature is disabled.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    for &byte in haystack {
        if byte == needle1 || byte == needle2 {
            return Some(byte);
        }
    }
    None
}
33
/// Scans `haystack` from the front and returns whichever of the two needles occurs first.
///
/// Returns `None` if neither `needle1` nor `needle2` appears in `haystack`.
///
/// This variant delegates the search to `memchr::memchr2`.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    let first_hit = memchr::memchr2(needle1, needle2, haystack)?;
    Some(haystack[first_hit])
}
41
/// Returns the index of the last occurrence of the byte `needle` in `haystack`.
///
/// Returns `None` if `needle` does not appear.
///
/// This is the fallback used when the `memchr` feature is disabled.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    // Walk the indices back to front so that the first match is the last occurrence.
    (0..haystack.len()).rev().find(|&idx| haystack[idx] == needle)
}
49
/// Returns the index of the last occurrence of the byte `needle` in `haystack`.
///
/// Returns `None` if `needle` does not appear.
///
/// This variant delegates the search to `memchr::memrchr`.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    memchr::memrchr(needle, haystack)
}
57
/// Splits `haystack` immediately before the first occurrence of `needle`.
///
/// Returns `(before, rest)`, where `rest` starts with the needle itself, or `None` if `needle`
/// does not appear.
///
/// `needle` is expected to be an ASCII byte: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let split_pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(split_pos))
}
69
/// Splits `haystack` immediately before the first occurrence of `needle`.
///
/// Returns `(before, rest)`, where `rest` starts with the needle itself, or `None` if `needle`
/// does not appear. The search is delegated to `memchr::memchr`.
///
/// `needle` is expected to be an ASCII byte: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let split_pos = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(split_pos))
}
78
/// Splits `haystack` immediately before the last occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if neither
/// `needle1` nor `needle2` appears.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_pos = haystack
        .as_bytes()
        .iter()
        .rposition(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(split_pos))
}
90
/// Splits `haystack` immediately before the last occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if neither
/// `needle1` nor `needle2` appears. The search is delegated to `memchr::memrchr2`.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_pos = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(split_pos))
}
99
/// Splits `haystack` immediately before the first occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if neither
/// `needle1` nor `needle2` appears.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_pos = haystack
        .as_bytes()
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(split_pos))
}
111
/// Splits `haystack` immediately before the first occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if neither
/// `needle1` nor `needle2` appears. The search is delegated to `memchr::memchr2`.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_pos = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(split_pos))
}
120
/// Splits `haystack` immediately before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if none of
/// the needles appears.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let needles = [needle1, needle2, needle3];
    let split_pos = haystack
        .as_bytes()
        .iter()
        .position(|b| needles.contains(b))?;
    Some(haystack.split_at(split_pos))
}
137
/// Splits `haystack` immediately before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle, or `None` if none of
/// the needles appears. The search is delegated to `memchr::memchr3`.
///
/// The needles are expected to be ASCII bytes: a non-ASCII byte can match in the middle of a
/// multi-byte character, in which case `split_at` panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let split_pos = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(split_pos))
}
152
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle itself.
///
/// Returns `(before, after)`, or `None` if `needle` does not appear.
///
/// `needle` is expected to be an ASCII byte: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = haystack.as_bytes().iter().position(|&b| b == needle)?;
    let (before, from_needle) = haystack.split_at(hole);
    // `from_needle` starts with the one-byte needle; skip it.
    Some((before, &from_needle[1..]))
}
164
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle itself.
///
/// Returns `(before, after)`, or `None` if `needle` does not appear. The search is delegated
/// to `memchr::memchr`.
///
/// `needle` is expected to be an ASCII byte: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = memchr::memchr(needle, haystack.as_bytes())?;
    let (before, from_needle) = haystack.split_at(hole);
    // `from_needle` starts with the one-byte needle; skip it.
    Some((before, &from_needle[1..]))
}
174
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched, after)`, where `matched` is the needle byte that was found, or
/// `None` if neither `needle1` nor `needle2` appears.
///
/// The needles are expected to be ASCII bytes: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let hole = bytes
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    let matched = bytes[hole];
    Some((&haystack[..hole], matched, &haystack[hole + 1..]))
}
196
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched, after)`, where `matched` is the needle byte that was found, or
/// `None` if neither `needle1` nor `needle2` appears. The search is delegated to
/// `memchr::memchr2`.
///
/// The needles are expected to be ASCII bytes: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let hole = memchr::memchr2(needle1, needle2, bytes)?;
    let matched = bytes[hole];
    Some((&haystack[..hole], matched, &haystack[hole + 1..]))
}
215
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched, after)`, where `matched` is the needle byte that was found, or
/// `None` if none of the needles appears.
///
/// The needles are expected to be ASCII bytes: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let bytes = haystack.as_bytes();
    let hole = bytes.iter().position(|b| needles.contains(b))?;
    let matched = bytes[hole];
    Some((&haystack[..hole], matched, &haystack[hole + 1..]))
}
239
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched, after)`, where `matched` is the needle byte that was found, or
/// `None` if none of the needles appears.
///
/// The search is done in two steps: `memchr::memchr3` locates the first of `needle1`,
/// `needle2`, and `needle3`, and `memchr::memchr` then looks for `needle4` only in the part
/// of the input before that hit.
///
/// The needles are expected to be ASCII bytes: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let first_of_three = memchr::memchr3(needle1, needle2, needle3, bytes);
    // A `needle4` only wins if it lies before the first hit of the other three; when there is
    // no such hit, the whole input has to be searched.
    let search_end = first_of_three.unwrap_or(bytes.len());
    let hole = memchr::memchr(needle4, &bytes[..search_end]).or(first_of_three)?;
    let matched = bytes[hole];
    Some((&haystack[..hole], matched, &haystack[hole + 1..]))
}
265
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle itself.
///
/// Returns `(before, after)`, or `None` if `needle` does not appear.
///
/// `needle` is expected to be an ASCII byte: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = haystack.as_bytes().iter().rposition(|&b| b == needle)?;
    let (before, from_needle) = haystack.split_at(hole);
    // `from_needle` starts with the one-byte needle; skip it.
    Some((before, &from_needle[1..]))
}
277
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle itself.
///
/// Returns `(before, after)`, or `None` if `needle` does not appear. The search is delegated
/// to `memchr::memrchr`.
///
/// `needle` is expected to be an ASCII byte: with a non-ASCII byte the cut points may fall
/// inside a multi-byte character, in which case slicing panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = memchr::memrchr(needle, haystack.as_bytes())?;
    let (before, from_needle) = haystack.split_at(hole);
    // `from_needle` starts with the one-byte needle; skip it.
    Some((before, &from_needle[1..]))
}
287
/// Checks that every character of `s` is accepted by the matching predicate.
///
/// ASCII characters are passed to `pred_ascii` as bytes; all other characters are passed to
/// `pred_nonascii` as `char`s. Characters are examined from left to right and the check stops
/// at the first rejected one.
///
/// Returns `true` for the empty string.
#[must_use]
fn satisfy_chars<F, G>(s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    s.chars().all(|c| {
        if c.is_ascii() {
            // Lossless: an ASCII character always fits into a single byte.
            pred_ascii(c as u8)
        } else {
            pred_nonascii(c)
        }
    })
}
326
327/// Returns `true` if the string only contains the allowed characters and percent-encoded char.
328#[must_use]
329pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
330    mut s: &str,
331    pred_ascii: F,
332    pred_nonascii: G,
333) -> bool
334where
335    F: Copy + Fn(u8) -> bool,
336    G: Copy + Fn(char) -> bool,
337{
338    while let Some((prefix, suffix)) = find_split_hole(s, b'%') {
339        // Verify strings before the percent-encoded char.
340        if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) {
341            return false;
342        }
343
344        // Verify the percent-encoded char.
345        if !starts_with_double_hexdigits(suffix.as_bytes()) {
346            return false;
347        }
348
349        // Advance the cursor.
350        s = &suffix[2..];
351    }
352
353    // Verify the rest.
354    satisfy_chars(s, pred_ascii, pred_nonascii)
355}
356
/// Tells whether the first two bytes of `s` are both ASCII hexadecimal digits.
///
/// Returns `false` if `s` is shorter than two bytes.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    s.len() >= 2 && s[..2].iter().all(u8::is_ascii_hexdigit)
}
365
/// Removes a leading `prefix` byte from `s`.
///
/// Returns the remainder of the string if its first byte equals `prefix`, and `None` otherwise
/// (including when `s` is empty).
///
/// # Precondition
///
/// `prefix` must be an ASCII byte. This is checked with a `debug_assert!`.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes() {
        // An ASCII first byte is a complete character, so index 1 is a valid boundary.
        [first, ..] if *first == prefix => Some(&s[1..]),
        _ => None,
    }
}
380
/// Separates the leading character of `s` from the remainder.
///
/// Returns `Some((first_char, rest_str))`, or `None` if `s` is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // The rest starts right after the UTF-8 encoding of the first character.
    Some((first, &s[first.len_utf8()..]))
}