// iri_string/parser/str.rs

//! Functions for common string operations.
2
3use core::ops::{self, RangeFrom, RangeTo};
4
5pub(crate) use self::maybe_pct_encoded::{
6    process_percent_encoded_best_effort, PctEncodedFragments,
7};
8use crate::parser::trusted as trusted_parser;
9
10mod maybe_pct_encoded;
11
/// Returns the inner string if `s` is wrapped by the `open` and `close` bytes.
///
/// `s` counts as wrapped when it is at least two bytes long, its first byte equals `open`,
/// and its last byte equals `close`. The returned slice is `s` without those two bytes.
///
/// Returns `None` if `s` is too short, if the delimiters do not match, or if removing the
/// delimiter bytes would split a multi-byte character (possible only for non-ASCII
/// `open`/`close`). The function therefore never panics.
#[must_use]
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
    match s.as_bytes() {
        // `..` also matches an empty middle, so this arm covers every length >= 2.
        [first, .., last] if *first == open && *last == close => {
            // Checked slicing: the positions next to the delimiters may not be char
            // boundaries when `open`/`close` are not ASCII.
            s.get(1..s.len() - 1)
        }
        _ => None,
    }
}
25
/// Scans `haystack` from the front and returns whichever of the two needles occurs first.
///
/// Returns `None` when neither `needle1` nor `needle2` is present.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    for &byte in haystack {
        if byte == needle1 || byte == needle2 {
            return Some(byte);
        }
    }
    None
}
36
/// Returns whichever of the two needles occurs first in `haystack`.
///
/// The position is located with `memchr::memchr2`; `None` is returned when that search
/// finds nothing.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    let pos = memchr::memchr2(needle1, needle2, haystack)?;
    Some(haystack[pos])
}
44
/// Returns the index of the last occurrence of `needle` in `haystack`.
///
/// Intended as a (possibly) faster stand-in for `str::rfind` with an ASCII needle.
/// Returns `None` when `needle` does not occur.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    // Walk the indices from the back; the first hit is the last occurrence.
    (0..haystack.len()).rev().find(|&idx| haystack[idx] == needle)
}
52
/// Returns the index of the last occurrence of `needle` in `haystack`.
///
/// Intended as a (possibly) faster stand-in for `str::rfind` with an ASCII needle; the
/// search itself is delegated to `memchr::memrchr`.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    use memchr::memrchr;

    memrchr(needle, haystack)
}
60
/// Splits `haystack` at the first occurrence of `needle`.
///
/// Returns `(before, rest)`, where `rest` starts with the needle itself.
/// If `needle` is not found, returns `None`.
///
/// `needle` is expected to be an ASCII byte; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(pos))
}
72
/// Splits `haystack` at the first occurrence of `needle`, located with `memchr::memchr`.
///
/// Returns `(before, rest)`, where `rest` starts with the needle itself.
/// If `needle` is not found, returns `None`.
///
/// `needle` is expected to be an ASCII byte; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
81
/// Splits `haystack` at the last occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let bytes = haystack.as_bytes();
    let pos = bytes
        .iter()
        .rposition(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(pos))
}
93
/// Splits `haystack` at the last occurrence of either needle, located with
/// `memchr::memrchr2`.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
102
/// Splits `haystack` at the first occurrence of either needle.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let needles = [needle1, needle2];
    let pos = haystack.bytes().position(|b| needles.contains(&b))?;
    Some(haystack.split_at(pos))
}
114
/// Splits `haystack` at the first occurrence of either needle, located with
/// `memchr::memchr2`.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
123
/// Splits `haystack` at the first occurrence of any of the three needles.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let needles = [needle1, needle2, needle3];
    let pos = haystack.bytes().position(|b| needles.contains(&b))?;
    Some(haystack.split_at(pos))
}
140
/// Splits `haystack` at the first occurrence of any of the three needles, located with
/// `memchr::memchr3`.
///
/// Returns `(before, rest)`, where `rest` starts with the matched needle.
/// If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may be found inside a
/// multi-byte character, in which case the split panics.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let pos = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
155
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; neither part contains the matched byte.
/// If `needle` is not found, returns `None`.
///
/// Works for any byte-viewable type that can be sliced by `..pos` and `pos..` into
/// itself (for example `str` and `[u8]`).
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole<T>(haystack: &T, needle: u8) -> Option<(&T, &T)>
where
    T: ?Sized
        + AsRef<[u8]>
        + ops::Index<RangeFrom<usize>, Output = T>
        + ops::Index<RangeTo<usize>, Output = T>,
{
    let pos = haystack.as_ref().iter().position(|&b| b == needle)?;
    let before = &haystack[..pos];
    // Skip the needle byte itself.
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
174
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// The position is located with `memchr::memchr`. Returns `(before, after)`; neither
/// part contains the matched byte. If `needle` is not found, returns `None`.
///
/// Works for any byte-viewable type that can be sliced by `..pos` and `pos..` into
/// itself (for example `str` and `[u8]`).
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole<T>(haystack: &T, needle: u8) -> Option<(&T, &T)>
where
    T: ?Sized
        + AsRef<[u8]>
        + ops::Index<RangeFrom<usize>, Output = T>
        + ops::Index<RangeTo<usize>, Output = T>,
{
    let pos = memchr::memchr(needle, haystack.as_ref())?;
    let before = &haystack[..pos];
    // Skip the needle byte itself.
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
189
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, found, after)`, where `found` is the needle byte that matched and
/// neither string part contains it. If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may make the slicing
/// fall inside a multi-byte character and panic.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = bytes
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    let found = bytes[pos];
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}
211
/// Splits `haystack` around the first occurrence of either needle, located with
/// `memchr::memchr2`.
///
/// Returns `(before, found, after)`, where `found` is the needle byte that matched and
/// neither string part contains it. If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may make the slicing
/// fall inside a multi-byte character and panic.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = memchr::memchr2(needle1, needle2, bytes)?;
    let found = bytes[pos];
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}
230
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, found, after)`, where `found` is the needle byte that matched and
/// neither string part contains it. If no needles are found, returns `None`.
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may make the slicing
/// fall inside a multi-byte character and panic.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let bytes = haystack.as_bytes();
    let pos = bytes.iter().position(|b| needles.contains(b))?;
    let found = bytes[pos];
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}
254
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, found, after)`, where `found` is the needle byte that matched and
/// neither string part contains it. If no needles are found, returns `None`.
///
/// The search runs in two steps: `memchr::memchr3` looks for the first three needles,
/// and `memchr::memchr` then looks for `needle4` only in the part before that hit (or
/// in the whole input when the first step found nothing).
///
/// The needles are expected to be ASCII bytes; a non-ASCII byte may make the slicing
/// fall inside a multi-byte character and panic.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) {
        // An earlier `needle4` wins; otherwise keep the three-needle hit.
        Some(limit) => memchr::memchr(needle4, &bytes[..limit]).unwrap_or(limit),
        None => memchr::memchr(needle4, bytes)?,
    };
    let found = bytes[pos];
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}
280
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; neither part contains the matched byte.
/// If `needle` is not found, returns `None`.
///
/// `needle` is expected to be an ASCII byte; a non-ASCII byte may make the slicing fall
/// inside a multi-byte character and panic.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().rposition(|&b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
292
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// The position is located with `memchr::memrchr`. Returns `(before, after)`; neither
/// part contains the matched byte. If `needle` is not found, returns `None`.
///
/// `needle` is expected to be an ASCII byte; a non-ASCII byte may make the slicing fall
/// inside a multi-byte character and panic.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
302
/// Returns `true` if the string consists only of allowed characters and well-formed
/// percent-encoded triplets.
///
/// * A `%` must be followed by exactly two ASCII hexadecimal digits; those digits are
///   not passed to `pred_ascii`.
/// * Every other ASCII character must satisfy `pred_ascii`.
/// * Every non-ASCII character must satisfy `pred_nonascii`.
///
/// An empty string is accepted.
#[must_use]
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    // `true` only for a present ASCII hexdigit; end of input counts as a failure.
    let is_hexdigit = |c: Option<char>| matches!(c, Some(c) if c.is_ascii_hexdigit());

    let mut rest = s.chars();
    loop {
        let current = match rest.next() {
            Some(c) => c,
            // Everything was consumed without hitting an unacceptable character.
            None => return true,
        };
        let acceptable = if current == '%' {
            // Percent-encoded triplet: upper nibble, then lower nibble. The lower nibble
            // is consumed only when the upper one was valid.
            is_hexdigit(rest.next()) && is_hexdigit(rest.next())
        } else if current.is_ascii() {
            pred_ascii(current as u8)
        } else {
            pred_nonascii(current)
        };
        if !acceptable {
            return false;
        }
    }
}
336
/// Returns `true` if the given bytes start with two ASCII hexadecimal digits.
///
/// Inputs shorter than two bytes yield `false`.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    s.get(..2)
        .map_or(false, |pair| pair.iter().all(u8::is_ascii_hexdigit))
}
345
346/// Decodes the starting two hexdigits if available, and returns the byte and the rest.
347#[must_use]
348pub(crate) fn strip_decode_xdigits2<T>(s: &T) -> (Option<u8>, &T)
349where
350    T: ?Sized + AsRef<[u8]> + ops::Index<RangeFrom<usize>, Output = T>,
351{
352    if starts_with_double_hexdigits(s.as_ref()) {
353        let (decoded, rest) = trusted_parser::take_xdigits2(s);
354        (Some(decoded), rest)
355    } else {
356        (None, s)
357    }
358}
359
/// Strips the first character if it is the given ASCII character, and returns the rest.
///
/// Returns `None` when `s` is empty or starts with a different byte.
///
/// # Precondition
///
/// `prefix` must be an ASCII byte (checked with `debug_assert!`). Only then is the
/// one-byte strip guaranteed to end on a character boundary.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes().first() {
        Some(&first) if first == prefix => Some(&s[1..]),
        _ => None,
    }
}
374
/// Splits the given string into the first character and the rest.
///
/// Returns `Some((first_char, rest_str))`, or `None` when the string is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // The rest begins right after the UTF-8 encoding of the first character.
    Some((first, &s[first.len_utf8()..]))
}
384}