pattern_3/strings/
char.rs

1use needle::*;
2use haystack::{Haystack, Span};
3use memchr::{memchr, memrchr};
4use std::ops::Range;
5
/// Search state for a single `char` needle.
///
/// Stores the character alongside its UTF-8 encoding so the needle can be
/// compared against haystack bytes directly.
#[derive(Debug, Clone)]
pub struct CharSearcher {
    /// Number of leading bytes of `utf8_encoded` that are in use.
    ///
    /// Safety invariant: this value must be less than 5, because it is used
    /// to slice the 4-byte `utf8_encoded` buffer.
    utf8_size: usize,

    /// UTF-8 encoded copy of the needle; only the first `utf8_size` bytes
    /// carry data, the rest stay zeroed.
    utf8_encoded: [u8; 4],

    /// The character being searched for.
    c: char,
}
17
18impl CharSearcher {
19    #[inline]
20    fn as_bytes(&self) -> &[u8] {
21        &self.utf8_encoded[..self.utf8_size]
22    }
23
24    #[inline]
25    fn last_byte(&self) -> u8 {
26        self.utf8_encoded[self.utf8_size - 1]
27    }
28
29    #[inline]
30    fn new(c: char) -> Self {
31        let mut utf8_encoded = [0u8; 4];
32        let utf8_size = c.encode_utf8(&mut utf8_encoded).len();
33        CharSearcher {
34            utf8_size,
35            utf8_encoded,
36            c,
37        }
38    }
39}
40
41unsafe impl Searcher<str> for CharSearcher {
42    #[inline]
43    fn search(&mut self, span: Span<&str>) -> Option<Range<usize>> {
44        let (hay, range) = span.into_parts();
45        let mut finger = range.start;
46        let bytes = hay.as_bytes();
47        loop {
48            let index = memchr(self.last_byte(), &bytes[finger..range.end])?;
49            finger += index + 1;
50            if finger >= self.utf8_size {
51                let found = &bytes[(finger - self.utf8_size)..finger];
52                if found == self.as_bytes() {
53                    return Some((finger - self.utf8_size)..finger);
54                }
55            }
56        }
57    }
58}
59
60unsafe impl Consumer<str> for CharSearcher {
61    #[inline]
62    fn consume(&mut self, span: Span<&str>) -> Option<usize> {
63        let mut consumer = Needle::<&[u8]>::into_consumer(self.as_bytes());
64        consumer.consume(span.as_bytes())
65    }
66
67    #[inline]
68    fn trim_start(&mut self, hay: &str) -> usize {
69        let mut consumer = Needle::<&str>::into_consumer(|c: char| c == self.c);
70        consumer.trim_start(hay)
71    }
72}
73
74unsafe impl ReverseSearcher<str> for CharSearcher {
75    #[inline]
76    fn rsearch(&mut self, span: Span<&str>) -> Option<Range<usize>> {
77        let (hay, range) = span.into_parts();
78        let start = range.start;
79        let mut bytes = hay[range].as_bytes();
80        loop {
81            let index = memrchr(self.last_byte(), bytes)? + 1;
82            if index >= self.utf8_size {
83                let found = &bytes[(index - self.utf8_size)..index];
84                if found == self.as_bytes() {
85                    let index = index + start;
86                    return Some((index - self.utf8_size)..index);
87                }
88            }
89            bytes = &bytes[..(index - 1)];
90        }
91    }
92}
93
94unsafe impl ReverseConsumer<str> for CharSearcher {
95    #[inline]
96    fn rconsume(&mut self, span: Span<&str>) -> Option<usize> {
97        if self.utf8_size == 1 {
98            let mut consumer = Needle::<&[u8]>::into_consumer(|b: &u8| *b == self.c as u8);
99            consumer.rconsume(span.as_bytes())
100        } else {
101            let mut consumer = Needle::<&str>::into_consumer(|c: char| c == self.c);
102            consumer.rconsume(span)
103        }
104    }
105
106    #[inline]
107    fn trim_end(&mut self, haystack: &str) -> usize {
108        let mut consumer = Needle::<&str>::into_consumer(|c: char| c == self.c);
109        consumer.trim_end(haystack)
110    }
111}
112
113unsafe impl DoubleEndedSearcher<str> for CharSearcher {}
114unsafe impl DoubleEndedConsumer<str> for CharSearcher {}
115
116impl<H: Haystack<Target = str>> Needle<H> for char {
117    type Searcher = CharSearcher;
118    type Consumer = CharSearcher;
119
120    #[inline]
121    fn into_searcher(self) -> Self::Searcher {
122        CharSearcher::new(self)
123    }
124
125    #[inline]
126    fn into_consumer(self) -> Self::Consumer {
127        CharSearcher::new(self)
128    }
129}