monster_regex/
haystack.rs

1/// A trait for text that can be searched by the regex engine.
2/// This abstraction allows searching over non-contiguous memory (like ropes)
3/// without flattening to a single string.
4pub trait Haystack: Copy + Clone {
5    type Cursor: HaystackCursor;
6
7    /// Total length of the haystack in bytes
8    fn len(&self) -> usize;
9
10    fn is_empty(&self) -> bool {
11        self.len() == 0
12    }
13
14    /// Get a cursor for streaming access starting at `pos`
15    fn cursor_at(&self, pos: usize) -> Self::Cursor;
16
17    /// Get character at position
18    fn char_at(&self, pos: usize) -> Option<(char, usize)>;
19
20    /// Get character before position
21    fn char_before(&self, pos: usize) -> Option<char>;
22
23    /// Check if haystack starts with literal at pos
24    fn starts_with(&self, pos: usize, literal: &str) -> bool;
25
26    /// Check if range matches another range
27    fn matches_range(&self, pos: usize, other_start: usize, other_end: usize) -> bool;
28
29    /// Find the first occurrence of a byte starting at `pos`.
30    /// Returns `None` if not found.
31    fn find_byte(&self, byte: u8, pos: usize) -> Option<usize>;
32}
33
/// A cloneable stream of characters read from a haystack.
///
/// Besides the usual [`Iterator`] interface, a cursor can report the
/// upcoming character without consuming it.
pub trait HaystackCursor: Clone + Iterator<Item = char> {
    /// Returns the next character while leaving the cursor where it is.
    fn peek(&self) -> Option<char>;
}
38
39impl<'a> Haystack for &'a str {
40    type Cursor = StrCursor<'a>;
41
42    #[inline]
43    fn len(&self) -> usize {
44        str::len(self)
45    }
46
47    #[inline]
48    fn char_at(&self, pos: usize) -> Option<(char, usize)> {
49        if pos >= self.len() {
50            return None;
51        }
52        let c = self[pos..].chars().next()?;
53        Some((c, c.len_utf8()))
54    }
55
56    #[inline]
57    fn char_before(&self, pos: usize) -> Option<char> {
58        if pos == 0 || pos > self.len() {
59            return None;
60        }
61        self[..pos].chars().last()
62    }
63
64    #[inline]
65    fn starts_with(&self, pos: usize, literal: &str) -> bool {
66        if pos > self.len() {
67            return false;
68        }
69        self[pos..].starts_with(literal)
70    }
71
72    #[inline]
73    fn matches_range(&self, pos: usize, other_start: usize, other_end: usize) -> bool {
74        if other_end > self.len() || other_start > other_end {
75            return false;
76        }
77        let substring = &self[other_start..other_end];
78        self.starts_with(pos, substring)
79    }
80
81    #[inline]
82    fn cursor_at(&self, pos: usize) -> Self::Cursor {
83        StrCursor {
84            chars: self[pos..].chars(),
85        }
86    }
87
88    #[inline]
89    fn find_byte(&self, byte: u8, pos: usize) -> Option<usize> {
90        if pos >= self.len() {
91            return None;
92        }
93        memchr::memchr(byte, self[pos..].as_bytes()).map(|i| i + pos)
94    }
95}
96
/// Character cursor over a string slice.
///
/// A thin wrapper around [`std::str::Chars`]; cloning it copies the
/// underlying iterator state, which is what makes non-consuming lookahead
/// possible.
#[derive(Clone, Debug)]
pub struct StrCursor<'a> {
    /// Characters that have not been consumed yet.
    chars: std::str::Chars<'a>,
}

impl<'a> Iterator for StrCursor<'a> {
    type Item = char;

    /// Consumes and returns the next character, or `None` at the end.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.chars.next()
    }

    /// Forwards the bounds of the wrapped iterator so that `collect` and
    /// `extend` can size their buffers; the default would be `(0, None)`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.chars.size_hint()
    }
}
110
111impl<'a> HaystackCursor for StrCursor<'a> {
112    #[inline]
113    fn peek(&self) -> Option<char> {
114        self.chars.clone().next()
115    }
116}