// hpx_browser/css_parser/source.rs
/// A position inside the source text.
///
/// `offset` is a byte offset into the input; `line` and `column` are 1-based,
/// as established by the [`Default`] value below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceLocation {
    /// Byte offset from the beginning of the input.
    pub offset: usize,
    /// Line number, starting at 1.
    pub line: u32,
    /// Column number within the line, starting at 1.
    pub column: u32,
}

impl Default for SourceLocation {
    /// Returns the location of the very first character: offset 0, line 1, column 1.
    fn default() -> Self {
        SourceLocation { offset: 0, line: 1, column: 1 }
    }
}
18
19pub struct SourceInput<'a> {
21 input: &'a str,
22 bytes: &'a [u8],
23 pos: usize,
24 line: u32,
25 column: u32,
26}
27
28impl<'a> SourceInput<'a> {
29 pub fn new(input: &'a str) -> Self {
30 let bytes = input.as_bytes();
31 let mut pos = 0;
32
33 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
35 pos = 3;
36 }
37
38 Self {
39 input,
40 bytes,
41 pos,
42 line: 1,
43 column: 1,
44 }
45 }
46
47 pub fn location(&self) -> SourceLocation {
48 SourceLocation {
49 offset: self.pos,
50 line: self.line,
51 column: self.column,
52 }
53 }
54
55 pub fn pos(&self) -> usize {
56 self.pos
57 }
58
59 pub fn is_eof(&self) -> bool {
60 self.pos >= self.bytes.len()
61 }
62
63 pub fn current_char(&self) -> Option<char> {
64 if self.pos >= self.bytes.len() {
65 return None;
66 }
67 let ch = self.decode_char_at(self.pos);
68 Some(Self::normalize(ch))
69 }
70
71 pub fn peek_char(&self, n: usize) -> Option<char> {
72 let mut pos = self.pos;
73 for _ in 0..n {
74 if pos >= self.bytes.len() {
75 return None;
76 }
77 let ch = self.decode_char_at(pos);
78 pos += ch.len_utf8();
79 if ch == '\r' && pos < self.bytes.len() && self.bytes[pos] == b'\n' {
80 pos += 1;
81 }
82 }
83 if pos >= self.bytes.len() {
84 return None;
85 }
86 let ch = self.decode_char_at(pos);
87 Some(Self::normalize(ch))
88 }
89
90 pub fn next_char(&mut self) -> Option<char> {
91 if self.pos >= self.bytes.len() {
92 return None;
93 }
94
95 let ch = self.decode_char_at(self.pos);
96 self.pos += ch.len_utf8();
97
98 let normalized = Self::normalize(ch);
99
100 if ch == '\r' {
101 if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
102 self.pos += 1;
103 }
104 self.line += 1;
105 self.column = 1;
106 return Some('\n');
107 }
108
109 if ch == '\n' || ch == '\x0C' {
110 self.line += 1;
111 self.column = 1;
112 } else {
113 self.column += 1;
114 }
115
116 Some(normalized)
117 }
118
119 pub fn reconsume(&mut self) {
120 if self.pos > 0 {
121 let mut back = self.pos - 1;
122 while back > 0 && !self.input.is_char_boundary(back) {
123 back -= 1;
124 }
125 self.pos = back;
126 if self.column > 1 {
127 self.column -= 1;
128 }
129 }
130 }
131
132 pub fn slice(&self, start: usize, end: usize) -> &'a str {
133 &self.input[start..end]
134 }
135
136 pub fn slice_from(&self, start: usize) -> &'a str {
137 &self.input[start..self.pos]
138 }
139
140 pub fn starts_with_ignore_case(&self, pattern: &str) -> bool {
141 let remaining = &self.bytes[self.pos..];
142 if remaining.len() < pattern.len() {
143 return false;
144 }
145 remaining[..pattern.len()]
146 .iter()
147 .zip(pattern.bytes())
148 .all(|(&a, b)| a.eq_ignore_ascii_case(&b))
149 }
150
151 fn decode_char_at(&self, pos: usize) -> char {
152 self.input[pos..].chars().next().unwrap_or('\0')
153 }
154
155 fn normalize(ch: char) -> char {
156 match ch {
157 '\0' => '\u{FFFD}',
158 '\x0C' => '\n',
159 other => other,
160 }
161 }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads `count` characters from the start of `text` using `next_char`.
    fn read(text: &str, count: usize) -> Vec<Option<char>> {
        let mut src = SourceInput::new(text);
        (0..count).map(|_| src.next_char()).collect()
    }

    #[test]
    fn bom_is_skipped() {
        // The byte order mark must not be reported as a character.
        assert_eq!(read("\u{FEFF}hello", 1), [Some('h')]);
    }

    #[test]
    fn cr_normalized_to_lf() {
        assert_eq!(read("a\rb", 3), [Some('a'), Some('\n'), Some('b')]);
    }

    #[test]
    fn crlf_normalized_to_single_lf() {
        assert_eq!(read("a\r\nb", 3), [Some('a'), Some('\n'), Some('b')]);
    }

    #[test]
    fn null_replaced_with_replacement() {
        assert_eq!(read("a\0b", 3), [Some('a'), Some('\u{FFFD}'), Some('b')]);
    }

    #[test]
    fn location_tracking() {
        // Expected (offset, line, column) before any read and after each of three reads.
        let expected = [(0, 1, 1), (1, 1, 2), (2, 1, 3), (3, 2, 1)];

        let mut src = SourceInput::new("ab\ncd");
        for (step, &(offset, line, column)) in expected.iter().enumerate() {
            if step > 0 {
                src.next_char();
            }
            assert_eq!(
                src.location(),
                SourceLocation {
                    offset,
                    line,
                    column
                }
            );
        }
    }

    #[test]
    fn peek_ahead() {
        let src = SourceInput::new("abc");
        assert_eq!(src.current_char(), Some('a'));

        let expected = [Some('a'), Some('b'), Some('c'), None];
        for (distance, &want) in expected.iter().enumerate() {
            assert_eq!(src.peek_char(distance), want);
        }
    }

    #[test]
    fn eof() {
        let mut src = SourceInput::new("");
        assert!(src.is_eof());
        assert_eq!(src.next_char(), None);
    }
}