Skip to main content

hpx_browser/css_parser/
source.rs

/// A point in the CSS input, given both as a raw offset and as a
/// human-readable line/column pair.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SourceLocation {
    /// Byte offset into the input string.
    pub offset: usize,
    /// Line number; the first line is 1.
    pub line: u32,
    /// Column number within the line; the first column is 1.
    pub column: u32,
}
8
9impl Default for SourceLocation {
10    fn default() -> Self {
11        Self {
12            offset: 0,
13            line: 1,
14            column: 1,
15        }
16    }
17}
18
/// Zero-copy input reader with CR/LF normalization and source location tracking.
///
/// The reader only borrows the input and keeps a handful of counters, so
/// cloning it is cheap; a clone can serve as a snapshot to rewind to.
#[derive(Debug, Clone)]
pub struct SourceInput<'a> {
    /// The complete input text, borrowed for the lifetime of the reader.
    input: &'a str,
    /// Byte view of `input`, used for cheap single-byte look-ups.
    bytes: &'a [u8],
    /// Byte offset of the next code point to be read.
    pos: usize,
    /// 1-based line number of `pos`.
    line: u32,
    /// 1-based column of `pos`, counted in code points.
    column: u32,
}
27
28impl<'a> SourceInput<'a> {
29    pub fn new(input: &'a str) -> Self {
30        let bytes = input.as_bytes();
31        let mut pos = 0;
32
33        // Skip BOM (U+FEFF)
34        if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
35            pos = 3;
36        }
37
38        Self {
39            input,
40            bytes,
41            pos,
42            line: 1,
43            column: 1,
44        }
45    }
46
47    pub fn location(&self) -> SourceLocation {
48        SourceLocation {
49            offset: self.pos,
50            line: self.line,
51            column: self.column,
52        }
53    }
54
55    pub fn pos(&self) -> usize {
56        self.pos
57    }
58
59    pub fn is_eof(&self) -> bool {
60        self.pos >= self.bytes.len()
61    }
62
63    pub fn current_char(&self) -> Option<char> {
64        if self.pos >= self.bytes.len() {
65            return None;
66        }
67        let ch = self.decode_char_at(self.pos);
68        Some(Self::normalize(ch))
69    }
70
71    pub fn peek_char(&self, n: usize) -> Option<char> {
72        let mut pos = self.pos;
73        for _ in 0..n {
74            if pos >= self.bytes.len() {
75                return None;
76            }
77            let ch = self.decode_char_at(pos);
78            pos += ch.len_utf8();
79            if ch == '\r' && pos < self.bytes.len() && self.bytes[pos] == b'\n' {
80                pos += 1;
81            }
82        }
83        if pos >= self.bytes.len() {
84            return None;
85        }
86        let ch = self.decode_char_at(pos);
87        Some(Self::normalize(ch))
88    }
89
90    pub fn next_char(&mut self) -> Option<char> {
91        if self.pos >= self.bytes.len() {
92            return None;
93        }
94
95        let ch = self.decode_char_at(self.pos);
96        self.pos += ch.len_utf8();
97
98        let normalized = Self::normalize(ch);
99
100        if ch == '\r' {
101            if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
102                self.pos += 1;
103            }
104            self.line += 1;
105            self.column = 1;
106            return Some('\n');
107        }
108
109        if ch == '\n' || ch == '\x0C' {
110            self.line += 1;
111            self.column = 1;
112        } else {
113            self.column += 1;
114        }
115
116        Some(normalized)
117    }
118
119    pub fn reconsume(&mut self) {
120        if self.pos > 0 {
121            let mut back = self.pos - 1;
122            while back > 0 && !self.input.is_char_boundary(back) {
123                back -= 1;
124            }
125            self.pos = back;
126            if self.column > 1 {
127                self.column -= 1;
128            }
129        }
130    }
131
132    pub fn slice(&self, start: usize, end: usize) -> &'a str {
133        &self.input[start..end]
134    }
135
136    pub fn slice_from(&self, start: usize) -> &'a str {
137        &self.input[start..self.pos]
138    }
139
140    pub fn starts_with_ignore_case(&self, pattern: &str) -> bool {
141        let remaining = &self.bytes[self.pos..];
142        if remaining.len() < pattern.len() {
143            return false;
144        }
145        remaining[..pattern.len()]
146            .iter()
147            .zip(pattern.bytes())
148            .all(|(&a, b)| a.eq_ignore_ascii_case(&b))
149    }
150
151    fn decode_char_at(&self, pos: usize) -> char {
152        self.input[pos..].chars().next().unwrap_or('\0')
153    }
154
155    fn normalize(ch: char) -> char {
156        match ch {
157            '\0' => '\u{FFFD}',
158            '\x0C' => '\n',
159            other => other,
160        }
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected location in an assertion.
    fn at(offset: usize, line: u32, column: u32) -> SourceLocation {
        SourceLocation {
            offset,
            line,
            column,
        }
    }

    /// Reads `expected.len()` code points from `input` and checks each one.
    fn assert_reads(input: &str, expected: &[char]) {
        let mut src = SourceInput::new(input);
        for &want in expected {
            assert_eq!(src.next_char(), Some(want));
        }
    }

    #[test]
    fn bom_is_skipped() {
        // The first code point delivered must be the one after the BOM.
        assert_reads("\u{FEFF}hello", &['h']);
    }

    #[test]
    fn cr_normalized_to_lf() {
        assert_reads("a\rb", &['a', '\n', 'b']);
    }

    #[test]
    fn crlf_normalized_to_single_lf() {
        assert_reads("a\r\nb", &['a', '\n', 'b']);
    }

    #[test]
    fn null_replaced_with_replacement() {
        assert_reads("a\0b", &['a', '\u{FFFD}', 'b']);
    }

    #[test]
    fn location_tracking() {
        let mut src = SourceInput::new("ab\ncd");
        assert_eq!(src.location(), at(0, 1, 1));

        // Location expected after consuming 'a', 'b' and '\n' in turn.
        let after_each_read = [at(1, 1, 2), at(2, 1, 3), at(3, 2, 1)];
        for expected in after_each_read.iter() {
            src.next_char();
            assert_eq!(src.location(), *expected);
        }
    }

    #[test]
    fn peek_ahead() {
        let text = "abc";
        let src = SourceInput::new(text);
        assert_eq!(src.current_char(), Some('a'));
        for (distance, expected) in text.chars().enumerate() {
            assert_eq!(src.peek_char(distance), Some(expected));
        }
        assert_eq!(src.peek_char(3), None);
    }

    #[test]
    fn eof() {
        let mut src = SourceInput::new("");
        assert!(src.is_eof());
        assert_eq!(src.next_char(), None);
    }
}