Skip to main content

lol_html/parser/lexer/
actions.rs

1use super::*;
2use crate::parser::ActionError;
3use crate::parser::state_machine::StateMachineActions;
4
5use NonTagContentTokenOutline::*;
6use TagTokenOutline::{EndTag, StartTag};
7
// NOTE: use macro instead of the function to make borrow
// checker happy with range construction inside match arm
// with a mutable borrow of lexer.
//
// Builds the `Range` of the token part currently being scanned:
// from the recorded `token_part_start` up to `next_pos - 1`.
// NOTE(review): the `- 1` appears to exclude the byte that
// terminated the part (assuming `next_pos` points one past the
// last consumed byte) — confirm against the state machine's
// position bookkeeping.
macro_rules! get_token_part_range {
    ($self:tt) => {
        Range {
            start: $self.token_part_start,
            end: $self.next_pos - 1,
        }
    };
}
19
impl<S: LexemeSink> Lexer<S> {
    /// Emits an EOF lexeme covering the raw input consumed so far.
    ///
    /// Like text tokens, EOF has no lexical symbol bounding it, so the
    /// raw slice is built with the *exclusive* range variant.
    fn emit_eof(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
        let lexeme = self.create_lexeme_with_raw_exclusive(
            context.previously_consumed_byte_count,
            input,
            Some(Eof),
        );

        self.emit_lexeme(context, &lexeme)
    }
}
31
impl<S: LexemeSink> StateMachineActions for Lexer<S> {
    type Context = ParserContext<S>;

    // Common actions shared with other state machines are generated by this
    // project-level macro; see its definition for what it provides.
    impl_common_sm_actions!();

    /// Emits the text accumulated since `lexeme_start` as a single text
    /// lexeme, tagged with the last-seen text type. No-op when no bytes
    /// have been consumed since the lexeme start.
    fn emit_text(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
        if self.pos() > self.lexeme_start {
            // NOTE: unlike any other tokens (except EOF), text tokens don't have
            // any lexical symbols that determine their bounds. Therefore,
            // representation of text token content is the raw slice.
            // Also, we always emit text if we encounter some other bounded
            // lexical structure and, thus, we use exclusive range for the raw slice.
            let lexeme = self.create_lexeme_with_raw_exclusive(
                context.previously_consumed_byte_count,
                input,
                Some(Text(self.last_text_type)),
            );

            self.emit_lexeme(context, &lexeme)?;
        }

        Ok(())
    }

    /// Flushes any pending text, then emits the EOF lexeme.
    #[inline(never)]
    fn emit_text_and_eof(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
        self.emit_text(context, input)?;
        self.emit_eof(context, input)
    }

    /// Emits the current non-tag token (comment or doctype), if any was
    /// created; with `None` only the raw bytes are emitted. Uses the
    /// inclusive range variant since such tokens end on a lexical symbol.
    #[inline(never)]
    fn emit_current_token(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
        let token = self.current_non_tag_content_token.take();
        let lexeme = self.create_lexeme_with_raw_inclusive(
            context.previously_consumed_byte_count,
            input,
            token,
        );

        self.emit_lexeme(context, &lexeme)
    }

    /// Emits the current tag token, consulting the tree-builder simulator
    /// for feedback (e.g. namespace/text-mode changes) and possibly
    /// switching the parser to tag-scan-only mode afterwards.
    ///
    /// It is an internal error for this action to fire without a tag token
    /// having been created first.
    #[inline(never)]
    fn emit_tag(&mut self, context: &mut ParserContext<S>, input: &[u8]) -> ActionResult {
        let token = self
            .current_tag_token
            .take()
            .ok_or_else(|| ActionError::internal("Tag token should exist at this point"))?;

        // NOTE: feedback is obtained before the token is moved into the lexeme.
        let feedback = self.try_get_tree_builder_feedback(context, &token)?;

        let mut lexeme = self.create_lexeme_with_raw_inclusive(
            context.previously_consumed_byte_count,
            input,
            token,
        );

        // NOTE: exit from any non-initial text parsing mode always happens on tag emission
        // (except for CDATA, but there is a special action to take care of it).
        self.set_last_text_type(TextType::Data);

        if let Some(feedback) = feedback {
            self.handle_tree_builder_feedback(context, feedback, &lexeme);
        }

        // Start tags additionally record the last seen tag-name hash and get
        // stamped with the namespace the tree-builder simulator is currently in.
        if let StartTag {
            ref mut ns,
            name_hash,
            ..
        } = lexeme.token_outline
        {
            self.last_start_tag_name_hash = name_hash;
            *ns = context.tree_builder_simulator.current_ns();
        }

        match self.emit_tag_lexeme(context, &lexeme)? {
            ParserDirective::Lex => Ok(()),
            // The sink requested tag-scan-only parsing: switch directive,
            // re-anchoring at the start of the lexeme just emitted.
            ParserDirective::WherePossibleScanForTagsOnly => self.change_parser_directive(
                self.lexeme_start,
                ParserDirective::WherePossibleScanForTagsOnly,
                FeedbackDirective::None,
            ),
        }
    }

    /// Emits the current non-tag token followed by EOF. The raw range is
    /// exclusive here because input ended before a closing lexical symbol.
    #[inline(never)]
    fn emit_current_token_and_eof(
        &mut self,
        context: &mut ParserContext<S>,
        input: &[u8],
    ) -> ActionResult {
        let token = self.current_non_tag_content_token.take();
        let lexeme = self.create_lexeme_with_raw_exclusive(
            context.previously_consumed_byte_count,
            input,
            token,
        );

        self.emit_lexeme(context, &lexeme)?;
        self.emit_eof(context, input)
    }

    /// Emits `<[CDATA[` and such.
    #[inline(never)]
    fn emit_raw_without_token(
        &mut self,
        context: &mut ParserContext<S>,
        input: &[u8],
    ) -> ActionResult {
        let lexeme = self.create_lexeme_with_raw_inclusive(
            context.previously_consumed_byte_count,
            input,
            None,
        );

        self.emit_lexeme(context, &lexeme)
    }

    /// Same as [`Self::emit_raw_without_token`], but for raw content cut
    /// short by end of input, followed by the EOF lexeme.
    #[inline(never)]
    fn emit_raw_without_token_and_eof(
        &mut self,
        context: &mut ParserContext<S>,
        input: &[u8],
    ) -> ActionResult {
        // NOTE: since we are at EOF we use exclusive range for token's raw.
        let lexeme = self.create_lexeme_with_raw_exclusive(
            context.previously_consumed_byte_count,
            input,
            None,
        );

        self.emit_lexeme(context, &lexeme)?;
        self.emit_eof(context, input)
    }

    /// Installs a fresh start-tag token outline; its ranges/hash/attributes
    /// are filled in by subsequent actions as the tag is scanned.
    #[inline]
    fn create_start_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        self.current_tag_token = Some(StartTag {
            name: Range::default(),
            name_hash: LocalNameHash::new(),
            ns: Namespace::default(),
            attributes: Vec::new(),
            self_closing: false,
        });
    }

    /// Installs a fresh end-tag token outline (name filled in later).
    #[inline]
    fn create_end_tag(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        self.current_tag_token = Some(EndTag {
            name: Range::default(),
            name_hash: LocalNameHash::new(),
        });
    }

    /// Installs a fresh doctype token outline. `#[cold]`: doctypes occur at
    /// most about once per document, so keep this off the hot path.
    #[cold]
    fn create_doctype(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        self.current_non_tag_content_token = Some(Doctype(Box::new(DoctypeTokenOutline {
            name: None,
            public_id: None,
            system_id: None,
            force_quirks: false,
        })));
    }

    /// Installs a fresh comment token outline with an empty text range.
    #[inline]
    fn create_comment(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        self.current_non_tag_content_token = Some(Comment(Range::default()));
    }

    /// Records the current position as the start of a token part (tag name,
    /// attribute name/value, doctype ids, …) for `get_token_part_range!`.
    #[inline]
    fn start_token_part(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        self.token_part_start = self.pos();
    }

    /// Sets the comment's text range to the part scanned so far.
    /// Silently ignored if the current token is not a comment.
    #[inline]
    fn mark_comment_text_end(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
            *text = get_token_part_range!(self);
        }
    }

    /// Extends the comment's text range end by `offset` bytes (used when a
    /// would-be terminator turns out to be part of the comment text).
    #[inline]
    fn shift_comment_text_end_by(
        &mut self,
        _context: &mut ParserContext<S>,
        _input: &[u8],
        offset: usize,
    ) {
        if let Some(Comment(ref mut text)) = self.current_non_tag_content_token {
            text.end += offset;
        }
    }

    /// Flags the current doctype as force-quirks; no-op for other tokens.
    #[inline]
    fn set_force_quirks(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(Doctype(doctype)) = &mut self.current_non_tag_content_token {
            doctype.force_quirks = true;
        }
    }

    /// Records the doctype name range; no-op for other tokens.
    #[inline]
    fn finish_doctype_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(Doctype(doctype)) = &mut self.current_non_tag_content_token {
            doctype.name = Some(get_token_part_range!(self));
        }
    }

    /// Records the doctype public-id range; no-op for other tokens.
    #[inline]
    fn finish_doctype_public_id(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(Doctype(doctype)) = &mut self.current_non_tag_content_token {
            doctype.public_id = Some(get_token_part_range!(self));
        }
    }

    /// Records the doctype system-id range; no-op for other tokens.
    #[inline]
    fn finish_doctype_system_id(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(Doctype(doctype)) = &mut self.current_non_tag_content_token {
            doctype.system_id = Some(get_token_part_range!(self));
        }
    }

    /// Records the tag-name range on the current start or end tag.
    /// Internal error if no tag token exists — unlike the per-byte hash
    /// update below, this runs once per tag so the check is affordable.
    #[inline]
    fn finish_tag_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) -> ActionResult {
        match self.current_tag_token {
            Some(StartTag { ref mut name, .. } | EndTag { ref mut name, .. }) => {
                *name = get_token_part_range!(self);
            }
            _ => return Err(ActionError::internal("Tag should exist at this point")),
        }

        Ok(())
    }

    /// Folds the byte at the current position into the tag-name hash.
    /// Called per character, so the missing-tag case is only a debug
    /// assertion rather than a returned error.
    #[inline]
    fn update_tag_name_hash(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
        if let Some(ch) = input.get(self.pos()).copied() {
            match self.current_tag_token {
                Some(
                    StartTag {
                        ref mut name_hash, ..
                    }
                    | EndTag {
                        ref mut name_hash, ..
                    },
                ) => name_hash.update(ch),
                _ => debug_assert!(false, "Tag should exist at this point"),
            }
        }
    }

    /// Marks the current start tag as self-closing (`<foo/>`); end tags
    /// have no such flag, so this is a no-op for them.
    #[inline]
    fn mark_as_self_closing(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(StartTag {
            ref mut self_closing,
            ..
        }) = self.current_tag_token
        {
            *self_closing = true;
        }
    }

    /// Begins a new attribute and starts a token part for its name.
    #[inline]
    fn start_attr(&mut self, context: &mut ParserContext<S>, input: &[u8]) {
        // NOTE: create attribute only if we are parsing a start tag
        if let Some(StartTag { .. }) = self.current_tag_token {
            self.current_attr = Some(AttributeOutline::default());

            self.start_token_part(context, input);
        }
    }

    /// Records the attribute-name range and initializes the attribute's
    /// raw range to it (extended later if a value follows).
    #[inline]
    fn finish_attr_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(AttributeOutline {
            ref mut name,
            ref mut raw_range,
            ..
        }) = self.current_attr
        {
            *name = get_token_part_range!(self);
            *raw_range = *name;
        }
    }

    /// Records the attribute-value range and extends the attribute's raw
    /// range over the value (plus the closing quote, when present).
    #[inline]
    fn finish_attr_value(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
        if let Some(AttributeOutline {
            ref mut value,
            ref mut raw_range,
            ..
        }) = self.current_attr
        {
            *value = get_token_part_range!(self);

            // NOTE: include closing quote into the raw value if it's present
            raw_range.end = match input.get(self.next_pos - 1).copied() {
                Some(ch) if ch == self.closing_quote => value.end + 1,
                _ => value.end,
            };
        }
    }

    /// Moves the completed attribute into the current start tag's attribute
    /// list. If the current token is not a start tag the attribute is taken
    /// and silently dropped (consistent with `start_attr` above).
    #[inline]
    fn finish_attr(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) {
        if let Some(attr) = self.current_attr.take() {
            if let Some(StartTag { attributes, .. }) = self.current_tag_token.as_mut() {
                attributes.push(attr);
            }
        }
    }

    // NOTE(review): presumably generates empty implementations for these
    // trait methods — confirm against the `noop_action!` macro definition.
    noop_action!(mark_tag_start, unmark_tag_start);
}