1#[macro_use]
2mod syntax_dsl;
3
4#[macro_use]
5mod syntax;
6
7use crate::html::{LocalNameHash, TextType};
8use crate::parser::{ParserDirective, ParsingAmbiguityError, TreeBuilderFeedback};
9use crate::rewriter::RewritingError;
10use std::fmt::{self, Debug};
11use std::mem;
12
13pub(crate) enum FeedbackDirective {
14 ApplyUnhandledFeedback(TreeBuilderFeedback),
15 Skip,
16 None,
17}
18
19impl FeedbackDirective {
20 #[inline]
21 pub fn take(&mut self) -> Self {
22 mem::replace(self, Self::None)
23 }
24}
25
26impl Debug for FeedbackDirective {
27 #[cold]
28 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29 write!(
30 f,
31 "{}",
32 match self {
33 Self::ApplyUnhandledFeedback(_) => "ApplyPendingFeedback",
34 Self::Skip => "Skip",
35 Self::None => "None",
36 }
37 )
38 }
39}
40
41#[derive(Debug)]
42pub(crate) struct StateMachineBookmark {
43 cdata_allowed: bool,
44 text_type: TextType,
45 last_start_tag_name_hash: LocalNameHash,
46 pub pos: usize,
48 feedback_directive: FeedbackDirective,
49}
50
51pub(crate) enum ActionError {
52 RewritingError(RewritingError),
53 ParserDirectiveChangeRequired(ParserDirective, StateMachineBookmark),
54 EndOfInput { consumed_byte_count: usize },
55 Internal(&'static str),
56}
57
58impl ActionError {
59 #[cold]
60 #[cfg_attr(debug_assertions, track_caller)]
61 #[allow(clippy::unnecessary_box_returns)]
62 pub(crate) fn internal(error: &'static str) -> Box<Self> {
63 debug_assert!(false, "{error}");
64 Box::new(Self::Internal(error))
65 }
66}
67
68impl From<ParsingAmbiguityError> for Box<ActionError> {
69 #[cold]
70 fn from(err: ParsingAmbiguityError) -> Self {
71 Self::new(ActionError::RewritingError(
72 RewritingError::ParsingAmbiguity(err),
73 ))
74 }
75}
76
77impl From<RewritingError> for Box<ActionError> {
78 #[cold]
79 fn from(err: RewritingError) -> Self {
80 Self::new(ActionError::RewritingError(err))
81 }
82}
83
84pub enum Never {}
86
87pub type ActionResult<T = ()> = Result<T, Box<ActionError>>;
88pub type StateResult = ActionResult<()>;
89pub type ParseResult = ActionResult<Never>;
90
91pub(crate) trait StateMachineActions {
92 type Context;
93
94 fn emit_text_and_eof(&mut self, context: &mut Self::Context, input: &[u8]) -> ActionResult;
95 fn emit_text(&mut self, context: &mut Self::Context, input: &[u8]) -> ActionResult;
96 fn emit_current_token(&mut self, context: &mut Self::Context, input: &[u8]) -> ActionResult;
97 fn emit_tag(&mut self, context: &mut Self::Context, input: &[u8]) -> ActionResult;
98 fn emit_current_token_and_eof(
99 &mut self,
100 context: &mut Self::Context,
101 input: &[u8],
102 ) -> ActionResult;
103 fn emit_raw_without_token(&mut self, context: &mut Self::Context, input: &[u8])
104 -> ActionResult;
105 fn emit_raw_without_token_and_eof(
106 &mut self,
107 context: &mut Self::Context,
108 input: &[u8],
109 ) -> ActionResult;
110
111 fn create_start_tag(&mut self, context: &mut Self::Context, input: &[u8]);
112 fn create_end_tag(&mut self, context: &mut Self::Context, input: &[u8]);
113 fn create_doctype(&mut self, context: &mut Self::Context, input: &[u8]);
114 fn create_comment(&mut self, context: &mut Self::Context, input: &[u8]);
115
116 fn start_token_part(&mut self, context: &mut Self::Context, input: &[u8]);
117
118 fn mark_comment_text_end(&mut self, context: &mut Self::Context, input: &[u8]);
119 fn shift_comment_text_end_by(
120 &mut self,
121 context: &mut Self::Context,
122 input: &[u8],
123 offset: usize,
124 );
125
126 fn set_force_quirks(&mut self, context: &mut Self::Context, input: &[u8]);
127 fn finish_doctype_name(&mut self, context: &mut Self::Context, input: &[u8]);
128 fn finish_doctype_public_id(&mut self, context: &mut Self::Context, input: &[u8]);
129 fn finish_doctype_system_id(&mut self, context: &mut Self::Context, input: &[u8]);
130
131 fn finish_tag_name(&mut self, context: &mut Self::Context, input: &[u8]) -> ActionResult;
132 fn update_tag_name_hash(&mut self, context: &mut Self::Context, input: &[u8]);
133 fn mark_as_self_closing(&mut self, context: &mut Self::Context, input: &[u8]);
134
135 fn start_attr(&mut self, context: &mut Self::Context, input: &[u8]);
136 fn finish_attr_name(&mut self, context: &mut Self::Context, input: &[u8]);
137 fn finish_attr_value(&mut self, context: &mut Self::Context, input: &[u8]);
138 fn finish_attr(&mut self, context: &mut Self::Context, input: &[u8]);
139
140 fn set_closing_quote_to_double(&mut self, context: &mut Self::Context, input: &[u8]);
141 fn set_closing_quote_to_single(&mut self, context: &mut Self::Context, input: &[u8]);
142
143 fn mark_tag_start(&mut self, context: &mut Self::Context, input: &[u8]);
144 fn unmark_tag_start(&mut self, context: &mut Self::Context, input: &[u8]);
145
146 fn enter_cdata(&mut self, context: &mut Self::Context, input: &[u8]);
147 fn leave_cdata(&mut self, context: &mut Self::Context, input: &[u8]);
148}
149
/// Boolean queries that a state machine implementation has to answer.
pub(crate) trait StateMachineConditions {
    /// Condition named after the "appropriate end tag" check.
    // NOTE(review): presumably compares the current end tag with the last
    // start tag name hash -- confirm against the implementors.
    fn is_appropriate_end_tag(&self) -> bool;

    /// Whether CDATA is currently allowed; `StateMachine::create_bookmark`
    /// stores this value in the bookmark.
    fn cdata_allowed(&self) -> bool;
}
154
155pub(crate) trait StateMachine: StateMachineActions + StateMachineConditions {
156 cdata_section_states_group!();
157 data_states_group!();
158 plaintext_states_group!();
159 rawtext_states_group!();
160 rcdata_states_group!();
161 script_data_states_group!();
162 script_data_escaped_states_group!();
163 script_data_double_escaped_states_group!();
164 tag_states_group!();
165 attributes_states_group!();
166 comment_states_group!();
167 doctype_states_group!();
168
169 fn state(&self) -> fn(&mut Self, context: &mut Self::Context, &[u8]) -> StateResult;
170 fn set_state(
171 &mut self,
172 state: fn(&mut Self, context: &mut Self::Context, &[u8]) -> StateResult,
173 );
174
175 fn last_start_tag_name_hash(&self) -> LocalNameHash;
176 fn set_last_start_tag_name_hash(&mut self, name_hash: LocalNameHash);
177
178 fn set_last_text_type(&mut self, text_type: TextType);
179 fn last_text_type(&self) -> TextType;
180
181 fn set_cdata_allowed(&mut self, cdata_allowed: bool);
182
183 fn closing_quote(&self) -> u8;
184
185 fn adjust_for_next_input(&mut self);
186 fn adjust_to_bookmark(&mut self, pos: usize, feedback_directive: FeedbackDirective);
187 fn enter_ch_sequence_matching(&mut self);
188 fn leave_ch_sequence_matching(&mut self);
189 fn get_consumed_byte_count(&self, input: &[u8]) -> usize;
190
191 fn consume_ch(&mut self, input: &[u8]) -> Option<u8>;
192 fn consume_until(&mut self, needle: u8, input: &[u8]) -> bool;
194 fn unconsume_ch(&mut self);
195 fn consume_several(&mut self, count: usize);
196 fn lookahead(&self, input: &[u8], offset: usize) -> Option<u8>;
197 fn pos(&self) -> usize;
198 fn set_pos(&mut self, pos: usize);
199 fn is_last_input(&self) -> bool;
200 fn set_is_last_input(&mut self, last: bool);
201
202 fn run_parsing_loop(
203 &mut self,
204 context: &mut Self::Context,
205 input: &[u8],
206 last: bool,
207 ) -> ParseResult {
208 self.set_is_last_input(last);
209
210 loop {
211 self.state()(self, context, input)?;
212 }
213 }
214
215 fn continue_from_bookmark(
216 &mut self,
217 context: &mut Self::Context,
218 input: &[u8],
219 last: bool,
220 bookmark: StateMachineBookmark,
221 ) -> ParseResult {
222 self.set_cdata_allowed(bookmark.cdata_allowed);
223 self.switch_text_type(bookmark.text_type);
224 self.set_last_start_tag_name_hash(bookmark.last_start_tag_name_hash);
225 self.adjust_to_bookmark(bookmark.pos, bookmark.feedback_directive);
226 self.set_pos(bookmark.pos);
227
228 self.run_parsing_loop(context, input, last)
229 }
230
231 #[cold]
232 fn break_on_end_of_input(&mut self, input: &[u8]) -> StateResult {
233 let consumed_byte_count = self.get_consumed_byte_count(input);
234
235 if !self.is_last_input() {
236 self.adjust_for_next_input();
237 }
238
239 self.set_pos(self.pos() - consumed_byte_count);
240
241 Err(Box::new(ActionError::EndOfInput {
242 consumed_byte_count,
243 }))
244 }
245
246 #[inline]
247 fn create_bookmark(
248 &self,
249 pos: usize,
250 feedback_directive: FeedbackDirective,
251 ) -> StateMachineBookmark {
252 StateMachineBookmark {
253 cdata_allowed: self.cdata_allowed(),
254 text_type: self.last_text_type(),
255 last_start_tag_name_hash: self.last_start_tag_name_hash(),
256 pos,
257 feedback_directive,
258 }
259 }
260
261 #[inline]
262 fn change_parser_directive(
263 &self,
264 pos: usize,
265 new_parser_directive: ParserDirective,
266 feedback_directive: FeedbackDirective,
267 ) -> ActionResult {
268 Err(Box::new(ActionError::ParserDirectiveChangeRequired(
269 new_parser_directive,
270 self.create_bookmark(pos, feedback_directive),
271 )))
272 }
273
274 #[inline]
275 fn switch_text_type(&mut self, text_type: TextType) {
276 self.set_last_text_type(text_type);
277 self.set_state(self.next_text_parsing_state());
278 }
279
280 #[inline]
281 fn next_text_parsing_state(&self) -> fn(&mut Self, &mut Self::Context, &[u8]) -> StateResult {
282 match self.last_text_type() {
283 TextType::Data => Self::data_state,
284 TextType::PlainText => Self::plaintext_state,
285 TextType::RCData => Self::rcdata_state,
286 TextType::RawText => Self::rawtext_state,
287 TextType::ScriptData => Self::script_data_state,
288 TextType::CDataSection => Self::cdata_section_state,
289 }
290 }
291}
292
/// Expands to the field-backed accessors shared by the state machine
/// implementations.
///
/// The enclosing type must have the fields `last_text_type`, `closing_quote`,
/// `last_start_tag_name_hash` and `cdata_allowed`.
macro_rules! impl_common_sm_accessors {
    () => {
        /// Returns the `last_text_type` field.
        #[inline]
        fn last_text_type(&self) -> TextType {
            self.last_text_type
        }

        /// Overwrites the `last_text_type` field.
        #[inline]
        fn set_last_text_type(&mut self, text_type: TextType) {
            self.last_text_type = text_type;
        }

        /// Returns the `closing_quote` field.
        #[inline]
        fn closing_quote(&self) -> u8 {
            self.closing_quote
        }

        /// Returns the `last_start_tag_name_hash` field.
        #[inline]
        fn last_start_tag_name_hash(&self) -> LocalNameHash {
            self.last_start_tag_name_hash
        }

        /// Overwrites the `last_start_tag_name_hash` field.
        #[inline]
        fn set_last_start_tag_name_hash(&mut self, name_hash: LocalNameHash) {
            self.last_start_tag_name_hash = name_hash;
        }

        /// Overwrites the `cdata_allowed` field.
        #[inline]
        fn set_cdata_allowed(&mut self, cdata_allowed: bool) {
            self.cdata_allowed = cdata_allowed;
        }
    };
}
326
/// Expands to the action implementations shared by the state machine
/// implementations. None of them uses the context or the input.
///
/// The enclosing type must have a `closing_quote` field and a
/// `set_last_text_type` method.
macro_rules! impl_common_sm_actions {
    () => {
        /// Makes `"` the closing quote.
        #[inline]
        fn set_closing_quote_to_double(&mut self, _context: &mut Self::Context, _input: &[u8]) {
            self.closing_quote = b'"';
        }

        /// Makes `'` the closing quote.
        #[inline]
        fn set_closing_quote_to_single(&mut self, _context: &mut Self::Context, _input: &[u8]) {
            self.closing_quote = b'\'';
        }

        /// Sets the last text type to `TextType::CDataSection`.
        #[inline]
        fn enter_cdata(&mut self, _context: &mut Self::Context, _input: &[u8]) {
            self.set_last_text_type(TextType::CDataSection);
        }

        /// Sets the last text type back to `TextType::Data`.
        #[inline]
        fn leave_cdata(&mut self, _context: &mut Self::Context, _input: &[u8]) {
            self.set_last_text_type(TextType::Data);
        }
    };
}
350
/// Expands to the input cursor methods shared by the state machine
/// implementations.
///
/// The enclosing type must have the fields `next_pos` (index of the next byte
/// to consume) and `is_last_input`.
macro_rules! impl_common_input_cursor_methods {
    () => {
        /// Returns the byte at the cursor (`None` past the end of `input`) and
        /// advances the cursor by one in either case.
        #[inline]
        #[allow(clippy::let_and_return)]
        fn consume_ch(&mut self, input: &[u8]) -> Option<u8> {
            let index = self.next_pos;
            let ch = input.get(index).copied();

            self.next_pos = index + 1;

            trace!(@chars "consume", ch);

            ch
        }

        /// Searches for `needle` from the cursor onwards.
        ///
        /// When found, the cursor ends up right after the needle and `true`
        /// is returned. Otherwise the cursor advances by the length of the
        /// searched remainder plus one and `false` is returned.
        #[inline]
        fn consume_until(&mut self, needle: u8, input: &[u8]) -> bool {
            // A cursor beyond the end of the input leaves nothing to search.
            let rest = input.get(self.next_pos..).unwrap_or_default();
            let found_at = memchr::memchr(needle, rest);

            self.next_pos += 1 + found_at.unwrap_or(rest.len());

            found_at.is_some()
        }

        /// Moves the cursor one byte back.
        #[inline]
        fn unconsume_ch(&mut self) {
            self.next_pos -= 1;

            trace!(@chars "unconsume");
        }

        /// Advances the cursor by `count` bytes without reading them.
        #[inline]
        fn consume_several(&mut self, count: usize) {
            self.next_pos += count;

            trace!(@chars "consume several");
        }

        /// Peeks at the byte at index `next_pos + offset - 1` without moving
        /// the cursor; `None` when that index is outside of `input`.
        #[inline]
        #[allow(clippy::let_and_return)]
        fn lookahead(&self, input: &[u8], offset: usize) -> Option<u8> {
            let index = self.next_pos + offset - 1;
            let ch = input.get(index).copied();

            trace!(@chars "lookahead", ch);

            ch
        }

        /// Index just before the cursor; after `consume_ch` this is the index
        /// that was read.
        #[inline]
        fn pos(&self) -> usize {
            self.next_pos - 1
        }

        /// Places the cursor at `pos`: the next consumed byte is `input[pos]`.
        #[inline]
        fn set_pos(&mut self, pos: usize) {
            self.next_pos = pos;
        }

        /// Returns the `is_last_input` field.
        #[inline]
        fn is_last_input(&self) -> bool {
            self.is_last_input
        }

        /// Overwrites the `is_last_input` field.
        #[inline]
        fn set_is_last_input(&mut self, last: bool) {
            self.is_last_input = last;
        }
    };
}
426
/// Expands to one infallible action per listed name. Each generated action
/// ignores its arguments and only invokes `trace!(@noop)`.
macro_rules! noop_action {
    ($($action:ident),*) => {
        $(
            #[inline]
            fn $action(&mut self, _context: &mut Self::Context, _input: &[u8]) {
                trace!(@noop);
            }
        )*
    };
}
437
/// Expands to one fallible action per listed name. Each generated action
/// ignores its arguments, invokes `trace!(@noop)` and always succeeds.
macro_rules! noop_action_with_result {
    ($($action:ident),*) => {
        $(
            #[inline]
            fn $action(&mut self, _context: &mut Self::Context, _input: &[u8]) -> ActionResult {
                trace!(@noop);

                Ok(())
            }
        )*
    };
}