1#[macro_use]
2mod state_machine;
3
4mod lexer;
5mod tag_scanner;
6mod tree_builder_simulator;
7
8use self::lexer::Lexer;
9pub(crate) use self::lexer::{
10 AttributeBuffer, AttributeOutline, Lexeme, LexemeSink, NonTagContentLexeme,
11 NonTagContentTokenOutline, TagLexeme, TagTokenOutline,
12};
13use self::state_machine::StateMachine;
14pub(crate) use self::state_machine::{ActionError, ActionResult};
15pub(crate) use self::tag_scanner::TagHintSink;
16use self::tag_scanner::TagScanner;
17pub use self::tree_builder_simulator::ParsingAmbiguityError;
18use self::tree_builder_simulator::{TreeBuilderFeedback, TreeBuilderSimulator};
19use crate::rewriter::RewritingError;
20use cfg_if::cfg_if;
21
/// Selects which of the two state machines `Parser::parse` drives.
///
/// The directive is not fixed for the lifetime of a parser: a running state
/// machine can request a change, in which case `Parser::parse` stores the new
/// directive and resumes from a bookmark with the other state machine.
#[derive(Debug, Clone, Copy)]
pub(crate) enum ParserDirective {
    /// Input is fed to the tag scanner.
    WherePossibleScanForTagsOnly,
    /// Input is fed to the lexer.
    Lex,
}
31
32pub(crate) struct ParserContext<S> {
33 output_sink: S,
34 tree_builder_simulator: TreeBuilderSimulator,
35 previously_consumed_byte_count: usize,
38}
39
40pub(crate) trait ParserOutputSink: LexemeSink + TagHintSink {}
41
42pub struct Parser<S> {
44 lexer: Lexer<S>,
45 tag_scanner: TagScanner<S>,
46 current_directive: ParserDirective,
47 context: ParserContext<S>,
48}
49
50#[allow(private_bounds, private_interfaces)]
52impl<S: ParserOutputSink> Parser<S> {
53 #[must_use]
54 #[inline(never)]
55 pub fn new(output_sink: S, initial_directive: ParserDirective, strict: bool) -> Self {
56 let context = ParserContext {
57 output_sink,
58 previously_consumed_byte_count: 0,
59 tree_builder_simulator: TreeBuilderSimulator::new(strict),
60 };
61
62 Self {
63 lexer: Lexer::new(),
64 tag_scanner: TagScanner::new(),
65 current_directive: initial_directive,
66 context,
67 }
68 }
69
70 #[inline(never)]
74 pub fn parse(&mut self, input: &[u8], last: bool) -> Result<usize, RewritingError> {
75 let mut parse_result = match self.current_directive {
76 ParserDirective::WherePossibleScanForTagsOnly => {
77 self.tag_scanner
78 .run_parsing_loop(&mut self.context, input, last)
79 }
80 ParserDirective::Lex => self.lexer.run_parsing_loop(&mut self.context, input, last),
81 };
82
83 loop {
84 let unboxed = match parse_result {
85 Ok(unreachable) => match unreachable {},
86 Err(boxed) => *boxed,
87 };
88 match unboxed {
89 ActionError::EndOfInput {
90 consumed_byte_count,
91 } => {
92 self.context.previously_consumed_byte_count += consumed_byte_count;
93 return Ok(consumed_byte_count);
94 }
95 ActionError::ParserDirectiveChangeRequired(new_directive, sm_bookmark) => {
96 self.current_directive = new_directive;
97
98 trace!(@continue_from_bookmark sm_bookmark, self.current_directive, input);
99
100 parse_result = match self.current_directive {
101 ParserDirective::WherePossibleScanForTagsOnly => self
102 .tag_scanner
103 .continue_from_bookmark(&mut self.context, input, last, sm_bookmark),
104 ParserDirective::Lex => self.lexer.continue_from_bookmark(
105 &mut self.context,
106 input,
107 last,
108 sm_bookmark,
109 ),
110 };
111 }
112 ActionError::RewritingError(err) => return Err(err),
113 ActionError::Internal(err) => {
114 return Err(RewritingError::ContentHandlerError(err.into()));
115 }
116 }
117 }
118 }
119
120 pub fn get_dispatcher(&mut self) -> &mut S {
121 &mut self.context.output_sink
122 }
123}
124
125cfg_if! {
126 if #[cfg(feature = "integration_test")] {
127 use crate::html::{LocalNameHash, TextType};
128
129 #[allow(private_bounds)]
130 impl<S: ParserOutputSink> Parser<S> {
131 pub fn switch_text_type(&mut self, text_type: TextType) {
132 match self.current_directive {
133 ParserDirective::WherePossibleScanForTagsOnly => {
134 self.tag_scanner.switch_text_type(text_type);
135 }
136 ParserDirective::Lex => self.lexer.switch_text_type(text_type),
137 }
138 }
139
140 pub fn set_last_start_tag_name_hash(&mut self, name_hash: LocalNameHash) {
141 match self.current_directive {
142 ParserDirective::WherePossibleScanForTagsOnly => {
143 self.tag_scanner.set_last_start_tag_name_hash(name_hash);
144 }
145 ParserDirective::Lex => self.lexer.set_last_start_tag_name_hash(name_hash),
146 }
147 }
148 }
149 }
150}