// lol_html/parser/tag_scanner/mod.rs
#[macro_use]
2mod actions;
3mod conditions;
4
5use crate::base::{Align, Bytes, Range};
6use crate::html::{LocalName, LocalNameHash, Namespace, TextType};
7use crate::parser::state_machine::{FeedbackDirective, StateMachine, StateResult};
8use crate::parser::{ParserContext, ParserDirective, ParsingAmbiguityError, TreeBuilderFeedback};
9use crate::rewriter::RewritingError;
10use std::cmp::min;
11
12pub(crate) trait TagHintSink {
13 fn handle_start_tag_hint(
14 &mut self,
15 name: LocalName<'_>,
16 ns: Namespace,
17 ) -> Result<ParserDirective, RewritingError>;
18 fn handle_end_tag_hint(
19 &mut self,
20 name: LocalName<'_>,
21 ) -> Result<ParserDirective, RewritingError>;
22}
23
24pub(crate) type State<S> =
25 fn(&mut TagScanner<S>, context: &mut ParserContext<S>, &[u8]) -> StateResult;
26
27pub(crate) struct TagScanner<S> {
39 next_pos: usize,
40 is_last_input: bool,
41 tag_start: Option<usize>,
42 ch_sequence_matching_start: Option<usize>,
43 tag_name_start: usize,
44 is_in_end_tag: bool,
45 tag_name_hash: LocalNameHash,
46 last_start_tag_name_hash: LocalNameHash,
47 cdata_allowed: bool,
48 state: State<S>,
49 closing_quote: u8,
50 pending_text_type_change: Option<TextType>,
51 last_text_type: TextType,
52}
53
54impl<S: TagHintSink> TagScanner<S> {
55 pub fn new() -> Self {
56 Self {
57 next_pos: 0,
58 is_last_input: false,
59 tag_start: None,
60 ch_sequence_matching_start: None,
61 tag_name_start: 0,
62 is_in_end_tag: false,
63 tag_name_hash: LocalNameHash::default(),
64 last_start_tag_name_hash: LocalNameHash::default(),
65 cdata_allowed: false,
66 state: Self::data_state,
67 closing_quote: b'"',
68 pending_text_type_change: None,
69 last_text_type: TextType::Data,
70 }
71 }
72
73 fn emit_tag_hint(
74 &mut self,
75 context: &mut ParserContext<S>,
76 input: &[u8],
77 is_in_end_tag: bool,
78 ) -> Result<ParserDirective, RewritingError> {
79 let name_range = Range {
80 start: self.tag_name_start,
81 end: self.pos(),
82 };
83
84 let input_bytes = Bytes::new(input);
85 let name = LocalName::new(&input_bytes, name_range, self.tag_name_hash);
86
87 trace!(@output name);
88
89 if is_in_end_tag {
90 context.output_sink.handle_end_tag_hint(name)
91 } else {
92 self.last_start_tag_name_hash = self.tag_name_hash;
93
94 let ns = context.tree_builder_simulator.current_ns();
95
96 context.output_sink.handle_start_tag_hint(name, ns)
97 }
98 }
99
100 #[inline]
101 fn try_apply_tree_builder_feedback(
102 &mut self,
103 context: &mut ParserContext<S>,
104 ) -> Result<Option<TreeBuilderFeedback>, ParsingAmbiguityError> {
105 let feedback = if self.is_in_end_tag {
106 context
107 .tree_builder_simulator
108 .get_feedback_for_end_tag(self.tag_name_hash)
109 } else {
110 context
111 .tree_builder_simulator
112 .get_feedback_for_start_tag(self.tag_name_hash)?
113 };
114
115 Ok(match feedback {
116 TreeBuilderFeedback::SwitchTextType(text_type) => {
117 self.pending_text_type_change = Some(text_type);
120 None
121 }
122 TreeBuilderFeedback::SetAllowCdata(cdata_allowed) => {
123 self.cdata_allowed = cdata_allowed;
124 None
125 }
126 TreeBuilderFeedback::RequestLexeme(_) => Some(feedback),
127 TreeBuilderFeedback::None => None,
128 })
129 }
130
131 #[inline]
132 fn take_feedback_directive(&mut self) -> FeedbackDirective {
133 self.pending_text_type_change
134 .take()
135 .map_or(FeedbackDirective::Skip, |text_type| {
136 FeedbackDirective::ApplyUnhandledFeedback(TreeBuilderFeedback::SwitchTextType(
137 text_type,
138 ))
139 })
140 }
141}
142
143impl<S: TagHintSink> StateMachine for TagScanner<S> {
144 impl_common_sm_accessors!();
145 impl_common_input_cursor_methods!();
146
147 #[inline]
148 fn set_state(&mut self, state: State<S>) {
149 self.state = state;
150 }
151
152 #[inline]
153 fn state(&self) -> State<S> {
154 self.state
155 }
156
157 #[inline]
158 fn get_consumed_byte_count(&self, input: &[u8]) -> usize {
159 match (self.tag_start, self.ch_sequence_matching_start) {
166 (Some(tag_start), Some(ch_sequence_matching_start)) => {
167 min(tag_start, ch_sequence_matching_start)
168 }
169 (Some(tag_start), None) => tag_start,
170 (None, Some(ch_sequence_matching_start)) => ch_sequence_matching_start,
171 (None, None) => input.len(),
172 }
173 }
174
175 fn adjust_for_next_input(&mut self) {
176 if let Some(tag_start) = self.tag_start {
177 self.tag_name_start.align(tag_start);
178 self.tag_start = Some(0);
179 }
180 }
181
182 #[inline]
183 fn adjust_to_bookmark(&mut self, _pos: usize, _feedback_directive: FeedbackDirective) {
184 trace!(@noop);
185 }
186
187 #[inline]
188 fn enter_ch_sequence_matching(&mut self) {
189 self.ch_sequence_matching_start = Some(self.pos());
190 }
191
192 #[inline]
193 fn leave_ch_sequence_matching(&mut self) {
194 self.ch_sequence_matching_start = None;
195 }
196}