1use std::collections::VecDeque;
39use std::convert::Infallible;
40use std::mem::swap;
41
42use crate::utils::trace_log;
43use crate::{naive_next_state, Emitter, Error, State};
44
45#[derive(Debug)]
50pub enum CallbackEvent<'a> {
51 OpenStartTag {
56 name: &'a [u8],
58 },
59
60 AttributeName {
64 name: &'a [u8],
66 },
67
68 AttributeValue {
75 value: &'a [u8],
77 },
78
79 CloseStartTag {
82 self_closing: bool,
87 },
88
89 EndTag {
95 name: &'a [u8],
97 },
98
99 String {
105 value: &'a [u8],
107 },
108
109 Comment {
111 value: &'a [u8],
113 },
114
115 Doctype {
117 name: &'a [u8],
119 public_identifier: Option<&'a [u8]>,
121 system_identifier: Option<&'a [u8]>,
123 force_quirks: bool,
125 },
126
127 Error(Error),
129}
130
/// The kind of tag that is currently being assembled by the emitter.
#[derive(Clone, Copy, Debug)]
enum CurrentTag {
    /// A start tag.
    Start,
    /// An end tag.
    End,
}
136
/// The user callback together with the tokens it has produced but nobody has consumed yet.
#[derive(Debug)]
struct CallbackState<F, T> {
    /// Receives every event.
    callback: F,
    /// Tokens returned by `callback`, waiting to be handed out.
    emitted_tokens: VecDeque<T>,
}
142
143pub trait Callback<T> {
147 fn handle_event(&mut self, event: CallbackEvent<'_>) -> Option<T>;
150}
151
152impl<T, F> Callback<T> for F
153where
154 F: FnMut(CallbackEvent<'_>) -> Option<T>,
155{
156 fn handle_event(&mut self, event: CallbackEvent<'_>) -> Option<T> {
157 self(event)
158 }
159}
160
161impl<F, T> CallbackState<F, T>
162where
163 F: Callback<T>,
164{
165 fn emit_event(&mut self, event: CallbackEvent<'_>) {
166 let res = self.callback.handle_event(event);
167 if let Some(token) = res {
168 self.emitted_tokens.push_front(token);
169 }
170 }
171}
172
173impl<F, T> Default for CallbackState<F, T>
174where
175 F: Default,
176{
177 fn default() -> Self {
178 CallbackState {
179 callback: F::default(),
180 emitted_tokens: VecDeque::default(),
181 }
182 }
183}
184
185#[derive(Debug, Default)]
186struct EmitterState {
187 naively_switch_states: bool,
188
189 current_characters: Vec<u8>,
190 current_comment: Vec<u8>,
191
192 last_start_tag: Vec<u8>,
193 current_tag_had_attributes: bool,
194 current_tag_type: Option<CurrentTag>,
195 current_tag_self_closing: bool,
196 current_tag_name: Vec<u8>,
197 current_attribute_name: Vec<u8>,
198 current_attribute_value: Vec<u8>,
199
200 doctype_name: Vec<u8>,
202 doctype_has_public_identifier: bool,
203 doctype_has_system_identifier: bool,
204 doctype_public_identifier: Vec<u8>,
205 doctype_system_identifier: Vec<u8>,
206 doctype_force_quirks: bool,
207}
208
209#[derive(Debug)]
212pub struct CallbackEmitter<F, T = Infallible> {
213 callback_state: CallbackState<F, T>,
216 emitter_state: EmitterState,
217}
218
219impl<F, T> Default for CallbackEmitter<F, T>
220where
221 F: Default,
222{
223 fn default() -> Self {
224 CallbackEmitter {
225 callback_state: CallbackState::default(),
226 emitter_state: EmitterState::default(),
227 }
228 }
229}
230
231impl<F, T> CallbackEmitter<F, T>
232where
233 F: Callback<T>,
234{
235 pub fn new(callback: F) -> Self {
240 CallbackEmitter {
241 callback_state: CallbackState {
242 callback,
243 emitted_tokens: VecDeque::new(),
244 },
245 emitter_state: EmitterState::default(),
246 }
247 }
248
249 pub fn callback_mut(&mut self) -> &mut F {
251 &mut self.callback_state.callback
252 }
253
254 pub fn naively_switch_states(&mut self, yes: bool) {
258 self.emitter_state.naively_switch_states = yes;
259 }
260
261 fn flush_attribute_name(&mut self) {
262 if !self.emitter_state.current_attribute_name.is_empty() {
263 self.callback_state
264 .emit_event(CallbackEvent::AttributeName {
265 name: &self.emitter_state.current_attribute_name,
266 });
267 self.emitter_state.current_attribute_name.clear();
268 }
269 }
270
271 fn flush_attribute(&mut self) {
272 self.flush_attribute_name();
273
274 if !self.emitter_state.current_attribute_value.is_empty() {
275 self.callback_state
276 .emit_event(CallbackEvent::AttributeValue {
277 value: &self.emitter_state.current_attribute_value,
278 });
279 self.emitter_state.current_attribute_value.clear();
280 }
281 }
282
283 fn flush_open_start_tag(&mut self) {
284 if matches!(self.emitter_state.current_tag_type, Some(CurrentTag::Start))
285 && !self.emitter_state.current_tag_name.is_empty()
286 {
287 self.callback_state.emit_event(CallbackEvent::OpenStartTag {
288 name: &self.emitter_state.current_tag_name,
289 });
290
291 self.emitter_state.last_start_tag.clear();
292 swap(
293 &mut self.emitter_state.last_start_tag,
294 &mut self.emitter_state.current_tag_name,
295 );
296 }
297 }
298
299 fn flush_current_characters(&mut self) {
300 if self.emitter_state.current_characters.is_empty() {
301 return;
302 }
303
304 self.callback_state.emit_event(CallbackEvent::String {
305 value: &self.emitter_state.current_characters,
306 });
307 self.emitter_state.current_characters.clear();
308 }
309}
310impl<F, T> Emitter for CallbackEmitter<F, T>
311where
312 F: Callback<T>,
313{
314 type Token = T;
315
316 fn set_last_start_tag(&mut self, last_start_tag: Option<&[u8]>) {
317 self.emitter_state.last_start_tag.clear();
318 self.emitter_state
319 .last_start_tag
320 .extend(last_start_tag.unwrap_or_default());
321 }
322
323 fn emit_eof(&mut self) {
324 self.flush_current_characters();
325 }
326
327 fn emit_error(&mut self, error: Error) {
328 self.callback_state.emit_event(CallbackEvent::Error(error));
329 }
330
331 fn pop_token(&mut self) -> Option<Self::Token> {
332 self.callback_state.emitted_tokens.pop_back()
333 }
334
335 fn emit_string(&mut self, s: &[u8]) {
336 crate::utils::trace_log!("callbacks: emit_string, len={}", s.len());
337 self.emitter_state.current_characters.extend(s);
338 }
339
340 fn init_start_tag(&mut self) {
341 self.emitter_state.current_tag_name.clear();
342 self.emitter_state.current_tag_type = Some(CurrentTag::Start);
343 self.emitter_state.current_tag_self_closing = false;
344 }
345
346 fn init_end_tag(&mut self) {
347 self.emitter_state.current_tag_name.clear();
348 self.emitter_state.current_tag_type = Some(CurrentTag::End);
349 self.emitter_state.current_tag_had_attributes = false;
350 }
351
352 fn init_comment(&mut self) {
353 self.flush_current_characters();
354 self.emitter_state.current_comment.clear();
355 }
356
357 fn emit_current_tag(&mut self) -> Option<State> {
358 self.flush_attribute();
359 self.flush_current_characters();
360 match self.emitter_state.current_tag_type {
361 Some(CurrentTag::Start) => {
362 self.flush_open_start_tag();
363 self.callback_state
364 .emit_event(CallbackEvent::CloseStartTag {
365 self_closing: self.emitter_state.current_tag_self_closing,
366 });
367 }
368 Some(CurrentTag::End) => {
369 if self.emitter_state.current_tag_had_attributes {
370 self.emit_error(Error::EndTagWithAttributes);
371 }
372 self.emitter_state.last_start_tag.clear();
373 self.callback_state.emit_event(CallbackEvent::EndTag {
374 name: &self.emitter_state.current_tag_name,
375 });
376 }
377 _ => {}
378 }
379
380 if self.emitter_state.naively_switch_states {
381 naive_next_state(&self.emitter_state.last_start_tag)
382 } else {
383 None
384 }
385 }
386 fn emit_current_comment(&mut self) {
387 self.callback_state.emit_event(CallbackEvent::Comment {
388 value: &self.emitter_state.current_comment,
389 });
390 self.emitter_state.current_comment.clear();
391 }
392
393 fn emit_current_doctype(&mut self) {
394 self.callback_state.emit_event(CallbackEvent::Doctype {
395 name: &self.emitter_state.doctype_name,
396 public_identifier: if self.emitter_state.doctype_has_public_identifier {
397 Some(&self.emitter_state.doctype_public_identifier)
398 } else {
399 None
400 },
401 system_identifier: if self.emitter_state.doctype_has_system_identifier {
402 Some(&self.emitter_state.doctype_system_identifier)
403 } else {
404 None
405 },
406 force_quirks: self.emitter_state.doctype_force_quirks,
407 });
408 }
409
410 fn set_self_closing(&mut self) {
411 trace_log!("set_self_closing");
412 if matches!(self.emitter_state.current_tag_type, Some(CurrentTag::End)) {
413 self.callback_state
414 .emit_event(CallbackEvent::Error(Error::EndTagWithTrailingSolidus));
415 } else {
416 self.emitter_state.current_tag_self_closing = true;
417 }
418 }
419
420 fn set_force_quirks(&mut self) {
421 self.emitter_state.doctype_force_quirks = true;
422 }
423
424 fn push_tag_name(&mut self, s: &[u8]) {
425 self.emitter_state.current_tag_name.extend(s);
426 }
427
428 fn push_comment(&mut self, s: &[u8]) {
429 self.emitter_state.current_comment.extend(s);
430 }
431
432 fn push_doctype_name(&mut self, s: &[u8]) {
433 self.emitter_state.doctype_name.extend(s);
434 }
435
436 fn init_doctype(&mut self) {
437 self.flush_current_characters();
438 self.emitter_state.doctype_name.clear();
439 self.emitter_state.doctype_has_public_identifier = false;
440 self.emitter_state.doctype_has_system_identifier = false;
441 self.emitter_state.doctype_public_identifier.clear();
442 self.emitter_state.doctype_system_identifier.clear();
443 self.emitter_state.doctype_force_quirks = false;
444 }
445
446 fn init_attribute(&mut self) {
447 self.flush_open_start_tag();
448 self.flush_attribute();
449 self.emitter_state.current_tag_had_attributes = true;
450 }
451
452 fn push_attribute_name(&mut self, s: &[u8]) {
453 self.emitter_state.current_attribute_name.extend(s);
454 }
455
456 fn push_attribute_value(&mut self, s: &[u8]) {
457 self.flush_attribute_name();
458 self.emitter_state.current_attribute_value.extend(s);
459 }
460
461 fn set_doctype_public_identifier(&mut self, value: &[u8]) {
462 self.emitter_state.doctype_has_public_identifier = true;
463 self.emitter_state.doctype_public_identifier.clear();
464 self.emitter_state.doctype_public_identifier.extend(value);
465 }
466 fn set_doctype_system_identifier(&mut self, value: &[u8]) {
467 self.emitter_state.doctype_has_system_identifier = true;
468 self.emitter_state.doctype_system_identifier.clear();
469 self.emitter_state.doctype_system_identifier.extend(value);
470 }
471 fn push_doctype_public_identifier(&mut self, value: &[u8]) {
472 self.emitter_state.doctype_public_identifier.extend(value);
473 }
474 fn push_doctype_system_identifier(&mut self, value: &[u8]) {
475 self.emitter_state.doctype_system_identifier.extend(value);
476 }
477
478 fn current_is_appropriate_end_tag_token(&mut self) -> bool {
479 if self.emitter_state.last_start_tag.is_empty() {
480 crate::utils::trace_log!(
481 "current_is_appropriate_end_tag_token: no, because last_start_tag is empty"
482 );
483 return false;
484 }
485
486 if !matches!(self.emitter_state.current_tag_type, Some(CurrentTag::End)) {
487 crate::utils::trace_log!(
488 "current_is_appropriate_end_tag_token: no, because current_tag_type is not end"
489 );
490 return false;
491 }
492
493 crate::utils::trace_log!(
494 "current_is_appropriate_end_tag_token: last_start_tag = {:?}",
495 self.emitter_state.last_start_tag
496 );
497 crate::utils::trace_log!(
498 "current_is_appropriate_end_tag_token: current_tag = {:?}",
499 self.emitter_state.current_tag_name
500 );
501 self.emitter_state.last_start_tag == self.emitter_state.current_tag_name
502 }
503}