html5tokenizer/tokenizer/
machine.rs

1mod utils;
2
3use crate::entities::try_read_character_reference;
4use crate::offset::{Offset, Position};
5use crate::trace::AttrValueSyntax;
6use crate::{reader::Reader, Emitter, Error};
7use utils::{
8    ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
9};
10
11pub use utils::State;
12
/// The state and working registers of the tokenizer state machine that [`consume`] advances.
///
/// `R` is the character source, `O` is the offset type positions are reported in,
/// and `E` is the emitter that [`consume`] reports to.
pub(super) struct Machine<R, O, E> {
    /// The state that [`consume`] dispatches on; every step may overwrite it.
    pub(super) state: State,
    /// The sink that [`consume`] reports to, e.g. through `init_comment`,
    /// `terminate_tag_name`, `init_attribute_name` and `init_attribute_value`.
    pub(super) emitter: E,
    /// Scratch buffer. In the RCDATA / RAWTEXT / script-data states it is cleared
    /// once `/` follows `<`, then collects the letters of the candidate end tag name;
    /// in the script double-escape states it is compared against `"script"`.
    temporary_buffer: String,
    /// The input being tokenized; also queried for the current position.
    reader: R,
    // NOTE(review): presumably holds the characters handed back via `unread_char`
    // so that the next read yields them again — confirm against `utils`.
    to_reconsume: Stack2<Option<char>>,
    /// Accumulator updated by the `mutate_character_reference!` macro in [`consume`].
    /// On arithmetic overflow it is set to `0x110000` to provoke an error later on.
    character_reference_code: u32,
    /// Recorded right before switching to [`State::CharacterReference`]
    /// (presumably the state that is resumed afterwards — confirm in the
    /// character reference states).
    return_state: Option<State>,
    // NOTE(review): presumably the name of the tag currently being built via
    // `push_tag_name` — confirm against `utils`.
    current_tag_name: String,
    // NOTE(review): presumably what `current_end_tag_is_appropriate` compares the
    // current end tag name against — confirm against `utils`.
    pub(super) last_start_tag_name: String,
    // NOTE(review): presumably set by `init_start_tag` / `init_end_tag` — confirm
    // against `utils`.
    is_start_tag: bool,
    /// The reader position before the match block in [`consume`].
    position_before_match: O,
    /// * Set to the offset of `<` in [`State::Data`].
    /// * Set to the offset of `-` in [`State::Comment`].
    /// * Set to the offset of `[` in [`State::CdataSectionBracket`].
    /// * Set to the offset of `&` in [`State::CharacterReference`].
    some_offset: O,
    /// This boolean flag exists so that the [`NaiveParser`](crate::NaiveParser) can work with any [`Emitter`]
    /// (it cannot call [`Tokenizer::set_state`] using the emitted start tags since they can be of an arbitrary type).
    ///
    /// [`Tokenizer::set_state`]: super::Tokenizer::set_state
    pub(crate) naively_switch_state: bool,
}
37
38impl<R, O, E> Machine<R, O, E>
39where
40    R: Reader + Position<O>,
41    O: Offset,
42    E: Emitter<O>,
43{
44    pub fn new(reader: R, emitter: E) -> Self {
45        Self {
46            reader,
47            emitter,
48            state: State::Data,
49            to_reconsume: Stack2::default(),
50            return_state: None,
51            temporary_buffer: String::new(),
52            character_reference_code: 0,
53            current_tag_name: String::new(),
54            last_start_tag_name: String::new(),
55            is_start_tag: false,
56            position_before_match: O::default(),
57            some_offset: O::default(),
58            naively_switch_state: false,
59        }
60    }
61}
62
/// The outcome of a single [`consume`] step.
pub enum ControlToken {
    /// Returned by states that hit the end of input (`read_char` yielded `None`)
    /// and have finished handling it.
    Eof,
    /// Returned after an ordinary step; presumably the caller is expected to
    /// call [`consume`] again.
    Continue,
    // NOTE(review): presumably signals that a CDATA section opener was seen and
    // the caller has to decide how to proceed — confirm against the
    // markup declaration states and the caller.
    CdataOpen,
}
68
69#[inline]
70pub(super) fn consume<O, R, E>(slf: &mut Machine<R, O, E>) -> Result<ControlToken, R::Error>
71where
72    O: Offset,
73    R: Reader + Position<O>,
74    E: Emitter<O>,
75{
76    macro_rules! mutate_character_reference {
77        (* $mul:literal + $x:ident - $sub:literal) => {
78            match slf
79                .character_reference_code
80                .checked_mul($mul)
81                .and_then(|cr| cr.checked_add($x as u32 - $sub))
82            {
83                Some(cr) => slf.character_reference_code = cr,
84                None => {
85                    // provoke err
86                    slf.character_reference_code = 0x110000;
87                }
88            };
89        };
90    }
91
92    slf.position_before_match = slf.reader.position();
93
94    match slf.state {
95        State::Data => match slf.read_char()? {
96            Some('&') => {
97                slf.return_state = Some(slf.state);
98                slf.state = State::CharacterReference;
99                Ok(ControlToken::Continue)
100            }
101            Some('<') => {
102                slf.some_offset = slf.position_before_match;
103                slf.state = State::TagOpen;
104                Ok(ControlToken::Continue)
105            }
106            Some('\0') => {
107                slf.emit_error(Error::UnexpectedNullCharacter);
108                slf.emit_char('\0');
109                Ok(ControlToken::Continue)
110            }
111            Some(x) => {
112                slf.emit_char(x);
113                Ok(ControlToken::Continue)
114            }
115            None => Ok(ControlToken::Eof),
116        },
117        State::Rcdata => match slf.read_char()? {
118            Some('&') => {
119                slf.return_state = Some(State::Rcdata);
120                slf.state = State::CharacterReference;
121                Ok(ControlToken::Continue)
122            }
123            Some('<') => {
124                slf.state = State::RcdataLessThanSign;
125                Ok(ControlToken::Continue)
126            }
127            Some('\0') => {
128                slf.emit_error(Error::UnexpectedNullCharacter);
129                slf.emit_char_for_source_char('\u{fffd}', '\0');
130                Ok(ControlToken::Continue)
131            }
132            Some(x) => {
133                slf.emit_char(x);
134                Ok(ControlToken::Continue)
135            }
136            None => Ok(ControlToken::Eof),
137        },
138        State::Rawtext => match slf.read_char()? {
139            Some('<') => {
140                slf.state = State::RawTextLessThanSign;
141                Ok(ControlToken::Continue)
142            }
143            Some('\0') => {
144                slf.emit_error(Error::UnexpectedNullCharacter);
145                slf.emit_char_for_source_char('\u{fffd}', '\0');
146                Ok(ControlToken::Continue)
147            }
148            Some(x) => {
149                slf.emit_char(x);
150                Ok(ControlToken::Continue)
151            }
152            None => Ok(ControlToken::Eof),
153        },
154        State::ScriptData => match slf.read_char()? {
155            Some('<') => {
156                slf.state = State::ScriptDataLessThanSign;
157                Ok(ControlToken::Continue)
158            }
159            Some('\0') => {
160                slf.emit_error(Error::UnexpectedNullCharacter);
161                slf.emit_char_for_source_char('\u{fffd}', '\0');
162                Ok(ControlToken::Continue)
163            }
164            Some(x) => {
165                slf.emit_char(x);
166                Ok(ControlToken::Continue)
167            }
168            None => Ok(ControlToken::Eof),
169        },
170        State::Plaintext => match slf.read_char()? {
171            Some('\0') => {
172                slf.emit_error(Error::UnexpectedNullCharacter);
173                slf.emit_char_for_source_char('\u{fffd}', '\0');
174                Ok(ControlToken::Continue)
175            }
176            Some(x) => {
177                slf.emit_char(x);
178                Ok(ControlToken::Continue)
179            }
180            None => Ok(ControlToken::Eof),
181        },
182        State::TagOpen => match slf.read_char()? {
183            Some('!') => {
184                slf.state = State::MarkupDeclarationOpen;
185                Ok(ControlToken::Continue)
186            }
187            Some('/') => {
188                slf.state = State::EndTagOpen;
189                Ok(ControlToken::Continue)
190            }
191            Some(x) if x.is_ascii_alphabetic() => {
192                slf.init_start_tag();
193                slf.state = State::TagName;
194                slf.unread_char(Some(x));
195                Ok(ControlToken::Continue)
196            }
197            c @ Some('?') => {
198                slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
199                slf.emitter.init_comment(slf.reader.position());
200                slf.state = State::BogusComment;
201                slf.unread_char(c);
202                Ok(ControlToken::Continue)
203            }
204            None => {
205                slf.emit_error(Error::EofBeforeTagName);
206                slf.emit_char('<');
207                Ok(ControlToken::Eof)
208            }
209            c @ Some(_) => {
210                slf.emit_error(Error::InvalidFirstCharacterOfTagName);
211                slf.state = State::Data;
212                slf.emit_char('<');
213                slf.unread_char(c);
214                Ok(ControlToken::Continue)
215            }
216        },
217        State::EndTagOpen => match slf.read_char()? {
218            Some(x) if x.is_ascii_alphabetic() => {
219                slf.init_end_tag();
220                slf.state = State::TagName;
221                slf.unread_char(Some(x));
222                Ok(ControlToken::Continue)
223            }
224            Some('>') => {
225                slf.emit_error(Error::MissingEndTagName);
226                slf.state = State::Data;
227                Ok(ControlToken::Continue)
228            }
229            None => {
230                slf.emit_error(Error::EofBeforeTagName);
231                slf.emit_chars(b"</");
232                Ok(ControlToken::Eof)
233            }
234            Some(x) => {
235                slf.emit_error(Error::InvalidFirstCharacterOfTagName);
236                slf.emitter.init_comment(slf.reader.position());
237                slf.state = State::BogusComment;
238                slf.unread_char(Some(x));
239                Ok(ControlToken::Continue)
240            }
241        },
242        State::TagName => match slf.read_char()? {
243            Some(whitespace_pat!()) => {
244                slf.emitter.terminate_tag_name(slf.position_before_match);
245                slf.state = State::BeforeAttributeName;
246                Ok(ControlToken::Continue)
247            }
248            Some('/') => {
249                slf.emitter.terminate_tag_name(slf.position_before_match);
250                slf.state = State::SelfClosingStartTag;
251                Ok(ControlToken::Continue)
252            }
253            Some('>') => {
254                slf.emitter.terminate_tag_name(slf.position_before_match);
255                slf.state = State::Data;
256                slf.emit_current_tag();
257                Ok(ControlToken::Continue)
258            }
259            Some('\0') => {
260                slf.emit_error(Error::UnexpectedNullCharacter);
261                slf.push_tag_name("\u{fffd}");
262                Ok(ControlToken::Continue)
263            }
264            Some(x) => {
265                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
266                Ok(ControlToken::Continue)
267            }
268            None => {
269                slf.emit_error(Error::EofInTag);
270                Ok(ControlToken::Eof)
271            }
272        },
273        State::RcdataLessThanSign => match slf.read_char()? {
274            Some('/') => {
275                slf.temporary_buffer.clear();
276                slf.state = State::RcdataEndTagOpen;
277                Ok(ControlToken::Continue)
278            }
279            c => {
280                slf.emit_char('<');
281                slf.state = State::Rcdata;
282                slf.unread_char(c);
283                Ok(ControlToken::Continue)
284            }
285        },
286        State::RcdataEndTagOpen => match slf.read_char()? {
287            Some(x) if x.is_ascii_alphabetic() => {
288                slf.init_end_tag();
289                slf.state = State::RcdataEndTagName;
290                slf.unread_char(Some(x));
291                Ok(ControlToken::Continue)
292            }
293            c => {
294                slf.emit_chars(b"</");
295                slf.state = State::Rcdata;
296                slf.unread_char(c);
297                Ok(ControlToken::Continue)
298            }
299        },
300        State::RcdataEndTagName => match slf.read_char()? {
301            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
302                slf.state = State::BeforeAttributeName;
303                Ok(ControlToken::Continue)
304            }
305            Some('/') if slf.current_end_tag_is_appropriate() => {
306                slf.state = State::SelfClosingStartTag;
307                Ok(ControlToken::Continue)
308            }
309            Some('>') if slf.current_end_tag_is_appropriate() => {
310                slf.state = State::Data;
311                slf.emit_current_tag();
312                Ok(ControlToken::Continue)
313            }
314            Some(x) if x.is_ascii_alphabetic() => {
315                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
316                slf.temporary_buffer.push(x);
317                Ok(ControlToken::Continue)
318            }
319            c => {
320                slf.emit_chars(b"</");
321                slf.flush_buffer_characters();
322
323                slf.state = State::Rcdata;
324                slf.unread_char(c);
325                Ok(ControlToken::Continue)
326            }
327        },
328        State::RawTextLessThanSign => match slf.read_char()? {
329            Some('/') => {
330                slf.temporary_buffer.clear();
331                slf.state = State::RawTextEndTagOpen;
332                Ok(ControlToken::Continue)
333            }
334            c => {
335                slf.emit_char('<');
336                slf.state = State::Rawtext;
337                slf.unread_char(c);
338                Ok(ControlToken::Continue)
339            }
340        },
341        State::RawTextEndTagOpen => match slf.read_char()? {
342            Some(x) if x.is_ascii_alphabetic() => {
343                slf.init_end_tag();
344                slf.state = State::RawTextEndTagName;
345                slf.unread_char(Some(x));
346                Ok(ControlToken::Continue)
347            }
348            c => {
349                slf.emit_chars(b"</");
350                slf.state = State::Rawtext;
351                slf.unread_char(c);
352                Ok(ControlToken::Continue)
353            }
354        },
355        State::RawTextEndTagName => match slf.read_char()? {
356            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
357                slf.state = State::BeforeAttributeName;
358                Ok(ControlToken::Continue)
359            }
360            Some('/') if slf.current_end_tag_is_appropriate() => {
361                slf.state = State::SelfClosingStartTag;
362                Ok(ControlToken::Continue)
363            }
364            Some('>') if slf.current_end_tag_is_appropriate() => {
365                slf.state = State::Data;
366                slf.emit_current_tag();
367                Ok(ControlToken::Continue)
368            }
369            Some(x) if x.is_ascii_alphabetic() => {
370                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
371                slf.temporary_buffer.push(x);
372                Ok(ControlToken::Continue)
373            }
374            c => {
375                slf.emit_chars(b"</");
376                slf.flush_buffer_characters();
377
378                slf.state = State::Rawtext;
379                slf.unread_char(c);
380                Ok(ControlToken::Continue)
381            }
382        },
383        State::ScriptDataLessThanSign => match slf.read_char()? {
384            Some('/') => {
385                slf.temporary_buffer.clear();
386                slf.state = State::ScriptDataEndTagOpen;
387                Ok(ControlToken::Continue)
388            }
389            Some('!') => {
390                slf.state = State::ScriptDataEscapeStart;
391                slf.emit_chars(b"<!");
392                Ok(ControlToken::Continue)
393            }
394            c => {
395                slf.emit_char('<');
396                slf.state = State::ScriptData;
397                slf.unread_char(c);
398                Ok(ControlToken::Continue)
399            }
400        },
401        State::ScriptDataEndTagOpen => match slf.read_char()? {
402            Some(x) if x.is_ascii_alphabetic() => {
403                slf.init_end_tag();
404                slf.state = State::ScriptDataEndTagName;
405                slf.unread_char(Some(x));
406                Ok(ControlToken::Continue)
407            }
408            c => {
409                slf.emit_chars(b"</");
410                slf.state = State::ScriptData;
411                slf.unread_char(c);
412                Ok(ControlToken::Continue)
413            }
414        },
415        State::ScriptDataEndTagName => match slf.read_char()? {
416            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
417                slf.state = State::BeforeAttributeName;
418                Ok(ControlToken::Continue)
419            }
420            Some('/') if slf.current_end_tag_is_appropriate() => {
421                slf.state = State::SelfClosingStartTag;
422                Ok(ControlToken::Continue)
423            }
424            Some('>') if slf.current_end_tag_is_appropriate() => {
425                slf.state = State::Data;
426                slf.emit_current_tag();
427                Ok(ControlToken::Continue)
428            }
429            Some(x) if x.is_ascii_alphabetic() => {
430                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
431                slf.temporary_buffer.push(x);
432                Ok(ControlToken::Continue)
433            }
434            c => {
435                slf.emit_chars(b"</");
436                slf.flush_buffer_characters();
437                slf.state = State::Data;
438                slf.unread_char(c);
439                Ok(ControlToken::Continue)
440            }
441        },
442        State::ScriptDataEscapeStart => match slf.read_char()? {
443            Some('-') => {
444                slf.state = State::ScriptDataEscapeStartDash;
445                slf.emit_char('-');
446                Ok(ControlToken::Continue)
447            }
448            c => {
449                slf.state = State::ScriptData;
450                slf.unread_char(c);
451                Ok(ControlToken::Continue)
452            }
453        },
454        State::ScriptDataEscapeStartDash => match slf.read_char()? {
455            Some('-') => {
456                slf.state = State::ScriptDataEscapedDashDash;
457                slf.emit_char('-');
458                Ok(ControlToken::Continue)
459            }
460            c => {
461                slf.state = State::ScriptData;
462                slf.unread_char(c);
463                Ok(ControlToken::Continue)
464            }
465        },
466        State::ScriptDataEscaped => match slf.read_char()? {
467            Some('-') => {
468                slf.state = State::ScriptDataEscapedDash;
469                slf.emit_char('-');
470                Ok(ControlToken::Continue)
471            }
472            Some('<') => {
473                slf.state = State::ScriptDataEscapedLessThanSign;
474                Ok(ControlToken::Continue)
475            }
476            Some('\0') => {
477                slf.emit_error(Error::UnexpectedNullCharacter);
478                slf.emit_char_for_source_char('\u{fffd}', '\0');
479                Ok(ControlToken::Continue)
480            }
481            None => {
482                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
483                Ok(ControlToken::Eof)
484            }
485            Some(x) => {
486                slf.emit_char(x);
487                Ok(ControlToken::Continue)
488            }
489        },
490        State::ScriptDataEscapedDash => match slf.read_char()? {
491            Some('-') => {
492                slf.state = State::ScriptDataEscapedDashDash;
493                slf.emit_char('-');
494                Ok(ControlToken::Continue)
495            }
496            Some('<') => {
497                slf.state = State::ScriptDataEscapedLessThanSign;
498                Ok(ControlToken::Continue)
499            }
500            Some('\0') => {
501                slf.emit_error(Error::UnexpectedNullCharacter);
502                slf.state = State::ScriptDataEscaped;
503                slf.emit_char_for_source_char('\u{fffd}', '\0');
504                Ok(ControlToken::Continue)
505            }
506            None => {
507                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
508                Ok(ControlToken::Eof)
509            }
510            Some(x) => {
511                slf.state = State::ScriptDataEscaped;
512                slf.emit_char(x);
513                Ok(ControlToken::Continue)
514            }
515        },
516        State::ScriptDataEscapedDashDash => match slf.read_char()? {
517            Some('-') => {
518                slf.emit_char('-');
519                Ok(ControlToken::Continue)
520            }
521            Some('<') => {
522                slf.state = State::ScriptDataEscapedLessThanSign;
523                Ok(ControlToken::Continue)
524            }
525            Some('>') => {
526                slf.state = State::ScriptData;
527                slf.emit_char('>');
528                Ok(ControlToken::Continue)
529            }
530            Some('\0') => {
531                slf.emit_error(Error::UnexpectedNullCharacter);
532                slf.state = State::ScriptDataEscaped;
533                slf.emit_char_for_source_char('\u{fffd}', '\0');
534                Ok(ControlToken::Continue)
535            }
536            None => {
537                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
538                Ok(ControlToken::Eof)
539            }
540            Some(x) => {
541                slf.state = State::ScriptDataEscaped;
542                slf.emit_char(x);
543                Ok(ControlToken::Continue)
544            }
545        },
546        State::ScriptDataEscapedLessThanSign => match slf.read_char()? {
547            Some('/') => {
548                slf.temporary_buffer.clear();
549                slf.state = State::ScriptDataEscapedEndTagOpen;
550                Ok(ControlToken::Continue)
551            }
552            Some(x) if x.is_ascii_alphabetic() => {
553                slf.temporary_buffer.clear();
554                slf.emit_char('<');
555                slf.state = State::ScriptDataDoubleEscapeStart;
556                slf.unread_char(Some(x));
557                Ok(ControlToken::Continue)
558            }
559            c => {
560                slf.emit_char('<');
561                slf.state = State::ScriptDataEscaped;
562                slf.unread_char(c);
563                Ok(ControlToken::Continue)
564            }
565        },
566        State::ScriptDataEscapedEndTagOpen => match slf.read_char()? {
567            Some(x) if x.is_ascii_alphabetic() => {
568                slf.init_end_tag();
569                slf.state = State::ScriptDataEscapedEndTagName;
570                slf.unread_char(Some(x));
571                Ok(ControlToken::Continue)
572            }
573            c => {
574                slf.emit_chars(b"</");
575                slf.unread_char(c);
576                slf.state = State::ScriptDataEscaped;
577                Ok(ControlToken::Continue)
578            }
579        },
580        State::ScriptDataEscapedEndTagName => match slf.read_char()? {
581            Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
582                slf.state = State::BeforeAttributeName;
583                Ok(ControlToken::Continue)
584            }
585            Some('/') if slf.current_end_tag_is_appropriate() => {
586                slf.state = State::SelfClosingStartTag;
587                Ok(ControlToken::Continue)
588            }
589            Some('>') if slf.current_end_tag_is_appropriate() => {
590                slf.state = State::Data;
591                slf.emit_current_tag();
592                Ok(ControlToken::Continue)
593            }
594            Some(x) if x.is_ascii_alphabetic() => {
595                slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
596                slf.temporary_buffer.push(x);
597                Ok(ControlToken::Continue)
598            }
599            c => {
600                slf.emit_chars(b"</");
601                slf.flush_buffer_characters();
602                slf.state = State::ScriptDataEscaped;
603                slf.unread_char(c);
604                Ok(ControlToken::Continue)
605            }
606        },
607        State::ScriptDataDoubleEscapeStart => match slf.read_char()? {
608            Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
609                if slf.temporary_buffer == "script" {
610                    slf.state = State::ScriptDataDoubleEscaped;
611                } else {
612                    slf.state = State::ScriptDataEscaped;
613                }
614                slf.emit_char(x);
615                Ok(ControlToken::Continue)
616            }
617            Some(x) if x.is_ascii_alphabetic() => {
618                slf.temporary_buffer.push(x.to_ascii_lowercase());
619                slf.emit_char(x);
620                Ok(ControlToken::Continue)
621            }
622            c => {
623                slf.state = State::ScriptDataEscaped;
624                slf.unread_char(c);
625                Ok(ControlToken::Continue)
626            }
627        },
628        State::ScriptDataDoubleEscaped => match slf.read_char()? {
629            Some('-') => {
630                slf.state = State::ScriptDataDoubleEscapedDash;
631                slf.emit_char('-');
632                Ok(ControlToken::Continue)
633            }
634            Some('<') => {
635                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
636                slf.emit_char('<');
637                Ok(ControlToken::Continue)
638            }
639            Some('\0') => {
640                slf.emit_error(Error::UnexpectedNullCharacter);
641                slf.emit_char_for_source_char('\u{fffd}', '\0');
642                Ok(ControlToken::Continue)
643            }
644            None => {
645                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
646                Ok(ControlToken::Eof)
647            }
648            Some(x) => {
649                slf.emit_char(x);
650                Ok(ControlToken::Continue)
651            }
652        },
653        State::ScriptDataDoubleEscapedDash => match slf.read_char()? {
654            Some('-') => {
655                slf.state = State::ScriptDataDoubleEscapedDashDash;
656                slf.emit_char('-');
657                Ok(ControlToken::Continue)
658            }
659            Some('<') => {
660                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
661                slf.emit_char('<');
662                Ok(ControlToken::Continue)
663            }
664            Some('\0') => {
665                slf.emit_error(Error::UnexpectedNullCharacter);
666                slf.state = State::ScriptDataDoubleEscaped;
667                slf.emit_char_for_source_char('\u{fffd}', '\0');
668                Ok(ControlToken::Continue)
669            }
670            None => {
671                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
672                Ok(ControlToken::Eof)
673            }
674            Some(x) => {
675                slf.state = State::ScriptDataDoubleEscaped;
676                slf.emit_char(x);
677                Ok(ControlToken::Continue)
678            }
679        },
680        State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? {
681            Some('-') => {
682                slf.emit_char('-');
683                Ok(ControlToken::Continue)
684            }
685            Some('<') => {
686                slf.emit_char('<');
687                slf.state = State::ScriptDataDoubleEscapedLessThanSign;
688                Ok(ControlToken::Continue)
689            }
690            Some('>') => {
691                slf.emit_char('>');
692                slf.state = State::ScriptData;
693                Ok(ControlToken::Continue)
694            }
695            Some('\0') => {
696                slf.emit_error(Error::UnexpectedNullCharacter);
697                slf.state = State::ScriptDataDoubleEscaped;
698                slf.emit_char_for_source_char('\u{fffd}', '\0');
699                Ok(ControlToken::Continue)
700            }
701            None => {
702                slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
703                Ok(ControlToken::Eof)
704            }
705            Some(x) => {
706                slf.state = State::ScriptDataDoubleEscaped;
707                slf.emit_char(x);
708                Ok(ControlToken::Continue)
709            }
710        },
711        State::ScriptDataDoubleEscapedLessThanSign => match slf.read_char()? {
712            Some('/') => {
713                slf.temporary_buffer.clear();
714                slf.state = State::ScriptDataDoubleEscapeEnd;
715                slf.emit_char('/');
716                Ok(ControlToken::Continue)
717            }
718            c => {
719                slf.state = State::ScriptDataDoubleEscaped;
720                slf.unread_char(c);
721                Ok(ControlToken::Continue)
722            }
723        },
724        State::ScriptDataDoubleEscapeEnd => match slf.read_char()? {
725            Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
726                if slf.temporary_buffer == "script" {
727                    slf.state = State::ScriptDataEscaped;
728                } else {
729                    slf.state = State::ScriptDataDoubleEscaped;
730                }
731
732                slf.emit_char(x);
733                Ok(ControlToken::Continue)
734            }
735            Some(x) if x.is_ascii_alphabetic() => {
736                slf.temporary_buffer.push(x.to_ascii_lowercase());
737                slf.emit_char(x);
738                Ok(ControlToken::Continue)
739            }
740            c => {
741                slf.state = State::ScriptDataDoubleEscaped;
742                slf.unread_char(c);
743                Ok(ControlToken::Continue)
744            }
745        },
746        State::BeforeAttributeName => match slf.read_char()? {
747            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
748            c @ Some('/' | '>') | c @ None => {
749                slf.state = State::AfterAttributeName;
750                slf.unread_char(c);
751                Ok(ControlToken::Continue)
752            }
753            Some('=') => {
754                slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
755                slf.emitter.init_attribute_name(slf.reader.position());
756                slf.emitter.push_attribute_name("=");
757                slf.state = State::AttributeName;
758                Ok(ControlToken::Continue)
759            }
760            Some(x) => {
761                slf.emitter.init_attribute_name(slf.position_before_match);
762                slf.state = State::AttributeName;
763                slf.unread_char(Some(x));
764                Ok(ControlToken::Continue)
765            }
766        },
767        State::AttributeName => match slf.read_char()? {
               // Handles one character while an attribute name is being collected.
               // Whitespace, '/', '>' and EOF end the name at position_before_match;
               // the character (or None) is passed to unread_char before switching to
               // AfterAttributeName.
768            c @ Some(whitespace_pat!() | '/' | '>') | c @ None => {
769                slf.emitter
770                    .terminate_attribute_name(slf.position_before_match);
771                slf.state = State::AfterAttributeName;
772                slf.unread_char(c);
773                Ok(ControlToken::Continue)
774            }
               // '=' also ends the name at position_before_match but is not unread.
775            Some('=') => {
776                slf.emitter
777                    .terminate_attribute_name(slf.position_before_match);
778                slf.state = State::BeforeAttributeValue;
779                Ok(ControlToken::Continue)
780            }
               // NUL is reported and U+FFFD is appended to the name instead.
781            Some('\0') => {
782                slf.emit_error(Error::UnexpectedNullCharacter);
783                slf.emitter.push_attribute_name("\u{fffd}");
784                Ok(ControlToken::Continue)
785            }
               // Quotes and '<' are reported as an error but still appended to the name.
786            Some(x @ '"' | x @ '\'' | x @ '<') => {
787                slf.emit_error(Error::UnexpectedCharacterInAttributeName);
788                slf.emitter
789                    .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
790                Ok(ControlToken::Continue)
791            }
               // Every other character is appended after ASCII-lowercasing.
792            Some(x) => {
793                slf.emitter
794                    .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
795                Ok(ControlToken::Continue)
796            }
797        },
798        State::AfterAttributeName => match slf.read_char()? {
               // Runs after an attribute name has ended and decides what follows it.
               // Characters matched by whitespace_pat!() are skipped without a state change.
799            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
800            Some('/') => {
801                slf.state = State::SelfClosingStartTag;
802                Ok(ControlToken::Continue)
803            }
804            Some('=') => {
805                slf.state = State::BeforeAttributeValue;
806                Ok(ControlToken::Continue)
807            }
               // '>' closes the tag: the state is set to Data before the tag is emitted.
808            Some('>') => {
809                slf.state = State::Data;
810                slf.emit_current_tag();
811                Ok(ControlToken::Continue)
812            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
813            None => {
814                slf.emit_error(Error::EofInTag);
815                Ok(ControlToken::Eof)
816            }
               // Any other character starts a new attribute name at position_before_match;
               // the character is passed to unread_char before switching to AttributeName.
817            Some(x) => {
818                slf.emitter.init_attribute_name(slf.position_before_match);
819                slf.state = State::AttributeName;
820                slf.unread_char(Some(x));
821                Ok(ControlToken::Continue)
822            }
823        },
824        State::BeforeAttributeValue => match slf.read_char()? {
               // Picks the attribute value syntax from the first character that is not
               // matched by whitespace_pat!().
825            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
               // Quoted values are initialised with reader.position() rather than
               // position_before_match.
               // NOTE(review): presumably this is the offset just after the opening
               // quote — confirm against Reader::position.
826            Some('"') => {
827                slf.emitter
828                    .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
829                slf.state = State::AttributeValueDoubleQuoted;
830                Ok(ControlToken::Continue)
831            }
832            Some('\'') => {
833                slf.emitter
834                    .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
835                slf.state = State::AttributeValueSingleQuoted;
836                Ok(ControlToken::Continue)
837            }
               // '>' with no value: report MissingAttributeValue, then emit the tag.
838            Some('>') => {
839                slf.emit_error(Error::MissingAttributeValue);
840                slf.state = State::Data;
841                slf.emit_current_tag();
842                Ok(ControlToken::Continue)
843            }
               // Catch-all, which also matches None (EOF): start an unquoted value at
               // position_before_match and pass the character to unread_char.
844            c => {
845                slf.emitter
846                    .init_attribute_value(AttrValueSyntax::Unquoted, slf.position_before_match);
847                slf.state = State::AttributeValueUnquoted;
848                slf.unread_char(c);
849                Ok(ControlToken::Continue)
850            }
851        },
852        State::AttributeValueDoubleQuoted => match slf.read_char()? {
               // Handles one character inside a double-quoted attribute value.
               // The closing quote ends the value; the offset passed is the reader
               // position minus the encoded length of '"'.
853            Some('"') => {
854                slf.emitter.terminate_attribute_value(
855                    // We cannot simply pass slf.position_before_match because
856                    // State::NamedCharacterReference calls Tokenizer::unread_char
857                    // which Reader::position doesn't account for.
858                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
859                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding('"'),
860                );
861                slf.state = State::AfterAttributeValueQuoted;
862                Ok(ControlToken::Continue)
863            }
               // '&': record this state in return_state, then switch to CharacterReference.
864            Some('&') => {
865                slf.return_state = Some(State::AttributeValueDoubleQuoted);
866                slf.state = State::CharacterReference;
867                Ok(ControlToken::Continue)
868            }
               // NUL is reported and U+FFFD is appended to the value instead.
869            Some('\0') => {
870                slf.emit_error(Error::UnexpectedNullCharacter);
871                slf.emitter.push_attribute_value("\u{fffd}");
872                Ok(ControlToken::Continue)
873            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
874            None => {
875                slf.emit_error(Error::EofInTag);
876                Ok(ControlToken::Eof)
877            }
               // Every other character is appended to the value unchanged.
878            Some(x) => {
879                slf.emitter.push_attribute_value(ctostr!(x));
880                Ok(ControlToken::Continue)
881            }
882        },
883        State::AttributeValueSingleQuoted => match slf.read_char()? {
               // Handles one character inside a single-quoted attribute value.
               // The closing quote ends the value; the offset passed is the reader
               // position minus the encoded length of '\''.
884            Some('\'') => {
885                slf.emitter.terminate_attribute_value(
886                    // We cannot simply pass slf.position_before_match because
887                    // State::NamedCharacterReference calls Tokenizer::unread_char
888                    // which Reader::position doesn't account for.
889                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
890                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding('\''),
891                );
892                slf.state = State::AfterAttributeValueQuoted;
893                Ok(ControlToken::Continue)
894            }
               // '&': record this state in return_state, then switch to CharacterReference.
895            Some('&') => {
896                slf.return_state = Some(State::AttributeValueSingleQuoted);
897                slf.state = State::CharacterReference;
898                Ok(ControlToken::Continue)
899            }
               // NUL is reported and U+FFFD is appended to the value instead.
900            Some('\0') => {
901                slf.emit_error(Error::UnexpectedNullCharacter);
902                slf.emitter.push_attribute_value("\u{fffd}");
903                Ok(ControlToken::Continue)
904            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
905            None => {
906                slf.emit_error(Error::EofInTag);
907                Ok(ControlToken::Eof)
908            }
               // Every other character is appended to the value unchanged.
909            Some(x) => {
910                slf.emitter.push_attribute_value(ctostr!(x));
911                Ok(ControlToken::Continue)
912            }
913        },
914        State::AttributeValueUnquoted => match slf.read_char()? {
               // Handles one character inside an unquoted attribute value.
               // A character matched by whitespace_pat!() ends the value. The offset is
               // computed with the encoded length of ' ' for every such character.
               // NOTE(review): assumes all characters in whitespace_pat!() have the same
               // encoded length as ' ' — confirm.
915            Some(whitespace_pat!()) => {
916                slf.emitter.terminate_attribute_value(
917                    // We cannot simply pass slf.position_before_match because
918                    // State::NamedCharacterReference calls Tokenizer::unread_char
919                    // which Reader::position doesn't account for.
920                    // TODO: pass slf.position_before_match once CharacterReference has been converted to a function call
921                    slf.reader.position() - slf.reader.len_of_char_in_current_encoding(' '),
922                );
923                slf.state = State::BeforeAttributeName;
924                Ok(ControlToken::Continue)
925            }
               // '&': record this state in return_state, then switch to CharacterReference.
926            Some('&') => {
927                slf.return_state = Some(State::AttributeValueUnquoted);
928                slf.state = State::CharacterReference;
929                Ok(ControlToken::Continue)
930            }
               // '>' closes the tag and emits it.
               // NOTE(review): unlike the whitespace branch above, this branch does not
               // call terminate_attribute_value before emit_current_tag — confirm that
               // the emitter does not need the value's end offset on this path.
931            Some('>') => {
932                slf.state = State::Data;
933                slf.emit_current_tag();
934                Ok(ControlToken::Continue)
935            }
               // NUL is reported and U+FFFD is appended to the value instead.
936            Some('\0') => {
937                slf.emit_error(Error::UnexpectedNullCharacter);
938                slf.emitter.push_attribute_value("\u{fffd}");
939                Ok(ControlToken::Continue)
940            }
               // Quotes, '<', '=' and '`' (U+0060) are reported as an error but still
               // appended to the value.
941            Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
942                slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
943                slf.emitter.push_attribute_value(ctostr!(x));
944                Ok(ControlToken::Continue)
945            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
946            None => {
947                slf.emit_error(Error::EofInTag);
948                Ok(ControlToken::Eof)
949            }
               // Every other character is appended to the value unchanged.
950            Some(x) => {
951                slf.emitter.push_attribute_value(ctostr!(x));
952                Ok(ControlToken::Continue)
953            }
954        },
955        State::AfterAttributeValueQuoted => match slf.read_char()? {
               // Runs directly after the closing quote of a quoted attribute value.
956            Some(whitespace_pat!()) => {
957                slf.state = State::BeforeAttributeName;
958                Ok(ControlToken::Continue)
959            }
960            Some('/') => {
961                slf.state = State::SelfClosingStartTag;
962                Ok(ControlToken::Continue)
963            }
               // '>' closes the tag: the state is set to Data before the tag is emitted.
964            Some('>') => {
965                slf.state = State::Data;
966                slf.emit_current_tag();
967                Ok(ControlToken::Continue)
968            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
969            None => {
970                slf.emit_error(Error::EofInTag);
971                Ok(ControlToken::Eof)
972            }
               // Any other character right after the quote is reported as
               // MissingWhitespaceBetweenAttributes and passed to unread_char before
               // switching to BeforeAttributeName.
973            Some(x) => {
974                slf.emit_error(Error::MissingWhitespaceBetweenAttributes);
975                slf.state = State::BeforeAttributeName;
976                slf.unread_char(Some(x));
977                Ok(ControlToken::Continue)
978            }
979        },
980        State::SelfClosingStartTag => match slf.read_char()? {
               // Handles the character following a '/' inside a tag.
               // '>' marks the tag as self-closing. The range passed to set_self_closing
               // starts one encoded '/' before position_before_match and ends at
               // position_before_match. The tag is then emitted.
981            Some('>') => {
982                slf.emitter.set_self_closing(
983                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/')
984                        ..slf.position_before_match,
985                );
986                slf.state = State::Data;
987                slf.emit_current_tag();
988                Ok(ControlToken::Continue)
989            }
               // EOF: report EofInTag and return Eof; emit_current_tag is not called here.
990            None => {
991                slf.emit_error(Error::EofInTag);
992                Ok(ControlToken::Eof)
993            }
               // Any other character: report UnexpectedSolidusInTag and pass the
               // character to unread_char before switching to BeforeAttributeName.
994            Some(x) => {
995                slf.emit_error(Error::UnexpectedSolidusInTag);
996                slf.state = State::BeforeAttributeName;
997                slf.unread_char(Some(x));
998                Ok(ControlToken::Continue)
999            }
1000        },
1001        State::BogusComment => match slf.read_char()? {
                // Collects comment text until '>' or EOF. Both emit the current comment
                // with position_before_match; no error is reported in this state for
                // either of them.
1002            Some('>') => {
1003                slf.state = State::Data;
1004                slf.emitter.emit_current_comment(slf.position_before_match);
1005                Ok(ControlToken::Continue)
1006            }
1007            None => {
1008                slf.emitter.emit_current_comment(slf.position_before_match);
1009                Ok(ControlToken::Eof)
1010            }
                // NUL is reported and U+FFFD is appended to the comment instead.
1011            Some('\0') => {
1012                slf.emit_error(Error::UnexpectedNullCharacter);
1013                slf.emitter.push_comment("\u{fffd}");
1014                Ok(ControlToken::Continue)
1015            }
                // Every other character is appended to the comment unchanged.
1016            Some(x) => {
1017                slf.emitter.push_comment(ctostr!(x));
1018                Ok(ControlToken::Continue)
1019            }
1020        },
1021        State::MarkupDeclarationOpen => match slf.read_char()? {
                // Chooses between a comment, a DOCTYPE and a CDATA section.
                // Each try_read_string call is a match guard, so it only runs when the
                // first character already matched. If the guard is false, matching falls
                // through to the catch-all arm at the bottom.
                // NOTE(review): the boolean argument looks like a case-sensitivity flag
                // ("octype" is tried with `false` after 'd' | 'D') — confirm against
                // try_read_string.
1022            Some('-') if slf.try_read_string("-", true)? => {
1023                slf.emitter.init_comment(slf.reader.position());
1024                slf.state = State::CommentStart;
1025                Ok(ControlToken::Continue)
1026            }
1027            Some('d' | 'D') if slf.try_read_string("octype", false)? => {
1028                slf.state = State::Doctype;
1029                Ok(ControlToken::Continue)
1030            }
                // "[CDATA[" is signalled with ControlToken::CdataOpen; the state is not
                // changed in this arm.
1031            Some('[') if slf.try_read_string("CDATA[", true)? => Ok(ControlToken::CdataOpen),
                // Catch-all (also matches None): report IncorrectlyOpenedComment, start
                // a comment at position_before_match and pass the character to
                // unread_char before switching to BogusComment.
1032            c => {
1033                slf.emit_error(Error::IncorrectlyOpenedComment);
1034                slf.emitter.init_comment(slf.position_before_match);
1035                slf.state = State::BogusComment;
1036                slf.unread_char(c);
1037                Ok(ControlToken::Continue)
1038            }
1039        },
1040        State::CommentStart => match slf.read_char()? {
                // Handles the first character after a comment has been initialised.
                // '-' is not pushed to the comment here; it only changes the state.
1041            Some('-') => {
1042                slf.state = State::CommentStartDash;
1043                Ok(ControlToken::Continue)
1044            }
                // '>' right away: report AbruptClosingOfEmptyComment and emit the
                // comment with position_before_match.
1045            Some('>') => {
1046                slf.emit_error(Error::AbruptClosingOfEmptyComment);
1047                slf.state = State::Data;
1048                slf.emitter.emit_current_comment(slf.position_before_match);
1049                Ok(ControlToken::Continue)
1050            }
                // Catch-all (also matches None): pass the character to unread_char and
                // continue in Comment.
1051            c => {
1052                slf.unread_char(c);
1053                slf.state = State::Comment;
1054                Ok(ControlToken::Continue)
1055            }
1056        },
1057        State::CommentStartDash => match slf.read_char()? {
                // Handles the character after a '-' that was read in CommentStart.
1058            Some('-') => {
1059                slf.state = State::CommentEnd;
1060                Ok(ControlToken::Continue)
1061            }
                // '>': report AbruptClosingOfEmptyComment and emit the comment. The
                // offset is position_before_match minus the encoded length of the
                // matched '>'.
                // NOTE(review): presumably this is meant to step back over the
                // preceding '-', as the EOF branch below does explicitly — confirm.
1062            Some(c @ '>') => {
1063                slf.emit_error(Error::AbruptClosingOfEmptyComment);
1064                slf.state = State::Data;
1065                slf.emitter.emit_current_comment(
1066                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding(c),
1067                );
1068                Ok(ControlToken::Continue)
1069            }
                // EOF: report EofInComment and emit the comment with an offset one
                // encoded '-' before position_before_match.
1070            None => {
1071                slf.emit_error(Error::EofInComment);
1072                slf.emitter.emit_current_comment(
1073                    slf.position_before_match - slf.reader.len_of_char_in_current_encoding('-'),
1074                );
1075                Ok(ControlToken::Eof)
1076            }
                // Any other character: append a literal "-" to the comment, pass the
                // character to unread_char and continue in Comment.
1077            c @ Some(_) => {
1078                slf.emitter.push_comment("-");
1079                slf.unread_char(c);
1080                slf.state = State::Comment;
1081                Ok(ControlToken::Continue)
1082            }
1083        },
1084        State::Comment => match slf.read_char()? {
                // Handles one character of comment text.
                // '<' is appended to the comment immediately, then the state changes.
1085            Some('<') => {
1086                slf.emitter.push_comment("<");
1087                slf.state = State::CommentLessThanSign;
1088                Ok(ControlToken::Continue)
1089            }
                // '-' is not appended here. Its offset (position_before_match) is stored
                // in some_offset before switching to CommentEndDash.
1090            Some('-') => {
1091                slf.some_offset = slf.position_before_match;
1092                slf.state = State::CommentEndDash;
1093                Ok(ControlToken::Continue)
1094            }
                // NUL is reported and U+FFFD is appended to the comment instead.
1095            Some('\0') => {
1096                slf.emit_error(Error::UnexpectedNullCharacter);
1097                slf.emitter.push_comment("\u{fffd}");
1098                Ok(ControlToken::Continue)
1099            }
                // EOF: report EofInComment and emit the comment with reader.position().
1100            None => {
1101                slf.emit_error(Error::EofInComment);
1102                slf.emitter.emit_current_comment(slf.reader.position());
1103                Ok(ControlToken::Eof)
1104            }
                // Every other character is appended to the comment unchanged.
1105            Some(x) => {
1106                slf.emitter.push_comment(ctostr!(x));
1107                Ok(ControlToken::Continue)
1108            }
1109        },
1110        State::CommentLessThanSign => match slf.read_char()? {
                // Handles the character after a '<' inside a comment.
                // '!' is appended to the comment and moves on to CommentLessThanSignBang.
1111            Some('!') => {
1112                slf.emitter.push_comment("!");
1113                slf.state = State::CommentLessThanSignBang;
1114                Ok(ControlToken::Continue)
1115            }
                // Another '<' is appended and the state stays the same.
1116            Some('<') => {
1117                slf.emitter.push_comment("<");
1118                Ok(ControlToken::Continue)
1119            }
                // Catch-all (also matches None): pass the character to unread_char and
                // go back to Comment.
1120            c => {
1121                slf.unread_char(c);
1122                slf.state = State::Comment;
1123                Ok(ControlToken::Continue)
1124            }
1125        },
1126        State::CommentLessThanSignBang => match slf.read_char()? {
                // Handles the character after "<!" inside a comment.
                // '-' only changes the state; it is not appended to the comment here.
1127            Some('-') => {
1128                slf.state = State::CommentLessThanSignBangDash;
1129                Ok(ControlToken::Continue)
1130            }
                // Catch-all (also matches None): pass the character to unread_char and
                // go back to Comment.
1131            c => {
1132                slf.unread_char(c);
1133                slf.state = State::Comment;
1134                Ok(ControlToken::Continue)
1135            }
1136        },
1137        State::CommentLessThanSignBangDash => match slf.read_char()? {
                // Handles the character after "<!-" inside a comment.
1138            Some('-') => {
1139                slf.state = State::CommentLessThanSignBangDashDash;
1140                Ok(ControlToken::Continue)
1141            }
                // Catch-all (also matches None): pass the character to unread_char and
                // continue in CommentEndDash.
                // NOTE(review): some_offset is not updated on this path — confirm that
                // the states reached from here do not read a stale value.
1142            c => {
1143                slf.unread_char(c);
1144                slf.state = State::CommentEndDash;
1145                Ok(ControlToken::Continue)
1146            }
1147        },
1148        State::CommentLessThanSignBangDashDash => match slf.read_char()? {
                // Handles the character after "<!--" inside a comment. Both arms pass
                // the character to unread_char and switch to CommentEnd. They differ
                // only in whether NestedComment is reported.
                // NOTE(review): some_offset is not updated in this state — confirm that
                // CommentEnd does not read a stale value after this transition.
                // '>' and EOF: no error.
1149            c @ Some('>') | c @ None => {
1150                slf.unread_char(c);
1151                slf.state = State::CommentEnd;
1152                Ok(ControlToken::Continue)
1153            }
                // Any other character: report NestedComment first.
1154            c => {
1155                slf.emit_error(Error::NestedComment);
1156                slf.unread_char(c);
1157                slf.state = State::CommentEnd;
1158                Ok(ControlToken::Continue)
1159            }
1160        },
1161        State::CommentEndDash => match slf.read_char()? {
                // Handles the character after a single '-' that may start the end of a
                // comment.
1162            Some('-') => {
1163                slf.state = State::CommentEnd;
1164                Ok(ControlToken::Continue)
1165            }
                // EOF: report EofInComment and emit the comment with some_offset.
1166            None => {
1167                slf.emit_error(Error::EofInComment);
1168                slf.emitter.emit_current_comment(slf.some_offset);
1169                Ok(ControlToken::Eof)
1170            }
                // Any other character (None is handled above): append a literal "-" to
                // the comment, pass the character to unread_char and go back to Comment.
1171            c => {
1172                slf.emitter.push_comment("-");
1173                slf.unread_char(c);
1174                slf.state = State::Comment;
1175                Ok(ControlToken::Continue)
1176            }
1177        },
1178        State::CommentEnd => match slf.read_char()? {
                // Handles the character after "--" inside a comment.
                // '>' ends the comment, which is emitted with some_offset.
1179            Some('>') => {
1180                slf.state = State::Data;
1181                slf.emitter.emit_current_comment(slf.some_offset);
1182                Ok(ControlToken::Continue)
1183            }
1184            Some('!') => {
1185                slf.state = State::CommentEndBang;
1186                Ok(ControlToken::Continue)
1187            }
                // A further '-' appends one "-" to the comment and stays in this state.
1188            Some('-') => {
1189                slf.emitter.push_comment("-");
1190                Ok(ControlToken::Continue)
1191            }
                // EOF: report EofInComment and emit the comment with some_offset.
1192            None => {
1193                slf.emit_error(Error::EofInComment);
1194                slf.emitter.emit_current_comment(slf.some_offset);
1195                Ok(ControlToken::Eof)
1196            }
                // Any other character: append "-" twice, pass the character to
                // unread_char and go back to Comment.
1197            c @ Some(_) => {
1198                slf.emitter.push_comment("-");
1199                slf.emitter.push_comment("-");
1200                slf.unread_char(c);
1201                slf.state = State::Comment;
1202                Ok(ControlToken::Continue)
1203            }
1204        },
1205        State::CommentEndBang => match slf.read_char()? {
                // Handles the character after "--!" inside a comment.
                // '-': append "-", "-" and "!" to the comment, then continue in
                // CommentEndDash. The '-' just read is not appended here.
1206            Some('-') => {
1207                slf.emitter.push_comment("-");
1208                slf.emitter.push_comment("-");
1209                slf.emitter.push_comment("!");
1210                slf.state = State::CommentEndDash;
1211                Ok(ControlToken::Continue)
1212            }
                // '>': report IncorrectlyClosedComment and emit the comment with
                // some_offset.
1213            Some('>') => {
1214                slf.emit_error(Error::IncorrectlyClosedComment);
1215                slf.state = State::Data;
1216                slf.emitter.emit_current_comment(slf.some_offset);
1217                Ok(ControlToken::Continue)
1218            }
                // EOF: report EofInComment and emit the comment with some_offset.
1219            None => {
1220                slf.emit_error(Error::EofInComment);
1221                slf.emitter.emit_current_comment(slf.some_offset);
1222                Ok(ControlToken::Eof)
1223            }
                // Any other character: append "-", "-" and "!", pass the character to
                // unread_char and go back to Comment.
1224            c @ Some(_) => {
1225                slf.emitter.push_comment("-");
1226                slf.emitter.push_comment("-");
1227                slf.emitter.push_comment("!");
1228                slf.state = State::Comment;
1229                slf.unread_char(c);
1230                Ok(ControlToken::Continue)
1231            }
1232        },
1233        State::Doctype => match slf.read_char()? {
                // Handles the first character in the Doctype state. Every non-EOF arm
                // ends in BeforeDoctypeName; the arms differ in whether the character is
                // unread and whether an error is reported.
1234            Some(whitespace_pat!()) => {
1235                slf.state = State::BeforeDoctypeName;
1236                Ok(ControlToken::Continue)
1237            }
                // '>' is passed to unread_char without an error.
1238            c @ Some('>') => {
1239                slf.unread_char(c);
1240                slf.state = State::BeforeDoctypeName;
1241                Ok(ControlToken::Continue)
1242            }
                // EOF: report EofInDoctype, initialise a doctype, set force-quirks and
                // emit it with reader.position().
1243            None => {
1244                slf.emit_error(Error::EofInDoctype);
1245                slf.init_doctype();
1246                slf.emitter.set_force_quirks();
1247                slf.emitter.emit_current_doctype(slf.reader.position());
1248                Ok(ControlToken::Eof)
1249            }
                // Any other character: report MissingWhitespaceBeforeDoctypeName and
                // pass the character to unread_char.
1250            c @ Some(_) => {
1251                slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
1252                slf.unread_char(c);
1253                slf.state = State::BeforeDoctypeName;
1254                Ok(ControlToken::Continue)
1255            }
1256        },
1257        State::BeforeDoctypeName => match slf.read_char()? {
                // Skips characters matched by whitespace_pat!() and starts the doctype.
                // Every other arm calls init_doctype first.
1258            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
                // NUL: report it and start the name at position_before_match with U+FFFD
                // as its first character.
1259            Some('\0') => {
1260                slf.emit_error(Error::UnexpectedNullCharacter);
1261                slf.init_doctype();
1262                slf.emitter.init_doctype_name(slf.position_before_match);
1263                slf.emitter.push_doctype_name("\u{fffd}");
1264                slf.state = State::DoctypeName;
1265                Ok(ControlToken::Continue)
1266            }
                // '>' before any name: report MissingDoctypeName, set force-quirks and
                // emit the doctype with reader.position().
1267            Some('>') => {
1268                slf.emit_error(Error::MissingDoctypeName);
1269                slf.init_doctype();
1270                slf.emitter.set_force_quirks();
1271                slf.state = State::Data;
1272                slf.emitter.emit_current_doctype(slf.reader.position());
1273                Ok(ControlToken::Continue)
1274            }
                // EOF: report EofInDoctype, set force-quirks and emit the doctype.
1275            None => {
1276                slf.emit_error(Error::EofInDoctype);
1277                slf.init_doctype();
1278                slf.emitter.set_force_quirks();
1279                slf.emitter.emit_current_doctype(slf.reader.position());
1280                Ok(ControlToken::Eof)
1281            }
                // Any other character starts the name at position_before_match and is
                // appended after ASCII-lowercasing.
1282            Some(x) => {
1283                slf.init_doctype();
1284                slf.emitter.init_doctype_name(slf.position_before_match);
1285                slf.emitter
1286                    .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
1287                slf.state = State::DoctypeName;
1288                Ok(ControlToken::Continue)
1289            }
1290        },
1291        State::DoctypeName => match slf.read_char()? {
                // Handles one character while the doctype name is being collected.
                // Whitespace ends the name at position_before_match.
1292            Some(whitespace_pat!()) => {
1293                slf.emitter
1294                    .terminate_doctype_name(slf.position_before_match);
1295                slf.state = State::AfterDoctypeName;
1296                Ok(ControlToken::Continue)
1297            }
                // '>' ends the name at position_before_match and emits the doctype with
                // reader.position().
1298            Some('>') => {
1299                slf.emitter
1300                    .terminate_doctype_name(slf.position_before_match);
1301                slf.state = State::Data;
1302                slf.emitter.emit_current_doctype(slf.reader.position());
1303                Ok(ControlToken::Continue)
1304            }
                // NUL is reported and U+FFFD is appended to the name instead.
1305            Some('\0') => {
1306                slf.emit_error(Error::UnexpectedNullCharacter);
1307                slf.emitter.push_doctype_name("\u{fffd}");
1308                Ok(ControlToken::Continue)
1309            }
                // EOF: report EofInDoctype, end the name, set force-quirks and emit the
                // doctype.
1310            None => {
1311                slf.emit_error(Error::EofInDoctype);
1312                slf.emitter
1313                    .terminate_doctype_name(slf.position_before_match);
1314                slf.emitter.set_force_quirks();
1315                slf.emitter.emit_current_doctype(slf.reader.position());
1316                Ok(ControlToken::Eof)
1317            }
                // Every other character is appended after ASCII-lowercasing.
1318            Some(x) => {
1319                slf.emitter
1320                    .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
1321                Ok(ControlToken::Continue)
1322            }
1323        },
1324        State::AfterDoctypeName => match slf.read_char()? {
                // Runs after the doctype name: skips whitespace, then expects '>', EOF,
                // or the keywords introduced by 'p'/'P' + "ublic" or 's'/'S' + "ystem".
1325            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1326            Some('>') => {
1327                slf.state = State::Data;
1328                slf.emitter.emit_current_doctype(slf.reader.position());
1329                Ok(ControlToken::Continue)
1330            }
                // EOF: report EofInDoctype, set force-quirks and emit the doctype.
1331            None => {
1332                slf.emit_error(Error::EofInDoctype);
1333                slf.emitter.set_force_quirks();
1334                slf.emitter.emit_current_doctype(slf.reader.position());
1335                Ok(ControlToken::Eof)
1336            }
                // The try_read_string calls are match guards. If one returns false,
                // matching falls through to the final `c @ Some(_)` arm.
1337            Some('p' | 'P') if slf.try_read_string("ublic", false)? => {
1338                slf.state = State::AfterDoctypePublicKeyword;
1339                Ok(ControlToken::Continue)
1340            }
1341            Some('s' | 'S') if slf.try_read_string("ystem", false)? => {
1342                slf.state = State::AfterDoctypeSystemKeyword;
1343                Ok(ControlToken::Continue)
1344            }
                // Anything else: report InvalidCharacterSequenceAfterDoctypeName, set
                // force-quirks and pass the character to unread_char before switching to
                // BogusDoctype.
1345            c @ Some(_) => {
1346                slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
1347                slf.emitter.set_force_quirks();
1348                slf.unread_char(c);
1349                slf.state = State::BogusDoctype;
1350                Ok(ControlToken::Continue)
1351            }
1352        },
1353        State::AfterDoctypePublicKeyword => match slf.read_char()? {
                // Handles the character directly after the doctype "public" keyword.
1354            Some(whitespace_pat!()) => {
1355                slf.state = State::BeforeDoctypePublicIdentifier;
1356                Ok(ControlToken::Continue)
1357            }
                // A quote with no whitespace before it is reported as
                // MissingWhitespaceAfterDoctypePublicKeyword. The public identifier is
                // still started, using reader.position().
1358            Some('"') => {
1359                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
1360                slf.emitter.init_doctype_public_id(slf.reader.position());
1361                slf.state = State::DoctypePublicIdentifierDoubleQuoted;
1362                Ok(ControlToken::Continue)
1363            }
1364            Some('\'') => {
1365                slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
1366                slf.emitter.init_doctype_public_id(slf.reader.position());
1367                slf.state = State::DoctypePublicIdentifierSingleQuoted;
1368                Ok(ControlToken::Continue)
1369            }
                // '>' without an identifier: report MissingDoctypePublicIdentifier, set
                // force-quirks and emit the doctype.
1370            Some('>') => {
1371                slf.emit_error(Error::MissingDoctypePublicIdentifier);
1372                slf.emitter.set_force_quirks();
1373                slf.state = State::Data;
1374                slf.emitter.emit_current_doctype(slf.reader.position());
1375                Ok(ControlToken::Continue)
1376            }
                // EOF: report EofInDoctype, set force-quirks and emit the doctype.
1377            None => {
1378                slf.emit_error(Error::EofInDoctype);
1379                slf.emitter.set_force_quirks();
1380                slf.emitter.emit_current_doctype(slf.reader.position());
1381                Ok(ControlToken::Eof)
1382            }
                // Anything else: report MissingQuoteBeforeDoctypePublicIdentifier, set
                // force-quirks and pass the character to unread_char before switching to
                // BogusDoctype.
1383            c @ Some(_) => {
1384                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
1385                slf.emitter.set_force_quirks();
1386                slf.unread_char(c);
1387                slf.state = State::BogusDoctype;
1388                Ok(ControlToken::Continue)
1389            }
1390        },
1391        State::BeforeDoctypePublicIdentifier => match slf.read_char()? {
                // Skips characters matched by whitespace_pat!() and waits for the
                // opening quote of the public identifier.
1392            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
                // An opening quote starts the identifier at reader.position(); no error
                // is reported in this state.
1393            Some('"') => {
1394                slf.emitter.init_doctype_public_id(slf.reader.position());
1395                slf.state = State::DoctypePublicIdentifierDoubleQuoted;
1396                Ok(ControlToken::Continue)
1397            }
1398            Some('\'') => {
1399                slf.emitter.init_doctype_public_id(slf.reader.position());
1400                slf.state = State::DoctypePublicIdentifierSingleQuoted;
1401                Ok(ControlToken::Continue)
1402            }
                // '>' without an identifier: report MissingDoctypePublicIdentifier, set
                // force-quirks and emit the doctype.
1403            Some('>') => {
1404                slf.emit_error(Error::MissingDoctypePublicIdentifier);
1405                slf.emitter.set_force_quirks();
1406                slf.state = State::Data;
1407                slf.emitter.emit_current_doctype(slf.reader.position());
1408                Ok(ControlToken::Continue)
1409            }
                // EOF: report EofInDoctype, set force-quirks and emit the doctype.
1410            None => {
1411                slf.emit_error(Error::EofInDoctype);
1412                slf.emitter.set_force_quirks();
1413                slf.emitter.emit_current_doctype(slf.reader.position());
1414                Ok(ControlToken::Eof)
1415            }
                // Anything else: report MissingQuoteBeforeDoctypePublicIdentifier, set
                // force-quirks and pass the character to unread_char before switching to
                // BogusDoctype.
1416            c @ Some(_) => {
1417                slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
1418                slf.emitter.set_force_quirks();
1419                slf.unread_char(c);
1420                slf.state = State::BogusDoctype;
1421                Ok(ControlToken::Continue)
1422            }
1423        },
1424        State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? {
                // Handles one character inside a double-quoted doctype public identifier.
                // The closing quote ends the identifier at position_before_match.
1425            Some('"') => {
1426                slf.emitter
1427                    .terminate_doctype_public_id(slf.position_before_match);
1428                slf.state = State::AfterDoctypePublicIdentifier;
1429                Ok(ControlToken::Continue)
1430            }
                // NUL is reported and U+FFFD is appended to the identifier instead.
1431            Some('\0') => {
1432                slf.emit_error(Error::UnexpectedNullCharacter);
1433                slf.emitter.push_doctype_public_id("\u{fffd}");
1434                Ok(ControlToken::Continue)
1435            }
                // '>' inside the quotes: end the identifier at position_before_match,
                // report AbruptDoctypePublicIdentifier, set force-quirks and emit the
                // doctype.
1436            Some('>') => {
1437                slf.emitter
1438                    .terminate_doctype_public_id(slf.position_before_match);
1439                slf.emit_error(Error::AbruptDoctypePublicIdentifier);
1440                slf.emitter.set_force_quirks();
1441                slf.state = State::Data;
1442                slf.emitter.emit_current_doctype(slf.reader.position());
1443                Ok(ControlToken::Continue)
1444            }
                // EOF: the identifier is ended with reader.position() here (the other
                // arms use position_before_match), then EofInDoctype is reported,
                // force-quirks is set and the doctype is emitted.
1445            None => {
1446                slf.emitter
1447                    .terminate_doctype_public_id(slf.reader.position());
1448                slf.emit_error(Error::EofInDoctype);
1449                slf.emitter.set_force_quirks();
1450                slf.emitter.emit_current_doctype(slf.reader.position());
1451                Ok(ControlToken::Eof)
1452            }
                // Every other character is appended to the identifier unchanged.
1453            Some(x) => {
1454                slf.emitter.push_doctype_public_id(ctostr!(x));
1455                Ok(ControlToken::Continue)
1456            }
1457        },
1458        State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? {
                // Handles one character inside a single-quoted doctype public identifier.
                // The closing quote ends the identifier at position_before_match.
1459            Some('\'') => {
1460                slf.emitter
1461                    .terminate_doctype_public_id(slf.position_before_match);
1462                slf.state = State::AfterDoctypePublicIdentifier;
1463                Ok(ControlToken::Continue)
1464            }
                // NUL is reported and U+FFFD is appended to the identifier instead.
1465            Some('\0') => {
1466                slf.emit_error(Error::UnexpectedNullCharacter);
1467                slf.emitter.push_doctype_public_id("\u{fffd}");
1468                Ok(ControlToken::Continue)
1469            }
                // '>' inside the quotes: end the identifier at position_before_match,
                // report AbruptDoctypePublicIdentifier, set force-quirks and emit the
                // doctype.
1470            Some('>') => {
1471                slf.emitter
1472                    .terminate_doctype_public_id(slf.position_before_match);
1473                slf.emit_error(Error::AbruptDoctypePublicIdentifier);
1474                slf.emitter.set_force_quirks();
1475                slf.state = State::Data;
1476                slf.emitter.emit_current_doctype(slf.reader.position());
1477                Ok(ControlToken::Continue)
1478            }
                // EOF: the identifier is ended with reader.position() here (the other
                // arms use position_before_match), then EofInDoctype is reported,
                // force-quirks is set and the doctype is emitted.
1479            None => {
1480                slf.emitter
1481                    .terminate_doctype_public_id(slf.reader.position());
1482                slf.emit_error(Error::EofInDoctype);
1483                slf.emitter.set_force_quirks();
1484                slf.emitter.emit_current_doctype(slf.reader.position());
1485                Ok(ControlToken::Eof)
1486            }
                // Every other character is appended to the identifier unchanged.
1487            Some(x) => {
1488                slf.emitter.push_doctype_public_id(ctostr!(x));
1489                Ok(ControlToken::Continue)
1490            }
1491        },
1492        State::AfterDoctypePublicIdentifier => match slf.read_char()? {
                // Handles the character directly after the closing quote of the doctype
                // public identifier.
1493            Some(whitespace_pat!()) => {
1494                slf.state = State::BetweenDoctypePublicAndSystemIdentifiers;
1495                Ok(ControlToken::Continue)
1496            }
                // '>' ends the doctype, which is emitted with reader.position().
1497            Some('>') => {
1498                slf.state = State::Data;
1499                slf.emitter.emit_current_doctype(slf.reader.position());
1500                Ok(ControlToken::Continue)
1501            }
                // A quote with no whitespace before it is reported as
                // MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers. The system
                // identifier is still started, using reader.position().
1502            Some('"') => {
1503                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
1504                slf.emitter.init_doctype_system_id(slf.reader.position());
1505                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1506                Ok(ControlToken::Continue)
1507            }
1508            Some('\'') => {
1509                slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
1510                slf.emitter.init_doctype_system_id(slf.reader.position());
1511                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1512                Ok(ControlToken::Continue)
1513            }
                // EOF: report EofInDoctype, set force-quirks and emit the doctype.
1514            None => {
1515                slf.emit_error(Error::EofInDoctype);
1516                slf.emitter.set_force_quirks();
1517                slf.emitter.emit_current_doctype(slf.reader.position());
1518                Ok(ControlToken::Eof)
1519            }
                // Anything else: report MissingQuoteBeforeDoctypeSystemIdentifier, set
                // force-quirks and pass the character to unread_char before switching to
                // BogusDoctype.
1520            c @ Some(_) => {
1521                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1522                slf.emitter.set_force_quirks();
1523                slf.unread_char(c);
1524                slf.state = State::BogusDoctype;
1525                Ok(ControlToken::Continue)
1526            }
1527        },
1528        State::BetweenDoctypePublicAndSystemIdentifiers => match slf.read_char()? {
1529            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1530            Some('>') => {
1531                slf.state = State::Data;
1532                slf.emitter.emit_current_doctype(slf.reader.position());
1533                Ok(ControlToken::Continue)
1534            }
1535            Some('"') => {
1536                slf.emitter.init_doctype_system_id(slf.reader.position());
1537                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1538                Ok(ControlToken::Continue)
1539            }
1540            Some('\'') => {
1541                slf.emitter.init_doctype_system_id(slf.reader.position());
1542                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1543                Ok(ControlToken::Continue)
1544            }
1545            None => {
1546                slf.emit_error(Error::EofInDoctype);
1547                slf.emitter.set_force_quirks();
1548                slf.emitter.emit_current_doctype(slf.reader.position());
1549                Ok(ControlToken::Eof)
1550            }
1551            c @ Some(_) => {
1552                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1553                slf.emitter.set_force_quirks();
1554                slf.state = State::BogusDoctype;
1555                slf.unread_char(c);
1556                Ok(ControlToken::Continue)
1557            }
1558        },
1559        State::AfterDoctypeSystemKeyword => match slf.read_char()? {
1560            Some(whitespace_pat!()) => {
1561                slf.state = State::BeforeDoctypeSystemIdentifier;
1562                Ok(ControlToken::Continue)
1563            }
1564            Some('"') => {
1565                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
1566                slf.emitter.init_doctype_system_id(slf.reader.position());
1567                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1568                Ok(ControlToken::Continue)
1569            }
1570            Some('\'') => {
1571                slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
1572                slf.emitter.init_doctype_system_id(slf.reader.position());
1573                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1574                Ok(ControlToken::Continue)
1575            }
1576            Some('>') => {
1577                slf.emit_error(Error::MissingDoctypeSystemIdentifier);
1578                slf.emitter.set_force_quirks();
1579                slf.state = State::Data;
1580                slf.emitter.emit_current_doctype(slf.reader.position());
1581                Ok(ControlToken::Continue)
1582            }
1583            None => {
1584                slf.emit_error(Error::EofInDoctype);
1585                slf.emitter.set_force_quirks();
1586                slf.emitter.emit_current_doctype(slf.reader.position());
1587                Ok(ControlToken::Eof)
1588            }
1589            c @ Some(_) => {
1590                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1591                slf.emitter.set_force_quirks();
1592                slf.state = State::BogusDoctype;
1593                slf.unread_char(c);
1594                Ok(ControlToken::Continue)
1595            }
1596        },
1597        State::BeforeDoctypeSystemIdentifier => match slf.read_char()? {
1598            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1599            Some('"') => {
1600                slf.emitter.init_doctype_system_id(slf.reader.position());
1601                slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1602                Ok(ControlToken::Continue)
1603            }
1604            Some('\'') => {
1605                slf.emitter.init_doctype_system_id(slf.reader.position());
1606                slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1607                Ok(ControlToken::Continue)
1608            }
1609            Some('>') => {
1610                slf.emit_error(Error::MissingDoctypeSystemIdentifier);
1611                slf.emitter.set_force_quirks();
1612                slf.state = State::Data;
1613                slf.emitter.emit_current_doctype(slf.reader.position());
1614                Ok(ControlToken::Continue)
1615            }
1616            None => {
1617                slf.emit_error(Error::EofInDoctype);
1618                slf.emitter.set_force_quirks();
1619                slf.emitter.emit_current_doctype(slf.reader.position());
1620                Ok(ControlToken::Eof)
1621            }
1622            c @ Some(_) => {
1623                slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1624                slf.emitter.set_force_quirks();
1625                slf.state = State::BogusDoctype;
1626                slf.unread_char(c);
1627                Ok(ControlToken::Continue)
1628            }
1629        },
1630        State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? {
1631            Some('"') => {
1632                slf.emitter
1633                    .terminate_doctype_system_id(slf.position_before_match);
1634                slf.state = State::AfterDoctypeSystemIdentifier;
1635                Ok(ControlToken::Continue)
1636            }
1637            Some('\0') => {
1638                slf.emit_error(Error::UnexpectedNullCharacter);
1639                slf.emitter.push_doctype_system_id("\u{fffd}");
1640                Ok(ControlToken::Continue)
1641            }
1642            Some('>') => {
1643                slf.emitter
1644                    .terminate_doctype_system_id(slf.position_before_match);
1645                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
1646                slf.emitter.set_force_quirks();
1647                slf.state = State::Data;
1648                slf.emitter.emit_current_doctype(slf.reader.position());
1649                Ok(ControlToken::Continue)
1650            }
1651            None => {
1652                slf.emitter
1653                    .terminate_doctype_system_id(slf.reader.position());
1654                slf.emit_error(Error::EofInDoctype);
1655                slf.emitter.set_force_quirks();
1656                slf.emitter.emit_current_doctype(slf.reader.position());
1657                Ok(ControlToken::Eof)
1658            }
1659            Some(x) => {
1660                slf.emitter.push_doctype_system_id(ctostr!(x));
1661                Ok(ControlToken::Continue)
1662            }
1663        },
1664        State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? {
1665            Some('\'') => {
1666                slf.emitter
1667                    .terminate_doctype_system_id(slf.position_before_match);
1668                slf.state = State::AfterDoctypeSystemIdentifier;
1669                Ok(ControlToken::Continue)
1670            }
1671            Some('\0') => {
1672                slf.emit_error(Error::UnexpectedNullCharacter);
1673                slf.emitter.push_doctype_system_id("\u{fffd}");
1674                Ok(ControlToken::Continue)
1675            }
1676            Some('>') => {
1677                slf.emitter
1678                    .terminate_doctype_system_id(slf.position_before_match);
1679                slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
1680                slf.emitter.set_force_quirks();
1681                slf.state = State::Data;
1682                slf.emitter.emit_current_doctype(slf.reader.position());
1683                Ok(ControlToken::Continue)
1684            }
1685            None => {
1686                slf.emitter
1687                    .terminate_doctype_system_id(slf.reader.position());
1688                slf.emit_error(Error::EofInDoctype);
1689                slf.emitter.set_force_quirks();
1690                slf.emitter.emit_current_doctype(slf.reader.position());
1691                Ok(ControlToken::Eof)
1692            }
1693            Some(x) => {
1694                slf.emitter.push_doctype_system_id(ctostr!(x));
1695                Ok(ControlToken::Continue)
1696            }
1697        },
1698        State::AfterDoctypeSystemIdentifier => match slf.read_char()? {
1699            Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1700            Some('>') => {
1701                slf.state = State::Data;
1702                slf.emitter.emit_current_doctype(slf.reader.position());
1703                Ok(ControlToken::Continue)
1704            }
1705            None => {
1706                slf.emit_error(Error::EofInDoctype);
1707                slf.emitter.set_force_quirks();
1708                slf.emitter.emit_current_doctype(slf.reader.position());
1709                Ok(ControlToken::Eof)
1710            }
1711            c @ Some(_) => {
1712                slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
1713                slf.unread_char(c);
1714                slf.state = State::BogusDoctype;
1715                Ok(ControlToken::Continue)
1716            }
1717        },
1718        State::BogusDoctype => match slf.read_char()? {
1719            Some('>') => {
1720                slf.state = State::Data;
1721                slf.emitter.emit_current_doctype(slf.reader.position());
1722                Ok(ControlToken::Continue)
1723            }
1724            Some('\0') => {
1725                slf.emit_error(Error::UnexpectedNullCharacter);
1726                Ok(ControlToken::Continue)
1727            }
1728            None => {
1729                slf.emitter.emit_current_doctype(slf.reader.position());
1730                Ok(ControlToken::Eof)
1731            }
1732            Some(_) => Ok(ControlToken::Continue),
1733        },
1734        State::CdataSection => match slf.read_char()? {
1735            Some(']') => {
1736                slf.state = State::CdataSectionBracket;
1737                Ok(ControlToken::Continue)
1738            }
1739            None => {
1740                slf.emit_error(Error::EofInCdata);
1741                Ok(ControlToken::Eof)
1742            }
1743            Some(x) => {
1744                slf.emit_char(x);
1745                Ok(ControlToken::Continue)
1746            }
1747        },
1748        State::CdataSectionBracket => match slf.read_char()? {
1749            Some(']') => {
1750                slf.state = State::CdataSectionEnd;
1751                slf.some_offset = slf.position_before_match;
1752                Ok(ControlToken::Continue)
1753            }
1754            c => {
1755                slf.emit_char(']');
1756                slf.state = State::CdataSection;
1757                slf.unread_char(c);
1758                Ok(ControlToken::Continue)
1759            }
1760        },
1761        State::CdataSectionEnd => match slf.read_char()? {
1762            Some(']') => {
1763                slf.emit_char(']');
1764                Ok(ControlToken::Continue)
1765            }
1766            Some('>') => {
1767                slf.state = State::Data;
1768                Ok(ControlToken::Continue)
1769            }
1770            c => {
1771                slf.emit_chars(b"]]");
1772                slf.unread_char(c);
1773                slf.state = State::CdataSection;
1774                Ok(ControlToken::Continue)
1775            }
1776        },
1777        State::CharacterReference => {
1778            // TODO: we can avoid these Reader method calls by changing CharacterReference to be a function instead of a state
1779            slf.some_offset =
1780                slf.reader.position() - slf.reader.len_of_char_in_current_encoding('&');
1781            slf.temporary_buffer.clear();
1782            slf.temporary_buffer.push('&');
1783            match slf.read_char()? {
1784                Some(x) if x.is_ascii_alphanumeric() => {
1785                    slf.unread_char(Some(x));
1786                    slf.state = State::NamedCharacterReference;
1787                    Ok(ControlToken::Continue)
1788                }
1789                Some('#') => {
1790                    slf.temporary_buffer.push('#');
1791                    slf.state = State::NumericCharacterReference;
1792                    Ok(ControlToken::Continue)
1793                }
1794                c => {
1795                    slf.flush_code_points_consumed_as_character_reference();
1796                    slf.state = slf.return_state.take().unwrap();
1797                    slf.unread_char(c);
1798                    Ok(ControlToken::Continue)
1799                }
1800            }
1801        }
1802        State::NamedCharacterReference => {
1803            let first_char = slf.read_char()?.unwrap(); // unwrap cannot panic since we just unread the char
1804
1805            let Some(char_ref) =
1806                try_read_character_reference(first_char, |x| slf.try_read_string(x, true))?
1807            else {
1808                slf.unread_char(Some(first_char));
1809
1810                debug_assert_eq!(slf.temporary_buffer, "&");
1811                slf.temporary_buffer.clear();
1812
1813                if slf.is_consumed_as_part_of_an_attribute() {
1814                    slf.emitter.push_attribute_value("&");
1815                } else {
1816                    slf.emitter.emit_char(
1817                        '&',
1818                        slf.some_offset
1819                            ..slf.some_offset + slf.reader.len_of_char_in_current_encoding('&'),
1820                    );
1821                }
1822
1823                slf.state = State::AmbiguousAmpersand;
1824                return Ok(ControlToken::Continue);
1825            };
1826
1827            slf.temporary_buffer.push(first_char);
1828            slf.temporary_buffer.push_str(char_ref.name);
1829            let char_ref_name_last_character = char_ref.name.chars().last();
1830
1831            let next_character = slf.read_char()?;
1832            slf.unread_char(next_character);
1833
1834            if slf.is_consumed_as_part_of_an_attribute()
1835                && char_ref_name_last_character != Some(';')
1836                && matches!(next_character, Some(x) if x == '=' || x.is_ascii_alphanumeric())
1837            {
1838                slf.flush_code_points_consumed_as_character_reference();
1839                slf.state = slf.return_state.take().unwrap();
1840                Ok(ControlToken::Continue)
1841            } else {
1842                if char_ref_name_last_character != Some(';') {
1843                    slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1844                }
1845
1846                if slf.is_consumed_as_part_of_an_attribute() {
1847                    slf.temporary_buffer.clear();
1848                    slf.temporary_buffer.push_str(char_ref.characters);
1849                    slf.emitter.push_attribute_value(&slf.temporary_buffer);
1850                } else {
1851                    for c in char_ref.characters.chars() {
1852                        slf.emitter.emit_char(
1853                            c,
1854                            slf.some_offset
1855                                ..slf.reader.position()
1856                                    - slf.reader.len_of_char_in_current_encoding(c),
1857                        );
1858                    }
1859                }
1860                slf.state = slf.return_state.take().unwrap();
1861                Ok(ControlToken::Continue)
1862            }
1863        }
1864        State::AmbiguousAmpersand => match slf.read_char()? {
1865            Some(x) if x.is_ascii_alphanumeric() => {
1866                if slf.is_consumed_as_part_of_an_attribute() {
1867                    slf.emitter.push_attribute_value(ctostr!(x));
1868                } else {
1869                    slf.emit_char(x);
1870                }
1871
1872                Ok(ControlToken::Continue)
1873            }
1874            c @ Some(';') => {
1875                slf.emit_error(Error::UnknownNamedCharacterReference);
1876                slf.unread_char(c);
1877                slf.state = slf.return_state.take().unwrap();
1878                Ok(ControlToken::Continue)
1879            }
1880            c => {
1881                slf.unread_char(c);
1882                slf.state = slf.return_state.take().unwrap();
1883                Ok(ControlToken::Continue)
1884            }
1885        },
1886        State::NumericCharacterReference => {
1887            slf.character_reference_code = 0;
1888            match slf.read_char()? {
1889                Some(x @ 'x' | x @ 'X') => {
1890                    slf.temporary_buffer.push(x);
1891                    slf.state = State::HexadecimalCharacterReferenceStart;
1892                    Ok(ControlToken::Continue)
1893                }
1894                c => {
1895                    slf.unread_char(c);
1896                    slf.state = State::DecimalCharacterReferenceStart;
1897                    Ok(ControlToken::Continue)
1898                }
1899            }
1900        }
1901        State::HexadecimalCharacterReferenceStart => match slf.read_char()? {
1902            c @ Some('0'..='9' | 'A'..='F' | 'a'..='f') => {
1903                slf.unread_char(c);
1904                slf.state = State::HexadecimalCharacterReference;
1905                Ok(ControlToken::Continue)
1906            }
1907            c => {
1908                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
1909                slf.flush_code_points_consumed_as_character_reference();
1910                slf.unread_char(c);
1911                slf.state = slf.return_state.take().unwrap();
1912                Ok(ControlToken::Continue)
1913            }
1914        },
1915        State::DecimalCharacterReferenceStart => match slf.read_char()? {
1916            Some(x @ ascii_digit_pat!()) => {
1917                slf.unread_char(Some(x));
1918                slf.state = State::DecimalCharacterReference;
1919                Ok(ControlToken::Continue)
1920            }
1921            c => {
1922                slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
1923                slf.flush_code_points_consumed_as_character_reference();
1924                slf.unread_char(c);
1925                slf.state = slf.return_state.take().unwrap();
1926                Ok(ControlToken::Continue)
1927            }
1928        },
1929        State::HexadecimalCharacterReference => match slf.read_char()? {
1930            Some(x @ ascii_digit_pat!()) => {
1931                mutate_character_reference!(*16 + x - 0x0030);
1932                Ok(ControlToken::Continue)
1933            }
1934            Some(x @ 'A'..='F') => {
1935                mutate_character_reference!(*16 + x - 0x0037);
1936                Ok(ControlToken::Continue)
1937            }
1938            Some(x @ 'a'..='f') => {
1939                mutate_character_reference!(*16 + x - 0x0057);
1940                Ok(ControlToken::Continue)
1941            }
1942            Some(';') => {
1943                slf.state = State::NumericCharacterReferenceEnd;
1944                Ok(ControlToken::Continue)
1945            }
1946            c => {
1947                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1948                slf.unread_char(c);
1949                slf.state = State::NumericCharacterReferenceEnd;
1950                Ok(ControlToken::Continue)
1951            }
1952        },
1953        State::DecimalCharacterReference => match slf.read_char()? {
1954            Some(x @ ascii_digit_pat!()) => {
1955                mutate_character_reference!(*10 + x - 0x0030);
1956                Ok(ControlToken::Continue)
1957            }
1958            Some(';') => {
1959                slf.state = State::NumericCharacterReferenceEnd;
1960                Ok(ControlToken::Continue)
1961            }
1962            c => {
1963                slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1964                slf.unread_char(c);
1965                slf.state = State::NumericCharacterReferenceEnd;
1966                Ok(ControlToken::Continue)
1967            }
1968        },
1969        State::NumericCharacterReferenceEnd => {
1970            match slf.character_reference_code {
1971                0x00 => {
1972                    slf.emit_error(Error::NullCharacterReference);
1973                    slf.character_reference_code = 0xfffd;
1974                }
1975                0x110000.. => {
1976                    slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);
1977                    slf.character_reference_code = 0xfffd;
1978                }
1979                surrogate_pat!() => {
1980                    slf.emit_error(Error::SurrogateCharacterReference);
1981                    slf.character_reference_code = 0xfffd;
1982                }
1983                // noncharacter
1984                noncharacter_pat!() => {
1985                    slf.emit_error(Error::NoncharacterCharacterReference);
1986                }
1987                // 0x000d, or a control that is not whitespace
1988                x @ 0x000d | x @ control_pat!()
1989                    if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>
1990                {
1991                    slf.emit_error(Error::ControlCharacterReference);
1992                    slf.character_reference_code = match x {
1993                        0x80 => 0x20AC, // EURO SIGN (€)
1994                        0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK (‚)
1995                        0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK (ƒ)
1996                        0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK („)
1997                        0x85 => 0x2026, // HORIZONTAL ELLIPSIS (…)
1998                        0x86 => 0x2020, // DAGGER (†)
1999                        0x87 => 0x2021, // DOUBLE DAGGER (‡)
2000                        0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ)
2001                        0x89 => 0x2030, // PER MILLE SIGN (‰)
2002                        0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON (Š)
2003                        0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹)
2004                        0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE (Œ)
2005                        0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON (Ž)
2006                        0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK (‘)
2007                        0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK (’)
2008                        0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK (“)
2009                        0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK (”)
2010                        0x95 => 0x2022, // BULLET (•)
2011                        0x96 => 0x2013, // EN DASH (–)
2012                        0x97 => 0x2014, // EM DASH (—)
2013                        0x98 => 0x02DC, // SMALL TILDE (˜)
2014                        0x99 => 0x2122, // TRADE MARK SIGN (™)
2015                        0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON (š)
2016                        0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›)
2017                        0x9C => 0x0153, // LATIN SMALL LIGATURE OE (œ)
2018                        0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON (ž)
2019                        0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ)
2020                        _ => slf.character_reference_code,
2021                    };
2022                }
2023                _ => (),
2024            }
2025
2026            let char = std::char::from_u32(slf.character_reference_code).unwrap();
2027
2028            if slf.is_consumed_as_part_of_an_attribute() {
2029                slf.temporary_buffer.clear();
2030                slf.temporary_buffer.push(char);
2031                slf.emitter.push_attribute_value(&slf.temporary_buffer);
2032            } else {
2033                slf.emitter
2034                    .emit_char(char, slf.some_offset..slf.reader.position());
2035            }
2036            slf.state = slf.return_state.take().unwrap();
2037            Ok(ControlToken::Continue)
2038        }
2039    }
2040}
2041
2042#[inline]
2043pub(super) fn handle_cdata_open<O, R, E>(
2044    slf: &mut Machine<R, O, E>,
2045    adjusted_current_node_present_and_not_in_html_namespace: bool,
2046) where
2047    O: Offset,
2048    R: Reader + Position<O>,
2049    E: Emitter<O>,
2050{
2051    if adjusted_current_node_present_and_not_in_html_namespace {
2052        slf.state = State::CdataSection;
2053    } else {
2054        slf.emit_error(Error::CdataInHtmlContent);
2055
2056        slf.emitter.init_comment(slf.reader.position());
2057        slf.emitter.push_comment("[CDATA[");
2058        slf.state = State::BogusComment;
2059    }
2060}
2061
/// A tiny fixed-capacity LIFO stack holding zero, one or two values of `T`.
///
/// The outer `Option` is `None` when the stack is empty. Otherwise the first
/// tuple field is the bottom element and the second, when present, is the
/// element sitting on top of it.
#[derive(Debug, Default, Clone, Copy)]
struct Stack2<T: Copy>(Option<(T, Option<T>)>);

impl<T: Copy> Stack2<T> {
    /// Pushes `c` on top of the stack.
    ///
    /// # Panics
    ///
    /// Panics with `"stack full!"` if two elements are already stored; the
    /// stack contents are left untouched in that case.
    #[inline]
    fn push(&mut self, c: T) {
        let filled = match self.0 {
            Some((_, Some(_))) => panic!("stack full!"),
            Some((bottom, None)) => (bottom, Some(c)),
            None => (c, None),
        };
        self.0 = Some(filled);
    }

    /// Removes and returns the most recently pushed element, or `None` when
    /// the stack is empty.
    #[inline]
    fn pop(&mut self) -> Option<T> {
        // An empty stack stays empty and yields nothing.
        let (bottom, upper) = self.0?;

        match upper {
            // Two elements: hand out the upper one and keep the bottom.
            Some(top) => {
                self.0 = Some((bottom, None));
                Some(top)
            }
            // One element: hand it out and become empty.
            None => {
                self.0 = None;
                Some(bottom)
            }
        }
    }
}