1mod utils;
2
3use crate::entities::try_read_character_reference;
4use crate::offset::{Offset, Position};
5use crate::trace::AttrValueSyntax;
6use crate::{reader::Reader, Emitter, Error};
7use utils::{
8 ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
9};
10
11pub use utils::State;
12
13pub(super) struct Machine<R, O, E> {
14 pub(super) state: State,
15 pub(super) emitter: E,
16 temporary_buffer: String,
17 reader: R,
18 to_reconsume: Stack2<Option<char>>,
19 character_reference_code: u32,
20 return_state: Option<State>,
21 current_tag_name: String,
22 pub(super) last_start_tag_name: String,
23 is_start_tag: bool,
24 position_before_match: O,
26 some_offset: O,
31 pub(crate) naively_switch_state: bool,
36}
37
38impl<R, O, E> Machine<R, O, E>
39where
40 R: Reader + Position<O>,
41 O: Offset,
42 E: Emitter<O>,
43{
44 pub fn new(reader: R, emitter: E) -> Self {
45 Self {
46 reader,
47 emitter,
48 state: State::Data,
49 to_reconsume: Stack2::default(),
50 return_state: None,
51 temporary_buffer: String::new(),
52 character_reference_code: 0,
53 current_tag_name: String::new(),
54 last_start_tag_name: String::new(),
55 is_start_tag: false,
56 position_before_match: O::default(),
57 some_offset: O::default(),
58 naively_switch_state: false,
59 }
60 }
61}
62
/// Outcome of a single [`consume`] step, telling the caller how to proceed.
///
/// The enum is a plain tag without payload, so it derives the common value
/// traits: callers can compare, copy and debug-print it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlToken {
    /// The reader yielded no further character in the current state.
    Eof,
    /// The step finished normally; `consume` can be called again.
    Continue,
    /// `<![CDATA[` was matched while in the markup-declaration-open state.
    CdataOpen,
}
68
69#[inline]
70pub(super) fn consume<O, R, E>(slf: &mut Machine<R, O, E>) -> Result<ControlToken, R::Error>
71where
72 O: Offset,
73 R: Reader + Position<O>,
74 E: Emitter<O>,
75{
76 macro_rules! mutate_character_reference {
77 (* $mul:literal + $x:ident - $sub:literal) => {
78 match slf
79 .character_reference_code
80 .checked_mul($mul)
81 .and_then(|cr| cr.checked_add($x as u32 - $sub))
82 {
83 Some(cr) => slf.character_reference_code = cr,
84 None => {
85 slf.character_reference_code = 0x110000;
87 }
88 };
89 };
90 }
91
92 slf.position_before_match = slf.reader.position();
93
94 match slf.state {
95 State::Data => match slf.read_char()? {
96 Some('&') => {
97 slf.return_state = Some(slf.state);
98 slf.state = State::CharacterReference;
99 Ok(ControlToken::Continue)
100 }
101 Some('<') => {
102 slf.some_offset = slf.position_before_match;
103 slf.state = State::TagOpen;
104 Ok(ControlToken::Continue)
105 }
106 Some('\0') => {
107 slf.emit_error(Error::UnexpectedNullCharacter);
108 slf.emit_char('\0');
109 Ok(ControlToken::Continue)
110 }
111 Some(x) => {
112 slf.emit_char(x);
113 Ok(ControlToken::Continue)
114 }
115 None => Ok(ControlToken::Eof),
116 },
117 State::Rcdata => match slf.read_char()? {
118 Some('&') => {
119 slf.return_state = Some(State::Rcdata);
120 slf.state = State::CharacterReference;
121 Ok(ControlToken::Continue)
122 }
123 Some('<') => {
124 slf.state = State::RcdataLessThanSign;
125 Ok(ControlToken::Continue)
126 }
127 Some('\0') => {
128 slf.emit_error(Error::UnexpectedNullCharacter);
129 slf.emit_char_for_source_char('\u{fffd}', '\0');
130 Ok(ControlToken::Continue)
131 }
132 Some(x) => {
133 slf.emit_char(x);
134 Ok(ControlToken::Continue)
135 }
136 None => Ok(ControlToken::Eof),
137 },
138 State::Rawtext => match slf.read_char()? {
139 Some('<') => {
140 slf.state = State::RawTextLessThanSign;
141 Ok(ControlToken::Continue)
142 }
143 Some('\0') => {
144 slf.emit_error(Error::UnexpectedNullCharacter);
145 slf.emit_char_for_source_char('\u{fffd}', '\0');
146 Ok(ControlToken::Continue)
147 }
148 Some(x) => {
149 slf.emit_char(x);
150 Ok(ControlToken::Continue)
151 }
152 None => Ok(ControlToken::Eof),
153 },
154 State::ScriptData => match slf.read_char()? {
155 Some('<') => {
156 slf.state = State::ScriptDataLessThanSign;
157 Ok(ControlToken::Continue)
158 }
159 Some('\0') => {
160 slf.emit_error(Error::UnexpectedNullCharacter);
161 slf.emit_char_for_source_char('\u{fffd}', '\0');
162 Ok(ControlToken::Continue)
163 }
164 Some(x) => {
165 slf.emit_char(x);
166 Ok(ControlToken::Continue)
167 }
168 None => Ok(ControlToken::Eof),
169 },
170 State::Plaintext => match slf.read_char()? {
171 Some('\0') => {
172 slf.emit_error(Error::UnexpectedNullCharacter);
173 slf.emit_char_for_source_char('\u{fffd}', '\0');
174 Ok(ControlToken::Continue)
175 }
176 Some(x) => {
177 slf.emit_char(x);
178 Ok(ControlToken::Continue)
179 }
180 None => Ok(ControlToken::Eof),
181 },
182 State::TagOpen => match slf.read_char()? {
183 Some('!') => {
184 slf.state = State::MarkupDeclarationOpen;
185 Ok(ControlToken::Continue)
186 }
187 Some('/') => {
188 slf.state = State::EndTagOpen;
189 Ok(ControlToken::Continue)
190 }
191 Some(x) if x.is_ascii_alphabetic() => {
192 slf.init_start_tag();
193 slf.state = State::TagName;
194 slf.unread_char(Some(x));
195 Ok(ControlToken::Continue)
196 }
197 c @ Some('?') => {
198 slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
199 slf.emitter.init_comment(slf.reader.position());
200 slf.state = State::BogusComment;
201 slf.unread_char(c);
202 Ok(ControlToken::Continue)
203 }
204 None => {
205 slf.emit_error(Error::EofBeforeTagName);
206 slf.emit_char('<');
207 Ok(ControlToken::Eof)
208 }
209 c @ Some(_) => {
210 slf.emit_error(Error::InvalidFirstCharacterOfTagName);
211 slf.state = State::Data;
212 slf.emit_char('<');
213 slf.unread_char(c);
214 Ok(ControlToken::Continue)
215 }
216 },
217 State::EndTagOpen => match slf.read_char()? {
218 Some(x) if x.is_ascii_alphabetic() => {
219 slf.init_end_tag();
220 slf.state = State::TagName;
221 slf.unread_char(Some(x));
222 Ok(ControlToken::Continue)
223 }
224 Some('>') => {
225 slf.emit_error(Error::MissingEndTagName);
226 slf.state = State::Data;
227 Ok(ControlToken::Continue)
228 }
229 None => {
230 slf.emit_error(Error::EofBeforeTagName);
231 slf.emit_chars(b"</");
232 Ok(ControlToken::Eof)
233 }
234 Some(x) => {
235 slf.emit_error(Error::InvalidFirstCharacterOfTagName);
236 slf.emitter.init_comment(slf.reader.position());
237 slf.state = State::BogusComment;
238 slf.unread_char(Some(x));
239 Ok(ControlToken::Continue)
240 }
241 },
242 State::TagName => match slf.read_char()? {
243 Some(whitespace_pat!()) => {
244 slf.emitter.terminate_tag_name(slf.position_before_match);
245 slf.state = State::BeforeAttributeName;
246 Ok(ControlToken::Continue)
247 }
248 Some('/') => {
249 slf.emitter.terminate_tag_name(slf.position_before_match);
250 slf.state = State::SelfClosingStartTag;
251 Ok(ControlToken::Continue)
252 }
253 Some('>') => {
254 slf.emitter.terminate_tag_name(slf.position_before_match);
255 slf.state = State::Data;
256 slf.emit_current_tag();
257 Ok(ControlToken::Continue)
258 }
259 Some('\0') => {
260 slf.emit_error(Error::UnexpectedNullCharacter);
261 slf.push_tag_name("\u{fffd}");
262 Ok(ControlToken::Continue)
263 }
264 Some(x) => {
265 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
266 Ok(ControlToken::Continue)
267 }
268 None => {
269 slf.emit_error(Error::EofInTag);
270 Ok(ControlToken::Eof)
271 }
272 },
273 State::RcdataLessThanSign => match slf.read_char()? {
274 Some('/') => {
275 slf.temporary_buffer.clear();
276 slf.state = State::RcdataEndTagOpen;
277 Ok(ControlToken::Continue)
278 }
279 c => {
280 slf.emit_char('<');
281 slf.state = State::Rcdata;
282 slf.unread_char(c);
283 Ok(ControlToken::Continue)
284 }
285 },
286 State::RcdataEndTagOpen => match slf.read_char()? {
287 Some(x) if x.is_ascii_alphabetic() => {
288 slf.init_end_tag();
289 slf.state = State::RcdataEndTagName;
290 slf.unread_char(Some(x));
291 Ok(ControlToken::Continue)
292 }
293 c => {
294 slf.emit_chars(b"</");
295 slf.state = State::Rcdata;
296 slf.unread_char(c);
297 Ok(ControlToken::Continue)
298 }
299 },
300 State::RcdataEndTagName => match slf.read_char()? {
301 Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
302 slf.state = State::BeforeAttributeName;
303 Ok(ControlToken::Continue)
304 }
305 Some('/') if slf.current_end_tag_is_appropriate() => {
306 slf.state = State::SelfClosingStartTag;
307 Ok(ControlToken::Continue)
308 }
309 Some('>') if slf.current_end_tag_is_appropriate() => {
310 slf.state = State::Data;
311 slf.emit_current_tag();
312 Ok(ControlToken::Continue)
313 }
314 Some(x) if x.is_ascii_alphabetic() => {
315 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
316 slf.temporary_buffer.push(x);
317 Ok(ControlToken::Continue)
318 }
319 c => {
320 slf.emit_chars(b"</");
321 slf.flush_buffer_characters();
322
323 slf.state = State::Rcdata;
324 slf.unread_char(c);
325 Ok(ControlToken::Continue)
326 }
327 },
328 State::RawTextLessThanSign => match slf.read_char()? {
329 Some('/') => {
330 slf.temporary_buffer.clear();
331 slf.state = State::RawTextEndTagOpen;
332 Ok(ControlToken::Continue)
333 }
334 c => {
335 slf.emit_char('<');
336 slf.state = State::Rawtext;
337 slf.unread_char(c);
338 Ok(ControlToken::Continue)
339 }
340 },
341 State::RawTextEndTagOpen => match slf.read_char()? {
342 Some(x) if x.is_ascii_alphabetic() => {
343 slf.init_end_tag();
344 slf.state = State::RawTextEndTagName;
345 slf.unread_char(Some(x));
346 Ok(ControlToken::Continue)
347 }
348 c => {
349 slf.emit_chars(b"</");
350 slf.state = State::Rawtext;
351 slf.unread_char(c);
352 Ok(ControlToken::Continue)
353 }
354 },
355 State::RawTextEndTagName => match slf.read_char()? {
356 Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
357 slf.state = State::BeforeAttributeName;
358 Ok(ControlToken::Continue)
359 }
360 Some('/') if slf.current_end_tag_is_appropriate() => {
361 slf.state = State::SelfClosingStartTag;
362 Ok(ControlToken::Continue)
363 }
364 Some('>') if slf.current_end_tag_is_appropriate() => {
365 slf.state = State::Data;
366 slf.emit_current_tag();
367 Ok(ControlToken::Continue)
368 }
369 Some(x) if x.is_ascii_alphabetic() => {
370 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
371 slf.temporary_buffer.push(x);
372 Ok(ControlToken::Continue)
373 }
374 c => {
375 slf.emit_chars(b"</");
376 slf.flush_buffer_characters();
377
378 slf.state = State::Rawtext;
379 slf.unread_char(c);
380 Ok(ControlToken::Continue)
381 }
382 },
383 State::ScriptDataLessThanSign => match slf.read_char()? {
384 Some('/') => {
385 slf.temporary_buffer.clear();
386 slf.state = State::ScriptDataEndTagOpen;
387 Ok(ControlToken::Continue)
388 }
389 Some('!') => {
390 slf.state = State::ScriptDataEscapeStart;
391 slf.emit_chars(b"<!");
392 Ok(ControlToken::Continue)
393 }
394 c => {
395 slf.emit_char('<');
396 slf.state = State::ScriptData;
397 slf.unread_char(c);
398 Ok(ControlToken::Continue)
399 }
400 },
401 State::ScriptDataEndTagOpen => match slf.read_char()? {
402 Some(x) if x.is_ascii_alphabetic() => {
403 slf.init_end_tag();
404 slf.state = State::ScriptDataEndTagName;
405 slf.unread_char(Some(x));
406 Ok(ControlToken::Continue)
407 }
408 c => {
409 slf.emit_chars(b"</");
410 slf.state = State::ScriptData;
411 slf.unread_char(c);
412 Ok(ControlToken::Continue)
413 }
414 },
415 State::ScriptDataEndTagName => match slf.read_char()? {
416 Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
417 slf.state = State::BeforeAttributeName;
418 Ok(ControlToken::Continue)
419 }
420 Some('/') if slf.current_end_tag_is_appropriate() => {
421 slf.state = State::SelfClosingStartTag;
422 Ok(ControlToken::Continue)
423 }
424 Some('>') if slf.current_end_tag_is_appropriate() => {
425 slf.state = State::Data;
426 slf.emit_current_tag();
427 Ok(ControlToken::Continue)
428 }
429 Some(x) if x.is_ascii_alphabetic() => {
430 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
431 slf.temporary_buffer.push(x);
432 Ok(ControlToken::Continue)
433 }
434 c => {
435 slf.emit_chars(b"</");
436 slf.flush_buffer_characters();
437 slf.state = State::Data;
438 slf.unread_char(c);
439 Ok(ControlToken::Continue)
440 }
441 },
442 State::ScriptDataEscapeStart => match slf.read_char()? {
443 Some('-') => {
444 slf.state = State::ScriptDataEscapeStartDash;
445 slf.emit_char('-');
446 Ok(ControlToken::Continue)
447 }
448 c => {
449 slf.state = State::ScriptData;
450 slf.unread_char(c);
451 Ok(ControlToken::Continue)
452 }
453 },
454 State::ScriptDataEscapeStartDash => match slf.read_char()? {
455 Some('-') => {
456 slf.state = State::ScriptDataEscapedDashDash;
457 slf.emit_char('-');
458 Ok(ControlToken::Continue)
459 }
460 c => {
461 slf.state = State::ScriptData;
462 slf.unread_char(c);
463 Ok(ControlToken::Continue)
464 }
465 },
466 State::ScriptDataEscaped => match slf.read_char()? {
467 Some('-') => {
468 slf.state = State::ScriptDataEscapedDash;
469 slf.emit_char('-');
470 Ok(ControlToken::Continue)
471 }
472 Some('<') => {
473 slf.state = State::ScriptDataEscapedLessThanSign;
474 Ok(ControlToken::Continue)
475 }
476 Some('\0') => {
477 slf.emit_error(Error::UnexpectedNullCharacter);
478 slf.emit_char_for_source_char('\u{fffd}', '\0');
479 Ok(ControlToken::Continue)
480 }
481 None => {
482 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
483 Ok(ControlToken::Eof)
484 }
485 Some(x) => {
486 slf.emit_char(x);
487 Ok(ControlToken::Continue)
488 }
489 },
490 State::ScriptDataEscapedDash => match slf.read_char()? {
491 Some('-') => {
492 slf.state = State::ScriptDataEscapedDashDash;
493 slf.emit_char('-');
494 Ok(ControlToken::Continue)
495 }
496 Some('<') => {
497 slf.state = State::ScriptDataEscapedLessThanSign;
498 Ok(ControlToken::Continue)
499 }
500 Some('\0') => {
501 slf.emit_error(Error::UnexpectedNullCharacter);
502 slf.state = State::ScriptDataEscaped;
503 slf.emit_char_for_source_char('\u{fffd}', '\0');
504 Ok(ControlToken::Continue)
505 }
506 None => {
507 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
508 Ok(ControlToken::Eof)
509 }
510 Some(x) => {
511 slf.state = State::ScriptDataEscaped;
512 slf.emit_char(x);
513 Ok(ControlToken::Continue)
514 }
515 },
516 State::ScriptDataEscapedDashDash => match slf.read_char()? {
517 Some('-') => {
518 slf.emit_char('-');
519 Ok(ControlToken::Continue)
520 }
521 Some('<') => {
522 slf.state = State::ScriptDataEscapedLessThanSign;
523 Ok(ControlToken::Continue)
524 }
525 Some('>') => {
526 slf.state = State::ScriptData;
527 slf.emit_char('>');
528 Ok(ControlToken::Continue)
529 }
530 Some('\0') => {
531 slf.emit_error(Error::UnexpectedNullCharacter);
532 slf.state = State::ScriptDataEscaped;
533 slf.emit_char_for_source_char('\u{fffd}', '\0');
534 Ok(ControlToken::Continue)
535 }
536 None => {
537 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
538 Ok(ControlToken::Eof)
539 }
540 Some(x) => {
541 slf.state = State::ScriptDataEscaped;
542 slf.emit_char(x);
543 Ok(ControlToken::Continue)
544 }
545 },
546 State::ScriptDataEscapedLessThanSign => match slf.read_char()? {
547 Some('/') => {
548 slf.temporary_buffer.clear();
549 slf.state = State::ScriptDataEscapedEndTagOpen;
550 Ok(ControlToken::Continue)
551 }
552 Some(x) if x.is_ascii_alphabetic() => {
553 slf.temporary_buffer.clear();
554 slf.emit_char('<');
555 slf.state = State::ScriptDataDoubleEscapeStart;
556 slf.unread_char(Some(x));
557 Ok(ControlToken::Continue)
558 }
559 c => {
560 slf.emit_char('<');
561 slf.state = State::ScriptDataEscaped;
562 slf.unread_char(c);
563 Ok(ControlToken::Continue)
564 }
565 },
566 State::ScriptDataEscapedEndTagOpen => match slf.read_char()? {
567 Some(x) if x.is_ascii_alphabetic() => {
568 slf.init_end_tag();
569 slf.state = State::ScriptDataEscapedEndTagName;
570 slf.unread_char(Some(x));
571 Ok(ControlToken::Continue)
572 }
573 c => {
574 slf.emit_chars(b"</");
575 slf.unread_char(c);
576 slf.state = State::ScriptDataEscaped;
577 Ok(ControlToken::Continue)
578 }
579 },
580 State::ScriptDataEscapedEndTagName => match slf.read_char()? {
581 Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
582 slf.state = State::BeforeAttributeName;
583 Ok(ControlToken::Continue)
584 }
585 Some('/') if slf.current_end_tag_is_appropriate() => {
586 slf.state = State::SelfClosingStartTag;
587 Ok(ControlToken::Continue)
588 }
589 Some('>') if slf.current_end_tag_is_appropriate() => {
590 slf.state = State::Data;
591 slf.emit_current_tag();
592 Ok(ControlToken::Continue)
593 }
594 Some(x) if x.is_ascii_alphabetic() => {
595 slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
596 slf.temporary_buffer.push(x);
597 Ok(ControlToken::Continue)
598 }
599 c => {
600 slf.emit_chars(b"</");
601 slf.flush_buffer_characters();
602 slf.state = State::ScriptDataEscaped;
603 slf.unread_char(c);
604 Ok(ControlToken::Continue)
605 }
606 },
607 State::ScriptDataDoubleEscapeStart => match slf.read_char()? {
608 Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
609 if slf.temporary_buffer == "script" {
610 slf.state = State::ScriptDataDoubleEscaped;
611 } else {
612 slf.state = State::ScriptDataEscaped;
613 }
614 slf.emit_char(x);
615 Ok(ControlToken::Continue)
616 }
617 Some(x) if x.is_ascii_alphabetic() => {
618 slf.temporary_buffer.push(x.to_ascii_lowercase());
619 slf.emit_char(x);
620 Ok(ControlToken::Continue)
621 }
622 c => {
623 slf.state = State::ScriptDataEscaped;
624 slf.unread_char(c);
625 Ok(ControlToken::Continue)
626 }
627 },
628 State::ScriptDataDoubleEscaped => match slf.read_char()? {
629 Some('-') => {
630 slf.state = State::ScriptDataDoubleEscapedDash;
631 slf.emit_char('-');
632 Ok(ControlToken::Continue)
633 }
634 Some('<') => {
635 slf.state = State::ScriptDataDoubleEscapedLessThanSign;
636 slf.emit_char('<');
637 Ok(ControlToken::Continue)
638 }
639 Some('\0') => {
640 slf.emit_error(Error::UnexpectedNullCharacter);
641 slf.emit_char_for_source_char('\u{fffd}', '\0');
642 Ok(ControlToken::Continue)
643 }
644 None => {
645 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
646 Ok(ControlToken::Eof)
647 }
648 Some(x) => {
649 slf.emit_char(x);
650 Ok(ControlToken::Continue)
651 }
652 },
653 State::ScriptDataDoubleEscapedDash => match slf.read_char()? {
654 Some('-') => {
655 slf.state = State::ScriptDataDoubleEscapedDashDash;
656 slf.emit_char('-');
657 Ok(ControlToken::Continue)
658 }
659 Some('<') => {
660 slf.state = State::ScriptDataDoubleEscapedLessThanSign;
661 slf.emit_char('<');
662 Ok(ControlToken::Continue)
663 }
664 Some('\0') => {
665 slf.emit_error(Error::UnexpectedNullCharacter);
666 slf.state = State::ScriptDataDoubleEscaped;
667 slf.emit_char_for_source_char('\u{fffd}', '\0');
668 Ok(ControlToken::Continue)
669 }
670 None => {
671 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
672 Ok(ControlToken::Eof)
673 }
674 Some(x) => {
675 slf.state = State::ScriptDataDoubleEscaped;
676 slf.emit_char(x);
677 Ok(ControlToken::Continue)
678 }
679 },
680 State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? {
681 Some('-') => {
682 slf.emit_char('-');
683 Ok(ControlToken::Continue)
684 }
685 Some('<') => {
686 slf.emit_char('<');
687 slf.state = State::ScriptDataDoubleEscapedLessThanSign;
688 Ok(ControlToken::Continue)
689 }
690 Some('>') => {
691 slf.emit_char('>');
692 slf.state = State::ScriptData;
693 Ok(ControlToken::Continue)
694 }
695 Some('\0') => {
696 slf.emit_error(Error::UnexpectedNullCharacter);
697 slf.state = State::ScriptDataDoubleEscaped;
698 slf.emit_char_for_source_char('\u{fffd}', '\0');
699 Ok(ControlToken::Continue)
700 }
701 None => {
702 slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
703 Ok(ControlToken::Eof)
704 }
705 Some(x) => {
706 slf.state = State::ScriptDataDoubleEscaped;
707 slf.emit_char(x);
708 Ok(ControlToken::Continue)
709 }
710 },
711 State::ScriptDataDoubleEscapedLessThanSign => match slf.read_char()? {
712 Some('/') => {
713 slf.temporary_buffer.clear();
714 slf.state = State::ScriptDataDoubleEscapeEnd;
715 slf.emit_char('/');
716 Ok(ControlToken::Continue)
717 }
718 c => {
719 slf.state = State::ScriptDataDoubleEscaped;
720 slf.unread_char(c);
721 Ok(ControlToken::Continue)
722 }
723 },
724 State::ScriptDataDoubleEscapeEnd => match slf.read_char()? {
725 Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
726 if slf.temporary_buffer == "script" {
727 slf.state = State::ScriptDataEscaped;
728 } else {
729 slf.state = State::ScriptDataDoubleEscaped;
730 }
731
732 slf.emit_char(x);
733 Ok(ControlToken::Continue)
734 }
735 Some(x) if x.is_ascii_alphabetic() => {
736 slf.temporary_buffer.push(x.to_ascii_lowercase());
737 slf.emit_char(x);
738 Ok(ControlToken::Continue)
739 }
740 c => {
741 slf.state = State::ScriptDataDoubleEscaped;
742 slf.unread_char(c);
743 Ok(ControlToken::Continue)
744 }
745 },
746 State::BeforeAttributeName => match slf.read_char()? {
747 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
748 c @ Some('/' | '>') | c @ None => {
749 slf.state = State::AfterAttributeName;
750 slf.unread_char(c);
751 Ok(ControlToken::Continue)
752 }
753 Some('=') => {
754 slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
755 slf.emitter.init_attribute_name(slf.reader.position());
756 slf.emitter.push_attribute_name("=");
757 slf.state = State::AttributeName;
758 Ok(ControlToken::Continue)
759 }
760 Some(x) => {
761 slf.emitter.init_attribute_name(slf.position_before_match);
762 slf.state = State::AttributeName;
763 slf.unread_char(Some(x));
764 Ok(ControlToken::Continue)
765 }
766 },
767 State::AttributeName => match slf.read_char()? {
768 c @ Some(whitespace_pat!() | '/' | '>') | c @ None => {
769 slf.emitter
770 .terminate_attribute_name(slf.position_before_match);
771 slf.state = State::AfterAttributeName;
772 slf.unread_char(c);
773 Ok(ControlToken::Continue)
774 }
775 Some('=') => {
776 slf.emitter
777 .terminate_attribute_name(slf.position_before_match);
778 slf.state = State::BeforeAttributeValue;
779 Ok(ControlToken::Continue)
780 }
781 Some('\0') => {
782 slf.emit_error(Error::UnexpectedNullCharacter);
783 slf.emitter.push_attribute_name("\u{fffd}");
784 Ok(ControlToken::Continue)
785 }
786 Some(x @ '"' | x @ '\'' | x @ '<') => {
787 slf.emit_error(Error::UnexpectedCharacterInAttributeName);
788 slf.emitter
789 .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
790 Ok(ControlToken::Continue)
791 }
792 Some(x) => {
793 slf.emitter
794 .push_attribute_name(ctostr!(x.to_ascii_lowercase()));
795 Ok(ControlToken::Continue)
796 }
797 },
798 State::AfterAttributeName => match slf.read_char()? {
799 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
800 Some('/') => {
801 slf.state = State::SelfClosingStartTag;
802 Ok(ControlToken::Continue)
803 }
804 Some('=') => {
805 slf.state = State::BeforeAttributeValue;
806 Ok(ControlToken::Continue)
807 }
808 Some('>') => {
809 slf.state = State::Data;
810 slf.emit_current_tag();
811 Ok(ControlToken::Continue)
812 }
813 None => {
814 slf.emit_error(Error::EofInTag);
815 Ok(ControlToken::Eof)
816 }
817 Some(x) => {
818 slf.emitter.init_attribute_name(slf.position_before_match);
819 slf.state = State::AttributeName;
820 slf.unread_char(Some(x));
821 Ok(ControlToken::Continue)
822 }
823 },
824 State::BeforeAttributeValue => match slf.read_char()? {
825 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
826 Some('"') => {
827 slf.emitter
828 .init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
829 slf.state = State::AttributeValueDoubleQuoted;
830 Ok(ControlToken::Continue)
831 }
832 Some('\'') => {
833 slf.emitter
834 .init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
835 slf.state = State::AttributeValueSingleQuoted;
836 Ok(ControlToken::Continue)
837 }
838 Some('>') => {
839 slf.emit_error(Error::MissingAttributeValue);
840 slf.state = State::Data;
841 slf.emit_current_tag();
842 Ok(ControlToken::Continue)
843 }
844 c => {
845 slf.emitter
846 .init_attribute_value(AttrValueSyntax::Unquoted, slf.position_before_match);
847 slf.state = State::AttributeValueUnquoted;
848 slf.unread_char(c);
849 Ok(ControlToken::Continue)
850 }
851 },
852 State::AttributeValueDoubleQuoted => match slf.read_char()? {
853 Some('"') => {
854 slf.emitter.terminate_attribute_value(
855 slf.reader.position() - slf.reader.len_of_char_in_current_encoding('"'),
860 );
861 slf.state = State::AfterAttributeValueQuoted;
862 Ok(ControlToken::Continue)
863 }
864 Some('&') => {
865 slf.return_state = Some(State::AttributeValueDoubleQuoted);
866 slf.state = State::CharacterReference;
867 Ok(ControlToken::Continue)
868 }
869 Some('\0') => {
870 slf.emit_error(Error::UnexpectedNullCharacter);
871 slf.emitter.push_attribute_value("\u{fffd}");
872 Ok(ControlToken::Continue)
873 }
874 None => {
875 slf.emit_error(Error::EofInTag);
876 Ok(ControlToken::Eof)
877 }
878 Some(x) => {
879 slf.emitter.push_attribute_value(ctostr!(x));
880 Ok(ControlToken::Continue)
881 }
882 },
883 State::AttributeValueSingleQuoted => match slf.read_char()? {
884 Some('\'') => {
885 slf.emitter.terminate_attribute_value(
886 slf.reader.position() - slf.reader.len_of_char_in_current_encoding('\''),
891 );
892 slf.state = State::AfterAttributeValueQuoted;
893 Ok(ControlToken::Continue)
894 }
895 Some('&') => {
896 slf.return_state = Some(State::AttributeValueSingleQuoted);
897 slf.state = State::CharacterReference;
898 Ok(ControlToken::Continue)
899 }
900 Some('\0') => {
901 slf.emit_error(Error::UnexpectedNullCharacter);
902 slf.emitter.push_attribute_value("\u{fffd}");
903 Ok(ControlToken::Continue)
904 }
905 None => {
906 slf.emit_error(Error::EofInTag);
907 Ok(ControlToken::Eof)
908 }
909 Some(x) => {
910 slf.emitter.push_attribute_value(ctostr!(x));
911 Ok(ControlToken::Continue)
912 }
913 },
914 State::AttributeValueUnquoted => match slf.read_char()? {
915 Some(whitespace_pat!()) => {
916 slf.emitter.terminate_attribute_value(
917 slf.reader.position() - slf.reader.len_of_char_in_current_encoding(' '),
922 );
923 slf.state = State::BeforeAttributeName;
924 Ok(ControlToken::Continue)
925 }
926 Some('&') => {
927 slf.return_state = Some(State::AttributeValueUnquoted);
928 slf.state = State::CharacterReference;
929 Ok(ControlToken::Continue)
930 }
931 Some('>') => {
932 slf.state = State::Data;
933 slf.emit_current_tag();
934 Ok(ControlToken::Continue)
935 }
936 Some('\0') => {
937 slf.emit_error(Error::UnexpectedNullCharacter);
938 slf.emitter.push_attribute_value("\u{fffd}");
939 Ok(ControlToken::Continue)
940 }
941 Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
942 slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
943 slf.emitter.push_attribute_value(ctostr!(x));
944 Ok(ControlToken::Continue)
945 }
946 None => {
947 slf.emit_error(Error::EofInTag);
948 Ok(ControlToken::Eof)
949 }
950 Some(x) => {
951 slf.emitter.push_attribute_value(ctostr!(x));
952 Ok(ControlToken::Continue)
953 }
954 },
955 State::AfterAttributeValueQuoted => match slf.read_char()? {
956 Some(whitespace_pat!()) => {
957 slf.state = State::BeforeAttributeName;
958 Ok(ControlToken::Continue)
959 }
960 Some('/') => {
961 slf.state = State::SelfClosingStartTag;
962 Ok(ControlToken::Continue)
963 }
964 Some('>') => {
965 slf.state = State::Data;
966 slf.emit_current_tag();
967 Ok(ControlToken::Continue)
968 }
969 None => {
970 slf.emit_error(Error::EofInTag);
971 Ok(ControlToken::Eof)
972 }
973 Some(x) => {
974 slf.emit_error(Error::MissingWhitespaceBetweenAttributes);
975 slf.state = State::BeforeAttributeName;
976 slf.unread_char(Some(x));
977 Ok(ControlToken::Continue)
978 }
979 },
980 State::SelfClosingStartTag => match slf.read_char()? {
981 Some('>') => {
982 slf.emitter.set_self_closing(
983 slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/')
984 ..slf.position_before_match,
985 );
986 slf.state = State::Data;
987 slf.emit_current_tag();
988 Ok(ControlToken::Continue)
989 }
990 None => {
991 slf.emit_error(Error::EofInTag);
992 Ok(ControlToken::Eof)
993 }
994 Some(x) => {
995 slf.emit_error(Error::UnexpectedSolidusInTag);
996 slf.state = State::BeforeAttributeName;
997 slf.unread_char(Some(x));
998 Ok(ControlToken::Continue)
999 }
1000 },
1001 State::BogusComment => match slf.read_char()? {
1002 Some('>') => {
1003 slf.state = State::Data;
1004 slf.emitter.emit_current_comment(slf.position_before_match);
1005 Ok(ControlToken::Continue)
1006 }
1007 None => {
1008 slf.emitter.emit_current_comment(slf.position_before_match);
1009 Ok(ControlToken::Eof)
1010 }
1011 Some('\0') => {
1012 slf.emit_error(Error::UnexpectedNullCharacter);
1013 slf.emitter.push_comment("\u{fffd}");
1014 Ok(ControlToken::Continue)
1015 }
1016 Some(x) => {
1017 slf.emitter.push_comment(ctostr!(x));
1018 Ok(ControlToken::Continue)
1019 }
1020 },
1021 State::MarkupDeclarationOpen => match slf.read_char()? {
1022 Some('-') if slf.try_read_string("-", true)? => {
1023 slf.emitter.init_comment(slf.reader.position());
1024 slf.state = State::CommentStart;
1025 Ok(ControlToken::Continue)
1026 }
1027 Some('d' | 'D') if slf.try_read_string("octype", false)? => {
1028 slf.state = State::Doctype;
1029 Ok(ControlToken::Continue)
1030 }
1031 Some('[') if slf.try_read_string("CDATA[", true)? => Ok(ControlToken::CdataOpen),
1032 c => {
1033 slf.emit_error(Error::IncorrectlyOpenedComment);
1034 slf.emitter.init_comment(slf.position_before_match);
1035 slf.state = State::BogusComment;
1036 slf.unread_char(c);
1037 Ok(ControlToken::Continue)
1038 }
1039 },
1040 State::CommentStart => match slf.read_char()? {
1041 Some('-') => {
1042 slf.state = State::CommentStartDash;
1043 Ok(ControlToken::Continue)
1044 }
1045 Some('>') => {
1046 slf.emit_error(Error::AbruptClosingOfEmptyComment);
1047 slf.state = State::Data;
1048 slf.emitter.emit_current_comment(slf.position_before_match);
1049 Ok(ControlToken::Continue)
1050 }
1051 c => {
1052 slf.unread_char(c);
1053 slf.state = State::Comment;
1054 Ok(ControlToken::Continue)
1055 }
1056 },
1057 State::CommentStartDash => match slf.read_char()? {
1058 Some('-') => {
1059 slf.state = State::CommentEnd;
1060 Ok(ControlToken::Continue)
1061 }
1062 Some(c @ '>') => {
1063 slf.emit_error(Error::AbruptClosingOfEmptyComment);
1064 slf.state = State::Data;
1065 slf.emitter.emit_current_comment(
1066 slf.position_before_match - slf.reader.len_of_char_in_current_encoding(c),
1067 );
1068 Ok(ControlToken::Continue)
1069 }
1070 None => {
1071 slf.emit_error(Error::EofInComment);
1072 slf.emitter.emit_current_comment(
1073 slf.position_before_match - slf.reader.len_of_char_in_current_encoding('-'),
1074 );
1075 Ok(ControlToken::Eof)
1076 }
1077 c @ Some(_) => {
1078 slf.emitter.push_comment("-");
1079 slf.unread_char(c);
1080 slf.state = State::Comment;
1081 Ok(ControlToken::Continue)
1082 }
1083 },
1084 State::Comment => match slf.read_char()? {
1085 Some('<') => {
1086 slf.emitter.push_comment("<");
1087 slf.state = State::CommentLessThanSign;
1088 Ok(ControlToken::Continue)
1089 }
1090 Some('-') => {
1091 slf.some_offset = slf.position_before_match;
1092 slf.state = State::CommentEndDash;
1093 Ok(ControlToken::Continue)
1094 }
1095 Some('\0') => {
1096 slf.emit_error(Error::UnexpectedNullCharacter);
1097 slf.emitter.push_comment("\u{fffd}");
1098 Ok(ControlToken::Continue)
1099 }
1100 None => {
1101 slf.emit_error(Error::EofInComment);
1102 slf.emitter.emit_current_comment(slf.reader.position());
1103 Ok(ControlToken::Eof)
1104 }
1105 Some(x) => {
1106 slf.emitter.push_comment(ctostr!(x));
1107 Ok(ControlToken::Continue)
1108 }
1109 },
1110 State::CommentLessThanSign => match slf.read_char()? {
1111 Some('!') => {
1112 slf.emitter.push_comment("!");
1113 slf.state = State::CommentLessThanSignBang;
1114 Ok(ControlToken::Continue)
1115 }
1116 Some('<') => {
1117 slf.emitter.push_comment("<");
1118 Ok(ControlToken::Continue)
1119 }
1120 c => {
1121 slf.unread_char(c);
1122 slf.state = State::Comment;
1123 Ok(ControlToken::Continue)
1124 }
1125 },
1126 State::CommentLessThanSignBang => match slf.read_char()? {
1127 Some('-') => {
1128 slf.state = State::CommentLessThanSignBangDash;
1129 Ok(ControlToken::Continue)
1130 }
1131 c => {
1132 slf.unread_char(c);
1133 slf.state = State::Comment;
1134 Ok(ControlToken::Continue)
1135 }
1136 },
1137 State::CommentLessThanSignBangDash => match slf.read_char()? {
1138 Some('-') => {
1139 slf.state = State::CommentLessThanSignBangDashDash;
1140 Ok(ControlToken::Continue)
1141 }
1142 c => {
1143 slf.unread_char(c);
1144 slf.state = State::CommentEndDash;
1145 Ok(ControlToken::Continue)
1146 }
1147 },
1148 State::CommentLessThanSignBangDashDash => match slf.read_char()? {
1149 c @ Some('>') | c @ None => {
1150 slf.unread_char(c);
1151 slf.state = State::CommentEnd;
1152 Ok(ControlToken::Continue)
1153 }
1154 c => {
1155 slf.emit_error(Error::NestedComment);
1156 slf.unread_char(c);
1157 slf.state = State::CommentEnd;
1158 Ok(ControlToken::Continue)
1159 }
1160 },
1161 State::CommentEndDash => match slf.read_char()? {
1162 Some('-') => {
1163 slf.state = State::CommentEnd;
1164 Ok(ControlToken::Continue)
1165 }
1166 None => {
1167 slf.emit_error(Error::EofInComment);
1168 slf.emitter.emit_current_comment(slf.some_offset);
1169 Ok(ControlToken::Eof)
1170 }
1171 c => {
1172 slf.emitter.push_comment("-");
1173 slf.unread_char(c);
1174 slf.state = State::Comment;
1175 Ok(ControlToken::Continue)
1176 }
1177 },
1178 State::CommentEnd => match slf.read_char()? {
1179 Some('>') => {
1180 slf.state = State::Data;
1181 slf.emitter.emit_current_comment(slf.some_offset);
1182 Ok(ControlToken::Continue)
1183 }
1184 Some('!') => {
1185 slf.state = State::CommentEndBang;
1186 Ok(ControlToken::Continue)
1187 }
1188 Some('-') => {
1189 slf.emitter.push_comment("-");
1190 Ok(ControlToken::Continue)
1191 }
1192 None => {
1193 slf.emit_error(Error::EofInComment);
1194 slf.emitter.emit_current_comment(slf.some_offset);
1195 Ok(ControlToken::Eof)
1196 }
1197 c @ Some(_) => {
1198 slf.emitter.push_comment("-");
1199 slf.emitter.push_comment("-");
1200 slf.unread_char(c);
1201 slf.state = State::Comment;
1202 Ok(ControlToken::Continue)
1203 }
1204 },
1205 State::CommentEndBang => match slf.read_char()? {
1206 Some('-') => {
1207 slf.emitter.push_comment("-");
1208 slf.emitter.push_comment("-");
1209 slf.emitter.push_comment("!");
1210 slf.state = State::CommentEndDash;
1211 Ok(ControlToken::Continue)
1212 }
1213 Some('>') => {
1214 slf.emit_error(Error::IncorrectlyClosedComment);
1215 slf.state = State::Data;
1216 slf.emitter.emit_current_comment(slf.some_offset);
1217 Ok(ControlToken::Continue)
1218 }
1219 None => {
1220 slf.emit_error(Error::EofInComment);
1221 slf.emitter.emit_current_comment(slf.some_offset);
1222 Ok(ControlToken::Eof)
1223 }
1224 c @ Some(_) => {
1225 slf.emitter.push_comment("-");
1226 slf.emitter.push_comment("-");
1227 slf.emitter.push_comment("!");
1228 slf.state = State::Comment;
1229 slf.unread_char(c);
1230 Ok(ControlToken::Continue)
1231 }
1232 },
1233 State::Doctype => match slf.read_char()? {
1234 Some(whitespace_pat!()) => {
1235 slf.state = State::BeforeDoctypeName;
1236 Ok(ControlToken::Continue)
1237 }
1238 c @ Some('>') => {
1239 slf.unread_char(c);
1240 slf.state = State::BeforeDoctypeName;
1241 Ok(ControlToken::Continue)
1242 }
1243 None => {
1244 slf.emit_error(Error::EofInDoctype);
1245 slf.init_doctype();
1246 slf.emitter.set_force_quirks();
1247 slf.emitter.emit_current_doctype(slf.reader.position());
1248 Ok(ControlToken::Eof)
1249 }
1250 c @ Some(_) => {
1251 slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
1252 slf.unread_char(c);
1253 slf.state = State::BeforeDoctypeName;
1254 Ok(ControlToken::Continue)
1255 }
1256 },
1257 State::BeforeDoctypeName => match slf.read_char()? {
1258 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1259 Some('\0') => {
1260 slf.emit_error(Error::UnexpectedNullCharacter);
1261 slf.init_doctype();
1262 slf.emitter.init_doctype_name(slf.position_before_match);
1263 slf.emitter.push_doctype_name("\u{fffd}");
1264 slf.state = State::DoctypeName;
1265 Ok(ControlToken::Continue)
1266 }
1267 Some('>') => {
1268 slf.emit_error(Error::MissingDoctypeName);
1269 slf.init_doctype();
1270 slf.emitter.set_force_quirks();
1271 slf.state = State::Data;
1272 slf.emitter.emit_current_doctype(slf.reader.position());
1273 Ok(ControlToken::Continue)
1274 }
1275 None => {
1276 slf.emit_error(Error::EofInDoctype);
1277 slf.init_doctype();
1278 slf.emitter.set_force_quirks();
1279 slf.emitter.emit_current_doctype(slf.reader.position());
1280 Ok(ControlToken::Eof)
1281 }
1282 Some(x) => {
1283 slf.init_doctype();
1284 slf.emitter.init_doctype_name(slf.position_before_match);
1285 slf.emitter
1286 .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
1287 slf.state = State::DoctypeName;
1288 Ok(ControlToken::Continue)
1289 }
1290 },
1291 State::DoctypeName => match slf.read_char()? {
1292 Some(whitespace_pat!()) => {
1293 slf.emitter
1294 .terminate_doctype_name(slf.position_before_match);
1295 slf.state = State::AfterDoctypeName;
1296 Ok(ControlToken::Continue)
1297 }
1298 Some('>') => {
1299 slf.emitter
1300 .terminate_doctype_name(slf.position_before_match);
1301 slf.state = State::Data;
1302 slf.emitter.emit_current_doctype(slf.reader.position());
1303 Ok(ControlToken::Continue)
1304 }
1305 Some('\0') => {
1306 slf.emit_error(Error::UnexpectedNullCharacter);
1307 slf.emitter.push_doctype_name("\u{fffd}");
1308 Ok(ControlToken::Continue)
1309 }
1310 None => {
1311 slf.emit_error(Error::EofInDoctype);
1312 slf.emitter
1313 .terminate_doctype_name(slf.position_before_match);
1314 slf.emitter.set_force_quirks();
1315 slf.emitter.emit_current_doctype(slf.reader.position());
1316 Ok(ControlToken::Eof)
1317 }
1318 Some(x) => {
1319 slf.emitter
1320 .push_doctype_name(ctostr!(x.to_ascii_lowercase()));
1321 Ok(ControlToken::Continue)
1322 }
1323 },
1324 State::AfterDoctypeName => match slf.read_char()? {
1325 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1326 Some('>') => {
1327 slf.state = State::Data;
1328 slf.emitter.emit_current_doctype(slf.reader.position());
1329 Ok(ControlToken::Continue)
1330 }
1331 None => {
1332 slf.emit_error(Error::EofInDoctype);
1333 slf.emitter.set_force_quirks();
1334 slf.emitter.emit_current_doctype(slf.reader.position());
1335 Ok(ControlToken::Eof)
1336 }
1337 Some('p' | 'P') if slf.try_read_string("ublic", false)? => {
1338 slf.state = State::AfterDoctypePublicKeyword;
1339 Ok(ControlToken::Continue)
1340 }
1341 Some('s' | 'S') if slf.try_read_string("ystem", false)? => {
1342 slf.state = State::AfterDoctypeSystemKeyword;
1343 Ok(ControlToken::Continue)
1344 }
1345 c @ Some(_) => {
1346 slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
1347 slf.emitter.set_force_quirks();
1348 slf.unread_char(c);
1349 slf.state = State::BogusDoctype;
1350 Ok(ControlToken::Continue)
1351 }
1352 },
1353 State::AfterDoctypePublicKeyword => match slf.read_char()? {
1354 Some(whitespace_pat!()) => {
1355 slf.state = State::BeforeDoctypePublicIdentifier;
1356 Ok(ControlToken::Continue)
1357 }
1358 Some('"') => {
1359 slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
1360 slf.emitter.init_doctype_public_id(slf.reader.position());
1361 slf.state = State::DoctypePublicIdentifierDoubleQuoted;
1362 Ok(ControlToken::Continue)
1363 }
1364 Some('\'') => {
1365 slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
1366 slf.emitter.init_doctype_public_id(slf.reader.position());
1367 slf.state = State::DoctypePublicIdentifierSingleQuoted;
1368 Ok(ControlToken::Continue)
1369 }
1370 Some('>') => {
1371 slf.emit_error(Error::MissingDoctypePublicIdentifier);
1372 slf.emitter.set_force_quirks();
1373 slf.state = State::Data;
1374 slf.emitter.emit_current_doctype(slf.reader.position());
1375 Ok(ControlToken::Continue)
1376 }
1377 None => {
1378 slf.emit_error(Error::EofInDoctype);
1379 slf.emitter.set_force_quirks();
1380 slf.emitter.emit_current_doctype(slf.reader.position());
1381 Ok(ControlToken::Eof)
1382 }
1383 c @ Some(_) => {
1384 slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
1385 slf.emitter.set_force_quirks();
1386 slf.unread_char(c);
1387 slf.state = State::BogusDoctype;
1388 Ok(ControlToken::Continue)
1389 }
1390 },
1391 State::BeforeDoctypePublicIdentifier => match slf.read_char()? {
1392 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1393 Some('"') => {
1394 slf.emitter.init_doctype_public_id(slf.reader.position());
1395 slf.state = State::DoctypePublicIdentifierDoubleQuoted;
1396 Ok(ControlToken::Continue)
1397 }
1398 Some('\'') => {
1399 slf.emitter.init_doctype_public_id(slf.reader.position());
1400 slf.state = State::DoctypePublicIdentifierSingleQuoted;
1401 Ok(ControlToken::Continue)
1402 }
1403 Some('>') => {
1404 slf.emit_error(Error::MissingDoctypePublicIdentifier);
1405 slf.emitter.set_force_quirks();
1406 slf.state = State::Data;
1407 slf.emitter.emit_current_doctype(slf.reader.position());
1408 Ok(ControlToken::Continue)
1409 }
1410 None => {
1411 slf.emit_error(Error::EofInDoctype);
1412 slf.emitter.set_force_quirks();
1413 slf.emitter.emit_current_doctype(slf.reader.position());
1414 Ok(ControlToken::Eof)
1415 }
1416 c @ Some(_) => {
1417 slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
1418 slf.emitter.set_force_quirks();
1419 slf.unread_char(c);
1420 slf.state = State::BogusDoctype;
1421 Ok(ControlToken::Continue)
1422 }
1423 },
1424 State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? {
1425 Some('"') => {
1426 slf.emitter
1427 .terminate_doctype_public_id(slf.position_before_match);
1428 slf.state = State::AfterDoctypePublicIdentifier;
1429 Ok(ControlToken::Continue)
1430 }
1431 Some('\0') => {
1432 slf.emit_error(Error::UnexpectedNullCharacter);
1433 slf.emitter.push_doctype_public_id("\u{fffd}");
1434 Ok(ControlToken::Continue)
1435 }
1436 Some('>') => {
1437 slf.emitter
1438 .terminate_doctype_public_id(slf.position_before_match);
1439 slf.emit_error(Error::AbruptDoctypePublicIdentifier);
1440 slf.emitter.set_force_quirks();
1441 slf.state = State::Data;
1442 slf.emitter.emit_current_doctype(slf.reader.position());
1443 Ok(ControlToken::Continue)
1444 }
1445 None => {
1446 slf.emitter
1447 .terminate_doctype_public_id(slf.reader.position());
1448 slf.emit_error(Error::EofInDoctype);
1449 slf.emitter.set_force_quirks();
1450 slf.emitter.emit_current_doctype(slf.reader.position());
1451 Ok(ControlToken::Eof)
1452 }
1453 Some(x) => {
1454 slf.emitter.push_doctype_public_id(ctostr!(x));
1455 Ok(ControlToken::Continue)
1456 }
1457 },
1458 State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? {
1459 Some('\'') => {
1460 slf.emitter
1461 .terminate_doctype_public_id(slf.position_before_match);
1462 slf.state = State::AfterDoctypePublicIdentifier;
1463 Ok(ControlToken::Continue)
1464 }
1465 Some('\0') => {
1466 slf.emit_error(Error::UnexpectedNullCharacter);
1467 slf.emitter.push_doctype_public_id("\u{fffd}");
1468 Ok(ControlToken::Continue)
1469 }
1470 Some('>') => {
1471 slf.emitter
1472 .terminate_doctype_public_id(slf.position_before_match);
1473 slf.emit_error(Error::AbruptDoctypePublicIdentifier);
1474 slf.emitter.set_force_quirks();
1475 slf.state = State::Data;
1476 slf.emitter.emit_current_doctype(slf.reader.position());
1477 Ok(ControlToken::Continue)
1478 }
1479 None => {
1480 slf.emitter
1481 .terminate_doctype_public_id(slf.reader.position());
1482 slf.emit_error(Error::EofInDoctype);
1483 slf.emitter.set_force_quirks();
1484 slf.emitter.emit_current_doctype(slf.reader.position());
1485 Ok(ControlToken::Eof)
1486 }
1487 Some(x) => {
1488 slf.emitter.push_doctype_public_id(ctostr!(x));
1489 Ok(ControlToken::Continue)
1490 }
1491 },
1492 State::AfterDoctypePublicIdentifier => match slf.read_char()? {
1493 Some(whitespace_pat!()) => {
1494 slf.state = State::BetweenDoctypePublicAndSystemIdentifiers;
1495 Ok(ControlToken::Continue)
1496 }
1497 Some('>') => {
1498 slf.state = State::Data;
1499 slf.emitter.emit_current_doctype(slf.reader.position());
1500 Ok(ControlToken::Continue)
1501 }
1502 Some('"') => {
1503 slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
1504 slf.emitter.init_doctype_system_id(slf.reader.position());
1505 slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1506 Ok(ControlToken::Continue)
1507 }
1508 Some('\'') => {
1509 slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
1510 slf.emitter.init_doctype_system_id(slf.reader.position());
1511 slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1512 Ok(ControlToken::Continue)
1513 }
1514 None => {
1515 slf.emit_error(Error::EofInDoctype);
1516 slf.emitter.set_force_quirks();
1517 slf.emitter.emit_current_doctype(slf.reader.position());
1518 Ok(ControlToken::Eof)
1519 }
1520 c @ Some(_) => {
1521 slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1522 slf.emitter.set_force_quirks();
1523 slf.unread_char(c);
1524 slf.state = State::BogusDoctype;
1525 Ok(ControlToken::Continue)
1526 }
1527 },
1528 State::BetweenDoctypePublicAndSystemIdentifiers => match slf.read_char()? {
1529 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1530 Some('>') => {
1531 slf.state = State::Data;
1532 slf.emitter.emit_current_doctype(slf.reader.position());
1533 Ok(ControlToken::Continue)
1534 }
1535 Some('"') => {
1536 slf.emitter.init_doctype_system_id(slf.reader.position());
1537 slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1538 Ok(ControlToken::Continue)
1539 }
1540 Some('\'') => {
1541 slf.emitter.init_doctype_system_id(slf.reader.position());
1542 slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1543 Ok(ControlToken::Continue)
1544 }
1545 None => {
1546 slf.emit_error(Error::EofInDoctype);
1547 slf.emitter.set_force_quirks();
1548 slf.emitter.emit_current_doctype(slf.reader.position());
1549 Ok(ControlToken::Eof)
1550 }
1551 c @ Some(_) => {
1552 slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1553 slf.emitter.set_force_quirks();
1554 slf.state = State::BogusDoctype;
1555 slf.unread_char(c);
1556 Ok(ControlToken::Continue)
1557 }
1558 },
1559 State::AfterDoctypeSystemKeyword => match slf.read_char()? {
1560 Some(whitespace_pat!()) => {
1561 slf.state = State::BeforeDoctypeSystemIdentifier;
1562 Ok(ControlToken::Continue)
1563 }
1564 Some('"') => {
1565 slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
1566 slf.emitter.init_doctype_system_id(slf.reader.position());
1567 slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1568 Ok(ControlToken::Continue)
1569 }
1570 Some('\'') => {
1571 slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
1572 slf.emitter.init_doctype_system_id(slf.reader.position());
1573 slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1574 Ok(ControlToken::Continue)
1575 }
1576 Some('>') => {
1577 slf.emit_error(Error::MissingDoctypeSystemIdentifier);
1578 slf.emitter.set_force_quirks();
1579 slf.state = State::Data;
1580 slf.emitter.emit_current_doctype(slf.reader.position());
1581 Ok(ControlToken::Continue)
1582 }
1583 None => {
1584 slf.emit_error(Error::EofInDoctype);
1585 slf.emitter.set_force_quirks();
1586 slf.emitter.emit_current_doctype(slf.reader.position());
1587 Ok(ControlToken::Eof)
1588 }
1589 c @ Some(_) => {
1590 slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1591 slf.emitter.set_force_quirks();
1592 slf.state = State::BogusDoctype;
1593 slf.unread_char(c);
1594 Ok(ControlToken::Continue)
1595 }
1596 },
1597 State::BeforeDoctypeSystemIdentifier => match slf.read_char()? {
1598 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1599 Some('"') => {
1600 slf.emitter.init_doctype_system_id(slf.reader.position());
1601 slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
1602 Ok(ControlToken::Continue)
1603 }
1604 Some('\'') => {
1605 slf.emitter.init_doctype_system_id(slf.reader.position());
1606 slf.state = State::DoctypeSystemIdentifierSingleQuoted;
1607 Ok(ControlToken::Continue)
1608 }
1609 Some('>') => {
1610 slf.emit_error(Error::MissingDoctypeSystemIdentifier);
1611 slf.emitter.set_force_quirks();
1612 slf.state = State::Data;
1613 slf.emitter.emit_current_doctype(slf.reader.position());
1614 Ok(ControlToken::Continue)
1615 }
1616 None => {
1617 slf.emit_error(Error::EofInDoctype);
1618 slf.emitter.set_force_quirks();
1619 slf.emitter.emit_current_doctype(slf.reader.position());
1620 Ok(ControlToken::Eof)
1621 }
1622 c @ Some(_) => {
1623 slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
1624 slf.emitter.set_force_quirks();
1625 slf.state = State::BogusDoctype;
1626 slf.unread_char(c);
1627 Ok(ControlToken::Continue)
1628 }
1629 },
1630 State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? {
1631 Some('"') => {
1632 slf.emitter
1633 .terminate_doctype_system_id(slf.position_before_match);
1634 slf.state = State::AfterDoctypeSystemIdentifier;
1635 Ok(ControlToken::Continue)
1636 }
1637 Some('\0') => {
1638 slf.emit_error(Error::UnexpectedNullCharacter);
1639 slf.emitter.push_doctype_system_id("\u{fffd}");
1640 Ok(ControlToken::Continue)
1641 }
1642 Some('>') => {
1643 slf.emitter
1644 .terminate_doctype_system_id(slf.position_before_match);
1645 slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
1646 slf.emitter.set_force_quirks();
1647 slf.state = State::Data;
1648 slf.emitter.emit_current_doctype(slf.reader.position());
1649 Ok(ControlToken::Continue)
1650 }
1651 None => {
1652 slf.emitter
1653 .terminate_doctype_system_id(slf.reader.position());
1654 slf.emit_error(Error::EofInDoctype);
1655 slf.emitter.set_force_quirks();
1656 slf.emitter.emit_current_doctype(slf.reader.position());
1657 Ok(ControlToken::Eof)
1658 }
1659 Some(x) => {
1660 slf.emitter.push_doctype_system_id(ctostr!(x));
1661 Ok(ControlToken::Continue)
1662 }
1663 },
1664 State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? {
1665 Some('\'') => {
1666 slf.emitter
1667 .terminate_doctype_system_id(slf.position_before_match);
1668 slf.state = State::AfterDoctypeSystemIdentifier;
1669 Ok(ControlToken::Continue)
1670 }
1671 Some('\0') => {
1672 slf.emit_error(Error::UnexpectedNullCharacter);
1673 slf.emitter.push_doctype_system_id("\u{fffd}");
1674 Ok(ControlToken::Continue)
1675 }
1676 Some('>') => {
1677 slf.emitter
1678 .terminate_doctype_system_id(slf.position_before_match);
1679 slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
1680 slf.emitter.set_force_quirks();
1681 slf.state = State::Data;
1682 slf.emitter.emit_current_doctype(slf.reader.position());
1683 Ok(ControlToken::Continue)
1684 }
1685 None => {
1686 slf.emitter
1687 .terminate_doctype_system_id(slf.reader.position());
1688 slf.emit_error(Error::EofInDoctype);
1689 slf.emitter.set_force_quirks();
1690 slf.emitter.emit_current_doctype(slf.reader.position());
1691 Ok(ControlToken::Eof)
1692 }
1693 Some(x) => {
1694 slf.emitter.push_doctype_system_id(ctostr!(x));
1695 Ok(ControlToken::Continue)
1696 }
1697 },
1698 State::AfterDoctypeSystemIdentifier => match slf.read_char()? {
1699 Some(whitespace_pat!()) => Ok(ControlToken::Continue),
1700 Some('>') => {
1701 slf.state = State::Data;
1702 slf.emitter.emit_current_doctype(slf.reader.position());
1703 Ok(ControlToken::Continue)
1704 }
1705 None => {
1706 slf.emit_error(Error::EofInDoctype);
1707 slf.emitter.set_force_quirks();
1708 slf.emitter.emit_current_doctype(slf.reader.position());
1709 Ok(ControlToken::Eof)
1710 }
1711 c @ Some(_) => {
1712 slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
1713 slf.unread_char(c);
1714 slf.state = State::BogusDoctype;
1715 Ok(ControlToken::Continue)
1716 }
1717 },
1718 State::BogusDoctype => match slf.read_char()? {
1719 Some('>') => {
1720 slf.state = State::Data;
1721 slf.emitter.emit_current_doctype(slf.reader.position());
1722 Ok(ControlToken::Continue)
1723 }
1724 Some('\0') => {
1725 slf.emit_error(Error::UnexpectedNullCharacter);
1726 Ok(ControlToken::Continue)
1727 }
1728 None => {
1729 slf.emitter.emit_current_doctype(slf.reader.position());
1730 Ok(ControlToken::Eof)
1731 }
1732 Some(_) => Ok(ControlToken::Continue),
1733 },
1734 State::CdataSection => match slf.read_char()? {
1735 Some(']') => {
1736 slf.state = State::CdataSectionBracket;
1737 Ok(ControlToken::Continue)
1738 }
1739 None => {
1740 slf.emit_error(Error::EofInCdata);
1741 Ok(ControlToken::Eof)
1742 }
1743 Some(x) => {
1744 slf.emit_char(x);
1745 Ok(ControlToken::Continue)
1746 }
1747 },
1748 State::CdataSectionBracket => match slf.read_char()? {
1749 Some(']') => {
1750 slf.state = State::CdataSectionEnd;
1751 slf.some_offset = slf.position_before_match;
1752 Ok(ControlToken::Continue)
1753 }
1754 c => {
1755 slf.emit_char(']');
1756 slf.state = State::CdataSection;
1757 slf.unread_char(c);
1758 Ok(ControlToken::Continue)
1759 }
1760 },
1761 State::CdataSectionEnd => match slf.read_char()? {
1762 Some(']') => {
1763 slf.emit_char(']');
1764 Ok(ControlToken::Continue)
1765 }
1766 Some('>') => {
1767 slf.state = State::Data;
1768 Ok(ControlToken::Continue)
1769 }
1770 c => {
1771 slf.emit_chars(b"]]");
1772 slf.unread_char(c);
1773 slf.state = State::CdataSection;
1774 Ok(ControlToken::Continue)
1775 }
1776 },
1777 State::CharacterReference => {
1778 slf.some_offset =
1780 slf.reader.position() - slf.reader.len_of_char_in_current_encoding('&');
1781 slf.temporary_buffer.clear();
1782 slf.temporary_buffer.push('&');
1783 match slf.read_char()? {
1784 Some(x) if x.is_ascii_alphanumeric() => {
1785 slf.unread_char(Some(x));
1786 slf.state = State::NamedCharacterReference;
1787 Ok(ControlToken::Continue)
1788 }
1789 Some('#') => {
1790 slf.temporary_buffer.push('#');
1791 slf.state = State::NumericCharacterReference;
1792 Ok(ControlToken::Continue)
1793 }
1794 c => {
1795 slf.flush_code_points_consumed_as_character_reference();
1796 slf.state = slf.return_state.take().unwrap();
1797 slf.unread_char(c);
1798 Ok(ControlToken::Continue)
1799 }
1800 }
1801 }
1802 State::NamedCharacterReference => {
1803 let first_char = slf.read_char()?.unwrap(); let Some(char_ref) =
1806 try_read_character_reference(first_char, |x| slf.try_read_string(x, true))?
1807 else {
1808 slf.unread_char(Some(first_char));
1809
1810 debug_assert_eq!(slf.temporary_buffer, "&");
1811 slf.temporary_buffer.clear();
1812
1813 if slf.is_consumed_as_part_of_an_attribute() {
1814 slf.emitter.push_attribute_value("&");
1815 } else {
1816 slf.emitter.emit_char(
1817 '&',
1818 slf.some_offset
1819 ..slf.some_offset + slf.reader.len_of_char_in_current_encoding('&'),
1820 );
1821 }
1822
1823 slf.state = State::AmbiguousAmpersand;
1824 return Ok(ControlToken::Continue);
1825 };
1826
1827 slf.temporary_buffer.push(first_char);
1828 slf.temporary_buffer.push_str(char_ref.name);
1829 let char_ref_name_last_character = char_ref.name.chars().last();
1830
1831 let next_character = slf.read_char()?;
1832 slf.unread_char(next_character);
1833
1834 if slf.is_consumed_as_part_of_an_attribute()
1835 && char_ref_name_last_character != Some(';')
1836 && matches!(next_character, Some(x) if x == '=' || x.is_ascii_alphanumeric())
1837 {
1838 slf.flush_code_points_consumed_as_character_reference();
1839 slf.state = slf.return_state.take().unwrap();
1840 Ok(ControlToken::Continue)
1841 } else {
1842 if char_ref_name_last_character != Some(';') {
1843 slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1844 }
1845
1846 if slf.is_consumed_as_part_of_an_attribute() {
1847 slf.temporary_buffer.clear();
1848 slf.temporary_buffer.push_str(char_ref.characters);
1849 slf.emitter.push_attribute_value(&slf.temporary_buffer);
1850 } else {
1851 for c in char_ref.characters.chars() {
1852 slf.emitter.emit_char(
1853 c,
1854 slf.some_offset
1855 ..slf.reader.position()
1856 - slf.reader.len_of_char_in_current_encoding(c),
1857 );
1858 }
1859 }
1860 slf.state = slf.return_state.take().unwrap();
1861 Ok(ControlToken::Continue)
1862 }
1863 }
1864 State::AmbiguousAmpersand => match slf.read_char()? {
1865 Some(x) if x.is_ascii_alphanumeric() => {
1866 if slf.is_consumed_as_part_of_an_attribute() {
1867 slf.emitter.push_attribute_value(ctostr!(x));
1868 } else {
1869 slf.emit_char(x);
1870 }
1871
1872 Ok(ControlToken::Continue)
1873 }
1874 c @ Some(';') => {
1875 slf.emit_error(Error::UnknownNamedCharacterReference);
1876 slf.unread_char(c);
1877 slf.state = slf.return_state.take().unwrap();
1878 Ok(ControlToken::Continue)
1879 }
1880 c => {
1881 slf.unread_char(c);
1882 slf.state = slf.return_state.take().unwrap();
1883 Ok(ControlToken::Continue)
1884 }
1885 },
1886 State::NumericCharacterReference => {
1887 slf.character_reference_code = 0;
1888 match slf.read_char()? {
1889 Some(x @ 'x' | x @ 'X') => {
1890 slf.temporary_buffer.push(x);
1891 slf.state = State::HexadecimalCharacterReferenceStart;
1892 Ok(ControlToken::Continue)
1893 }
1894 c => {
1895 slf.unread_char(c);
1896 slf.state = State::DecimalCharacterReferenceStart;
1897 Ok(ControlToken::Continue)
1898 }
1899 }
1900 }
1901 State::HexadecimalCharacterReferenceStart => match slf.read_char()? {
1902 c @ Some('0'..='9' | 'A'..='F' | 'a'..='f') => {
1903 slf.unread_char(c);
1904 slf.state = State::HexadecimalCharacterReference;
1905 Ok(ControlToken::Continue)
1906 }
1907 c => {
1908 slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
1909 slf.flush_code_points_consumed_as_character_reference();
1910 slf.unread_char(c);
1911 slf.state = slf.return_state.take().unwrap();
1912 Ok(ControlToken::Continue)
1913 }
1914 },
1915 State::DecimalCharacterReferenceStart => match slf.read_char()? {
1916 Some(x @ ascii_digit_pat!()) => {
1917 slf.unread_char(Some(x));
1918 slf.state = State::DecimalCharacterReference;
1919 Ok(ControlToken::Continue)
1920 }
1921 c => {
1922 slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
1923 slf.flush_code_points_consumed_as_character_reference();
1924 slf.unread_char(c);
1925 slf.state = slf.return_state.take().unwrap();
1926 Ok(ControlToken::Continue)
1927 }
1928 },
1929 State::HexadecimalCharacterReference => match slf.read_char()? {
1930 Some(x @ ascii_digit_pat!()) => {
1931 mutate_character_reference!(*16 + x - 0x0030);
1932 Ok(ControlToken::Continue)
1933 }
1934 Some(x @ 'A'..='F') => {
1935 mutate_character_reference!(*16 + x - 0x0037);
1936 Ok(ControlToken::Continue)
1937 }
1938 Some(x @ 'a'..='f') => {
1939 mutate_character_reference!(*16 + x - 0x0057);
1940 Ok(ControlToken::Continue)
1941 }
1942 Some(';') => {
1943 slf.state = State::NumericCharacterReferenceEnd;
1944 Ok(ControlToken::Continue)
1945 }
1946 c => {
1947 slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1948 slf.unread_char(c);
1949 slf.state = State::NumericCharacterReferenceEnd;
1950 Ok(ControlToken::Continue)
1951 }
1952 },
1953 State::DecimalCharacterReference => match slf.read_char()? {
1954 Some(x @ ascii_digit_pat!()) => {
1955 mutate_character_reference!(*10 + x - 0x0030);
1956 Ok(ControlToken::Continue)
1957 }
1958 Some(';') => {
1959 slf.state = State::NumericCharacterReferenceEnd;
1960 Ok(ControlToken::Continue)
1961 }
1962 c => {
1963 slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
1964 slf.unread_char(c);
1965 slf.state = State::NumericCharacterReferenceEnd;
1966 Ok(ControlToken::Continue)
1967 }
1968 },
1969 State::NumericCharacterReferenceEnd => {
1970 match slf.character_reference_code {
1971 0x00 => {
1972 slf.emit_error(Error::NullCharacterReference);
1973 slf.character_reference_code = 0xfffd;
1974 }
1975 0x110000.. => {
1976 slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);
1977 slf.character_reference_code = 0xfffd;
1978 }
1979 surrogate_pat!() => {
1980 slf.emit_error(Error::SurrogateCharacterReference);
1981 slf.character_reference_code = 0xfffd;
1982 }
1983 noncharacter_pat!() => {
1985 slf.emit_error(Error::NoncharacterCharacterReference);
1986 }
1987 x @ 0x000d | x @ control_pat!()
1989 if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>
1990 {
1991 slf.emit_error(Error::ControlCharacterReference);
1992 slf.character_reference_code = match x {
1993 0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152, 0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C, 0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014, 0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A, 0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178, _ => slf.character_reference_code,
2021 };
2022 }
2023 _ => (),
2024 }
2025
2026 let char = std::char::from_u32(slf.character_reference_code).unwrap();
2027
2028 if slf.is_consumed_as_part_of_an_attribute() {
2029 slf.temporary_buffer.clear();
2030 slf.temporary_buffer.push(char);
2031 slf.emitter.push_attribute_value(&slf.temporary_buffer);
2032 } else {
2033 slf.emitter
2034 .emit_char(char, slf.some_offset..slf.reader.position());
2035 }
2036 slf.state = slf.return_state.take().unwrap();
2037 Ok(ControlToken::Continue)
2038 }
2039 }
2040}
2041
2042#[inline]
2043pub(super) fn handle_cdata_open<O, R, E>(
2044 slf: &mut Machine<R, O, E>,
2045 adjusted_current_node_present_and_not_in_html_namespace: bool,
2046) where
2047 O: Offset,
2048 R: Reader + Position<O>,
2049 E: Emitter<O>,
2050{
2051 if adjusted_current_node_present_and_not_in_html_namespace {
2052 slf.state = State::CdataSection;
2053 } else {
2054 slf.emit_error(Error::CdataInHtmlContent);
2055
2056 slf.emitter.init_comment(slf.reader.position());
2057 slf.emitter.push_comment("[CDATA[");
2058 slf.state = State::BogusComment;
2059 }
2060}
2061
/// A tiny last-in-first-out stack that can hold at most two `Copy` values
/// without any heap allocation.
///
/// Representation: the outer `Option` is `None` while the stack is empty; the
/// tuple's first field is the bottom element and the inner `Option` is the
/// element on top of it, if any.
#[derive(Debug, Default, Clone, Copy)]
struct Stack2<T: Copy>(Option<(T, Option<T>)>);

impl<T: Copy> Stack2<T> {
    /// Pushes `c` on top of the stack.
    ///
    /// # Panics
    ///
    /// Panics with `"stack full!"` if the stack already holds two elements;
    /// the stored elements are left untouched in that case.
    #[inline]
    fn push(&mut self, c: T) {
        // Empty stack: `c` becomes the bottom element.
        let Some((bottom, top)) = self.0 else {
            self.0 = Some((c, None));
            return;
        };

        // One element is present, so the only free slot is the top one.
        assert!(top.is_none(), "stack full!");
        self.0 = Some((bottom, Some(c)));
    }

    /// Removes and returns the most recently pushed element, or `None` when
    /// the stack is empty.
    #[inline]
    fn pop(&mut self) -> Option<T> {
        // Empty stack: nothing to change, nothing to return.
        let (bottom, top) = self.0?;

        // If a top element exists only `bottom` stays behind; otherwise the
        // stack becomes empty.
        self.0 = top.map(|_| (bottom, None));
        Some(top.unwrap_or(bottom))
    }
}