html_minifier/html_minifier_helper.rs
1use std::{borrow::Cow, str::from_utf8_unchecked};
2
3use cow_utils::CowUtils;
4use educe::Educe;
5pub use minifier::{css, js};
6
7use crate::{functions::*, HTMLMinifierError, HTMLWriter};
8
/// The states of the streaming HTML minifier.
///
/// In the per-variant notes below, `?` marks the position of the byte that is about to be
/// processed while the state machine is in that state.
#[derive(Educe, Debug, Copy, Clone, Eq, PartialEq)]
#[educe(Default)]
enum Step {
    /// `?` — nothing but skipped whitespace has been seen yet. This is the default state.
    #[educe(Default)]
    Initial,
    /// `a?` — inside text; the next whitespace byte is remembered in `last_space`.
    InitialRemainOneWhitespace,
    /// `a ?` — inside text after whitespace; further whitespace is skipped.
    InitialIgnoreWhitespace,
    /// `<?` — right after `<`.
    StartTagInitial,
    /// `</?` — right after `</`.
    EndTagInitial,
    /// `<a?` — reading the name of a start tag.
    StartTag,
    /// `<a ?` — inside a start tag, between attributes.
    StartTagIn,
    /// `<a a?` — reading an attribute name.
    StartTagAttributeName,
    /// `<a a ?` — after an attribute name and whitespace; a `=` may still follow.
    StartTagAttributeNameWaitingValue,
    /// `<a a=?` — right after the `=` of an attribute.
    StartTagAttributeValueInitial,
    /// `<a a=v?` — reading an unquoted attribute value.
    StartTagUnquotedAttributeValue,
    /// `<a a="?` or `<a a='?` — reading a quoted attribute value.
    StartTagQuotedAttributeValue,
    /// `</a?` — reading the name of an end tag.
    EndTag,
    /// `<a/?` or `</a ?` — waiting for the `>` that closes the tag.
    TagEnd,
    /// `<!?` — after `<!`; `step_counter` tracks whether a comment opener (`--`) follows.
    Doctype,
    /// `<!--?` — inside a comment; `step_counter` tracks the progress towards `-->`.
    Comment,
    /// Inside a `script` element whose content is not passed to the JavaScript minifier.
    ScriptDefault,
    /// Inside a `script` element whose content is buffered and minified as JavaScript.
    ScriptJavaScript,
    /// Inside a `style` element whose content is not passed to the CSS minifier.
    StyleDefault,
    /// Inside a `style` element whose content is buffered and minified as CSS.
    StyleCSS,
    /// Inside a `pre` element.
    Pre,
    /// Inside a `code` element (only entered when `minify_code` is `false`).
    Code,
    /// Inside a `textarea` element.
    Textarea,
}
37
/// This struct helps you generate and minify your HTML code at the same time. The output destination is outside this struct.
///
/// The helper is a byte-oriented state machine: feed it text with `digest` and it writes the
/// minified result to the `HTMLWriter` passed to each call. The state is kept between calls.
#[derive(Educe, Clone)]
#[educe(Debug, Default(new))]
pub struct HTMLMinifierHelper {
    #[educe(Default = true)]
    /// Remove HTML comments.
    pub remove_comments: bool,
    #[educe(Default = true)]
    /// Minify the content in the `code` element.
    pub minify_code: bool,

    // Buffers
    /// Scratch buffer: holds the lowercased name of the attribute being read, and later the
    /// content of a `script`/`style` element that is waiting to be minified.
    #[educe(Debug(method = "str_bytes_fmt"))]
    buffer: Vec<u8>,
    /// The lowercased name of the start tag being read.
    #[educe(Debug(method = "str_bytes_fmt"))]
    tag: Vec<u8>,
    /// The value of the `type` attribute of the current `script`/`style` start tag.
    #[educe(Debug(method = "str_bytes_fmt"))]
    attribute_type: Vec<u8>,

    // Steps
    /// The current state of the state machine.
    step: Step,
    /// Progress counter inside the states that match a multi-byte sequence
    /// (comment delimiters and the end tags of raw-text elements).
    step_counter: u8,

    // Temp
    /// The quote byte (`"` or `'`) that opened the attribute value being read.
    quote: u8,
    /// The pending whitespace byte of the text being read; `0` means there is none.
    last_space: u8,

    // Flags
    /// Whether whitespace has just been skipped inside a quoted attribute value.
    quoted_value_spacing: bool,
    /// Whether no byte of the quoted attribute value has been written yet.
    quoted_value_empty: bool,
    /// Whether whitespace inside the current quoted attribute value is collapsed.
    in_handled_attribute: bool,
    /// Whether the current attribute value is recorded into `attribute_type`.
    in_attribute_type: bool,
}
71
72impl HTMLMinifierHelper {
73 #[inline]
74 fn set_flags_by_attribute(&mut self) {
75 match self.buffer.as_slice() {
76 b"class" => {
77 self.in_handled_attribute = true;
78 self.in_attribute_type = false;
79 },
80 b"type" => match self.tag.as_slice() {
81 b"script" | b"style" => {
82 self.in_handled_attribute = true;
83 self.in_attribute_type = true;
84 },
85 _ => (),
86 },
87 _ => {
88 self.in_handled_attribute = false;
89 self.in_attribute_type = false;
90 },
91 }
92 }
93
94 #[inline]
95 fn finish_buffer(&mut self) {
96 if self.in_attribute_type {
97 if let Cow::Owned(attribute_value) = html_escape::decode_html_entities(unsafe {
98 from_utf8_unchecked(&self.attribute_type)
99 }) {
100 self.attribute_type = attribute_value.into_bytes();
101 }
102
103 if let Cow::Owned(attribute_value) =
104 unsafe { from_utf8_unchecked(&self.attribute_type) }.cow_to_ascii_lowercase()
105 {
106 self.attribute_type = attribute_value.into_bytes();
107 }
108 }
109 }
110
111 #[inline]
112 fn end_start_tag_and_get_next_step(
113 &mut self,
114 out: &mut impl HTMLWriter,
115 text_bytes: &[u8],
116 start: &mut usize,
117 p: usize,
118 ) -> Result<Step, HTMLMinifierError> {
119 let step = match self.tag.as_slice() {
120 b"script" => {
121 self.step_counter = 0;
122
123 match self.attribute_type.as_slice() {
124 b"" | b"application/javascript" | b"module" => {
125 out.push_bytes(&text_bytes[*start..=p])?;
126 *start = p + 1;
127
128 self.attribute_type.clear();
129 self.buffer.clear();
130
131 Step::ScriptJavaScript
132 },
133 _ => {
134 self.attribute_type.clear();
135
136 Step::ScriptDefault
137 },
138 }
139 },
140 b"style" => {
141 self.step_counter = 0;
142
143 match self.attribute_type.as_slice() {
144 b"" | b"text/css" => {
145 out.push_bytes(&text_bytes[*start..=p])?;
146 *start = p + 1;
147
148 self.attribute_type.clear();
149 self.buffer.clear();
150
151 Step::StyleCSS
152 },
153 _ => {
154 self.attribute_type.clear();
155
156 Step::StyleDefault
157 },
158 }
159 },
160 b"pre" => {
161 self.step_counter = 0;
162 Step::Pre
163 },
164 b"code" => {
165 if self.minify_code {
166 self.last_space = 0;
167
168 Step::InitialRemainOneWhitespace
169 } else {
170 self.step_counter = 0;
171 Step::Code
172 }
173 },
174 b"textarea" => {
175 self.step_counter = 0;
176 Step::Textarea
177 },
178 _ => {
179 self.last_space = 0;
180
181 Step::InitialRemainOneWhitespace
182 },
183 };
184
185 Ok(step)
186 }
187}
188
189impl HTMLMinifierHelper {
190 /// Reset this html minifier helper. The option settings and allocated memory will be be preserved.
191 #[inline]
192 pub fn reset(&mut self) {
193 self.step = Step::default();
194
195 self.attribute_type.clear();
196 }
197
198 /// Input some text to generate HTML code. It is not necessary to input a full HTML text at once.
199 pub fn digest<S: AsRef<[u8]>, W: HTMLWriter>(
200 &mut self,
201 text: S,
202 out: &mut W,
203 ) -> Result<(), HTMLMinifierError> {
204 let text_bytes = text.as_ref();
205 let text_length = text_bytes.len();
206
207 let mut start = 0;
208 let mut p = 0;
209
210 while p < text_length {
211 let e = text_bytes[p];
212
213 if e <= 0x7F {
214 // ASCII
215 if is_ascii_control(e) {
216 out.push_bytes(&text_bytes[start..p])?;
217 start = p + 1;
218 } else {
219 match self.step {
220 Step::Initial => {
221 // ?
222 match e {
223 b'<' => {
224 out.push_bytes(&text_bytes[start..p])?;
225 start = p + 1;
226
227 self.step = Step::StartTagInitial;
228 },
229 _ => {
230 if is_whitespace(e) {
231 debug_assert_eq!(start, p);
232 start = p + 1;
233 } else {
234 self.last_space = 0;
235 self.step = Step::InitialRemainOneWhitespace;
236 }
237 },
238 }
239 },
240 Step::InitialRemainOneWhitespace => {
241 // a?
242 if is_whitespace(e) {
243 out.push_bytes(&text_bytes[start..p])?;
244 start = p + 1;
245
246 self.last_space = e;
247
248 self.step = Step::InitialIgnoreWhitespace;
249 } else if e == b'<' {
250 out.push_bytes(&text_bytes[start..p])?;
251 start = p + 1;
252
253 self.step = Step::StartTagInitial;
254 } else {
255 self.last_space = 0;
256 }
257 },
258 Step::InitialIgnoreWhitespace => {
259 // a ?
260 match e {
261 b'\n' => {
262 debug_assert_eq!(start, p);
263 start = p + 1;
264
265 if self.last_space > 0 {
266 self.last_space = b'\n';
267 }
268 },
269 0x09 | 0x0B..=0x0D | 0x1C..=0x20 => {
270 debug_assert_eq!(start, p);
271 start = p + 1;
272 },
273 b'<' => {
274 // This can just push ' ', but the minified HTML would be ugly
275 if self.last_space == b'\n' {
276 out.push(b'\n')?;
277 } else if self.last_space > 0 {
278 out.push(b' ')?;
279 }
280
281 out.push_bytes(&text_bytes[start..p])?;
282 start = p + 1;
283
284 self.step = Step::StartTagInitial;
285 },
286 _ => {
287 if self.last_space == b'\n' {
288 out.push(b'\n')?;
289 } else if self.last_space > 0 {
290 out.push(b' ')?;
291 }
292
293 self.last_space = 0;
294 self.step = Step::InitialRemainOneWhitespace;
295 },
296 }
297 },
298 Step::StartTagInitial => {
299 debug_assert_eq!(start, p);
300
301 // <?
302 match e {
303 b'/' => {
304 start = p + 1;
305
306 self.step = Step::EndTagInitial;
307 },
308 b'!' => {
309 // <!
310 start = p + 1;
311
312 self.step_counter = 0;
313 self.step = Step::Doctype;
314 },
315 b'>' => {
316 // <>
317 start = p + 1;
318
319 self.last_space = 0;
320 self.step = Step::InitialRemainOneWhitespace;
321 },
322 _ => {
323 out.push(b'<')?;
324
325 if is_whitespace(e) {
326 out.push_bytes(&text_bytes[start..p])?;
327 start = p + 1;
328
329 self.last_space = e;
330
331 self.step = Step::InitialIgnoreWhitespace;
332 } else {
333 self.tag.clear();
334 self.tag.push(e.to_ascii_lowercase());
335
336 self.step = Step::StartTag;
337 }
338 },
339 }
340 },
341 Step::EndTagInitial => {
342 // </?
343 match e {
344 b'>' => {
345 // </>
346 start = p + 1;
347
348 self.last_space = 0;
349 self.step = Step::InitialRemainOneWhitespace;
350 },
351 _ => {
352 out.push_bytes(b"</")?;
353
354 if is_whitespace(e) {
355 start = p + 1;
356
357 self.last_space = e;
358
359 self.step = Step::InitialIgnoreWhitespace;
360 } else {
361 self.step = Step::EndTag;
362 }
363 },
364 }
365 },
366 Step::StartTag => {
367 // <a?
368 if is_whitespace(e) {
369 out.push_bytes(&text_bytes[start..p])?;
370 start = p + 1;
371
372 self.buffer.clear(); // the buffer may be used for the `type` attribute
373
374 self.last_space = 0;
375 self.step = Step::StartTagIn;
376 } else {
377 match e {
378 b'/' => self.step = Step::TagEnd,
379 b'>' => {
380 self.buffer.clear(); // the buffer may be used for the `type` attribute
381
382 self.step = self.end_start_tag_and_get_next_step(
383 out, text_bytes, &mut start, p,
384 )?;
385 },
386 _ => self.tag.push(e.to_ascii_lowercase()),
387 }
388 }
389 },
390 Step::StartTagIn => {
391 // <a ?
392 match e {
393 b'/' => {
394 if self.last_space > 0 {
395 out.push(b' ')?;
396 }
397
398 self.step = Step::TagEnd;
399 },
400 b'>' => {
401 self.step = self.end_start_tag_and_get_next_step(
402 out, text_bytes, &mut start, p,
403 )?;
404 },
405 _ => {
406 if is_whitespace(e) {
407 debug_assert_eq!(start, p);
408 start = p + 1;
409 } else {
410 out.push(b' ')?;
411
412 self.buffer.clear();
413 self.buffer.push(e.to_ascii_lowercase());
414
415 self.step = Step::StartTagAttributeName;
416 }
417 },
418 }
419 },
420 Step::StartTagAttributeName => {
421 // <a a?
422 match e {
423 b'/' => self.step = Step::TagEnd,
424 b'>' => {
425 self.step = self.end_start_tag_and_get_next_step(
426 out, text_bytes, &mut start, p,
427 )?;
428 },
429 b'=' => {
430 out.push_bytes(&text_bytes[start..p])?;
431 start = p + 1;
432
433 self.set_flags_by_attribute();
434
435 self.step = Step::StartTagAttributeValueInitial;
436 },
437 _ => {
438 if is_whitespace(e) {
439 out.push_bytes(&text_bytes[start..p])?;
440 start = p + 1;
441
442 self.step = Step::StartTagAttributeNameWaitingValue;
443 } else {
444 self.buffer.push(e.to_ascii_lowercase());
445 }
446 },
447 }
448 },
449 Step::StartTagAttributeNameWaitingValue => {
450 // <a a ?
451 match e {
452 b'/' => self.step = Step::TagEnd,
453 b'>' => {
454 self.step = self.end_start_tag_and_get_next_step(
455 out, text_bytes, &mut start, p,
456 )?;
457 },
458 b'=' => {
459 out.push_bytes(&text_bytes[start..p])?;
460 start = p + 1;
461
462 self.set_flags_by_attribute();
463
464 self.step = Step::StartTagAttributeValueInitial;
465 },
466 _ => {
467 if is_whitespace(e) {
468 debug_assert_eq!(start, p);
469 start = p + 1;
470 } else {
471 out.push(b' ')?;
472
473 self.buffer.clear();
474 self.buffer.push(e.to_ascii_lowercase());
475
476 self.step = Step::StartTagAttributeName;
477 }
478 },
479 }
480 },
481 Step::StartTagAttributeValueInitial => {
482 // <a a=?
483 debug_assert_eq!(start, p);
484
485 match e {
486 b'/' => {
487 self.step = Step::TagEnd;
488 },
489 b'>' => {
490 self.step = self.end_start_tag_and_get_next_step(
491 out, text_bytes, &mut start, p,
492 )?;
493 },
494 b'"' | b'\'' => {
495 self.quoted_value_spacing = false;
496 self.quoted_value_empty = true;
497
498 start = p + 1;
499
500 self.quote = e;
501 self.step = Step::StartTagQuotedAttributeValue;
502 },
503 _ => {
504 if is_whitespace(e) {
505 start = p + 1;
506 } else {
507 if self.in_attribute_type {
508 self.attribute_type.push(e);
509 }
510
511 out.push(b'=')?;
512
513 self.step = Step::StartTagUnquotedAttributeValue;
514 }
515 },
516 }
517 },
518 Step::StartTagQuotedAttributeValue => {
519 // <a a="?
520 // <a a='?
521 // NOTE: Backslashes cannot be used for escaping.
522 if e == self.quote {
523 if self.quoted_value_empty {
524 start = p + 1;
525 }
526
527 self.finish_buffer();
528
529 out.push_bytes(&text_bytes[start..=p])?;
530 start = p + 1;
531
532 self.last_space = 0;
533 self.step = Step::StartTagIn;
534 } else if self.in_handled_attribute && is_whitespace(e) {
535 if self.quoted_value_empty {
536 start = p + 1;
537 } else if self.quoted_value_spacing {
538 debug_assert_eq!(start, p);
539 start = p + 1;
540 } else {
541 out.push_bytes(&text_bytes[start..p])?;
542 start = p + 1;
543
544 self.quoted_value_spacing = true;
545 self.quoted_value_empty = false;
546 }
547 } else {
548 if self.quoted_value_empty {
549 self.quoted_value_empty = false;
550
551 out.push_bytes(&[b'=', self.quote])?;
552 } else if self.quoted_value_spacing {
553 out.push_bytes(&text_bytes[start..p])?;
554 start = p;
555
556 out.push(b' ')?;
557 }
558
559 if self.in_attribute_type {
560 if self.quoted_value_spacing {
561 self.attribute_type.push(b' ');
562 }
563
564 self.attribute_type.push(e);
565 }
566
567 self.quoted_value_spacing = false;
568 }
569 },
570 Step::StartTagUnquotedAttributeValue => {
571 // <a a=v?
572 // <a a=v?
573 match e {
574 b'>' => {
575 self.finish_buffer();
576
577 self.last_space = 0;
578 self.step = Step::InitialRemainOneWhitespace;
579 },
580 _ => {
581 if is_whitespace(e) {
582 self.finish_buffer();
583
584 out.push_bytes(&text_bytes[start..p])?;
585 start = p + 1;
586
587 self.last_space = e;
588 self.step = Step::StartTagIn;
589 } else if self.in_attribute_type {
590 self.attribute_type.push(e);
591 }
592 },
593 }
594 },
595 Step::EndTag => {
596 // </a?
597 if is_whitespace(e) {
598 out.push_bytes(&text_bytes[start..p])?;
599 start = p + 1;
600
601 self.step = Step::TagEnd;
602 } else if e == b'>' {
603 self.last_space = 0;
604 self.step = Step::InitialRemainOneWhitespace;
605 }
606 },
607 Step::TagEnd => {
608 // <a/?
609 // </a ?
610 match e {
611 b'>' => {
612 self.last_space = 0;
613 self.step = Step::InitialRemainOneWhitespace;
614 },
615 _ => {
616 out.push_bytes(&text_bytes[start..p])?;
617 start = p + 1;
618 },
619 }
620 },
621 Step::Doctype => {
622 // <!?
623 if e == b'>' {
624 if self.step_counter == 0 {
625 out.push_bytes(b"<!")?;
626 }
627
628 self.last_space = 0;
629 self.step = Step::InitialRemainOneWhitespace;
630 } else {
631 match self.step_counter {
632 0 => match e {
633 b'-' => {
634 start = p + 1;
635
636 self.step_counter = 1;
637 },
638 _ => {
639 out.push_bytes(b"<!")?;
640
641 self.step_counter = 255;
642 },
643 },
644 1 => match e {
645 b'-' => {
646 if !self.remove_comments {
647 out.push_bytes(b"<!--")?;
648 }
649
650 start = p + 1;
651
652 self.step_counter = 0;
653 self.step = Step::Comment;
654 },
655 _ => {
656 out.push_bytes(b"<!-")?;
657
658 self.step_counter = 255;
659 },
660 },
661 255 => (),
662 _ => unreachable!(),
663 }
664 }
665 },
666 Step::Comment => {
667 // <!--?
668 if self.remove_comments {
669 debug_assert_eq!(start, p);
670 start = p + 1;
671 }
672
673 match self.step_counter {
674 0 => {
675 if e == b'-' {
676 self.step_counter = 1;
677 }
678 },
679 1 => match e {
680 b'-' => self.step_counter = 2,
681 _ => self.step_counter = 0,
682 },
683 2 => {
684 match e {
685 b'>' => {
686 if self.last_space > 0 {
687 self.last_space = 0;
688
689 self.step = Step::InitialIgnoreWhitespace;
690 } else {
691 // No need to set `last_space`.
692 self.step = Step::InitialRemainOneWhitespace;
693 }
694 },
695 _ => self.step_counter = 0,
696 }
697 },
698 _ => unreachable!(),
699 }
700 },
701 Step::ScriptDefault => match self.step_counter {
702 0 => {
703 if e == b'<' {
704 self.step_counter = 1;
705 }
706 },
707 1 => match e {
708 b'/' => self.step_counter = 2,
709 _ => self.step_counter = 0,
710 },
711 2 => match e {
712 b's' | b'S' => self.step_counter = 3,
713 _ => self.step_counter = 0,
714 },
715 3 => match e {
716 b'c' | b'C' => self.step_counter = 4,
717 _ => self.step_counter = 0,
718 },
719 4 => match e {
720 b'r' | b'R' => self.step_counter = 5,
721 _ => self.step_counter = 0,
722 },
723 5 => match e {
724 b'i' | b'I' => self.step_counter = 6,
725 _ => self.step_counter = 0,
726 },
727 6 => match e {
728 b'p' | b'P' => self.step_counter = 7,
729 _ => self.step_counter = 0,
730 },
731 7 => match e {
732 b't' | b'T' => self.step_counter = 8,
733 _ => self.step_counter = 0,
734 },
735 8 => match e {
736 b'>' => {
737 self.last_space = 0;
738 self.step = Step::InitialRemainOneWhitespace;
739 },
740 _ => {
741 if is_whitespace(e) {
742 out.push_bytes(&text_bytes[start..p])?;
743 start = p + 1;
744
745 self.step = Step::TagEnd;
746 } else {
747 self.step_counter = 0;
748 }
749 },
750 },
751 _ => unreachable!(),
752 },
753 Step::ScriptJavaScript => match self.step_counter {
754 0 => {
755 if e == b'<' {
756 self.step_counter = 1;
757 }
758 },
759 1 => match e {
760 b'/' => self.step_counter = 2,
761 _ => self.step_counter = 0,
762 },
763 2 => match e {
764 b's' | b'S' => self.step_counter = 3,
765 _ => self.step_counter = 0,
766 },
767 3 => match e {
768 b'c' | b'C' => self.step_counter = 4,
769 _ => self.step_counter = 0,
770 },
771 4 => match e {
772 b'r' | b'R' => self.step_counter = 5,
773 _ => self.step_counter = 0,
774 },
775 5 => match e {
776 b'i' | b'I' => self.step_counter = 6,
777 _ => self.step_counter = 0,
778 },
779 6 => match e {
780 b'p' | b'P' => self.step_counter = 7,
781 _ => self.step_counter = 0,
782 },
783 7 => match e {
784 b't' | b'T' => self.step_counter = 8,
785 _ => self.step_counter = 0,
786 },
787 8 => match e {
788 b'>' => {
789 self.buffer.extend_from_slice(&text_bytes[start..=p]);
790 start = p + 1;
791
792 let script_length = self.buffer.len() - 9;
793
794 let minified_js = js::minify(unsafe {
795 from_utf8_unchecked(&self.buffer[..script_length])
796 });
797 out.push_bytes(minified_js.to_string().as_bytes())?;
798 out.push_bytes(&self.buffer[script_length..])?;
799
800 self.last_space = 0;
801 self.step = Step::InitialRemainOneWhitespace;
802 },
803 _ => {
804 if is_whitespace(e) {
805 self.buffer.extend_from_slice(&text_bytes[start..p]);
806 start = p + 1;
807
808 let buffer_length = self.buffer.len();
809 let script_length = buffer_length - 8;
810
811 let minified_js = js::minify(unsafe {
812 from_utf8_unchecked(&self.buffer[..script_length])
813 });
814 out.push_bytes(minified_js.to_string().as_bytes())?;
815 out.push_bytes(&self.buffer[script_length..])?;
816
817 self.step = Step::TagEnd;
818 } else {
819 self.step_counter = 0;
820 }
821 },
822 },
823 _ => unreachable!(),
824 },
825 Step::StyleDefault => match self.step_counter {
826 0 => {
827 if e == b'<' {
828 self.step_counter = 1;
829 }
830 },
831 1 => match e {
832 b'/' => self.step_counter = 2,
833 _ => self.step_counter = 0,
834 },
835 2 => match e {
836 b's' | b'S' => self.step_counter = 3,
837 _ => self.step_counter = 0,
838 },
839 3 => match e {
840 b't' | b'T' => self.step_counter = 4,
841 _ => self.step_counter = 0,
842 },
843 4 => match e {
844 b'y' | b'Y' => self.step_counter = 5,
845 _ => self.step_counter = 0,
846 },
847 5 => match e {
848 b'l' | b'L' => self.step_counter = 6,
849 _ => self.step_counter = 0,
850 },
851 6 => match e {
852 b'e' | b'E' => self.step_counter = 7,
853 _ => self.step_counter = 0,
854 },
855 7 => match e {
856 b'>' => {
857 self.last_space = 0;
858 self.step = Step::InitialRemainOneWhitespace;
859 },
860 _ => {
861 if is_whitespace(e) {
862 out.push_bytes(&text_bytes[start..p])?;
863 start = p + 1;
864
865 self.step = Step::TagEnd;
866 } else {
867 self.step_counter = 0;
868 }
869 },
870 },
871 _ => unreachable!(),
872 },
873 Step::StyleCSS => match self.step_counter {
874 0 => {
875 if e == b'<' {
876 self.step_counter = 1;
877 }
878 },
879 1 => match e {
880 b'/' => self.step_counter = 2,
881 _ => self.step_counter = 0,
882 },
883 2 => match e {
884 b's' | b'S' => self.step_counter = 3,
885 _ => self.step_counter = 0,
886 },
887 3 => match e {
888 b't' | b'T' => self.step_counter = 4,
889 _ => self.step_counter = 0,
890 },
891 4 => match e {
892 b'y' | b'Y' => self.step_counter = 5,
893 _ => self.step_counter = 0,
894 },
895 5 => match e {
896 b'l' | b'L' => self.step_counter = 6,
897 _ => self.step_counter = 0,
898 },
899 6 => match e {
900 b'e' | b'E' => self.step_counter = 7,
901 _ => self.step_counter = 0,
902 },
903 7 => match e {
904 b'>' => {
905 self.buffer.extend_from_slice(&text_bytes[start..=p]);
906 start = p + 1;
907
908 let script_length = self.buffer.len() - 8;
909
910 let minified_css = css::minify(unsafe {
911 from_utf8_unchecked(&self.buffer[..script_length])
912 })
913 .map_err(HTMLMinifierError::CSSError)?;
914 out.push_bytes(minified_css.to_string().as_bytes())?;
915 out.push_bytes(&self.buffer[script_length..])?;
916
917 self.last_space = 0;
918 self.step = Step::InitialRemainOneWhitespace;
919 },
920 _ => {
921 if is_whitespace(e) {
922 self.buffer.extend_from_slice(&text_bytes[start..p]);
923 start = p + 1;
924
925 let buffer_length = self.buffer.len();
926 let script_length = buffer_length - 7;
927
928 let minified_css = css::minify(unsafe {
929 from_utf8_unchecked(&self.buffer[..script_length])
930 })
931 .map_err(HTMLMinifierError::CSSError)?;
932 out.push_bytes(minified_css.to_string().as_bytes())?;
933 out.push_bytes(&self.buffer[script_length..])?;
934
935 self.step = Step::TagEnd;
936 } else {
937 self.step_counter = 0;
938 }
939 },
940 },
941 _ => unreachable!(),
942 },
943 Step::Pre => match self.step_counter {
944 0 => {
945 if e == b'<' {
946 self.step_counter = 1;
947 }
948 },
949 1 => match e {
950 b'/' => self.step_counter = 2,
951 _ => self.step_counter = 0,
952 },
953 2 => match e {
954 b'p' | b'P' => self.step_counter = 3,
955 _ => self.step_counter = 0,
956 },
957 3 => match e {
958 b'r' | b'R' => self.step_counter = 4,
959 _ => self.step_counter = 0,
960 },
961 4 => match e {
962 b'e' | b'E' => self.step_counter = 5,
963 _ => self.step_counter = 0,
964 },
965 5 => match e {
966 b'>' => {
967 self.last_space = 0;
968 self.step = Step::InitialRemainOneWhitespace;
969 },
970 _ => {
971 if is_whitespace(e) {
972 out.push_bytes(&text_bytes[start..p])?;
973 start = p + 1;
974
975 self.step = Step::TagEnd;
976 } else {
977 self.step_counter = 0;
978 }
979 },
980 },
981 _ => unreachable!(),
982 },
983 Step::Code => match self.step_counter {
984 0 => {
985 if e == b'<' {
986 self.step_counter = 1;
987 }
988 },
989 1 => match e {
990 b'/' => self.step_counter = 2,
991 _ => self.step_counter = 0,
992 },
993 2 => match e {
994 b'c' | b'C' => self.step_counter = 3,
995 _ => self.step_counter = 0,
996 },
997 3 => match e {
998 b'o' | b'O' => self.step_counter = 4,
999 _ => self.step_counter = 0,
1000 },
1001 4 => match e {
1002 b'd' | b'D' => self.step_counter = 5,
1003 _ => self.step_counter = 0,
1004 },
1005 5 => match e {
1006 b'e' | b'E' => self.step_counter = 6,
1007 _ => self.step_counter = 0,
1008 },
1009 6 => match e {
1010 b'>' => {
1011 self.last_space = 0;
1012 self.step = Step::InitialRemainOneWhitespace;
1013 },
1014 _ => {
1015 if is_whitespace(e) {
1016 out.push_bytes(&text_bytes[start..p])?;
1017 start = p + 1;
1018
1019 self.step = Step::TagEnd;
1020 } else {
1021 self.step_counter = 0;
1022 }
1023 },
1024 },
1025 _ => unreachable!(),
1026 },
1027 Step::Textarea => match self.step_counter {
1028 0 => {
1029 if e == b'<' {
1030 self.step_counter = 1;
1031 }
1032 },
1033 1 => match e {
1034 b'/' => self.step_counter = 2,
1035 _ => self.step_counter = 0,
1036 },
1037 2 => match e {
1038 b't' | b'T' => self.step_counter = 3,
1039 _ => self.step_counter = 0,
1040 },
1041 3 => match e {
1042 b'e' | b'E' => self.step_counter = 4,
1043 _ => self.step_counter = 0,
1044 },
1045 4 => match e {
1046 b'x' | b'X' => self.step_counter = 5,
1047 _ => self.step_counter = 0,
1048 },
1049 5 => match e {
1050 b't' | b'T' => self.step_counter = 6,
1051 _ => self.step_counter = 0,
1052 },
1053 6 => match e {
1054 b'a' | b'A' => self.step_counter = 7,
1055 _ => self.step_counter = 0,
1056 },
1057 7 => match e {
1058 b'r' | b'R' => self.step_counter = 8,
1059 _ => self.step_counter = 0,
1060 },
1061 8 => match e {
1062 b'e' | b'E' => self.step_counter = 9,
1063 _ => self.step_counter = 0,
1064 },
1065 9 => match e {
1066 b'a' | b'A' => self.step_counter = 10,
1067 _ => self.step_counter = 0,
1068 },
1069 10 => match e {
1070 b'>' => {
1071 self.last_space = 0;
1072 self.step = Step::InitialRemainOneWhitespace;
1073 },
1074 _ => {
1075 if is_whitespace(e) {
1076 out.push_bytes(&text_bytes[start..p])?;
1077 start = p + 1;
1078
1079 self.step = Step::TagEnd;
1080 } else {
1081 self.step_counter = 0;
1082 }
1083 },
1084 },
1085 _ => unreachable!(),
1086 },
1087 }
1088 }
1089 } else {
1090 // non-ASCII
1091 match self.step {
1092 Step::Initial => {
1093 // ?
1094 self.last_space = 0;
1095 self.step = Step::InitialRemainOneWhitespace;
1096 },
1097 Step::InitialRemainOneWhitespace => {
1098 // a?
1099 self.last_space = 0;
1100 },
1101 Step::InitialIgnoreWhitespace => {
1102 // a ?
1103 if self.last_space == b'\n' {
1104 out.push(b'\n')?;
1105 } else if self.last_space > 0 {
1106 out.push(b' ')?;
1107 }
1108
1109 self.last_space = 0;
1110 self.step = Step::InitialRemainOneWhitespace;
1111 },
1112 Step::StartTagInitial => {
1113 // <?
1114 // To `InitialRemainOneWhitespace`.
1115 debug_assert_eq!(start, p);
1116
1117 out.push(b'<')?;
1118
1119 self.last_space = 0;
1120 self.step = Step::InitialRemainOneWhitespace;
1121 },
1122 Step::EndTagInitial => {
1123 // </?
1124 // To `InitialRemainOneWhitespace`.
1125 out.push_bytes(b"</")?;
1126
1127 self.last_space = 0;
1128 self.step = Step::InitialRemainOneWhitespace;
1129 },
1130 Step::StartTag | Step::EndTag => {
1131 // <a?
1132 // </a?
1133 // To `InitialRemainOneWhitespace`.
1134 self.last_space = 0;
1135 self.step = Step::InitialRemainOneWhitespace;
1136 },
1137 Step::StartTagIn => {
1138 // <a ?
1139 out.push(b' ')?;
1140
1141 self.buffer.clear();
1142 self.buffer.push(e);
1143
1144 self.step = Step::StartTagAttributeName;
1145 },
1146 Step::StartTagAttributeName => {
1147 // <a a?
1148 self.buffer.push(e);
1149 },
1150 Step::StartTagAttributeNameWaitingValue => {
1151 // <a a ?
1152 out.push(b' ')?;
1153
1154 self.buffer.clear();
1155 self.buffer.push(e);
1156
1157 self.step = Step::StartTagAttributeName;
1158 },
1159 Step::StartTagAttributeValueInitial => {
1160 // <a a=?
1161 debug_assert_eq!(start, p);
1162
1163 if self.in_attribute_type {
1164 self.attribute_type.push(e);
1165 }
1166
1167 out.push(b'=')?;
1168
1169 self.step = Step::StartTagUnquotedAttributeValue;
1170 },
1171 Step::StartTagQuotedAttributeValue => {
1172 // <a a="?
1173 // <a a='?
1174 if self.quoted_value_empty {
1175 self.quoted_value_empty = false;
1176
1177 out.push_bytes(&[b'=', self.quote])?;
1178 }
1179
1180 self.quoted_value_spacing = false;
1181
1182 if self.in_attribute_type {
1183 self.attribute_type.push(e);
1184 }
1185 },
1186 Step::StartTagUnquotedAttributeValue => {
1187 // <a a=v?
1188 // <a a=v?
1189 if self.in_attribute_type {
1190 self.attribute_type.push(e);
1191 }
1192 },
1193 Step::TagEnd => {
1194 // <a/?
1195 // </a ?
1196 out.push_bytes(&text_bytes[start..p])?;
1197 start = p + 1;
1198 },
1199 Step::Doctype => {
1200 // <!?
1201 if self.step_counter == 0 {
1202 out.push_bytes(b"<!")?;
1203 }
1204
1205 self.step_counter = 255;
1206 },
1207 Step::Comment => {
1208 // <!--?
1209 if self.remove_comments {
1210 debug_assert_eq!(start, p);
1211 start = p + 1;
1212 }
1213
1214 self.step_counter = 0;
1215 },
1216 Step::ScriptDefault
1217 | Step::StyleDefault
1218 | Step::Pre
1219 | Step::Code
1220 | Step::Textarea
1221 | Step::ScriptJavaScript
1222 | Step::StyleCSS => {
1223 self.step_counter = 0;
1224 },
1225 }
1226 }
1227
1228 p += 1;
1229 }
1230
1231 match self.step {
1232 Step::ScriptJavaScript | Step::StyleCSS => {
1233 self.buffer.extend_from_slice(&text_bytes[start..p]);
1234 },
1235 _ => out.push_bytes(&text_bytes[start..p])?,
1236 }
1237
1238 Ok(())
1239 }
1240}