#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
#[macro_use]
extern crate educe;
extern crate cow_utils;
extern crate minifier;
extern crate utf8_width;
mod errors;
use core::cmp::Ordering;
use core::fmt::{self, Formatter};
use core::str::{from_utf8_unchecked, FromStr};
use alloc::borrow::Cow;
use alloc::string::String;
use alloc::vec::Vec;
use cow_utils::CowUtils;
pub use minifier::{css, js};
pub use errors::*;
/// Formats a byte buffer as if it were a string, using the `Debug` representation of `str`
/// (surrounding quotes, escaped special characters).
///
/// This is the custom `Debug` method named by the `#[educe(Debug(method = ...))]` attributes
/// on the byte-buffer fields of `HTMLMinifier`.
#[inline]
fn str_bytes_fmt(v: &[u8], f: &mut Formatter) -> Result<(), fmt::Error> {
    // SAFETY: no validation happens here; this relies on the caller only passing bytes that
    // are valid UTF-8.
    let text = unsafe { from_utf8_unchecked(v) };

    write!(f, "{:?}", text)
}
/// The states of the byte-wise state machine driven by `HTMLMinifier::digest`.
#[derive(Educe, Debug, Copy, Clone, Eq, PartialEq)]
#[educe(Default)]
enum Step {
/// Start of the document: whitespace is dropped until the first other byte.
#[educe(Default)]
Initial,
/// Inside text content; the next whitespace byte is remembered and starts a whitespace run.
InitialRemainOneWhitespace,
/// Inside a whitespace run in text content; further whitespace bytes are skipped.
InitialIgnoreWhitespace,
/// Just after `<`.
StartTagInitial,
/// Just after `</`.
EndTagInitial,
/// Reading the name of a start tag (collected, lowercased, into `tag`).
StartTag,
/// Inside a start tag, between attributes.
StartTagIn,
/// Reading an attribute name (collected, lowercased, into `attribute_name`).
StartTagAttributeName,
/// After whitespace that followed an attribute name; a `=` may still follow.
StartTagAttributeNameWaitingValue,
/// Just after the `=` of an attribute, before its value starts.
StartTagAttributeValueInitial,
/// Reading an attribute value that did not start with a quote.
StartTagUnquotedAttributeValue,
/// Reading an attribute value enclosed by the byte stored in `quote`.
StartTagQuotedAttributeValue,
/// Reading the name of an end tag.
EndTag,
/// Waiting for the closing `>` of a tag; any other byte is dropped.
TagEnd,
/// Just after `<!`; decides between a comment (`<!--`) and any other declaration.
Doctype,
/// Inside a comment, looking for its terminator.
Comment,
/// Inside a `<script>` element whose content is copied unchanged.
ScriptDefault,
/// Inside a `<script>` element whose content is buffered and passed to `js::minify`.
ScriptJavaScript,
/// Inside a `<style>` element whose content is copied unchanged.
StyleDefault,
/// Inside a `<style>` element whose content is buffered and passed to `css::minify`.
StyleCSS,
/// Inside a `<pre>` element; content is copied unchanged until `</pre`.
Pre,
/// Inside a `<code>` element when `minify_code` is `false`; content is copied unchanged.
Code,
/// Inside a `<textarea>` element; content is copied unchanged until `</textarea`.
Textarea,
}
/// A streaming HTML minifier.
///
/// Input is fed in chunks through `digest`; the minified output accumulates internally and is
/// read back with `get_html`. The parser state is kept between `digest` calls.
#[derive(Educe, Clone)]
#[educe(Debug, Default(new))]
pub struct HTMLMinifier {
/// Whether comments are removed from the output. Defaults to `true`.
#[educe(Default = true)]
pub remove_comments: bool,
/// Whether the content of `<code>` elements is minified like ordinary text. When `false`, the
/// content is copied unchanged. Defaults to `true`.
#[educe(Default = true)]
pub minify_code: bool,
/// The minified output produced so far.
#[educe(Debug(method = "str_bytes_fmt"))]
out: Vec<u8>,
/// Lowercased name of the start tag currently (or most recently) being parsed.
#[educe(Debug(method = "str_bytes_fmt"))]
tag: Vec<u8>,
/// Name of the attribute currently (or most recently) being parsed; ASCII bytes are lowercased.
#[educe(Debug(method = "str_bytes_fmt"))]
attribute_name: Vec<u8>,
/// Scratch buffer: holds the value of a `type` attribute of `<script>`/`<style>`, and later the
/// element content that is going to be handed to the JavaScript/CSS minifier.
#[educe(Debug(method = "str_bytes_fmt"))]
buffer: Vec<u8>,
/// Current state of the state machine.
step: Step,
/// Progress counter used inside a state, e.g. how many bytes of a closing tag or of a comment
/// delimiter have been matched so far.
step_counter: u8,
/// The quote byte (`"` or `'`) that opened the attribute value being read.
quote: u8,
/// The whitespace byte remembered for the current whitespace run in text content (`b'\n'` once
/// a line feed has been seen in the run), or `0` when no whitespace is pending.
last_space: u8,
/// `true` when the last thing emitted for the current quoted value was a collapsed space.
quoted_value_spacing: bool,
/// `true` while nothing of the current quoted value has been kept yet.
quoted_value_empty: bool,
/// `true` when whitespace inside the current attribute value is collapsed (set for `class`, and
/// for `type` on `<script>`/`<style>`).
in_handled_attribute: bool,
/// `true` when the current attribute is the `type` of a `<script>`/`<style>` element, whose
/// value is collected into `buffer`.
in_attribute_type: bool,
/// Whether the most recent text character was classified by `is_cj`.
last_cj: bool,
}
/// Returns `true` for the bytes this minifier treats as whitespace: `0x09..=0x0D`
/// (tab, LF, VT, FF, CR) and `0x1C..=0x20` (FS, GS, RS, US, space).
#[inline]
fn is_whitespace(e: u8) -> bool {
    (0x09..=0x0D).contains(&e) || (0x1C..=0x20).contains(&e)
}
/// Returns `true` for ASCII control bytes other than the common whitespace controls
/// `0x09..=0x0D` (tab, LF, VT, FF, CR).
///
/// That is `0..=8`, `14..=31` and `127` (DEL). The bytes `0x1C..=0x1F` are therefore reported
/// as control bytes even though `is_whitespace` accepts them as well.
#[inline]
fn is_ascii_control(e: u8) -> bool {
    match e {
        // 14..=16 (SO, SI, DLE) are control bytes too and must not be skipped by the range.
        0..=8 | 14..=31 | 127 => true,
        _ => false,
    }
}
/// Returns `true` when `c` lies in one of the code point ranges this minifier treats as
/// Chinese/Japanese ("CJ") text.
///
/// The result is used to decide whether a line break between two such characters may be
/// dropped without inserting a space.
#[inline]
fn is_cj(c: char) -> bool {
    match c {
        '\u{2E80}'..='\u{2EF3}'
        | '\u{2F00}'..='\u{2FDF}'
        // The original upper bound (U+2FD5) was below the lower bound, so this range could
        // never match. U+2FF0 starts the Ideographic Description Characters block, which
        // ends at U+2FFF.
        | '\u{2FF0}'..='\u{2FFF}'
        | '\u{3005}'..='\u{303B}'
        | '\u{3040}'..='\u{309F}'
        | '\u{30A0}'..='\u{30FF}'
        | '\u{3100}'..='\u{312F}'
        | '\u{31A0}'..='\u{31BA}'
        | '\u{31C0}'..='\u{31E3}'
        | '\u{31F0}'..='\u{31FF}'
        | '\u{32D0}'..='\u{32FF}'
        | '\u{3300}'..='\u{33FF}'
        | '\u{3400}'..='\u{4DBF}'
        | '\u{4E00}'..='\u{9FFF}'
        | '\u{A000}'..='\u{A48C}'
        | '\u{A490}'..='\u{A4C6}'
        | '\u{F900}'..='\u{FAFF}'
        | '\u{FE30}'..='\u{FE4F}'
        | '\u{FF65}'..='\u{FF9D}'
        | '\u{1B000}'..='\u{1B0FE}'
        | '\u{1B100}'..='\u{1B11E}'
        | '\u{1B150}'..='\u{1B152}'
        | '\u{1B164}'..='\u{1B167}'
        | '\u{1F200}'
        | '\u{20000}'..='\u{2A6DF}'
        | '\u{2A700}'..='\u{2B73F}'
        | '\u{2B740}'..='\u{2B81F}'
        | '\u{2B820}'..='\u{2CEAF}'
        | '\u{2CEB0}'..='\u{2EBEF}'
        | '\u{2F800}'..='\u{2FA1F}' => true,
        _ => false,
    }
}
#[inline]
fn is_bytes_cj(bytes: &[u8]) -> bool {
match char::from_str(unsafe { from_utf8_unchecked(bytes) }) {
Ok(c) => is_cj(c),
Err(_) => false,
}
}
impl HTMLMinifier {
    /// Discards the `count` bytes that immediately precede input position `p`.
    ///
    /// `text_bytes[start..p]` is the run of input bytes that has been accepted but not yet
    /// copied to `self.out`. Bytes are discarded from the end of that pending run first; when
    /// the run is shorter than `count`, the remainder is cut from the end of `self.out`. Any
    /// surviving part of the pending run is appended to `self.out`, so the caller is expected
    /// to move `start` afterwards.
    #[inline]
    fn remove(&mut self, text_bytes: &[u8], start: usize, p: usize, count: usize) {
        let pending = p - start;

        match pending.cmp(&count) {
            Ordering::Equal => (),
            Ordering::Greater => self.out.extend_from_slice(&text_bytes[start..(p - count)]),
            Ordering::Less => {
                // `truncate` with a saturating length cannot grow the vector or underflow,
                // even if `out` holds fewer bytes than expected (e.g. after `reset`).
                let kept = self.out.len().saturating_sub(count - pending);

                self.out.truncate(kept);
            }
        }
    }

    /// Updates `in_handled_attribute` / `in_attribute_type` for the attribute whose name is
    /// in `attribute_name`, just before its value is read.
    ///
    /// * `class`: whitespace in the value is collapsed.
    /// * `type` on `<script>`/`<style>`: whitespace is collapsed and the value is collected
    ///   into `buffer` (which is emptied here).
    /// * anything else, including `type` on other tags: the value is left alone.
    #[inline]
    fn set_flags_by_attribute(&mut self) {
        match self.attribute_name.as_slice() {
            b"class" => {
                self.in_handled_attribute = true;
                self.in_attribute_type = false;
            }
            b"type" => {
                match self.tag.as_slice() {
                    b"script" | b"style" => {
                        self.in_handled_attribute = true;
                        self.in_attribute_type = true;
                        self.buffer.clear();
                    }
                    _ => {
                        // Do not inherit the flags of whichever attribute was parsed before.
                        self.in_handled_attribute = false;
                        self.in_attribute_type = false;
                    }
                }
            }
            _ => {
                self.in_handled_attribute = false;
                self.in_attribute_type = false;
            }
        }
    }

    /// Normalises a collected `type` attribute value in `buffer`: HTML entities are decoded
    /// and ASCII letters are lowercased. Does nothing unless `in_attribute_type` is set.
    #[inline]
    fn finish_buffer(&mut self) {
        if !self.in_attribute_type {
            return;
        }

        // SAFETY (both blocks): nothing is validated here; `buffer` is expected to contain
        // only bytes copied from `str` input on character boundaries.
        if let Cow::Owned(attribute_value) =
            html_escape::decode_html_entities(unsafe { from_utf8_unchecked(&self.buffer) })
        {
            self.buffer = attribute_value.into_bytes();
        }

        if let Cow::Owned(attribute_value) =
            unsafe { from_utf8_unchecked(&self.buffer) }.cow_to_ascii_lowercase()
        {
            self.buffer = attribute_value.into_bytes();
        }
    }

    /// Called when the `>` that ends a start tag is found at position `p`; returns the state
    /// to continue with, depending on the tag name in `tag`.
    ///
    /// For `<script>`/`<style>` the decision also depends on `buffer`, which holds the
    /// normalised `type` attribute value: when it is empty or the expected MIME type, the
    /// pending input up to and including `p` is copied to `out`, `start` is moved past `p`,
    /// and `buffer` is emptied so that it can collect the element content for minification.
    #[inline]
    fn end_start_tag_and_get_next_step(
        &mut self,
        text_bytes: &[u8],
        start: &mut usize,
        p: usize,
    ) -> Step {
        // A non-zero `last_space` tells the comment handling that a space was emitted right
        // before the current `<`. That is no longer true once this tag is complete; leaving
        // it set makes a following comment remove one byte too many (the tag's own `>`).
        self.last_space = 0;

        match self.tag.as_slice() {
            b"script" => {
                self.step_counter = 0;

                match self.buffer.as_slice() {
                    b"" | b"application/javascript" => {
                        self.out.extend_from_slice(&text_bytes[*start..=p]);
                        *start = p + 1;
                        self.buffer.clear();

                        Step::ScriptJavaScript
                    }
                    _ => Step::ScriptDefault,
                }
            }
            b"style" => {
                self.step_counter = 0;

                match self.buffer.as_slice() {
                    b"" | b"text/css" => {
                        self.out.extend_from_slice(&text_bytes[*start..=p]);
                        *start = p + 1;
                        self.buffer.clear();

                        Step::StyleCSS
                    }
                    _ => Step::StyleDefault,
                }
            }
            b"pre" => {
                self.step_counter = 0;

                Step::Pre
            }
            b"code" => {
                if self.minify_code {
                    Step::InitialRemainOneWhitespace
                } else {
                    self.step_counter = 0;

                    Step::Code
                }
            }
            b"textarea" => {
                self.step_counter = 0;

                Step::Textarea
            }
            _ => Step::InitialRemainOneWhitespace,
        }
    }
}
impl HTMLMinifier {
/// Discards the output produced so far and returns the parser to its initial state, so that
/// a new document can be digested. The `remove_comments` and `minify_code` settings are kept,
/// and the internal buffers keep their allocations.
#[inline]
pub fn reset(&mut self) {
    self.out.clear();
    self.tag.clear();
    self.attribute_name.clear();
    self.buffer.clear();

    self.step = Step::default();
    self.step_counter = 0;

    // Leftover parser flags from an unfinished document must not leak into the next one;
    // e.g. a stale `last_space` makes the comment handling cut bytes from the (now empty)
    // output.
    self.quote = 0;
    self.last_space = 0;
    self.quoted_value_spacing = false;
    self.quoted_value_empty = false;
    self.in_handled_attribute = false;
    self.in_attribute_type = false;
    self.last_cj = false;
}
/// Feeds the next chunk of HTML into the minifier.
///
/// The chunk is scanned character by character; the minified bytes are appended to the
/// internal output (see `get_html`). The parser state is kept, so a document may be passed in
/// several chunks.
///
/// # Errors
///
/// Returns `HTMLMinifierError::CSSError` when the content of a `<style>` element is rejected
/// by `css::minify`. The chunk is then only partially processed.
pub fn digest<S: AsRef<str>>(&mut self, text: S) -> Result<(), HTMLMinifierError> {
    let text_bytes = text.as_ref().as_bytes();
    let text_length = text_bytes.len();

    self.out.reserve(text_length);

    // `text_bytes[start..p]` is the run of input that has been accepted but not yet copied to
    // `out`. States skip input by moving `start` past it.
    let mut start = 0;
    let mut p = 0;

    while p < text_length {
        let e = text_bytes[p];

        // SAFETY: `text_bytes` comes from a `str` and `p` only advances by whole characters,
        // so `e` is expected to be the first byte of a UTF-8 sequence.
        let width = unsafe { utf8_width::get_width_assume_valid(e) };

        if width == 1 {
            if is_ascii_control(e) {
                // Control bytes are dropped in every state.
                self.out.extend_from_slice(&text_bytes[start..p]);
                start = p + 1;
            } else {
                self.digest_single_byte(text_bytes, &mut start, p, e)?;
            }
        } else {
            self.digest_wide_char(text_bytes, &mut start, p, width);
        }

        p += width;
    }

    self.out.extend_from_slice(&text_bytes[start..p]);

    Ok(())
}

/// Leaves a tag and continues with ordinary text content.
fn resume_text(&mut self) {
    self.last_cj = false;
    self.last_space = 0;
    self.step = Step::InitialRemainOneWhitespace;
}

/// Copies the pending input before `p` to `out` and skips the byte at `p`.
fn flush_and_skip(&mut self, text_bytes: &[u8], start: &mut usize, p: usize) {
    self.out.extend_from_slice(&text_bytes[*start..p]);
    *start = p + 1;
}

/// Starts a new attribute whose name begins with `first`, emitting the single space that
/// separates it from what precedes it in the tag.
fn begin_attribute(&mut self, first: &[u8]) {
    self.out.push(b' ');
    self.attribute_name.clear();
    self.attribute_name.extend_from_slice(first);
    self.step = Step::StartTagAttributeName;
}

/// Advances the case-insensitive matcher for `</name` by one byte and returns the new number
/// of matched bytes. A mismatching `<` restarts the match instead of discarding it.
fn closing_tag_progress(counter: u8, e: u8, name: &[u8]) -> u8 {
    let matched = usize::from(counter);

    let hit = match matched {
        0 => e == b'<',
        1 => e == b'/',
        _ => name.get(matched - 2).map_or(false, |&c| c == e.to_ascii_lowercase()),
    };

    if hit {
        counter + 1
    } else if e == b'<' {
        1
    } else {
        0
    }
}

/// Handles one byte inside an element whose content is not parsed as HTML (`<script>`,
/// `<style>`, `<pre>`, `<code>`, `<textarea>`), looking for the closing tag `</name`.
///
/// When `buffered` is `true` the content is collected into `buffer` instead of being copied,
/// and is minified as JavaScript or CSS (depending on the current state) once the closing tag
/// is found.
fn digest_raw_text_byte(
    &mut self,
    text_bytes: &[u8],
    start: &mut usize,
    p: usize,
    e: u8,
    name: &[u8],
    buffered: bool,
) -> Result<(), HTMLMinifierError> {
    if buffered {
        debug_assert_eq!(*start, p);
        *start = p + 1;
        self.buffer.push(e);
    }

    // `</` plus the tag name.
    let full_match = name.len() + 2;

    if usize::from(self.step_counter) < full_match {
        self.step_counter = Self::closing_tag_progress(self.step_counter, e, name);

        return Ok(());
    }

    // `</name` has been matched; only `>` or whitespace make it a real closing tag.
    let ends_tag = e == b'>';

    if !ends_tag && !is_whitespace(e) {
        self.step_counter = if e == b'<' { 1 } else { 0 };

        return Ok(());
    }

    if buffered {
        let buffer_length = self.buffer.len();
        // The buffer ends with `</name` followed by the current byte.
        let content_length = buffer_length - (name.len() + 3);
        // A trailing whitespace byte is not emitted.
        let tail_end = if ends_tag { buffer_length } else { buffer_length - 1 };

        {
            // SAFETY: nothing is validated here; `buffer` only receives bytes copied from
            // `str` input, and the cut is made right before the ASCII `<`.
            let content = unsafe { from_utf8_unchecked(&self.buffer[..content_length]) };

            if self.step == Step::ScriptJavaScript {
                let minified_js = js::minify(content);

                self.out.extend_from_slice(minified_js.as_bytes());
            } else {
                let minified_css = css::minify(content)
                    .map_err(|error| HTMLMinifierError::CSSError(error))?;

                self.out.extend_from_slice(minified_css.as_bytes());
            }
        }

        self.out.extend_from_slice(&self.buffer[content_length..tail_end]);

        // The content must not be mistaken for a `type` value by a later tag.
        self.buffer.clear();
    } else if !ends_tag {
        self.flush_and_skip(text_bytes, start, p);
    }

    if ends_tag {
        self.resume_text();
    } else {
        self.step = Step::TagEnd;
    }

    Ok(())
}

/// Handles the single-byte (ASCII, non-control) character `e` found at position `p`.
fn digest_single_byte(
    &mut self,
    text_bytes: &[u8],
    start: &mut usize,
    p: usize,
    e: u8,
) -> Result<(), HTMLMinifierError> {
    match self.step {
        Step::Initial => {
            if e == b'<' {
                // No space has been emitted before this `<`.
                self.last_space = 0;
                self.step = Step::StartTagInitial;
            } else if is_whitespace(e) {
                debug_assert_eq!(*start, p);
                *start = p + 1;
            } else {
                self.resume_text();
            }
        }
        Step::InitialRemainOneWhitespace => {
            if is_whitespace(e) {
                self.flush_and_skip(text_bytes, start, p);
                self.last_space = e;
                self.step = Step::InitialIgnoreWhitespace;
            } else if e == b'<' {
                // No space has been emitted before this `<`.
                self.last_space = 0;
                self.step = Step::StartTagInitial;
            } else {
                self.last_cj = false;
                self.last_space = 0;
            }
        }
        Step::InitialIgnoreWhitespace => match e {
            b'\n' => {
                debug_assert_eq!(*start, p);
                *start = p + 1;
                self.last_space = b'\n';
            }
            0x09 | 0x0B..=0x0D | 0x1C..=0x20 => {
                debug_assert_eq!(*start, p);
                *start = p + 1;
            }
            b'<' => {
                // `last_space` stays set: it tells the comment handling that a space was
                // emitted right before this `<`.
                if self.last_space > 0 {
                    self.out.push(b' ');
                }

                self.step = Step::StartTagInitial;
            }
            _ => {
                if self.last_space > 0 {
                    self.out.push(b' ');
                }

                self.resume_text();
            }
        },
        Step::StartTagInitial => match e {
            b'/' => self.step = Step::EndTagInitial,
            b'!' => {
                self.step_counter = 0;
                self.step = Step::Doctype;
            }
            b'>' => {
                // `<>` is dropped.
                self.remove(text_bytes, *start, p, 1);
                *start = p + 1;
                self.resume_text();
            }
            _ => {
                if is_whitespace(e) {
                    self.flush_and_skip(text_bytes, start, p);
                    self.last_space = e;
                    self.step = Step::InitialIgnoreWhitespace;
                } else {
                    self.tag.clear();
                    self.tag.push(e.to_ascii_lowercase());
                    // `buffer` may still hold the `type` value or content of an earlier
                    // element; it must describe this tag only.
                    self.buffer.clear();
                    self.step = Step::StartTag;
                }
            }
        },
        Step::EndTagInitial => match e {
            b'>' => {
                // `</>` is dropped.
                self.remove(text_bytes, *start, p, 2);
                *start = p + 1;
                self.resume_text();
            }
            _ => {
                if is_whitespace(e) {
                    self.flush_and_skip(text_bytes, start, p);
                    self.last_space = e;
                    self.step = Step::InitialIgnoreWhitespace;
                } else {
                    self.step = Step::EndTag;
                }
            }
        },
        Step::StartTag => {
            if is_whitespace(e) {
                self.flush_and_skip(text_bytes, start, p);
                self.step = Step::StartTagIn;
            } else {
                match e {
                    b'/' => self.step = Step::TagEnd,
                    b'>' => {
                        self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p)
                    }
                    _ => self.tag.push(e.to_ascii_lowercase()),
                }
            }
        }
        Step::StartTagIn => match e {
            b'/' => self.step = Step::TagEnd,
            b'>' => self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p),
            _ => {
                if is_whitespace(e) {
                    debug_assert_eq!(*start, p);
                    *start = p + 1;
                } else {
                    self.begin_attribute(&[e.to_ascii_lowercase()]);
                }
            }
        },
        Step::StartTagAttributeName => match e {
            b'/' => self.step = Step::TagEnd,
            b'>' => self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p),
            b'=' => {
                self.set_flags_by_attribute();
                self.step = Step::StartTagAttributeValueInitial;
            }
            _ => {
                if is_whitespace(e) {
                    self.flush_and_skip(text_bytes, start, p);
                    self.step = Step::StartTagAttributeNameWaitingValue;
                } else {
                    self.attribute_name.push(e.to_ascii_lowercase());
                }
            }
        },
        Step::StartTagAttributeNameWaitingValue => match e {
            b'/' => self.step = Step::TagEnd,
            b'>' => self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p),
            b'=' => {
                self.set_flags_by_attribute();
                self.step = Step::StartTagAttributeValueInitial;
            }
            _ => {
                if is_whitespace(e) {
                    debug_assert_eq!(*start, p);
                    *start = p + 1;
                } else {
                    self.begin_attribute(&[e.to_ascii_lowercase()]);
                }
            }
        },
        Step::StartTagAttributeValueInitial => match e {
            b'/' => {
                // The value is missing: drop the `=`.
                self.remove(text_bytes, *start, p, 1);
                *start = p;
                self.step = Step::TagEnd;
            }
            b'>' => {
                // The value is missing: drop the `=`.
                self.remove(text_bytes, *start, p, 1);
                *start = p;
                self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p);
            }
            b'"' | b'\'' => {
                self.quoted_value_spacing = false;
                self.quoted_value_empty = true;
                self.quote = e;
                self.step = Step::StartTagQuotedAttributeValue;
            }
            _ => {
                if is_whitespace(e) {
                    self.flush_and_skip(text_bytes, start, p);
                } else {
                    if self.in_attribute_type {
                        self.buffer.push(e);
                    }

                    self.step = Step::StartTagUnquotedAttributeValue;
                }
            }
        },
        Step::StartTagQuotedAttributeValue => {
            if e == self.quote {
                if self.quoted_value_empty {
                    // Nothing was kept: drop `="` and the closing quote as well.
                    self.remove(text_bytes, *start, p, 2);
                    *start = p + 1;
                } else if self.quoted_value_spacing {
                    // Drop the trailing collapsed space.
                    self.remove(text_bytes, *start, p, 1);
                    *start = p;

                    if self.in_attribute_type {
                        self.buffer.pop();
                    }
                }

                self.finish_buffer();
                self.out.extend_from_slice(&text_bytes[*start..=p]);
                *start = p + 1;
                self.step = Step::StartTagIn;
            } else if self.in_handled_attribute && is_whitespace(e) {
                if self.quoted_value_empty {
                    // Leading whitespace is dropped.
                    self.flush_and_skip(text_bytes, start, p);
                } else if self.quoted_value_spacing {
                    debug_assert_eq!(*start, p);
                    *start = p + 1;
                } else {
                    // The first whitespace byte of a run becomes a single space.
                    self.flush_and_skip(text_bytes, start, p);
                    self.out.push(b' ');

                    if self.in_attribute_type {
                        self.buffer.push(b' ');
                    }

                    self.quoted_value_spacing = true;
                    self.quoted_value_empty = false;
                }
            } else {
                self.quoted_value_spacing = false;
                self.quoted_value_empty = false;

                if self.in_attribute_type {
                    self.buffer.push(e);
                }
            }
        }
        Step::StartTagUnquotedAttributeValue => match e {
            b'>' => {
                self.finish_buffer();
                self.last_cj = false;
                self.last_space = 0;
                // The tag decides what follows (`<pre class=a>` must still protect its
                // content), exactly as when the `>` comes after a quoted value.
                self.step = self.end_start_tag_and_get_next_step(text_bytes, start, p);
            }
            _ => {
                if is_whitespace(e) {
                    self.finish_buffer();
                    self.flush_and_skip(text_bytes, start, p);
                    self.step = Step::StartTagIn;
                } else if self.in_attribute_type {
                    self.buffer.push(e);
                }
            }
        },
        Step::EndTag => {
            if is_whitespace(e) {
                self.flush_and_skip(text_bytes, start, p);
                self.step = Step::TagEnd;
            } else if e == b'>' {
                self.resume_text();
            }
        }
        Step::TagEnd => {
            if e == b'>' {
                self.resume_text();
            } else {
                self.flush_and_skip(text_bytes, start, p);
            }
        }
        Step::Doctype => {
            if e == b'>' {
                self.resume_text();
            } else {
                match self.step_counter {
                    0 => self.step_counter = if e == b'-' { 1 } else { 255 },
                    1 => {
                        if e == b'-' {
                            if self.remove_comments {
                                // Drop `<!-` and, if one was emitted right before the
                                // comment, the separating space.
                                let count = if self.last_space > 0 { 4 } else { 3 };

                                self.remove(text_bytes, *start, p, count);
                            } else {
                                self.out.extend_from_slice(&text_bytes[*start..=p]);
                            }

                            *start = p + 1;
                            self.step_counter = 0;
                            self.step = Step::Comment;
                        } else {
                            self.step_counter = 255;
                        }
                    }
                    // Not a comment: wait for the `>`.
                    255 => (),
                    _ => unreachable!(),
                }
            }
        }
        Step::Comment => {
            if self.remove_comments {
                debug_assert_eq!(*start, p);
                *start = p + 1;
            }

            match self.step_counter {
                0 => {
                    if e == b'-' {
                        self.step_counter = 1;
                    }
                }
                1 => self.step_counter = if e == b'-' { 2 } else { 0 },
                2 => match e {
                    b'>' => {
                        if self.remove_comments && self.last_space > 0 {
                            // The space before the comment was removed together with it;
                            // it is emitted again before the next visible character.
                            self.step = Step::InitialIgnoreWhitespace;
                        } else {
                            // A kept comment still has pending bytes, which the
                            // whitespace-skipping state would drop.
                            self.last_space = 0;
                            self.step = Step::InitialRemainOneWhitespace;
                        }
                    }
                    // `--->` also ends a comment: stay on the last two dashes.
                    b'-' => (),
                    _ => self.step_counter = 0,
                },
                _ => unreachable!(),
            }
        }
        Step::ScriptDefault => {
            self.digest_raw_text_byte(text_bytes, start, p, e, b"script", false)?
        }
        Step::ScriptJavaScript => {
            self.digest_raw_text_byte(text_bytes, start, p, e, b"script", true)?
        }
        Step::StyleDefault => {
            self.digest_raw_text_byte(text_bytes, start, p, e, b"style", false)?
        }
        Step::StyleCSS => self.digest_raw_text_byte(text_bytes, start, p, e, b"style", true)?,
        Step::Pre => self.digest_raw_text_byte(text_bytes, start, p, e, b"pre", false)?,
        Step::Code => self.digest_raw_text_byte(text_bytes, start, p, e, b"code", false)?,
        Step::Textarea => {
            self.digest_raw_text_byte(text_bytes, start, p, e, b"textarea", false)?
        }
    }

    Ok(())
}

/// Handles the multi-byte character that occupies `text_bytes[p..p + width]`.
fn digest_wide_char(&mut self, text_bytes: &[u8], start: &mut usize, p: usize, width: usize) {
    let char_bytes = &text_bytes[p..(p + width)];

    match self.step {
        Step::Initial | Step::InitialRemainOneWhitespace => {
            // Every range accepted by `is_cj` needs at least three bytes in UTF-8.
            self.last_cj = width > 2 && is_bytes_cj(char_bytes);
            self.last_space = 0;
            self.step = Step::InitialRemainOneWhitespace;
        }
        Step::InitialIgnoreWhitespace => {
            let cj = width > 2 && is_bytes_cj(char_bytes);

            // A line break between two CJ characters is dropped without leaving a space.
            if self.last_space > 0 && (self.last_space != b'\n' || !(cj && self.last_cj)) {
                self.out.push(b' ');
            }

            self.last_cj = cj;
            self.last_space = 0;
            self.step = Step::InitialRemainOneWhitespace;
        }
        Step::StartTagInitial | Step::EndTagInitial | Step::StartTag | Step::EndTag => {
            // Treated as text rather than as part of a tag.
            self.resume_text();
        }
        Step::StartTagIn | Step::StartTagAttributeNameWaitingValue => {
            self.begin_attribute(char_bytes);
        }
        Step::StartTagAttributeName => {
            self.attribute_name.extend_from_slice(char_bytes);
        }
        Step::StartTagAttributeValueInitial => {
            if self.in_attribute_type {
                self.buffer.extend_from_slice(char_bytes);
            }

            self.step = Step::StartTagUnquotedAttributeValue;
        }
        Step::StartTagQuotedAttributeValue => {
            self.quoted_value_spacing = false;
            self.quoted_value_empty = false;

            if self.in_attribute_type {
                self.buffer.extend_from_slice(char_bytes);
            }
        }
        Step::StartTagUnquotedAttributeValue => {
            if self.in_attribute_type {
                self.buffer.extend_from_slice(char_bytes);
            }
        }
        Step::TagEnd => {
            self.out.extend_from_slice(&text_bytes[*start..p]);
            *start = p + width;
        }
        Step::Doctype => {
            self.step_counter = 255;
        }
        Step::Comment => {
            if self.remove_comments {
                debug_assert_eq!(*start, p);
                *start = p + width;
            }

            self.step_counter = 0;
        }
        Step::ScriptDefault
        | Step::StyleDefault
        | Step::Pre
        | Step::Code
        | Step::Textarea => {
            self.step_counter = 0;
        }
        Step::ScriptJavaScript | Step::StyleCSS => {
            debug_assert_eq!(*start, p);
            *start = p + width;
            self.buffer.extend_from_slice(char_bytes);
            self.step_counter = 0;
        }
    }
}
/// Returns the minified HTML produced by the `digest` calls made so far.
#[inline]
pub fn get_html(&mut self) -> &str {
    let bytes: &[u8] = &self.out;

    // SAFETY: nothing is validated here; `out` is expected to contain only whole characters
    // copied from `str` input plus ASCII bytes added by the minifier.
    unsafe { from_utf8_unchecked(bytes) }
}
}
/// Minifies a complete HTML document in one call, using a fresh `HTMLMinifier` with its
/// default settings.
///
/// # Errors
///
/// Forwards any error returned by `HTMLMinifier::digest`.
#[inline]
pub fn minify<S: AsRef<str>>(html: S) -> Result<String, HTMLMinifierError> {
    let source: &str = html.as_ref();
    let mut one_shot = HTMLMinifier::new();

    one_shot.digest(source)?;

    let minified: String = one_shot.get_html().into();

    Ok(minified)
}