1#![warn(missing_docs)]
2#![deny(warnings, clippy::pedantic, clippy::nursery)]
3
4use std::{cell::RefCell, io, rc::Rc, str};
7
8use html5ever::{
9 parse_document,
10 tendril::{fmt::UTF8, Tendril, TendrilSink},
11 Attribute, ParseOpts, QualName,
12};
13use markup5ever_rcdom::{Node, NodeData, RcDom};
14
/// Minification of HTML held in memory.
pub trait Minify {
    /// Returns a minified copy of the HTML represented by `self` as raw bytes.
    ///
    /// # Errors
    ///
    /// Returns an [`io::Error`] when the implementation fails to read the
    /// input or to write the minified output.
    fn minify(&self) -> io::Result<Vec<u8>>;
}
25
26#[inline]
34pub fn minify<R: io::Read, W: io::Write>(mut r: &mut R, w: &mut W) -> io::Result<()> {
35 Minifier::new(w).minify(&mut r)
36}
37
38impl<T> Minify for T
39where
40 T: AsRef<[u8]>,
41{
42 #[inline]
43 fn minify(&self) -> Result<Vec<u8>, io::Error> {
44 let mut minified = vec![];
45
46 minify(&mut self.as_ref(), &mut minified)?;
47
48 Ok(minified)
49 }
50}
51
/// HTML minifier that writes its output to a borrowed writer.
#[allow(clippy::struct_excessive_bools)]
pub struct Minifier<'a, W>
where
    W: io::Write,
{
    /// Destination of the minified output.
    w: &'a mut W,
    /// When `true`, the leading `<!doctype html>` is not written.
    omit_doctype: bool,
    /// When `true`, runs of whitespace are collapsed and text is trimmed.
    collapse_whitespace: bool,
    /// When `true`, comments are written to the output instead of dropped.
    preserve_comments: bool,
    /// Whether the text written most recently ended with whitespace that was
    /// kept in the output.
    preceding_whitespace: bool,
}
61
62struct Context<'a> {
64 parent: &'a Node,
65 parent_context: Option<&'a Context<'a>>,
66 left: Option<&'a [Rc<Node>]>,
67 right: Option<&'a [Rc<Node>]>,
68}
69
70impl<'a> Context<'a> {
71 fn trim(&self, preceding_whitespace: bool) -> (bool, bool) {
74 (preceding_whitespace || self.trim_left(), self.trim_right())
75 }
76
77 fn trim_left(&self) -> bool {
78 self.left.map_or_else(
79 || is_block_element(self.parent) || self.parent_trim_left(),
80 |siblings| {
81 siblings
82 .iter()
83 .rev()
84 .find_map(Self::is_block_element)
85 .unwrap_or_else(|| self.parent_trim_left())
86 },
87 )
88 }
89
90 fn parent_trim_left(&self) -> bool {
91 self.parent_context.map_or(true, Context::trim_left)
92 }
93
94 fn trim_right(&self) -> bool {
95 self.right.map_or(true, |siblings| {
96 siblings
97 .iter()
98 .find_map(Self::is_block_element)
99 .unwrap_or(true)
100 })
101 }
102
103 fn next_element(&self) -> Option<&Rc<Node>> {
104 self.right.and_then(|siblings| {
105 siblings
106 .iter()
107 .find(|node| matches!(node.data, NodeData::Element { .. }))
108 })
109 }
110
111 fn is_block_element(node: &Rc<Node>) -> Option<bool> {
112 if let NodeData::Element { name, .. } = &node.data {
113 Some(is_block_element_name(name.local.as_ref()))
114 } else {
115 None
116 }
117 }
118}
119
120impl<'a, W> Minifier<'a, W>
121where
122 W: io::Write,
123{
124 #[inline]
126 pub fn new(w: &'a mut W) -> Self {
127 Self {
128 w,
129 omit_doctype: false,
130 collapse_whitespace: true,
131 preserve_comments: false,
132 preceding_whitespace: false,
133 }
134 }
135
136 #[inline]
139 pub fn collapse_whitespace(&mut self, collapse: bool) -> &mut Self {
140 self.collapse_whitespace = collapse;
141 self
142 }
143
144 #[inline]
147 pub fn omit_doctype(&mut self, omit: bool) -> &mut Self {
148 self.omit_doctype = omit;
149 self
150 }
151
152 #[inline]
155 pub fn preserve_comments(&mut self, preserve: bool) -> &mut Self {
156 self.preserve_comments = preserve;
157 self
158 }
159
160 #[inline]
166 pub fn minify<R: io::Read>(&mut self, mut r: &mut R) -> io::Result<()> {
167 let dom = parse_document(RcDom::default(), ParseOpts::default())
168 .from_utf8()
169 .read_from(&mut r)?;
170
171 if !self.omit_doctype {
172 self.w.write_all(b"<!doctype html>")?;
173 }
174
175 self.minify_node(&None, &dom.document)
176 }
177
178 fn minify_node<'b>(&mut self, ctx: &'b Option<Context>, node: &'b Node) -> io::Result<()> {
179 match &node.data {
180 NodeData::Text { contents } => {
181 let contents = contents.borrow();
183 let contents = contents.as_ref();
184
185 if !self.collapse_whitespace {
186 return self.w.write_all(contents.as_bytes());
187 }
188
189 let (skip_collapse_whitespace, contains_code) =
191 ctx.as_ref().map_or((false, false), |ctx| {
192 if let NodeData::Element { name, .. } = &ctx.parent.data {
193 let name = name.local.as_ref();
194
195 (preserve_whitespace(name), contains_code(name))
196 } else {
197 (false, false)
198 }
199 });
200
201 if skip_collapse_whitespace {
202 return self.w.write_all(contents.as_bytes());
203 }
204
205 if contains_code {
206 return self
207 .w
208 .write_all(contents.trim_matches(is_ascii_whitespace).as_bytes());
209 }
210
211 if contents.is_empty() {
213 return io::Result::Ok(());
214 }
215
216 let (trim_left, trim_right) = ctx
217 .as_ref()
218 .map_or((true, true), |ctx| ctx.trim(self.preceding_whitespace));
219 let contents = match (trim_left, trim_right) {
220 (true, true) => contents.trim_matches(is_ascii_whitespace),
221 (true, false) => contents.trim_start_matches(is_ascii_whitespace),
222 (false, true) => contents.trim_end_matches(is_ascii_whitespace),
223 _ => contents,
224 };
225
226 if !contents.is_empty() {
228 self.write_collapse_whitespace(contents.as_bytes(), reserved_entity, None)?;
229
230 self.preceding_whitespace = !trim_right
231 && contents
232 .as_bytes()
233 .iter()
234 .last()
235 .map_or(false, u8::is_ascii_whitespace);
236 }
237
238 Ok(())
239 }
240
241 NodeData::Comment { contents } if self.preserve_comments => {
242 self.w.write_all(b"<!--")?;
243 self.w.write_all(contents.as_bytes())?;
244 self.w.write_all(b"-->")
245 }
246
247 NodeData::Document => self.minify_children(ctx, node),
248
249 NodeData::Element { name, attrs, .. } => {
250 let attrs = attrs.borrow();
251 let tag = name.local.as_ref();
252
253 if is_self_closing(tag) {
254 return self.write_start_tag(name, &attrs);
255 }
256
257 let (omit_start_tag, omit_end_tag) =
258 self.omit_tags(ctx, node, tag, attrs.is_empty());
259
260 if !omit_start_tag {
261 self.write_start_tag(name, &attrs)?;
262 }
263
264 self.minify_children(ctx, node)?;
265
266 if !omit_end_tag {
267 self.write_end_tag(name)?;
268 }
269
270 Ok(())
271 }
272
273 _ => Ok(()),
274 }
275 }
276
277 fn next_is_comment<'b, I>(&self, v: I) -> bool
278 where
279 I: IntoIterator<Item = &'b Rc<Node>>,
280 {
281 v.into_iter()
282 .find_map(|node| match &node.data {
283 NodeData::Text { contents } => {
284 if self.collapse_whitespace && is_whitespace(contents) {
285 None
287 } else {
288 Some(false)
289 }
290 }
291 NodeData::Comment { .. } => Some(self.preserve_comments),
292 _ => Some(false),
293 })
294 .unwrap_or(false)
295 }
296
297 fn is_whitespace(&self, s: &RefCell<Tendril<UTF8>>) -> Option<bool> {
298 if self.collapse_whitespace && is_whitespace(s) {
299 None
300 } else {
301 Some(
302 !s.borrow()
303 .as_bytes()
304 .iter()
305 .next()
306 .map_or(false, u8::is_ascii_whitespace),
307 )
308 }
309 }
310
311 #[allow(clippy::too_many_lines)]
314 fn omit_tags(
315 &self,
316 ctx: &Option<Context>,
317 node: &Node,
318 name: &str,
319 empty_attributes: bool,
320 ) -> (bool, bool) {
321 ctx.as_ref().map_or((false, false), |ctx| match name {
322 "html" => {
323 let omit_end = ctx.right.map_or(true, |right| !self.next_is_comment(right));
325 let omit_start =
327 empty_attributes && omit_end && !self.next_is_comment(&*node.children.borrow());
328
329 (omit_start, omit_end)
330 }
331 "head" => {
332 let omit_end = ctx.right.map_or(true, |right| {
334 right
335 .iter()
336 .find_map(|node| match &node.data {
337 NodeData::Text { contents } => self.is_whitespace(contents),
338 NodeData::Comment { .. } => {
339 if self.preserve_comments {
340 Some(false)
341 } else {
342 None
343 }
344 }
345 _ => Some(true),
346 })
347 .unwrap_or(true)
348 });
349 let omit_start = empty_attributes
351 && omit_end
352 && node
353 .children
354 .borrow()
355 .iter()
356 .find_map(|node| match &node.data {
357 NodeData::Text { contents } => self.is_whitespace(contents),
358 NodeData::Element { .. } => Some(true),
359 NodeData::Comment { .. } => {
360 if self.preserve_comments {
361 Some(false)
362 } else {
363 None
364 }
365 }
366 _ => Some(false),
367 })
368 .unwrap_or(true);
369
370 (omit_start, omit_end)
371 }
372 "body" => {
373 let omit_start = empty_attributes
375 && node
376 .children
377 .borrow()
378 .iter()
379 .find_map(|node| match &node.data {
380 NodeData::Text { contents } => self.is_whitespace(contents),
381 NodeData::Element { name, .. } => {
382 Some(!matches!(name.local.as_ref(), "script" | "style"))
383 }
384 NodeData::Comment { .. } => {
385 if self.preserve_comments {
386 Some(false)
387 } else {
388 None
389 }
390 }
391 _ => Some(true),
392 })
393 .unwrap_or(true);
394 let omit_end = ctx.right.map_or(true, |right| !self.next_is_comment(right));
396
397 (omit_start && omit_end, omit_end)
398 }
399 "p" => {
400 let omit_end = ctx.next_element().map_or(true, |node| {
401 if let NodeData::Element { name, .. } = &node.data {
402 matches!(
403 name.local.as_ref().to_ascii_lowercase().as_str(),
404 "address"
405 | "article"
406 | "aside"
407 | "blockquote"
408 | "div"
409 | "dl"
410 | "fieldset"
411 | "footer"
412 | "form"
413 | "h1"
414 | "h2"
415 | "h3"
416 | "h4"
417 | "h5"
418 | "h6"
419 | "header"
420 | "hr"
421 | "menu"
422 | "nav"
423 | "ol"
424 | "p"
425 | "pre"
426 | "section"
427 | "table"
428 | "ul"
429 )
430 } else {
431 false
432 }
433 });
434
435 (false, omit_end)
436 }
437 _ => (false, optional_end_tag(name)),
439 })
440 }
441
442 #[allow(clippy::needless_pass_by_value)]
443 fn minify_children(&mut self, ctx: &Option<Context>, node: &Node) -> io::Result<()> {
444 let children = node.children.borrow();
445 let l = children.len();
446
447 children.iter().enumerate().try_for_each(|(i, child)| {
448 if self.preceding_whitespace && is_block_element(child) {
449 self.preceding_whitespace = false;
450 }
451
452 self.minify_node(
453 &Some(Context {
454 parent: node,
455 parent_context: ctx.as_ref(),
456 left: if i > 0 { Some(&children[..i]) } else { None },
457 right: if i + 1 < l {
458 Some(&children[i + 1..])
459 } else {
460 None
461 },
462 }),
463 child,
464 )
465 })
466 }
467
468 fn write_qualified_name(&mut self, name: &QualName) -> io::Result<()> {
469 if let Some(prefix) = &name.prefix {
470 self.w
471 .write_all(prefix.as_ref().to_ascii_lowercase().as_bytes())?;
472 self.w.write_all(b":")?;
473 }
474
475 self.w
476 .write_all(name.local.as_ref().to_ascii_lowercase().as_bytes())
477 }
478
479 fn write_start_tag(&mut self, name: &QualName, attrs: &[Attribute]) -> io::Result<()> {
480 self.w.write_all(b"<")?;
481 self.write_qualified_name(name)?;
482
483 attrs
484 .iter()
485 .try_for_each(|attr| self.write_attribute(attr))?;
486
487 self.w.write_all(b">")
488 }
489
490 fn write_end_tag(&mut self, name: &QualName) -> io::Result<()> {
491 self.w.write_all(b"</")?;
492 self.write_qualified_name(name)?;
493 self.w.write_all(b">")
494 }
495
496 fn write_attribute(&mut self, attr: &Attribute) -> io::Result<()> {
497 self.w.write_all(b" ")?;
498 self.write_qualified_name(&attr.name)?;
499
500 let value = attr.value.as_ref();
501 let value = if self.collapse_whitespace {
502 value.trim_matches(is_ascii_whitespace)
503 } else {
504 value
505 };
506
507 if value.is_empty() {
508 return io::Result::Ok(());
509 }
510
511 self.w.write_all(b"=")?;
512
513 let b = value.as_bytes();
514 let (unquoted, double, _) =
515 b.iter()
516 .fold((true, false, false), |(unquoted, double, single), &c| {
517 let (double, single) = (double || c == b'"', single || c == b'\'');
518 let unquoted =
519 unquoted && !double && !single && c != b'=' && !c.is_ascii_whitespace();
520
521 (unquoted, double, single)
522 });
523
524 if unquoted {
525 self.w.write_all(b)
526 } else if double {
527 self.write_attribute_value(b, b"'", reserved_entity_with_apos)
528 } else {
529 self.write_attribute_value(b, b"\"", reserved_entity)
530 }
531 }
532
533 fn write_attribute_value<T: AsRef<[u8]>>(
534 &mut self,
535 v: T,
536 quote: &[u8],
537 f: EntityFn,
538 ) -> io::Result<()> {
539 self.w.write_all(quote)?;
540
541 let b = v.as_ref();
542
543 if self.collapse_whitespace {
544 self.write_collapse_whitespace(b, f, Some(false))
545 } else {
546 self.w.write_all(b)
547 }?;
548
549 self.w.write_all(quote)
550 }
551
552 fn write_collapse_whitespace(
555 &mut self,
556 b: &[u8],
557 f: EntityFn,
558 preceding_whitespace: Option<bool>,
559 ) -> io::Result<()> {
560 b.iter()
561 .enumerate()
562 .try_fold(
563 (0, preceding_whitespace.unwrap_or(self.preceding_whitespace)),
564 |(pos, preceding_whitespace), (i, &c)| {
565 let is_whitespace = c.is_ascii_whitespace();
566
567 Ok(if is_whitespace && preceding_whitespace {
568 if i != pos {
569 self.write(&b[pos..i], f)?;
570 }
571
572 (i + 1, true)
574 } else {
575 (pos, is_whitespace)
576 })
577 },
578 )
579 .and_then(|(pos, _)| {
580 if pos < b.len() {
581 self.write(&b[pos..], f)?;
582 }
583
584 Ok(())
585 })
586 }
587
588 fn write(&mut self, b: &[u8], f: EntityFn) -> io::Result<()> {
589 b.iter()
590 .enumerate()
591 .try_fold(0, |pos, (i, &c)| {
592 Ok(if let Some(entity) = f(c) {
593 self.w.write_all(&b[pos..i])?;
594 self.w.write_all(entity)?;
595
596 i + 1
598 } else {
599 pos
600 })
601 })
602 .and_then(|pos| {
603 if pos < b.len() {
604 self.w.write_all(&b[pos..])?;
605 }
606
607 Ok(())
608 })
609 }
610}
611
/// Maps a byte to the character reference that must replace it in the output,
/// or `None` when the byte can be written as is.
type EntityFn = fn(u8) -> Option<&'static [u8]>;

/// Returns the character reference for the bytes that are markup-significant
/// in text and in double-quoted attribute values: `<`, `>` and `&`.
const fn reserved_entity(v: u8) -> Option<&'static [u8]> {
    match v {
        b'<' => Some(b"&lt;"),
        b'>' => Some(b"&gt;"),
        b'&' => Some(b"&#38;"),
        _ => None,
    }
}

/// Like [`reserved_entity`], but also escapes the apostrophe so the result is
/// safe inside a single-quoted attribute value.
const fn reserved_entity_with_apos(v: u8) -> Option<&'static [u8]> {
    if v == b'\'' {
        Some(b"&#39;")
    } else {
        reserved_entity(v)
    }
}
630
631fn is_whitespace(s: &RefCell<Tendril<UTF8>>) -> bool {
632 s.borrow().as_bytes().iter().all(u8::is_ascii_whitespace)
633}
634
/// Returns `true` when `name` is one of the element names this minifier
/// treats as block-level for whitespace trimming.
fn is_block_element_name(name: &str) -> bool {
    const BLOCK_ELEMENTS: &[&str] = &[
        "address",
        "article",
        "aside",
        "blockquote",
        "body",
        "br",
        "details",
        "dialog",
        "dd",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hgroup",
        "hr",
        "html",
        "li",
        "link",
        "main",
        "meta",
        "nav",
        "ol",
        "option",
        "p",
        "pre",
        "script",
        "section",
        "source",
        "table",
        "td",
        "th",
        "title",
        "tr",
        "ul",
    ];

    BLOCK_ELEMENTS.contains(&name)
}
686
687fn is_block_element(node: &Node) -> bool {
688 match &node.data {
689 NodeData::Element { ref name, .. } => is_block_element_name(name.local.as_ref()),
690 NodeData::Document => true,
691 _ => false,
692 }
693}
694
/// Returns `true` for the ASCII whitespace characters: space, tab, line feed,
/// form feed and carriage return.
///
/// Exists as a plain function so it can be passed as a pattern to the
/// `str::trim_*_matches` family.
#[allow(clippy::missing_const_for_fn)]
fn is_ascii_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
699
/// Returns `true` for the elements whose text is written without any
/// whitespace changes: `pre` and `textarea`.
fn preserve_whitespace(name: &str) -> bool {
    name == "pre" || name == "textarea"
}
703
/// Returns `true` for the elements whose text is code rather than markup:
/// `script` and `style`.
fn contains_code(name: &str) -> bool {
    name == "script" || name == "style"
}
707
/// Returns `true` for the elements that are written as a start tag only,
/// without children or an end tag.
fn is_self_closing(name: &str) -> bool {
    const SELF_CLOSING: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
        "source", "track", "wbr", "command", "keygen", "menuitem",
    ];

    SELF_CLOSING.contains(&name)
}
730
/// Returns `true` for the elements whose end tag this minifier leaves out.
fn optional_end_tag(name: &str) -> bool {
    const OPTIONAL_END_TAG: &[&str] = &[
        "basefont", "colgroup", "dd", "dt", "frame", "isindex", "li", "option", "p", "tbody",
        "td", "tfoot", "th", "thead", "tr",
    ];

    OPTIONAL_END_TAG.contains(&name)
}
751
#[cfg(test)]
mod tests {
    use super::*;
    use std::{fs, path::PathBuf, str};

    use glob::glob;

    /// Runs `test` on every non-directory entry matching `testdata/*.html`.
    fn for_each_test_file(test: fn(&PathBuf)) {
        glob("testdata/*.html")
            .expect("Failed to read glob pattern")
            .for_each(|path| {
                let path = path.expect("Failed to get entry");

                if path.is_dir() {
                    return;
                }

                test(&path);
            });
    }

    /// Checks the `Minify` trait against the `<file>.minified` fixtures.
    #[test]
    fn test_minify() {
        for_each_test_file(|path| {
            let html = fs::read_to_string(&path).expect("Failed to read HTML");
            let path = path.to_string_lossy().to_string();
            let minified_expected =
                fs::read_to_string(path + ".minified").expect("Failed to read minified HTML");
            let minified = html.minify().expect("Failed to minify HTML");
            let minified = str::from_utf8(&minified).expect("Failed to convert to string");

            assert_eq!(minified_expected, minified);
        });
    }

    /// Checks a default `Minifier` against the `<file>.minified` fixtures.
    #[test]
    fn test_minifier() {
        for_each_test_file(|path| {
            let html = fs::read(&path).expect("Failed to read HTML");
            let path = path.to_string_lossy().to_string();
            let minified_expected =
                fs::read(path + ".minified").expect("Failed to read minified HTML");
            let mut minified = vec![];

            Minifier::new(&mut minified)
                .minify(&mut html.as_slice())
                .expect("Failed to minify HTML");

            assert_eq!(minified_expected, minified);
        });
    }

    /// Table test for `write_collapse_whitespace`; each row is
    /// `(input, expected output, preceding whitespace)`.
    #[test]
    fn test_write_collapse_whitespace() {
        for &(input, expected, preceding_whitespace) in &[
            ("", "", false),
            (" ", " ", false),
            (" ", " ", false),
            (" ", "", true),
            (" x y ", " x y ", false),
            (" x y ", "x y ", true),
            (" x \n \t \n y ", " x y ", false),
            (" x \n \t \n y ", "x y ", true),
        ] {
            let mut w = vec![];
            let mut minifier = Minifier::new(&mut w);
            minifier.preceding_whitespace = preceding_whitespace;
            minifier
                .write_collapse_whitespace(
                    input.as_bytes(),
                    reserved_entity,
                    Some(preceding_whitespace),
                )
                .unwrap();

            let s = str::from_utf8(&w).unwrap();

            assert_eq!(expected, s);
        }
    }

    /// Table test for tag omission; each row is
    /// `(input, expected output, collapse whitespace, preserve comments)`.
    /// The doctype is always omitted so only the tags are compared.
    #[test]
    fn test_omit_tags() {
        for &(input, expected, collapse_whitespace, preserve_comments) in &[
            // Inputs starting with `<html>`.
            ("<html>", "", true, false),
            ("<html><!-- -->", "", true, false),
            ("<html> <!-- --> ", "<html><!-- -->", true, true),
            ("<html><!-- --></html>", "<html><!-- -->", true, true),
            (
                "<html><!-- --></html><!-- -->",
                "<html><!-- --></html><!-- -->",
                true,
                true,
            ),
            (
                "<html> <!-- --> </html> <!-- --> ",
                "<html><!-- --></html><!-- -->",
                true,
                true,
            ),
            (
                "<html> <!-- --> </html> <!-- --> ",
                "<html><!-- --><body> </html><!-- -->",
                false,
                true,
            ),
            // Inputs with an explicit `<head>`.
            (
                "<html> <head> <title>A</title> </head> <body><p> B </p> </body>",
                "<title>A</title><p>B",
                true,
                false,
            ),
            (
                "<html> <head> <title>A</title> </head> <body><p> B </p> </body>",
                "<head> <title>A</title> </head> <p> B ",
                false,
                false,
            ),
            (
                "<html> <head><!-- --> <title>A</title> </head> <body><p> B </p> </body>",
                "<head><!-- --><title>A</title><p>B",
                true,
                true,
            ),
            // Inputs with an explicit `<body>`.
            ("<body>", "", true, false),
            (
                "<body> <script>let x = 1;</script> ",
                "<body><script>let x = 1;</script>",
                true,
                false,
            ),
            (
                "<body> <style>body{margin:1em}</style>",
                "<body><style>body{margin:1em}</style>",
                true,
                false,
            ),
            ("<body> <p>A", "<p>A", true, false),
            ("<body id=main> <p>A", "<body id=main><p>A", true, false),
            (
                " <body> <p>A ",
                "<body> <p>A ",
                false,
                false,
            ),
            ("<body><p>A</body>", "<p>A", false, false),
            ("<body><p>A</body><!-- -->", "<p>A", false, false),
            (
                "<body><p>A</body><!-- -->",
                "<body><p>A</body><!-- -->",
                false,
                true,
            ),
            // A paragraph followed by an element that keeps its end tag.
            ("<p>Some text</p><button></button>", "<p>Some text</p><button></button>", false, false),
        ] {
            let mut w = vec![];
            let mut minifier = Minifier::new(&mut w);
            minifier
                .omit_doctype(true)
                .collapse_whitespace(collapse_whitespace)
                .preserve_comments(preserve_comments);
            minifier.minify(&mut input.as_bytes()).unwrap();

            let s = str::from_utf8(&w).unwrap();

            assert_eq!(expected, s);
        }
    }
}