prettyish_html/
lib.rs

1/*
2a probably good enough HTML prettifier
3Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
4
5This program is free software: you can redistribute it and/or modify
6it under the terms of the GNU General Public License as published by
7the Free Software Foundation, either version 3 of the License, or
8(at your option) any later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program.  If not, see <https://www.gnu.org/licenses/>.
17*/
18//! prettyish-html
19//! ===========
20//!
21//! A "meh, good enough" HTML prettifier. The output can
22//! definitely be improved, but it's fast and lightweight.
23//! It might become better in the future, just depends.
24//!
25//! It assumes some level of well-formedness of the HTML,
26//! though it doesn't do any form of validation either.
27//!
28//! Usage
29//! ```
30//! println!("{}", prettyish_html::prettify("<html><body>...</body></html>"));
31//! ```
32use lazy_static::lazy_static;
33use regex::Regex;
34
/// Prettify HTML input
///
/// Every tag (`<name …>`, `</name>`) and every comment (`<!-- … -->`) is
/// moved onto its own line, and each line is indented with one tab per
/// currently open element. Text that directly follows a tag stays on that
/// tag's line.
///
/// Behaviour worth knowing:
///
/// * Lines are trimmed and blank lines are dropped before indenting, so
///   indentation already present in `input` is replaced rather than
///   compounded; running the output through `prettify` again returns it
///   unchanged.
/// * Comments, `<!DOCTYPE …>` (in any letter case), `<?…?>`, self-closing
///   tags (`<x/>`) and HTML void elements (`<br>`, `<meta …>`, …) never
///   change the indentation level.
/// * Markup inside a comment is left alone and is not counted as tags.
/// * A closing tag without a matching opening tag is emitted at the left
///   margin; this function never panics on unbalanced input.
///
/// No validation is performed and the content of `<pre>`, `<script>` and
/// similar elements gets no special treatment.
///
/// # Examples
///
/// ```
/// let pretty = prettyish_html::prettify("<p>Hi</p>");
/// assert_eq!(pretty, "<p>Hi\n</p>");
/// ```
pub fn prettify(input: &str) -> String {
    // Indentation only adds a few bytes per line; reserve a bit extra up front.
    let mut output = String::with_capacity(input.len() + input.len() / 4);
    let mut depth: usize = 0;
    let mut in_comment = false;

    let lines = split_lines(input)
        .into_iter()
        .map(str::trim)
        .filter(|line| !line.is_empty());

    for line in lines {
        // `split_lines` guarantees that a comment starts at the beginning of
        // a line and that `-->` ends one, so the comment state can be
        // tracked line by line here.
        let commented = in_comment || line.starts_with("<!--");
        in_comment = commented && !line.ends_with("-->");
        let kind = if commented {
            LineKind::Other
        } else {
            classify(line)
        };

        if kind == LineKind::Close {
            // Saturate so that a stray closing tag cannot underflow.
            depth = depth.saturating_sub(1);
        }
        if !output.is_empty() {
            output.push('\n');
        }
        for _ in 0..depth {
            output.push('\t');
        }
        output.push_str(line);
        if kind == LineKind::Open {
            depth += 1;
        }
    }

    output
}

/// Elements that never have a closing tag and therefore must not increase
/// the indentation level (the HTML void elements, plus the legacy `param`).
const VOID_ELEMENTS: &[&str] = &[
    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
    "source", "track", "wbr",
];

/// How a single output line affects the indentation level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LineKind {
    /// Closing tag: dedent before printing the line.
    Close,
    /// Opening tag of a container element: indent after printing the line.
    Open,
    /// Text, declarations, self-closing or void tags: no change.
    Other,
}

/// Cut `input` into segments so that every tag and comment starts a segment
/// and every comment end (`-->`) finishes one. Existing newlines also act as
/// separators. Segments are returned untrimmed and may be empty.
fn split_lines(input: &str) -> Vec<&str> {
    let bytes = input.as_bytes();
    let mut lines = Vec::new();
    let mut start = 0;
    let mut in_comment = false;

    // Only ASCII bytes trigger a cut, and ASCII bytes never occur inside a
    // multi-byte UTF-8 sequence, so every slice index is a char boundary.
    for (idx, &byte) in bytes.iter().enumerate() {
        match byte {
            b'<' if !in_comment => {
                let rest = &bytes[idx + 1..];
                let opens_comment = rest.starts_with(b"!--");
                let opens_tag =
                    matches!(rest.first(), Some(&c) if c == b'/' || c.is_ascii_alphabetic());
                if opens_comment || opens_tag {
                    in_comment = opens_comment;
                    lines.push(&input[start..idx]);
                    start = idx;
                }
            }
            b'>' if bytes[..idx].ends_with(b"--") => {
                in_comment = false;
                lines.push(&input[start..idx + 1]);
                start = idx + 1;
            }
            b'\n' => {
                lines.push(&input[start..idx]);
                start = idx + 1;
            }
            _ => {}
        }
    }
    lines.push(&input[start..]);

    lines
}

/// Decide how a trimmed, non-comment line changes the indentation level.
fn classify(line: &str) -> LineKind {
    if line.starts_with("</") {
        return LineKind::Close;
    }

    // Only `<` followed by an ASCII letter starts an element; this excludes
    // `<!DOCTYPE …>`, `<?xml …?>` and a literal `<` in text.
    let rest = match line.strip_prefix('<') {
        Some(rest) if rest.starts_with(|c: char| c.is_ascii_alphabetic()) => rest,
        _ => return LineKind::Other,
    };

    // Good-enough tag extraction: everything up to the first `>`.
    let tag = rest.split('>').next().unwrap_or(rest);
    if line.ends_with("/>") || tag.ends_with('/') {
        return LineKind::Other;
    }

    let name = tag
        .split(|c: char| c.is_ascii_whitespace() || c == '/')
        .next()
        .unwrap_or("");
    if VOID_ELEMENTS.iter().any(|void| void.eq_ignore_ascii_case(name)) {
        LineKind::Other
    } else {
        LineKind::Open
    }
}