1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/*
a probably good enough HTML prettifier
Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/
//! prettyish-html
//! ===========
//!
//! A "meh, good enough" HTML prettifier. The output can
//! definitely be improved, but it's fast and lightweight.
//! It might become better in the future, just depends.
//!
//! It assumes some level of well-formedness of the HTML,
//! though it doesn't do any form of validation either.
//!
//! Usage
//! ```
//! println!("{}", prettyish_html::prettify("<html><body>...</body></html>"));
//! ```
use lazy_static::lazy_static;
use regex::Regex;

/// Prettify HTML input
///
/// Puts every tag and comment on its own line and indents each line with
/// one tab per currently open element. Text that directly follows an opening
/// tag stays on that tag's line. Existing whitespace inside the input is not
/// normalized; only leading/trailing whitespace of the whole document is
/// trimmed.
///
/// The input is never validated. Unbalanced closing tags are tolerated: the
/// indentation level simply stops at zero instead of going negative.
///
/// Known limitation: void elements written without a trailing slash
/// (e.g. `<br>` rather than `<br/>`) are treated as opening tags, so the
/// lines after them are indented one level deeper.
///
/// # Examples
///
/// ```
/// let pretty = prettyish_html::prettify("<html><body>...</body></html>");
/// assert_eq!(pretty, "<html>\n\t<body>...\n\t</body>\n</html>");
/// ```
pub fn prettify(input: &str) -> String {
    lazy_static! {
        // `<` followed by an ASCII letter starts an opening tag. The class
        // is spelled `[A-Za-z]` on purpose: the range `A-z` would also cover
        // the punctuation between `Z` and `a` (`[`, `\`, `]`, `^`, `_`, and
        // the backtick), none of which can start a tag name.
        static ref OPEN_TAG: Regex = Regex::new("(?P<tag><[A-Za-z])").unwrap();
    }

    // First get all tags on their own lines
    let split = input
        .replace("<!--", "\n<!--")
        .replace("-->", "-->\n")
        .replace("</", "\n</");
    let split = OPEN_TAG.replace_all(&split, "\n$tag");
    let split = split.trim();

    // Now fix indentation
    let mut output = String::with_capacity(split.len());
    let mut indent: usize = 0;
    for (idx, line) in split.split('\n').enumerate() {
        let mut opens_element = false;
        if line.starts_with("</") {
            // A closing tag is printed at its parent's level. Saturate so
            // that a stray closing tag cannot underflow the counter.
            indent = indent.saturating_sub(1);
        } else if line.ends_with("/>") || line.starts_with("<!") {
            // Self-closing tag, or a `<!...` construct (DOCTYPE in any
            // letter case, comment, CDATA): none of them opens an element,
            // so the indentation level is left alone.
        } else if line.starts_with('<') {
            // Opening tag: this line keeps the current level, whatever
            // follows is nested one level deeper.
            opens_element = true;
        }

        // Lines are separated by (not terminated with) a newline.
        if idx > 0 {
            output.push('\n');
        }
        for _ in 0..indent {
            output.push('\t');
        }
        output.push_str(line);

        if opens_element {
            indent += 1;
        }
    }

    output
}