minifier 0.0.43

Minifier tool/lib for JS/CSS/JSON files
Documentation
// Take a look at the license at the top of the repository in the LICENSE file.

use regex::{Captures, Regex};

/// Collapses whitespace in `source`.
///
/// When the (lower-cased) input contains `<textarea` or `<pre`, nothing inside
/// the document is collapsed and only leading/trailing whitespace is trimmed.
/// Otherwise whitespace between `>` and `<` becomes a single space, and every
/// run of two or more whitespace characters, as well as every lone `\r` or
/// `\n`, is replaced by a single space.
fn condense_whitespace(source: &str) -> String {
    let lowered = source.to_lowercase();
    let keeps_whitespace = ["<textarea", "<pre"]
        .iter()
        .any(|marker| lowered.contains(*marker));
    if keeps_whitespace {
        return source.trim().to_owned();
    }

    // Both patterns are rebuilt on every call; they are constant, so `unwrap` cannot fail.
    let between_tags = Regex::new(r">\s+<").unwrap();
    let blank_runs = Regex::new(r"\s{2,}|[\r\n]").unwrap();

    let tightened = between_tags.replace_all(source, "> <").into_owned();
    blank_runs.replace_all(&tightened, " ").into_owned()
}

/// Strips `type="..."` attributes from `<style ...>` and `<script ...>` opening tags.
///
/// Only text matched by the opening-tag pattern is touched; everything else in
/// `source` is copied through unchanged.
fn condense(source: &str) -> String {
    let type_attr = Regex::new(r#"\s*?type="[\w|\s].*?""#).unwrap();
    let opening_tag = Regex::new(r"<(style|script)[\w|\s].*?>").unwrap();

    let stripped = opening_tag.replace_all(source, |found: &Captures| {
        // Capture 0 is the whole opening tag, attributes included.
        let whole_tag = &found[0];
        type_attr.replace_all(whole_tag, "").into_owned()
    });
    stripped.into_owned()
}

/// Removes tags that may be omitted from an HTML document.
///
/// Every exact, case-sensitive occurrence of a tag in the list below is
/// deleted. Tags carrying attributes (e.g. `<body class="x">`) are not matched
/// and are left in place. Tags are processed once each, in list order.
fn clean_unneeded_tags(source: &str) -> String {
    // Each tag appears exactly once, so no pass is repeated.
    const USELESS_TAGS: &[&str] = &[
        "</area>",
        "</base>",
        "<body>",
        "</body>",
        "</br>",
        "</col>",
        "</colgroup>",
        "</dd>",
        "</dt>",
        "<head>",
        "</head>",
        "</hr>",
        "<html>",
        "</html>",
        "</img>",
        "</input>",
        "</li>",
        "</link>",
        "</meta>",
        "</option>",
        "</param>",
        "<tbody>",
        "</tbody>",
        "</td>",
        "</tfoot>",
        "</th>",
        "</thead>",
        "</tr>",
        "</basefont>",
        "</isindex>",
    ];

    let mut res = source.to_owned();
    for &useless_tag in USELESS_TAGS {
        // `replace` always allocates a new String; skip it when the tag is absent.
        if res.contains(useless_tag) {
            res = res.replace(useless_tag, "");
        }
    }
    res
}

/// Replaces HTML comments with a single space.
///
/// A comment is kept verbatim when, after every `<!--` in it is blanked out and
/// the result is trimmed, the text starts with `[` (for example
/// `<!--[if lte IE 8]> ... <![endif]-->`). Comments may span several lines.
///
/// NOTE(review): a previous comment here mentioned matching "build" and
/// "endbuild" case-insensitively; this function contains no such handling.
fn remove_comments(source: &str) -> String {
    let comment = Regex::new("<!--(.|\n)*?-->").unwrap();

    let cleaned = comment.replace_all(source, |found: &Captures| {
        let whole = &found[0];
        let body = whole.replace("<!--", " ");
        if body.trim().starts_with('[') {
            whole.to_owned()
        } else {
            " ".to_owned()
        }
    });
    cleaned.into_owned()
}

/// Tidies whitespace inside opening tags and unquotes `width`/`height` values.
///
/// For every match of `<\w.*?>` that does not contain `</`:
///
/// 1. whitespace between a closing quote and `>` is removed;
/// 2. whitespace runs between two word characters, or between a digit and
///    `>`, are collapsed to a single space;
/// 3. two or more whitespace characters between a closing quote and the next
///    attribute name are reduced to one space;
/// 4. `width="..."` and `height="..."` lose their quotes when the value
///    consists only of ASCII letters, digits, `-`, `_` and `.`.
///
/// Text outside of matched tags is copied through, and the final result is
/// trimmed.
fn unquote_attributes(source: &str) -> String {
    // Some attributes like width, height, etc... don't need quotes.
    let any_tag = Regex::new(r"<\w.*?>").unwrap();
    let extra_spaces = Regex::new(r" \s+|\s +").unwrap();
    let between_words = Regex::new(r"\w\s+\w").unwrap();
    let spaces_before_close = Regex::new(r##""\s+>"##).unwrap();
    let spaces_before_close2 = Regex::new(r"'\s+>").unwrap();
    let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap();
    let extra_spaces3 = Regex::new(r"\d\s+>").unwrap();
    let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap();

    any_tag
        .replace_all(source, |caps: &Captures| {
            let cap = &caps[0];
            if cap.starts_with("<!") || cap.contains("</") {
                return cap.to_owned();
            }

            // Step 1: drop whitespace between a closing quote and `>`.
            let tag = spaces_before_close.replace_all(cap, "\">");
            let mut tag = spaces_before_close2.replace_all(&tag, "'>").into_owned();

            // Step 2: collapse whitespace runs. The matches are taken from a snapshot
            // because `tag` is rewritten while iterating. Each match is handled on its
            // own: gluing a `between_words` match to an `extra_spaces3` match would
            // produce text that rarely occurs in the tag, so nothing would be replaced.
            let snapshot = tag.clone();
            let whitespace_runs = between_words
                .find_iter(&snapshot)
                .chain(extra_spaces3.find_iter(&snapshot));
            for found in whitespace_runs {
                let text = found.as_str();
                let collapsed = extra_spaces.replace_all(text, " ");
                tag = tag.replace(text, &collapsed);
            }

            // Step 3: a single space between a closing quote and the next attribute.
            let mut output = tag.clone();
            for found in extra_spaces2.find_iter(&tag) {
                let text = found.as_str();
                // Every alternative of the pattern starts with a one-byte quote character,
                // so splitting after the first byte is always on a char boundary.
                let (quote, rest) = text.split_at(1);
                output = output.replace(text, &format!("{} {}", quote, rest.trim_start()));
            }

            // Step 4: only `width` and `height` are unquoted; other attributes are
            // written back exactly as they were matched.
            quotes_in_tag
                .replace_all(&output, |caps: &Captures| match &caps[1] {
                    "width" | "height" => format!("{}={}", &caps[1], &caps[2]),
                    name => format!("{}=\"{}\"", name, &caps[2]),
                })
                .into_owned()
        })
        .trim()
        .to_owned()
}

/// Returns a minified version of the provided HTML source.
///
/// The passes run in a fixed order: comments are removed, `type` attributes are
/// stripped from `<style>`/`<script>` tags, optional tags are deleted,
/// whitespace is condensed, and finally whitespace inside tags is tidied and
/// `width`/`height` values are unquoted. The result is trimmed.
pub fn minify(source: &str) -> String {
    let without_comments = remove_comments(source);
    let without_type_attrs = condense(&without_comments);
    let without_optional_tags = clean_unneeded_tags(&without_type_attrs);
    let collapsed = condense_whitespace(&without_optional_tags);
    let unquoted = unquote_attributes(&collapsed);
    unquoted.trim().to_owned()
}

// End-to-end check of `minify` on a small page. The expected output shows that
// comments are dropped, `<head>`/`<body>`/`</li>` tags are removed, `type`
// attributes are stripped from `<style>`/`<script>` but kept on `<link>`,
// whitespace runs are collapsed, and `width`/`height` values are unquoted.
#[test]
fn html_minify_test() {
    let source = r##"<head>
    <title>Some huge title</title>
    <link rel="stylesheet" type="text/css"   href="something.css"   >
    <style type="text/css">
        .some_class {
            color: red;
        }
    </style>
</head>
<body>
    <header>
        <div>
            <i>    <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b>    </i>
            <h1    style="width:100%;text-align:center;"   >Big header</h1>
        </div>
    <!-- commeeeeeeeents !!! -->
    </header>
    <div id="some_id">
        <!-- another comment
        on
multi
lines -->
        <div id="another_id" class="another_class" width="100">
            <h2>A little sub title</h2>
            <ul>
                <li>A list!</li>
                <li>Who doesn't like lists?</li>
                <li height="12" class="fooool">Well, who cares...</li>
            </ul>
        </div>
    </div>
    <script type="text/javascript"    >
        console.log("foo");
    </script>
    <style type="text/css" src="../foo.css">
    <script src="../foo.js">
</body>
"##;

    // Note that `<script >` keeps one space where its `type` attribute used to be.
    let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \
                           type=\"text/css\" href=\"something.css\"> <style> .some_class \
                           { color: red; } </style> <header> <div> <i> <b><a \
                           href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \
                           <h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \
                           </header> <div id=\"some_id\"> <div id=\"another_id\" \
                           class=\"another_class\" width=100> <h2>A little sub \
                           title</h2> <ul> <li>A list! <li>Who doesn't like lists? \
                           <li height=12 class=\"fooool\">Well, who cares... </ul> </div> \
                           </div> <script > console.log(\"foo\"); </script> <style \
                           src=\"../foo.css\"> <script src=\"../foo.js\">";
    assert_eq!(minify(source), expected_result);
}

// Checks that `minify` drops an ordinary comment but keeps a comment whose
// body starts with `[` (here an `[if lte IE 8]` conditional comment),
// including the markup inside it.
#[test]
fn html_keep_important_comments() {
    let source = r#"
<div>
    <!-- normal comment -->
    <div>content</div>
    <!--[if lte IE 8]>
    <div class="warning">This old browser is unsupported and will most likely display funky things.
    </div>
    <![endif]-->
</div>
"#;

    // Whitespace inside the preserved conditional comment is still condensed.
    let expected_result =
        "<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \
                           old browser is unsupported and will most likely display funky things. \
                           </div> <![endif]--> </div>";
    assert_eq!(minify(source), expected_result);
}