nipper 0.1.9

HTML manipulation with CSS seletors
Documentation
use html5ever::serialize;
use html5ever::serialize::{SerializeOpts, TraversalScope};
use html5ever::QualName;
use html5ever::{parse_document, parse_fragment};
use markup5ever::{local_name, namespace_url, ns};
use nipper::{Document, SerializableNodeRef};
use tendril::SliceExt;
use tendril::StrTendril;
use tendril::TendrilSink;

fn parse_and_serialize(input: StrTendril) -> StrTendril {
    let dom = parse_fragment(
        Document::default(),
        Default::default(),
        QualName::new(None, ns!(html), local_name!("body")),
        vec![],
    )
    .one(input);

    let root = dom.root();
    let inner: SerializableNodeRef = root.first_child().unwrap().into();

    let mut result = vec![];
    serialize(&mut result, &inner, Default::default()).unwrap();
    StrTendril::try_from_byte_slice(&result).unwrap()
}
macro_rules! test_fn {
    ($f:ident, $name:ident, $input:expr, $output:expr) => {
        #[test]
        fn $name() {
            assert_eq!($output, &*$f($input.to_tendril()));
        }
    };

    // Shorthand for $output = $input
    ($f:ident, $name:ident, $input:expr) => {
        test_fn!($f, $name, $input, $input);
    };
}

macro_rules! test {
    ($($t:tt)*) => {
        test_fn!(parse_and_serialize, $($t)*);
    };
}

test!(empty, r#""#);
test!(fuzz, "<a a=\r\n", "");
test!(smoke_test, r#"<p><i>Hello</i>, World!</p>"#);

test!(
    misnest,
    r#"<p><i>Hello!</p>, World!</i>"#,
    r#"<p><i>Hello!</i></p><i>, World!</i>"#
);

test!(attr_literal, r#"<base foo="<'>">"#);
test!(attr_escape_amp, r#"<base foo="&amp;">"#);
test!(
    attr_escape_amp_2,
    r#"<base foo=&amp>"#,
    r#"<base foo="&amp;">"#
);
test!(
    attr_escape_nbsp,
    "<base foo=x\u{a0}y>",
    r#"<base foo="x&nbsp;y">"#
);
test!(
    attr_escape_quot,
    r#"<base foo='"'>"#,
    r#"<base foo="&quot;">"#
);
test!(
    attr_escape_several,
    r#"<span foo=3 title='test "with" &amp;quot;'>"#,
    r#"<span foo="3" title="test &quot;with&quot; &amp;quot;"></span>"#
);

test!(text_literal, r#"<p>"'"</p>"#);
test!(text_escape_amp, r#"<p>&amp;</p>"#);
test!(text_escape_amp_2, r#"<p>&amp</p>"#, r#"<p>&amp;</p>"#);
test!(text_escape_nbsp, "<p>x\u{a0}y</p>", r#"<p>x&nbsp;y</p>"#);
test!(text_escape_lt, r#"<p>&lt;</p>"#);
test!(text_escape_gt, r#"<p>&gt;</p>"#);
test!(text_escape_gt2, r#"<p>></p>"#, r#"<p>&gt;</p>"#);

test!(
    script_literal,
    r#"<script>(x & 1) < 2; y > "foo" + 'bar'</script>"#
);
test!(
    style_literal,
    r#"<style>(x & 1) < 2; y > "foo" + 'bar'</style>"#
);
test!(xmp_literal, r#"<xmp>(x & 1) < 2; y > "foo" + 'bar'</xmp>"#);
test!(
    iframe_literal,
    r#"<iframe>(x & 1) < 2; y > "foo" + 'bar'</iframe>"#
);
test!(
    noembed_literal,
    r#"<noembed>(x & 1) < 2; y > "foo" + 'bar'</noembed>"#
);
test!(
    noframes_literal,
    r#"<noframes>(x & 1) < 2; y > "foo" + 'bar'</noframes>"#
);

test!(pre_lf_0, "<pre>foo bar</pre>");
test!(pre_lf_1, "<pre>\nfoo bar</pre>", "<pre>foo bar</pre>");
test!(pre_lf_2, "<pre>\n\nfoo bar</pre>", "<pre>\nfoo bar</pre>");

test!(textarea_lf_0, "<textarea>foo bar</textarea>");
test!(
    textarea_lf_1,
    "<textarea>\nfoo bar</textarea>",
    "<textarea>foo bar</textarea>"
);
test!(
    textarea_lf_2,
    "<textarea>\n\nfoo bar</textarea>",
    "<textarea>\nfoo bar</textarea>"
);

test!(listing_lf_0, "<listing>foo bar</listing>");
test!(
    listing_lf_1,
    "<listing>\nfoo bar</listing>",
    "<listing>foo bar</listing>"
);
test!(
    listing_lf_2,
    "<listing>\n\nfoo bar</listing>",
    "<listing>\nfoo bar</listing>"
);

test!(comment_1, r#"<p>hi <!--world--></p>"#);
test!(comment_2, r#"<p>hi <!-- world--></p>"#);
test!(comment_3, r#"<p>hi <!--world --></p>"#);
test!(comment_4, r#"<p>hi <!-- world --></p>"#);

// FIXME: test serialization of qualified tag/attribute names that can't be
// parsed from HTML

test!(attr_ns_1, r#"<svg xmlns="bleh"></svg>"#);
test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
test!(attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"#);
test!(attr_ns_4, r#"<svg xlink:href="bleh"></svg>"#);

#[test]
fn doctype() {
    let dom = parse_document(Document::default(), Default::default()).one("<!doctype html>");
    let mut result = vec![];
    let root = dom.root();
    let document: SerializableNodeRef = root.first_child().unwrap().into();
    serialize(
        &mut result,
        &document,
        SerializeOpts {
            scripting_enabled: true,
            traversal_scope: TraversalScope::IncludeNode,
            create_missing_parent: false,
        },
    )
    .unwrap();
    assert_eq!(String::from_utf8(result).unwrap(), "<!DOCTYPE html>");
}