quick-xml 0.25.0

High performance xml reader and writer
Documentation
use quick_xml::events::Event;
use quick_xml::Reader;

/// Tests for the free function `detect_encoding` from `quick_xml::encoding`.
mod decode {
    use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
    use pretty_assertions::assert_eq;
    use quick_xml::encoding::*;

    /// Inline document without a byte order mark.
    static UTF8_TEXT: &str = r#"<?xml version="1.0"?>
<project name="project-name">
</project>
"#;

    // Fixture documents that start with a byte order mark.
    static UTF8_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf8-bom.xml");
    static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16be-bom.xml");
    static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16le-bom.xml");

    /// Feeds each document to `detect_encoding` and compares the result with the
    /// expected `(encoding, offset)` pair.
    #[test]
    fn test_detect_encoding() {
        // (input bytes, expected encoding, expected offset).
        // NOTE(review): the offset is presumably the BOM length in bytes
        // (0 when there is no BOM) -- confirm against `detect_encoding` docs.
        let cases = [
            // No BOM
            (UTF8_TEXT.as_bytes(), UTF_8, 0),
            // BOM
            (UTF8_TEXT_WITH_BOM, UTF_8, 3),
            (UTF16BE_TEXT_WITH_BOM, UTF_16BE, 2),
            (UTF16LE_TEXT_WITH_BOM, UTF_16LE, 2),
        ];

        for &(document, encoding, offset) in cases.iter() {
            assert_eq!(detect_encoding(document), Some((encoding, offset)));
        }
    }
}

/// Reads the whole `documents/opennews_all.rss` fixture and checks that every
/// text event can be unescaped without an error.
///
/// NOTE(review): judging by the test name the fixture is KOI8-R encoded --
/// confirm against the file itself.
#[test]
fn test_koi8_r_encoding() {
    let src = include_bytes!("documents/opennews_all.rss").as_ref();
    let mut buf = vec![];
    let mut r = Reader::from_reader(src);
    r.trim_text(true).expand_empty_elements(false);
    loop {
        match r.read_event_into(&mut buf) {
            Ok(Event::Text(e)) => {
                e.unescape().unwrap();
            }
            Ok(Event::Eof) => break,
            // NOTE(review): this arm also swallows `Err(_)`, so reader errors do
            // not fail the test. Kept as in the original; confirm it is intended.
            _ => (),
        }
        // The event borrowed from `buf` is gone at this point, so the buffer can
        // be reset. Without this it would accumulate the bytes of every event.
        buf.clear();
    }
}

/// Checks which encoding the reader's decoder reports while parsing documents
/// stored in various encodings.
///
/// Test data is generated by the helper project `test-gen`, which requires a
/// checkout of the `encoding` submodule.
mod detect {
    use super::*;
    use encoding_rs::*;
    use pretty_assertions::assert_eq;

    /// Generates a `#[test]` function named `$test` that reads
    /// `documents/encoding/$file.xml` event by event.
    ///
    /// Before anything is read the decoder must report `UTF_8`; after every event
    /// other than `Eof` it must report `$enc`.
    ///
    /// An optional trailing statement (for example `break`) is inserted at the end
    /// of the loop body, which allows a test to stop after the first event.
    macro_rules! detect_test {
        ($test:ident, $enc:ident, $file:literal $($break:stmt)?) => {
            #[test]
            fn $test() {
                let xml: &[u8] = include_bytes!(concat!("documents/encoding/", $file, ".xml"));
                let mut r = Reader::from_reader(xml);
                // Nothing has been read yet, so the decoder is expected to
                // report UTF-8.
                assert_eq!(r.decoder().encoding(), UTF_8);

                let mut buf = Vec::new();
                loop {
                    // `dbg!` echoes every event to stderr.
                    if let Event::Eof = dbg!(r.read_event_into(&mut buf).unwrap()) {
                        break;
                    }
                    assert_eq!(r.decoder().encoding(), $enc);
                    buf.clear();
                    $($break)?
                }
            }
        };
    }

    // Without BOM
    detect_test!(utf8, UTF_8, "utf8");
    detect_test!(utf16be, UTF_16BE, "utf16be");
    detect_test!(utf16le, UTF_16LE, "utf16le");

    // With BOM
    detect_test!(utf8_bom, UTF_8, "utf8-bom");
    detect_test!(utf16be_bom, UTF_16BE, "utf16be-bom");
    detect_test!(utf16le_bom, UTF_16LE, "utf16le-bom");

    // legacy multi-byte encodings (7)
    detect_test!(big5, BIG5, "Big5");
    detect_test!(euc_jp, EUC_JP, "EUC-JP");
    detect_test!(euc_kr, EUC_KR, "EUC-KR");
    detect_test!(gb18030, GB18030, "gb18030");
    detect_test!(gbk, GBK, "GBK");
    // TODO: XML in this encoding cannot be parsed successfully until #158 is resolved.
    // Only the first event is read, to ensure that the encoding is detected correctly.
    detect_test!(iso_2022_jp, ISO_2022_JP, "ISO-2022-JP" break);
    detect_test!(shift_jis, SHIFT_JIS, "Shift_JIS");

    // legacy single-byte encodings (29, including x-user-defined)
    detect_test!(ibm866, IBM866, "IBM866");
    detect_test!(iso_8859_2, ISO_8859_2, "ISO-8859-2");
    detect_test!(iso_8859_3, ISO_8859_3, "ISO-8859-3");
    detect_test!(iso_8859_4, ISO_8859_4, "ISO-8859-4");
    detect_test!(iso_8859_5, ISO_8859_5, "ISO-8859-5");
    detect_test!(iso_8859_6, ISO_8859_6, "ISO-8859-6");
    detect_test!(iso_8859_7, ISO_8859_7, "ISO-8859-7");
    detect_test!(iso_8859_8, ISO_8859_8, "ISO-8859-8");
    detect_test!(iso_8859_8_i, ISO_8859_8_I, "ISO-8859-8-I");
    detect_test!(iso_8859_10, ISO_8859_10, "ISO-8859-10");
    detect_test!(iso_8859_13, ISO_8859_13, "ISO-8859-13");
    detect_test!(iso_8859_14, ISO_8859_14, "ISO-8859-14");
    detect_test!(iso_8859_15, ISO_8859_15, "ISO-8859-15");
    detect_test!(iso_8859_16, ISO_8859_16, "ISO-8859-16");
    detect_test!(koi8_r, KOI8_R, "KOI8-R");
    detect_test!(koi8_u, KOI8_U, "KOI8-U");
    detect_test!(macintosh, MACINTOSH, "macintosh");
    detect_test!(windows_874, WINDOWS_874, "windows-874");
    detect_test!(windows_1250, WINDOWS_1250, "windows-1250");
    detect_test!(windows_1251, WINDOWS_1251, "windows-1251");
    detect_test!(windows_1252, WINDOWS_1252, "windows-1252");
    detect_test!(windows_1253, WINDOWS_1253, "windows-1253");
    detect_test!(windows_1254, WINDOWS_1254, "windows-1254");
    detect_test!(windows_1255, WINDOWS_1255, "windows-1255");
    detect_test!(windows_1256, WINDOWS_1256, "windows-1256");
    detect_test!(windows_1257, WINDOWS_1257, "windows-1257");
    detect_test!(windows_1258, WINDOWS_1258, "windows-1258");
    detect_test!(x_mac_cyrillic, X_MAC_CYRILLIC, "x-mac-cyrillic");
    detect_test!(x_user_defined, X_USER_DEFINED, "x-user-defined");
}