use quick_xml::events::Event;
use quick_xml::Reader;
/// Checks of `quick_xml::encoding::detect_encoding` against small fixtures.
mod decode {
    use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
    use pretty_assertions::assert_eq;
    use quick_xml::encoding::detect_encoding;

    // Fixture names indicate that each of these files starts with a byte order mark.
    static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16be-bom.xml");
    static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16le-bom.xml");
    static UTF8_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf8-bom.xml");

    // Inline document that begins directly with the XML declaration.
    static UTF8_TEXT: &str = r#"<?xml version="1.0"?>
<project name="project-name">
</project>
"#;

    /// Runs `detect_encoding` over every fixture and compares the result with
    /// the expected `(encoding, offset)` pair.
    ///
    /// The second tuple element is expected to be `0` for the inline text,
    /// `3` for the UTF-8 BOM fixture and `2` for both UTF-16 BOM fixtures
    /// (presumably the length of the BOM in bytes -- confirm against the
    /// `detect_encoding` documentation).
    #[test]
    fn test_detect_encoding() {
        let cases = [
            (UTF8_TEXT.as_bytes(), Some((UTF_8, 0))),
            (UTF8_TEXT_WITH_BOM, Some((UTF_8, 3))),
            (UTF16BE_TEXT_WITH_BOM, Some((UTF_16BE, 2))),
            (UTF16LE_TEXT_WITH_BOM, Some((UTF_16LE, 2))),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(detect_encoding(input), expected);
        }
    }
}
/// Reads every event of the `opennews_all.rss` fixture and unescapes each
/// text event, failing if any text cannot be unescaped.
///
/// The test name suggests that the fixture is KOI8-R encoded (the fixture
/// itself is not visible from this file).
///
/// # Panics
///
/// Panics when the reader reports an error or when a text event fails to
/// unescape, so that a broken parse cannot go unnoticed.
#[test]
fn test_koi8_r_encoding() {
    let src = include_bytes!("documents/opennews_all.rss").as_ref();
    let mut buf = Vec::new();
    let mut r = Reader::from_reader(src);
    r.trim_text(true).expand_empty_elements(false);
    loop {
        match r.read_event_into(&mut buf) {
            Ok(Event::Text(e)) => {
                e.unescape().unwrap();
            }
            Ok(Event::Eof) => break,
            // Events other than text are irrelevant for this test.
            Ok(_) => {}
            // Report reader failures instead of silently skipping them.
            Err(e) => panic!("Error at position {}: {:?}", r.buffer_position(), e),
        }
        // The event borrowed from `buf` is gone by now; reuse the allocation
        // instead of letting the buffer grow with every event.
        buf.clear();
    }
}
/// Checks which encoding the reader's decoder reports while reading the
/// fixture documents under `documents/encoding/`.
mod detect {
    use super::*;
    use encoding_rs::*;
    use pretty_assertions::assert_eq;

    /// Generates one `#[test]` function per fixture.
    ///
    /// Arguments, in order:
    /// - the name of the generated test function;
    /// - the `encoding_rs` constant the decoder must report after each event;
    /// - the fixture file name without the `.xml` extension, relative to
    ///   `documents/encoding/`;
    /// - optionally, a statement executed at the end of every loop iteration
    ///   (the `ISO-2022-JP` case passes `break`, so only its first event is
    ///   checked; the reason is not visible in this file).
    macro_rules! detect_test {
        ($name:ident, $expected:ident, $stem:literal $($after_event:stmt)?) => {
            #[test]
            fn $name() {
                let bytes = include_bytes!(concat!("documents/encoding/", $stem, ".xml"));
                let mut r = Reader::from_reader(bytes.as_ref());

                // Nothing has been read yet: the decoder must still report UTF-8.
                assert_eq!(r.decoder().encoding(), UTF_8);

                let mut buf = vec![];
                loop {
                    if let Event::Eof = dbg!(r.read_event_into(&mut buf).unwrap()) {
                        break;
                    }

                    // After every event other than `Eof` the expected encoding
                    // must be reported.
                    assert_eq!(r.decoder().encoding(), $expected);
                    buf.clear();
                    $($after_event)?
                }
            }
        };
    }

    detect_test!(utf8, UTF_8, "utf8");
    detect_test!(utf16be, UTF_16BE, "utf16be");
    detect_test!(utf16le, UTF_16LE, "utf16le");
    detect_test!(utf8_bom, UTF_8, "utf8-bom");
    detect_test!(utf16be_bom, UTF_16BE, "utf16be-bom");
    detect_test!(utf16le_bom, UTF_16LE, "utf16le-bom");
    detect_test!(big5, BIG5, "Big5");
    detect_test!(euc_jp, EUC_JP, "EUC-JP");
    detect_test!(euc_kr, EUC_KR, "EUC-KR");
    detect_test!(gb18030, GB18030, "gb18030");
    detect_test!(gbk, GBK, "GBK");
    detect_test!(iso_2022_jp, ISO_2022_JP, "ISO-2022-JP" break);
    detect_test!(shift_jis, SHIFT_JIS, "Shift_JIS");
    detect_test!(ibm866, IBM866, "IBM866");
    detect_test!(iso_8859_2, ISO_8859_2, "ISO-8859-2");
    detect_test!(iso_8859_3, ISO_8859_3, "ISO-8859-3");
    detect_test!(iso_8859_4, ISO_8859_4, "ISO-8859-4");
    detect_test!(iso_8859_5, ISO_8859_5, "ISO-8859-5");
    detect_test!(iso_8859_6, ISO_8859_6, "ISO-8859-6");
    detect_test!(iso_8859_7, ISO_8859_7, "ISO-8859-7");
    detect_test!(iso_8859_8, ISO_8859_8, "ISO-8859-8");
    detect_test!(iso_8859_8_i, ISO_8859_8_I, "ISO-8859-8-I");
    detect_test!(iso_8859_10, ISO_8859_10, "ISO-8859-10");
    detect_test!(iso_8859_13, ISO_8859_13, "ISO-8859-13");
    detect_test!(iso_8859_14, ISO_8859_14, "ISO-8859-14");
    detect_test!(iso_8859_15, ISO_8859_15, "ISO-8859-15");
    detect_test!(iso_8859_16, ISO_8859_16, "ISO-8859-16");
    detect_test!(koi8_r, KOI8_R, "KOI8-R");
    detect_test!(koi8_u, KOI8_U, "KOI8-U");
    detect_test!(macintosh, MACINTOSH, "macintosh");
    detect_test!(windows_874, WINDOWS_874, "windows-874");
    detect_test!(windows_1250, WINDOWS_1250, "windows-1250");
    detect_test!(windows_1251, WINDOWS_1251, "windows-1251");
    detect_test!(windows_1252, WINDOWS_1252, "windows-1252");
    detect_test!(windows_1253, WINDOWS_1253, "windows-1253");
    detect_test!(windows_1254, WINDOWS_1254, "windows-1254");
    detect_test!(windows_1255, WINDOWS_1255, "windows-1255");
    detect_test!(windows_1256, WINDOWS_1256, "windows-1256");
    detect_test!(windows_1257, WINDOWS_1257, "windows-1257");
    detect_test!(windows_1258, WINDOWS_1258, "windows-1258");
    detect_test!(x_mac_cyrillic, X_MAC_CYRILLIC, "x-mac-cyrillic");
    detect_test!(x_user_defined, X_USER_DEFINED, "x-user-defined");
}