1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
//mod relaxng;
//mod xml_id;
/*
use encoding_rs::UTF_8;
use encoding_rs::UTF_16BE;
use encoding_rs::UTF_16LE;
use encoding_rs::WINDOWS_1252;
use encoding_rs_io::DecodeReaderBytesBuilder;
use std::fs;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use xrust::{Error, ErrorKind};
fn dtdfileresolve() -> fn(Option<String>, String) -> Result<String, Error> {
move |locdir, uri| {
let u = match locdir {
None => uri,
Some(ld) => ld + uri.as_str(),
};
match fs::read_to_string(u) {
Err(_) => Err(Error::new(
ErrorKind::Unknown,
"Unable to read external DTD".to_string(),
)),
Ok(s) => Ok(s),
}
}
}
/// Reads the file at `filedir` and returns its contents decoded into a `String`.
///
/// The encoding is sniffed from the first (up to) four bytes of the file:
/// a UTF-16 or UTF-8 byte-order mark selects that encoding, a bare `<?xm`
/// prefix (an XML declaration with no byte-order mark) selects Windows-1252,
/// and anything else is decoded as UTF-8.
///
/// xRust itself will most likely be UTF-8 only, but there are UTF-16 files in
/// the conformance suite. They could be converted, but are best left as-is in
/// case other encodings are supported later.
///
/// # Panics
///
/// Panics if the file cannot be opened.
///
/// Seek and decode failures are deliberately ignored (best effort): whatever
/// was decoded before the failure is returned.
fn non_utf8_file_reader(filedir: &str) -> String {
    let mut file_in = File::open(filedir)
        .unwrap_or_else(|e| panic!("unable to open {}: {}", filedir, e));

    // Sniff at most 4 bytes. `read_exact` is avoided here: on a file shorter
    // than 4 bytes it fails and leaves the buffer contents unspecified.
    let mut head = Vec::with_capacity(4);
    let _ = file_in.by_ref().take(4).read_to_end(&mut head);
    // Rewind so the decoder sees the whole file, byte-order mark included.
    let _ = file_in.seek(SeekFrom::Start(0));

    let enc = match head.as_slice() {
        //[0, 0, 254, 255] => {} //UCS-4, big-endian machine (1234 order)
        //[255, 254, 0, 0] => {} //UCS-4, little-endian machine (4321 order)
        //[0, 0, 255, 254] => {} //UCS-4, unusual octet order (2143)
        //[254, 255, 0, 0] => {} //UCS-4, unusual octet order (3412)
        [254, 255, ..] => Some(UTF_16BE), // UTF-16, big-endian BOM
        [255, 254, ..] => Some(UTF_16LE), // UTF-16, little-endian BOM
        [239, 187, 191, ..] => Some(UTF_8), // UTF-8 BOM
        [60, 63, 120, 109] => Some(WINDOWS_1252), // "<?xm": no BOM, treated as Windows-1252
        _ => Some(UTF_8), // Other
    };

    let mut decoded_stream = DecodeReaderBytesBuilder::new()
        .encoding(enc)
        .build(file_in);
    let mut dest = String::new();
    let _ = decoded_stream.read_to_string(&mut dest);
    dest
}
*/