1#[cfg(feature = "auto_encoding_reader")]
24pub mod auto_encoding_reader;
25mod candidates;
26mod detector;
27#[cfg(feature = "encoding")]
28pub mod encoding;
29mod error;
30
31extern crate uchardet_git_sys as sys;
32
33pub use candidates::*;
34pub use detector::*;
35pub use error::*;
36
/// Detects the charset of `data` and resolves it to an [`encoding_rs::Encoding`].
///
/// The bytes are handed to [`CharsetDetector::detect_data`]; the best candidate
/// it reports is then converted through that candidate's `encoding()` method.
///
/// # Errors
///
/// * Any error produced by [`CharsetDetector::detect_data`] is propagated.
/// * [`Error::UnrecognizableCharset`] is returned when the detector yields no
///   best candidate.
/// * Any error produced by the candidate's `encoding()` conversion is returned
///   unchanged.
#[cfg(feature = "encoding")]
pub fn detect_encoding(data: impl AsRef<[u8]>) -> Result<&'static encoding_rs::Encoding, Error> {
    let candidates = CharsetDetector::detect_data(data)?;
    // No best candidate means the detector could not settle on any charset.
    let best = candidates.best().ok_or(Error::UnrecognizableCharset)?;
    best.encoding()
}
45
46pub fn detect_encoding_name(data: impl AsRef<[u8]>) -> Result<String, Error> {
47 let candidates = CharsetDetector::detect_data(data)?;
48 candidates
49 .best()
50 .ok_or(Error::UnrecognizableCharset)?
51 .encoding_name()
52 .map(ToOwned::to_owned)
53}
54
#[cfg(test)]
mod tests {
    use crate::detect_encoding_name;

    /// Runs charset detection on `data` and asserts that the reported name
    /// equals `expected`; panics if detection itself fails.
    fn assert_detected_encoding(data: &[u8], expected: &str) {
        let detected = detect_encoding_name(data).expect("should have at least one candidate");
        assert_eq!(detected, expected);
    }

    /// Plain 7-bit text is reported as ASCII.
    #[test]
    fn test_detect_encoding_ascii() {
        assert_detected_encoding(b"ascii", "ASCII");
    }

    /// The UTF-8 bytes of a string containing non-ASCII characters are
    /// reported as UTF-8.
    #[test]
    fn test_detect_encoding_utf8() {
        let sample = "©français";
        assert_detected_encoding(sample.as_bytes(), "UTF-8");
    }

    /// A byte sequence containing single high-bit bytes (0x93, 0xe7, 0xe9,
    /// 0x94) mixed with ASCII letters is reported as WINDOWS-1252.
    #[test]
    fn test_detect_encoding_windows1252() {
        let sample: &[u8] = &[
            0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73, 0xe9, 0x94,
        ];
        assert_detected_encoding(sample, "WINDOWS-1252");
    }
}