1#![doc = include_str!("../README.md")]
24
25#[cfg(feature = "auto_encoding_reader")]
26pub mod auto_encoding_reader;
27mod candidates;
28mod detector;
29#[cfg(feature = "encoding")]
30pub mod encoding;
31mod error;
32
33extern crate uchardet_git_sys as sys;
34
35pub use candidates::*;
36pub use detector::*;
37pub use error::*;
38
/// Detects the character encoding of `data` and returns the matching
/// [`encoding_rs::Encoding`].
///
/// The bytes are handed to [`CharsetDetector::detect_data`]; the best
/// candidate it reports is then converted with `encoding_whatwg`.
///
/// Only compiled when the `encoding` feature is enabled.
///
/// # Errors
///
/// * Any error produced by [`CharsetDetector::detect_data`] is propagated.
/// * [`Error::UnrecognizableCharset`] when the detector yields no best candidate.
/// * Any error returned by the candidate's `encoding_whatwg` conversion.
#[cfg(feature = "encoding")]
pub fn detect_encoding(data: impl AsRef<[u8]>) -> Result<&'static encoding_rs::Encoding, Error> {
    let detected = CharsetDetector::detect_data(data)?;
    match detected.best() {
        Some(top) => top.encoding_whatwg(),
        // No best candidate means the detector could not settle on a charset.
        None => Err(Error::UnrecognizableCharset),
    }
}
47
48pub fn detect_encoding_name(data: impl AsRef<[u8]>) -> Result<String, Error> {
49 let candidates = CharsetDetector::detect_data(data)?;
50 candidates
51 .best()
52 .ok_or(Error::UnrecognizableCharset)?
53 .encoding_name()
54 .map(ToOwned::to_owned)
55}
56
#[cfg(test)]
mod tests {
    use crate::detect_encoding_name;

    /// Runs detection on `input` and asserts that the reported encoding
    /// name equals `want`; panics if detection itself fails.
    fn check_detection(input: &[u8], want: &str) {
        let detected = detect_encoding_name(input);
        let name = detected.expect("should have at least one candidate");
        assert_eq!(name, want);
    }

    /// Plain 7-bit text is reported as ASCII.
    #[test]
    fn test_detect_encoding_ascii() {
        check_detection(b"ascii", "ASCII");
    }

    /// A Rust string containing non-ASCII characters is reported as UTF-8.
    #[test]
    fn test_detect_encoding_utf8() {
        let text = "©français";
        check_detection(text.as_bytes(), "UTF-8");
    }

    /// A byte sequence with high bytes (0x93, 0xe7, 0xe9, 0x94) is
    /// reported as WINDOWS-1252.
    #[test]
    fn test_detect_encoding_windows1252() {
        let bytes: [u8; 11] = [
            0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73, 0xe9, 0x94,
        ];
        check_detection(&bytes, "WINDOWS-1252");
    }
}