Skip to main content

uchardet_git/
lib.rs

1// MIT License
2//
3// Copyright (c) 2026 worksoup <https://github.com/worksoup/>
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#[cfg(feature = "auto_encoding_reader")]
24pub mod auto_encoding_reader;
25mod candidates;
26mod detector;
27#[cfg(feature = "encoding")]
28pub mod encoding;
29mod error;
30
31extern crate uchardet_git_sys as sys;
32
33pub use candidates::*;
34pub use detector::*;
35pub use error::*;
36
/// Detects the character encoding of `data` and returns it as an
/// [`encoding_rs::Encoding`].
///
/// Only available when the `encoding` feature is enabled.
///
/// # Errors
///
/// * Any error produced by [`CharsetDetector::detect_data`] is propagated.
/// * [`Error::UnrecognizableCharset`] is returned when the detection result
///   has no best candidate.
/// * Any error produced while converting the best candidate via its
///   `encoding()` method is returned as-is.
#[cfg(feature = "encoding")]
pub fn detect_encoding(data: impl AsRef<[u8]>) -> Result<&'static encoding_rs::Encoding, Error> {
    let detected = CharsetDetector::detect_data(data)?;
    match detected.best() {
        Some(winner) => winner.encoding(),
        // Detection ran, but produced nothing usable.
        None => Err(Error::UnrecognizableCharset),
    }
}
45
46pub fn detect_encoding_name(data: impl AsRef<[u8]>) -> Result<String, Error> {
47    let candidates = CharsetDetector::detect_data(data)?;
48    candidates
49        .best()
50        .ok_or(Error::UnrecognizableCharset)?
51        .encoding_name()
52        .map(ToOwned::to_owned)
53}
54
#[cfg(test)]
mod tests {
    use crate::detect_encoding_name;

    /// Runs detection on `data` and asserts that the reported encoding name
    /// equals `expected`. Panics if detection itself fails.
    fn assert_detected_encoding(data: &[u8], expected: &str) {
        let detected = detect_encoding_name(data).expect("should have at least one candidate");
        assert_eq!(detected, expected);
    }

    /// Plain 7-bit input is reported as `ASCII`.
    #[test]
    fn test_detect_encoding_ascii() {
        let input: &[u8] = b"ascii";
        assert_detected_encoding(input, "ASCII");
    }

    /// Text containing multi-byte UTF-8 sequences is reported as `UTF-8`.
    #[test]
    fn test_detect_encoding_utf8() {
        let input = "©français";
        assert_detected_encoding(input.as_bytes(), "UTF-8");
    }

    /// Single-byte input using bytes above 0x7F (0x93, 0xe7, 0xe9, 0x94) is
    /// reported as `WINDOWS-1252`.
    #[test]
    fn test_detect_encoding_windows1252() {
        let input: [u8; 11] = [
            0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73, 0xe9, 0x94,
        ];
        assert_detected_encoding(&input, "WINDOWS-1252");
    }
}