Skip to main content

uchardet_git/
lib.rs

1// MIT License
2//
3// Copyright (c) 2026 worksoup <https://github.com/worksoup/>
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#![doc = include_str!("../README.md")]
24
25#[cfg(feature = "auto_encoding_reader")]
26pub mod auto_encoding_reader;
27mod candidates;
28mod detector;
29#[cfg(feature = "encoding")]
30pub mod encoding;
31mod error;
32
33extern crate uchardet_git_sys as sys;
34
35pub use candidates::*;
36pub use detector::*;
37pub use error::*;
38
/// Runs charset detection over `data` and converts the best candidate via
/// its `encoding_whatwg()` method into an [`encoding_rs::Encoding`].
///
/// Only compiled when the `encoding` feature is enabled.
///
/// # Errors
///
/// * Propagates any error reported by `CharsetDetector::detect_data`.
/// * Returns [`Error::UnrecognizableCharset`] when detection yields no best
///   candidate.
/// * Propagates any error returned by the candidate's `encoding_whatwg()`.
#[cfg(feature = "encoding")]
pub fn detect_encoding(data: impl AsRef<[u8]>) -> Result<&'static encoding_rs::Encoding, Error> {
    let ranked = CharsetDetector::detect_data(data)?;

    // No best candidate means the detector could not settle on any charset.
    let Some(top) = ranked.best() else {
        return Err(Error::UnrecognizableCharset);
    };

    top.encoding_whatwg()
}
47
48pub fn detect_encoding_name(data: impl AsRef<[u8]>) -> Result<String, Error> {
49    let candidates = CharsetDetector::detect_data(data)?;
50    candidates
51        .best()
52        .ok_or(Error::UnrecognizableCharset)?
53        .encoding_name()
54        .map(ToOwned::to_owned)
55}
56
#[cfg(test)]
mod tests {
    /// Detects the encoding name of `data` and asserts it equals `expected`.
    ///
    /// Panics if detection fails or the detected name differs.
    fn assert_detected_encoding(data: &[u8], expected: &str) {
        let outcome = crate::detect_encoding_name(data);
        let detected = outcome.expect("should have at least one candidate");
        assert_eq!(detected, expected);
    }

    /// Plain 7-bit input is expected to be reported as "ASCII".
    #[test]
    fn test_detect_encoding_ascii() {
        assert_detected_encoding(b"ascii", "ASCII");
    }

    /// Text containing multi-byte UTF-8 sequences is expected to be reported
    /// as "UTF-8".
    #[test]
    fn test_detect_encoding_utf8() {
        let text = "©français";
        assert_detected_encoding(text.as_bytes(), "UTF-8");
    }

    /// A byte sequence with high-bit single bytes (0x93, 0xe7, 0xe9, 0x94) is
    /// expected to be reported as "WINDOWS-1252".
    #[test]
    fn test_detect_encoding_windows1252() {
        let sample: &[u8] = &[
            0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73, 0xe9, 0x94,
        ];
        assert_detected_encoding(sample, "WINDOWS-1252");
    }
}