Skip to main content

uchardet_git/
candidates.rs

1// MIT License
2//
3// Copyright (c) 2026 worksoup <https://github.com/worksoup/>
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23use std::ffi::CStr;
24
25use crate::{CharsetDetector, Error};
26
27pub struct Candidates {
28    pub(crate) detector: CharsetDetector,
29    pub(crate) n_candidates: usize,
30}
31
32impl Candidates {
33    pub fn detect(data: impl AsRef<[u8]>) -> Result<Candidates, Error> {
34        CharsetDetector::detect_data(data)
35    }
36
37    pub fn detector(&self) -> &CharsetDetector {
38        &self.detector
39    }
40
41    pub fn reset(mut self) -> CharsetDetector {
42        self.detector.reset();
43        self.detector
44    }
45
46    pub fn len(&self) -> usize {
47        self.n_candidates
48    }
49
50    pub fn is_empty(&self) -> bool {
51        self.n_candidates == 0
52    }
53
54    pub fn get(&self, index: usize) -> Option<Candidate<'_>> {
55        if index < self.n_candidates {
56            Some(Candidate {
57                parent: self,
58                index,
59            })
60        } else {
61            None
62        }
63    }
64
65    pub fn best(&self) -> Option<Candidate<'_>> {
66        self.get(0)
67    }
68
69    pub fn iter(&self) -> Iter<'_> {
70        Iter::new(self)
71    }
72}
73
74pub struct Candidate<'a> {
75    parent: &'a Candidates,
76    index: usize,
77}
78
79impl<'a> Candidate<'a> {
80    #[cfg(feature = "encoding")]
81    pub fn encoding_whatwg(&self) -> Result<&'static encoding_rs::Encoding, Error> {
82        crate::encoding::as_whatwg(self.encoding_name()?).ok_or(Error::NonStandardCharset)
83    }
84
85    pub fn encoding_name(&self) -> Result<&'a str, Error> {
86        let ptr = unsafe { sys::uchardet_get_encoding(self.parent.detector.ptr, self.index) };
87        debug_assert!(!ptr.is_null());
88        unsafe { CStr::from_ptr(ptr) }
89            .to_str()
90            .map_err(|_| Error::InvalidCharset)
91            .and_then(|s| {
92                if s.is_empty() {
93                    Err(Error::UnrecognizableCharset)
94                } else {
95                    Ok(s)
96                }
97            })
98    }
99
100    pub fn confidence(&self) -> f32 {
101        unsafe { sys::uchardet_get_confidence(self.parent.detector.ptr, self.index) }
102    }
103
104    pub fn language(&self) -> Result<Option<&'a str>, Error> {
105        let ptr = unsafe { sys::uchardet_get_language(self.parent.detector.ptr, self.index) };
106        if ptr.is_null() {
107            return Ok(None);
108        }
109        let s = unsafe { CStr::from_ptr(ptr) }.to_str()?;
110        Ok(if s.is_empty() { None } else { Some(s) })
111    }
112}
113
114pub struct Iter<'a> {
115    parent: &'a Candidates,
116    start: usize,
117    end: usize,
118}
119
120impl<'a> Iter<'a> {
121    fn new(parent: &'a Candidates) -> Self {
122        Iter {
123            parent,
124            start: 0,
125            end: parent.n_candidates,
126        }
127    }
128}
129
130impl<'a> Iterator for Iter<'a> {
131    type Item = Candidate<'a>;
132
133    fn next(&mut self) -> Option<Self::Item> {
134        if self.start < self.end {
135            let item = Candidate {
136                parent: self.parent,
137                index: self.start,
138            };
139            self.start += 1;
140            Some(item)
141        } else {
142            None
143        }
144    }
145
146    fn size_hint(&self) -> (usize, Option<usize>) {
147        let len = self.end - self.start;
148        (len, Some(len))
149    }
150}
151
152impl<'a> DoubleEndedIterator for Iter<'a> {
153    fn next_back(&mut self) -> Option<Self::Item> {
154        if self.start < self.end {
155            self.end -= 1;
156            Some(Candidate {
157                parent: self.parent,
158                index: self.end,
159            })
160        } else {
161            None
162        }
163    }
164}
165
166impl<'a> ExactSizeIterator for Iter<'a> {
167    fn len(&self) -> usize {
168        self.end - self.start
169    }
170}
171
172impl<'a> IntoIterator for &'a Candidates {
173    type Item = Candidate<'a>;
174    type IntoIter = Iter<'a>;
175
176    fn into_iter(self) -> Self::IntoIter {
177        self.iter()
178    }
179}