subx_cli/core/formats/encoding/charset.rs
/// Character sets (text encodings) that encoding detection can report.
///
/// Every variant is a plain tag with no payload, so the type is `Copy`:
/// values can be passed and stored by value without an explicit `.clone()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Charset {
    /// UTF-8.
    Utf8,
    /// UTF-16, little-endian byte order.
    Utf16Le,
    /// UTF-16, big-endian byte order.
    Utf16Be,
    /// UTF-32, little-endian byte order.
    Utf32Le,
    /// UTF-32, big-endian byte order.
    Utf32Be,
    /// GBK.
    Gbk,
    /// Shift_JIS.
    ShiftJis,
    /// ISO-8859-1.
    Iso88591,
    /// Windows-1252.
    Windows1252,
    /// Big5.
    Big5,
    /// EUC-KR.
    Euckr,
    /// The charset is not known.
    Unknown,
}
17
18/// 編碼檢測結果資訊
19#[derive(Debug, Clone)]
20pub struct EncodingInfo {
21 /// 偵測到的字符集
22 pub charset: Charset,
23 /// 檢測信心度 (0.0-1.0)
24 pub confidence: f32,
25 /// 是否檢測到 BOM
26 pub bom_detected: bool,
27 /// 解碼後的樣本文字
28 pub sample_text: String,
29}