mux_media/types/
char_encoding.rs1use crate::Extension;
2use std::{fs::File, io::Read, path::Path};
3
/// Result of classifying a file's character encoding.
///
/// The classification only distinguishes whether the content can be treated
/// as UTF-8 as-is, needs an explicit source charset, or could not be
/// determined.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CharEncoding {
    /// The content can be consumed as UTF-8 without conversion.
    Utf8Compatible,
    /// The content uses another charset; the payload is the charset name as
    /// reported by the detector.
    NotUtf8Compatible(String),
    /// No charset could be determined.
    NotRecognized,
}
11
12impl CharEncoding {
13 pub fn new(file: impl AsRef<Path>) -> CharEncoding {
14 let f = file.as_ref();
15
16 if f.extension().map_or(false, |ext| {
17 Extension::new_and_is_matroska(ext.as_encoded_bytes())
18 }) {
19 return Self::Utf8Compatible;
21 }
22
23 return match detect_chardet(f) {
24 Some(s) if is_utf8_compatible(&s) => Self::Utf8Compatible,
25 Some(s) => Self::NotUtf8Compatible(s),
26 None => Self::NotRecognized,
27 };
28
29 fn detect_chardet(f: &Path) -> Option<String> {
30 const READ_LIMIT: usize = 128 * 1024; const LIM_CONFIDENCE: f32 = 0.8;
32
33 let mut file = File::open(f).ok()?;
34 let mut bytes = [0u8; READ_LIMIT];
35 let bytes_read = file.read(&mut bytes).ok()?;
36
37 match chardet::detect(&bytes[..bytes_read]) {
38 det if det.1 >= LIM_CONFIDENCE => Some(det.0),
39 _ => None,
40 }
41 }
42
43 fn is_utf8_compatible(s: &str) -> bool {
44 let s = s.trim();
45 s.eq_ignore_ascii_case("ascii") || s.eq_ignore_ascii_case("utf-8")
46 }
47 }
48
49 pub(crate) fn get_ffmpeg_sub_charenc(&self) -> Option<&str> {
50 match self {
51 Self::Utf8Compatible => None,
52 Self::NotUtf8Compatible(s) => Some(&s),
53 Self::NotRecognized => None,
54 }
55 }
56}