use super::enums::ProbingState;
use super::charsetprober::{CharsetProber, filter_with_english_letters};
/// Length of the per-prober frequency counter array; values read from
/// `Latin1ClassModel` are used as indices into that array.
const FREQ_CAT_NUM: usize = 4;

// Character class identifiers stored in `Latin1_CharToClass`.
// The expansions of the abbreviations below are inferred from the names and
// from which bytes the table assigns to each class; confirm before relying
// on them.

/// Presumably "undefined": assigned to bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
const UDF: u8 = 0;
/// Presumably "other": every byte that is not given one of the letter classes.
const OTH: u8 = 1;
/// Assigned to bytes 0x41..=0x5A (ASCII capital letters).
const ASC: u8 = 2;
/// Assigned to bytes 0x61..=0x7A (ASCII small letters).
const ASS: u8 = 3;
/// Presumably "accented capital vowel": e.g. bytes 0xC0..=0xC5.
const ACV: u8 = 4;
/// Presumably "accented capital, other": e.g. bytes 0xC6, 0xC7, 0xD0, 0xD1.
const ACO: u8 = 5;
/// Presumably "accented small vowel": e.g. bytes 0xE0..=0xE5.
const ASV: u8 = 6;
/// Presumably "accented small, other": e.g. bytes 0xE6, 0xE7, 0xF0, 0xF1.
const ASO: u8 = 7;
/// Number of character classes; also the row width used when indexing
/// `Latin1ClassModel` as `previous_class * CLASS_NUM + current_class`.
const CLASS_NUM: usize = 8;
#[allow(non_upper_case_globals)]
const Latin1_CharToClass:&[u8] = &[
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, ];
/// Transition table between character classes, stored row-major as an
/// 8 x 8 matrix: the row is the class of the previous byte, the column is
/// the class of the current byte (`feed` indexes it as
/// `previous * CLASS_NUM + current`).
///
/// How each value is consumed by this file:
/// * `0` — `feed` switches the prober to `ProbingState::NotMe` and stops.
/// * `1` — counted, and weighted by -20 in `get_confidence`.
/// * `2` — counted, contributing only to the total.
/// * `3` — counted, and weighted by +1 in `get_confidence`.
// The mixed-case name is referenced under this exact spelling by `feed`, so
// the naming lint is silenced here instead of renaming the table.
#[allow(non_upper_case_globals)]
const Latin1ClassModel: &[u8] = &[
//  UDF OTH ASC ASS ACV ACO ASV ASO      <- current class
    0,  0,  0,  0,  0,  0,  0,  0, // previous: UDF
    0,  3,  3,  3,  3,  3,  3,  3, // previous: OTH
    0,  3,  3,  3,  3,  3,  3,  3, // previous: ASC
    0,  3,  3,  3,  1,  1,  3,  3, // previous: ASS
    0,  3,  3,  3,  1,  2,  1,  2, // previous: ACV
    0,  3,  3,  3,  3,  3,  3,  3, // previous: ACO
    0,  3,  1,  3,  1,  1,  1,  3, // previous: ASV
    0,  3,  1,  3,  1,  1,  3,  3, // previous: ASO
];
/// Prober that reports the charset name "ISO-8859-1" and scores input by
/// looking at transitions between classes of consecutive bytes.
pub struct Latin1Prober {
    /// Current verdict. Starts as `ProbingState::Detecting`; `feed` sets it
    /// to `ProbingState::NotMe` when it meets a transition the model marks
    /// with `0`.
    m_state: ProbingState,
    /// Class of the last byte that was counted, carried across `feed` calls
    /// so the first byte of the next chunk has a predecessor.
    m_last_char_class: u8,
    /// Number of transitions seen per model value; the model value is the
    /// index into this array.
    m_freq_counter: [usize; FREQ_CAT_NUM],
}
impl Latin1Prober {
    /// Creates a prober in its initial state: `ProbingState::Detecting`,
    /// previous character class `OTH`, and every frequency counter at zero.
    pub fn new() -> Latin1Prober {
        Latin1Prober {
            m_state: ProbingState::Detecting,
            m_last_char_class: OTH,
            m_freq_counter: [0; FREQ_CAT_NUM],
        }
    }
}

// A public argument-less `new` should be mirrored by `Default` so the type
// works with `..Default::default()`, `unwrap_or_default()` and generic code.
impl Default for Latin1Prober {
    /// Returns the same value as `Latin1Prober::new()`.
    fn default() -> Self {
        Self::new()
    }
}
impl CharsetProber for Latin1Prober {
    /// Puts the prober back into its starting condition: all counters zero,
    /// previous class `OTH`, state `ProbingState::Detecting`.
    fn reset(&mut self) {
        self.m_freq_counter = [0; FREQ_CAT_NUM];
        self.m_last_char_class = OTH;
        self.m_state = ProbingState::Detecting;
    }

    /// Consumes a chunk of input and returns the state afterwards.
    ///
    /// The chunk is first passed through `filter_with_english_letters`
    /// (defined elsewhere; its exact filtering is not visible here). Each
    /// remaining byte is classified and the (previous class, current class)
    /// pair is looked up in `Latin1ClassModel`. A model value of `0` marks
    /// the prober as `ProbingState::NotMe` and abandons the rest of the
    /// chunk; any other value is tallied in the matching counter.
    fn feed(&mut self, byte_str: &[u8]) -> &ProbingState {
        for byte in filter_with_english_letters(byte_str) {
            let current_class = Latin1_CharToClass[byte as usize];
            // Row = class of the previous byte, column = class of this byte.
            let model_index =
                usize::from(self.m_last_char_class) * CLASS_NUM + usize::from(current_class);
            match Latin1ClassModel[model_index] {
                0 => {
                    // The previous class is deliberately left untouched here.
                    self.m_state = ProbingState::NotMe;
                    break;
                }
                category => {
                    self.m_freq_counter[usize::from(category)] += 1;
                    self.m_last_char_class = current_class;
                }
            }
        }
        &self.m_state
    }

    /// Name of the charset this prober stands for.
    fn get_charset(&self) -> String {
        String::from("ISO-8859-1")
    }

    /// Confidence derived from the transition counters.
    ///
    /// Returns `0.01` once the prober is `ProbingState::NotMe`. Otherwise the
    /// result is `(count[3] - 20 * count[1]) / total`, floored at zero and
    /// scaled by `0.73`; with nothing counted yet the result is `0.0`.
    fn get_confidence(&self) -> f32 {
        if self.m_state == ProbingState::NotMe {
            return 0.01;
        }

        let total: usize = self.m_freq_counter.iter().sum();
        let raw = if total == 0 {
            0.0
        } else {
            let favourable = self.m_freq_counter[3] as f32;
            let penalised = self.m_freq_counter[1] as f32;
            (favourable - penalised * 20.0) / (total as f32)
        };
        let floored = if raw < 0.0 { 0.0 } else { raw };

        // NOTE(review): the 0.73 factor caps this prober's confidence,
        // presumably so more specific probers win — confirm.
        floored * 0.73
    }

    /// This prober does not identify a language; always the empty string.
    fn get_language(&self) -> String {
        String::new()
    }

    /// Borrow of the current probing state.
    fn get_state(&self) -> &ProbingState {
        &self.m_state
    }
}