chardet 0.2.4

Rust version of chardet
Documentation
use super::charsetprober::{CharsetProber, filter_international_words};
use super::enums::{ProbingState, SequenceLikelihood, CharacterCategory};

/// Static description of one single-byte charset/language pair, consumed by
/// `SingleByteCharsetProber`.
///
/// All slices and strings are borrowed for `'a`; the model itself owns nothing.
#[derive(Debug)]
pub struct SBStateMachineModel<'a> {
    /// Lookup table indexed by raw byte value; yields that byte's "order".
    /// The prober indexes it with every possible `u8`, so a table shorter
    /// than 256 entries can make `feed` panic.
    pub char_to_order_map: &'a [u8],
    /// Flattened two-dimensional table. The prober reads it at
    /// `first * sample_size + second`, where both operands are orders below
    /// the prober's sample size, and uses the stored value as an index into
    /// its sequence counters.
    pub precedence_matrix: &'a [u8],
    /// Divisor applied to the observed share of positive sequences when the
    /// prober computes its confidence.
    pub typical_positive_ratio: f32,
    /// When `false`, the prober passes its input through
    /// `filter_international_words` before analysing it; when `true`, the
    /// input is analysed unchanged.
    pub keep_english_letter: bool,
    /// Charset name reported by the prober's `get_charset`.
    pub charset_name: &'a str,
    /// Language name reported by the prober's `get_language`.
    pub language: &'a str,
}

/// Prober that scores a byte stream against a single `SBStateMachineModel`
/// by counting how often consecutive "frequent" bytes form sequences of each
/// likelihood category.
///
/// Fields prefixed `c_` are set once in `new` and never modified afterwards in
/// this file; fields prefixed `m_` hold the running state that `feed` updates
/// and `reset` clears.
pub struct SingleByteCharsetProber<'a> {
    /// Orders strictly below this value count as "frequent"; it is also the
    /// row stride used when indexing the model's precedence matrix.
    c_sample_size: usize,
    /// `m_total_seqs` must exceed this before `feed` will shortcut the state
    /// to `FoundIt` or `NotMe`.
    c_sb_enough_rel_threshold: usize,
    /// Confidence strictly above this switches the state to `FoundIt`.
    c_positive_shortcut_threshold: f32,
    /// Confidence strictly below this switches the state to `NotMe`.
    c_negative_shortcut_threshold: f32,
    /// Current probing state, as returned by `feed` and `get_state`.
    m_state: ProbingState,
    /// Model supplying the lookup tables, expected ratio and names.
    m_model: &'a SBStateMachineModel<'a>,
    /// When `true`, the two orders are swapped in the precedence-matrix lookup.
    m_reversed: bool,
    /// Order of the previously processed byte. Initialised to 255, which is
    /// not below `c_sample_size`, so the first byte never forms a sequence.
    m_last_order: u8,
    /// One counter per sequence-likelihood category.
    m_seq_counters: [usize; SequenceLikelihood::CATEGORIES],
    /// Number of sequences (pairs of consecutive frequent bytes) counted.
    m_total_seqs: usize,
    /// Number of bytes whose order is below `CharacterCategory::CONTROL`.
    m_total_char: usize,
    /// Number of bytes whose order is below `c_sample_size`.
    m_freq_char: usize,
}

impl<'a> SingleByteCharsetProber<'a> {
    /// Builds a prober that analyses input against `model`.
    ///
    /// `reversed` is stored unchanged; when it is `true`, `feed` swaps the
    /// current and previous orders in its precedence-matrix lookup.
    ///
    /// The returned prober is in the `Detecting` state with every counter at
    /// zero, which is the same state `reset` restores.
    pub fn new(model: &'a SBStateMachineModel, reversed: bool) -> SingleByteCharsetProber<'a> {
        Self {
            // Caller-supplied configuration.
            m_model: model,
            m_reversed: reversed,

            // Fixed tuning constants.
            c_sample_size: 64,
            c_sb_enough_rel_threshold: 1024,
            c_positive_shortcut_threshold: 0.95,
            c_negative_shortcut_threshold: 0.05,

            // Running state. 255 is not below the sample size, so the first
            // byte fed has no usable predecessor.
            m_state: ProbingState::Detecting,
            m_last_order: 255,
            m_seq_counters: [0; SequenceLikelihood::CATEGORIES],
            m_total_seqs: 0,
            m_total_char: 0,
            m_freq_char: 0,
        }
    }
}
impl<'a> CharsetProber for SingleByteCharsetProber<'a> {
    /// Restores the running state to what `new` produces.
    ///
    /// The model, the `reversed` flag and the threshold constants are kept.
    fn reset(&mut self) {
        self.m_state = ProbingState::Detecting;
        // 255 is not below the sample size, so the next byte fed is never
        // paired with a predecessor.
        self.m_last_order = 255;
        self.m_seq_counters = [0; SequenceLikelihood::CATEGORIES];
        self.m_total_seqs = 0;
        self.m_total_char = 0;
        self.m_freq_char = 0;
    }

    /// Feeds `byte_str` into the prober and returns the resulting state.
    ///
    /// Unless the model sets `keep_english_letter`, the input is first passed
    /// through `filter_international_words`. If nothing remains to analyse,
    /// the state is returned untouched.
    ///
    /// Each byte is mapped to its order; the character counters are updated,
    /// and every pair of consecutive "frequent" orders (both below the sample
    /// size) is looked up in the precedence matrix and tallied by category.
    /// Once more than `c_sb_enough_rel_threshold` sequences have been seen
    /// while still `Detecting`, a confidence above the positive threshold
    /// switches the state to `FoundIt` and one below the negative threshold
    /// switches it to `NotMe`.
    ///
    /// # Panics
    ///
    /// Panics if a lookup falls outside the model's tables or if a
    /// precedence-matrix entry is not a valid index into the sequence counters.
    fn feed(&mut self, byte_str: &[u8]) -> &ProbingState {
        // The model reference is `Copy`; taking it out keeps `self` free for
        // the counter updates below.
        let model = self.m_model;

        // Choose the bytes to analyse once, instead of re-testing the flag for
        // every byte. `filtered` only exists to own the filtered copy.
        let filtered: Vec<u8>;
        let bytes: &[u8] = if model.keep_english_letter {
            byte_str
        } else {
            filtered = filter_international_words(byte_str);
            &filtered
        };
        if bytes.is_empty() {
            return &self.m_state;
        }

        let sample_size = self.c_sample_size;
        for &byte in bytes {
            let order = model.char_to_order_map[usize::from(byte)];
            if order < CharacterCategory::CONTROL {
                self.m_total_char += 1;
            }

            let current = usize::from(order);
            if current < sample_size {
                self.m_freq_char += 1;

                let previous = usize::from(self.m_last_order);
                if previous < sample_size {
                    self.m_total_seqs += 1;
                    // A reversed prober swaps the two orders in the lookup.
                    let index = if self.m_reversed {
                        current * sample_size + previous
                    } else {
                        previous * sample_size + current
                    };
                    let likelihood = model.precedence_matrix[index];
                    self.m_seq_counters[usize::from(likelihood)] += 1;
                }
            }

            // Remember every order, frequent or not, so that an infrequent
            // byte breaks the sequence.
            self.m_last_order = order;
        }

        if self.m_state == ProbingState::Detecting
            && self.m_total_seqs > self.c_sb_enough_rel_threshold
        {
            let confidence = self.get_confidence();
            if confidence > self.c_positive_shortcut_threshold {
                self.m_state = ProbingState::FoundIt;
            } else if confidence < self.c_negative_shortcut_threshold {
                self.m_state = ProbingState::NotMe;
            }
        }
        &self.m_state
    }

    /// Returns an owned copy of the model's charset name.
    fn get_charset(&self) -> String {
        self.m_model.charset_name.to_owned()
    }

    /// Returns the current confidence that the input matches the model.
    ///
    /// With no sequences counted yet the result is `0.01`. Otherwise it is the
    /// share of positive sequences, divided by the model's typical positive
    /// ratio and scaled by the ratio of frequent characters to counted
    /// characters; any result of `1.0` or more is reported as `0.99`.
    fn get_confidence(&self) -> f32 {
        if self.m_total_seqs == 0 {
            return 0.01;
        }

        let positive = self.m_seq_counters[SequenceLikelihood::POSITIVE] as f32;
        let normalised =
            positive / (self.m_total_seqs as f32) / self.m_model.typical_positive_ratio;
        let confidence = normalised * (self.m_freq_char as f32) / (self.m_total_char as f32);

        if confidence >= 1.0 {
            0.99
        } else {
            confidence
        }
    }

    /// Returns an owned copy of the model's language name.
    fn get_language(&self) -> String {
        self.m_model.language.to_owned()
    }

    /// Returns the current probing state without feeding any data.
    fn get_state(&self) -> &ProbingState {
        &self.m_state
    }
}