Skip to main content

TextProcessor

Trait TextProcessor 

Source
/// Text processing utilities.
///
/// The `Send + Sync` supertrait bounds require every implementor to be
/// both `Send` and `Sync`.
pub trait TextProcessor: Send + Sync {
    // Required methods

    /// Clean and normalize `text` for tokenization, returning the result
    /// as a new `String`.
    fn preprocess(&self, text: &str) -> String;

    /// Post-process decoded `text`, returning the result as a new `String`.
    fn postprocess(&self, text: &str) -> String;

    /// Detect the language of `text`, if supported.
    ///
    /// NOTE(review): presumably `None` means detection is unsupported or
    /// inconclusive, and the `String` format (e.g. language code vs. name)
    /// is implementor-defined — confirm against implementors.
    fn detect_language(&self, text: &str) -> Option<String>;

    /// Split `text` into sentences, returned as owned `String`s.
    fn sentence_split(&self, text: &str) -> Vec<String>;

    /// Estimate the number of tokens in `text` without performing full
    /// tokenization; the result is approximate.
    fn estimate_token_count(&self, text: &str) -> usize;
}
Expand description

Text processing utilities

Required Methods§

Source

fn preprocess(&self, text: &str) -> String

Clean and normalize text for tokenization

Source

fn postprocess(&self, text: &str) -> String

Post-process decoded text

Source

fn detect_language(&self, text: &str) -> Option<String>

Detect the language of the text (if supported)

Source

fn sentence_split(&self, text: &str) -> Vec<String>

Split text into sentences

Source

fn estimate_token_count(&self, text: &str) -> usize

Estimate the number of tokens without performing full tokenization

Implementors§