/// Text processing utilities.
///
/// The `Send + Sync` supertrait bounds require every implementor to be safe to
/// move to, and share between, threads.
pub trait TextProcessor: Send + Sync {
    // Required methods

    /// Clean and normalize text for tokenization.
    fn preprocess(&self, text: &str) -> String;

    /// Post-process decoded text.
    fn postprocess(&self, text: &str) -> String;

    /// Detect the language of `text`, if supported.
    ///
    /// NOTE(review): presumably `None` means detection is unsupported or
    /// inconclusive, and the format of the returned `String` (e.g. a language
    /// code) is left to the implementor — confirm against implementations.
    fn detect_language(&self, text: &str) -> Option<String>;

    /// Split text into sentences.
    fn sentence_split(&self, text: &str) -> Vec<String>;

    /// Count approximate tokens without full tokenization.
    fn estimate_token_count(&self, text: &str) -> usize;
}
Text processing utilities
Required Methods
fn preprocess(&self, text: &str) -> String
Clean and normalize text for tokenization.
fn postprocess(&self, text: &str) -> String
Post-process decoded text.
fn detect_language(&self, text: &str) -> Option<String>
Detect the language of the text (if supported).
fn sentence_split(&self, text: &str) -> Vec<String>
Split the text into sentences.
fn estimate_token_count(&self, text: &str) -> usize
Estimate the token count without running full tokenization.