use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Settings bundle describing how text should be tokenized.
///
/// The struct is plain data: it can be cloned, debug-printed, and round-tripped
/// through serde. It performs no validation of its own.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TokenizationConfig {
    /// Tokenizer selector, stored as a free-form string.
    ///
    /// The set of accepted values is not defined here; the `Default` impl
    /// uses `"whitespace"`.
    pub tokenizer_type: String,

    /// Optional filesystem path, presumably to a vocabulary file
    /// (NOTE(review): confirm against the code that consumes this config).
    pub vocab_path: Option<PathBuf>,

    /// Optional length limit; `None` means no limit is configured.
    ///
    /// NOTE(review): the unit (tokens vs. characters) is not visible from
    /// this struct — confirm with the consumer.
    pub max_length: Option<usize>,

    /// Whether padding is requested (presumably up to `max_length` — confirm).
    pub padding: bool,

    /// Whether truncation is requested (presumably down to `max_length` — confirm).
    pub truncation: bool,

    /// Whether lowercasing is requested.
    pub lowercase: bool,
}
impl Default for TokenizationConfig {
fn default() -> Self {
Self {
tokenizer_type: "whitespace".to_string(),
vocab_path: None,
max_length: Some(512),
padding: true,
truncation: true,
lowercase: true,
}
}
}