use crate::{
TokenDecoderOptions,
TokenEncoderOptions,
TokenType,
Tokenizer,
UnifiedTokenVocab,
alloc::sync::Arc,
};
/// Combined encoder and decoder configuration for building a [`Tokenizer`].
///
/// The two option sets are stored side by side; the convenience accessors on
/// this type forward to one or both of them, and [`TokenizerOptions::build`]
/// uses each set to construct its half of the tokenizer.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct TokenizerOptions {
/// Options used to build the encoder half of the tokenizer.
pub encoder: TokenEncoderOptions,
/// Options used to build the decoder half of the tokenizer.
pub decoder: TokenDecoderOptions,
}
impl TokenizerOptions {
    /// Returns the accelerated-lexers flag stored on the encoder options.
    pub fn accelerated_lexers(&self) -> bool {
        self.encoder.accelerated_lexers()
    }

    /// Stores the accelerated-lexers flag on the encoder options.
    ///
    /// The decoder options are not touched by this setter.
    pub fn set_accelerated_lexers(
        &mut self,
        accelerated_lexers: bool,
    ) {
        self.encoder.set_accelerated_lexers(accelerated_lexers);
    }

    /// Builder-style variant of [`Self::set_accelerated_lexers`]: consumes the
    /// options and returns them with the flag applied.
    pub fn with_accelerated_lexers(
        mut self,
        accelerated_lexers: bool,
    ) -> Self {
        self.encoder.set_accelerated_lexers(accelerated_lexers);
        self
    }

    /// Returns `true` when either the encoder options or the decoder options
    /// report `parallel()`.
    pub fn parallel(&self) -> bool {
        let encoder_parallel = self.encoder.parallel();
        // The decoder is only consulted when the encoder reports `false`.
        encoder_parallel || self.decoder.parallel()
    }

    /// Applies the same `parallel` flag to both the encoder and the decoder
    /// options.
    pub fn set_parallel(
        &mut self,
        parallel: bool,
    ) {
        self.encoder.set_parallel(parallel);
        self.decoder.set_parallel(parallel);
    }

    /// Builder-style variant of [`Self::set_parallel`]: consumes the options
    /// and returns them with the flag applied to encoder and decoder alike.
    pub fn with_parallel(
        mut self,
        parallel: bool,
    ) -> Self {
        self.encoder.set_parallel(parallel);
        self.decoder.set_parallel(parallel);
        self
    }

    /// Returns `true` when [`Self::concurrent`] or [`Self::parallel`] is set.
    pub fn is_concurrent(&self) -> bool {
        self.concurrent() || self.parallel()
    }

    /// Returns the `concurrent` flag stored on the encoder options.
    pub fn concurrent(&self) -> bool {
        self.encoder.concurrent()
    }

    /// Stores the `concurrent` flag on the encoder options.
    ///
    /// The decoder options are not touched by this setter.
    pub fn set_concurrent(
        &mut self,
        concurrent: bool,
    ) {
        self.encoder.set_concurrent(concurrent);
    }

    /// Builder-style variant of [`Self::set_concurrent`]: consumes the options
    /// and returns them with the flag applied.
    pub fn with_concurrent(
        mut self,
        concurrent: bool,
    ) -> Self {
        self.encoder.set_concurrent(concurrent);
        self
    }

    /// Builds a shared [`Tokenizer`] over `vocab`.
    ///
    /// The encoder is built from the encoder options and the decoder from the
    /// decoder options, each receiving its own handle to `vocab`; the
    /// tokenizer itself receives a handle as well.
    pub fn build<T: TokenType>(
        &self,
        vocab: Arc<UnifiedTokenVocab<T>>,
    ) -> Arc<Tokenizer<T>> {
        // Encoder first, then decoder; each gets a cloned `Arc` handle.
        let encoder = self.encoder.build(Arc::clone(&vocab));
        let decoder = self.decoder.build(Arc::clone(&vocab));

        // The incoming handle is moved into the tokenizer.
        Tokenizer::new(vocab, encoder, decoder).into()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_concurrent()` for three parallel/concurrent combinations:
    /// both off, only parallel on, and only concurrent on.
    #[test]
    fn test_is_concurrent() {
        let options = TokenizerOptions::default()
            .with_parallel(false)
            .with_concurrent(false);
        assert!(!options.is_concurrent());

        let options = TokenizerOptions::default()
            .with_parallel(true)
            .with_concurrent(false);
        assert!(options.is_concurrent());

        let options = TokenizerOptions::default()
            .with_parallel(false)
            .with_concurrent(true);
        assert!(options.is_concurrent());
    }

    /// Checks the default option values and that the builder methods update
    /// the encoder and decoder options as expected.
    #[test]
    fn test_tokenizer_options_defaults() {
        let options = TokenizerOptions::default();
        assert_eq!(options.encoder, TokenEncoderOptions::default());
        assert_eq!(options.decoder, TokenDecoderOptions::default());
        assert!(options.accelerated_lexers());
        assert!(!options.parallel());

        let options = options.with_parallel(true).with_accelerated_lexers(false);
        assert!(!options.accelerated_lexers());
        assert!(options.parallel());
        // `with_parallel` must reach both halves, not just one of them.
        assert!(options.encoder.parallel());
        assert!(options.decoder.parallel());
    }
}