tokenizers/pre_tokenizers/
delimiter.rs1use serde::{Deserialize, Serialize};
2
3use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
4use crate::utils::macro_rules_attribute;
5
6#[derive(Copy, Clone, Debug, PartialEq, Eq)]
7#[non_exhaustive]
8#[macro_rules_attribute(impl_serde_type!)]
9pub struct CharDelimiterSplit {
10 pub delimiter: char,
11}
12
13impl CharDelimiterSplit {
14 pub fn new(delimiter: char) -> Self {
15 Self { delimiter }
16 }
17}
18
19impl PreTokenizer for CharDelimiterSplit {
20 fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> Result<()> {
21 pretokenized.split(|_, normalized| {
23 normalized.split(self.delimiter, SplitDelimiterBehavior::Removed)
24 })
25 }
26}