//! Alphabet (character-level) tokenizer implementing the parent module's `Tokenizer` trait.
use super::Tokenizer;
use serde::{Deserialize, Serialize};
/// Stateless tokenizer (it has no fields) whose `Tokenizer` implementation
/// splits text at character boundaries.
///
/// NOTE(review): declared with empty braces rather than as a unit struct;
/// presumably this keeps the serde-derived representation that of a struct
/// with named fields — confirm before changing the declaration form.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AlphabetTokenizer {}
impl Tokenizer for AlphabetTokenizer {
    /// Creates the tokenizer. It holds no state, so nothing is read or loaded.
    fn load() -> Self {
        AlphabetTokenizer {}
    }

    /// Splits `string` into one token per Unicode scalar value (`char`).
    ///
    /// An empty input produces an empty vector.
    fn tokenize(&self, string: &str) -> Vec<String> {
        // `str::split("")` also yields an empty string before the first and
        // after the last character; walking `chars()` gives exactly the
        // characters, with no intermediate vector and no index arithmetic.
        string.chars().map(|c| c.to_string()).collect()
    }

    /// Tokenizes every string in `strings`, preserving input order.
    ///
    /// Each inner vector is exactly what `tokenize` returns for that string,
    /// so no leading or trailing empty tokens are produced.
    fn batch_tokenize(&self, strings: Vec<String>) -> Vec<Vec<String>> {
        strings
            .iter()
            .map(|string| self.tokenize(string))
            .collect()
    }

    /// Concatenates `tokens` back into a single string, with no separator.
    fn untokenize(&self, tokens: Vec<String>) -> String {
        tokens.concat()
    }

    /// Concatenates each token sequence in `tokens` into its own string,
    /// preserving input order.
    fn batch_untokenize(&self, tokens: Vec<Vec<String>>) -> Vec<String> {
        tokens.iter().map(|sequence| sequence.concat()).collect()
    }
}