tokenizers/normalizers/
prepend.rs1use crate::tokenizer::{NormalizedString, Normalizer, Result};
2use serde::{Deserialize, Serialize};
3
4#[derive(Clone, Debug, Deserialize, Serialize)]
5#[serde(tag = "type")]
6pub struct Prepend {
7 pub prepend: String,
8}
9
10impl Prepend {
11 pub fn new(prepend: String) -> Self {
12 Self { prepend }
13 }
14}
15
16impl Normalizer for Prepend {
17 fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
19 if !normalized.is_empty() {
20 normalized.prepend(&self.prepend);
21 }
22 Ok(())
23 }
24}
25
#[cfg(test)]
mod tests {
    use super::*;

    /// Prepending "▁" to a non-empty string updates both the text and the
    /// alignments between the original and the normalized form.
    #[test]
    fn test_prepend() {
        let original = "Hello";
        let normalized = "▁Hello";
        // Sanity check on the fixture itself: the two strings must differ.
        assert_ne!(original, normalized);
        let mut n = NormalizedString::from(original);
        let prepend = Prepend::new("▁".to_string());
        prepend.normalize(&mut n).unwrap();
        assert_eq!(&n.get(), &normalized);
        assert_eq!(
            n,
            NormalizedString::new(
                original.to_string(),
                normalized.to_string(),
                // One entry per byte of the normalized string. "▁" is three
                // bytes in UTF-8, and each of them is expected to map onto
                // the first original byte (0, 1), just like the "H" that
                // follows. The remaining bytes map one-to-one.
                vec![
                    (0, 1),
                    (0, 1),
                    (0, 1),
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5)
                ],
                0
            )
        );
        // One entry per byte of the original string: the first original byte
        // covers the inserted prefix plus "H" (normalized bytes 0..4), the
        // others cover a single byte each.
        assert_eq!(
            n.alignments_original(),
            vec![(0, 4), (4, 5), (5, 6), (6, 7), (7, 8)]
        );
    }

    /// An empty input must be left untouched: `normalize` skips the prefix
    /// when there is no text.
    #[test]
    fn test_prepend_empty() {
        let mut n = NormalizedString::from("");
        let prepend = Prepend::new("▁".to_string());
        prepend.normalize(&mut n).unwrap();
        assert_eq!(n.get(), "");
        assert_eq!(n, NormalizedString::from(""));
    }
}