lindera_filter/token_filter/
uppercase.rs1use lindera_core::LinderaResult;
2
3use crate::token::Token;
4use crate::token_filter::TokenFilter;
5
/// Name of the uppercase token filter; this is the value `UppercaseTokenFilter::name` returns.
6pub const UPPERCASE_TOKEN_FILTER_NAME: &str = "uppercase";
7
/// Token filter that upper-cases the text of every token it is applied to.
///
/// The filter has no fields, so every instance behaves identically.
#[derive(Debug, Clone)]
pub struct UppercaseTokenFilter {}
12
13impl UppercaseTokenFilter {
14 pub fn new() -> Self {
15 Self {}
16 }
17}
18
19impl Default for UppercaseTokenFilter {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl TokenFilter for UppercaseTokenFilter {
26 fn name(&self) -> &'static str {
27 UPPERCASE_TOKEN_FILTER_NAME
28 }
29
30 fn apply<'a>(&self, tokens: &mut Vec<Token>) -> LinderaResult<()> {
31 for token in tokens.iter_mut() {
32 token.text = token.text.to_uppercase();
33 }
34
35 Ok(())
36 }
37}
38
#[cfg(test)]
mod tests {
    #[cfg(feature = "ipadic")]
    use lindera_core::word_entry::WordId;

    #[cfg(feature = "ipadic")]
    use crate::{
        token::Token,
        token_filter::{uppercase::UppercaseTokenFilter, TokenFilter},
    };

    /// A mixed-case token must come out fully upper-cased, with the token count unchanged.
    #[test]
    #[cfg(feature = "ipadic")]
    fn test_uppercase_token_filter_apply() {
        let mixed_case = Token {
            text: String::from("Rust"),
            byte_start: 0,
            byte_end: 4,
            position: 0,
            position_length: 1,
            word_id: WordId(4294967295, true),
            details: vec![String::from("UNK")],
        };
        let mut tokens: Vec<Token> = vec![mixed_case];

        let filter = UppercaseTokenFilter::default();
        filter.apply(&mut tokens).unwrap();

        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].text, "RUST");
    }
}