// rust_macios/natural_language/nl_tokenizer.rs

use block::{ConcreteBlock, IntoConcreteBlock};
use objc::{msg_send, sel, sel_impl};

use crate::{
    foundation::{NSArray, NSRange, NSString, UInt},
    object,
    objective_c_runtime::{
        macros::interface_impl,
        traits::{FromId, PNSObject},
        INSValue,
    },
};

use super::{NLLanguage, NLTokenUnit};
15
/// Hints about the contents of a token, as reported to the block passed to
/// `enumerate_tokens_in_range_using_block`.
///
/// Mirrors the Objective-C `NLTokenizerAttributes` constants: each non-`None`
/// variant occupies a distinct bit (`numeric` = 1<<0, `symbolic` = 1<<1,
/// `emoji` = 1<<2). `Clone`/`Copy` are derived so values can be passed into
/// callbacks and compared without moving — the original derived neither, which
/// made the type needlessly move-only for a plain `u64` discriminant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u64)]
pub enum NLTokenizerAttributes {
    /// No attribute applies to the token.
    None = 0,
    /// The token contains numeric characters.
    Numeric = 1 << 0,
    /// The token contains symbolic characters.
    Symbolic = 1 << 1,
    /// The token contains emoji.
    Emoji = 1 << 2,
}
29
// Rust handle to Apple's Objective-C `NLTokenizer` class (Natural Language
// framework). The project `object!` macro generates the wrapper struct and
// its runtime plumbing — presumably including the `m_self()` accessor used
// by the methods below; confirm against the macro's definition.
object! {
    unsafe pub struct NLTokenizer;
}
34
// Thin bindings: every method below forwards one Objective-C message via
// `msg_send!`; the `interface_impl` proc macro additionally derives the
// crate's interface traits from these `#[method]`/`#[property]` items.
#[interface_impl(NSObject)]
impl NLTokenizer {
    /// Initializes the receiver with the given linguistic unit by sending
    /// `initWithUnit:` and wrapping the returned object id in a new `Self`.
    ///
    /// NOTE(review): follows the ObjC alloc/init pattern — the receiver is
    /// expected to be a freshly allocated instance (e.g. from `Default`),
    /// and the returned value is the initialized object to use thereafter.
    #[method]
    pub fn init_with_unit(&mut self, unit: NLTokenUnit) -> Self
    where
        Self: Sized + FromId,
    {
        unsafe { Self::from_id(msg_send![self.m_self(), initWithUnit: unit]) }
    }

    /// Returns the text being tokenized (the ObjC `string` property).
    #[property]
    pub fn string(&self) -> NSString {
        unsafe { NSString::from_id(msg_send![self.m_self(), string]) }
    }

    /// Sets the text to tokenize (the ObjC `setString:` setter).
    #[property]
    pub fn set_string(&mut self, string: NSString) {
        unsafe { msg_send![self.m_self(), setString: string] }
    }

    /// Sets the language of the text to process, via `setLanguage:`.
    #[method]
    pub fn set_language(&mut self, language: NLLanguage) {
        unsafe { msg_send![self.m_self(), setLanguage: language] }
    }

    /// Returns the linguistic unit this tokenizer was configured with
    /// (the ObjC `unit` property).
    #[property]
    pub fn unit(&self) -> NLTokenUnit {
        unsafe { msg_send![self.m_self(), unit] }
    }

    /// Enumerates tokens over `range`, invoking `block` once per token with
    /// the token's range, its [`NLTokenizerAttributes`], and a stop flag.
    ///
    /// The closure is wrapped in a `ConcreteBlock` and copied before being
    /// handed to `enumerateTokensInRange:usingBlock:`. The `*mut bool`
    /// parameter is the ObjC `stop` out-parameter — presumably writing `true`
    /// through it halts enumeration; confirm against Apple's NLTokenizer docs.
    #[method]
    pub fn enumerate_tokens_in_range_using_block<F>(&self, range: NSRange, block: F)
    where
        F: IntoConcreteBlock<(NSRange, NLTokenizerAttributes, *mut bool), Ret = ()> + 'static,
    {
        // Stack blocks must be copied to the heap before crossing into ObjC.
        let block = ConcreteBlock::new(block);
        let block = block.copy();
        unsafe {
            msg_send![
                self.m_self(),
                enumerateTokensInRange: range
                usingBlock: block
            ]
        }
    }

    /// Returns the token ranges intersecting `range`, via `tokensForRange:`.
    /// `T` is the `NSValue`-like element type the resulting array is viewed as.
    #[method]
    pub fn tokens_for_range<T>(&self, range: NSRange) -> NSArray<T>
    where
        T: INSValue,
    {
        unsafe { NSArray::from_id(msg_send![self.m_self(), tokensForRange: range]) }
    }

    /// Returns the range of the token containing the character at
    /// `character_index`, via `tokenRangeAtIndex:`.
    #[method]
    pub fn token_range_at_index(&self, character_index: UInt) -> NSRange {
        unsafe { msg_send![self.m_self(), tokenRangeAtIndex: character_index] }
    }

    /// Returns the range covering the tokens that intersect `range`,
    /// via `tokenRangeForRange:`.
    #[method]
    pub fn token_range_for_range(&self, range: NSRange) -> NSRange {
        unsafe { msg_send![self.m_self(), tokenRangeForRange: range] }
    }
}
117
118impl Default for NLTokenizer {
119 fn default() -> Self {
120 Self::m_new()
121 }
122}
123
#[cfg(test)]
mod tests {

    use crate::natural_language::English;

    use super::*;

    /// Every test below wants the same setup: a freshly allocated tokenizer
    /// initialized for word-level units. Centralize it here.
    fn word_tokenizer() -> NLTokenizer {
        NLTokenizer::default().init_with_unit(NLTokenUnit::Word)
    }

    #[test]
    fn test_init() {
        let tokenizer = word_tokenizer();
        assert_ne!(tokenizer.unit(), NLTokenUnit::Sentence);
        assert_eq!(tokenizer.unit(), NLTokenUnit::Word);
    }

    #[test]
    fn test_string() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_ne!(tokenizer.string(), "Goodbye, world!");
        assert_eq!(tokenizer.string(), "Hello, world!");
    }

    #[test]
    fn test_set_string() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_ne!(tokenizer.string(), "Goodbye, world!");
        assert_eq!(tokenizer.string(), "Hello, world!");
    }

    #[test]
    fn test_set_language() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_language(unsafe { English.clone() });
    }

    #[test]
    fn test_unit() {
        let tokenizer = word_tokenizer();
        assert_ne!(tokenizer.unit(), NLTokenUnit::Sentence);
        assert_eq!(tokenizer.unit(), NLTokenUnit::Word);
    }

    #[test]
    fn test_token_range_at_index() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        // "Hello" occupies characters 0..5.
        assert_eq!(tokenizer.token_range_at_index(0), (0..5).into());
    }

    #[test]
    fn test_token_range_for_range() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_eq!(
            tokenizer.token_range_for_range((0..5).into()),
            (0..5).into()
        );
    }

    #[test]
    fn test_enumerate_tokens_in_range() {
        let mut tokenizer = word_tokenizer();
        let text = "Hello";
        tokenizer.set_string(text.into());
        tokenizer.enumerate_tokens_in_range_using_block((0..text.len()).into(), |_, attr, _| {
            assert_eq!(attr, NLTokenizerAttributes::None);
        });
    }
}