use block::{ConcreteBlock, IntoConcreteBlock};
use objc::{msg_send, sel, sel_impl};
use crate::{
foundation::{NSArray, NSRange, NSString, UInt},
object,
objective_c_runtime::{
macros::interface_impl,
traits::{FromId, PNSObject},
INSValue,
},
};
use super::{NLLanguage, NLTokenUnit};
/// Hints about the contents of a token, reported during token enumeration.
///
/// Mirrors the Objective-C `NLTokenizerAttributes` option set: the non-zero
/// variants are distinct bit flags (`1 << n`), and `None` means no attribute
/// applies to the token.
///
/// NOTE(review): this is modeled as a plain enum rather than a bitflags
/// type, so a combined flag value (e.g. `Numeric | Emoji`) coming back from
/// the runtime cannot be represented — confirm the enumeration callback only
/// ever receives single flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u64)]
pub enum NLTokenizerAttributes {
    /// No attributes apply to the token.
    None = 0,
    /// The token is numeric.
    Numeric = 1 << 0,
    /// The token is symbolic (e.g. punctuation).
    Symbolic = 1 << 1,
    /// The token is an emoji.
    Emoji = 1 << 2,
}
// Wrapper type for Apple's `NLTokenizer` Objective-C class (Natural Language
// framework). The `object!` macro generates the backing object handle and the
// `PNSObject` plumbing for it.
object! {
    unsafe pub struct NLTokenizer;
}
// Methods and properties bridged from the Objective-C `NLTokenizer` API.
// NOTE(review): `#[interface_impl(NSObject)]` generates trait plumbing from
// the `#[method]`/`#[property]` markers; the attribute kinds below are kept
// as-is since changing them would alter the generated interface (observe that
// `set_language` is tagged `#[method]` even though it sends the `setLanguage:`
// setter — presumably intentional, verify against the macro's conventions).
#[interface_impl(NSObject)]
impl NLTokenizer {
    // Initializes the tokenizer with the unit (word, sentence, etc.) it
    // should segment text into, by sending `initWithUnit:` and wrapping the
    // returned object id.
    #[method]
    pub fn init_with_unit(&mut self, unit: NLTokenUnit) -> Self
    where
        Self: Sized + FromId,
    {
        unsafe { Self::from_id(msg_send![self.m_self(), initWithUnit: unit]) }
    }

    // Returns the text currently being tokenized (the `string` property).
    #[property]
    pub fn string(&self) -> NSString {
        unsafe { NSString::from_id(msg_send![self.m_self(), string]) }
    }

    // Sets the text to tokenize (the `string` property setter).
    #[property]
    pub fn set_string(&mut self, string: NSString) {
        unsafe { msg_send![self.m_self(), setString: string] }
    }

    // Sets the language of the text, overriding automatic detection, via the
    // `setLanguage:` message.
    #[method]
    pub fn set_language(&mut self, language: NLLanguage) {
        unsafe { msg_send![self.m_self(), setLanguage: language] }
    }

    // Returns the linguistic unit (word, sentence, ...) this tokenizer was
    // configured with.
    #[property]
    pub fn unit(&self) -> NLTokenUnit {
        unsafe { msg_send![self.m_self(), unit] }
    }

    // Enumerates tokens within `range`, invoking `block` once per token with
    // the token's range, its attributes, and a stop-out pointer.
    // NOTE(review): the out pointer is declared `*mut bool` — presumably it
    // maps to the Objective-C `BOOL *stop` parameter; confirm the BOOL layout
    // matches `bool` on the target platform.
    #[method]
    pub fn enumerate_tokens_in_range_using_block<F>(&self, range: NSRange, block: F)
    where
        F: IntoConcreteBlock<(NSRange, NLTokenizerAttributes, *mut bool), Ret = ()> + 'static,
    {
        let block = ConcreteBlock::new(block);
        // `copy()` moves the block to the heap so the Objective-C runtime can
        // retain it beyond this stack frame.
        let block = block.copy();
        unsafe {
            msg_send![
                self.m_self(),
                enumerateTokensInRange: range
                usingBlock: block
            ]
        }
    }

    // Returns the ranges of all tokens intersecting `range`, as an array of
    // value objects (sends `tokensForRange:`).
    #[method]
    pub fn tokens_for_range<T>(&self, range: NSRange) -> NSArray<T>
    where
        T: INSValue,
    {
        unsafe { NSArray::from_id(msg_send![self.m_self(), tokensForRange: range]) }
    }

    // Returns the range of the token containing the character at
    // `character_index`.
    #[method]
    pub fn token_range_at_index(&self, character_index: UInt) -> NSRange {
        unsafe { msg_send![self.m_self(), tokenRangeAtIndex: character_index] }
    }

    // Returns the smallest range of full tokens covering `range`.
    #[method]
    pub fn token_range_for_range(&self, range: NSRange) -> NSRange {
        unsafe { msg_send![self.m_self(), tokenRangeForRange: range] }
    }
}
impl Default for NLTokenizer {
fn default() -> Self {
Self::m_new()
}
}
#[cfg(test)]
mod tests {
    use crate::natural_language::English;

    use super::*;

    /// Builds the word-unit tokenizer every test needs, removing the
    /// construct-then-init duplication from each test body.
    fn word_tokenizer() -> NLTokenizer {
        let mut tokenizer = NLTokenizer::default();
        tokenizer = tokenizer.init_with_unit(NLTokenUnit::Word);
        tokenizer
    }

    #[test]
    fn test_init() {
        let tokenizer = word_tokenizer();
        assert_ne!(tokenizer.unit(), NLTokenUnit::Sentence);
        assert_eq!(tokenizer.unit(), NLTokenUnit::Word);
    }

    #[test]
    fn test_string() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_ne!(tokenizer.string(), "Goodbye, world!");
        assert_eq!(tokenizer.string(), "Hello, world!");
    }

    #[test]
    fn test_set_string() {
        // Unlike `test_string`, this verifies that setting a new string
        // actually replaces the previous one (the two tests used to be
        // identical copies).
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_eq!(tokenizer.string(), "Hello, world!");
        tokenizer.set_string("Goodbye, world!".into());
        assert_eq!(tokenizer.string(), "Goodbye, world!");
    }

    #[test]
    fn test_set_language() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_language(unsafe { English.clone() });
    }

    #[test]
    fn test_unit() {
        let tokenizer = word_tokenizer();
        assert_ne!(tokenizer.unit(), NLTokenUnit::Sentence);
        assert_eq!(tokenizer.unit(), NLTokenUnit::Word);
    }

    #[test]
    fn test_token_range_at_index() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        // "Hello" occupies characters 0..5.
        assert_eq!(tokenizer.token_range_at_index(0), (0..5).into());
    }

    #[test]
    fn test_token_range_for_range() {
        let mut tokenizer = word_tokenizer();
        tokenizer.set_string("Hello, world!".into());
        assert_eq!(
            tokenizer.token_range_for_range((0..5).into()),
            (0..5).into()
        );
    }

    #[test]
    fn test_enumerate_tokens_in_range() {
        let mut tokenizer = word_tokenizer();
        let text = "Hello";
        tokenizer.set_string(text.into());
        // A plain word carries no numeric/symbolic/emoji attributes.
        tokenizer.enumerate_tokens_in_range_using_block((0..text.len()).into(), |_, attr, _| {
            assert_eq!(attr, NLTokenizerAttributes::None);
        });
    }
}