pub struct WhitespaceTokenizer;
Expand description
Whitespace tokenizer that splits text on Unicode whitespace characters.
This is the simplest tokenizer, splitting on any Unicode whitespace (spaces, tabs, newlines, etc.). It preserves punctuation attached to words.
§Examples
use aprender::text::{Tokenizer, tokenize::WhitespaceTokenizer};
let tokenizer = WhitespaceTokenizer::new();
// Basic tokenization
let tokens = tokenizer.tokenize("Hello, world!").expect("tokenize should succeed");
assert_eq!(tokens, vec!["Hello,", "world!"]);
// Handles multiple spaces
let tokens = tokenizer.tokenize("foo   bar").expect("tokenize should succeed");
assert_eq!(tokens, vec!["foo", "bar"]);
// Handles newlines and tabs
let tokens = tokenizer.tokenize("line1\nline2\ttab").expect("tokenize should succeed");
assert_eq!(tokens, vec!["line1", "line2", "tab"]);
Implementations§
Source§impl WhitespaceTokenizer
impl WhitespaceTokenizer
Sourcepub fn new() -> WhitespaceTokenizer
pub fn new() -> WhitespaceTokenizer
Create a new whitespace tokenizer.
§Examples
use aprender::text::tokenize::WhitespaceTokenizer;
let tokenizer = WhitespaceTokenizer::new();
Trait Implementations§
Source§impl Clone for WhitespaceTokenizer
impl Clone for WhitespaceTokenizer
Source§fn clone(&self) -> WhitespaceTokenizer
fn clone(&self) -> WhitespaceTokenizer
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for WhitespaceTokenizer
impl Debug for WhitespaceTokenizer
Source§impl Default for WhitespaceTokenizer
impl Default for WhitespaceTokenizer
Source§fn default() -> WhitespaceTokenizer
fn default() -> WhitespaceTokenizer
Returns the “default value” for a type. Read more
Auto Trait Implementations§
impl Freeze for WhitespaceTokenizer
impl RefUnwindSafe for WhitespaceTokenizer
impl Send for WhitespaceTokenizer
impl Sync for WhitespaceTokenizer
impl Unpin for WhitespaceTokenizer
impl UnsafeUnpin for WhitespaceTokenizer
impl UnwindSafe for WhitespaceTokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where T: ?Sized
impl<T> BorrowMut<T> for T where T: ?Sized
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where T: Clone
impl<T> CloneToUninit for T where T: Clone
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more