Expand description
Axonml Text - Natural Language Processing Utilities
This crate provides text processing functionality for the Axonml ML framework:
- Vocabulary: Token-to-index mapping with special tokens
- Tokenizers: Various tokenization strategies (whitespace, char, BPE)
- Datasets: Text classification, language modeling, seq2seq
§Example
ⓘ
use axonml_text::prelude::*;
// Build vocabulary from text
let vocab = Vocab::from_text("the quick brown fox", 1);
// Tokenize text
let tokenizer = WhitespaceTokenizer::new();
let tokens = tokenizer.tokenize("hello world");
// Create a sentiment dataset
let dataset = SyntheticSentimentDataset::small();
@version 0.1.0
@author AutomataNexus Development Team
Re-exports§
pub use vocab::Vocab;
pub use vocab::BOS_TOKEN;
pub use vocab::EOS_TOKEN;
pub use vocab::MASK_TOKEN;
pub use vocab::PAD_TOKEN;
pub use vocab::UNK_TOKEN;
pub use tokenizer::BasicBPETokenizer;
pub use tokenizer::CharTokenizer;
pub use tokenizer::NGramTokenizer;
pub use tokenizer::Tokenizer;
pub use tokenizer::UnigramTokenizer;
pub use tokenizer::WhitespaceTokenizer;
pub use tokenizer::WordPunctTokenizer;
pub use datasets::LanguageModelDataset;
pub use datasets::SyntheticSentimentDataset;
pub use datasets::SyntheticSeq2SeqDataset;
pub use datasets::TextDataset;