scirs2-text 0.4.3

Text processing module for SciRS2 (scirs2-text)
Documentation
//! Advanced tokenization algorithms.
//!
//! Submodules summarized here:
//!
//! - [`bpe`]: Byte-Pair Encoding tokenizer with training support.
//! - [`wordpiece`]: BERT-style WordPiece tokenizer.
//! - [`language_agnostic`]: Language-agnostic Unicode tokenizer (UAX #29, CJK-aware).
//!
//! Further submodules are declared by this file but not summarized here; see
//! each module's own documentation for what it provides:
//! [`byte_level_bpe`], [`hf_json`], [`llama`], [`multilingual_bpe`],
//! [`unicode_bpe`] and [`unicode_normalizer`].
//!
//! For convenience, [`compute_merges`], [`BpeTokenizer`], [`BpeVocab`],
//! [`LanguageAgnosticTokenizer`], [`BasicTokenizer`] and [`WordPieceTokenizer`]
//! are re-exported and can be named directly from this module.

// Public submodules, kept in alphabetical order.
pub mod bpe;
pub mod byte_level_bpe;
pub mod hf_json;
pub mod language_agnostic;
pub mod llama;
pub mod multilingual_bpe;
pub mod unicode_bpe;
pub mod unicode_normalizer;
pub mod wordpiece;

// Re-exports: these items are reachable without spelling out the submodule
// path. Only `bpe`, `language_agnostic` and `wordpiece` have items re-exported
// in this file; items from the other submodules are reached via their module
// path.
pub use bpe::{compute_merges, BpeTokenizer, BpeVocab};
pub use language_agnostic::LanguageAgnosticTokenizer;
pub use wordpiece::{BasicTokenizer, WordPieceTokenizer};