ferrum_tokenizer/lib.rs
1//! # Ferrum Tokenizer
2//!
//! MVP tokenizer implementation for the Ferrum inference stack.
4//!
5//! This crate provides HuggingFace tokenizers integration and implements
6//! the tokenizer interfaces defined in `ferrum-interfaces`.
7//!
8//! ## Features
9//!
10//! - **HuggingFace Integration**: Load tokenizers from HF Hub or local files
11//! - **Incremental Decoding**: Efficient token-by-token decoding for streaming
12//! - **Chat Templates**: Support for conversation formatting (basic)
13//! - **Special Tokens**: Proper handling of BOS, EOS, PAD tokens
14
15pub mod implementations;
16
17// Re-export interface types
18pub use ferrum_interfaces::{
19 tokenizer::TokenizerType, IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo,
20};
21
22pub use ferrum_types::{Result, SpecialTokens, TokenId};
23
24// Re-export implementations
25pub use implementations::*;
26
27/// Default tokenizer factory using HuggingFace backend
28pub fn default_factory() -> HuggingFaceTokenizerFactory {
29 HuggingFaceTokenizerFactory::new()
30}
31
32/// Load tokenizer from file
33pub async fn load_from_file(path: &str) -> Result<Box<dyn Tokenizer>> {
34 default_factory().load_from_file(path).await
35}
36
37/// Load tokenizer from HuggingFace Hub
38pub async fn load_from_hub(repo_id: &str, revision: Option<&str>) -> Result<Box<dyn Tokenizer>> {
39 default_factory().load_from_hub(repo_id, revision).await
40}