kitoken 0.11.0

Fast tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
use alloc::string::{String, ToString};

/// Expands a tokenizer location into a full URL.
///
/// Inputs that start with the `hf:` shorthand are rewritten: the text after
/// the prefix is placed between `https://huggingface.co` and
/// `resolve/main/tokenizer.json`, with the three parts joined by `/`. The
/// remainder is inserted verbatim; it is not validated or normalized, so an
/// empty remainder or one with leading/trailing slashes yields consecutive
/// `/` characters in the result.
///
/// Any input without the `hf:` prefix is returned unchanged as an owned
/// `String`.
pub fn parse_url(url: &str) -> String {
    // `strip_prefix` both tests for the shorthand and yields the remainder,
    // so there is no separate `starts_with` check and no `unwrap`.
    match url.strip_prefix("hf:") {
        // `join` already produces an owned `String`; no further conversion
        // is needed.
        Some(repo) => [
            "https://huggingface.co",
            repo,
            "resolve/main/tokenizer.json",
        ]
        .join("/"),
        None => url.to_string(),
    }
}