tokenizers 0.22.2

Provides an implementation of today's most used tokenizers, with a focus on performances and versatility.
Documentation
#![cfg(feature = "http")]
use tokenizers::{FromPretrainedParameters, Result, Tokenizer};

#[test]
fn test_from_pretrained() -> Result<()> {
    let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None)?;
    let encoding = tokenizer.encode("Hey there dear friend!", false)?;
    assert_eq!(
        encoding.get_tokens(),
        &["Hey", "there", "dear", "friend", "!"]
    );
    Ok(())
}

#[test]
fn test_from_pretrained_revision() -> Result<()> {
    let tokenizer = Tokenizer::from_pretrained("anthony/tokenizers-test", None)?;
    let encoding = tokenizer.encode("Hey there dear friend!", false)?;
    assert_eq!(
        encoding.get_tokens(),
        &["hey", "there", "dear", "friend", "!"]
    );

    let tokenizer = Tokenizer::from_pretrained(
        "anthony/tokenizers-test",
        Some(FromPretrainedParameters {
            revision: "gpt-2".to_string(),
            ..Default::default()
        }),
    )?;
    let encoding = tokenizer.encode("Hey there dear friend!", false)?;
    assert_eq!(
        encoding.get_tokens(),
        &["Hey", "Ġthere", "Ġdear", "Ġfriend", "!"]
    );

    Ok(())
}

#[test]
fn test_from_pretrained_invalid_model() {
    let tokenizer = Tokenizer::from_pretrained("docs?", None);
    assert!(tokenizer.is_err());
}

#[test]
fn test_from_pretrained_invalid_revision() {
    let tokenizer = Tokenizer::from_pretrained(
        "bert-base-cased",
        Some(FromPretrainedParameters {
            revision: "gpt?".to_string(),
            ..Default::default()
        }),
    );
    assert!(tokenizer.is_err());
}