sapient-generate 0.3.1

Text generation for SAPIENT — Pipeline::from_pretrained, KV cache, greedy/top-p/top-k sampling, streaming
Documentation

sapient-generate — LLM text generation pipeline.

The main entry point is [`Pipeline`], which provides a minimal API for running any Hugging Face LLM:

use sapient_generate::Pipeline;

// Demonstrates the three ways of calling a `Pipeline` shown in this crate's
// docs: one-shot completion, chat, and token-by-token streaming.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let pipeline = Pipeline::from_pretrained("microsoft/phi-2").await?;

    // Simple completion
    let text = pipeline.generate("The meaning of life is").await?;
    println!("{text}");

    // Chat (for instruct models)
    use sapient_tokenizers::ChatMessage;
    let reply = pipeline.chat(&[
        ChatMessage::system("You are a helpful assistant."),
        ChatMessage::user("Explain quantum computing in simple terms."),
    ]).await?;
    println!("{reply}");

    // Streaming
    use futures::StreamExt;
    use std::io::Write;
    let mut stream = pipeline.generate_stream("Once upon a time").await;
    let mut stdout = std::io::stdout();
    while let Some(token) = stream.next().await {
        print!("{token}");
        // `print!` does not flush, and stdout is usually line-buffered, so
        // without this the tokens would only show up at the next newline
        // (or at exit) instead of as they are produced.
        stdout.flush()?;
    }
    // Terminate the streamed line so the shell prompt starts on a fresh line.
    println!();
    Ok(())
}