Expand description
sapient-generate — LLM text generation pipeline.
The main entry point is `Pipeline`, which provides a dead-simple API
for running any Hugging Face LLM:
use sapient_generate::Pipeline;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let pipeline = Pipeline::from_pretrained("microsoft/phi-2").await?;
// Simple completion
let text = pipeline.generate("The meaning of life is").await?;
println!("{text}");
// Chat (for instruct models)
use sapient_tokenizers::ChatMessage;
let reply = pipeline.chat(&[
ChatMessage::system("You are a helpful assistant."),
ChatMessage::user("Explain quantum computing in simple terms."),
]).await?;
println!("{reply}");
// Streaming
use futures::StreamExt;
let mut stream = pipeline.generate_stream("Once upon a time").await;
while let Some(token) = stream.next().await {
print!("{token}");
}
Ok(())
}

Re-exports§
pub use kv_cache::KVCache;
pub use pipeline::GenerationConfig;
pub use pipeline::LoadOptions;
pub use pipeline::Pipeline;
pub use sampler::Sampler;
pub use sampler::SamplingStrategy;
Modules§
- kv_cache - KV-cache for incremental autoregressive decoding.
- pipeline - `Pipeline` — the main user-facing LLM inference API.
- sampler - Token sampling strategies for text generation.