Expand description
High-level inference API.
Provides `GenerationConfig` and `ChatModel` for easy token-level
text generation. The API works on token IDs; users encode and decode text with their own tokenizer.
§Non-streaming (all tokens at once)
use multiscreen_rs::prelude::*;
fn main() -> multiscreen_rs::Result<()> {
let model = ChatModel::load("checkpoints/latest.mpk")?;
let token_ids = model.generate(&[1, 2, 3], GenerationConfig::default())?;
println!("generated tokens: {:?}", token_ids);
Ok(())
}
§Streaming (token by token, like ChatGPT)
use multiscreen_rs::prelude::*;
fn main() -> multiscreen_rs::Result<()> {
let model = ChatModel::load("checkpoints/latest.mpk")?;
let full = model.generate_stream(
&[1, 2, 3],
GenerationConfig::default(),
|token_id, _index| {
// Decode with YOUR tokenizer and print word-by-word
print!("{} ", token_id);
true // return false to stop early
},
)?;
Ok(())
}
Structs§
- ChatModel - High-level model for token-level text generation.
- GenerationConfig - Configuration for text generation.