use mullama::prelude::*;
#[cfg(all(feature = "streaming", feature = "async"))]
use mullama::StreamConfig;
#[cfg(all(feature = "streaming", feature = "async"))]
use std::time::Instant;
/// Entry point: prints the banner and, when both the `streaming` and `async`
/// features are enabled, runs every demonstration in sequence. Without those
/// features it only prints a hint on how to run the example.
#[tokio::main]
async fn main() -> Result<(), MullamaError> {
    let banner = [
        "🌊 Streaming Text Generation Example",
        "====================================",
    ];
    for line in banner.iter() {
        println!("{}", line);
    }

    #[cfg(all(feature = "streaming", feature = "async"))]
    {
        println!("📂 Loading model for streaming...");

        // The path is resolved but not used by the demonstrations below,
        // hence the leading underscore.
        let _model_path = match std::env::var("MODEL_PATH") {
            Ok(path) => path,
            Err(_) => String::from("path/to/model.gguf"),
        };

        // Each demo propagates its error, so the first failure aborts the run.
        demonstrate_basic_streaming().await?;
        demonstrate_configured_streaming().await?;
        demonstrate_word_streaming().await?;
        demonstrate_text_only_streaming().await?;
        demonstrate_streaming_utilities().await?;
        demonstrate_error_handling().await?;
    }

    #[cfg(not(all(feature = "streaming", feature = "async")))]
    {
        println!("❌ This example requires both 'streaming' and 'async' features");
        println!("Run with: cargo run --example streaming_generation --features streaming,async");
    }

    Ok(())
}
/// Prints a simulated token stream for a fixed prompt.
///
/// Builds a `StreamConfig`, echoes the prompt and two of the config values,
/// then emits a hard-coded list of tokens one at a time with a 100 ms pause
/// before each, flushing stdout so every token appears immediately.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_basic_streaming() -> Result<(), MullamaError> {
    // Canned tokens standing in for real model output.
    const SIMULATED_TOKENS: [&str; 9] = [
        "is",
        " to",
        " enhance",
        " human",
        " capabilities",
        " and",
        " solve",
        " complex",
        " problems",
    ];

    println!("\n🎬 Basic Streaming");
    println!("==================");

    let config = StreamConfig::default()
        .max_tokens(50)
        .temperature(0.8)
        .include_probabilities(true);
    let prompt = "The future of artificial intelligence";

    println!("📝 Prompt: \"{}\"", prompt);
    println!(
        "⚙️ Config: {} tokens, temp={}",
        config.max_tokens, config.sampler_params.temperature
    );
    println!("🎭 Streaming tokens (simulated):");
    print!(" ");

    let token_delay = tokio::time::Duration::from_millis(100);
    let mut stdout = std::io::stdout();
    for piece in SIMULATED_TOKENS.iter() {
        tokio::time::sleep(token_delay).await;
        print!("{}", piece);
        // print! does not end the line, so flush to make the token visible now.
        std::io::Write::flush(&mut stdout).unwrap();
    }

    // The token list is never empty, so the completion line always follows
    // the last token.
    println!("\n🏁 Generation complete!");
    Ok(())
}
/// Shows three named `StreamConfig` presets side by side.
///
/// For each preset it prints the temperature and top-k stored in the config,
/// then writes a canned output string one character at a time with a 50 ms
/// pause after each character.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_configured_streaming() -> Result<(), MullamaError> {
    println!("\n⚙️ Configured Streaming");
    println!("=======================");

    // Each preset carries its label, its configuration and the canned text
    // that is "streamed" for it.
    let presets = vec![
        (
            "Creative",
            StreamConfig::default()
                .temperature(0.9)
                .top_k(60)
                .max_tokens(30),
            "there lived a magnificent dragon who collected shimmering",
        ),
        (
            "Precise",
            StreamConfig::default()
                .temperature(0.2)
                .top_k(10)
                .max_tokens(30),
            "there was a small village with a market square",
        ),
        (
            "Balanced",
            StreamConfig::default()
                .temperature(0.7)
                .top_k(40)
                .max_tokens(30),
            "there lived a young princess who loved to read",
        ),
    ];
    let _prompt = "Once upon a time in a land far away";

    let char_delay = tokio::time::Duration::from_millis(50);
    let mut stdout = std::io::stdout();
    for (label, preset, canned_output) in presets {
        println!("\n🎯 {} configuration:", label);
        println!(" Temperature: {}", preset.sampler_params.temperature);
        println!(" Top-k: {}", preset.sampler_params.top_k);
        print!(" Output: ");

        for ch in canned_output.chars() {
            print!("{}", ch);
            // Flush per character so the output appears progressively.
            std::io::Write::flush(&mut stdout).unwrap();
            tokio::time::sleep(char_delay).await;
        }
        println!();
    }

    Ok(())
}
/// Prints a canned answer word by word.
///
/// Waits 200 ms before each word and writes the word followed by a single
/// space, flushing stdout after every word.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_word_streaming() -> Result<(), MullamaError> {
    println!("\n📝 Word-Based Streaming");
    println!("=======================");

    let prompt = "Explain the concept of machine learning";
    println!("📝 Prompt: \"{}\"", prompt);
    println!("🔤 Word-by-word output:");

    // Canned response; splitting on whitespace yields the individual words.
    let response = "Machine learning is a subset of artificial intelligence \
                    that enables computers to learn and improve automatically";

    let word_delay = tokio::time::Duration::from_millis(200);
    let mut stdout = std::io::stdout();
    for word in response.split_whitespace() {
        tokio::time::sleep(word_delay).await;
        print!("{} ", word);
        std::io::Write::flush(&mut stdout).unwrap();
    }

    println!("\n✅ Word streaming complete");
    Ok(())
}
/// Prints a canned completion as plain text fragments.
///
/// Each fragment already carries its own leading space, so fragments are
/// written back to back. There is a 150 ms pause before each fragment and a
/// flush after it.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_text_only_streaming() -> Result<(), MullamaError> {
    // Canned fragments standing in for streamed text.
    const FRAGMENTS: [&str; 13] = [
        "reduced",
        " carbon",
        " emissions,",
        " energy",
        " independence,",
        " job",
        " creation,",
        " and",
        " long-term",
        " cost",
        " savings",
        " for",
        " consumers.",
    ];

    println!("\n📄 Text-Only Streaming");
    println!("======================");

    let prompt = "The benefits of renewable energy include";
    println!("📝 Prompt: \"{}\"", prompt);
    println!("📜 Text-only output:");
    print!(" ");

    let fragment_delay = tokio::time::Duration::from_millis(150);
    let mut stdout = std::io::stdout();
    for fragment in FRAGMENTS.iter() {
        tokio::time::sleep(fragment_delay).await;
        print!("{}", fragment);
        std::io::Write::flush(&mut stdout).unwrap();
    }

    println!("\n✅ Text streaming complete");
    Ok(())
}
/// Walks through three utility-style outputs using placeholder data:
/// a collected string, a hand-built `utils::GenerationResult`, and a timing
/// report measured around a 500 ms sleep.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_streaming_utilities() -> Result<(), MullamaError> {
    use mullama::streaming::utils;

    println!("\n🛠️ Streaming Utilities");
    println!("======================");

    // 1. Placeholder for text collected from a stream.
    println!("1️⃣ Collect to string:");
    let collected = "This would be the complete generated text";
    println!(" Result: \"{}\"", collected);

    // 2. A result value filled in by hand rather than produced by a model.
    println!("\n2️⃣ Collect with metadata:");
    let result = utils::GenerationResult {
        text: "Generated text with metadata".to_string(),
        token_count: 25,
        tokens: vec![1, 2, 3, 4, 5],
    };
    println!(" Text: \"{}\"", result.text);
    println!(" Token count: {}", result.token_count);
    // Clamp the preview so the slice never runs past the end of the list.
    let preview_len = result.tokens.len().min(5);
    println!(" First 5 tokens: {:?}", &result.tokens[..preview_len]);

    // 3. Time a sleep and report throughput for an assumed 50 tokens.
    println!("\n3️⃣ Performance monitoring:");
    let started = Instant::now();
    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
    let elapsed_secs = started.elapsed().as_secs_f64();
    let tokens_per_second = 50.0 / elapsed_secs;
    println!(" Generation time: {:.2}s", elapsed_secs);
    println!(" Tokens per second: {:.2}", tokens_per_second);

    Ok(())
}
/// Simulates a ten-position stream in which positions 3 and 7 fail.
///
/// Failures are counted and reported without pausing; successes are counted,
/// reported, and followed by a 50 ms pause. A summary with the success rate
/// over the ten positions is printed at the end.
#[cfg(all(feature = "streaming", feature = "async"))]
async fn demonstrate_error_handling() -> Result<(), MullamaError> {
    println!("\n🛡️ Stream Error Handling");
    println!("========================");

    let mut failures = 0;
    let mut successes = 0;

    for position in 0..10 {
        match position {
            // Positions that pretend to hit a stream error.
            3 | 7 => {
                failures += 1;
                println!(
                    "❌ Stream error at position {}: Simulated network issue",
                    position
                );
            }
            _ => {
                successes += 1;
                println!("✅ Token {}: Generated successfully", position);
                tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
            }
        }
    }

    // Percentage of the ten positions that succeeded.
    let success_rate = (successes as f64 / 10.0) * 100.0;

    println!("\n📊 Error handling summary:");
    println!(" Successful tokens: {}", successes);
    println!(" Errors encountered: {}", failures);
    println!(" Success rate: {:.1}%", success_rate);

    Ok(())
}