1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
//! # Async Text Generation Example
//!
//! This example demonstrates async/await support in Mullama for non-blocking
//! text generation operations.
//!
//! Run with: cargo run --example async_generation --features async
use mullama::prelude::*;
/// Entry point of the example.
///
/// Prints a banner and, when the `async` feature is enabled, walks through a
/// simulated asynchronous workflow: it resolves a model path, builds context
/// parameters, spawns one Tokio task per prompt, awaits and reports every task,
/// and finally runs the two demonstration helpers. Without the `async` feature
/// it only prints a hint on how to run the example.
///
/// # Errors
///
/// Propagates any `MullamaError` returned by the demonstration helpers.
#[tokio::main]
async fn main() -> Result<(), MullamaError> {
    println!("🚀 Async Text Generation Example");
    println!("=================================");

    #[cfg(feature = "async")]
    {
        println!("📂 Loading model asynchronously...");

        // Note: replace the fallback with an actual model path.
        // The path is read from MODEL_PATH when that variable is set.
        let _model_path = match std::env::var("MODEL_PATH") {
            Ok(path) => path,
            Err(_) => String::from("path/to/model.gguf"),
        };

        // In a real scenario the model would be loaded and inspected here:
        //
        //     let model = AsyncModel::load(&model_path).await?;
        //     println!("✅ Model loaded successfully");
        //
        //     let info = model.info_async().await;
        //     println!("📊 Model Info:");
        //     println!(" Vocabulary size: {}", info.vocab_size);
        //     println!(" Context size: {}", info.n_ctx_train);
        //     println!(" Embedding size: {}", info.n_embd);
        //     println!(" Layers: {}", info.n_layer);

        // Context parameters; every field not listed keeps its default value.
        let _context_params = ContextParams {
            n_ctx: 2048,
            n_batch: 512,
            n_threads: 8,
            embeddings: false,
            flash_attn_type: mullama::sys::llama_flash_attn_type::LLAMA_FLASH_ATTN_TYPE_ENABLED,
            ..ContextParams::default()
        };

        // In a real scenario:
        //
        //     let context = model.create_context_async(context_params).await?;
        //     println!("🧠 Context created with {} tokens capacity", 2048);

        let prompts = [
            "The future of artificial intelligence is",
            "In a world where technology advances rapidly,",
            "The most important aspect of machine learning is",
        ];

        // Spawn one task per prompt so that all generations run concurrently.
        let handles: Vec<_> = prompts
            .iter()
            .enumerate()
            .map(|(idx, text)| {
                // In a real scenario: let model_clone = model.clone();
                let task_no = idx + 1;
                // The spawned task needs an owned copy of the prompt.
                let prompt = text.to_string();

                tokio::spawn(async move {
                    println!(
                        "🤖 Task {}: Starting generation for: \"{}\"",
                        task_no, prompt
                    );

                    // Stand-in for the actual asynchronous generation.
                    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

                    // In a real scenario:
                    //     let result = model_clone.generate_async(&prompt, 100).await?;
                    let generated = format!("Generated text for prompt: {}", prompt);

                    println!("✅ Task {}: Completed", task_no);
                    Ok::<_, MullamaError>(generated)
                })
            })
            .collect();

        // Await the tasks in spawn order and report each outcome.
        println!("⏳ Waiting for all generations to complete...");
        for (handle, task_no) in handles.into_iter().zip(1usize..) {
            match handle.await {
                // The task itself did not run to completion.
                Err(join_err) => eprintln!("❌ Task {} panicked: {}", task_no, join_err),
                // The task finished; inspect what it returned.
                Ok(outcome) => match outcome {
                    Ok(text) => println!("📝 Result {}: {}", task_no, text),
                    Err(gen_err) => eprintln!("❌ Task {} failed: {}", task_no, gen_err),
                },
            }
        }

        // Further demonstrations.
        demonstrate_async_patterns().await?;
        demonstrate_error_handling().await?;
    }

    #[cfg(not(feature = "async"))]
    {
        println!("❌ This example requires the 'async' feature to be enabled");
        println!("Run with: cargo run --example async_generation --features async");
    }

    Ok(())
}
/// Walks through four simulated async usage patterns and prints progress for each:
///
/// 1. building custom model parameters,
/// 2. preparing several context configurations,
/// 3. running a (simulated) generation under a timeout,
/// 4. processing prompts in batches of two.
///
/// No model is actually loaded; the real calls are shown in comments only.
///
/// # Errors
///
/// The signature allows a `MullamaError`, but the body as written always
/// returns `Ok(())`.
#[cfg(feature = "async")]
async fn demonstrate_async_patterns() -> Result<(), MullamaError> {
    /// Simulated per-prompt work: a short pause followed by a formatted result.
    async fn process_prompt(prompt: &str) -> String {
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
        format!("Processed: {}", prompt)
    }

    println!("\n🔄 Advanced Async Patterns");
    println!("==========================");

    // ---- Pattern 1: model loading with custom parameters ----
    println!("1️⃣ Custom model loading...");
    let _model_params = ModelParams {
        n_gpu_layers: 32,
        use_mmap: true,
        use_mlock: false,
        check_tensors: true,
        vocab_only: false,
        ..ModelParams::default()
    };
    // In a real scenario:
    //     let model = AsyncModel::load_with_params("model.gguf", model_params).await?;
    println!(" ✅ Model would be loaded with custom parameters");

    // ---- Pattern 2: concurrent context creation ----
    println!("2️⃣ Concurrent context creation...");
    // (n_ctx, n_batch) pairs; all remaining fields keep their defaults.
    let context_sizes = [(1024, 256), (2048, 512), (4096, 1024)];
    let _context_configs: Vec<ContextParams> = context_sizes
        .iter()
        .map(|&(n_ctx, n_batch)| ContextParams {
            n_ctx,
            n_batch,
            ..ContextParams::default()
        })
        .collect();
    // In a real scenario the contexts would be created concurrently:
    //     let contexts = futures::future::try_join_all(
    //         context_configs.into_iter().map(|params| {
    //             model.create_context_async(params)
    //         })
    //     ).await?;
    println!(" ✅ Multiple contexts would be created concurrently");

    // ---- Pattern 3: generation guarded by a timeout ----
    println!("3️⃣ Generation with timeout...");
    let simulated_work = tokio::time::Duration::from_millis(50);
    let time_budget = tokio::time::Duration::from_millis(100);
    let generation_task = async move {
        // Stand-in for the actual generation.
        tokio::time::sleep(simulated_work).await;
        Ok::<_, MullamaError>(String::from("Generated text"))
    };
    match tokio::time::timeout(time_budget, generation_task).await {
        // The time budget elapsed before the task finished.
        Err(_) => println!(" ⏰ Generation timed out"),
        Ok(outcome) => match outcome {
            Ok(text) => println!(" ✅ Generation completed: {}", text),
            Err(err) => println!(" ❌ Generation failed: {}", err),
        },
    }

    // ---- Pattern 4: batch processing ----
    println!("4️⃣ Batch processing...");
    let batch_prompts = ["Prompt 1", "Prompt 2", "Prompt 3", "Prompt 4", "Prompt 5"];
    // Two prompts per batch; the last batch holds the single leftover prompt.
    for batch in batch_prompts.chunks(2) {
        let pending = batch.iter().map(|prompt| process_prompt(prompt));
        let processed = futures::future::join_all(pending).await;
        println!(" 📦 Batch processed: {:?}", processed);
    }

    Ok(())
}
/// Demonstrates handling a fallible async operation without aborting.
///
/// A random boolean decides whether the simulated operation yields a
/// `MullamaError::ModelLoadError` or a success value. Either outcome is printed
/// and the function then finishes normally.
///
/// # Errors
///
/// The signature allows a `MullamaError`, but the simulated failure is reported
/// rather than propagated, so the body as written always returns `Ok(())`.
#[cfg(feature = "async")]
async fn demonstrate_error_handling() -> Result<(), MullamaError> {
    println!("\n🛡️ Async Error Handling");
    println!("=======================");

    // Simulated operation: fails when the random draw comes up `true`.
    let simulated_operation = async {
        match rand::random::<bool>() {
            true => Err(MullamaError::ModelLoadError(String::from(
                "Simulated error",
            ))),
            false => Ok(String::from("Success")),
        }
    };

    // Report the outcome instead of propagating the error.
    match simulated_operation.await {
        Err(err) => println!("❌ Operation failed gracefully: {}", err),
        Ok(value) => println!("✅ Operation succeeded: {}", value),
    }

    Ok(())
}