1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
//! # Mullama Integration Showcase
//!
//! This example demonstrates all the advanced integration features of Mullama:
//! - Async/await support for non-blocking operations
//! - Streaming interfaces for real-time token generation
//! - Configuration management with serde
//! - Builder patterns for fluent APIs
//! - Web framework integration with Axum
//!
//! Run with: cargo run --example integration_showcase --features full
use mullama::config::presets;
use mullama::prelude::*;
#[cfg(feature = "streaming")]
use mullama::StreamConfig;
/// Entry point: runs every showcase section in order.
///
/// The configuration and builder sections always run. Each remaining section
/// is called under the same Cargo feature that gates its definition, so the
/// example still builds when only a subset of the optional features is
/// enabled.
///
/// # Errors
///
/// Returns the first [`MullamaError`] propagated by a section.
#[tokio::main]
async fn main() -> Result<(), MullamaError> {
    println!("š Mullama Integration Showcase");
    println!("================================");

    // Example 1: Configuration Management
    showcase_configuration().await?;

    // Example 2: Builder Patterns
    showcase_builder_patterns().await?;

    // Example 3: Async Model Loading
    #[cfg(feature = "async")]
    {
        showcase_async_operations().await?;
    }

    // Example 4: Streaming Generation
    // Gated on `streaming` (not `async`) because that is the feature under
    // which `showcase_streaming` is compiled.
    #[cfg(feature = "streaming")]
    {
        showcase_streaming().await?;
    }

    // Example 5: Web Service Integration
    // Gated on `web` because that is the feature under which
    // `showcase_web_integration` is compiled.
    #[cfg(feature = "web")]
    {
        showcase_web_integration().await?;
    }

    println!("\n⨠All integration features showcased successfully!");
    Ok(())
}
/// Showcase configuration management with serde.
///
/// Builds a [`MullamaConfig`] in code, prints it as pretty JSON, prints the
/// temperature and `top_k` of two presets, and finally validates the
/// hand-built configuration.
///
/// # Errors
///
/// Returns [`MullamaError::ConfigError`] when JSON serialization fails.
/// A failed validation is only reported on stdout; it is not returned.
async fn showcase_configuration() -> Result<(), MullamaError> {
    println!("\nš Configuration Management");
    println!("---------------------------");

    // Create configuration programmatically; unspecified fields keep their defaults.
    let config = MullamaConfig {
        model: mullama::config::ModelConfig {
            path: "path/to/model.gguf".to_string(),
            gpu_layers: 32,
            context_size: 4096,
            ..Default::default()
        },
        sampling: mullama::config::SamplingConfig {
            temperature: 0.8,
            top_k: 50,
            top_p: 0.95,
            ..Default::default()
        },
        ..Default::default()
    };

    // Serialize to JSON
    let json = serde_json::to_string_pretty(&config)
        .map_err(|e| MullamaError::ConfigError(format!("JSON serialization failed: {}", e)))?;
    println!("š Configuration as JSON:\n{}", json);

    // Use preset configurations
    let creative_config = presets::creative_writing();
    println!(
        "šØ Creative writing preset: temp={}, top_k={}",
        creative_config.sampling.temperature, creative_config.sampling.top_k
    );

    let code_config = presets::code_generation();
    println!(
        "š» Code generation preset: temp={}, top_k={}",
        code_config.sampling.temperature, code_config.sampling.top_k
    );

    // Validate configuration
    match config.validate() {
        // The success message is kept on one line: the literal previously
        // contained a raw line break left over from a mis-decoded check mark.
        Ok(_) => println!("✅ Configuration is valid"),
        Err(e) => println!("ā Configuration error: {}", e),
    }

    Ok(())
}
/// Demonstrates the fluent builder APIs for models and samplers.
///
/// The section header is always printed. The builder calls themselves are
/// compiled only when the `async` feature is enabled. The builders are
/// constructed but never used afterwards.
async fn showcase_builder_patterns() -> Result<(), MullamaError> {
    println!("\nš§ Builder Patterns");
    println!("-------------------");

    #[cfg(feature = "async")]
    {
        // Model builder: explicit settings first, then a preset applied last.
        let _model = ModelBuilder::new()
            .path("path/to/model.gguf")
            .gpu_layers(32)
            .context_size(4096)
            .memory_mapping(true)
            .preset(mullama::builder::presets::performance_optimized);
        println!("šļø Model builder configured with performance optimizations");

        // A context builder needs a loaded model, so it is only sketched here:
        //
        // let context_builder = ContextBuilder::new(model.clone())
        //     .context_size(4096)
        //     .batch_size(512)
        //     .threads(8)
        //     .optimize_for_performance();
        println!("šļø Context builder configured for performance");

        // Sampler builder: sampling knobs, a penalties closure, then a preset.
        let _sampler = SamplerBuilder::new()
            .temperature(0.8)
            .top_k(50)
            .nucleus(0.95)
            .penalties(|p| p.repetition(1.1).frequency(0.1).presence(0.1))
            .preset(mullama::builder::presets::creative_sampling);
        println!("šļø Sampler builder configured with creative sampling");
    }

    Ok(())
}
/// Showcase async operations.
///
/// Only prints progress messages. The real model calls are kept as
/// commented-out sample code because they need an actual model file.
#[cfg(feature = "async")]
async fn showcase_async_operations() -> Result<(), MullamaError> {
    println!("\nā” Async Operations");
    println!("------------------");
    println!("š Loading model asynchronously...");

    // Note: In a real scenario, you'd use an actual model path
    // let model = AsyncModel::load("path/to/model.gguf").await?;
    // println!("✅ Model loaded successfully");
    // let info = model.info_async().await;
    // println!("š Model info - Vocab: {}, Layers: {}", info.vocab_size, info.n_layer);

    // Generate text asynchronously
    // let result = model.generate_async("The future of AI is", 50).await?;
    // println!("š¤ Generated: {}", result);

    println!("✅ Async operations demonstrated (with placeholder model)");
    Ok(())
}
/// Showcase streaming token generation.
///
/// Builds a [`StreamConfig`] and prints two of its settings. The token stream
/// itself is kept as commented-out sample code because it needs an actual
/// model.
#[cfg(feature = "streaming")]
async fn showcase_streaming() -> Result<(), MullamaError> {
    println!("\nš Streaming Generation");
    println!("----------------------");

    // Note: In a real scenario, you'd use an actual model
    // let model = AsyncModel::load("path/to/model.gguf").await?;

    // Configure streaming
    let config = StreamConfig::default()
        .max_tokens(50)
        .temperature(0.8)
        .include_probabilities(true);
    println!(
        "š” Stream config: max_tokens={}, temp={}",
        config.max_tokens, config.sampler_params.temperature
    );

    // Create token stream (placeholder)
    // let mut stream = TokenStream::new(model, "Once upon a time", config).await?;

    // Process stream
    // println!("š¬ Streaming tokens:");
    // while let Some(result) = stream.next().await {
    //     match result {
    //         Ok(token_data) => {
    //             print!("{}", token_data.text);
    //             if token_data.is_final {
    //                 println!("\nš Generation complete!");
    //                 break;
    //             }
    //         }
    //         Err(e) => {
    //             eprintln!("ā Stream error: {}", e);
    //             break;
    //         }
    //     }
    // }

    println!("✅ Streaming demonstrated (with placeholder model)");
    Ok(())
}
/// Showcase web service integration.
///
/// Only prints the list of endpoints. No router is built and no server is
/// started; the code that would do so is kept as commented-out sample code
/// because it needs an actual model.
#[cfg(feature = "web")]
async fn showcase_web_integration() -> Result<(), MullamaError> {
    println!("\nš Web Service Integration");
    println!("-------------------------");

    // Note: In a real scenario, you'd use an actual model
    // let model = AsyncModel::load("path/to/model.gguf").await?;

    // Create application state (placeholder)
    // let app_state = AppState {
    //     model,
    //     default_config: MullamaConfig::default(),
    //     metrics: Arc::new(tokio::sync::RwLock::new(ApiMetrics::default())),
    // };

    // Create router with all endpoints
    // let app = create_router(app_state);
    println!("š ļø Router created with endpoints:");
    println!(" š POST /generate - Text generation");
    println!(" š POST /tokenize - Text tokenization");
    println!(" š GET /stream/:prompt - Server-sent events streaming");
    println!(" š GET /health - Health check");
    println!(" š GET /metrics - API metrics");

    // In a real application, you would bind and serve:
    // let listener = TcpListener::bind("0.0.0.0:3000").await
    //     .map_err(|e| MullamaError::ConfigError(format!("Failed to bind: {}", e)))?;
    // println!("š Server running on http://0.0.0.0:3000");
    // axum::serve(listener, app).await
    //     .map_err(|e| MullamaError::ConfigError(format!("Server error: {}", e)))?;

    println!("✅ Web integration demonstrated (server not started)");
    Ok(())
}