local_streaming/
local_streaming.rs

#[cfg(not(feature = "local"))]
fn main() {
    eprintln!("āŒ This example requires the 'local' feature to be enabled.");
    eprintln!("Run with: cargo run --example local_streaming --features local");
    std::process::exit(1);
}

#[cfg(feature = "local")]
use helios_engine::config::LocalConfig;
#[cfg(feature = "local")]
use helios_engine::{ChatMessage, LLMClient};
#[cfg(feature = "local")]
use std::io::{self, Write};

#[cfg(feature = "local")]
#[tokio::main]
async fn main() -> helios_engine::Result<()> {
    println!("š Helios Engine - Local Model Streaming Example");
    println!("=================================================\n");

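    // Point the local backend at a small quantized GGUF model hosted on Hugging Face.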
    let local_config = LocalConfig {
        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
        context_size: 2048,
        temperature: 0.7,
        max_tokens: 512,
    };

    println!("š„ Loading local model...");
    println!("   Repository: {}", local_config.huggingface_repo);
    println!("   Model: {}\n", local_config.model_file);

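    // Create the client with the local provider; this is where the model gets loaded.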
    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;

    println!("ā… Model loaded successfully!\n");

    println!("Example 1: Simple Streaming Response");
    println!("======================================\n");

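    // A one-shot conversation: a system prompt plus a single user question.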
    let messages = vec![
        ChatMessage::system("You are a helpful coding assistant."),
        ChatMessage::user("Write a short explanation of what Rust is."),
    ];

    print!("Assistant: ");
    io::stdout().flush()?;

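    // Stream the reply: the closure runs once per generated chunk, which is printed immediately.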
    let _response = client
        .chat_stream(messages, None, None, None, None, |chunk| {
            print!("{}", chunk);
            io::stdout().flush().unwrap();
        })
        .await?;

    println!("\n");

    println!("Example 2: Interactive Streaming");
    println!("==================================\n");

    let questions = vec![
        "What are the main benefits of Rust?",
        "Give me a simple code example.",
    ];

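    // The session keeps the conversation history, so each answer becomes context for the next question.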
    let mut session = helios_engine::ChatSession::new()
        .with_system_prompt("You are a helpful programming assistant.");

    for question in questions {
        println!("User: {}", question);
        session.add_user_message(question);

        print!("Assistant: ");
        io::stdout().flush()?;

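        // Stream the answer, then record it in the session so the follow-up question sees it.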
        let response = client
            .chat_stream(session.get_messages(), None, None, None, None, |chunk| {
                print!("{}", chunk);
                io::stdout().flush().unwrap();
            })
            .await?;

        session.add_assistant_message(&response.content);
        println!("\n");
    }

    println!("ā… Local model streaming completed successfully!");
    println!("\nš” Features:");
    println!("   ⢠Token-by-token streaming for local models");
    println!("   ⢠Real-time response display (no more instant full responses)");
    println!("   ⢠Same streaming API for both local and remote models");
    println!("   ⢠Improved user experience with progressive output");

    Ok(())
}