local_streaming/local_streaming.rs

//! # Example: Local Model Streaming
//!
//! This example demonstrates how to use the streaming capabilities of the Helios Engine
//! with a local model. The response from the model is streamed token by token,
//! providing a real-time experience.
//!
//! Note: This example requires the `local` feature to be enabled.
//! Run with: cargo run --example local_streaming --features local

#[cfg(not(feature = "local"))]
fn main() {
    eprintln!("āŒ This example requires the 'local' feature to be enabled.");
    eprintln!("Run with: cargo run --example local_streaming --features local");
    std::process::exit(1);
}

#[cfg(feature = "local")]
use helios_engine::config::LocalConfig;
#[cfg(feature = "local")]
use helios_engine::{ChatMessage, LLMClient};
#[cfg(feature = "local")]
use std::io::{self, Write};

#[cfg(feature = "local")]
#[tokio::main]
async fn main() -> helios_engine::Result<()> {
    println!("šŸš€ Helios Engine - Local Model Streaming Example");
    println!("=================================================\n");

    // Configure the local model to use.
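    // Note: on first run the GGUF model file is typically downloaded from the
    // Hugging Face repo named below and cached locally before it is loaded.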
    let local_config = LocalConfig {
        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
        context_size: 2048,
        temperature: 0.7,
        max_tokens: 512,
    };

    println!("šŸ“„ Loading local model...");
    println!("   Repository: {}", local_config.huggingface_repo);
    println!("   Model: {}\n", local_config.model_file);

    // Create a new LLM client with the local model configuration.
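    // The resulting client exposes the same chat/streaming API regardless of
    // whether the provider is local or remote.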
    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;

    println!("āœ“ Model loaded successfully!\n");

    // --- Example 1: Simple streaming response ---
    println!("Example 1: Simple Streaming Response");
    println!("======================================\n");

    let messages = vec![
        ChatMessage::system("You are a helpful coding assistant."),
        ChatMessage::user("Write a short explanation of what Rust is."),
    ];

    print!("Assistant: ");
    io::stdout().flush()?;

    // Stream the response from the model, printing each chunk as it arrives.
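    // The call also returns the complete response once streaming finishes
    // (Example 2 below reads it via `response.content`); the `None` arguments
    // leave the optional parameters unset.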
    let _response = client
        .chat_stream(messages, None, None, None, None, |chunk| {
            print!("{}", chunk);
            io::stdout().flush().unwrap();
        })
        .await?;

    println!("\n");

    // --- Example 2: Interactive streaming ---
    println!("Example 2: Interactive Streaming");
    println!("==================================\n");

    let questions = vec![
        "What are the main benefits of Rust?",
        "Give me a simple code example.",
    ];

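    // The session accumulates each user and assistant message, so every
    // `chat_stream` call below receives the full conversation history.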
    let mut session = helios_engine::ChatSession::new()
        .with_system_prompt("You are a helpful programming assistant.");

    for question in questions {
        println!("User: {}", question);
        session.add_user_message(question);

        print!("Assistant: ");
        io::stdout().flush()?;

        // Stream the response, maintaining the conversation context.
        let response = client
            .chat_stream(session.get_messages(), None, None, None, None, |chunk| {
                print!("{}", chunk);
                io::stdout().flush().unwrap();
            })
            .await?;

        session.add_assistant_message(&response.content);
        println!("\n");
    }

    println!("āœ“ Local model streaming completed successfully!");
    println!("\nšŸ’” Features:");
    println!("  • Token-by-token streaming for local models");
    println!("  • Real-time response display (responses no longer arrive all at once)");
    println!("  • Same streaming API for both local and remote models");
    println!("  • Improved user experience with progressive output");

    Ok(())
}