local_streaming/local_streaming.rs

//! # Example: Local Model Streaming
//!
//! This example demonstrates how to use the streaming capabilities of the Helios Engine
//! with a local model. The response from the model is streamed token by token,
//! providing a real-time experience.
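//!
//! To try it, run (assuming this file lives at `examples/local_streaming.rs`
//! in a checkout of the repository): `cargo run --example local_streaming`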

use helios_engine::config::LocalConfig;
use helios_engine::{ChatMessage, LLMClient};
use std::io::{self, Write};

#[tokio::main]
async fn main() -> helios_engine::Result<()> {
    println!("šŸš€ Helios Engine - Local Model Streaming Example");
    println!("=================================================\n");

    // Configure the local model to use.
    let local_config = LocalConfig {
        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
        context_size: 2048,
        temperature: 0.7,
        max_tokens: 512,
    };
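
    // `context_size` bounds the prompt-plus-generation window, while
    // `max_tokens` caps each individual reply. The 0.5B Qwen model is used
    // here to keep the download small; any other GGUF repo/file pair from
    // Hugging Face should slot in the same way.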

    println!("šŸ“„ Loading local model...");
    println!("   Repository: {}", local_config.huggingface_repo);
    println!("   Model: {}\n", local_config.model_file);

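    // Note: constructing the client below is where the model download and
    // load happen, so expect this step to take a while on a first run.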
    // Create a new LLM client with the local model configuration.
    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;

    println!("āœ“ Model loaded successfully!\n");

    // --- Example 1: Simple streaming response ---
    println!("Example 1: Simple Streaming Response");
    println!("======================================\n");

    let messages = vec![
        ChatMessage::system("You are a helpful coding assistant."),
        ChatMessage::user("Write a short explanation of what Rust is."),
    ];

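    // `print!` does not flush stdout on its own, so flush explicitly to make
    // the "Assistant: " prefix appear before the first token arrives.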
    print!("Assistant: ");
    io::stdout().flush()?;

    // Stream the response from the model, printing each chunk as it arrives.
    let _response = client
        .chat_stream(messages, None, None, None, None, |chunk| {
            print!("{}", chunk);
            io::stdout().flush().unwrap();
        })
        .await?;
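
    // The closure above only handles display: `chat_stream` also returns the
    // completed message, so the full text is still available afterwards
    // (bound to `_response` here and deliberately unused).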

    println!("\n");

    // --- Example 2: Interactive streaming ---
    println!("Example 2: Interactive Streaming");
    println!("==================================\n");

    let questions = vec![
        "What are the main benefits of Rust?",
        "Give me a simple code example.",
    ];

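    // The session accumulates the conversation: every user and assistant turn
    // added below is replayed via `get_messages()`, so each streamed request
    // carries the full context, system prompt included.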
    let mut session = helios_engine::ChatSession::new()
        .with_system_prompt("You are a helpful programming assistant.");

    for question in questions {
        println!("User: {}", question);
        session.add_user_message(question);

        print!("Assistant: ");
        io::stdout().flush()?;

        // Stream the response, maintaining the conversation context.
        let response = client
            .chat_stream(session.get_messages(), None, None, None, None, |chunk| {
                print!("{}", chunk);
                io::stdout().flush().unwrap();
            })
            .await?;

        session.add_assistant_message(&response.content);
        println!("\n");
    }

    println!("āœ… Local model streaming completed successfully!");
    println!("\nšŸ’” Features:");
    println!("  • Token-by-token streaming for local models");
    println!("  • Real-time display instead of waiting for the full response");
    println!("  • Same streaming API for both local and remote models");
    println!("  • Improved user experience with progressive output");

    Ok(())
}