local_streaming/local_streaming.rs

//! Example: Streaming with Local Models
//!
//! This example demonstrates the streaming capability for local models.
//! Previously, local models printed their entire response at once after
//! generation finished; now they stream token by token, just like remote
//! models.
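//!
//! `chat_stream` invokes the supplied closure once per generated chunk and
//! also returns the complete response when generation ends, so callers can
//! both display output progressively and keep the final text. If this file
//! lives in the crate's `examples/` directory, it can typically be run with
//! `cargo run --example local_streaming`.
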
use helios_engine::config::LocalConfig;
use helios_engine::{ChatMessage, LLMClient};
use std::io::{self, Write};

#[tokio::main]
async fn main() -> helios_engine::Result<()> {
    println!("šŸš€ Helios Engine - Local Model Streaming Example");
    println!("=================================================\n");

    // Configure local model
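    // (context_size is the model's context window in tokens, temperature the
    // sampling temperature, and max_tokens a cap on generated tokens; any
    // chat-tuned GGUF model hosted on Hugging Face should work here.)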
    let local_config = LocalConfig {
        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
        context_size: 2048,
        temperature: 0.7,
        max_tokens: 512,
    };

    println!("šŸ“„ Loading local model...");
    println!("   Repository: {}", local_config.huggingface_repo);
    println!("   Model: {}\n", local_config.model_file);

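    // On the first run this will presumably download the GGUF file from the
    // Hugging Face repo above and cache it locally; later runs should load
    // from the cache.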
    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;

    println!("āœ“ Model loaded successfully!\n");

    // Example 1: Simple streaming
    println!("Example 1: Simple Streaming Response");
    println!("======================================\n");

    let messages = vec![
        ChatMessage::system("You are a helpful coding assistant."),
        ChatMessage::user("Write a short explanation of what Rust is."),
    ];

    print!("Assistant: ");
    io::stdout().flush()?;

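    // The closure below runs once per streamed chunk; stdout is flushed after
    // each write because `print!` does not flush on its own. `chat_stream`
    // also returns the complete response (unused here, hence `_response`).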
    let _response = client
        .chat_stream(messages, None, |chunk| {
            print!("{}", chunk);
            io::stdout().flush().unwrap();
        })
        .await?;

    println!("\n");

    // Example 2: Multiple questions with streaming
    println!("Example 2: Interactive Streaming");
    println!("==================================\n");

    let questions = vec![
        "What are the main benefits of Rust?",
        "Give me a simple code example.",
    ];

    let mut session = helios_engine::ChatSession::new()
        .with_system_prompt("You are a helpful programming assistant.");

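    // The session accumulates the conversation, so each `get_messages()` call
    // hands the model the system prompt plus every prior turn.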
    for question in questions {
        println!("User: {}", question);
        session.add_user_message(question);

        print!("Assistant: ");
        io::stdout().flush()?;

        let response = client
            .chat_stream(session.get_messages(), None, |chunk| {
                print!("{}", chunk);
                io::stdout().flush().unwrap();
            })
            .await?;

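        // Record the streamed reply so the next turn is answered with full
        // context.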
        session.add_assistant_message(&response.content);
        println!("\n");
    }

    println!("āœ… Local model streaming completed successfully!");
    println!("\nšŸ’” Features:");
    println!("  • Token-by-token streaming for local models");
    println!("  • Real-time display instead of waiting for the full response");
    println!("  • Same streaming API for both local and remote models");
    println!("  • Improved user experience with progressive output");

    Ok(())
}