// Example: streaming_chat/streaming_chat.rs

//! # Example: Streaming Chat
//!
//! This example demonstrates how to use the streaming API of the Helios Engine to get
//! real-time responses from the LLM. It also shows how to detect and display
//! "thinking" tags that some models use to indicate their reasoning process.

7#![allow(dead_code)]
8#![allow(unused_variables)]
9use helios_engine::config::LLMConfig;
10use helios_engine::{ChatMessage, ChatSession, LLMClient};
11use std::io::{self, Write};
12
13#[tokio::main]
14async fn main() -> helios_engine::Result<()> {
15    println!("šŸš€ Helios Engine - Streaming Example");
16    println!("=====================================\n");
17
18    // Set up the LLM configuration.
19    let llm_config = LLMConfig {
20        model_name: "gpt-3.5-turbo".to_string(),
21        base_url: "https://api.openai.com/v1".to_string(),
22        api_key: std::env::var("OPENAI_API_KEY")
23            .unwrap_or_else(|_| "your-api-key-here".to_string()),
24        temperature: 0.7,
25        max_tokens: 2048,
26    };
27
28    // Create a new LLM client.
29    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
30
31    // --- Example 1: Simple streaming response ---
32    println!("Example 1: Simple Streaming Response");
33    println!("======================================\n");
34
35    let messages = vec![
36        ChatMessage::system("You are a helpful assistant."),
37        ChatMessage::user("Write a short poem about coding."),
38    ];
39
40    print!("Assistant: ");
41    io::stdout().flush()?;
42
43    // Stream the response from the model, printing each chunk as it arrives.
44    let response = client
45        .chat_stream(messages, None, None, None, None, |chunk| {
46            print!("{}", chunk);
47            io::stdout().flush().unwrap();
48        })
49        .await?;
50
51    println!("\n\n");
52
53    // --- Example 2: Interactive streaming chat ---
54    println!("Example 2: Interactive Streaming Chat");
55    println!("======================================\n");
56
57    let mut session = ChatSession::new().with_system_prompt("You are a helpful coding assistant.");
58
59    let questions = vec![
60        "What is Rust?",
61        "What are its main benefits?",
62        "Show me a simple example.",
63    ];
64
65    for question in questions {
66        println!("User: {}", question);
67        session.add_user_message(question);
68
69        print!("Assistant: ");
70        io::stdout().flush()?;
71
72        // Stream the response, maintaining the conversation context.
73        let response = client
74            .chat_stream(session.get_messages(), None, None, None, None, |chunk| {
75                print!("{}", chunk);
76                io::stdout().flush().unwrap();
77            })
78            .await?;
79
80        session.add_assistant_message(&response.content);
81        println!("\n");
82    }
83
84    // --- Example 3: Streaming with thinking tags ---
85    println!("\nExample 3: Streaming with Thinking Tags");
86    println!("=========================================\n");
87    println!("When using models that support thinking tags (like o1),");
88    println!("you can detect and display them during streaming.\n");
89
90    /// A helper struct to track and display thinking tags in streamed responses.
91    struct ThinkingTracker {
92        in_thinking: bool,
93        thinking_buffer: String,
94    }
95
96    impl ThinkingTracker {
97        /// Creates a new `ThinkingTracker`.
98        fn new() -> Self {
99            Self {
100                in_thinking: false,
101                thinking_buffer: String::new(),
102            }
103        }
104
105        /// Processes a chunk of a streamed response and returns the processed output.
106        fn process_chunk(&mut self, chunk: &str) -> String {
107            let mut output = String::new();
108            let mut chars = chunk.chars().peekable();
109
110            while let Some(c) = chars.next() {
111                if c == '<' {
112                    let remaining: String = chars.clone().collect();
113                    if remaining.starts_with("thinking>") {
114                        self.in_thinking = true;
115                        self.thinking_buffer.clear();
116                        output.push_str("\nšŸ’­ [Thinking");
117                        for _ in 0..9 {
118                            chars.next();
119                        }
120                        continue;
121                    } else if remaining.starts_with("/thinking>") {
122                        self.in_thinking = false;
123                        output.push_str("]\n");
124                        for _ in 0..10 {
125                            chars.next();
126                        }
127                        continue;
128                    }
129                }
130
131                if self.in_thinking {
132                    self.thinking_buffer.push(c);
133                    if self.thinking_buffer.len() % 3 == 0 {
134                        output.push('.');
135                    }
136                } else {
137                    output.push(c);
138                }
139            }
140
141            output
142        }
143    }
144
145    let messages = vec![ChatMessage::user(
146        "Solve this problem: What is 15 * 234 + 89?",
147    )];
148
149    let mut tracker = ThinkingTracker::new();
150    print!("Assistant: ");
151    io::stdout().flush()?;
152
153    // Stream the response, processing thinking tags as they arrive.
154    let _response = client
155        .chat_stream(messages, None, None, None, None, |chunk| {
156            let output = tracker.process_chunk(chunk);
157            print!("{}", output);
158            io::stdout().flush().unwrap();
159        })
160        .await?;
161
162    println!("\n\nāœ… Streaming examples completed!");
163    println!("\nKey benefits of streaming:");
164    println!("  • Real-time response display");
165    println!("  • Better user experience for long responses");
166    println!("  • Ability to show thinking/reasoning process");
167    println!("  • Early cancellation possible (future feature)");
168
169    Ok(())
170}