Struct LLMClient

Source

pub struct LLMClient { /* private fields */ }

Implementations

Source §

impl LLMClient

Source

pub async fn new(provider_type: LLMProviderType) -> Result<Self>

Examples found in repository

examples/direct_llm_usage.rs (line 62)

50async fn simple_call() -> helios_engine::Result<()> {
51    // Create configuration
52    let llm_config = LLMConfig {
53        model_name: "gpt-3.5-turbo".to_string(),
54        base_url: "https://api.openai.com/v1".to_string(),
55        api_key: std::env::var("OPENAI_API_KEY")
56            .unwrap_or_else(|_| "your-api-key-here".to_string()),
57        temperature: 0.7,
58        max_tokens: 2048,
59    };
60
61    // Create client
62    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
63
64    // Prepare messages
65    let messages = vec![
66        ChatMessage::system("You are a helpful assistant that gives concise answers."),
67        ChatMessage::user("What is the capital of France? Answer in one sentence."),
68    ];
69
70    // Make the call
71    println!("Sending request...");
72    match client.chat(messages, None).await {
73        Ok(response) => {
74            println!("✓ Response: {}", response.content);
75        }
76        Err(e) => {
77            println!("✗ Error: {}", e);
78            println!("  (Make sure to set OPENAI_API_KEY environment variable)");
79        }
80    }
81
82    Ok(())
83}
84
85/// Example 2: Multi-turn conversation with context
86async fn conversation_with_context() -> helios_engine::Result<()> {
87    let llm_config = LLMConfig {
88        model_name: "gpt-3.5-turbo".to_string(),
89        base_url: "https://api.openai.com/v1".to_string(),
90        api_key: std::env::var("OPENAI_API_KEY")
91            .unwrap_or_else(|_| "your-api-key-here".to_string()),
92        temperature: 0.7,
93        max_tokens: 2048,
94    };
95
96    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
97
98    // Use ChatSession to manage conversation
99    let mut session = ChatSession::new()
100        .with_system_prompt("You are a helpful math tutor. Give brief, clear explanations.");
101
102    // First turn
103    println!("Turn 1:");
104    session.add_user_message("What is 15 * 23?");
105    print!("  User: What is 15 * 23?\n  ");
106
107    match client.chat(session.get_messages(), None).await {
108        Ok(response) => {
109            session.add_assistant_message(&response.content);
110            println!("Assistant: {}", response.content);
111        }
112        Err(e) => {
113            println!("Error: {}", e);
114            return Ok(());
115        }
116    }
117
118    // Second turn (with context from first turn)
119    println!("\nTurn 2:");
120    session.add_user_message("Now divide that by 5.");
121    print!("  User: Now divide that by 5.\n  ");
122
123    match client.chat(session.get_messages(), None).await {
124        Ok(response) => {
125            session.add_assistant_message(&response.content);
126            println!("Assistant: {}", response.content);
127        }
128        Err(e) => {
129            println!("Error: {}", e);
130        }
131    }
132
133    println!("\n💡 Notice how the assistant remembered the result from the first calculation!");
134
135    Ok(())
136}
137
138/// Example 3: Information about using different providers
139fn different_providers_info() {
140    println!("You can use Helios with various LLM providers:\n");
141
142    println!("🔵 OpenAI:");
143    println!("   LLMConfig {{");
144    println!("       model_name: \"gpt-4\".to_string(),");
145    println!("       base_url: \"https://api.openai.com/v1\".to_string(),");
146    println!("       api_key: env::var(\"OPENAI_API_KEY\").unwrap(),");
147    println!("       temperature: 0.7,");
148    println!("       max_tokens: 2048,");
149    println!("   }}\n");
150
151    println!("🟢 Local LM Studio:");
152    println!("   LLMConfig {{");
153    println!("       model_name: \"local-model\".to_string(),");
154    println!("       base_url: \"http://localhost:1234/v1\".to_string(),");
155    println!("       api_key: \"not-needed\".to_string(),");
156    println!("       temperature: 0.7,");
157    println!("       max_tokens: 2048,");
158    println!("   }}\n");
159
160    println!("🦙 Ollama:");
161    println!("   LLMConfig {{");
162    println!("       model_name: \"llama2\".to_string(),");
163    println!("       base_url: \"http://localhost:11434/v1\".to_string(),");
164    println!("       api_key: \"not-needed\".to_string(),");
165    println!("       temperature: 0.7,");
166    println!("       max_tokens: 2048,");
167    println!("   }}\n");
168
169    println!("🔷 Azure OpenAI:");
170    println!("   LLMConfig {{");
171    println!("       model_name: \"gpt-35-turbo\".to_string(),");
172    println!("       base_url: \"https://your-resource.openai.azure.com/...\".to_string(),");
173    println!("       api_key: env::var(\"AZURE_OPENAI_KEY\").unwrap(),");
174    println!("       temperature: 0.7,");
175    println!("       max_tokens: 2048,");
176    println!("   }}\n");
177}
178
179/// Example 4: Interactive chat session
180async fn interactive_chat() -> helios_engine::Result<()> {
181    let llm_config = LLMConfig {
182        model_name: "gpt-3.5-turbo".to_string(),
183        base_url: "https://api.openai.com/v1".to_string(),
184        api_key: std::env::var("OPENAI_API_KEY")
185            .unwrap_or_else(|_| "your-api-key-here".to_string()),
186        temperature: 0.7,
187        max_tokens: 2048,
188    };
189
190    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
191    let mut session =
192        ChatSession::new().with_system_prompt("You are a friendly and helpful AI assistant.");
193
194    println!("Chat started! Type 'exit' or 'quit' to end the conversation.\n");
195
196    loop {
197        print!("You: ");
198        io::stdout().flush()?;
199
200        let mut input = String::new();
201        io::stdin().read_line(&mut input)?;
202        let input = input.trim();
203
204        if input.is_empty() {
205            continue;
206        }
207
208        if input == "exit" || input == "quit" {
209            println!("\n👋 Goodbye!");
210            break;
211        }
212
213        // Special commands
214        if input == "clear" {
215            session.clear();
216            println!("🧹 Conversation cleared!\n");
217            continue;
218        }
219
220        if input == "history" {
221            println!("\n📜 Conversation history:");
222            for (i, msg) in session.messages.iter().enumerate() {
223                println!("  {}. {:?}: {}", i + 1, msg.role, msg.content);
224            }
225            println!();
226            continue;
227        }
228
229        session.add_user_message(input);
230
231        print!("Assistant: ");
232        io::stdout().flush()?;
233
234        match client.chat(session.get_messages(), None).await {
235            Ok(response) => {
236                session.add_assistant_message(&response.content);
237                println!("{}\n", response.content);
238            }
239            Err(e) => {
240                println!("\n❌ Error: {}", e);
241                println!("   (Make sure OPENAI_API_KEY is set correctly)\n");
242                // Remove the last user message since it failed
243                session.messages.pop();
244            }
245        }
246    }
247
248    Ok(())
249}

More examples

Hide additional examples

examples/local_streaming.rs (line 29)

12async fn main() -> helios_engine::Result<()> {
13    println!("🚀 Helios Engine - Local Model Streaming Example");
14    println!("=================================================\n");
15
16    // Configure local model
17    let local_config = LocalConfig {
18        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
19        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
20        context_size: 2048,
21        temperature: 0.7,
22        max_tokens: 512,
23    };
24
25    println!("📥 Loading local model...");
26    println!("   Repository: {}", local_config.huggingface_repo);
27    println!("   Model: {}\n", local_config.model_file);
28
29    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;
30
31    println!("✓ Model loaded successfully!\n");
32
33    // Example 1: Simple streaming
34    println!("Example 1: Simple Streaming Response");
35    println!("======================================\n");
36
37    let messages = vec![
38        ChatMessage::system("You are a helpful coding assistant."),
39        ChatMessage::user("Write a short explanation of what Rust is."),
40    ];
41
42    print!("Assistant: ");
43    io::stdout().flush()?;
44
45    let _response = client
46        .chat_stream(messages, None, |chunk| {
47            print!("{}", chunk);
48            io::stdout().flush().unwrap();
49        })
50        .await?;
51
52    println!("\n");
53
54    // Example 2: Multiple questions with streaming
55    println!("Example 2: Interactive Streaming");
56    println!("==================================\n");
57
58    let questions = vec![
59        "What are the main benefits of Rust?",
60        "Give me a simple code example.",
61    ];
62
63    let mut session = helios_engine::ChatSession::new()
64        .with_system_prompt("You are a helpful programming assistant.");
65
66    for question in questions {
67        println!("User: {}", question);
68        session.add_user_message(question);
69
70        print!("Assistant: ");
71        io::stdout().flush()?;
72
73        let response = client
74            .chat_stream(session.get_messages(), None, |chunk| {
75                print!("{}", chunk);
76                io::stdout().flush().unwrap();
77            })
78            .await?;
79
80        session.add_assistant_message(&response.content);
81        println!("\n");
82    }
83
84    println!("✅ Local model streaming completed successfully!");
85    println!("\n💡 Features:");
86    println!("  • Token-by-token streaming for local models");
87    println!("  • Real-time response display (no more instant full responses)");
88    println!("  • Same streaming API for both local and remote models");
89    println!("  • Improved user experience with progressive output");
90
91    Ok(())
92}

examples/streaming_chat.rs (line 26)

12async fn main() -> helios_engine::Result<()> {
13    println!("🚀 Helios Engine - Streaming Example");
14    println!("=====================================\n");
15
16    // Setup LLM configuration
17    let llm_config = LLMConfig {
18        model_name: "gpt-3.5-turbo".to_string(),
19        base_url: "https://api.openai.com/v1".to_string(),
20        api_key: std::env::var("OPENAI_API_KEY")
21            .unwrap_or_else(|_| "your-api-key-here".to_string()),
22        temperature: 0.7,
23        max_tokens: 2048,
24    };
25
26    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
27
28    println!("Example 1: Simple Streaming Response");
29    println!("======================================\n");
30
31    let messages = vec![
32        ChatMessage::system("You are a helpful assistant."),
33        ChatMessage::user("Write a short poem about coding."),
34    ];
35
36    print!("Assistant: ");
37    io::stdout().flush()?;
38
39    let response = client
40        .chat_stream(messages, None, |chunk| {
41            print!("{}", chunk);
42            io::stdout().flush().unwrap();
43        })
44        .await?;
45
46    println!("\n\n");
47
48    println!("Example 2: Interactive Streaming Chat");
49    println!("======================================\n");
50
51    let mut session = ChatSession::new().with_system_prompt("You are a helpful coding assistant.");
52
53    let questions = vec![
54        "What is Rust?",
55        "What are its main benefits?",
56        "Show me a simple example.",
57    ];
58
59    for question in questions {
60        println!("User: {}", question);
61        session.add_user_message(question);
62
63        print!("Assistant: ");
64        io::stdout().flush()?;
65
66        let response = client
67            .chat_stream(session.get_messages(), None, |chunk| {
68                print!("{}", chunk);
69                io::stdout().flush().unwrap();
70            })
71            .await?;
72
73        session.add_assistant_message(&response.content);
74        println!("\n");
75    }
76
77    println!("\nExample 3: Streaming with Thinking Tags");
78    println!("=========================================\n");
79    println!("When using models that support thinking tags (like o1),");
80    println!("you can detect and display them during streaming.\n");
81
82    struct ThinkingTracker {
83        in_thinking: bool,
84        thinking_buffer: String,
85    }
86
87    impl ThinkingTracker {
88        fn new() -> Self {
89            Self {
90                in_thinking: false,
91                thinking_buffer: String::new(),
92            }
93        }
94
95        fn process_chunk(&mut self, chunk: &str) -> String {
96            let mut output = String::new();
97            let mut chars = chunk.chars().peekable();
98
99            while let Some(c) = chars.next() {
100                if c == '<' {
101                    let remaining: String = chars.clone().collect();
102                    if remaining.starts_with("thinking>") {
103                        self.in_thinking = true;
104                        self.thinking_buffer.clear();
105                        output.push_str("\n💭 [Thinking");
106                        for _ in 0..9 {
107                            chars.next();
108                        }
109                        continue;
110                    } else if remaining.starts_with("/thinking>") {
111                        self.in_thinking = false;
112                        output.push_str("]\n");
113                        for _ in 0..10 {
114                            chars.next();
115                        }
116                        continue;
117                    }
118                }
119
120                if self.in_thinking {
121                    self.thinking_buffer.push(c);
122                    if self.thinking_buffer.len() % 3 == 0 {
123                        output.push('.');
124                    }
125                } else {
126                    output.push(c);
127                }
128            }
129
130            output
131        }
132    }
133
134    let messages = vec![ChatMessage::user(
135        "Solve this problem: What is 15 * 234 + 89?",
136    )];
137
138    let mut tracker = ThinkingTracker::new();
139    print!("Assistant: ");
140    io::stdout().flush()?;
141
142    let _response = client
143        .chat_stream(messages, None, |chunk| {
144            let output = tracker.process_chunk(chunk);
145            print!("{}", output);
146            io::stdout().flush().unwrap();
147        })
148        .await?;
149
150    println!("\n\n✅ Streaming examples completed!");
151    println!("\nKey benefits of streaming:");
152    println!("  • Real-time response display");
153    println!("  • Better user experience for long responses");
154    println!("  • Ability to show thinking/reasoning process");
155    println!("  • Early cancellation possible (future feature)");
156
157    Ok(())
158}

Source

pub fn provider_type(&self) -> &LLMProviderType

Source §

impl LLMClient

Source

pub async fn chat(
    &self,
    messages: Vec<ChatMessage>,
    tools: Option<Vec<ToolDefinition>>,
) -> Result<ChatMessage>

Examples found in repository

examples/direct_llm_usage.rs (line 72)

50async fn simple_call() -> helios_engine::Result<()> {
51    // Create configuration
52    let llm_config = LLMConfig {
53        model_name: "gpt-3.5-turbo".to_string(),
54        base_url: "https://api.openai.com/v1".to_string(),
55        api_key: std::env::var("OPENAI_API_KEY")
56            .unwrap_or_else(|_| "your-api-key-here".to_string()),
57        temperature: 0.7,
58        max_tokens: 2048,
59    };
60
61    // Create client
62    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
63
64    // Prepare messages
65    let messages = vec![
66        ChatMessage::system("You are a helpful assistant that gives concise answers."),
67        ChatMessage::user("What is the capital of France? Answer in one sentence."),
68    ];
69
70    // Make the call
71    println!("Sending request...");
72    match client.chat(messages, None).await {
73        Ok(response) => {
74            println!("✓ Response: {}", response.content);
75        }
76        Err(e) => {
77            println!("✗ Error: {}", e);
78            println!("  (Make sure to set OPENAI_API_KEY environment variable)");
79        }
80    }
81
82    Ok(())
83}
84
85/// Example 2: Multi-turn conversation with context
86async fn conversation_with_context() -> helios_engine::Result<()> {
87    let llm_config = LLMConfig {
88        model_name: "gpt-3.5-turbo".to_string(),
89        base_url: "https://api.openai.com/v1".to_string(),
90        api_key: std::env::var("OPENAI_API_KEY")
91            .unwrap_or_else(|_| "your-api-key-here".to_string()),
92        temperature: 0.7,
93        max_tokens: 2048,
94    };
95
96    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
97
98    // Use ChatSession to manage conversation
99    let mut session = ChatSession::new()
100        .with_system_prompt("You are a helpful math tutor. Give brief, clear explanations.");
101
102    // First turn
103    println!("Turn 1:");
104    session.add_user_message("What is 15 * 23?");
105    print!("  User: What is 15 * 23?\n  ");
106
107    match client.chat(session.get_messages(), None).await {
108        Ok(response) => {
109            session.add_assistant_message(&response.content);
110            println!("Assistant: {}", response.content);
111        }
112        Err(e) => {
113            println!("Error: {}", e);
114            return Ok(());
115        }
116    }
117
118    // Second turn (with context from first turn)
119    println!("\nTurn 2:");
120    session.add_user_message("Now divide that by 5.");
121    print!("  User: Now divide that by 5.\n  ");
122
123    match client.chat(session.get_messages(), None).await {
124        Ok(response) => {
125            session.add_assistant_message(&response.content);
126            println!("Assistant: {}", response.content);
127        }
128        Err(e) => {
129            println!("Error: {}", e);
130        }
131    }
132
133    println!("\n💡 Notice how the assistant remembered the result from the first calculation!");
134
135    Ok(())
136}
137
138/// Example 3: Information about using different providers
139fn different_providers_info() {
140    println!("You can use Helios with various LLM providers:\n");
141
142    println!("🔵 OpenAI:");
143    println!("   LLMConfig {{");
144    println!("       model_name: \"gpt-4\".to_string(),");
145    println!("       base_url: \"https://api.openai.com/v1\".to_string(),");
146    println!("       api_key: env::var(\"OPENAI_API_KEY\").unwrap(),");
147    println!("       temperature: 0.7,");
148    println!("       max_tokens: 2048,");
149    println!("   }}\n");
150
151    println!("🟢 Local LM Studio:");
152    println!("   LLMConfig {{");
153    println!("       model_name: \"local-model\".to_string(),");
154    println!("       base_url: \"http://localhost:1234/v1\".to_string(),");
155    println!("       api_key: \"not-needed\".to_string(),");
156    println!("       temperature: 0.7,");
157    println!("       max_tokens: 2048,");
158    println!("   }}\n");
159
160    println!("🦙 Ollama:");
161    println!("   LLMConfig {{");
162    println!("       model_name: \"llama2\".to_string(),");
163    println!("       base_url: \"http://localhost:11434/v1\".to_string(),");
164    println!("       api_key: \"not-needed\".to_string(),");
165    println!("       temperature: 0.7,");
166    println!("       max_tokens: 2048,");
167    println!("   }}\n");
168
169    println!("🔷 Azure OpenAI:");
170    println!("   LLMConfig {{");
171    println!("       model_name: \"gpt-35-turbo\".to_string(),");
172    println!("       base_url: \"https://your-resource.openai.azure.com/...\".to_string(),");
173    println!("       api_key: env::var(\"AZURE_OPENAI_KEY\").unwrap(),");
174    println!("       temperature: 0.7,");
175    println!("       max_tokens: 2048,");
176    println!("   }}\n");
177}
178
179/// Example 4: Interactive chat session
180async fn interactive_chat() -> helios_engine::Result<()> {
181    let llm_config = LLMConfig {
182        model_name: "gpt-3.5-turbo".to_string(),
183        base_url: "https://api.openai.com/v1".to_string(),
184        api_key: std::env::var("OPENAI_API_KEY")
185            .unwrap_or_else(|_| "your-api-key-here".to_string()),
186        temperature: 0.7,
187        max_tokens: 2048,
188    };
189
190    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
191    let mut session =
192        ChatSession::new().with_system_prompt("You are a friendly and helpful AI assistant.");
193
194    println!("Chat started! Type 'exit' or 'quit' to end the conversation.\n");
195
196    loop {
197        print!("You: ");
198        io::stdout().flush()?;
199
200        let mut input = String::new();
201        io::stdin().read_line(&mut input)?;
202        let input = input.trim();
203
204        if input.is_empty() {
205            continue;
206        }
207
208        if input == "exit" || input == "quit" {
209            println!("\n👋 Goodbye!");
210            break;
211        }
212
213        // Special commands
214        if input == "clear" {
215            session.clear();
216            println!("🧹 Conversation cleared!\n");
217            continue;
218        }
219
220        if input == "history" {
221            println!("\n📜 Conversation history:");
222            for (i, msg) in session.messages.iter().enumerate() {
223                println!("  {}. {:?}: {}", i + 1, msg.role, msg.content);
224            }
225            println!();
226            continue;
227        }
228
229        session.add_user_message(input);
230
231        print!("Assistant: ");
232        io::stdout().flush()?;
233
234        match client.chat(session.get_messages(), None).await {
235            Ok(response) => {
236                session.add_assistant_message(&response.content);
237                println!("{}\n", response.content);
238            }
239            Err(e) => {
240                println!("\n❌ Error: {}", e);
241                println!("   (Make sure OPENAI_API_KEY is set correctly)\n");
242                // Remove the last user message since it failed
243                session.messages.pop();
244            }
245        }
246    }
247
248    Ok(())
249}

Source

pub async fn chat_stream<F>(
    &self,
    messages: Vec<ChatMessage>,
    tools: Option<Vec<ToolDefinition>>,
    on_chunk: F,
) -> Result<ChatMessage>
where
    F: FnMut(&str) + Send,

Examples found in repository

examples/local_streaming.rs (lines 46-49)

12async fn main() -> helios_engine::Result<()> {
13    println!("🚀 Helios Engine - Local Model Streaming Example");
14    println!("=================================================\n");
15
16    // Configure local model
17    let local_config = LocalConfig {
18        huggingface_repo: "unsloth/Qwen2.5-0.5B-Instruct-GGUF".to_string(),
19        model_file: "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf".to_string(),
20        context_size: 2048,
21        temperature: 0.7,
22        max_tokens: 512,
23    };
24
25    println!("📥 Loading local model...");
26    println!("   Repository: {}", local_config.huggingface_repo);
27    println!("   Model: {}\n", local_config.model_file);
28
29    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Local(local_config)).await?;
30
31    println!("✓ Model loaded successfully!\n");
32
33    // Example 1: Simple streaming
34    println!("Example 1: Simple Streaming Response");
35    println!("======================================\n");
36
37    let messages = vec![
38        ChatMessage::system("You are a helpful coding assistant."),
39        ChatMessage::user("Write a short explanation of what Rust is."),
40    ];
41
42    print!("Assistant: ");
43    io::stdout().flush()?;
44
45    let _response = client
46        .chat_stream(messages, None, |chunk| {
47            print!("{}", chunk);
48            io::stdout().flush().unwrap();
49        })
50        .await?;
51
52    println!("\n");
53
54    // Example 2: Multiple questions with streaming
55    println!("Example 2: Interactive Streaming");
56    println!("==================================\n");
57
58    let questions = vec![
59        "What are the main benefits of Rust?",
60        "Give me a simple code example.",
61    ];
62
63    let mut session = helios_engine::ChatSession::new()
64        .with_system_prompt("You are a helpful programming assistant.");
65
66    for question in questions {
67        println!("User: {}", question);
68        session.add_user_message(question);
69
70        print!("Assistant: ");
71        io::stdout().flush()?;
72
73        let response = client
74            .chat_stream(session.get_messages(), None, |chunk| {
75                print!("{}", chunk);
76                io::stdout().flush().unwrap();
77            })
78            .await?;
79
80        session.add_assistant_message(&response.content);
81        println!("\n");
82    }
83
84    println!("✅ Local model streaming completed successfully!");
85    println!("\n💡 Features:");
86    println!("  • Token-by-token streaming for local models");
87    println!("  • Real-time response display (no more instant full responses)");
88    println!("  • Same streaming API for both local and remote models");
89    println!("  • Improved user experience with progressive output");
90
91    Ok(())
92}

More examples

Hide additional examples

examples/streaming_chat.rs (lines 40-43)

12async fn main() -> helios_engine::Result<()> {
13    println!("🚀 Helios Engine - Streaming Example");
14    println!("=====================================\n");
15
16    // Setup LLM configuration
17    let llm_config = LLMConfig {
18        model_name: "gpt-3.5-turbo".to_string(),
19        base_url: "https://api.openai.com/v1".to_string(),
20        api_key: std::env::var("OPENAI_API_KEY")
21            .unwrap_or_else(|_| "your-api-key-here".to_string()),
22        temperature: 0.7,
23        max_tokens: 2048,
24    };
25
26    let client = LLMClient::new(helios_engine::llm::LLMProviderType::Remote(llm_config)).await?;
27
28    println!("Example 1: Simple Streaming Response");
29    println!("======================================\n");
30
31    let messages = vec![
32        ChatMessage::system("You are a helpful assistant."),
33        ChatMessage::user("Write a short poem about coding."),
34    ];
35
36    print!("Assistant: ");
37    io::stdout().flush()?;
38
39    let response = client
40        .chat_stream(messages, None, |chunk| {
41            print!("{}", chunk);
42            io::stdout().flush().unwrap();
43        })
44        .await?;
45
46    println!("\n\n");
47
48    println!("Example 2: Interactive Streaming Chat");
49    println!("======================================\n");
50
51    let mut session = ChatSession::new().with_system_prompt("You are a helpful coding assistant.");
52
53    let questions = vec![
54        "What is Rust?",
55        "What are its main benefits?",
56        "Show me a simple example.",
57    ];
58
59    for question in questions {
60        println!("User: {}", question);
61        session.add_user_message(question);
62
63        print!("Assistant: ");
64        io::stdout().flush()?;
65
66        let response = client
67            .chat_stream(session.get_messages(), None, |chunk| {
68                print!("{}", chunk);
69                io::stdout().flush().unwrap();
70            })
71            .await?;
72
73        session.add_assistant_message(&response.content);
74        println!("\n");
75    }
76
77    println!("\nExample 3: Streaming with Thinking Tags");
78    println!("=========================================\n");
79    println!("When using models that support thinking tags (like o1),");
80    println!("you can detect and display them during streaming.\n");
81
82    struct ThinkingTracker {
83        in_thinking: bool,
84        thinking_buffer: String,
85    }
86
87    impl ThinkingTracker {
88        fn new() -> Self {
89            Self {
90                in_thinking: false,
91                thinking_buffer: String::new(),
92            }
93        }
94
95        fn process_chunk(&mut self, chunk: &str) -> String {
96            let mut output = String::new();
97            let mut chars = chunk.chars().peekable();
98
99            while let Some(c) = chars.next() {
100                if c == '<' {
101                    let remaining: String = chars.clone().collect();
102                    if remaining.starts_with("thinking>") {
103                        self.in_thinking = true;
104                        self.thinking_buffer.clear();
105                        output.push_str("\n💭 [Thinking");
106                        for _ in 0..9 {
107                            chars.next();
108                        }
109                        continue;
110                    } else if remaining.starts_with("/thinking>") {
111                        self.in_thinking = false;
112                        output.push_str("]\n");
113                        for _ in 0..10 {
114                            chars.next();
115                        }
116                        continue;
117                    }
118                }
119
120                if self.in_thinking {
121                    self.thinking_buffer.push(c);
122                    if self.thinking_buffer.len() % 3 == 0 {
123                        output.push('.');
124                    }
125                } else {
126                    output.push(c);
127                }
128            }
129
130            output
131        }
132    }
133
134    let messages = vec![ChatMessage::user(
135        "Solve this problem: What is 15 * 234 + 89?",
136    )];
137
138    let mut tracker = ThinkingTracker::new();
139    print!("Assistant: ");
140    io::stdout().flush()?;
141
142    let _response = client
143        .chat_stream(messages, None, |chunk| {
144            let output = tracker.process_chunk(chunk);
145            print!("{}", output);
146            io::stdout().flush().unwrap();
147        })
148        .await?;
149
150    println!("\n\n✅ Streaming examples completed!");
151    println!("\nKey benefits of streaming:");
152    println!("  • Real-time response display");
153    println!("  • Better user experience for long responses");
154    println!("  • Ability to show thinking/reasoning process");
155    println!("  • Early cancellation possible (future feature)");
156
157    Ok(())
158}