// Import necessary dependencies.
use ambi::llm::providers::openai_api::config::OpenAIEngineConfig;
use ambi::{Agent, AgentState, ChatRunner, LLMEngineConfig};
use anyhow::Result;
use futures::StreamExt;
use std::io::Write;
use std::sync::Arc;
use tokio::sync::RwLock;

#[tokio::main]
async fn main() -> Result<()> {
    // Step 1: Configure the cloud-based LLM engine.
    // We use a mock API key and OpenAI's base URL for this streaming example.
    let engine_config = LLMEngineConfig::OpenAI(OpenAIEngineConfig {
        api_key: "mock-key".to_string(),
        base_url: "https://api.openai.com/v1".to_string(),
        model_name: "gpt-4o-mini".to_string(),
        temp: 0.7,
        top_p: 0.9,
    });
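
    // Note: "mock-key" is a placeholder; in a real application you would typically
    // read the key from an environment variable such as OPENAI_API_KEY instead of
    // hard-coding it.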

    // Step 2: Instantiate the ChatRunner, which drives chat requests against the agent.
    let chat_runner = ChatRunner;

    // Step 3: Instantiate the Agent with the provided configuration and wait for its
    // asynchronous initialization to complete.
    let agent = Agent::make(engine_config).await?;

    // Step 4: Initialize the agent state, wrapped in Arc<RwLock<..>> so it can be
    // shared and mutated safely across asynchronous tasks.
    let agent_state = Arc::new(RwLock::new(AgentState::new()));

    // Step 5: Initiate an asynchronous streaming chat request.
    // This returns a stream that yields text chunks as they are generated by the model.
    let mut res_stream = chat_runner
        .chat_stream(&agent, &agent_state, "Who are you and what can you do?")
        .await?;
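
    // The returned stream implements `futures::Stream`, so `StreamExt::next`
    // (imported above) can be used to pull text chunks one at a time as they arrive.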

    // Step 6: Iterate over the incoming stream asynchronously.
    // `next().await` fetches the next available chunk from the LLM.
    while let Some(chunk) = res_stream.next().await {
        if let Ok(text) = chunk {
            // Print the text chunk immediately without a trailing newline.
            print!("{}", text);
            // Flush standard output so the text appears on the console in real time.
            let _ = std::io::stdout().flush();
        }
    }
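
    // Note: error chunks are silently skipped in the loop above; a real application
    // would typically log or propagate them instead.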

    // Step 7: Print a final newline when the stream is complete.
    println!();

    Ok(())
}