llm_stream_req/llm_stream_req.rs

use std::{io::Write, time::Duration};

use futures_util::StreamExt;
use serde_json::json;
use service_utils_rs::{error::Result, utils::Request};
use tokio::time::sleep;

#[tokio::main]
async fn main() -> Result<()> {
    // Create a new Request instance.
    let mut client = Request::new();

    // Set the base_url for streaming requests to the local Ollama server.
    client.set_base_url("http://localhost:11434")?;
    let stream_headers = vec![("Content-Type", "application/json".to_string())];
    client.set_default_headers(stream_headers)?;

    // Build the Ollama chat request body; "stream": true requests a
    // streamed response.
    let stream_body = json!({
        "model": "llama3.2",
        "stream": true,
        "messages": [
            {"role": "user", "content": "Hello, who are you?"}
        ]
    });

    let mut stream = client.post_stream("api/chat", &stream_body, None).await?;

    println!("Streaming Response:");
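    // Each streamed chunk holds newline-delimited JSON: one object per line,
    // with a slice of the reply under message.content and a final object
    // flagged "done": true.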
    while let Some(chunk) = stream.next().await {
        let data = chunk?;
        let s = std::str::from_utf8(&data).unwrap();

        for line in s.lines().filter(|l| !l.trim().is_empty()) {
            match serde_json::from_str::<serde_json::Value>(line) {
                Ok(json) => {
                    if let Some(content) = json["message"]["content"].as_str() {
                        print!("{}", content);
                        std::io::stdout().flush().unwrap();
                    }
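                    // The final streamed object is flagged "done": true and
                    // also carries timing/eval stats. Note `break` only exits
                    // the inner line loop; the outer `while` ends when the
                    // server closes the stream.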
                    if json["done"].as_bool().unwrap_or(false) {
                        println!();
                        break;
                    }
                }
                Err(err) => {
                    eprintln!("Parse error: {}", err);
                }
            }
        }

        // Optional: pause briefly so chunks aren't pulled faster than the
        // terminal can display them.
        sleep(Duration::from_millis(20)).await;
    }

    Ok(())
}

// cargo run --example llm_stream_req --features request
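// Caveat: `std::str::from_utf8(&data).unwrap()` above assumes each chunk is
// valid UTF-8 and carries only whole lines. A chunk boundary can in principle
// split a JSON line or even a multi-byte code point. Below is a minimal
// sketch of a more robust variant: buffer raw bytes across chunks and hand
// back only complete, newline-terminated lines. (`drain_complete_lines` is a
// hypothetical helper, not part of service_utils_rs.)
#[allow(dead_code)] // reference sketch; not called by main()
fn drain_complete_lines(buf: &mut Vec<u8>, chunk: &[u8]) -> Vec<String> {
    buf.extend_from_slice(chunk);
    let mut lines = Vec::new();
    // Drain everything up to and including each '\n'; whatever follows the
    // last newline stays buffered until the next chunk arrives.
    while let Some(pos) = buf.iter().position(|&b| b == b'\n') {
        let line: Vec<u8> = buf.drain(..=pos).collect();
        let text = String::from_utf8_lossy(&line).trim().to_string();
        if !text.is_empty() {
            lines.push(text);
        }
    }
    lines
}
// Usage: declare `let mut buf = Vec::new();` before the `while` loop, then
// iterate `for line in drain_complete_lines(&mut buf, &data)` in place of
// `s.lines()`.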