use runcycles::models::*;
use runcycles::{with_cycles, CyclesClient, WithCyclesConfig};
/// Mock LLM call used by the examples below: echoes the prompt back and
/// reports token counts derived from byte lengths (1 byte ≈ 1 "token" here).
///
/// Returns `(reply, input_tokens, output_tokens)`. The `max_tokens` budget
/// is accepted for signature parity with a real client but not enforced.
async fn call_llm(prompt: &str, max_tokens: i64) -> (String, i64, i64) {
    let _ = max_tokens; // budget is ignored by this stub
    let reply = format!("Response to: {prompt}");
    let prompt_tokens = prompt.len() as i64;
    let reply_tokens = reply.len() as i64;
    (reply, prompt_tokens, reply_tokens)
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = CyclesClient::builder("my-api-key", "http://localhost:7878")
.tenant("acme")
.build();
let reply = with_cycles(
&client,
WithCyclesConfig::new(Amount::tokens(1000))
.action("llm.completion", "gpt-4o")
.subject(Subject {
tenant: Some("acme".into()),
..Default::default()
}),
|_ctx| async move {
let (reply, _inp, out) = call_llm("Hello", 1000).await;
Ok((reply, Amount::tokens(out)))
},
)
.await?;
println!("Simple: {reply}");
let reply = with_cycles(
&client,
WithCyclesConfig::new(Amount::tokens(2000))
.action("llm.completion", "gpt-4o")
.subject(Subject {
tenant: Some("acme".into()),
..Default::default()
}),
|ctx| async move {
let max_tokens = ctx.caps.as_ref().and_then(|c| c.max_tokens).unwrap_or(2000);
let (reply, inp, out) = call_llm("Write a poem", max_tokens).await;
println!("Used {inp} input + {out} output tokens");
Ok((reply, Amount::tokens(inp + out)))
},
)
.await?;
println!("With caps: {reply}");
let reply = with_cycles(
&client,
WithCyclesConfig::new(Amount::usd_microcents(50000))
.action("llm.completion", "gpt-4o")
.subject(Subject {
tenant: Some("acme".into()),
..Default::default()
})
.metrics(CyclesMetrics {
model_version: Some("gpt-4o-2024-05".into()),
..Default::default()
}),
|_ctx| async move {
let (reply, _inp, _out) = call_llm("Explain Rust", 500).await;
Ok((reply, Amount::usd_microcents(32000)))
},
)
.await?;
println!("With metrics: {reply}");
Ok(())
}