use multi_llm::{
unwrap_response, DefaultLLMParams, LLMConfig, LlmProvider, OllamaConfig, UnifiedLLMClient,
UnifiedLLMRequest, UnifiedMessage,
};
/// Sends a single chat request to an Ollama server on `localhost:11434` and
/// prints the reply, followed by token usage when the response carries it.
///
/// The model name is read from the `OLLAMA_MODEL` environment variable and
/// falls back to `"llama3.2"` when the variable cannot be read.
///
/// # Errors
///
/// Returns an error if the client cannot be built from the configuration or
/// if executing the request fails.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Resolve the model name once; it is needed both for the provider
    // configuration and for the status line printed below.
    let model = match std::env::var("OLLAMA_MODEL") {
        Ok(name) => name,
        Err(_) => String::from("llama3.2"),
    };

    // Provider-specific settings for the Ollama endpoint.
    let ollama = OllamaConfig {
        base_url: "http://localhost:11434".to_string(),
        default_model: model.clone(),
        max_context_tokens: 8192,
        retry_policy: Default::default(),
    };

    let config = LLMConfig {
        provider: Box::new(ollama),
        default_params: DefaultLLMParams::default(),
    };
    let client = UnifiedLLMClient::from_config(config)?;

    // One system prompt followed by a single user question.
    let messages = vec![
        UnifiedMessage::system("You are a helpful assistant. Be concise."),
        UnifiedMessage::user("What is the capital of France? Answer in one sentence."),
    ];
    let request = UnifiedLLMRequest::new(messages);

    println!(
        "Sending request to Ollama (local) using model: {}...",
        model
    );
    println!("Note: First request may be slow if the model isn't loaded yet.\n");

    // NOTE(review): `unwrap_response!` presumably extracts the plain response
    // from whatever `execute_llm` yields — confirm against the crate docs.
    let reply = unwrap_response!(client.execute_llm(request, None, None).await?);
    println!("Response: {}", reply.content);

    // Usage is optional on the response, so only report it when present.
    if let Some(tokens) = reply.usage.as_ref() {
        println!(
            "\nToken usage: {} input + {} output = {} total",
            tokens.prompt_tokens, tokens.completion_tokens, tokens.total_tokens
        );
    }

    Ok(())
}