#![allow(dead_code)]
use super::{ChatProvider, ProviderType};
use crate::models::{ChatMessage, ChatSession};
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
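/// Chat provider backed by a locally running llamafile server, which exposes
/// both an OpenAI-compatible API and the llama.cpp-native HTTP API.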
pub struct LlamaFileProvider {
    /// Base URL of the llamafile HTTP server, e.g. "http://localhost:8080".
    endpoint: String,
    /// Whether the endpoint responded when the provider was constructed.
    available: bool,
    /// Model id reported by the server, when one could be determined.
    model_name: Option<String>,
}
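/// One model entry from the OpenAI-compatible `GET /v1/models` response.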
#[derive(Debug, Deserialize)]
struct LlamaFileModel {
    id: String,
    object: String,
    owned_by: Option<String>,
}
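/// Request body for the OpenAI-compatible `POST /v1/chat/completions` endpoint.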
#[derive(Debug, Serialize)]
struct LlamaFileChatRequest {
    // Unset optional fields are omitted rather than serialized as `null`,
    // which some OpenAI-compatible servers reject.
    #[serde(skip_serializing_if = "Option::is_none")]
    model: Option<String>,
    messages: Vec<LlamaFileMessage>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop: Option<Vec<String>>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
struct LlamaFileMessage {
    role: String,
    content: String,
}
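/// Response body from `POST /v1/chat/completions`.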
#[derive(Debug, Deserialize)]
struct LlamaFileChatResponse {
    id: Option<String>,
    object: String,
    created: i64,
    model: Option<String>,
    choices: Vec<LlamaFileChoice>,
    usage: Option<LlamaFileUsage>,
}
#[derive(Debug, Deserialize)]
struct LlamaFileChoice {
    index: i32,
    message: LlamaFileMessage,
    finish_reason: Option<String>,
}
#[derive(Debug, Deserialize)]
struct LlamaFileUsage {
    prompt_tokens: i32,
    completion_tokens: i32,
    total_tokens: i32,
}
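/// Request body for the llama.cpp-native `POST /completion` endpoint, which
/// bounds output length with `n_predict` rather than `max_tokens`.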
#[derive(Debug, Serialize)]
struct LlamaFileCompletionRequest {
    prompt: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    n_predict: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop: Option<Vec<String>>,
}
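/// Response body from `POST /completion`.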
#[derive(Debug, Deserialize)]
struct LlamaFileCompletionResponse {
    content: String,
    model: Option<String>,
    prompt: Option<String>,
    stop: bool,
    tokens_predicted: Option<i32>,
}
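/// Server status snapshot returned by `GET /health`.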
#[derive(Debug, Deserialize)]
pub struct LlamaFileHealth {
    pub status: String,
    pub slots_idle: Option<i32>,
    pub slots_processing: Option<i32>,
}
impl LlamaFileProvider {
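    /// Probes well-known local endpoints and returns a provider.
    ///
    /// Always returns `Some`: if no server responds, the provider is still
    /// constructed with the default endpoint and `available == false`, so
    /// callers can list it and retry later.
    ///
    /// A minimal usage sketch (`ignore`d because it assumes a crate path and
    /// a running server):
    ///
    /// ```ignore
    /// let provider = LlamaFileProvider::discover().unwrap();
    /// if provider.is_available() {
    ///     println!("llamafile model: {:?}", provider.model_name());
    /// }
    /// ```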
    pub fn discover() -> Option<Self> {
        let endpoints = Self::possible_endpoints();
        for endpoint in endpoints {
            if let Some(model_name) = Self::check_endpoint(&endpoint) {
                return Some(Self {
                    endpoint,
                    available: true,
                    model_name: Some(model_name),
                });
            }
        }
        // No endpoint responded: fall back to the default port but mark the
        // provider unavailable so callers can surface it and retry later.
        Some(Self {
            endpoint: "http://localhost:8080".to_string(),
            available: false,
            model_name: None,
        })
    }
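    /// Builds a provider for an explicit endpoint, probing it once to
    /// determine availability and the loaded model's name.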
    pub fn with_endpoint(endpoint: &str) -> Self {
        let model_name = Self::check_endpoint(endpoint);
        Self {
            endpoint: endpoint.to_string(),
            available: model_name.is_some(),
            model_name,
        }
    }
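    /// Candidate endpoints to probe, in priority order: the `LLAMAFILE_API`
    /// environment variable first, then the common default ports.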
    fn possible_endpoints() -> Vec<String> {
        let mut endpoints = Vec::new();
        // A user-supplied endpoint takes priority over the default ports.
        if let Ok(endpoint) = std::env::var("LLAMAFILE_API") {
            endpoints.push(endpoint);
        }
        endpoints.push("http://localhost:8080".to_string());
        endpoints.push("http://127.0.0.1:8080".to_string());
        endpoints.push("http://localhost:8081".to_string());
        endpoints.push("http://localhost:8000".to_string());
        endpoints
    }
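    /// Probes a single endpoint. Returns the model id from `/v1/models` when
    /// the server reports one, the placeholder `"llamafile"` when only
    /// `/health` answers, and `None` when the server is unreachable.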
    fn check_endpoint(endpoint: &str) -> Option<String> {
        // Prefer the OpenAI-compatible models endpoint, which reports the
        // loaded model's id.
        let models_url = format!("{}/v1/models", endpoint);
        if let Ok(response) = ureq::get(&models_url)
            .timeout(std::time::Duration::from_secs(2))
            .call()
        {
            if response.status() == 200 {
                if let Ok(json) = response.into_json::<serde_json::Value>() {
                    if let Some(data) = json.get("data").and_then(|d| d.as_array()) {
                        if let Some(model) = data.first() {
                            if let Some(id) = model.get("id").and_then(|id| id.as_str()) {
                                return Some(id.to_string());
                            }
                        }
                    }
                }
            }
        }
        // Fall back to the health endpoint: the server is up, but it does
        // not report a model id.
        let health_url = format!("{}/health", endpoint);
        if let Ok(response) = ureq::get(&health_url)
            .timeout(std::time::Duration::from_secs(2))
            .call()
        {
            if response.status() == 200 {
                return Some("llamafile".to_string());
            }
        }
        None
    }
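    /// The model id reported by the server, if one was discovered.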
    pub fn model_name(&self) -> Option<&str> {
        self.model_name.as_deref()
    }
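    /// Queries `GET /health` on the configured endpoint.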
    pub fn health(&self) -> Result<LlamaFileHealth> {
        let url = format!("{}/health", self.endpoint);
        let response: LlamaFileHealth = ureq::get(&url)
            .timeout(std::time::Duration::from_secs(2))
            .call()?
            .into_json()?;
        Ok(response)
    }
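    /// Sends the conversation to the OpenAI-compatible chat endpoint and
    /// returns the first choice's content.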
    pub fn chat(&self, messages: &[ChatMessage]) -> Result<String> {
        if !self.available {
            return Err(anyhow!("LlamaFile is not running"));
        }
        let api_messages: Vec<LlamaFileMessage> = messages
            .iter()
            .map(|m| LlamaFileMessage {
                role: m.role.clone(),
                content: m.content.clone(),
            })
            .collect();
        let request = LlamaFileChatRequest {
            model: self.model_name.clone(),
            messages: api_messages,
            temperature: Some(0.7),
            max_tokens: Some(2048),
            stream: Some(false),
            stop: None,
        };
        let url = format!("{}/v1/chat/completions", self.endpoint);
        let response: LlamaFileChatResponse = ureq::post(&url)
            .set("Content-Type", "application/json")
            .send_json(&request)?
            .into_json()?;
        response
            .choices
            .first()
            .map(|c| c.message.content.clone())
            .ok_or_else(|| anyhow!("No response from LlamaFile"))
    }
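    /// Sends a raw prompt to the llama.cpp-native `/completion` endpoint and
    /// returns the generated text.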
    pub fn complete(&self, prompt: &str) -> Result<String> {
        if !self.available {
            return Err(anyhow!("LlamaFile is not running"));
        }
        let request = LlamaFileCompletionRequest {
            prompt: prompt.to_string(),
            temperature: Some(0.7),
            n_predict: Some(2048),
            stop: None,
        };
        let url = format!("{}/completion", self.endpoint);
        let response: LlamaFileCompletionResponse = ureq::post(&url)
            .set("Content-Type", "application/json")
            .send_json(&request)?
            .into_json()?;
        Ok(response.content)
    }
}
impl ChatProvider for LlamaFileProvider {
    fn provider_type(&self) -> ProviderType {
        ProviderType::LlamaFile
    }
    fn name(&self) -> &str {
        "LlamaFile"
    }
    fn is_available(&self) -> bool {
        self.available
    }
    // LlamaFile is a stateless HTTP server that keeps no session history on
    // disk, so the session-related hooks are inert.
    fn sessions_path(&self) -> Option<PathBuf> {
        None
    }
    fn list_sessions(&self) -> Result<Vec<ChatSession>> {
        Ok(Vec::new())
    }
    fn import_session(&self, _session_id: &str) -> Result<ChatSession> {
        Err(anyhow!("LlamaFile does not store sessions"))
    }
    fn export_session(&self, _session: &ChatSession) -> Result<()> {
        Err(anyhow!("LlamaFile does not support session export"))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_discover() {
        // discover() always returns Some; availability is carried by the flag.
        let provider = LlamaFileProvider::discover().expect("discover never returns None");
        println!("Available: {}", provider.available);
        println!("Model: {:?}", provider.model_name);
        // A model name is recorded exactly when a server responded.
        assert_eq!(provider.available, provider.model_name.is_some());
    }
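
    #[test]
    fn test_with_endpoint_unreachable() {
        // A hedged negative probe: port 9 (discard) is assumed to have no
        // HTTP server listening, so the check fails and the provider comes
        // back unavailable. Adjust the port if it is in use locally.
        let provider = LlamaFileProvider::with_endpoint("http://127.0.0.1:9");
        assert!(!provider.available);
        assert!(provider.model_name().is_none());
        // Requests fail fast on an unavailable provider.
        assert!(provider.chat(&[]).is_err());
    }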
}