use serde::{Deserialize, Serialize};
/// Author role attached to a chat message.
///
/// Serde (de)serializes each variant under its lowercase name because of
/// `rename_all = "lowercase"`. The type is also exported through uniffi as an
/// enum.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, uniffi::Enum)]
#[serde(rename_all = "lowercase")]
pub enum ChatRole {
/// Serialized as `"system"`.
System,
/// Serialized as `"user"`.
User,
/// Serialized as `"assistant"`.
Assistant,
}
/// Renders the role as its lowercase name: `system`, `user` or `assistant`.
impl std::fmt::Display for ChatRole {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first so the formatter is touched in exactly one place.
        let label = match self {
            ChatRole::System => "system",
            ChatRole::User => "user",
            ChatRole::Assistant => "assistant",
        };
        f.write_str(label)
    }
}
/// A single chat message: who wrote it and what it says.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct ChatMessage {
/// Author role of this message.
pub role: ChatRole,
/// Message text.
pub content: String,
}
/// Convenience constructors, one per [`ChatRole`] variant.
impl ChatMessage {
    /// Builds a message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        let content = content.into();
        Self {
            role: ChatRole::System,
            content,
        }
    }

    /// Builds a message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        let content = content.into();
        Self {
            role: ChatRole::User,
            content,
        }
    }

    /// Builds a message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        let content = content.into();
        Self {
            role: ChatRole::Assistant,
            content,
        }
    }
}
/// Optional sampling parameters for text generation.
///
/// Every field is an `Option`, and the presets defined on this type leave the
/// knobs they do not use as `None`. How `None` is interpreted is up to the
/// consumer of this struct (presumably "use the backend default" — TODO
/// confirm against the engine).
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct SamplingConfig {
/// Sampling temperature.
pub temperature: Option<f64>,
/// Top-p sampling threshold.
pub top_p: Option<f64>,
/// Top-k sampling cutoff.
pub top_k: Option<u64>,
/// Min-p sampling threshold.
pub min_p: Option<f64>,
/// Cap on the number of generated tokens.
pub max_tokens: Option<u64>,
/// Frequency penalty. Note this is `f32`, unlike the `f64` knobs above.
pub frequency_penalty: Option<f32>,
/// Presence penalty. Note this is `f32`, unlike the `f64` knobs above.
pub presence_penalty: Option<f32>,
}
impl Default for SamplingConfig {
fn default() -> Self {
Self {
temperature: Some(0.7),
top_p: Some(0.95),
top_k: None,
min_p: None,
max_tokens: Some(512),
frequency_penalty: None,
presence_penalty: None,
}
}
}
/// Named sampling presets.
impl SamplingConfig {
    /// Baseline with every knob unset; each preset overrides only the fields
    /// it cares about.
    const UNSET: Self = Self {
        temperature: None,
        top_p: None,
        top_k: None,
        min_p: None,
        max_tokens: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    /// Temperature 0.0 with no top-p, capped at 512 tokens.
    pub fn deterministic() -> Self {
        Self {
            temperature: Some(0.0),
            max_tokens: Some(512),
            ..Self::UNSET
        }
    }

    /// Temperature 0.7 and top-p 0.95, capped at 128 tokens.
    pub fn mobile() -> Self {
        Self {
            temperature: Some(0.7),
            top_p: Some(0.95),
            max_tokens: Some(128),
            ..Self::UNSET
        }
    }

    /// Temperature 0.2 and top-p 0.95 with both penalties set, capped at
    /// 1024 tokens.
    pub fn coding() -> Self {
        Self {
            temperature: Some(0.2),
            top_p: Some(0.95),
            max_tokens: Some(1024),
            // NOTE(review): 1.15 looks like a multiplicative repetition-penalty
            // value rather than an additive frequency penalty — confirm the
            // scale the backend expects.
            frequency_penalty: Some(1.15),
            presence_penalty: Some(0.1),
            ..Self::UNSET
        }
    }

    /// Same as [`SamplingConfig::coding`] but capped at 256 tokens.
    pub fn coding_mobile() -> Self {
        let mut preset = Self::coding();
        preset.max_tokens = Some(256);
        preset
    }
}
/// Describes a model to be loaded from GGUF files.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct GgufModelConfig {
/// Identifier of the model (presumably a model-hub repository id — TODO confirm).
pub model_id: String,
/// Names of the model files to load.
pub files: Vec<String>,
/// Optional separate model id for the tokenizer (presumably used when the
/// GGUF source does not ship one — verify against the loader).
pub tok_model_id: Option<String>,
/// Human-readable model name.
pub display_name: String,
/// Human-readable memory estimate, kept as free-form text.
pub approx_memory: String,
}
/// Describes a model to be loaded with ISQ quantization.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct IsqModelConfig {
/// Identifier of the model to load.
pub model_id: String,
/// Quantization bit width; the presets on this type use 4 and 8.
pub isq_bits: u8,
/// Human-readable model name, e.g. `"Qwen 2.5 Coder 7B (ISQ 4-bit)"`.
pub display_name: String,
/// Human-readable memory estimate, e.g. `"~4.5 GB (ISQ Q4K, Metal)"`.
pub approx_memory: String,
}
/// Ready-made configurations for Qwen 2.5 Coder 7B.
impl IsqModelConfig {
    /// 4-bit ISQ preset.
    pub fn qwen25_coder_7b_isq4() -> Self {
        let display_name = String::from("Qwen 2.5 Coder 7B (ISQ 4-bit)");
        let approx_memory = String::from("~4.5 GB (ISQ Q4K, Metal)");
        Self {
            model_id: super::models::QWEN25_CODER_7B_INSTRUCT.into(),
            isq_bits: 4,
            display_name,
            approx_memory,
        }
    }

    /// 8-bit ISQ preset.
    pub fn qwen25_coder_7b_isq8() -> Self {
        let display_name = String::from("Qwen 2.5 Coder 7B (ISQ 8-bit)");
        let approx_memory = String::from("~9 GB (ISQ Q8K, Metal)");
        Self {
            model_id: super::models::QWEN25_CODER_7B_INSTRUCT.into(),
            isq_bits: 8,
            display_name,
            approx_memory,
        }
    }
}
/// Outcome of a completed (non-streaming) generation.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct InferenceResult {
/// Generated text.
pub text: String,
/// Duration in seconds (presumably wall-clock generation time — TODO confirm).
pub duration_secs: f64,
/// Human-readable form of the duration (presumably produced by
/// `format_duration` — verify against the caller).
pub duration_display: String,
/// Why generation stopped, as free-form text.
pub finish_reason: String,
}
/// One increment of a streamed generation.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct StreamChunk {
/// Text carried by this chunk.
pub delta: String,
/// Completion flag (presumably `true` only on the final chunk — TODO confirm).
pub done: bool,
/// Optional reason generation stopped (presumably set only when `done` is
/// `true` — verify against the producer).
pub finish_reason: Option<String>,
}
/// Lifecycle state of the inference engine.
///
/// Serde (de)serializes each variant under its `snake_case` name because of
/// `rename_all = "snake_case"`. The type is `Copy` and is exported through
/// uniffi as an enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, uniffi::Enum)]
#[serde(rename_all = "snake_case")]
pub enum EngineStatus {
/// Serialized as `"unloaded"`.
Unloaded,
/// Serialized as `"loading"`.
Loading,
/// Serialized as `"ready"`.
Ready,
/// Serialized as `"generating"`.
Generating,
/// Serialized as `"error"`.
Error,
}
impl std::fmt::Display for EngineStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EngineStatus::Unloaded => write!(f, "unloaded"),
EngineStatus::Loading => write!(f, "loading"),
EngineStatus::Ready => write!(f, "ready"),
EngineStatus::Generating => write!(f, "generating"),
EngineStatus::Error => write!(f, "error"),
}
}
}
/// Snapshot of the engine's state.
///
/// Exported through uniffi as a record and (de)serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, uniffi::Record)]
pub struct EngineInfo {
/// Current lifecycle state.
pub status: EngineStatus,
/// Name of the model, if any (presumably `None` while nothing is loaded —
/// TODO confirm).
pub model_name: Option<String>,
/// Human-readable memory estimate for the model, if any.
pub approx_memory: Option<String>,
/// Length of the chat history (presumably a message count — confirm).
/// Stored as a fixed-width `u64`.
pub history_length: u64,
}
/// Errors reported by the inference engine.
///
/// `Display` messages come from the `#[error(...)]` attributes via
/// `thiserror`; the type is also exported through uniffi as an error.
#[derive(Debug, thiserror::Error, uniffi::Error)]
pub enum InferenceError {
/// No model is loaded; the message directs callers to `load_model`.
#[error("No model loaded — call `load_model` first")]
NoModelLoaded,
/// A model is already loaded; carries that model's name.
#[error("Model is already loaded: {model_name}")]
AlreadyLoaded { model_name: String },
/// Building the model failed; `reason` holds the detail text.
#[error("Failed to build model: {reason}")]
ModelBuild { reason: String },
/// Inference failed; `reason` holds the detail text.
#[error("Inference failed: {reason}")]
Inference { reason: String },
/// Model loading was cancelled.
#[error("Model loading was cancelled")]
Cancelled,
/// Any other failure; `reason` is displayed verbatim with no prefix.
#[error("{reason}")]
Other { reason: String },
}
/// Formats a duration for display with one decimal place of seconds.
///
/// Durations under a minute render as `"<secs>.<tenth>s"` (e.g. `"4.6s"`);
/// longer ones render as `"<mins>m <secs>.<tenth>s"` (e.g. `"2m 5.3s"`).
///
/// The value is rounded to the nearest tenth of a second (halves round up)
/// *before* it is split into minutes and seconds. Splitting first and rounding
/// afterwards lets the seconds part round up to 60, producing output such as
/// `"60.0s"` or `"1m 60.0s"`; rounding first carries that into the minutes.
pub fn format_duration(d: std::time::Duration) -> String {
    const NANOS_PER_TENTH: u128 = 100_000_000;
    const TENTHS_PER_MINUTE: u128 = 600;

    // Integer arithmetic keeps the rounding exact and free of float error.
    let total_tenths = (d.as_nanos() + NANOS_PER_TENTH / 2) / NANOS_PER_TENTH;
    let mins = total_tenths / TENTHS_PER_MINUTE;
    let rem_tenths = total_tenths % TENTHS_PER_MINUTE;
    let secs = rem_tenths / 10;
    let tenth = rem_tenths % 10;

    if mins > 0 {
        format!("{}m {}.{}s", mins, secs, tenth)
    } else {
        format!("{}.{}s", secs, tenth)
    }
}
/// Unit tests for the types and helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;

    /// Each constructor sets the matching role and keeps the content.
    #[test]
    fn chat_message_constructors() {
        let sys = ChatMessage::system("You are helpful.");
        assert_eq!(sys.role, ChatRole::System);
        assert_eq!(sys.content, "You are helpful.");
        let user = ChatMessage::user("Hello");
        assert_eq!(user.role, ChatRole::User);
        let asst = ChatMessage::assistant("Hi there!");
        assert_eq!(asst.role, ChatRole::Assistant);
    }

    /// `Display` yields the lowercase role names.
    #[test]
    fn chat_role_display() {
        assert_eq!(ChatRole::System.to_string(), "system");
        assert_eq!(ChatRole::User.to_string(), "user");
        assert_eq!(ChatRole::Assistant.to_string(), "assistant");
    }

    /// The serde wire names must be the lowercase names that `Display`
    /// produces, and the capitalised Rust variant names must be rejected.
    ///
    /// Uses serde's built-in value deserializers so no data-format crate is
    /// needed.
    #[test]
    fn chat_role_serde_rename() {
        use serde::de::value::Error as ValueError;
        use serde::de::IntoDeserializer;
        use serde::Deserialize;

        for role in [ChatRole::System, ChatRole::User, ChatRole::Assistant] {
            let wire = role.to_string();
            let parsed: Result<ChatRole, ValueError> =
                ChatRole::deserialize(wire.as_str().into_deserializer());
            assert_eq!(parsed.unwrap(), role);
        }

        let rejected: Result<ChatRole, ValueError> =
            ChatRole::deserialize("System".into_deserializer());
        assert!(rejected.is_err());
    }

    /// The default preset sets temperature, top-p and the token cap.
    #[test]
    fn sampling_config_defaults() {
        let cfg = SamplingConfig::default();
        assert_eq!(cfg.temperature, Some(0.7));
        assert_eq!(cfg.top_p, Some(0.95));
        assert_eq!(cfg.max_tokens, Some(512));
    }

    /// The deterministic preset uses temperature 0.0 and no top-p.
    #[test]
    fn sampling_config_deterministic() {
        let cfg = SamplingConfig::deterministic();
        assert_eq!(cfg.temperature, Some(0.0));
        assert!(cfg.top_p.is_none());
    }

    /// The mobile preset caps output at 128 tokens.
    #[test]
    fn sampling_config_mobile() {
        let cfg = SamplingConfig::mobile();
        assert_eq!(cfg.max_tokens, Some(128));
    }

    /// `Display` yields the lowercase status names.
    #[test]
    fn engine_status_display() {
        assert_eq!(EngineStatus::Ready.to_string(), "ready");
        assert_eq!(EngineStatus::Generating.to_string(), "generating");
    }

    /// Durations under a minute show seconds only.
    #[test]
    fn format_duration_under_minute() {
        let d = std::time::Duration::from_secs_f64(4.567);
        assert_eq!(format_duration(d), "4.6s");
    }

    /// Durations of a minute or more show minutes and seconds.
    #[test]
    fn format_duration_over_minute() {
        let d = std::time::Duration::from_secs_f64(125.3);
        assert_eq!(format_duration(d), "2m 5.3s");
    }

    /// Error messages match the `#[error(...)]` templates.
    #[test]
    fn inference_error_display() {
        let err = InferenceError::NoModelLoaded;
        assert_eq!(err.to_string(), "No model loaded — call `load_model` first");
        let err = InferenceError::ModelBuild {
            reason: "out of memory".into(),
        };
        assert_eq!(err.to_string(), "Failed to build model: out of memory");
        let err = InferenceError::AlreadyLoaded {
            model_name: "Qwen 2.5".into(),
        };
        assert_eq!(err.to_string(), "Model is already loaded: Qwen 2.5");
    }

    /// `history_length` is a `u64`; the typed literal pins the field type.
    #[test]
    fn engine_info_history_is_u64() {
        let info = EngineInfo {
            status: EngineStatus::Ready,
            model_name: Some("Test".into()),
            approx_memory: None,
            history_length: 42,
        };
        assert_eq!(info.history_length, 42u64);
    }
}