// velocia 0.3.1 - Docs.rs

use serde::{Deserialize, Serialize};

/// External model provider (e.g. a LiteLLM proxy).
///
/// Derives `Serialize`/`Deserialize`, so the field names below are also the
/// keys used in the serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderConfig {
    /// Provider name (required).
    pub name: String,
    /// Optional provider endpoint; `None` when not supplied.
    // NOTE(review): presumably the base URL of the proxy — confirm against the consumer.
    pub endpoint: Option<String>,
}

/// Exponential back-off retry configuration.
///
/// Every field carries a `#[serde(default = "...")]` attribute, so a field
/// that is missing from the deserialized input is filled in by the named
/// `HttpRetryOptions::default_*` function instead of causing an error.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpRetryOptions {
    /// Initial delay in seconds between retries.
    ///
    /// Falls back to `HttpRetryOptions::default_initial_delay()` when absent.
    #[serde(default = "HttpRetryOptions::default_initial_delay")]
    pub initial_delay: u32,
    /// Total number of attempts (including the first).
    ///
    /// Falls back to `HttpRetryOptions::default_attempts()` when absent.
    #[serde(default = "HttpRetryOptions::default_attempts")]
    pub attempts: u32,
    /// Base for exponential back-off (delay multiplied by `exp_base^n`).
    ///
    /// Falls back to `HttpRetryOptions::default_exp_base()` when absent.
    // NOTE(review): the retry loop is not in this file — confirm how `n` is counted there.
    #[serde(default = "HttpRetryOptions::default_exp_base")]
    pub exp_base: u32,
}

impl HttpRetryOptions {
    /// Fallback value for `initial_delay`.
    const DEFAULT_INITIAL_DELAY: u32 = 1;
    /// Fallback value for `attempts`.
    const DEFAULT_ATTEMPTS: u32 = 5;
    /// Fallback value for `exp_base`.
    const DEFAULT_EXP_BASE: u32 = 7;

    /// Supplies `initial_delay` when the field is missing from the input
    /// (referenced by the field's `#[serde(default = ...)]` attribute).
    fn default_initial_delay() -> u32 {
        Self::DEFAULT_INITIAL_DELAY
    }

    /// Supplies `attempts` when the field is missing from the input
    /// (referenced by the field's `#[serde(default = ...)]` attribute).
    fn default_attempts() -> u32 {
        Self::DEFAULT_ATTEMPTS
    }

    /// Supplies `exp_base` when the field is missing from the input
    /// (referenced by the field's `#[serde(default = ...)]` attribute).
    fn default_exp_base() -> u32 {
        Self::DEFAULT_EXP_BASE
    }
}

impl Default for HttpRetryOptions {
    /// Builds the options from the same per-field fallback functions that the
    /// struct's `#[serde(default = ...)]` attributes point at, so a
    /// programmatic default and an empty deserialized value agree.
    fn default() -> Self {
        let initial_delay = Self::default_initial_delay();
        let attempts = Self::default_attempts();
        let exp_base = Self::default_exp_base();

        Self {
            initial_delay,
            attempts,
            exp_base,
        }
    }
}

/// LLM generation hyperparameters.  All fields are optional; unset fields
/// delegate to the model's own defaults.
///
/// The derived `Default` produces a value in which every field is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ModelHyperparameters {
    /// Sampling temperature (0.0–2.0).  Lower = more deterministic.
    pub temperature: Option<f32>,
    /// Nucleus-sampling probability cutoff (0.0–1.0).
    pub top_p: Option<f32>,
    /// Top-K vocabulary restriction (0 = no limit).
    pub top_k: Option<u32>,
    /// Alias for `max_output_tokens`.
    ///
    /// Only consulted by `effective_max_output_tokens` when
    /// `max_output_tokens` is `None`.
    pub max_tokens: Option<u32>,
    /// Maximum tokens to generate.
    ///
    /// Takes precedence over `max_tokens` in `effective_max_output_tokens`.
    pub max_output_tokens: Option<u32>,
    /// Presence penalty (-2.0–2.0).
    pub presence_penalty: Option<f32>,
    /// Frequency penalty (-2.0–2.0).
    pub frequency_penalty: Option<f32>,
    /// Stop sequences that terminate generation.
    pub stop_sequences: Option<Vec<String>>,
    /// Reproducibility seed.
    pub seed: Option<u64>,
    /// HTTP-level retry configuration; `None` when no retry block is given.
    pub retry_options: Option<HttpRetryOptions>,
}

impl ModelHyperparameters {
    /// Resolves the output-token cap to use.
    ///
    /// `max_output_tokens` wins when it is set; otherwise the `max_tokens`
    /// alias is used. Returns `None` when neither field is set.
    pub fn effective_max_output_tokens(&self) -> Option<u32> {
        let preferred = self.max_output_tokens;
        let alias = self.max_tokens;
        preferred.or(alias)
    }
}

/// LLM model configuration loaded from `agent_config.yaml`.
///
/// Only `name` is required; `provider` and `hyperparameters` are `Option`s
/// and are `None` when not supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Model identifier (e.g. `"gemini-2.0-flash"`, `"gpt-4o"`).
    pub name: String,
    /// Optional external provider (activates LiteLLM-proxy routing when set).
    pub provider: Option<ProviderConfig>,
    /// Optional generation hyperparameters; `None` leaves every setting unspecified.
    pub hyperparameters: Option<ModelHyperparameters>,
}