openai-protocol 1.0.0

//! Worker management API specifications
//!
//! Defines the request/response structures for worker management endpoints

use std::collections::HashMap;

#[cfg(feature = "axum")]
use axum::{
    http::StatusCode,
    response::{IntoResponse, Response},
    Json,
};
use serde::{Deserialize, Serialize};
#[cfg(feature = "axum")]
use serde_json::{json, Value};

use super::UNKNOWN_MODEL_ID;

/// Worker configuration for API requests
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerConfigRequest {
    /// Worker URL (required)
    pub url: String,

    /// Worker API key (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_key: Option<String>,

    /// Model ID (optional, will query from server if not provided)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_id: Option<String>,

    /// Worker priority (optional, default: 50, higher = preferred)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    /// Worker cost factor (optional, default: 1.0)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    /// Worker type (optional: "regular", "prefill", "decode")
    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,

    /// Bootstrap port for prefill workers (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bootstrap_port: Option<u16>,

    /// Runtime type (optional: "sglang", "vllm", default: "sglang")
    /// Only relevant for gRPC workers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime: Option<String>,

    // gRPC-specific configuration (optional, ignored in HTTP mode)
    /// Tokenizer path for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    /// Reasoning parser type for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    /// Tool parser type for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    /// Chat template for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,

    /// Additional labels (optional)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,

    /// Health check timeout in seconds (default: 30)
    #[serde(default = "default_health_check_timeout")]
    pub health_check_timeout_secs: u64,

    /// Health check interval in seconds (default: 60)
    #[serde(default = "default_health_check_interval")]
    pub health_check_interval_secs: u64,

    /// Number of successful health checks needed to mark worker as healthy (default: 2)
    #[serde(default = "default_health_success_threshold")]
    pub health_success_threshold: u32,

    /// Number of failed health checks before marking worker as unhealthy (default: 3)
    #[serde(default = "default_health_failure_threshold")]
    pub health_failure_threshold: u32,

    /// Disable periodic health checks for this worker (default: false)
    #[serde(default)]
    pub disable_health_check: bool,

    /// Maximum connection attempts during worker registration (default: 20)
    #[serde(default = "default_max_connection_attempts")]
    pub max_connection_attempts: u32,

    /// Enable data parallelism aware scheduling (default: false)
    #[serde(default)]
    pub dp_aware: bool,
}

// Default value functions for serde
fn default_health_check_timeout() -> u64 {
    30
}

fn default_health_check_interval() -> u64 {
    60
}

fn default_health_success_threshold() -> u32 {
    2
}

fn default_health_failure_threshold() -> u32 {
    3
}

fn default_max_connection_attempts() -> u32 {
    20
}

/// Worker information for API responses
#[derive(Debug, Clone, Serialize)]
pub struct WorkerInfo {
    /// Worker unique identifier
    pub id: String,

    /// Worker URL
    pub url: String,

    /// Model ID this worker serves
    pub model_id: String,

    /// Worker priority
    pub priority: u32,

    /// Worker cost factor
    pub cost: f32,

    /// Worker type
    pub worker_type: String,

    /// Whether the worker is healthy
    pub is_healthy: bool,

    /// Current load on the worker
    pub load: usize,

    /// Connection mode (http or grpc)
    pub connection_mode: String,

    /// Runtime type (sglang or vllm, for gRPC workers)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_type: Option<String>,

    // gRPC-specific fields (None for HTTP workers)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,

    /// Bootstrap port for prefill workers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bootstrap_port: Option<u16>,

    /// Additional metadata
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,

    /// Whether health checks are disabled for this worker
    pub disable_health_check: bool,

    /// Job status for async operations (if available)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub job_status: Option<JobStatus>,
}

impl WorkerInfo {
    /// Create a partial WorkerInfo for pending workers (not yet registered).
    /// Used when a worker ID maps to a URL but the worker is still being registered.
    pub fn pending(worker_id: &str, url: String, job_status: Option<JobStatus>) -> Self {
        Self {
            id: worker_id.to_string(),
            url,
            model_id: UNKNOWN_MODEL_ID.to_string(),
            priority: 0,
            cost: 1.0,
            worker_type: UNKNOWN_MODEL_ID.to_string(),
            is_healthy: false,
            load: 0,
            connection_mode: UNKNOWN_MODEL_ID.to_string(),
            runtime_type: None,
            tokenizer_path: None,
            reasoning_parser: None,
            tool_parser: None,
            chat_template: None,
            bootstrap_port: None,
            metadata: HashMap::new(),
            disable_health_check: false,
            job_status,
        }
    }
}

/// Job status for async control plane operations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatus {
    pub job_type: String,
    pub worker_url: String,
    pub status: String,
    pub message: Option<String>,
    pub timestamp: u64,
}

impl JobStatus {
    /// Create a pending job status
    pub fn pending(job_type: &str, worker_url: &str) -> Self {
        Self {
            job_type: job_type.to_string(),
            worker_url: worker_url.to_string(),
            status: "pending".to_string(),
            message: None,
            timestamp: std::time::SystemTime::now()
                .duration_since(std::time::SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        }
    }

    /// Create a processing job status
    pub fn processing(job_type: &str, worker_url: &str) -> Self {
        Self {
            job_type: job_type.to_string(),
            worker_url: worker_url.to_string(),
            status: "processing".to_string(),
            message: None,
            timestamp: std::time::SystemTime::now()
                .duration_since(std::time::SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        }
    }

    /// Create a failed job status
    pub fn failed(job_type: &str, worker_url: &str, error: String) -> Self {
        Self {
            job_type: job_type.to_string(),
            worker_url: worker_url.to_string(),
            status: "failed".to_string(),
            message: Some(error),
            timestamp: std::time::SystemTime::now()
                .duration_since(std::time::SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        }
    }
}

/// Worker list response
#[derive(Debug, Clone, Serialize)]
pub struct WorkerListResponse {
    /// List of workers
    pub workers: Vec<WorkerInfo>,

    /// Total count
    pub total: usize,

    /// Statistics
    pub stats: WorkerStats,
}

/// Worker statistics
#[derive(Debug, Clone, Serialize)]
pub struct WorkerStats {
    pub total_workers: usize,
    pub healthy_workers: usize,
    pub total_models: usize,
    pub total_load: usize,
    pub by_type: WorkerTypeStats,
}

/// Worker statistics by type
#[derive(Debug, Clone, Serialize)]
pub struct WorkerTypeStats {
    pub regular: usize,
    pub prefill: usize,
    pub decode: usize,
}

/// Worker update request
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkerUpdateRequest {
    /// Update priority
    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    /// Update cost
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    /// Update labels
    #[serde(skip_serializing_if = "Option::is_none")]
    pub labels: Option<HashMap<String, String>>,

    /// Update API key (for key rotation)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_key: Option<String>,

    /// Update health check timeout in seconds
    #[serde(skip_serializing_if = "Option::is_none")]
    pub health_check_timeout_secs: Option<u64>,

    /// Update health check interval in seconds
    #[serde(skip_serializing_if = "Option::is_none")]
    pub health_check_interval_secs: Option<u64>,

    /// Update health success threshold
    #[serde(skip_serializing_if = "Option::is_none")]
    pub health_success_threshold: Option<u32>,

    /// Update health failure threshold
    #[serde(skip_serializing_if = "Option::is_none")]
    pub health_failure_threshold: Option<u32>,

    /// Disable periodic health checks for this worker
    #[serde(skip_serializing_if = "Option::is_none")]
    pub disable_health_check: Option<bool>,
}

/// Generic API response
#[derive(Debug, Clone, Serialize)]
pub struct WorkerApiResponse {
    pub success: bool,
    pub message: String,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker: Option<WorkerInfo>,
}

/// Error response
#[derive(Debug, Clone, Serialize)]
pub struct WorkerErrorResponse {
    pub error: String,
    pub code: String,
}

/// Server info response from /get_server_info endpoint
#[derive(Debug, Clone, Deserialize)]
pub struct ServerInfo {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_id: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_path: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,

    // gRPC-specific
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,
}

/// Result from flush cache operations across workers
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FlushCacheResult {
    /// URLs of workers where cache flush succeeded
    pub successful: Vec<String>,
    /// URLs and error messages for workers where cache flush failed
    pub failed: Vec<(String, String)>,
    /// Total number of workers attempted
    pub total_workers: usize,
    /// Number of HTTP workers (gRPC workers don't support flush cache)
    pub http_workers: usize,
    /// Human-readable summary message
    pub message: String,
}

/// Result from getting worker loads
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerLoadsResult {
    /// Worker URL and load pairs
    pub loads: Vec<WorkerLoadInfo>,
    /// Total number of workers
    pub total_workers: usize,
    /// Number of workers with successful load fetches
    pub successful: usize,
    /// Number of workers with failed load fetches
    pub failed: usize,
}

/// Individual worker load information
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerLoadInfo {
    /// Worker URL
    pub worker: String,
    /// Worker type (regular, prefill, decode)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,
    /// Current load (-1 indicates failure to fetch)
    pub load: isize,
}

#[cfg(feature = "axum")]
impl IntoResponse for FlushCacheResult {
    fn into_response(self) -> Response {
        let status = if self.failed.is_empty() {
            StatusCode::OK
        } else {
            StatusCode::PARTIAL_CONTENT
        };

        let mut body = json!({
            "status": if self.failed.is_empty() { "success" } else { "partial_success" },
            "message": self.message,
            "workers_flushed": self.successful.len(),
            "total_http_workers": self.http_workers,
            "total_workers": self.total_workers
        });

        if !self.failed.is_empty() {
            body["successful"] = json!(self.successful);
            body["failed"] = json!(self
                .failed
                .into_iter()
                .map(|(url, err)| json!({"worker": url, "error": err}))
                .collect::<Vec<_>>());
        }

        (status, Json(body)).into_response()
    }
}

#[cfg(feature = "axum")]
impl IntoResponse for WorkerLoadsResult {
    fn into_response(self) -> Response {
        let loads: Vec<Value> = self
            .loads
            .iter()
            .map(|info| json!({"worker": &info.worker, "load": info.load}))
            .collect();
        Json(json!({"workers": loads})).into_response()
    }
}