Skip to main content

offline_intelligence/
admin.rs

1// Server/src/admin.rs
2// Simplified for 1-hop architecture - removed external process dependencies
3
4use axum::extract::{State, Json};
5use axum::http::StatusCode;
6use axum::response::IntoResponse;
7use crate::config::Config;
8use crate::metrics;
9use crate::shared_state::SharedState;
10use serde::{Deserialize, Serialize};
11use tracing::info;
12use std::sync::Arc;
13use sysinfo::System;
14use crate::cache_management::KVCacheManager;
15use crate::memory_db::MemoryDatabase;
16
17
18#[allow(dead_code)]
19#[derive(Clone)]
20pub struct AdminState {
21    pub cfg: Config,
22    pub shared_state: Arc<SharedState>,
23}
24
25#[derive(Deserialize)]
26pub struct LoadModelRequest {
27    pub model_path: String,
28    pub ctx_size: Option<u32>,
29    pub gpu_layers: Option<u32>,
30    pub batch_size: Option<u32>,
31}
32
33#[derive(Serialize)]
34pub struct StatusResponse {
35    pub current_model: Option<String>,
36    pub current_port: Option<u16>,
37    pub gpu_layers: Option<u32>,
38    pub ctx_size: Option<u32>,        // Add context size
39    pub batch_size: Option<u32>,      // Add batch size
40    pub is_healthy: bool,
41    pub uptime_seconds: Option<u64>,
42    pub memory_usage: Option<String>, // Add memory info
43}
44
45pub async fn get_status(
46    State(state): State<AdminState>,
47) -> impl IntoResponse {
48    // Simplified status for 1-hop architecture
49    let is_healthy = true; // Always healthy in direct memory access
50    
51    // Memory info
52    let memory_usage = {
53        let mut sys = System::new_all();
54        sys.refresh_memory();
55        let used = sys.used_memory();
56        let total = sys.total_memory();
57        Some(format!("{}/{} MB", used / 1024 / 1024, total / 1024 / 1024))
58    };
59
60    let response = StatusResponse {
61        current_model: Some("direct-llm".to_string()),
62        current_port: None,
63        gpu_layers: Some(0),
64        ctx_size: Some(state.cfg.ctx_size),
65        batch_size: Some(state.cfg.batch_size),
66        is_healthy,
67        uptime_seconds: Some(0),
68        memory_usage,
69    };
70    
71    metrics::inc_request("admin_status", "ok");
72    (StatusCode::OK, Json(response))
73}
74
75pub async fn load_model(
76    State(_state): State<AdminState>,
77    Json(req): Json<LoadModelRequest>,
78) -> impl IntoResponse {
79    info!("Received load model request for: {} with ctx_size: {:?}, gpu_layers: {:?}", 
80          req.model_path, req.ctx_size, req.gpu_layers);
81    
82    // In 1-hop architecture, model loading happens directly through shared state
83    // This is a placeholder implementation
84    metrics::inc_request("admin_load", "ok");
85    (StatusCode::OK, format!("Model loading initiated: {}", req.model_path))
86}
87
88pub async fn stop_backend(
89    State(state): State<crate::shared_state::UnifiedAppState>,
90) -> impl IntoResponse {
91    info!("Graceful shutdown initiated");
92
93    // Clone Arc references outside the lock to avoid holding guards across await points
94    let cache_mgr = state.shared_state.cache_manager.read()
95        .ok()
96        .and_then(|guard| guard.clone());
97
98    if let Some(cache_manager) = cache_mgr {
99        info!("Flushing KV cache to database...");
100        if let Err(e) = cache_manager.flush_to_database("global_shutdown", &[]).await {
101            info!("Cache flush during shutdown: {}", e);
102        } else {
103            info!("KV cache flushed to database");
104        }
105    }
106
107    // Clone runtime manager Arc outside the lock
108    let rt_mgr = state.shared_state.runtime_manager.read()
109        .ok()
110        .and_then(|guard| guard.clone());
111
112    if let Some(rt_mgr) = rt_mgr {
113        info!("Shutting down runtime manager...");
114        if let Err(e) = rt_mgr.shutdown().await {
115            info!("Runtime shutdown: {}", e);
116        } else {
117            info!("Runtime manager shut down");
118        }
119    }
120
121    metrics::inc_request("admin_stop", "ok");
122    (StatusCode::OK, "System shutdown initiated".to_string())
123}
124