// offline_intelligence/admin.rs
use axum::extract::{State, Json};
use axum::http::StatusCode;
use axum::response::IntoResponse;
use crate::config::Config;
use crate::metrics;
use crate::shared_state::SharedState;
use serde::{Deserialize, Serialize};
use tracing::info;
use std::sync::Arc;
use sysinfo::System;
use crate::cache_management::KVCacheManager;
use crate::memory_db::MemoryDatabase;
16
17
/// State handed to the admin HTTP handlers via axum's `State` extractor.
#[allow(dead_code)] // shared_state is not read by any handler in this module yet
#[derive(Clone)]
pub struct AdminState {
    /// Service configuration; `get_status` reads `ctx_size` / `batch_size` from it.
    pub cfg: Config,
    /// Cross-handler shared state; currently unused by these handlers.
    pub shared_state: Arc<SharedState>,
}
24
/// JSON request body accepted by the `load_model` handler.
#[derive(Deserialize)]
pub struct LoadModelRequest {
    /// Filesystem path of the model to load.
    pub model_path: String,
    /// Optional context window size override (tokens).
    pub ctx_size: Option<u32>,
    /// Optional number of layers to offload to the GPU.
    pub gpu_layers: Option<u32>,
    /// Optional batch size override.
    pub batch_size: Option<u32>,
}
32
33#[derive(Serialize)]
34pub struct StatusResponse {
35 pub current_model: Option<String>,
36 pub current_port: Option<u16>,
37 pub gpu_layers: Option<u32>,
38 pub ctx_size: Option<u32>, pub batch_size: Option<u32>, pub is_healthy: bool,
41 pub uptime_seconds: Option<u64>,
42 pub memory_usage: Option<String>, }
44
45pub async fn get_status(
46 State(state): State<AdminState>,
47) -> impl IntoResponse {
48 let is_healthy = true; let memory_usage = {
53 let mut sys = System::new_all();
54 sys.refresh_memory();
55 let used = sys.used_memory();
56 let total = sys.total_memory();
57 Some(format!("{}/{} MB", used / 1024 / 1024, total / 1024 / 1024))
58 };
59
60 let response = StatusResponse {
61 current_model: Some("direct-llm".to_string()),
62 current_port: None,
63 gpu_layers: Some(0),
64 ctx_size: Some(state.cfg.ctx_size),
65 batch_size: Some(state.cfg.batch_size),
66 is_healthy,
67 uptime_seconds: Some(0),
68 memory_usage,
69 };
70
71 metrics::inc_request("admin_status", "ok");
72 (StatusCode::OK, Json(response))
73}
74
75pub async fn load_model(
76 State(_state): State<AdminState>,
77 Json(req): Json<LoadModelRequest>,
78) -> impl IntoResponse {
79 info!("Received load model request for: {} with ctx_size: {:?}, gpu_layers: {:?}",
80 req.model_path, req.ctx_size, req.gpu_layers);
81
82 metrics::inc_request("admin_load", "ok");
85 (StatusCode::OK, format!("Model loading initiated: {}", req.model_path))
86}
87
/// Handler performing a graceful, best-effort shutdown.
///
/// Order matters: the KV cache is flushed to the database *before* the
/// runtime manager is torn down, so cached state is persisted while the
/// runtime is still alive. Failures in either step are logged and do
/// not abort the remaining shutdown work.
pub async fn stop_backend(
    State(state): State<crate::shared_state::UnifiedAppState>,
) -> impl IntoResponse {
    info!("Graceful shutdown initiated");

    // Clone the inner handle out of the lock guard so the guard is
    // dropped before the `.await` below (holding a sync lock guard
    // across an await point can stall the executor). A poisoned lock
    // degrades to "no cache manager" via `.ok()`.
    // NOTE(review): assumes `cache_manager` is a std-style RwLock whose
    // `read()` returns a Result — confirm in shared_state.
    let cache_mgr = state.shared_state.cache_manager.read()
        .ok()
        .and_then(|guard| guard.clone());

    if let Some(cache_manager) = cache_mgr {
        info!("Flushing KV cache to database...");
        // Best-effort: a flush failure is logged but shutdown continues.
        if let Err(e) = cache_manager.flush_to_database("global_shutdown", &[]).await {
            info!("Cache flush during shutdown: {}", e);
        } else {
            info!("KV cache flushed to database");
        }
    }

    // Same clone-then-drop-guard pattern for the runtime manager.
    let rt_mgr = state.shared_state.runtime_manager.read()
        .ok()
        .and_then(|guard| guard.clone());

    if let Some(rt_mgr) = rt_mgr {
        info!("Shutting down runtime manager...");
        if let Err(e) = rt_mgr.shutdown().await {
            info!("Runtime shutdown: {}", e);
        } else {
            info!("Runtime manager shut down");
        }
    }

    metrics::inc_request("admin_stop", "ok");
    (StatusCode::OK, "System shutdown initiated".to_string())
}
124