Skip to main content

offline_intelligence/
admin.rs

1// Server/src/admin.rs
2// Simplified for 1-hop architecture - removed external process dependencies
3
4use axum::extract::{State, Json};
5use axum::http::StatusCode;
6use axum::response::IntoResponse;
7use crate::config::Config;
8use crate::metrics;
9use crate::shared_state::SharedState;
10use serde::{Deserialize, Serialize};
11use tracing::{info, error};
12use std::sync::Arc;
13use sysinfo::System;
14
15
/// Shared state handed to every admin route handler.
///
/// Cloned per-request by axum; `SharedState` is behind an `Arc`, so clones
/// are cheap (refcount bump plus a `Config` copy).
#[allow(dead_code)] // `shared_state` is currently unused by the placeholder handlers below
#[derive(Clone)]
pub struct AdminState {
    // Server configuration snapshot (ctx_size, batch_size, ...).
    pub cfg: Config,
    // Direct handle to the in-process LLM state (1-hop architecture).
    pub shared_state: Arc<SharedState>,
}
22
/// JSON body accepted by the `load_model` admin endpoint.
///
/// All tuning fields are optional; only `model_path` is required.
/// NOTE(review): the current `load_model` handler is a placeholder and does
/// not act on these values — confirm intended semantics before relying on them.
#[derive(Deserialize)]
pub struct LoadModelRequest {
    // Filesystem path of the model to load.
    pub model_path: String,
    // Requested context window size, if overriding the default.
    pub ctx_size: Option<u32>,
    // Number of layers to offload to GPU, if overriding the default.
    pub gpu_layers: Option<u32>,
    // Requested batch size, if overriding the default.
    pub batch_size: Option<u32>,
}
30
/// JSON payload returned by the `get_status` admin endpoint.
#[derive(Serialize)]
pub struct StatusResponse {
    // Identifier of the loaded model ("direct-llm" placeholder in 1-hop mode).
    pub current_model: Option<String>,
    // Port of a separate backend process; always `None` in 1-hop mode.
    pub current_port: Option<u16>,
    // GPU layers in use (hard-coded 0 by the current handler).
    pub gpu_layers: Option<u32>,
    // Context window size, echoed from `Config`.
    pub ctx_size: Option<u32>,
    // Batch size, echoed from `Config`.
    pub batch_size: Option<u32>,
    // Health flag; always `true` with direct memory access.
    pub is_healthy: bool,
    // Seconds since startup (currently a hard-coded 0 — not yet tracked).
    pub uptime_seconds: Option<u64>,
    // Human-readable "used/total MB" system memory string.
    pub memory_usage: Option<String>,
}
42
43pub async fn get_status(
44    State(state): State<AdminState>,
45) -> impl IntoResponse {
46    // Simplified status for 1-hop architecture
47    let is_healthy = true; // Always healthy in direct memory access
48    
49    // Memory info
50    let memory_usage = {
51        let mut sys = System::new_all();
52        sys.refresh_memory();
53        let used = sys.used_memory();
54        let total = sys.total_memory();
55        Some(format!("{}/{} MB", used / 1024 / 1024, total / 1024 / 1024))
56    };
57
58    let response = StatusResponse {
59        current_model: Some("direct-llm".to_string()),
60        current_port: None,
61        gpu_layers: Some(0),
62        ctx_size: Some(state.cfg.ctx_size),
63        batch_size: Some(state.cfg.batch_size),
64        is_healthy,
65        uptime_seconds: Some(0),
66        memory_usage,
67    };
68    
69    metrics::inc_request("admin_status", "ok");
70    (StatusCode::OK, Json(response))
71}
72
73pub async fn load_model(
74    State(_state): State<AdminState>,
75    Json(req): Json<LoadModelRequest>,
76) -> impl IntoResponse {
77    info!("Received load model request for: {} with ctx_size: {:?}, gpu_layers: {:?}", 
78          req.model_path, req.ctx_size, req.gpu_layers);
79    
80    // In 1-hop architecture, model loading happens directly through shared state
81    // This is a placeholder implementation
82    metrics::inc_request("admin_load", "ok");
83    (StatusCode::OK, format!("Model loading initiated: {}", req.model_path))
84}
85
86pub async fn stop_backend(
87    State(_state): State<AdminState>,
88) -> impl IntoResponse {
89    info!("Received stop backend request");
90    
91    // In 1-hop architecture, there's no separate backend to stop
92    // This is a placeholder implementation
93    metrics::inc_request("admin_stop", "ok");
94    (StatusCode::OK, "System shutdown initiated".to_string())
95}
96