//! offline_intelligence crate root (lib.rs).
pub mod config;
7pub mod metrics;
8pub mod telemetry;
9pub mod utils;
10
11pub mod llm_integration;
13
14pub mod api;
16
17pub mod memory;
19pub mod memory_db;
20
21pub mod proxy;
23
24pub mod admin;
26
27pub mod resources;
29
30pub use config::*;
38pub use llm_integration::*;
39pub use metrics::*;
40pub use proxy::*;
41pub use admin::*;
42
43use axum::{
44 Router,
45 routing::{get, post},
46 extract::{State, FromRef, Path},
47 response::IntoResponse,
48 Json,
49};
50use axum::http::Method;
51use std::{path::Path as StdPath, sync::Arc, time::Duration};
52use tokio::sync::RwLock;
53use tower::limit::ConcurrencyLimitLayer;
54use tower_http::{
55 cors::{Any, CorsLayer},
56 trace::TraceLayer,
57 timeout::TimeoutLayer,
58};
59use tracing::{info, warn, error};
60
61use memory_db::MemoryDatabase;
/// Aggregate application state shared by every route in the unified router.
///
/// Handlers can extract the whole struct (`State<UnifiedAppState>`) or, via
/// the `FromRef` impls in this file, just the sub-state they need.
#[derive(Clone)]
pub struct UnifiedAppState {
    // State for the proxy/generation routes (built from the engine + config).
    pub proxy: proxy::AppState,
    // State for the /admin routes (built from the engine + config).
    pub admin: admin::AdminState,
    // Shared inference engine; `Arc` makes the `Clone` derive cheap.
    pub llm_engine: Arc<LLMEngine>,
}
73
74impl FromRef<UnifiedAppState> for proxy::AppState {
75 fn from_ref(state: &UnifiedAppState) -> Self {
76 state.proxy.clone()
77 }
78}
79
80impl FromRef<UnifiedAppState> for admin::AdminState {
81 fn from_ref(state: &UnifiedAppState) -> Self {
82 state.admin.clone()
83 }
84}
85
86impl FromRef<UnifiedAppState> for Arc<LLMEngine> {
87 fn from_ref(state: &UnifiedAppState) -> Self {
88 state.llm_engine.clone()
89 }
90}
91
/// Liveness probe handler (`GET /healthz`).
///
/// Always responds with the literal body `"OK"`; any response at all means
/// the process is up and the router is serving.
async fn health_check() -> &'static str {
    const BODY: &str = "OK";
    BODY
}
95
/// Readiness probe handler (`GET /readyz`).
///
/// Always responds with the literal body `"READY"`.
async fn ready_check() -> &'static str {
    const BODY: &str = "READY";
    BODY
}
99
100async fn get_status_wrapper(
101 State(state): State<UnifiedAppState>,
102) -> impl IntoResponse {
103 admin::get_status(State(state.admin)).await
104}
105
106async fn load_model_wrapper(
107 State(state): State<UnifiedAppState>,
108 Json(req): Json<admin::LoadModelRequest>,
109) -> impl IntoResponse {
110 admin::load_model(State(state.admin), Json(req)).await
111}
112
113async fn stop_backend_wrapper(
114 State(state): State<UnifiedAppState>,
115) -> impl IntoResponse {
116 admin::stop_backend(State(state.admin)).await
117}
118
119async fn memory_stats_wrapper(
120 State(state): State<UnifiedAppState>,
121 Path(session_id): Path<String>,
122) -> impl IntoResponse {
123 api::memory_stats(State(state), Path(session_id)).await
124}
125
126async fn memory_optimize_wrapper(
127 State(state): State<UnifiedAppState>,
128 Json(req): Json<api::memory_api::MemoryOptimizeRequest>,
129) -> impl IntoResponse {
130 api::memory_optimize(State(state), Json(req)).await
131}
132
133async fn memory_cleanup_wrapper(
134 State(state): State<UnifiedAppState>,
135 Json(req): Json<api::memory_api::MemoryCleanupRequest>,
136) -> impl IntoResponse {
137 api::memory_cleanup(State(state), Json(req)).await
138}
139
/// Placeholder for a future cache-manager subsystem.
///
/// Currently a no-op: the database handle is accepted (and ignored) and no
/// manager is constructed, so this always returns `Ok(None)`.
/// NOTE(review): `Arc<()>` appears to be a stand-in return type until a real
/// cache-manager type exists — confirm intent. This function is also never
/// called from `run_server` in this file.
async fn init_cache_manager(
    _memory_database: Arc<MemoryDatabase>,
) -> anyhow::Result<Option<Arc<()>>> {
    Ok(None)
}
147
/// Builds and runs the HTTP server: initializes telemetry and metrics,
/// starts the LLM engine, opens the conversation database, wires up the
/// axum router with CORS/trace/concurrency/timeout middleware, and serves
/// on `cfg.api_host:cfg.api_port` until the serve future completes.
///
/// # Errors
/// Returns an error if the LLM engine fails to initialize, if both the
/// on-disk and in-memory conversation databases fail to open, or if binding
/// or serving the TCP listener fails.
pub async fn run_server(cfg: Config) -> anyhow::Result<()> {
    // Observability first, so everything below is traced and measured.
    telemetry::init_tracing();
    metrics::init_metrics();
    cfg.print_config();

    let llm_engine = Arc::new(LLMEngine::new(cfg.clone()));

    info!("🚀 Initializing LLM engine...");
    // Engine startup is fatal: without it no route can do useful work.
    match llm_engine.initialize().await {
        Ok(()) => {
            info!("✅ LLM engine initialized successfully");
        }
        Err(e) => {
            error!("❌ Failed to initialize LLM engine: {}", e);
            return Err(e);
        }
    }

    let admin_state = admin::AdminState {
        cfg: cfg.clone(),
        llm_engine: llm_engine.clone(),
    };

    // Prefer the on-disk conversation store; fall back to an in-memory
    // database (losing persistence) if the file can't be opened. The `?` on
    // the fallback means failing both is fatal.
    // NOTE(review): `memory_database` is never used after this point —
    // `init_cache_manager` is never called — confirm whether that is
    // intentional or a missed wiring step.
    let memory_db_path = StdPath::new("./data/conversations.db");
    let memory_database = match MemoryDatabase::new(memory_db_path) {
        Ok(db) => {
            info!("Memory database initialized at: {}", memory_db_path.display());
            Arc::new(db)
        }
        Err(e) => {
            warn!("Failed to initialize memory database: {}. Falling back to in-memory.", e);
            Arc::new(MemoryDatabase::new_in_memory()?)
        }
    };

    let proxy_state_simple = proxy::AppState {
        llm_engine: llm_engine.clone(),
        cfg: cfg.clone(),
    };

    // Compose the per-subsystem states; the `FromRef` impls above let
    // handlers extract whichever piece they need.
    let unified_state = UnifiedAppState {
        proxy: proxy_state_simple,
        admin: admin_state,
        llm_engine,
    };

    // Permissive CORS: any origin/header, GET and POST only.
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_methods([Method::GET, Method::POST])
        .allow_headers(Any);

    // Routes first, then middleware layers (layers apply to the routes
    // registered before them).
    let app = Router::new()
        .route("/generate/stream", post(proxy::generate_stream_endpoint))
        .route("/healthz", get(health_check))
        .route("/readyz", get(ready_check))
        .route("/metrics", get(metrics::get_metrics))
        .route("/admin/status", get(get_status_wrapper))
        .route("/admin/load", post(load_model_wrapper))
        .route("/admin/stop", post(stop_backend_wrapper))
        .route("/memory/optimize", post(memory_optimize_wrapper))
        .route("/memory/stats/:session_id", get(memory_stats_wrapper))
        .route("/memory/cleanup", post(memory_cleanup_wrapper))
        .layer(cors)
        .layer(TraceLayer::new_for_http())
        // Caps in-flight requests and bounds request duration from config.
        .layer(ConcurrencyLimitLayer::new(cfg.max_concurrent_streams as usize))
        .layer(TimeoutLayer::new(Duration::from_secs(cfg.generate_timeout_seconds)))
        .with_state(unified_state);

    info!("Starting server on {}:{}", cfg.api_host, cfg.api_port);
    let listener = tokio::net::TcpListener::bind(format!("{}:{}", cfg.api_host, cfg.api_port)).await?;

    // Runs until the server future resolves (error or shutdown).
    axum::serve(listener, app).await?;

    Ok(())
}