// offline_intelligence/thread_server.rs
1//! Thread-based server implementation
2//!
3//! This module provides the server startup that uses thread-based
4//! shared memory architecture. All API handlers access state through
5//! Arc-wrapped shared memory (UnifiedAppState) — zero network hops
6//! between components. The only network call is to the localhost llama-server.
7
8use std::sync::Arc;
9use tokio::sync::RwLock;
10use tracing::{info, warn, debug, error};
11use anyhow::anyhow;
12
13use crate::{
14    config::Config,
15    shared_state::{SharedState, UnifiedAppState},
16    thread_pool::{ThreadPool, ThreadPoolConfig},
17    worker_threads::{ContextWorker, CacheWorker, DatabaseWorker, LLMWorker},
18    memory_db::MemoryDatabase,
19    model_management::ModelManager,
20};
21
/// Thread-based unified application state (internal, used during initialization)
///
/// Bundles the shared-memory hub together with handles to the worker objects
/// created during startup. Every field is `Arc`-wrapped, so `Clone` is cheap
/// (reference-count bumps only) and clones all observe the same state.
#[derive(Clone)]
pub struct ThreadBasedAppState {
    /// Shared memory state read and written by all workers and API handlers.
    pub shared_state: Arc<SharedState>,
    /// Thread pool behind an async lock; `None` until a pool has been installed.
    pub thread_pool: Arc<RwLock<Option<ThreadPool>>>,
    /// Worker handling context assembly.
    pub context_worker: Arc<ContextWorker>,
    /// Worker handling cache operations.
    pub cache_worker: Arc<CacheWorker>,
    /// Worker handling database operations.
    pub database_worker: Arc<DatabaseWorker>,
    /// Worker handling LLM inference requests.
    pub llm_worker: Arc<LLMWorker>,
}
32
33/// Run server with thread-based architecture
34/// 
35/// # Arguments
36/// * `cfg` - Server configuration
37/// * `port_tx` - Optional channel to communicate the selected port back to caller
38///               This is used when the configured port is unavailable and a random port is selected
39pub async fn run_thread_server(cfg: Config, port_tx: Option<std::sync::mpsc::Sender<u16>>) -> anyhow::Result<()> {
40    crate::telemetry::init_tracing();
41    crate::metrics::init_metrics();
42    cfg.print_config();
43
44    info!("Starting thread-based server architecture");
45
46    // ── Phase 1: Fast setup (database + managers) ─────────────────────────────
47    // Everything here finishes in < 10 seconds so the port can be bound and
48    // communicated to the main thread well within its 60-second timeout.
49
50    // Initialize database
51    let memory_db_path = dirs::data_dir()
52        .unwrap_or_else(|| std::env::current_dir().unwrap_or_default())
53        .join("Aud.io")
54        .join("data")
55        .join("memory.db");
56
57    if let Some(parent) = memory_db_path.parent() {
58        if let Err(e) = std::fs::create_dir_all(parent) {
59            warn!("Failed to create data directory {:?}: {}", parent, e);
60        } else {
61            info!("Created data directory: {:?}", parent);
62        }
63    }
64
65    let memory_database = match MemoryDatabase::new(&memory_db_path) {
66        Ok(db) => {
67            info!("Memory database initialized at: {}", memory_db_path.display());
68            Arc::new(db)
69        }
70        Err(e) => {
71            // In-memory fallback means ALL user data (conversations, API key metadata,
72            // settings) is lost on every restart. This must be clearly visible.
73            error!(
74                "❌ CRITICAL: Failed to open SQLite database at {}: {}\n\
75                 Falling back to IN-MEMORY storage — all user data will be lost on exit.\n\
76                 Check that the directory is writable and no other process holds a lock on the file.",
77                memory_db_path.display(), e
78            );
79            Arc::new(MemoryDatabase::new_in_memory()?)
80        }
81    };
82
83    // Shared state
84    let mut shared_state = SharedState::new(cfg.clone(), memory_database.clone())?;
85
86    // Model Manager (catalog scan, usually < 3 s)
87    info!("📦 Initializing Model Manager");
88    match ModelManager::new() {
89        Ok(model_manager) => {
90            let model_manager_arc = Arc::new(model_manager);
91            if let Err(e) = model_manager_arc.initialize(&cfg).await {
92                warn!("⚠️  Model manager initialization failed: {}", e);
93                shared_state.model_manager = Some(model_manager_arc);
94            } else {
95                info!("✅ Model manager initialized successfully");
96                shared_state.model_manager = Some(model_manager_arc);
97            }
98        }
99        Err(e) => warn!("⚠️  Failed to create model manager: {}", e),
100    }
101
102    // Engine Manager - BLOCK on startup until engine is ready (like Ollama)
103    // This ensures the app is fully ready before accepting connections
104    info!("⚙️  Initializing Engine Manager (blocking until ready)...");
105    match crate::engine_management::EngineManager::new() {
106        Ok(engine_manager) => {
107            let engine_manager_arc = Arc::new(engine_manager);
108            match engine_manager_arc.initialize(&cfg).await {
109                Ok(true) => {
110                    info!("✅ Engine manager initialized with engine ready");
111                    shared_state.engine_manager = Some(engine_manager_arc.clone());
112                    shared_state.engine_available.store(true, std::sync::atomic::Ordering::Relaxed);
113                }
114                Ok(false) => {
115                    // No engine binary found.  Do NOT block here — the port must be
116                    // bound quickly so main.rs doesn't time out.  The user can
117                    // download an engine from the Models page after the app opens.
118                    info!("⏳ No engine found — starting in online-only mode. Download from the Models page.");
119                    shared_state.engine_manager = Some(engine_manager_arc);
120                    shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
121                }
122                Err(e) => {
123                    warn!("⚠️  Engine manager scan failed: {}", e);
124                    shared_state.engine_manager = Some(engine_manager_arc);
125                    shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
126                }
127            }
128        }
129        Err(e) => {
130            error!("❌ Failed to create engine manager: {}", e);
131            shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
132        }
133    }
134
135    let shared_state = Arc::new(shared_state);
136
137    // Workers (fast construction)
138    let _context_worker: Arc<ContextWorker> = Arc::new(ContextWorker::new(shared_state.clone()));
139    let _cache_worker: Arc<CacheWorker> = Arc::new(CacheWorker::new(shared_state.clone()));
140    let _database_worker: Arc<DatabaseWorker> = Arc::new(DatabaseWorker::new(shared_state.clone()));
141    let _llm_worker = shared_state.llm_worker.clone();
142
143    // Cache manager
144    let cache_manager = match crate::cache_management::create_default_cache_manager(
145        crate::cache_management::KVCacheConfig::default(),
146        memory_database.clone(),
147    ) {
148        Ok(manager) => { info!("Cache manager initialized"); Some(Arc::new(manager)) }
149        Err(e) => { warn!("Cache manager failed: {}, disabled", e); None }
150    };
151    {
152        let mut g = shared_state.cache_manager.write()
153            .map_err(|_| anyhow::anyhow!("Failed to acquire cache manager write lock"))?;
154        *g = cache_manager;
155    }
156
157    // Embedding index (fast, reads disk)
158    if let Err(e) = shared_state.database_pool.embeddings.initialize_index("llama-server") {
159        debug!("Embedding index init: {} (will build on first store)", e);
160    } else {
161        info!("Embedding HNSW index loaded from existing data");
162    }
163
164    // Thread pool
165    let thread_pool_config = ThreadPoolConfig::new(&cfg);
166    let mut thread_pool = ThreadPool::new(thread_pool_config, shared_state.clone());
167    thread_pool.start().await?;
168
169    // ── Phase 2: Bind port immediately ────────────────────────────────────────
170    // Port is bound NOW, before any slow I/O, so the main thread's
171    // 60-second actual_port_rx timeout is satisfied within seconds.
172
173    let unified_state = UnifiedAppState::new(shared_state.clone());
174    let app = build_compatible_router(unified_state);
175
176    let (listener, selected_port) = match try_bind_port(&cfg.api_host, cfg.api_port).await {
177        Ok(listener) => {
178            let port = listener.local_addr()?.port();
179            info!("✅ HTTP server bound to {}:{}", cfg.api_host, port);
180            (listener, port)
181        }
182        Err(e) => {
183            warn!("⚠️ Failed to bind to port {}: {}", cfg.api_port, e);
184            warn!("🔄 Scanning 8002-8999 for available port...");
185            let mut last_error = None;
186            let mut found_listener = None;
187            let mut found_port = 0u16;
188            for port in 8002u16..=8999 {
189                match try_bind_port(&cfg.api_host, port).await {
190                    Ok(listener) => {
191                        found_port = listener.local_addr()?.port();
192                        info!("✅ HTTP server bound to alternative port {}", found_port);
193                        if let Ok(mut g) = shared_state.http_port.write() { *g = found_port; }
194                        found_listener = Some(listener);
195                        break;
196                    }
197                    Err(e) => { last_error = Some(e); }
198                }
199            }
200            let listener = found_listener.ok_or_else(|| anyhow!(
201                "Failed to find available port after scanning 8002-8999.\n  Last error: {:?}\n  Hints: disable firewall, close other Aud.io instances, or run as Administrator.",
202                last_error
203            ))?;
204            (listener, found_port)
205        }
206    };
207
208    // Send port to main thread — satisfies the 60-second actual_port_rx timeout.
209    if let Some(ref tx) = port_tx {
210        if let Err(e) = tx.send(selected_port) {
211            warn!("Failed to send port to main thread: {}", e);
212        } else {
213            info!("✅ Port {} communicated to main thread", selected_port);
214        }
215    }
216    info!("🌐 Server will accept connections on port {}", selected_port);
217
218    // ── Phase 3: Slow runtime init in background ──────────────────────────────
219    // Starting llama-server and waiting for it to be healthy can take 30-120 s.
220    // We do this in a background task; the HTTP server starts immediately and
221    // returns {"status":"initializing"} until mark_initialization_complete() fires.
222    {
223        let shared_state_bg = shared_state.clone();
224        let cfg_bg = cfg.clone();
225        let memory_database_bg = memory_database.clone();
226        tokio::spawn(async move {
227            // ── Mark initialization complete IMMEDIATELY ──────────────────────────
228            // The health endpoint now returns "degraded" right away (init done, no
229            // model loaded yet). The frontend LoadingScreen accepts "degraded" and
230            // opens the app; the local model auto-load continues in the background.
231            // This must be the very first statement so the ~100 ms polling window
232            // in LoadingScreen.tsx resolves on the first tick after axum starts.
233            shared_state_bg.mark_initialization_complete();
234            info!("✅ Backend marked as initialized — frontend may proceed");
235
236            // Context orchestrator (may query DB but is quick)
237            let context_orchestrator = match crate::context_engine::create_default_orchestrator(
238                memory_database_bg,
239            ).await {
240                Ok(mut orchestrator) => {
241                    orchestrator.set_llm_worker(shared_state_bg.llm_worker.clone());
242                    info!("Context orchestrator initialized");
243                    Some(orchestrator)
244                }
245                Err(e) => {
246                    warn!("Context orchestrator failed: {}. Memory features disabled.", e);
247                    None
248                }
249            };
250            {
251                let mut g = shared_state_bg.context_orchestrator.write().await;
252                *g = context_orchestrator;
253            }
254
255            // Runtime Manager — this is the slow part (starts llama-server)
256            info!("🚀 Initializing Runtime Manager");
257            let runtime_manager = Arc::new(crate::model_runtime::RuntimeManager::new());
258            let runtime_config = crate::model_runtime::RuntimeConfig {
259                model_path: std::path::PathBuf::from(&cfg_bg.model_path),
260                format: crate::model_runtime::ModelFormat::GGUF,
261                host: cfg_bg.llama_host.clone(),
262                port: cfg_bg.llama_port,
263                context_size: cfg_bg.ctx_size,
264                batch_size: cfg_bg.batch_size,
265                threads: cfg_bg.threads,
266                gpu_layers: cfg_bg.gpu_layers,
267                runtime_binary: if cfg_bg.llama_bin.is_empty() { None } else { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) },
268                extra_config: serde_json::json!({}),
269            };
270
271            // Check whether an engine binary exists (registry OR bundled config binary)
272            let llama_bin_exists = !cfg_bg.llama_bin.is_empty()
273                && std::path::Path::new(&cfg_bg.llama_bin).exists();
274            let has_engine = if let Some(ref em) = shared_state_bg.engine_manager {
275                let reg = em.registry.read().await;
276                reg.get_default_engine_binary_path().is_some() || llama_bin_exists
277            } else {
278                llama_bin_exists
279            };
280
281            // Always store the RuntimeManager so that switch_model can work
282            // even when no engine is currently installed (user can download later).
283            if let Err(e) = shared_state_bg.set_runtime_manager(runtime_manager.clone()) {
284                error!("❌ Failed to set runtime manager: {}", e);
285            }
286            shared_state_bg.llm_worker.set_runtime_manager(runtime_manager.clone());
287            info!("🔗 LLM worker linked to runtime manager");
288
289            if has_engine {
290                // Try to auto-load the last used model
291                let last_model_loaded = 'load: {
292                    let Some(data_dir) = dirs::data_dir() else { break 'load false; };
293                    let last_model_path = data_dir.join("Aud.io").join("last_model.txt");
294                    let Ok(last_model_id_raw) = std::fs::read_to_string(&last_model_path) else {
295                        info!("ℹ️  No last used model found");
296                        break 'load false;
297                    };
298                    let last_model_id = last_model_id_raw.trim().to_string();
299                    info!("🔄 Found last used model: {}", last_model_id);
300
301                    let Some(ref model_manager) = shared_state_bg.model_manager else {
302                        info!("ℹ️  Model manager not available - skipping auto-load");
303                        break 'load false;
304                    };
305                    let registry = model_manager.registry.read().await;
306                    let Some(model_info) = registry.get_model(&last_model_id) else {
307                        drop(registry);
308                        warn!("⚠️  Last used model not found in registry: {}", last_model_id);
309                        break 'load false;
310                    };
311                    if model_info.status != crate::model_management::registry::ModelStatus::Installed {
312                        drop(registry);
313                        info!("ℹ️  Last used model not installed");
314                        break 'load false;
315                    }
316                    let Some(ref filename) = model_info.filename else {
317                        drop(registry);
318                        warn!("⚠️  Last used model has no filename");
319                        break 'load false;
320                    };
321                    let model_path_for_runtime = model_manager.storage.model_path(&last_model_id, filename);
322                    drop(registry);
323
324                    if !model_path_for_runtime.exists() {
325                        warn!("⚠️  Last used model file not found: {}", model_path_for_runtime.display());
326                        break 'load false;
327                    }
328                    info!("✅ Auto-loading last used model from: {}", model_path_for_runtime.display());
329
330                    let default_engine = if let Some(ref em) = shared_state_bg.engine_manager {
331                        let reg = em.registry.read().await;
332                        reg.get_default_engine_binary_path()
333                            .or_else(|| if !cfg_bg.llama_bin.is_empty() { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) } else { None })
334                    } else if !cfg_bg.llama_bin.is_empty() {
335                        Some(std::path::PathBuf::from(&cfg_bg.llama_bin))
336                    } else { None };
337
338                    let mut updated_config = runtime_config.clone();
339                    updated_config.model_path = model_path_for_runtime;
340                    updated_config.runtime_binary = default_engine;
341
342                    match runtime_manager.initialize_auto(updated_config).await {
343                        Ok(base_url) => {
344                            info!("✅ Last used model auto-loaded at {}", base_url);
345                            match runtime_manager.health_check().await {
346                                Ok(status) => { info!("✅ Runtime health check passed: {}", status); true }
347                                Err(e) => { warn!("⚠️  Runtime health check failed: {}", e); false }
348                            }
349                        }
350                        Err(e) => { warn!("⚠️  Failed to auto-load last used model: {}", e); false }
351                    }
352                };
353
354                if !last_model_loaded && !cfg_bg.model_path.is_empty() {
355                    let default_engine = if let Some(ref em) = shared_state_bg.engine_manager {
356                        let reg = em.registry.read().await;
357                        reg.get_default_engine_binary_path()
358                            .or_else(|| if !cfg_bg.llama_bin.is_empty() { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) } else { None })
359                    } else if !cfg_bg.llama_bin.is_empty() {
360                        Some(std::path::PathBuf::from(&cfg_bg.llama_bin))
361                    } else { None };
362                    let mut updated_config = runtime_config;
363                    updated_config.runtime_binary = default_engine;
364                    info!("🚀 Initializing runtime with config model path...");
365                    match runtime_manager.initialize_auto(updated_config).await {
366                        Ok(base_url) => {
367                            info!("✅ Runtime initialized at {}", base_url);
368                            shared_state_bg.llm_worker.set_runtime_manager(runtime_manager);
369                        }
370                        Err(e) => warn!("⚠️  Runtime initialization failed: {}. Online-only mode.", e),
371                    }
372                }
373            } else {
374                info!("⏳ No engine found - starting in online-only mode");
375            }
376
377            info!("✅ Background initialization complete");
378        });
379    }
380
381    // Spawn attachment cache eviction task.
382    // Runs every 5 minutes and removes entries older than 30 minutes so the
383    // DashMap doesn't grow unboundedly when users attach many files without sending.
384    {
385        let cache = shared_state.attachment_cache.clone();
386        tokio::spawn(async move {
387            let interval = std::time::Duration::from_secs(300); // 5 minutes
388            loop {
389                tokio::time::sleep(interval).await;
390                let before = cache.len();
391                cache.retain(|_, v: &mut crate::shared_state::PreExtracted| {
392                    !v.is_stale(crate::api::attachment_api::CACHE_TTL_SECS)
393                });
394                let removed = before - cache.len();
395                if removed > 0 {
396                    info!("Attachment cache eviction: removed {} stale entries", removed);
397                }
398            }
399        });
400    }
401
402    // Start server — this blocks until the process exits.
403    info!("🟢 Axum server starting on port {}...", selected_port);
404    if let Err(e) = axum::serve(listener, app).await {
405        error!("Axum server error: {}", e);
406    }
407    
408    info!("Axum server stopped");
409    Ok(())
410}
411
412/// Try to bind to a specific port, returning the listener if successful
413async fn try_bind_port(host: &str, port: u16) -> anyhow::Result<tokio::net::TcpListener> {
414    let addr = format!("{}:{}", host, port);
415    match tokio::net::TcpListener::bind(&addr).await {
416        Ok(listener) => Ok(listener),
417        Err(e) if e.kind() == std::io::ErrorKind::AddrInUse => {
418            Err(anyhow::anyhow!("Port {} is already in use", port))
419        }
420        Err(e) => Err(anyhow::anyhow!("Failed to bind to {}: {}", addr, e)),
421    }
422}
423
/// Health response structure with detailed runtime status
///
/// Serialized as JSON by the `/healthz` handler; the frontend loading screen
/// polls this shape to decide when the app may open.
#[derive(serde::Serialize)]
struct HealthResponse {
    status: String,  // "ready", "initializing", "degraded"
    // True only when the LLM runtime can actually serve inference requests.
    runtime_ready: bool,
    // Human-readable detail for non-"ready" states; omitted when "ready".
    message: Option<String>,
}
431
432/// Health check handler that verifies backend is fully initialized AND runtime is ready
433async fn health_check(axum::extract::State(state): axum::extract::State<UnifiedAppState>) -> axum::response::Response {
434    use axum::Json;
435    use axum::response::IntoResponse;
436
437    // Check if backend initialization is complete
438    if !state.shared_state.is_initialization_complete() {
439        return Json(HealthResponse {
440            status: "initializing".to_string(),
441            runtime_ready: false,
442            message: Some("Backend initializing...".to_string()),
443        })
444        .into_response();
445    }
446
447    // Check if runtime is actually ready for inference
448    let runtime_ready = state.shared_state.llm_worker.is_runtime_ready().await;
449
450    let (status, message) = if runtime_ready {
451        ("ready", None)
452    } else {
453        (
454            "degraded",
455            Some("No model loaded. Please activate a model from the Models page.".to_string())
456        )
457    };
458
459    Json(HealthResponse {
460        status: status.to_string(),
461        runtime_ready,
462        message,
463    })
464    .into_response()
465}
466
467/// Build router for 1-hop architecture
468fn build_compatible_router(mut state: UnifiedAppState) -> axum::Router {
469    use axum::{
470        Router,
471        routing::{get, post, put, delete},
472        extract::DefaultBodyLimit,
473    };
474    use tower_http::{
475        cors::{Any, CorsLayer},
476        trace::TraceLayer,
477        timeout::TimeoutLayer,
478    };
479    use std::time::Duration;
480
481    // Allow any origin in all builds.
482    //
483    // Rationale: this server only ever binds to 127.0.0.1 (localhost), so it is
484    // unreachable from any remote host.  The WebView origin varies by platform
485    // and Tauri version (tauri://localhost, http://localhost, null, etc.).
486    // Restricting to a hard-coded origin silently breaks all fetch() calls when
487    // the actual origin doesn't match — the root cause of the 135-second loading
488    // screen hang.  Security is provided by Tauri's capability / CSP layer, not
489    // by CORS headers on a local-only server.
490    let cors = CorsLayer::new()
491        .allow_origin(Any)
492        .allow_methods([axum::http::Method::GET, axum::http::Method::POST, axum::http::Method::PUT, axum::http::Method::DELETE])
493        .allow_headers(Any);
494
495    // Get JWT secret from environment or generate a default
496    let jwt_secret = std::env::var("JWT_SECRET")
497        .unwrap_or_else(|_| "aud-io-default-secret-change-in-production".to_string());
498
499    // Get users store from database
500    let users_store = state.shared_state.database_pool.users.clone();
501
502    // Initialize Google OAuth state.
503    //
504    // Resolution order (first non-empty value wins):
505    //  1. Compile-time constant via `option_env!()` — baked into the binary at `cargo build`.
506    //     Set these in your CI/CD pipeline or locally before running `cargo tauri build`.
507    //     End users of the shipped installer never need to set anything.
508    //  2. Runtime environment variable — useful during local development / debugging.
509    let google_oauth = {
510        let client_id = option_env!("GOOGLE_CLIENT_ID")
511            .map(|s| s.to_string())
512            .filter(|s| !s.is_empty())
513            .or_else(|| std::env::var("GOOGLE_CLIENT_ID").ok().filter(|s| !s.is_empty()));
514
515        let client_secret = option_env!("GOOGLE_CLIENT_SECRET")
516            .map(|s| s.to_string())
517            .filter(|s| !s.is_empty())
518            .or_else(|| std::env::var("GOOGLE_CLIENT_SECRET").ok().filter(|s| !s.is_empty()));
519
520        match (client_id, client_secret) {
521            (Some(id), Some(secret)) => {
522                tracing::info!(
523                    "Google OAuth configured (client_id: {}...)",
524                    &id[..id.len().min(12)]
525                );
526                Some(crate::api::auth_api::GoogleOAuthPending {
527                    states: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
528                    client_id: id,
529                    client_secret: secret,
530                })
531            }
532            _ => {
533                tracing::info!(
534                    "Google OAuth not configured — set GOOGLE_CLIENT_ID + GOOGLE_CLIENT_SECRET before building"
535                );
536                None
537            }
538        }
539    };
540
541    // Create and set auth state
542    state.auth_state = Some(Arc::new(crate::api::auth_api::AuthState {
543        users: users_store,
544        jwt_secret,
545        google: google_oauth,
546    }));
547
548    Router::new()
549        // Auth routes — email/password (legacy) + Google OAuth
550        .route("/auth/signup", post(crate::api::auth_api::signup))
551        .route("/auth/login", post(crate::api::auth_api::login))
552        .route("/auth/verify-email", post(crate::api::auth_api::verify_email))
553        .route("/auth/me", post(crate::api::auth_api::get_current_user))
554        // Google OAuth endpoints
555        .route("/auth/google/init", post(crate::api::auth_api::google_init))
556        .route("/auth/google/callback", get(crate::api::auth_api::google_callback))
557        .route("/auth/google/status", get(crate::api::auth_api::google_status))
558        // Core 1-hop streaming endpoint
559        .route("/generate/stream", post(crate::api::stream_api::generate_stream))
560        // Online mode streaming endpoint
561        .route("/online/stream", post(crate::api::online_api::online_stream))
562        // Title generation via shared memory -> LLM worker
563        .route("/generate/title", post(crate::api::title_api::generate_title))
564        // Conversation CRUD via shared memory -> database
565        .route("/conversations", get(crate::api::conversation_api::get_conversations))
566        .route("/conversations/db-stats", get(crate::api::conversation_api::get_conversations_db_stats))
567        .route("/conversations/:id", get(crate::api::conversation_api::get_conversation))
568        .route("/conversations/:id/title", put(crate::api::conversation_api::update_conversation_title))
569        .route("/conversations/:id/pinned", post(crate::api::conversation_api::update_conversation_pinned))
570        .route("/conversations/:id", delete(crate::api::conversation_api::delete_conversation))
571        // Model management endpoints
572        .route("/models", get(crate::api::model_api::list_models))
573        .route("/models/by-mode", get(crate::api::model_api::list_models_by_mode))
574        .route("/models/active", get(crate::api::model_api::get_active_model))
575        .route("/models/search", get(crate::api::model_api::search_models))
576        .route("/models/install", post(crate::api::model_api::install_model))
577        .route("/models/remove", delete(crate::api::model_api::remove_model))
578        .route("/models/progress", get(crate::api::model_api::get_download_progress))
579        .route("/models/downloads", get(crate::api::model_api::get_active_downloads))
580        .route("/models/downloads/cancel", post(crate::api::model_api::cancel_download))
581        .route("/models/downloads/pause", post(crate::api::model_api::pause_download))
582        .route("/models/downloads/resume", post(crate::api::model_api::resume_download))
583        .route("/models/recommendations", get(crate::api::model_api::get_recommended_models))
584        .route("/models/preferences", post(crate::api::model_api::update_preferences))
585        .route("/models/refresh", post(crate::api::model_api::refresh_models))
586        .route("/models/switch", post(crate::api::model_api::switch_model))
587        // Phase A: HF gated model access check
588        .route("/models/hf/access", get(crate::api::model_api::check_hf_access))
589        // Phase B: Full OpenRouter catalog (paginated + filtered) and quota
590        .route("/models/openrouter/catalog", get(crate::api::model_api::openrouter_catalog))
591        .route("/models/openrouter/quota", get(crate::api::model_api::openrouter_quota))
592        .route("/hardware/recommendations", get(crate::api::model_api::get_hardware_recommendations))
593        .route("/hardware/info", get(crate::api::model_api::get_hardware_info))
594        .route("/metrics/system", get(crate::api::model_api::get_system_metrics))
595        .route("/storage/metadata", get(crate::api::model_api::get_storage_metadata))
596        // API Keys management endpoints
597        .route("/api-keys", post(crate::api::api_keys_api::save_api_key))
598        .route("/api-keys", get(crate::api::api_keys_api::get_api_key))
599        .route("/api-keys/all", get(crate::api::api_keys_api::get_all_api_keys))
600        .route("/api-keys", delete(crate::api::api_keys_api::delete_api_key))
601        .route("/api-keys/mark-used", post(crate::api::api_keys_api::mark_key_used))
602        .route("/api-keys/verify", post(crate::api::api_keys_api::verify_api_key))
603        // Mode management endpoints (online/offline)
604        .route("/mode/switch", post(crate::api::mode_api::switch_mode))
605        .route("/mode/status", get(crate::api::mode_api::get_mode_status))
606        // Files API endpoints (database-backed with nested folder support)
607        .route("/files", get(crate::api::files_api::get_files))
608        .route("/files/all", get(crate::api::files_api::get_all_files))
609        .route("/files/search", get(crate::api::files_api::search_files))
610        .route("/files/folder", post(crate::api::files_api::create_folder))
611        .route("/files/upload", post(crate::api::files_api::upload_file))
612        .route("/files/sync", post(crate::api::files_api::sync_files))
613        .route("/files/resync", post(crate::api::files_api::resync_files))
614        .route("/files/:id", get(crate::api::files_api::get_file_by_id))
615        .route("/files/:id/content", get(crate::api::files_api::get_file_content))
616        .route("/files/:id", delete(crate::api::files_api::delete_file_by_id))
617        .route("/files", delete(crate::api::files_api::delete_file))
618        // All Files API endpoints (unlimited storage for all file formats)
619        .route("/all-files", get(crate::api::all_files_api::get_all_files))
620        .route("/all-files/all", get(crate::api::all_files_api::get_all_files_flat))
621        .route("/all-files/search", get(crate::api::all_files_api::search_all_files))
622        .route("/all-files/folder", post(crate::api::all_files_api::create_all_files_folder))
623        .route("/all-files/upload", post(crate::api::all_files_api::upload_all_file))
624        .route("/all-files/upload-structure", post(crate::api::all_files_api::upload_all_files_structure))
625        .route("/all-files/:id", get(crate::api::all_files_api::get_all_file_by_id))
626        .route("/all-files/:id/content", get(crate::api::all_files_api::get_all_file_content))
627        .route("/all-files/:id", delete(crate::api::all_files_api::delete_all_file_by_id))
628        .route("/all-files", delete(crate::api::all_files_api::delete_all_file))
629        // Feedback endpoint
630        .route("/feedback", post(crate::api::feedback_api::submit_feedback))
631        // Login notification endpoint
632        .route("/notify-login", post(crate::api::login_notification_api::notify_user_login))
633        // Attachment pre-extraction endpoint
634        .route("/attachments/preprocess", post(crate::api::attachment_api::preprocess_attachments))
635        // Metrics endpoint
636        .route("/metrics", get(crate::metrics::get_metrics))
637        .route("/healthz", get(health_check))
638        .route("/admin/shutdown", post(crate::admin::stop_backend))
639.layer(cors)
640        .layer(TraceLayer::new_for_http())
641        .layer(TimeoutLayer::new(Duration::from_secs(600)))
642        .layer(DefaultBodyLimit::max(50 * 1024 * 1024))
643        .with_state(state)
644}