Skip to main content

offline_intelligence/
thread_server.rs

1//! Thread-based server implementation
2//!
3//! This module provides the server startup that uses thread-based
4//! shared memory architecture. All API handlers access state through
5//! Arc-wrapped shared memory (UnifiedAppState) — zero network hops
6//! between components. The only network call is to the localhost llama-server.
7
8use std::sync::Arc;
9use tokio::sync::RwLock;
10use tracing::{info, warn, debug, error};
11use anyhow::anyhow;
12
13use crate::{
14    config::Config,
15    shared_state::{SharedState, UnifiedAppState},
16    thread_pool::{ThreadPool, ThreadPoolConfig},
17    worker_threads::{ContextWorker, CacheWorker, DatabaseWorker, LLMWorker},
18    memory_db::MemoryDatabase,
19    model_management::ModelManager,
20};
21
/// Thread-based unified application state (internal, used during initialization)
///
/// Every field is `Arc`-wrapped so the whole struct is cheap to `Clone` and can
/// be handed to worker threads / API handlers without copying the underlying
/// state (the "shared memory, zero network hops" design described at file top).
#[derive(Clone)]
pub struct ThreadBasedAppState {
    pub shared_state: Arc<SharedState>,               // central shared-memory hub
    pub thread_pool: Arc<RwLock<Option<ThreadPool>>>, // None until a pool is installed — TODO confirm lifecycle
    pub context_worker: Arc<ContextWorker>,
    pub cache_worker: Arc<CacheWorker>,
    pub database_worker: Arc<DatabaseWorker>,
    pub llm_worker: Arc<LLMWorker>,
}
32
33/// Run server with thread-based architecture
34/// 
35/// # Arguments
36/// * `cfg` - Server configuration
37/// * `port_tx` - Optional channel to communicate the selected port back to caller
38///               This is used when the configured port is unavailable and a random port is selected
39pub async fn run_thread_server(cfg: Config, port_tx: Option<std::sync::mpsc::Sender<u16>>) -> anyhow::Result<()> {
40    crate::telemetry::init_tracing();
41    crate::metrics::init_metrics();
42    cfg.print_config();
43
44    info!("Starting thread-based server architecture");
45
46    // ── Phase 1: Fast setup (database + managers) ─────────────────────────────
47    // Everything here finishes in < 10 seconds so the port can be bound and
48    // communicated to the main thread well within its 60-second timeout.
49
50    // Initialize database
51    let memory_db_path = dirs::data_dir()
52        .unwrap_or_else(|| std::env::current_dir().unwrap_or_default())
53        .join("Aud.io")
54        .join("data")
55        .join("memory.db");
56
57    if let Some(parent) = memory_db_path.parent() {
58        if let Err(e) = std::fs::create_dir_all(parent) {
59            warn!("Failed to create data directory {:?}: {}", parent, e);
60        } else {
61            info!("Created data directory: {:?}", parent);
62        }
63    }
64
65    let memory_database = match MemoryDatabase::new(&memory_db_path) {
66        Ok(db) => {
67            info!("Memory database initialized at: {}", memory_db_path.display());
68            Arc::new(db)
69        }
70        Err(e) => {
71            // In-memory fallback means ALL user data (conversations, API key metadata,
72            // settings) is lost on every restart. This must be clearly visible.
73            error!(
74                "❌ CRITICAL: Failed to open SQLite database at {}: {}\n\
75                 Falling back to IN-MEMORY storage — all user data will be lost on exit.\n\
76                 Check that the directory is writable and no other process holds a lock on the file.",
77                memory_db_path.display(), e
78            );
79            Arc::new(MemoryDatabase::new_in_memory()?)
80        }
81    };
82
83    // Shared state
84    let mut shared_state = SharedState::new(cfg.clone(), memory_database.clone())?;
85
86    // Model Manager (catalog scan, usually < 3 s)
87    info!("📦 Initializing Model Manager");
88    match ModelManager::new() {
89        Ok(model_manager) => {
90            let model_manager_arc = Arc::new(model_manager);
91            if let Err(e) = model_manager_arc.initialize(&cfg).await {
92                warn!("⚠️  Model manager initialization failed: {}", e);
93                shared_state.model_manager = Some(model_manager_arc);
94            } else {
95                info!("✅ Model manager initialized successfully");
96                shared_state.model_manager = Some(model_manager_arc);
97            }
98        }
99        Err(e) => warn!("⚠️  Failed to create model manager: {}", e),
100    }
101
102    // Engine Manager - BLOCK on startup until engine is ready (like Ollama)
103    // This ensures the app is fully ready before accepting connections
104    info!("⚙️  Initializing Engine Manager (blocking until ready)...");
105    match crate::engine_management::EngineManager::new() {
106        Ok(engine_manager) => {
107            let engine_manager_arc = Arc::new(engine_manager);
108            match engine_manager_arc.initialize(&cfg).await {
109                Ok(true) => {
110                    info!("✅ Engine manager initialized with engine ready");
111                    shared_state.engine_manager = Some(engine_manager_arc.clone());
112                    shared_state.engine_available.store(true, std::sync::atomic::Ordering::Relaxed);
113                }
114                Ok(false) => {
115                    // No engine binary found.  Do NOT block here — the port must be
116                    // bound quickly so main.rs doesn't time out.  The user can
117                    // download an engine from the Models page after the app opens.
118                    info!("⏳ No engine found — starting in online-only mode. Download from the Models page.");
119                    shared_state.engine_manager = Some(engine_manager_arc);
120                    shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
121                }
122                Err(e) => {
123                    warn!("⚠️  Engine manager scan failed: {}", e);
124                    shared_state.engine_manager = Some(engine_manager_arc);
125                    shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
126                }
127            }
128        }
129        Err(e) => {
130            error!("❌ Failed to create engine manager: {}", e);
131            shared_state.engine_available.store(false, std::sync::atomic::Ordering::Relaxed);
132        }
133    }
134
135    let shared_state = Arc::new(shared_state);
136
137    // Workers (fast construction)
138    let _context_worker: Arc<ContextWorker> = Arc::new(ContextWorker::new(shared_state.clone()));
139    let _cache_worker: Arc<CacheWorker> = Arc::new(CacheWorker::new(shared_state.clone()));
140    let _database_worker: Arc<DatabaseWorker> = Arc::new(DatabaseWorker::new(shared_state.clone()));
141    let _llm_worker = shared_state.llm_worker.clone();
142
143    // Cache manager — derive config from model context window, wire in LLM worker
144    let cache_manager = match crate::cache_management::create_default_cache_manager(
145        crate::cache_management::KVCacheConfig::from_ctx_size(cfg.ctx_size),
146        memory_database.clone(),
147        Some(shared_state.llm_worker.clone()),
148    ) {
149        Ok(manager) => { info!("Cache manager initialized"); Some(Arc::new(manager)) }
150        Err(e) => { warn!("Cache manager failed: {}, disabled", e); None }
151    };
152    {
153        let mut g = shared_state.cache_manager.write()
154            .map_err(|_| anyhow::anyhow!("Failed to acquire cache manager write lock"))?;
155        *g = cache_manager;
156    }
157
158    // Embedding index (fast, reads disk)
159    if let Err(e) = shared_state.database_pool.embeddings.initialize_index("llama-server") {
160        debug!("Embedding index init: {} (will build on first store)", e);
161    } else {
162        info!("Embedding HNSW index loaded from existing data");
163    }
164
165    // Thread pool
166    let thread_pool_config = ThreadPoolConfig::new(&cfg);
167    let mut thread_pool = ThreadPool::new(thread_pool_config, shared_state.clone());
168    thread_pool.start().await?;
169
170    // ── Phase 2: Bind port immediately ────────────────────────────────────────
171    // Port is bound NOW, before any slow I/O, so the main thread's
172    // 60-second actual_port_rx timeout is satisfied within seconds.
173
174    let unified_state = UnifiedAppState::new(shared_state.clone());
175    let app = build_compatible_router(unified_state);
176
177    let (listener, selected_port) = match try_bind_port(&cfg.api_host, cfg.api_port).await {
178        Ok(listener) => {
179            let port = listener.local_addr()?.port();
180            info!("✅ HTTP server bound to {}:{}", cfg.api_host, port);
181            (listener, port)
182        }
183        Err(e) => {
184            warn!("⚠️ Failed to bind to port {}: {}", cfg.api_port, e);
185            warn!("🔄 Scanning 8002-8999 for available port...");
186            let mut last_error = None;
187            let mut found_listener = None;
188            let mut found_port = 0u16;
189            for port in 8002u16..=8999 {
190                match try_bind_port(&cfg.api_host, port).await {
191                    Ok(listener) => {
192                        found_port = listener.local_addr()?.port();
193                        info!("✅ HTTP server bound to alternative port {}", found_port);
194                        if let Ok(mut g) = shared_state.http_port.write() { *g = found_port; }
195                        found_listener = Some(listener);
196                        break;
197                    }
198                    Err(e) => { last_error = Some(e); }
199                }
200            }
201            let listener = found_listener.ok_or_else(|| anyhow!(
202                "Failed to find available port after scanning 8002-8999.\n  Last error: {:?}\n  Hints: disable firewall, close other Aud.io instances, or run as Administrator.",
203                last_error
204            ))?;
205            (listener, found_port)
206        }
207    };
208
209    // Send port to main thread — satisfies the 60-second actual_port_rx timeout.
210    if let Some(ref tx) = port_tx {
211        if let Err(e) = tx.send(selected_port) {
212            warn!("Failed to send port to main thread: {}", e);
213        } else {
214            info!("✅ Port {} communicated to main thread", selected_port);
215        }
216    }
217    info!("🌐 Server will accept connections on port {}", selected_port);
218
219    // ── Phase 3: Slow runtime init in background ──────────────────────────────
220    // Starting llama-server and waiting for it to be healthy can take 30-120 s.
221    // We do this in a background task; the HTTP server starts immediately and
222    // returns {"status":"initializing"} until mark_initialization_complete() fires.
223    {
224        let shared_state_bg = shared_state.clone();
225        let cfg_bg = cfg.clone();
226        let memory_database_bg = memory_database.clone();
227        tokio::spawn(async move {
228            // ── Mark initialization complete IMMEDIATELY ──────────────────────────
229            // The health endpoint now returns "degraded" right away (init done, no
230            // model loaded yet). The frontend LoadingScreen accepts "degraded" and
231            // opens the app; the local model auto-load continues in the background.
232            // This must be the very first statement so the ~100 ms polling window
233            // in LoadingScreen.tsx resolves on the first tick after axum starts.
234            shared_state_bg.mark_initialization_complete();
235            info!("✅ Backend marked as initialized — frontend may proceed");
236
237            // Context orchestrator — token limits derived from model's ctx_size
238            let context_orchestrator = match crate::context_engine::create_default_orchestrator(
239                memory_database_bg,
240                cfg_bg.ctx_size,
241            ).await {
242                Ok(mut orchestrator) => {
243                    orchestrator.set_llm_worker(shared_state_bg.llm_worker.clone());
244                    info!("Context orchestrator initialized");
245                    Some(orchestrator)
246                }
247                Err(e) => {
248                    warn!("Context orchestrator failed: {}. Memory features disabled.", e);
249                    None
250                }
251            };
252            {
253                let mut g = shared_state_bg.context_orchestrator.write().await;
254                *g = context_orchestrator;
255            }
256
257            // Runtime Manager — this is the slow part (starts llama-server)
258            info!("🚀 Initializing Runtime Manager");
259            let runtime_manager = Arc::new(crate::model_runtime::RuntimeManager::new());
260            let runtime_config = crate::model_runtime::RuntimeConfig {
261                model_path: std::path::PathBuf::from(&cfg_bg.model_path),
262                format: crate::model_runtime::ModelFormat::GGUF,
263                host: cfg_bg.llama_host.clone(),
264                port: cfg_bg.llama_port,
265                context_size: cfg_bg.ctx_size,
266                batch_size: cfg_bg.batch_size,
267                threads: cfg_bg.threads,
268                gpu_layers: cfg_bg.gpu_layers,
269                parallel_slots: cfg_bg.parallel_slots,
270                ubatch_size: cfg_bg.ubatch_size,
271                runtime_binary: if cfg_bg.llama_bin.is_empty() { None } else { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) },
272                draft_model_path: if cfg_bg.draft_model_path == "none" || cfg_bg.draft_model_path.is_empty() {
273                    None
274                } else {
275                    Some(std::path::PathBuf::from(&cfg_bg.draft_model_path))
276                },
277                speculative_draft_max: cfg_bg.speculative_draft_max,
278                speculative_draft_p_min: cfg_bg.speculative_draft_p_min,
279                extra_config: serde_json::json!({}),
280            };
281
282            // Check whether an engine binary exists (registry OR bundled config binary)
283            let llama_bin_exists = !cfg_bg.llama_bin.is_empty()
284                && std::path::Path::new(&cfg_bg.llama_bin).exists();
285            let has_engine = if let Some(ref em) = shared_state_bg.engine_manager {
286                let reg = em.registry.read().await;
287                reg.get_default_engine_binary_path().is_some() || llama_bin_exists
288            } else {
289                llama_bin_exists
290            };
291
292            // Always store the RuntimeManager so that switch_model can work
293            // even when no engine is currently installed (user can download later).
294            if let Err(e) = shared_state_bg.set_runtime_manager(runtime_manager.clone()) {
295                error!("❌ Failed to set runtime manager: {}", e);
296            }
297            shared_state_bg.llm_worker.set_runtime_manager(runtime_manager.clone());
298            info!("🔗 LLM worker linked to runtime manager");
299
300            if has_engine {
301                // Try to auto-load the last used model
302                let last_model_loaded = 'load: {
303                    let Some(data_dir) = dirs::data_dir() else { break 'load false; };
304                    let last_model_path = data_dir.join("Aud.io").join("last_model.txt");
305                    let Ok(last_model_id_raw) = std::fs::read_to_string(&last_model_path) else {
306                        info!("ℹ️  No last used model found");
307                        break 'load false;
308                    };
309                    let last_model_id = last_model_id_raw.trim().to_string();
310                    info!("🔄 Found last used model: {}", last_model_id);
311
312                    let Some(ref model_manager) = shared_state_bg.model_manager else {
313                        info!("ℹ️  Model manager not available - skipping auto-load");
314                        break 'load false;
315                    };
316                    let registry = model_manager.registry.read().await;
317                    let Some(model_info) = registry.get_model(&last_model_id) else {
318                        drop(registry);
319                        warn!("⚠️  Last used model not found in registry: {}", last_model_id);
320                        break 'load false;
321                    };
322                    if model_info.status != crate::model_management::registry::ModelStatus::Installed {
323                        drop(registry);
324                        info!("ℹ️  Last used model not installed");
325                        break 'load false;
326                    }
327                    let Some(ref filename) = model_info.filename else {
328                        drop(registry);
329                        warn!("⚠️  Last used model has no filename");
330                        break 'load false;
331                    };
332                    let model_path_for_runtime = model_manager.storage.model_path(&last_model_id, filename);
333                    drop(registry);
334
335                    if !model_path_for_runtime.exists() {
336                        warn!("⚠️  Last used model file not found: {}", model_path_for_runtime.display());
337                        break 'load false;
338                    }
339                    info!("✅ Auto-loading last used model from: {}", model_path_for_runtime.display());
340
341                    let default_engine = if let Some(ref em) = shared_state_bg.engine_manager {
342                        let reg = em.registry.read().await;
343                        reg.get_default_engine_binary_path()
344                            .or_else(|| if !cfg_bg.llama_bin.is_empty() { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) } else { None })
345                    } else if !cfg_bg.llama_bin.is_empty() {
346                        Some(std::path::PathBuf::from(&cfg_bg.llama_bin))
347                    } else { None };
348
349                    let mut updated_config = runtime_config.clone();
350                    updated_config.model_path = model_path_for_runtime;
351                    updated_config.runtime_binary = default_engine;
352
353                    match runtime_manager.initialize_auto(updated_config).await {
354                        Ok(base_url) => {
355                            info!("✅ Last used model auto-loaded at {}", base_url);
356                            match runtime_manager.health_check().await {
357                                Ok(status) => { info!("✅ Runtime health check passed: {}", status); true }
358                                Err(e) => { warn!("⚠️  Runtime health check failed: {}", e); false }
359                            }
360                        }
361                        Err(e) => { warn!("⚠️  Failed to auto-load last used model: {}", e); false }
362                    }
363                };
364
365                if !last_model_loaded && !cfg_bg.model_path.is_empty() {
366                    let default_engine = if let Some(ref em) = shared_state_bg.engine_manager {
367                        let reg = em.registry.read().await;
368                        reg.get_default_engine_binary_path()
369                            .or_else(|| if !cfg_bg.llama_bin.is_empty() { Some(std::path::PathBuf::from(&cfg_bg.llama_bin)) } else { None })
370                    } else if !cfg_bg.llama_bin.is_empty() {
371                        Some(std::path::PathBuf::from(&cfg_bg.llama_bin))
372                    } else { None };
373                    let mut updated_config = runtime_config;
374                    updated_config.runtime_binary = default_engine;
375                    info!("🚀 Initializing runtime with config model path...");
376                    match runtime_manager.initialize_auto(updated_config).await {
377                        Ok(base_url) => {
378                            info!("✅ Runtime initialized at {}", base_url);
379                            shared_state_bg.llm_worker.set_runtime_manager(runtime_manager);
380                        }
381                        Err(e) => warn!("⚠️  Runtime initialization failed: {}. Online-only mode.", e),
382                    }
383                }
384            } else {
385                info!("⏳ No engine found - starting in online-only mode");
386            }
387
388            info!("✅ Background initialization complete");
389        });
390    }
391
392    // Spawn attachment cache eviction task.
393    // Runs every 5 minutes and removes entries older than 30 minutes so the
394    // DashMap doesn't grow unboundedly when users attach many files without sending.
395    {
396        let cache = shared_state.attachment_cache.clone();
397        tokio::spawn(async move {
398            let interval = std::time::Duration::from_secs(300); // 5 minutes
399            loop {
400                tokio::time::sleep(interval).await;
401                let before = cache.len();
402                cache.retain(|_, v: &mut crate::shared_state::PreExtracted| {
403                    !v.is_stale(crate::api::attachment_api::CACHE_TTL_SECS)
404                });
405                let removed = before - cache.len();
406                if removed > 0 {
407                    info!("Attachment cache eviction: removed {} stale entries", removed);
408                }
409            }
410        });
411    }
412
413    // Start server — this blocks until the process exits.
414    info!("🟢 Axum server starting on port {}...", selected_port);
415    if let Err(e) = axum::serve(listener, app).await {
416        error!("Axum server error: {}", e);
417    }
418    
419    info!("Axum server stopped");
420    Ok(())
421}
422
423/// Try to bind to a specific port, returning the listener if successful
424async fn try_bind_port(host: &str, port: u16) -> anyhow::Result<tokio::net::TcpListener> {
425    let addr = format!("{}:{}", host, port);
426    match tokio::net::TcpListener::bind(&addr).await {
427        Ok(listener) => Ok(listener),
428        Err(e) if e.kind() == std::io::ErrorKind::AddrInUse => {
429            Err(anyhow::anyhow!("Port {} is already in use", port))
430        }
431        Err(e) => Err(anyhow::anyhow!("Failed to bind to {}: {}", addr, e)),
432    }
433}
434
/// Health response structure with detailed runtime status
#[derive(serde::Serialize)]
struct HealthResponse {
    status: String,  // "ready", "initializing", "degraded"
    runtime_ready: bool,  // true only when the LLM runtime can actually serve inference
    message: Option<String>,  // human-readable hint; None when status is "ready"
}
442
443/// Health check handler that verifies backend is fully initialized AND runtime is ready
444async fn health_check(axum::extract::State(state): axum::extract::State<UnifiedAppState>) -> axum::response::Response {
445    use axum::Json;
446    use axum::response::IntoResponse;
447
448    // Check if backend initialization is complete
449    if !state.shared_state.is_initialization_complete() {
450        return Json(HealthResponse {
451            status: "initializing".to_string(),
452            runtime_ready: false,
453            message: Some("Backend initializing...".to_string()),
454        })
455        .into_response();
456    }
457
458    // Check if runtime is actually ready for inference
459    let runtime_ready = state.shared_state.llm_worker.is_runtime_ready().await;
460
461    let (status, message) = if runtime_ready {
462        ("ready", None)
463    } else {
464        (
465            "degraded",
466            Some("No model loaded. Please activate a model from the Models page.".to_string())
467        )
468    };
469
470    Json(HealthResponse {
471        status: status.to_string(),
472        runtime_ready,
473        message,
474    })
475    .into_response()
476}
477
478/// Build router for 1-hop architecture
479fn build_compatible_router(mut state: UnifiedAppState) -> axum::Router {
480    use axum::{
481        Router,
482        routing::{get, post, put, delete},
483        extract::DefaultBodyLimit,
484    };
485    use tower_http::{
486        cors::{Any, CorsLayer},
487        trace::TraceLayer,
488        timeout::TimeoutLayer,
489    };
490    use std::time::Duration;
491
492    // Allow any origin in all builds.
493    //
494    // Rationale: this server only ever binds to 127.0.0.1 (localhost), so it is
495    // unreachable from any remote host.  The WebView origin varies by platform
496    // and Tauri version (tauri://localhost, http://localhost, null, etc.).
497    // Restricting to a hard-coded origin silently breaks all fetch() calls when
498    // the actual origin doesn't match — the root cause of the 135-second loading
499    // screen hang.  Security is provided by Tauri's capability / CSP layer, not
500    // by CORS headers on a local-only server.
501    let cors = CorsLayer::new()
502        .allow_origin(Any)
503        .allow_methods([axum::http::Method::GET, axum::http::Method::POST, axum::http::Method::PUT, axum::http::Method::DELETE])
504        .allow_headers(Any);
505
506    // Get JWT secret from environment or generate a default
507    let jwt_secret = std::env::var("JWT_SECRET")
508        .unwrap_or_else(|_| "aud-io-default-secret-change-in-production".to_string());
509
510    // Get users store from database
511    let users_store = state.shared_state.database_pool.users.clone();
512
513    // Initialize Google OAuth state.
514    //
515    // Resolution order (first non-empty value wins):
516    //  1. Compile-time constant via `option_env!()` — baked into the binary at `cargo build`.
517    //     Set these in your CI/CD pipeline or locally before running `cargo tauri build`.
518    //     End users of the shipped installer never need to set anything.
519    //  2. Runtime environment variable — useful during local development / debugging.
520    let google_oauth = {
521        let client_id = option_env!("GOOGLE_CLIENT_ID")
522            .map(|s| s.to_string())
523            .filter(|s| !s.is_empty())
524            .or_else(|| std::env::var("GOOGLE_CLIENT_ID").ok().filter(|s| !s.is_empty()));
525
526        let client_secret = option_env!("GOOGLE_CLIENT_SECRET")
527            .map(|s| s.to_string())
528            .filter(|s| !s.is_empty())
529            .or_else(|| std::env::var("GOOGLE_CLIENT_SECRET").ok().filter(|s| !s.is_empty()));
530
531        match (client_id, client_secret) {
532            (Some(id), Some(secret)) => {
533                tracing::info!(
534                    "Google OAuth configured (client_id: {}...)",
535                    &id[..id.len().min(12)]
536                );
537                Some(crate::api::auth_api::GoogleOAuthPending {
538                    states: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
539                    client_id: id,
540                    client_secret: secret,
541                })
542            }
543            _ => {
544                tracing::info!(
545                    "Google OAuth not configured — set GOOGLE_CLIENT_ID + GOOGLE_CLIENT_SECRET before building"
546                );
547                None
548            }
549        }
550    };
551
552    // Create and set auth state
553    state.auth_state = Some(Arc::new(crate::api::auth_api::AuthState {
554        users: users_store,
555        jwt_secret,
556        google: google_oauth,
557    }));
558
559    Router::new()
560        // Auth routes — email/password (legacy) + Google OAuth
561        .route("/auth/signup", post(crate::api::auth_api::signup))
562        .route("/auth/login", post(crate::api::auth_api::login))
563        .route("/auth/verify-email", post(crate::api::auth_api::verify_email))
564        .route("/auth/me", post(crate::api::auth_api::get_current_user))
565        // Google OAuth endpoints
566        .route("/auth/google/init", post(crate::api::auth_api::google_init))
567        .route("/auth/google/callback", get(crate::api::auth_api::google_callback))
568        .route("/auth/google/status", get(crate::api::auth_api::google_status))
569        // Core 1-hop streaming endpoint
570        .route("/generate/stream", post(crate::api::stream_api::generate_stream))
571        // Online mode streaming endpoint
572        .route("/online/stream", post(crate::api::online_api::online_stream))
573        // Title generation via shared memory -> LLM worker
574        .route("/generate/title", post(crate::api::title_api::generate_title))
575        // Conversation CRUD via shared memory -> database
576        .route("/conversations", get(crate::api::conversation_api::get_conversations))
577        .route("/conversations/db-stats", get(crate::api::conversation_api::get_conversations_db_stats))
578        .route("/conversations/:id", get(crate::api::conversation_api::get_conversation))
579        .route("/conversations/:id/title", put(crate::api::conversation_api::update_conversation_title))
580        .route("/conversations/:id/pinned", post(crate::api::conversation_api::update_conversation_pinned))
581        .route("/conversations/:id", delete(crate::api::conversation_api::delete_conversation))
582        // Model management endpoints
583        .route("/models", get(crate::api::model_api::list_models))
584        .route("/models/by-mode", get(crate::api::model_api::list_models_by_mode))
585        .route("/models/active", get(crate::api::model_api::get_active_model))
586        .route("/models/search", get(crate::api::model_api::search_models))
587        .route("/models/install", post(crate::api::model_api::install_model))
588        .route("/models/remove", delete(crate::api::model_api::remove_model))
589        .route("/models/progress", get(crate::api::model_api::get_download_progress))
590        .route("/models/downloads", get(crate::api::model_api::get_active_downloads))
591        .route("/models/downloads/cancel", post(crate::api::model_api::cancel_download))
592        .route("/models/downloads/pause", post(crate::api::model_api::pause_download))
593        .route("/models/downloads/resume", post(crate::api::model_api::resume_download))
594        .route("/models/recommendations", get(crate::api::model_api::get_recommended_models))
595        .route("/models/preferences", post(crate::api::model_api::update_preferences))
596        .route("/models/refresh", post(crate::api::model_api::refresh_models))
597        .route("/models/switch", post(crate::api::model_api::switch_model))
598        // Phase A: HF gated model access check
599        .route("/models/hf/access", get(crate::api::model_api::check_hf_access))
600        // Phase B: Full OpenRouter catalog (paginated + filtered) and quota
601        .route("/models/openrouter/catalog", get(crate::api::model_api::openrouter_catalog))
602        .route("/models/openrouter/quota", get(crate::api::model_api::openrouter_quota))
603        .route("/hardware/recommendations", get(crate::api::model_api::get_hardware_recommendations))
604        .route("/hardware/info", get(crate::api::model_api::get_hardware_info))
605        .route("/metrics/system", get(crate::api::model_api::get_system_metrics))
606        .route("/storage/metadata", get(crate::api::model_api::get_storage_metadata))
607        // API Keys management endpoints
608        .route("/api-keys", post(crate::api::api_keys_api::save_api_key))
609        .route("/api-keys", get(crate::api::api_keys_api::get_api_key))
610        .route("/api-keys/all", get(crate::api::api_keys_api::get_all_api_keys))
611        .route("/api-keys", delete(crate::api::api_keys_api::delete_api_key))
612        .route("/api-keys/mark-used", post(crate::api::api_keys_api::mark_key_used))
613        .route("/api-keys/verify", post(crate::api::api_keys_api::verify_api_key))
614        // Mode management endpoints (online/offline)
615        .route("/mode/switch", post(crate::api::mode_api::switch_mode))
616        .route("/mode/status", get(crate::api::mode_api::get_mode_status))
617        // Files API endpoints (database-backed with nested folder support)
618        .route("/files", get(crate::api::files_api::get_files))
619        .route("/files/all", get(crate::api::files_api::get_all_files))
620        .route("/files/search", get(crate::api::files_api::search_files))
621        .route("/files/folder", post(crate::api::files_api::create_folder))
622        .route("/files/upload", post(crate::api::files_api::upload_file))
623        .route("/files/sync", post(crate::api::files_api::sync_files))
624        .route("/files/resync", post(crate::api::files_api::resync_files))
625        .route("/files/:id", get(crate::api::files_api::get_file_by_id))
626        .route("/files/:id/content", get(crate::api::files_api::get_file_content))
627        .route("/files/:id", delete(crate::api::files_api::delete_file_by_id))
628        .route("/files", delete(crate::api::files_api::delete_file))
629        // All Files API endpoints (unlimited storage for all file formats)
630        .route("/all-files", get(crate::api::all_files_api::get_all_files))
631        .route("/all-files/all", get(crate::api::all_files_api::get_all_files_flat))
632        .route("/all-files/search", get(crate::api::all_files_api::search_all_files))
633        .route("/all-files/folder", post(crate::api::all_files_api::create_all_files_folder))
634        .route("/all-files/upload", post(crate::api::all_files_api::upload_all_file))
635        .route("/all-files/upload-structure", post(crate::api::all_files_api::upload_all_files_structure))
636        .route("/all-files/:id", get(crate::api::all_files_api::get_all_file_by_id))
637        .route("/all-files/:id/content", get(crate::api::all_files_api::get_all_file_content))
638        .route("/all-files/:id", delete(crate::api::all_files_api::delete_all_file_by_id))
639        .route("/all-files", delete(crate::api::all_files_api::delete_all_file))
640        // Feedback endpoint
641        .route("/feedback", post(crate::api::feedback_api::submit_feedback))
642        // Login notification endpoint
643        .route("/notify-login", post(crate::api::login_notification_api::notify_user_login))
644        // Attachment pre-extraction endpoint
645        .route("/attachments/preprocess", post(crate::api::attachment_api::preprocess_attachments))
646        // Metrics endpoint
647        .route("/metrics", get(crate::metrics::get_metrics))
648        .route("/healthz", get(health_check))
649        .route("/admin/shutdown", post(crate::admin::stop_backend))
650.layer(cors)
651        .layer(TraceLayer::new_for_http())
652        .layer(TimeoutLayer::new(Duration::from_secs(600)))
653        .layer(DefaultBodyLimit::max(50 * 1024 * 1024))
654        .with_state(state)
655}