Skip to main content

kindling_server/
lib.rs

1//! kindling daemon — HTTP API over a Unix domain socket.
2//!
3//! A long-running per-user process that serves the kindling v1 HTTP API,
4//! wrapping the in-process [`kindling_service::KindlingService`]. This is the
5//! `kindling serve` backend; the CLI wiring lives in a later task (PORT-013).
6//! This crate exposes a library surface so it can be both unit/integration
7//! tested and driven by the future CLI.
8//!
9//! # v1 HTTP API
10//!
11//! ```text
12//! GET    /v1/health                  → 200 { version, schemaVersion, projects: [...] }
13//! POST   /v1/capsules                → 201 Capsule
14//! GET    /v1/capsules/open?sessionId → 200 Capsule | null
15//! PATCH  /v1/capsules/:id/close      → 200 Capsule
16//! POST   /v1/observations            → 201 Observation
17//! POST   /v1/observations/:id/forget  → 204 (redact an observation)
18//! POST   /v1/retrieve                → 200 RetrieveResult
19//! POST   /v1/pins                    → 201 Pin
20//! DELETE /v1/pins/:id                → 204
21//! POST   /v1/context/session-start   → 200 { additionalContext: string | null }
22//! POST   /v1/context/pre-compact     → 200 { additionalContext: string | null }
23//! ```
24//!
25//! Request bodies are camelCase JSON; response bodies serialize the domain
26//! types (already camelCase). See [`dto`] for the request shapes. The
27//! `/v1/context/*` endpoints assemble AND format the injected-context markdown
28//! server-side (the byte-for-byte date/markdown logic lives in [`inject`]).
29//!
30//! # Per-project routing
31//!
32//! Every data endpoint requires the `X-Kindling-Project` header. Its value is
33//! the **project root string**; the daemon derives the SQLite DB via
34//! [`kindling_store::project_db_path`] under [`ServerConfig::kindling_home`]
35//! and caches one service per project. `/v1/health` needs no header; any other
36//! endpoint without it returns `400`.
37//!
38//! # Lifecycle
39//!
40//! [`serve`] acquires a PID lock (cleaning up a stale file — see [`pid`]), binds
41//! the UDS at mode `0600`, builds the router, and runs until idle. The daemon
42//! shuts down after [`ServerConfig::idle_timeout`] of no in-flight and no
43//! recent requests, then removes the socket and PID file.
44
45mod config;
46mod dto;
47mod error;
48mod handlers;
49pub mod inject;
50mod pid;
51mod state;
52
53pub use config::{ServerConfig, Transport, DEFAULT_IDLE_TIMEOUT};
54pub use error::{ApiError, ServerError};
55pub use handlers::{PROJECT_HEADER, SESSION_HEADER};
56pub use pid::{acquire_pid_lock, PidGuard};
57pub use state::AppState;
58
59use std::sync::Arc;
60use std::time::Duration;
61
62use axum::routing::{delete, patch, post};
63use axum::Router;
64
65/// Build the v1 API router over the given [`AppState`].
66///
67/// Exposed so integration tests (and the future CLI) can drive routes either
68/// through the full [`serve`] over a temp socket or by serving this router
69/// directly. An activity-tracking middleware updates the idle clock on every
70/// request.
71pub fn build_router(state: AppState) -> Router {
72    let activity = Arc::clone(state.activity());
73    Router::new()
74        .route("/v1/health", axum::routing::get(handlers::health))
75        .route("/v1/capsules", post(handlers::open_capsule))
76        .route(
77            "/v1/capsules/open",
78            axum::routing::get(handlers::get_open_capsule),
79        )
80        .route("/v1/capsules/{id}/close", patch(handlers::close_capsule))
81        .route("/v1/observations", post(handlers::append_observation))
82        .route(
83            "/v1/observations/{id}/forget",
84            post(handlers::forget_observation),
85        )
86        .route("/v1/retrieve", post(handlers::retrieve))
87        .route("/v1/pins", post(handlers::create_pin))
88        .route("/v1/pins/{id}", delete(handlers::unpin))
89        .route(
90            "/v1/context/session-start",
91            post(handlers::session_start_context),
92        )
93        .route(
94            "/v1/context/pre-compact",
95            post(handlers::pre_compact_context),
96        )
97        .layer(axum::middleware::from_fn(
98            move |req, next: axum::middleware::Next| {
99                let activity = Arc::clone(&activity);
100                async move {
101                    activity.enter();
102                    let response = next.run(req).await;
103                    activity.leave();
104                    response
105                }
106            },
107        ))
108        .with_state(state)
109}
110
111/// Run the daemon to completion: acquire the PID lock, bind the UDS at mode
112/// `0600`, serve the v1 API, and shut down on idle — cleaning up the socket and
113/// PID file on exit.
114///
115/// Resolves `Ok(())` on a clean idle shutdown, so callers (and tests) can wrap
116/// it in a `tokio::time::timeout`.
117pub async fn serve(config: ServerConfig) -> Result<(), ServerError> {
118    let _pid_guard = acquire_pid_lock(&config.pid_path)?;
119    let state = AppState::new(config.kindling_home.clone());
120    let app = build_router(state.clone());
121
122    match config.transport {
123        #[cfg(unix)]
124        Transport::Uds => {
125            serve_on_uds(&config, app, state.activity().clone()).await?;
126            // Best-effort socket cleanup; the PID guard removes the PID file on
127            // drop.
128            let _ = remove_socket(&config.socket_path);
129        }
130        Transport::Tcp => {
131            serve_on_tcp(&config, app, state.activity().clone()).await?;
132        }
133    }
134    Ok(())
135}
136
137/// Idle-shutdown future: resolves once the daemon has been idle for
138/// `idle_timeout`. Polled at a fraction of the timeout (min 25ms) so short
139/// test timeouts still fire promptly.
140async fn wait_until_idle(activity: Arc<state::Activity>, idle_timeout: Duration) {
141    let poll = idle_timeout
142        .checked_div(4)
143        .unwrap_or(idle_timeout)
144        .max(Duration::from_millis(25));
145    loop {
146        tokio::time::sleep(poll).await;
147        if activity.is_idle_for(idle_timeout) {
148            return;
149        }
150    }
151}
152
153#[cfg(unix)]
154async fn serve_on_uds(
155    config: &ServerConfig,
156    app: Router,
157    activity: Arc<state::Activity>,
158) -> Result<(), ServerError> {
159    use std::os::unix::fs::PermissionsExt;
160    use tokio::net::UnixListener;
161
162    // A leftover socket from an unclean shutdown would make bind fail with
163    // EADDRINUSE. Remove it first (the PID lock already guarantees no live
164    // daemon is using it).
165    let _ = remove_socket(&config.socket_path);
166    if let Some(parent) = config.socket_path.parent() {
167        if !parent.as_os_str().is_empty() {
168            std::fs::create_dir_all(parent)?;
169            // Defence in depth: the socket is chmod'd to 0600 only after bind,
170            // so for a brief window it carries the process umask. Lock the
171            // containing directory to the owner (0700) so no other local user
172            // can reach the socket during that window — filesystem permissions
173            // are the daemon's only authn (per the design spec).
174            std::fs::set_permissions(parent, std::fs::Permissions::from_mode(0o700))?;
175        }
176    }
177
178    let listener = UnixListener::bind(&config.socket_path)?;
179    // Restrict the socket to the owning user (0600) after bind, before serving.
180    std::fs::set_permissions(&config.socket_path, std::fs::Permissions::from_mode(0o600))?;
181
182    let idle_timeout = config.idle_timeout;
183    axum::serve(listener, app)
184        .with_graceful_shutdown(wait_until_idle(activity, idle_timeout))
185        .await?;
186    Ok(())
187}
188
189/// Serve over loopback TCP on an ephemeral `127.0.0.1` port.
190///
191/// Compiled on all platforms (it is the Windows default, and is exercised by
192/// the Linux test suite). Binds `127.0.0.1:0`, reads back the OS-assigned port,
193/// and publishes it as decimal text to [`ServerConfig::port_path`] so the
194/// client can discover where to connect — TCP has no filesystem rendezvous like
195/// a UDS path. The port file is removed (best-effort) on shutdown.
196async fn serve_on_tcp(
197    config: &ServerConfig,
198    app: Router,
199    activity: Arc<state::Activity>,
200) -> Result<(), ServerError> {
201    use tokio::net::TcpListener;
202
203    let listener = TcpListener::bind(("127.0.0.1", 0)).await?;
204    let port = listener.local_addr()?.port();
205
206    if let Some(parent) = config.port_path.parent() {
207        if !parent.as_os_str().is_empty() {
208            std::fs::create_dir_all(parent)?;
209        }
210    }
211    std::fs::write(&config.port_path, port.to_string())?;
212
213    let idle_timeout = config.idle_timeout;
214    let serve_result = axum::serve(listener, app)
215        .with_graceful_shutdown(wait_until_idle(activity, idle_timeout))
216        .await;
217
218    // Best-effort port-file cleanup; mirrors the UDS socket cleanup.
219    let _ = remove_socket(&config.port_path);
220    serve_result?;
221    Ok(())
222}
223
/// Delete the file at `path`, treating "it was not there" as success.
///
/// Any other I/O error from the removal is returned unchanged.
fn remove_socket(path: &std::path::Path) -> std::io::Result<()> {
    if let Err(err) = std::fs::remove_file(path) {
        if err.kind() != std::io::ErrorKind::NotFound {
            return Err(err);
        }
    }
    Ok(())
}