Skip to main content

kintsugi_daemon/
lib.rs

1//! Kintsugi resident daemon library.
2//!
3//! Long-lived process that owns the event log and runs the decision loop. The
4//! interception layer connects over a local socket, sends a [`ProposedCommand`],
5//! and blocks on the returned [`Verdict`].
6//!
7//! In Phase 0 the daemon is a pure recorder: it logs every proposal and allows
8//! it. The Tier-1 rule engine (Phase 1) plugs into [`Daemon::decide`] without
9//! changing the IPC or logging paths.
10
11#![forbid(unsafe_code)]
12
13pub mod ipc;
14pub mod watch;
15
16use std::path::PathBuf;
17
18use std::cell::{Cell, RefCell};
19
20use anyhow::{Context, Result};
21use directories::ProjectDirs;
22use kintsugi_core::admin::{self, SealedVault, VaultState};
23use kintsugi_core::{Decision, EventLog, Mode, ProposedCommand, Verdict};
24
25pub use ipc::{Client, Observation, Resolution, Server};
26
27pub const VERSION: &str = env!("CARGO_PKG_VERSION");
28
29/// The kill-switch flag file name, alongside the event-log database.
30pub const KILL_SWITCH_FILE: &str = "panic.flag";
31
32/// Path to the panic kill-switch flag (alongside the default event log).
33pub fn kill_switch_path() -> PathBuf {
34    default_db_path()
35        .parent()
36        .map(|p| p.join(KILL_SWITCH_FILE))
37        .unwrap_or_else(|| std::env::temp_dir().join(KILL_SWITCH_FILE))
38}
39
40/// The fail-closed marker file name, alongside the event-log database.
41pub const FAIL_CLOSED_FILE: &str = "fail-closed.flag";
42
43/// Path to the fail-closed marker (alongside the default event log). Its mere
44/// existence is the signal — the content is irrelevant. The interception layer
45/// (shim/hook/MCP) reads it **without** the daemon, so that killing the daemon
46/// can't be used to open the gate: with the marker present, an unreachable
47/// daemon means *block*, not *run unguarded*.
48pub fn fail_closed_marker_path() -> PathBuf {
49    default_db_path().with_file_name(FAIL_CLOSED_FILE)
50}
51
52/// Whether the admin-set fail-closed marker is present. Cheap, daemon-free, and
53/// callable from the interception fast path. In the locked posture the marker is
54/// owned by the privileged account (root / a dedicated `kintsugi` user), so an
55/// audited non-root agent cannot remove it to re-open the gate.
56pub fn is_fail_closed_marked() -> bool {
57    fail_closed_marker_path().exists()
58}
59
60/// Create or remove the fail-closed marker to match `on`. Best-effort, atomic
61/// create; called by the admin flow when the locked `fail_closed` setting
62/// changes so the posture survives a daemon restart and a kill.
63pub fn set_fail_closed_marker(on: bool) -> std::io::Result<()> {
64    let path = fail_closed_marker_path();
65    if on {
66        if let Some(parent) = path.parent() {
67            std::fs::create_dir_all(parent)?;
68        }
69        // 0644: world-readable (the shim must read it) but, in the locked posture,
70        // owned by the privileged account so the audited user can't delete it.
71        std::fs::write(&path, b"fail-closed\n")?;
72    } else if path.exists() {
73        std::fs::remove_file(&path)?;
74    }
75    Ok(())
76}
77
78/// Resolve the event-log database path. Override with `KINTSUGI_DB` (handy in tests).
79pub fn default_db_path() -> PathBuf {
80    if let Ok(p) = std::env::var("KINTSUGI_DB") {
81        return PathBuf::from(p);
82    }
83    if let Some(dirs) = ProjectDirs::from("", "", "kintsugi") {
84        return dirs.data_dir().join("events.db");
85    }
86    std::env::temp_dir().join("kintsugi-events.db")
87}
88
89/// The resident decision loop: owns the event log, the warm scorer, classifies,
90/// records.
91pub struct Daemon {
92    log: EventLog,
93    mode: Mode,
94    scorer: Box<dyn kintsugi_model::Scorer>,
95    snapshot_dir: PathBuf,
96    kill_path: PathBuf,
97    /// The admin vault loaded at *daemon* startup (not at request time), so the
98    /// auth decision is made against the path the daemon resolved — a caller's
99    /// environment can't redirect it. `None` = unprovisioned (no lock).
100    vault: Option<SealedVault>,
101    /// The vault file exists but is unreadable/corrupt → stay locked (fail-closed):
102    /// refuse authenticated shutdown rather than silently allow it.
103    vault_degraded: bool,
104    /// The last challenge nonce issued (and its op), consumed once by `Shutdown`.
105    pending: RefCell<Option<(Vec<u8>, String)>>,
106    /// Set when an authenticated shutdown has been accepted; the serve loop exits.
107    shutdown: Cell<bool>,
108    /// In-memory brute-force throttle for admin authentication.
109    throttle: RefCell<AuthThrottle>,
110}
111
/// Brute-force protection for admin authentication.
///
/// The daemon is the single authority, so a process-local counter suffices.
/// Once enough consecutive failures have accumulated, every further failure
/// arms a lockout whose length doubles each time; a success clears the state.
#[derive(Default)]
struct AuthThrottle {
    /// Consecutive failed attempts since the last reset.
    failures: u32,
    /// Instant at which the current lockout ends, if one has been armed.
    locked_until: Option<std::time::Instant>,
}

impl AuthThrottle {
    /// Failure count at which the first lockout is armed.
    const FREE_ATTEMPTS: u32 = 5;

    /// Time left on the current lockout, or `None` when no lockout is armed or
    /// its deadline has already passed.
    fn lockout_remaining(&self) -> Option<std::time::Duration> {
        let deadline = self.locked_until?;
        deadline.checked_duration_since(std::time::Instant::now())
    }

    /// Register one failed attempt and, once the free budget is used up, arm
    /// (or push out) the lockout deadline.
    fn record_failure(&mut self) {
        self.failures = self.failures.saturating_add(1);
        if self.failures < Self::FREE_ATTEMPTS {
            return;
        }
        // Window is 30s for the first lockout and doubles per extra failure
        // (60s, 120s, …); the shift is clamped and the result capped at 1h.
        let doublings = (self.failures - Self::FREE_ATTEMPTS).min(7);
        let window_secs = (30u64 << doublings).min(3600);
        let window = std::time::Duration::from_secs(window_secs);
        self.locked_until = Some(std::time::Instant::now() + window);
    }

    /// Forget all failures and lift any lockout.
    fn reset(&mut self) {
        *self = Self::default();
    }
}
150
151impl Daemon {
152    /// Open the daemon backed by the event log at `db_path`, creating parent dirs.
153    pub fn open(db_path: impl Into<PathBuf>) -> Result<Self> {
154        let db_path = db_path.into();
155        if let Some(parent) = db_path.parent() {
156            std::fs::create_dir_all(parent)
157                .with_context(|| format!("create data dir {}", parent.display()))?;
158        }
159        let data_dir = db_path
160            .parent()
161            .unwrap_or_else(|| std::path::Path::new("."))
162            .to_path_buf();
163        // Keep the data dir private to the owning user: the event log records raw
164        // commands verbatim (spine #3), which can include secrets passed on a
165        // command line. We never scrub the verbatim record, so we protect it at
166        // rest (0700 dir, 0600 db) instead of leaving it world-readable.
167        #[cfg(unix)]
168        ipc::set_mode(&data_dir, 0o700);
169        let snapshot_dir = data_dir.join("snapshots");
170        let kill_path = data_dir.join(KILL_SWITCH_FILE);
171        let log = EventLog::open(&db_path)
172            .with_context(|| format!("open event log at {}", db_path.display()))?;
173        // Owner-only on the db (and its WAL/SHM siblings) — it holds verbatim
174        // commands that may contain secrets.
175        #[cfg(unix)]
176        for suffix in ["", "-wal", "-shm"] {
177            let p = if suffix.is_empty() {
178                db_path.clone()
179            } else {
180                PathBuf::from(format!("{}{suffix}", db_path.display()))
181            };
182            if p.exists() {
183                ipc::set_mode(&p, 0o600);
184            }
185        }
186        // Load the admin vault ONCE, here at daemon startup, from the path the
187        // daemon resolves (the daemon is launched by the admin/systemd, so its
188        // environment — not a later caller's — decides the vault location).
189        let (vault, vault_degraded) = match admin::load_vault(&admin::default_vault_path()) {
190            VaultState::Locked(v) => (Some(*v), false),
191            VaultState::Unprovisioned => (None, false),
192            VaultState::Degraded(_) => (None, true),
193        };
194        Ok(Self {
195            log,
196            mode: Mode::default(),
197            scorer: kintsugi_model::default_scorer(),
198            snapshot_dir,
199            kill_path,
200            vault,
201            vault_degraded,
202            pending: RefCell::new(None),
203            shutdown: Cell::new(false),
204            throttle: RefCell::new(AuthThrottle::default()),
205        })
206    }
207
208    /// Whether an authenticated shutdown has been accepted (serve loop should exit).
209    pub fn should_shutdown(&self) -> bool {
210        self.shutdown.get()
211    }
212
213    /// Issue a challenge for a privileged op. `locked=false` means no vault, so the
214    /// caller may proceed without a proof.
215    fn auth_begin(&self, op: &str) -> ipc::Response {
216        if self.vault_degraded {
217            return ipc::Response::Error {
218                message: "admin vault is degraded; refusing privileged operations".into(),
219            };
220        }
221        match &self.vault {
222            Some(v) => {
223                let nonce = match admin::random_auth_nonce() {
224                    Ok(n) => n,
225                    Err(_) => {
226                        return ipc::Response::Error {
227                            message: "could not generate a challenge".into(),
228                        }
229                    }
230                };
231                let (salt, params) = v.auth_challenge();
232                *self.pending.borrow_mut() = Some((nonce.clone(), op.to_string()));
233                ipc::Response::Challenge {
234                    locked: true,
235                    nonce: hex::encode(&nonce),
236                    salt,
237                    params,
238                }
239            }
240            None => ipc::Response::Challenge {
241                locked: false,
242                nonce: String::new(),
243                salt: String::new(),
244                params: kintsugi_core::admin::KdfParams::production(),
245            },
246        }
247    }
248
249    /// Complete an authenticated shutdown. Enforced against the daemon's own vault.
250    fn shutdown_op(&self, op: &str, nonce_hex: &str, proof_hex: &str) -> ipc::Response {
251        if self.vault_degraded {
252            self.record_admin(op, false, "vault degraded");
253            return ipc::Response::Error {
254                message: "admin vault is degraded; refusing to stop".into(),
255            };
256        }
257        let Some(vault) = &self.vault else {
258            // Unprovisioned: there is no lock, so a clean shutdown is allowed.
259            self.record_admin(op, true, "unprovisioned");
260            self.shutdown.set(true);
261            return ipc::Response::Ack;
262        };
263        // Brute-force lockout: after repeated failures, refuse without even
264        // checking the proof until the window elapses (the attempt is still
265        // logged). Defeats a script hammering the admin password.
266        if let Some(rem) = self.throttle.borrow().lockout_remaining() {
267            self.record_admin(op, false, "locked out");
268            return ipc::Response::Error {
269                message: format!(
270                    "too many failed attempts; locked out for {}s",
271                    rem.as_secs() + 1
272                ),
273            };
274        }
275        // The challenge is one-shot: take it regardless of the outcome.
276        let pending = self.pending.borrow_mut().take();
277        let ok = match (pending, hex::decode(nonce_hex), hex::decode(proof_hex)) {
278            (Some((issued_nonce, issued_op)), Ok(nonce), Ok(proof)) => {
279                issued_op == op
280                    && issued_nonce == nonce
281                    && vault.verify_proof(&nonce, op.as_bytes(), &proof)
282            }
283            _ => false,
284        };
285        if ok {
286            self.throttle.borrow_mut().reset();
287            self.record_admin(op, true, "authenticated");
288            self.shutdown.set(true);
289            ipc::Response::Ack
290        } else {
291            self.throttle.borrow_mut().record_failure();
292            self.record_admin(op, false, "authentication failed");
293            ipc::Response::Error {
294                message: "authentication failed".into(),
295            }
296        }
297    }
298
299    /// Record a privileged-operation attempt as a hash-chained audit event, so a
300    /// forced stop — successful or not — is always visible on the timeline.
301    fn record_admin(&self, op: &str, ok: bool, reason: &str) {
302        let raw = format!(
303            "admin {op} — {}",
304            if ok { "authenticated" } else { "denied" }
305        );
306        let cmd = ProposedCommand::new(
307            "admin",
308            std::path::Path::new("."),
309            vec!["admin".to_string(), op.to_string()],
310            raw,
311        );
312        let decision = if ok { Decision::Allow } else { Decision::Deny };
313        let verdict = Verdict::rules(
314            kintsugi_core::Class::Safe,
315            decision,
316            format!("admin:{op}:{reason}"),
317        );
318        let _ = self.log.log_event(&cmd, &verdict, None);
319    }
320
321    /// Whether the panic kill-switch is currently engaged.
322    pub fn kill_switch_engaged(&self) -> bool {
323        self.kill_path.exists()
324    }
325
326    /// The directory snapshots are stored under.
327    pub fn snapshot_dir(&self) -> &std::path::Path {
328        &self.snapshot_dir
329    }
330
331    /// Swap in a specific scorer (used by tests).
332    pub fn with_scorer(mut self, scorer: Box<dyn kintsugi_model::Scorer>) -> Self {
333        self.scorer = scorer;
334        self
335    }
336
337    /// The name of the active Tier-2 scorer backend.
338    pub fn scorer_name(&self) -> &str {
339        self.scorer.name()
340    }
341
342    /// Open the daemon at the default database path.
343    pub fn open_default() -> Result<Self> {
344        Self::open(default_db_path())
345    }
346
347    /// Set the operating mode (attended / unattended / notify).
348    pub fn with_mode(mut self, mode: Mode) -> Self {
349        self.mode = mode;
350        self
351    }
352
353    /// The current operating mode.
354    pub fn mode(&self) -> Mode {
355        self.mode
356    }
357
358    /// Decide what to do with a proposed command.
359    ///
360    /// Order: (1) load the effective policy (global ← repo) which may set the mode
361    /// and risk threshold; (2) classify with the Tier-1 rule engine; (3) **Tier-2
362    /// model** — for the ambiguous band only, fill `summary`+`risk` and, in
363    /// unattended mode, apply the graduated threshold (below → allow, at/above →
364    /// deny); the model summarizes a catastrophic command for the hold card but
365    /// never changes its decision; (4) apply policy allow/deny (never a
366    /// catastrophic downgrade); (5) apply decision memory.
367    ///
368    /// Security spine: rules classify; the model only explains and scores the
369    /// ambiguous band, and its influence is escalation-only. Safe stays on the
370    /// model-free fast path.
371    pub fn decide(&self, cmd: &ProposedCommand) -> Verdict {
372        // Panic kill-switch: halt everything, including Safe, the instant it is
373        // engaged. Checked first, before any other logic.
374        if self.kill_switch_engaged() {
375            let m = kintsugi_core::classify(cmd);
376            return Verdict::rules(m.class, Decision::Deny, "kill-switch: all actions halted");
377        }
378
379        let policy = load_policy(&cmd.cwd);
380        let mode = policy.mode.unwrap_or(self.mode);
381
382        let m = kintsugi_core::classify(cmd);
383        let mut verdict = Verdict::rules(m.class, kintsugi_core::decide(m.class, mode), &m.rule);
384
385        // Tier-2 model: ambiguous band gets summary + risk (+ graduated decision);
386        // catastrophic gets a summary for the hold card. Safe is never scored.
387        match m.class {
388            kintsugi_core::Class::Ambiguous => {
389                let out = self.scorer.score(cmd, m.class, &m.rule);
390                verdict.summary = Some(out.summary);
391                verdict.risk = Some(out.risk);
392                verdict.tier = 2;
393                if mode == Mode::Unattended {
394                    // Spine rule #2 (monotonic model influence): the model may only
395                    // ADD caution. The unattended baseline for an ambiguous command
396                    // is Deny (queued for a human); the model records risk for that
397                    // review but NEVER downgrades Deny -> Allow. Auto-proceeding an
398                    // ambiguous command unattended is only possible via an explicit
399                    // human allowlist (.kintsugi.toml / decision memory) below — a human
400                    // decision, not the model's.
401                    verdict.reason = format!(
402                        "model:risk={} ({}) — unattended holds ambiguous for review",
403                        out.risk, m.rule
404                    );
405                }
406            }
407            kintsugi_core::Class::Catastrophic => {
408                let out = self.scorer.score(cmd, m.class, &m.rule);
409                verdict.summary = Some(out.summary);
410                verdict.tier = 2;
411            }
412            kintsugi_core::Class::Safe => {}
413        }
414
415        // Policy can escalate (deny) or tame (allow) — never downgrade catastrophic.
416        let action = policy.action_for(&cmd.raw);
417        verdict = kintsugi_core::adjust_for_policy(verdict, action, mode);
418
419        // Decision memory has the final say — but, like policy, it can never
420        // auto-downgrade a CATASTROPHIC command (that hard floor only lifts via an
421        // in-the-moment human decision, never a stored/replayed one). Memory deny
422        // always applies (escalation-only).
423        let repo = repo_key(&cmd.cwd);
424        let hash = kintsugi_core::command_hash(&cmd.raw);
425        match self.log.memory_lookup(&repo, &hash) {
426            Ok(Some(Decision::Allow)) if verdict.class != kintsugi_core::Class::Catastrophic => {
427                verdict.decision = Decision::Allow;
428                verdict.reason = format!("memory:allow ({})", verdict.reason);
429            }
430            Ok(Some(Decision::Deny)) => {
431                verdict.decision = Decision::Deny;
432                verdict.reason = format!("memory:deny ({})", verdict.reason);
433            }
434            _ => {}
435        }
436        verdict
437    }
438
439    /// Handle one proposal: decide, snapshot if destructive+allowed, record, and —
440    /// if held — enqueue it for approval. Returns the verdict.
441    pub fn handle(&self, cmd: ProposedCommand) -> Verdict {
442        let verdict = self.decide(&cmd);
443        let snapshot_id = self.maybe_snapshot(&cmd, &verdict);
444        if let Err(e) = self.log.log_event(&cmd, &verdict, snapshot_id.as_deref()) {
445            // Recording is best-effort at the IPC boundary; never crash the daemon.
446            eprintln!("kintsugi-daemon: failed to record event: {e}");
447        }
448        if verdict.decision == Decision::Hold {
449            if let Err(e) = self
450                .log
451                .enqueue_pending(&cmd, verdict.class, &verdict.reason)
452            {
453                eprintln!("kintsugi-daemon: failed to enqueue pending: {e}");
454            }
455        }
456        verdict
457    }
458
459    /// Approve or deny a queued command by id: record the human decision (and, on
460    /// allow, snapshot), then mark the queue entry resolved. The originating
461    /// caller (MCP poll / shim) executes; this never runs the command itself.
462    ///
463    /// A human may approve any class here — including catastrophic — which is the
464    /// deliberate human override (the *model* never can). Returns whether the id
465    /// was found in the queue.
466    pub fn resolve_pending(&self, id: &str, decision: Decision) -> Result<bool> {
467        // While the kill-switch is engaged, nothing is approvable.
468        if decision == Decision::Allow && self.kill_switch_engaged() {
469            anyhow::bail!("kill-switch engaged; clear it with `kintsugi resume` before approving");
470        }
471        let status = if decision == Decision::Allow {
472            "approved"
473        } else {
474            "denied"
475        };
476        // Claim the entry exactly once. If the CAS doesn't win, the command was
477        // already resolved (or never queued) — return false rather than snapshot
478        // and log a second time, which is what would double-run an approved cmd.
479        if !self.log.cas_pending_status(id, "pending", status)? {
480            return Ok(false);
481        }
482        let Some(cmd) = self.log.pending_command(id)? else {
483            return Ok(false);
484        };
485        self.resolve(&ipc::Resolution {
486            command: cmd,
487            decision,
488            remember: false,
489        })?;
490        Ok(true)
491    }
492
493    /// Snapshot the paths a command will touch, when it is allowed and not Safe.
494    /// Returns the snapshot id to attach to the event, if one was taken.
495    fn maybe_snapshot(&self, cmd: &ProposedCommand, verdict: &Verdict) -> Option<String> {
496        if verdict.decision != Decision::Allow || verdict.class == kintsugi_core::Class::Safe {
497            return None;
498        }
499        match kintsugi_core::capture_snapshot(&self.snapshot_dir, cmd) {
500            Ok(Some(manifest)) => {
501                if let Err(e) = self.log.record_snapshot(&manifest) {
502                    eprintln!("kintsugi-daemon: failed to record snapshot: {e}");
503                    return None;
504                }
505                Some(manifest.id)
506            }
507            Ok(None) => None,
508            Err(e) => {
509                eprintln!("kintsugi-daemon: snapshot failed: {e}");
510                None
511            }
512        }
513    }
514
515    /// Handle a human's resolution of a held command: record the final decision
516    /// and, if requested, remember it for this exact command in this repo.
517    pub fn resolve(&self, resolution: &ipc::Resolution) -> Result<()> {
518        // Kill-switch hard floor: while engaged, no Allow resolves — not via the
519        // queue (resolve_pending) and not via this direct path (shim hold card /
520        // raw Request::Resolve). Mirrors the guard in resolve_pending().
521        if resolution.decision == Decision::Allow && self.kill_switch_engaged() {
522            anyhow::bail!("kill-switch engaged; clear it with `kintsugi resume` before allowing");
523        }
524        let cmd = &resolution.command;
525        // Re-classify so the recorded class is accurate even though a human chose.
526        let m = kintsugi_core::classify(cmd);
527        // A catastrophic command is never *remembered* as always-allow — the hard
528        // floor must re-prompt every time; `[r]` on a catastrophic acts as allow-once.
529        let remember = resolution.remember
530            && !(resolution.decision == Decision::Allow
531                && m.class == kintsugi_core::Class::Catastrophic);
532        let reason = match resolution.decision {
533            Decision::Allow if remember => "human:always-allow",
534            Decision::Allow => "human:allow",
535            Decision::Deny if remember => "human:always-deny",
536            Decision::Deny => "human:deny",
537            Decision::Hold => "human:hold",
538        };
539        let verdict = Verdict::rules(m.class, resolution.decision, reason);
540        // Snapshot before a human-approved destructive command runs.
541        let snapshot_id = self.maybe_snapshot(cmd, &verdict);
542        self.log.log_event(cmd, &verdict, snapshot_id.as_deref())?;
543
544        if remember && resolution.decision != Decision::Hold {
545            let repo = repo_key(&cmd.cwd);
546            let hash = kintsugi_core::command_hash(&cmd.raw);
547            self.log.remember(&repo, &hash, resolution.decision)?;
548        }
549
550        // If this command was queued (e.g. a shim hold the human just answered),
551        // mark the queue entry resolved so it leaves `kintsugi queue`.
552        if resolution.decision != Decision::Hold {
553            let status = if resolution.decision == Decision::Allow {
554                "approved"
555            } else {
556                "denied"
557            };
558            let _ = self.log.set_pending_status(&cmd.id.to_string(), status);
559        }
560        Ok(())
561    }
562
563    /// Record an observed filesystem change from the backstop watcher. Logged as
564    /// `agent = "fs-watch"`, decision Allow (it already happened) — its purpose is
565    /// to keep the timeline and undo complete for actions that bypassed
566    /// interception.
567    pub fn observe(&self, obs: &ipc::Observation) -> Result<()> {
568        let raw = format!("{} {}", obs.kind, obs.path);
569        let cwd = std::path::Path::new(&obs.path)
570            .parent()
571            .map(|p| p.to_path_buf())
572            .unwrap_or_default();
573        let cmd = ProposedCommand::new(
574            "fs-watch",
575            cwd,
576            vec![obs.kind.clone(), obs.path.clone()],
577            raw,
578        );
579        let verdict = Verdict::rules(
580            kintsugi_core::Class::Safe,
581            Decision::Allow,
582            format!("fs:{}", obs.kind),
583        );
584        self.log.log_event(&cmd, &verdict, None)?;
585        Ok(())
586    }
587
588    /// Record a shell command from a human shell session (passive recording, no
589    /// AI-agent hook). Logged as `agent = "shell"`, decision Allow — it is never
590    /// blocked (the recorder is an audit/undo trail, not a gate). We **classify**
591    /// it with the Tier-1 rules so the event carries the real class (a destructive
592    /// command a DBA ran is flagged in the timeline and `kintsugi report`), and we
593    /// **snapshot destructive commands** so `kintsugi undo` can recover a human's
594    /// mistake. The model never runs on this path.
595    ///
596    /// The hard floor stays honest: this is an audit record of the past, not a
597    /// gate. The "nothing un-warned" guarantee never applied to commands a human
598    /// ran outside Kintsugi; the "tamper-evident record of everything" one does,
599    /// which is exactly what this preserves.
600    pub fn record_shell(&self, cmd: &ProposedCommand) -> Result<()> {
601        // Provenance: the recorder is for human shell sessions, so force the agent
602        // label to "shell" regardless of what the caller sent. A local peer that
603        // can reach the socket therefore cannot forge a record attributed to an
604        // AI agent ("claude-code") or the watcher ("fs-watch"); the worst it can
605        // do is inject a self-reported *shell* event, which the Audit view treats
606        // accordingly. (The socket is already owner-only; this is defense in depth.)
607        let mut cmd = cmd.clone();
608        cmd.agent = "shell".to_string();
609        let m = kintsugi_core::classify(&cmd);
610        // Allow, not the rule's gate decision: the command already executed, so
611        // recording a Hold/Deny here would be a lie about what happened. The
612        // class still rides along (verdict.class) so the timeline flags danger.
613        let verdict = Verdict::rules(m.class, Decision::Allow, format!("recorded:{}", m.rule));
614        // Recoverer: snapshot the paths a *destructive* human command will touch,
615        // so `kintsugi undo` can roll back a person's *filesystem* mistake (rm -rf,
616        // a clobbering overwrite) the same way it rolls back an agent's. The shell
617        // preexec hook fires before the command runs, so this is a just-in-time
618        // capture; `maybe_snapshot` no-ops for Safe commands and reflinks where it
619        // can, so the common case stays cheap. Best-effort: if the snapshot loses
620        // the race (or the fs can't reflink), the filesystem-watcher backstop still
621        // records the change. This is a filesystem recoverer — an in-database
622        // DROP/TRUNCATE is not a file, so it's flagged/recorded but recovery there
623        // is your DB's PITR/backups. The honest guarantee is "recoverable", not
624        // transactional.
625        let snapshot_id = self.maybe_snapshot(&cmd, &verdict);
626        self.log.log_event(&cmd, &verdict, snapshot_id.as_deref())?;
627        Ok(())
628    }
629
630    /// Dispatch an IPC request to its handler.
631    pub fn handle_request(&self, req: ipc::Request) -> ipc::Response {
632        match req {
633            ipc::Request::Propose(cmd) => ipc::Response::Verdict(self.handle(cmd)),
634            ipc::Request::Resolve(resolution) => match self.resolve(&resolution) {
635                Ok(()) => ipc::Response::Ack,
636                Err(e) => ipc::Response::Error {
637                    message: e.to_string(),
638                },
639            },
640            ipc::Request::Observe(obs) => match self.observe(&obs) {
641                Ok(()) => ipc::Response::Ack,
642                Err(e) => ipc::Response::Error {
643                    message: e.to_string(),
644                },
645            },
646            ipc::Request::Record(cmd) => match self.record_shell(&cmd) {
647                Ok(()) => ipc::Response::Ack,
648                Err(e) => ipc::Response::Error {
649                    message: e.to_string(),
650                },
651            },
652            ipc::Request::ListPending => match self.log.list_pending() {
653                Ok(items) => ipc::Response::PendingList { items },
654                Err(e) => ipc::Response::Error {
655                    message: e.to_string(),
656                },
657            },
658            ipc::Request::PendingStatus { id } => match self.log.pending_status(&id) {
659                Ok(status) => ipc::Response::Pending {
660                    status: status.unwrap_or_else(|| "gone".to_string()),
661                },
662                Err(e) => ipc::Response::Error {
663                    message: e.to_string(),
664                },
665            },
666            ipc::Request::Approve { id } => self.resolve_pending_response(&id, Decision::Allow),
667            ipc::Request::Deny { id } => self.resolve_pending_response(&id, Decision::Deny),
668            ipc::Request::Status => ipc::Response::Status {
669                scorer: self.scorer_name().to_string(),
670            },
671            ipc::Request::AuthBegin { op } => self.auth_begin(&op),
672            ipc::Request::Shutdown { op, nonce, proof } => self.shutdown_op(&op, &nonce, &proof),
673        }
674    }
675
676    fn resolve_pending_response(&self, id: &str, decision: Decision) -> ipc::Response {
677        match self.resolve_pending(id, decision) {
678            Ok(true) => ipc::Response::Ack,
679            Ok(false) => ipc::Response::Error {
680                message: format!("no pending command with id {id}"),
681            },
682            Err(e) => ipc::Response::Error {
683                message: e.to_string(),
684            },
685        }
686    }
687
688    /// Borrow the underlying event log (read-only queries).
689    pub fn log(&self) -> &EventLog {
690        &self.log
691    }
692}
693
694/// Load and merge the effective policy for a command's working directory:
695/// global defaults (config dir) overridden by the repo's `.kintsugi.toml`.
696pub fn load_policy(cwd: &std::path::Path) -> kintsugi_core::Policy {
697    let global = read_policy_file(&global_policy_path()).unwrap_or_default();
698    let repo = find_repo_policy(cwd)
699        .and_then(|p| read_policy_file(&p))
700        .unwrap_or_default();
701    kintsugi_core::Policy::merge(global, repo)
702}
703
704/// Path to the global policy file. Override with `KINTSUGI_CONFIG` (used in tests).
705fn global_policy_path() -> PathBuf {
706    if let Ok(p) = std::env::var("KINTSUGI_CONFIG") {
707        return PathBuf::from(p);
708    }
709    if let Some(dirs) = ProjectDirs::from("", "", "kintsugi") {
710        return dirs.config_dir().join("config.toml");
711    }
712    std::env::temp_dir().join("kintsugi-config.toml")
713}
714
/// Locate the closest `.kintsugi.toml`, checking `cwd` first and then each
/// parent directory in turn. Returns `None` when no such regular file exists
/// anywhere along that chain.
fn find_repo_policy(cwd: &std::path::Path) -> Option<PathBuf> {
    cwd.ancestors()
        .map(|dir| dir.join(".kintsugi.toml"))
        .find(|candidate| candidate.is_file())
}
727
728fn read_policy_file(path: &std::path::Path) -> Option<kintsugi_core::Policy> {
729    let text = std::fs::read_to_string(path).ok()?;
730    match kintsugi_core::Policy::parse(&text) {
731        Ok(p) => Some(p),
732        Err(e) => {
733            eprintln!(
734                "kintsugi-daemon: ignoring invalid policy {}: {e}",
735                path.display()
736            );
737            None
738        }
739    }
740}
741
/// Key identifying the "repo" a command runs in.
///
/// This is the closest directory — `cwd` itself or one of its ancestors — that
/// contains a `.git` entry, rendered as a (lossy) string. When none is found
/// the key is `cwd` itself.
pub fn repo_key(cwd: &std::path::Path) -> String {
    cwd.ancestors()
        .find(|dir| dir.join(".git").exists())
        .unwrap_or(cwd)
        .to_string_lossy()
        .into_owned()
}
754
755/// Run the daemon: open the default log, bind the socket, serve forever.
756pub fn run() -> Result<()> {
757    let daemon = Daemon::open_default()?;
758    let server = Server::bind()?;
759    // Record our PID so `kintsugi stop` can find and stop us (any launch path).
760    let _ = std::fs::write(pid_file_path(), std::process::id().to_string());
761    eprintln!(
762        "kintsugi-daemon {} listening on {}",
763        VERSION,
764        Server::endpoint().display()
765    );
766    server.serve_until(
767        |req| daemon.handle_request(req),
768        || daemon.should_shutdown(),
769    )?;
770    // An authenticated shutdown landed: clean up the PID file and exit.
771    let _ = std::fs::remove_file(pid_file_path());
772    eprintln!("kintsugi-daemon: authenticated shutdown — exiting.");
773    Ok(())
774}
775
776/// Path to the daemon's PID file (next to the event log).
777pub fn pid_file_path() -> PathBuf {
778    default_db_path().with_file_name("kintsugi.pid")
779}