Skip to main content

kintsugi_daemon/
lib.rs

1//! Kintsugi resident daemon library.
2//!
3//! Long-lived process that owns the event log and runs the decision loop. The
4//! interception layer connects over a local socket, sends a [`ProposedCommand`],
5//! and blocks on the returned [`Verdict`].
6//!
7//! In Phase 0 the daemon is a pure recorder: it logs every proposal and allows
8//! it. The Tier-1 rule engine (Phase 1) plugs into [`Daemon::decide`] without
9//! changing the IPC or logging paths.
10
11#![forbid(unsafe_code)]
12
13pub mod ipc;
14pub mod watch;
15
16use std::path::PathBuf;
17
18use std::cell::{Cell, RefCell};
19
20use anyhow::{Context, Result};
21use directories::ProjectDirs;
22use kintsugi_core::admin::{self, SealedVault, VaultState};
23use kintsugi_core::{Decision, EventLog, Mode, ProposedCommand, Verdict};
24
25pub use ipc::{Client, Observation, Resolution, Server};
26
/// This crate's version string, read from `CARGO_PKG_VERSION` at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
28
29/// The kill-switch flag file name, alongside the event-log database.
30pub const KILL_SWITCH_FILE: &str = "panic.flag";
31
32/// Path to the panic kill-switch flag (alongside the default event log).
33pub fn kill_switch_path() -> PathBuf {
34    default_db_path()
35        .parent()
36        .map(|p| p.join(KILL_SWITCH_FILE))
37        .unwrap_or_else(|| std::env::temp_dir().join(KILL_SWITCH_FILE))
38}
39
40/// The fail-closed marker file name, alongside the event-log database.
41pub const FAIL_CLOSED_FILE: &str = "fail-closed.flag";
42
43/// Path to the fail-closed marker (alongside the default event log). Its mere
44/// existence is the signal — the content is irrelevant. The interception layer
45/// (shim/hook/MCP) reads it **without** the daemon, so that killing the daemon
46/// can't be used to open the gate: with the marker present, an unreachable
47/// daemon means *block*, not *run unguarded*.
48pub fn fail_closed_marker_path() -> PathBuf {
49    default_db_path().with_file_name(FAIL_CLOSED_FILE)
50}
51
52/// Whether the admin-set fail-closed marker is present. Cheap, daemon-free, and
53/// callable from the interception fast path. In the locked posture the marker is
54/// owned by the privileged account (root / a dedicated `kintsugi` user), so an
55/// audited non-root agent cannot remove it to re-open the gate.
56pub fn is_fail_closed_marked() -> bool {
57    fail_closed_marker_path().exists()
58}
59
60/// Create or remove the fail-closed marker to match `on`. Best-effort, atomic
61/// create; called by the admin flow when the locked `fail_closed` setting
62/// changes so the posture survives a daemon restart and a kill.
63pub fn set_fail_closed_marker(on: bool) -> std::io::Result<()> {
64    let path = fail_closed_marker_path();
65    if on {
66        if let Some(parent) = path.parent() {
67            std::fs::create_dir_all(parent)?;
68        }
69        // 0644: world-readable (the shim must read it) but, in the locked posture,
70        // owned by the privileged account so the audited user can't delete it.
71        std::fs::write(&path, b"fail-closed\n")?;
72    } else if path.exists() {
73        std::fs::remove_file(&path)?;
74    }
75    Ok(())
76}
77
78/// Resolve the event-log database path. Override with `KINTSUGI_DB` (handy in tests).
79pub fn default_db_path() -> PathBuf {
80    if let Ok(p) = std::env::var("KINTSUGI_DB") {
81        return PathBuf::from(p);
82    }
83    if let Some(dirs) = ProjectDirs::from("", "", "kintsugi") {
84        return dirs.data_dir().join("events.db");
85    }
86    std::env::temp_dir().join("kintsugi-events.db")
87}
88
/// The resident decision loop: owns the event log, the warm scorer, classifies,
/// records.
///
/// Interior mutability (`Cell`/`RefCell`) is used for the authentication and
/// shutdown state so request handlers can take `&self`.
pub struct Daemon {
    /// The event log every proposal, resolution and admin attempt is written to.
    log: EventLog,
    /// Default operating mode; a loaded policy may override it per command.
    mode: Mode,
    /// The Tier-2 scorer backend used for ambiguous and catastrophic commands.
    scorer: Box<dyn kintsugi_model::Scorer>,
    /// Directory snapshots are stored under (`snapshots` inside the data dir).
    snapshot_dir: PathBuf,
    /// Path of the kill-switch flag file; its existence halts all actions.
    kill_path: PathBuf,
    /// The admin vault loaded at *daemon* startup (not at request time), so the
    /// auth decision is made against the path the daemon resolved — a caller's
    /// environment can't redirect it. `None` = unprovisioned (no lock).
    vault: Option<SealedVault>,
    /// The vault file exists but is unreadable/corrupt → stay locked (fail-closed):
    /// refuse authenticated shutdown rather than silently allow it.
    vault_degraded: bool,
    /// The last challenge nonce issued (and its op), consumed once by `Shutdown`.
    pending: RefCell<Option<(Vec<u8>, String)>>,
    /// Set when an authenticated shutdown has been accepted; the serve loop exits.
    shutdown: Cell<bool>,
    /// In-memory brute-force throttle for admin authentication.
    throttle: RefCell<AuthThrottle>,
}
111
/// Brute-force guard for admin authentication.
///
/// The daemon is the single authority, so a process-local counter suffices:
/// once enough consecutive failures accumulate, further attempts are locked out
/// for a window that grows exponentially (which defeats a script hammering the
/// admin password). A successful authentication clears the state.
#[derive(Default)]
struct AuthThrottle {
    /// Consecutive failed attempts since the last reset.
    failures: u32,
    /// Instant at which the current lockout ends, if one has been armed.
    locked_until: Option<std::time::Instant>,
}

impl AuthThrottle {
    /// Number of consecutive failures at which the first lockout is armed.
    const FREE_ATTEMPTS: u32 = 5;

    /// Time left on the active lockout, or `None` when not locked out
    /// (never armed, or the deadline has already passed).
    fn lockout_remaining(&self) -> Option<std::time::Duration> {
        let deadline = self.locked_until?;
        deadline.checked_duration_since(std::time::Instant::now())
    }

    /// Register one failed attempt and, once the free budget is used up,
    /// arm (or push out) the lockout deadline.
    fn record_failure(&mut self) {
        self.failures = self.failures.saturating_add(1);
        let Some(excess) = self.failures.checked_sub(Self::FREE_ATTEMPTS) else {
            // Still inside the free budget: no lockout yet.
            return;
        };
        // 30s for the first lockout, doubling with every further failure
        // (60s, 120s, …); the shift is bounded and the result capped at one hour.
        let window_secs = (30u64 << excess.min(7)).min(3600);
        let window = std::time::Duration::from_secs(window_secs);
        self.locked_until = Some(std::time::Instant::now() + window);
    }

    /// Forget all recorded failures and lift any lockout.
    fn reset(&mut self) {
        *self = Self {
            failures: 0,
            locked_until: None,
        };
    }
}
150
151impl Daemon {
152    /// Open the daemon backed by the event log at `db_path`, creating parent dirs.
153    pub fn open(db_path: impl Into<PathBuf>) -> Result<Self> {
154        let db_path = db_path.into();
155        if let Some(parent) = db_path.parent() {
156            std::fs::create_dir_all(parent)
157                .with_context(|| format!("create data dir {}", parent.display()))?;
158        }
159        let data_dir = db_path
160            .parent()
161            .unwrap_or_else(|| std::path::Path::new("."))
162            .to_path_buf();
163        // Keep the data dir private to the owning user: the event log records raw
164        // commands verbatim (spine #3), which can include secrets passed on a
165        // command line. We never scrub the verbatim record, so we protect it at
166        // rest (0700 dir, 0600 db) instead of leaving it world-readable.
167        #[cfg(unix)]
168        ipc::set_mode(&data_dir, 0o700);
169        let snapshot_dir = data_dir.join("snapshots");
170        let kill_path = data_dir.join(KILL_SWITCH_FILE);
171        let log = EventLog::open(&db_path)
172            .with_context(|| format!("open event log at {}", db_path.display()))?;
173        // Owner-only on the db (and its WAL/SHM siblings) — it holds verbatim
174        // commands that may contain secrets.
175        #[cfg(unix)]
176        for suffix in ["", "-wal", "-shm"] {
177            let p = if suffix.is_empty() {
178                db_path.clone()
179            } else {
180                PathBuf::from(format!("{}{suffix}", db_path.display()))
181            };
182            if p.exists() {
183                ipc::set_mode(&p, 0o600);
184            }
185        }
186        // Load the admin vault ONCE, here at daemon startup, from the path the
187        // daemon resolves (the daemon is launched by the admin/systemd, so its
188        // environment — not a later caller's — decides the vault location).
189        let (vault, vault_degraded) = match admin::load_vault(&admin::default_vault_path()) {
190            VaultState::Locked(v) => (Some(*v), false),
191            VaultState::Unprovisioned => (None, false),
192            VaultState::Degraded(_) => (None, true),
193        };
194        Ok(Self {
195            log,
196            mode: Mode::default(),
197            scorer: kintsugi_model::default_scorer(),
198            snapshot_dir,
199            kill_path,
200            vault,
201            vault_degraded,
202            pending: RefCell::new(None),
203            shutdown: Cell::new(false),
204            throttle: RefCell::new(AuthThrottle::default()),
205        })
206    }
207
208    /// Whether an authenticated shutdown has been accepted (serve loop should exit).
209    pub fn should_shutdown(&self) -> bool {
210        self.shutdown.get()
211    }
212
213    /// Issue a challenge for a privileged op. `locked=false` means no vault, so the
214    /// caller may proceed without a proof.
215    fn auth_begin(&self, op: &str) -> ipc::Response {
216        if self.vault_degraded {
217            return ipc::Response::Error {
218                message: "admin vault is degraded; refusing privileged operations".into(),
219            };
220        }
221        match &self.vault {
222            Some(v) => {
223                let nonce = match admin::random_auth_nonce() {
224                    Ok(n) => n,
225                    Err(_) => {
226                        return ipc::Response::Error {
227                            message: "could not generate a challenge".into(),
228                        }
229                    }
230                };
231                let (salt, params) = v.auth_challenge();
232                *self.pending.borrow_mut() = Some((nonce.clone(), op.to_string()));
233                ipc::Response::Challenge {
234                    locked: true,
235                    nonce: hex::encode(&nonce),
236                    salt,
237                    params,
238                }
239            }
240            None => ipc::Response::Challenge {
241                locked: false,
242                nonce: String::new(),
243                salt: String::new(),
244                params: kintsugi_core::admin::KdfParams::production(),
245            },
246        }
247    }
248
249    /// Complete an authenticated shutdown. Enforced against the daemon's own vault.
250    fn shutdown_op(&self, op: &str, nonce_hex: &str, proof_hex: &str) -> ipc::Response {
251        if self.vault_degraded {
252            self.record_admin(op, false, "vault degraded");
253            return ipc::Response::Error {
254                message: "admin vault is degraded; refusing to stop".into(),
255            };
256        }
257        let Some(vault) = &self.vault else {
258            // Unprovisioned: there is no lock, so a clean shutdown is allowed.
259            self.record_admin(op, true, "unprovisioned");
260            self.shutdown.set(true);
261            return ipc::Response::Ack;
262        };
263        // Brute-force lockout: after repeated failures, refuse without even
264        // checking the proof until the window elapses (the attempt is still
265        // logged). Defeats a script hammering the admin password.
266        if let Some(rem) = self.throttle.borrow().lockout_remaining() {
267            self.record_admin(op, false, "locked out");
268            return ipc::Response::Error {
269                message: format!(
270                    "too many failed attempts; locked out for {}s",
271                    rem.as_secs() + 1
272                ),
273            };
274        }
275        // The challenge is one-shot: take it regardless of the outcome.
276        let pending = self.pending.borrow_mut().take();
277        let ok = match (pending, hex::decode(nonce_hex), hex::decode(proof_hex)) {
278            (Some((issued_nonce, issued_op)), Ok(nonce), Ok(proof)) => {
279                issued_op == op
280                    && issued_nonce == nonce
281                    && vault.verify_proof(&nonce, op.as_bytes(), &proof)
282            }
283            _ => false,
284        };
285        if ok {
286            self.throttle.borrow_mut().reset();
287            self.record_admin(op, true, "authenticated");
288            self.shutdown.set(true);
289            ipc::Response::Ack
290        } else {
291            self.throttle.borrow_mut().record_failure();
292            self.record_admin(op, false, "authentication failed");
293            ipc::Response::Error {
294                message: "authentication failed".into(),
295            }
296        }
297    }
298
299    /// Record a privileged-operation attempt as a hash-chained audit event, so a
300    /// forced stop — successful or not — is always visible on the timeline.
301    fn record_admin(&self, op: &str, ok: bool, reason: &str) {
302        let raw = format!(
303            "admin {op} — {}",
304            if ok { "authenticated" } else { "denied" }
305        );
306        let cmd = ProposedCommand::new(
307            "admin",
308            std::path::Path::new("."),
309            vec!["admin".to_string(), op.to_string()],
310            raw,
311        );
312        let decision = if ok { Decision::Allow } else { Decision::Deny };
313        let verdict = Verdict::rules(
314            kintsugi_core::Class::Safe,
315            decision,
316            format!("admin:{op}:{reason}"),
317        );
318        let _ = self.log.log_event(&cmd, &verdict, None);
319    }
320
321    /// Whether the panic kill-switch is currently engaged.
322    pub fn kill_switch_engaged(&self) -> bool {
323        self.kill_path.exists()
324    }
325
326    /// The directory snapshots are stored under.
327    pub fn snapshot_dir(&self) -> &std::path::Path {
328        &self.snapshot_dir
329    }
330
331    /// Swap in a specific scorer (used by tests).
332    pub fn with_scorer(mut self, scorer: Box<dyn kintsugi_model::Scorer>) -> Self {
333        self.scorer = scorer;
334        self
335    }
336
337    /// The name of the active Tier-2 scorer backend.
338    pub fn scorer_name(&self) -> &str {
339        self.scorer.name()
340    }
341
342    /// Open the daemon at the default database path.
343    pub fn open_default() -> Result<Self> {
344        Self::open(default_db_path())
345    }
346
347    /// Set the operating mode (attended / unattended / notify).
348    pub fn with_mode(mut self, mode: Mode) -> Self {
349        self.mode = mode;
350        self
351    }
352
353    /// The current operating mode.
354    pub fn mode(&self) -> Mode {
355        self.mode
356    }
357
358    /// Decide what to do with a proposed command.
359    ///
360    /// Order: (1) load the effective policy (global ← repo) which may set the mode
361    /// and risk threshold; (2) classify with the Tier-1 rule engine; (3) **Tier-2
362    /// model** — for the ambiguous band only, fill `summary`+`risk` and, in
363    /// unattended mode, apply the graduated threshold (below → allow, at/above →
364    /// deny); the model summarizes a catastrophic command for the hold card but
365    /// never changes its decision; (4) apply policy allow/deny (never a
366    /// catastrophic downgrade); (5) apply decision memory.
367    ///
368    /// Security spine: rules classify; the model only explains and scores the
369    /// ambiguous band, and its influence is escalation-only. Safe stays on the
370    /// model-free fast path.
371    pub fn decide(&self, cmd: &ProposedCommand) -> Verdict {
372        // Panic kill-switch: halt everything, including Safe, the instant it is
373        // engaged. Checked first, before any other logic.
374        if self.kill_switch_engaged() {
375            let m = kintsugi_core::classify(cmd);
376            return Verdict::rules(m.class, Decision::Deny, "kill-switch: all actions halted");
377        }
378
379        let policy = load_policy(&cmd.cwd);
380        let mode = policy.mode.unwrap_or(self.mode);
381
382        let m = kintsugi_core::classify(cmd);
383        let mut verdict = Verdict::rules(m.class, kintsugi_core::decide(m.class, mode), &m.rule);
384
385        // Tier-2 model: ambiguous band gets summary + risk (+ graduated decision);
386        // catastrophic gets a summary for the hold card. Safe is never scored.
387        match m.class {
388            kintsugi_core::Class::Ambiguous => {
389                let out = self.scorer.score(cmd, m.class, &m.rule);
390                verdict.summary = Some(out.summary);
391                verdict.risk = Some(out.risk);
392                verdict.tier = 2;
393                if mode == Mode::Unattended {
394                    // Spine rule #2 (monotonic model influence): the model may only
395                    // ADD caution. The unattended baseline for an ambiguous command
396                    // is Deny (queued for a human); the model records risk for that
397                    // review but NEVER downgrades Deny -> Allow. Auto-proceeding an
398                    // ambiguous command unattended is only possible via an explicit
399                    // human allowlist (.kintsugi.toml / decision memory) below — a human
400                    // decision, not the model's.
401                    verdict.reason = format!(
402                        "model:risk={} ({}) — unattended holds ambiguous for review",
403                        out.risk, m.rule
404                    );
405                }
406            }
407            kintsugi_core::Class::Catastrophic => {
408                let out = self.scorer.score(cmd, m.class, &m.rule);
409                verdict.summary = Some(out.summary);
410                verdict.tier = 2;
411            }
412            kintsugi_core::Class::Safe => {}
413        }
414
415        // Policy can escalate (deny) or tame (allow) — never downgrade catastrophic.
416        let action = policy.action_for(&cmd.raw);
417        verdict = kintsugi_core::adjust_for_policy(verdict, action, mode);
418
419        // Decision memory has the final say — but, like policy, it can never
420        // auto-downgrade a CATASTROPHIC command (that hard floor only lifts via an
421        // in-the-moment human decision, never a stored/replayed one). Memory deny
422        // always applies (escalation-only).
423        let repo = repo_key(&cmd.cwd);
424        let hash = kintsugi_core::command_hash(&cmd.raw);
425        match self.log.memory_lookup(&repo, &hash) {
426            Ok(Some(Decision::Allow)) if verdict.class != kintsugi_core::Class::Catastrophic => {
427                verdict.decision = Decision::Allow;
428                verdict.reason = format!("memory:allow ({})", verdict.reason);
429            }
430            Ok(Some(Decision::Deny)) => {
431                verdict.decision = Decision::Deny;
432                verdict.reason = format!("memory:deny ({})", verdict.reason);
433            }
434            _ => {}
435        }
436        verdict
437    }
438
439    /// Handle one proposal: decide, snapshot if destructive+allowed, record, and —
440    /// if held — enqueue it for approval. Returns the verdict.
441    pub fn handle(&self, cmd: ProposedCommand) -> Verdict {
442        let mut verdict = self.decide(&cmd);
443        let (snapshot_id, snapshot_failed) = self.maybe_snapshot(&cmd, &verdict);
444        if verdict.decision == Decision::Allow && verdict.class != kintsugi_core::Class::Safe {
445            let note = if snapshot_failed {
446                Some("snapshot failed — NOT reversible by undo")
447            } else if !kintsugi_core::snapshot::is_fully_reversible(&cmd) {
448                Some("target can't be fully snapshotted — undo may not restore everything")
449            } else {
450                None
451            };
452            if let Some(n) = note {
453                verdict.reason = format!("{} [⚠ {n}]", verdict.reason);
454            }
455        }
456        if let Err(e) = self.log.log_event(&cmd, &verdict, snapshot_id.as_deref()) {
457            // Spine #4: a command we cannot write to the append-only log must not run
458            // unrecorded. Fail closed — refuse an Allow rather than execute it dark.
459            eprintln!("kintsugi-daemon: failed to record event: {e}");
460            if verdict.decision == Decision::Allow {
461                verdict.decision = Decision::Deny;
462                verdict.reason = format!(
463                    "audit-log write failed; denied fail-closed ({})",
464                    verdict.reason
465                );
466            }
467        }
468        if verdict.decision == Decision::Hold {
469            if let Err(e) = self
470                .log
471                .enqueue_pending(&cmd, verdict.class, &verdict.reason)
472            {
473                eprintln!("kintsugi-daemon: failed to enqueue pending: {e}");
474            }
475        }
476        verdict
477    }
478
479    /// Approve or deny a queued command by id: record the human decision (and, on
480    /// allow, snapshot), then mark the queue entry resolved. The originating
481    /// caller (MCP poll / shim) executes; this never runs the command itself.
482    ///
483    /// A human may approve any class here — including catastrophic — which is the
484    /// deliberate human override (the *model* never can). Returns whether the id
485    /// was found in the queue.
486    pub fn resolve_pending(&self, id: &str, decision: Decision) -> Result<bool> {
487        // While the kill-switch is engaged, nothing is approvable.
488        if decision == Decision::Allow && self.kill_switch_engaged() {
489            anyhow::bail!("kill-switch engaged; clear it with `kintsugi resume` before approving");
490        }
491        let status = if decision == Decision::Allow {
492            "approved"
493        } else {
494            "denied"
495        };
496        // Claim the entry exactly once. If the CAS doesn't win, the command was
497        // already resolved (or never queued) — return false rather than snapshot
498        // and log a second time, which is what would double-run an approved cmd.
499        if !self.log.cas_pending_status(id, "pending", status)? {
500            return Ok(false);
501        }
502        let Some(cmd) = self.log.pending_command(id)? else {
503            return Ok(false);
504        };
505        self.resolve(&ipc::Resolution {
506            command: cmd,
507            decision,
508            remember: false,
509        })?;
510        Ok(true)
511    }
512
513    /// Snapshot the paths a command will touch, when it is allowed and not Safe.
514    /// Returns the snapshot id to attach to the event, if one was taken.
515    fn maybe_snapshot(&self, cmd: &ProposedCommand, verdict: &Verdict) -> (Option<String>, bool) {
516        if verdict.decision != Decision::Allow || verdict.class == kintsugi_core::Class::Safe {
517            return (None, false);
518        }
519        match kintsugi_core::capture_snapshot(&self.snapshot_dir, cmd) {
520            Ok(Some(manifest)) => {
521                if let Err(e) = self.log.record_snapshot(&manifest) {
522                    eprintln!("kintsugi-daemon: failed to record snapshot: {e}");
523                    return (None, true);
524                }
525                (Some(manifest.id), false)
526            }
527            Ok(None) => (None, false),
528            Err(e) => {
529                eprintln!("kintsugi-daemon: snapshot failed: {e}");
530                (None, true)
531            }
532        }
533    }
534
535    /// Handle a human's resolution of a held command: record the final decision
536    /// and, if requested, remember it for this exact command in this repo.
537    pub fn resolve(&self, resolution: &ipc::Resolution) -> Result<()> {
538        // Kill-switch hard floor: while engaged, no Allow resolves — not via the
539        // queue (resolve_pending) and not via this direct path (shim hold card /
540        // raw Request::Resolve). Mirrors the guard in resolve_pending().
541        if resolution.decision == Decision::Allow && self.kill_switch_engaged() {
542            anyhow::bail!("kill-switch engaged; clear it with `kintsugi resume` before allowing");
543        }
544        let cmd = &resolution.command;
545        // Re-classify so the recorded class is accurate even though a human chose.
546        let m = kintsugi_core::classify(cmd);
547        // A catastrophic command is never *remembered* as always-allow — the hard
548        // floor must re-prompt every time; `[r]` on a catastrophic acts as allow-once.
549        let remember = resolution.remember
550            && !(resolution.decision == Decision::Allow
551                && m.class == kintsugi_core::Class::Catastrophic);
552        let reason = match resolution.decision {
553            Decision::Allow if remember => "human:always-allow",
554            Decision::Allow => "human:allow",
555            Decision::Deny if remember => "human:always-deny",
556            Decision::Deny => "human:deny",
557            Decision::Hold => "human:hold",
558        };
559        let verdict = Verdict::rules(m.class, resolution.decision, reason);
560        // Snapshot before a human-approved destructive command runs.
561        let (snapshot_id, _) = self.maybe_snapshot(cmd, &verdict);
562        self.log.log_event(cmd, &verdict, snapshot_id.as_deref())?;
563
564        if remember && resolution.decision != Decision::Hold {
565            let repo = repo_key(&cmd.cwd);
566            let hash = kintsugi_core::command_hash(&cmd.raw);
567            self.log.remember(&repo, &hash, resolution.decision)?;
568        }
569
570        // If this command was queued (e.g. a shim hold the human just answered),
571        // mark the queue entry resolved so it leaves `kintsugi queue`.
572        if resolution.decision != Decision::Hold {
573            let status = if resolution.decision == Decision::Allow {
574                "approved"
575            } else {
576                "denied"
577            };
578            let _ = self.log.set_pending_status(&cmd.id.to_string(), status);
579        }
580        Ok(())
581    }
582
583    /// Record an observed filesystem change from the backstop watcher. Logged as
584    /// `agent = "fs-watch"`, decision Allow (it already happened) — its purpose is
585    /// to keep the timeline and undo complete for actions that bypassed
586    /// interception.
587    pub fn observe(&self, obs: &ipc::Observation) -> Result<()> {
588        let raw = format!("{} {}", obs.kind, obs.path);
589        let cwd = std::path::Path::new(&obs.path)
590            .parent()
591            .map(|p| p.to_path_buf())
592            .unwrap_or_default();
593        let cmd = ProposedCommand::new(
594            "fs-watch",
595            cwd,
596            vec![obs.kind.clone(), obs.path.clone()],
597            raw,
598        );
599        let verdict = Verdict::rules(
600            kintsugi_core::Class::Safe,
601            Decision::Allow,
602            format!("fs:{}", obs.kind),
603        );
604        self.log.log_event(&cmd, &verdict, None)?;
605        Ok(())
606    }
607
608    /// Record a shell command from a human shell session (passive recording, no
609    /// AI-agent hook). Logged as `agent = "shell"`, decision Allow — it is never
610    /// blocked (the recorder is an audit/undo trail, not a gate). We **classify**
611    /// it with the Tier-1 rules so the event carries the real class (a destructive
612    /// command a DBA ran is flagged in the timeline and `kintsugi report`), and we
613    /// **snapshot destructive commands** so `kintsugi undo` can recover a human's
614    /// mistake. The model never runs on this path.
615    ///
616    /// The hard floor stays honest: this is an audit record of the past, not a
617    /// gate. The "nothing un-warned" guarantee never applied to commands a human
618    /// ran outside Kintsugi; the "tamper-evident record of everything" one does,
619    /// which is exactly what this preserves.
620    pub fn record_shell(&self, cmd: &ProposedCommand) -> Result<()> {
621        // Provenance: the recorder is for human shell sessions, so force the agent
622        // label to "shell" regardless of what the caller sent. A local peer that
623        // can reach the socket therefore cannot forge a record attributed to an
624        // AI agent ("claude-code") or the watcher ("fs-watch"); the worst it can
625        // do is inject a self-reported *shell* event, which the Audit view treats
626        // accordingly. (The socket is already owner-only; this is defense in depth.)
627        let mut cmd = cmd.clone();
628        cmd.agent = "shell".to_string();
629        let m = kintsugi_core::classify(&cmd);
630        // Allow, not the rule's gate decision: the command already executed, so
631        // recording a Hold/Deny here would be a lie about what happened. The
632        // class still rides along (verdict.class) so the timeline flags danger.
633        let verdict = Verdict::rules(m.class, Decision::Allow, format!("recorded:{}", m.rule));
634        // Recoverer: snapshot the paths a *destructive* human command will touch,
635        // so `kintsugi undo` can roll back a person's *filesystem* mistake (rm -rf,
636        // a clobbering overwrite) the same way it rolls back an agent's. The shell
637        // preexec hook fires before the command runs, so this is a just-in-time
638        // capture; `maybe_snapshot` no-ops for Safe commands and reflinks where it
639        // can, so the common case stays cheap. Best-effort: if the snapshot loses
640        // the race (or the fs can't reflink), the filesystem-watcher backstop still
641        // records the change. This is a filesystem recoverer — an in-database
642        // DROP/TRUNCATE is not a file, so it's flagged/recorded but recovery there
643        // is your DB's PITR/backups. The honest guarantee is "recoverable", not
644        // transactional.
645        let (snapshot_id, _) = self.maybe_snapshot(&cmd, &verdict);
646        self.log.log_event(&cmd, &verdict, snapshot_id.as_deref())?;
647        Ok(())
648    }
649
650    /// Dispatch an IPC request to its handler.
651    pub fn handle_request(&self, req: ipc::Request) -> ipc::Response {
652        match req {
653            ipc::Request::Propose(cmd) => ipc::Response::Verdict(self.handle(cmd)),
654            ipc::Request::Resolve(resolution) => match self.resolve(&resolution) {
655                Ok(()) => ipc::Response::Ack,
656                Err(e) => ipc::Response::Error {
657                    message: e.to_string(),
658                },
659            },
660            ipc::Request::Observe(obs) => match self.observe(&obs) {
661                Ok(()) => ipc::Response::Ack,
662                Err(e) => ipc::Response::Error {
663                    message: e.to_string(),
664                },
665            },
666            ipc::Request::Record(cmd) => match self.record_shell(&cmd) {
667                Ok(()) => ipc::Response::Ack,
668                Err(e) => ipc::Response::Error {
669                    message: e.to_string(),
670                },
671            },
672            ipc::Request::ListPending => match self.log.list_pending() {
673                Ok(items) => ipc::Response::PendingList { items },
674                Err(e) => ipc::Response::Error {
675                    message: e.to_string(),
676                },
677            },
678            ipc::Request::PendingStatus { id } => match self.log.pending_status(&id) {
679                Ok(status) => ipc::Response::Pending {
680                    status: status.unwrap_or_else(|| "gone".to_string()),
681                },
682                Err(e) => ipc::Response::Error {
683                    message: e.to_string(),
684                },
685            },
686            ipc::Request::Approve { id } => self.resolve_pending_response(&id, Decision::Allow),
687            ipc::Request::Deny { id } => self.resolve_pending_response(&id, Decision::Deny),
688            ipc::Request::Status => ipc::Response::Status {
689                scorer: self.scorer_name().to_string(),
690            },
691            ipc::Request::AuthBegin { op } => self.auth_begin(&op),
692            ipc::Request::Shutdown { op, nonce, proof } => self.shutdown_op(&op, &nonce, &proof),
693        }
694    }
695
696    fn resolve_pending_response(&self, id: &str, decision: Decision) -> ipc::Response {
697        match self.resolve_pending(id, decision) {
698            Ok(true) => ipc::Response::Ack,
699            Ok(false) => ipc::Response::Error {
700                message: format!("no pending command with id {id}"),
701            },
702            Err(e) => ipc::Response::Error {
703                message: e.to_string(),
704            },
705        }
706    }
707
708    /// Borrow the underlying event log (read-only queries).
709    pub fn log(&self) -> &EventLog {
710        &self.log
711    }
712}
713
714/// Load and merge the effective policy for a command's working directory:
715/// global defaults (config dir) overridden by the repo's `.kintsugi.toml`.
716pub fn load_policy(cwd: &std::path::Path) -> kintsugi_core::Policy {
717    let global = read_policy_file(&global_policy_path()).unwrap_or_default();
718    let repo = find_repo_policy(cwd)
719        .and_then(|p| read_policy_file(&p))
720        .unwrap_or_default();
721    kintsugi_core::Policy::merge(global, repo)
722}
723
724/// Path to the global policy file. Override with `KINTSUGI_CONFIG` (used in tests).
725fn global_policy_path() -> PathBuf {
726    if let Ok(p) = std::env::var("KINTSUGI_CONFIG") {
727        return PathBuf::from(p);
728    }
729    if let Some(dirs) = ProjectDirs::from("", "", "kintsugi") {
730        return dirs.config_dir().join("config.toml");
731    }
732    std::env::temp_dir().join("kintsugi-config.toml")
733}
734
/// Locate the repo-level policy file that governs `cwd`.
///
/// Checks `cwd` itself and then each ancestor in turn, returning the first
/// `.kintsugi.toml` path for which `is_file()` holds; `None` when no level
/// has one.
fn find_repo_policy(cwd: &std::path::Path) -> Option<PathBuf> {
    cwd.ancestors()
        .map(|level| level.join(".kintsugi.toml"))
        .find(|candidate| candidate.is_file())
}
747
748fn read_policy_file(path: &std::path::Path) -> Option<kintsugi_core::Policy> {
749    let text = std::fs::read_to_string(path).ok()?;
750    match kintsugi_core::Policy::parse(&text) {
751        Ok(p) => Some(p),
752        Err(e) => {
753            eprintln!(
754                "kintsugi-daemon: ignoring invalid policy {}: {e}",
755                path.display()
756            );
757            None
758        }
759    }
760}
761
/// Key identifying the "repo" a command runs in.
///
/// This is the nearest of `cwd` and its ancestors that contains a `.git`
/// entry (a directory or a file — only existence is checked), rendered as a
/// lossy UTF-8 string. When no level has one, `cwd` itself is the key.
pub fn repo_key(cwd: &std::path::Path) -> String {
    cwd.ancestors()
        .find(|level| level.join(".git").exists())
        .unwrap_or(cwd)
        .to_string_lossy()
        .into_owned()
}
774
775/// Run the daemon: open the default log, bind the socket, serve forever.
776pub fn run() -> Result<()> {
777    let daemon = Daemon::open_default()?;
778    let server = Server::bind()?;
779    // Record our PID so `kintsugi stop` can find and stop us (any launch path).
780    let _ = std::fs::write(pid_file_path(), std::process::id().to_string());
781    eprintln!(
782        "kintsugi-daemon {} listening on {}",
783        VERSION,
784        Server::endpoint().display()
785    );
786    server.serve_until(
787        |req| daemon.handle_request(req),
788        || daemon.should_shutdown(),
789    )?;
790    // An authenticated shutdown landed: clean up the PID file and exit.
791    let _ = std::fs::remove_file(pid_file_path());
792    eprintln!("kintsugi-daemon: authenticated shutdown — exiting.");
793    Ok(())
794}
795
796/// Path to the daemon's PID file (next to the event log).
797pub fn pid_file_path() -> PathBuf {
798    default_db_path().with_file_name("kintsugi.pid")
799}