// kaizen/shell/ingest.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! `kaizen ingest` — hook ingestion (stdin or explicit payload for MCP).
3
4use crate::collect::hooks::EventKind;
5use crate::core::config;
6use crate::store::Store;
7use crate::{collect, core::event::SessionRecord, prompt};
8use anyhow::Result;
9use serde_json::Value;
10use std::ffi::OsString;
11use std::path::PathBuf;
12
13/// Hook source, aligned with the `kaizen ingest hook --source` CLI.
14#[derive(Clone, Copy, Debug, serde::Deserialize, serde::Serialize)]
15#[serde(rename_all = "lowercase")]
16pub enum IngestSource {
17    Cursor,
18    Claude,
19    Openclaw,
20    Vibe,
21}
22
23impl IngestSource {
24    pub fn parse(s: &str) -> Option<Self> {
25        match s.to_lowercase().as_str() {
26            "cursor" => Some(Self::Cursor),
27            "claude" => Some(Self::Claude),
28            "openclaw" => Some(Self::Openclaw),
29            "vibe" => Some(Self::Vibe),
30            _ => None,
31        }
32    }
33
34    pub fn agent(self) -> &'static str {
35        match self {
36            Self::Cursor => "cursor",
37            Self::Claude => "claude",
38            Self::Openclaw => "openclaw",
39            Self::Vibe => "vibe",
40        }
41    }
42}
43
44/// Process hook JSON (same as stdin for `kaizen ingest hook`). On success, returns empty string (CLI prints nothing).
45pub fn ingest_hook_string(
46    source: IngestSource,
47    input: &str,
48    workspace: Option<PathBuf>,
49) -> Result<String> {
50    ingest_hook_text(source, input, workspace)?;
51    Ok(String::new())
52}
53
54/// Process hook JSON (same as stdin for `kaizen ingest hook`).
55pub fn ingest_hook_text(
56    source: IngestSource,
57    input: &str,
58    workspace: Option<PathBuf>,
59) -> Result<()> {
60    let event = match source {
61        IngestSource::Cursor => collect::hooks::cursor::parse_cursor_hook(input)?,
62        IngestSource::Claude => collect::hooks::claude::parse_claude_hook(input)?,
63        IngestSource::Openclaw => collect::hooks::openclaw::parse_openclaw_hook(input)?,
64        IngestSource::Vibe => collect::hooks::vibe::parse_vibe_hook(input)?,
65    };
66    let ws = workspace.unwrap_or_else(|| std::env::current_dir().expect("cwd"));
67    let cfg = config::load(&ws)?;
68    let sync_ctx = crate::sync::ingest_ctx(&cfg, ws.clone());
69    let db_path = ws.join(".kaizen/kaizen.db");
70    let store = Store::open(&db_path)?;
71    let now_ms = std::time::SystemTime::now()
72        .duration_since(std::time::UNIX_EPOCH)
73        .map(|d| d.as_millis() as u64)
74        .unwrap_or(0);
75    let ts = if event.ts_ms == 0 {
76        now_ms
77    } else {
78        event.ts_ms
79    };
80    let mut event = event;
81    event.ts_ms = ts;
82    let ev = collect::hooks::normalize::hook_to_event(&event, 0);
83    if let Some(status) = collect::hooks::normalize::hook_to_status(&event.kind) {
84        if matches!(event.kind, collect::hooks::EventKind::SessionStart) {
85            let snap = prompt::snapshot::capture(&ws, now_ms).ok();
86            let fingerprint = snap.as_ref().map(|s| s.fingerprint.clone());
87            if let Some(ref s) = snap {
88                let _ = store.upsert_prompt_snapshot(s);
89            }
90            let model = collect::model_from_json::from_value(&event.payload);
91            let env = session_env_fields(&event.payload);
92            let record = SessionRecord {
93                id: event.session_id.clone(),
94                agent: source.agent().to_string(),
95                model,
96                workspace: ws.to_string_lossy().to_string(),
97                started_at_ms: event.ts_ms,
98                ended_at_ms: None,
99                status: status.clone(),
100                trace_path: String::new(),
101                start_commit: None,
102                end_commit: None,
103                branch: None,
104                dirty_start: None,
105                dirty_end: None,
106                repo_binding_source: None,
107                prompt_fingerprint: fingerprint,
108                parent_session_id: None,
109                agent_version: env.0,
110                os: env.1,
111                arch: env.2,
112                repo_file_count: None,
113                repo_total_loc: None,
114            };
115            store.upsert_session(&record)?;
116        } else {
117            store.ensure_session_stub(
118                &event.session_id,
119                source.agent(),
120                &ws.to_string_lossy(),
121                event.ts_ms,
122            )?;
123            if matches!(event.kind, collect::hooks::EventKind::Stop) {
124                maybe_emit_prompt_changed(
125                    &store,
126                    &event.session_id,
127                    &ws,
128                    now_ms,
129                    &ev,
130                    sync_ctx.as_ref(),
131                )?;
132            }
133            store.update_session_status(&event.session_id, status)?;
134        }
135    } else {
136        store.ensure_session_stub(
137            &event.session_id,
138            source.agent(),
139            &ws.to_string_lossy(),
140            event.ts_ms,
141        )?;
142    }
143    store.append_event_with_sync(&ev, sync_ctx.as_ref())?;
144    post_ingest_detached(&event, &cfg, &ws)?;
145    Ok(())
146}
147
148/// Non-blocking sidecars: outcome worker, sampler child, stop file (hooks stay short).
149fn post_ingest_detached(
150    event: &collect::hooks::HookEvent,
151    cfg: &config::Config,
152    ws: &std::path::Path,
153) -> Result<()> {
154    if matches!(event.kind, EventKind::Stop) {
155        if cfg.collect.outcomes.enabled {
156            spawn_outcome_measure(ws, &event.session_id);
157        }
158        if cfg.collect.system_sampler.enabled {
159            touch_sampler_stop_file(ws, &event.session_id);
160        }
161    }
162    if matches!(event.kind, EventKind::SessionStart)
163        && cfg.collect.system_sampler.enabled
164        && let Some(pid) = payload_pid(&event.payload)
165    {
166        spawn_sampler_run(ws, &event.session_id, pid);
167    }
168    Ok(())
169}
170
171fn payload_pid(v: &Value) -> Option<u32> {
172    v.get("pid")
173        .and_then(|x| x.as_u64().map(|n| n as u32))
174        .or_else(|| {
175            v.get("pid")
176                .and_then(|x| x.as_i64())
177                .and_then(|i| u32::try_from(i).ok())
178        })
179}
180
181fn spawn_outcome_measure(ws: &std::path::Path, session_id: &str) {
182    let args = vec![
183        OsString::from("outcomes"),
184        OsString::from("measure"),
185        OsString::from("--workspace"),
186        ws.as_os_str().to_owned(),
187        OsString::from("--session"),
188        OsString::from(session_id),
189    ];
190    if let Err(e) = super::kaizen_child::spawn_kaizen_detached(&args) {
191        tracing::warn!(?e, "kaizen outcomes measure");
192    }
193}
194
195fn spawn_sampler_run(ws: &std::path::Path, session_id: &str, pid: u32) {
196    let args = vec![
197        OsString::from("__sampler-run"),
198        OsString::from("--workspace"),
199        ws.as_os_str().to_owned(),
200        OsString::from("--session"),
201        OsString::from(session_id),
202        OsString::from("--pid"),
203        OsString::from(pid.to_string()),
204    ];
205    if let Err(e) = super::kaizen_child::spawn_kaizen_detached(&args) {
206        tracing::warn!(?e, "kaizen sampler");
207    }
208}
209
210fn touch_sampler_stop_file(ws: &std::path::Path, session_id: &str) {
211    let dir = ws.join(".kaizen/sampler-stop");
212    if let Err(e) = std::fs::create_dir_all(&dir) {
213        tracing::warn!(?e, "sampler-stop mkdir");
214        return;
215    }
216    let path = dir.join(session_id);
217    if let Err(e) = std::fs::File::create(&path) {
218        tracing::warn!(?e, "sampler-stop touch");
219    }
220}
221
222fn session_env_fields(payload: &Value) -> (Option<String>, Option<String>, Option<String>) {
223    let ver = [
224        "cursor_version",
225        "claude_version",
226        "agent_version",
227        "version",
228    ]
229    .into_iter()
230    .find_map(|k| {
231        payload
232            .get(k)
233            .and_then(|v| v.as_str())
234            .map(|s| s.to_string())
235    });
236    let os = payload
237        .get("os")
238        .and_then(|v| v.as_str())
239        .map(|s| s.to_string());
240    let arch = payload
241        .get("arch")
242        .and_then(|v| v.as_str())
243        .map(|s| s.to_string());
244    (ver, os, arch)
245}
246
247fn maybe_emit_prompt_changed(
248    store: &Store,
249    session_id: &str,
250    ws: &std::path::Path,
251    now_ms: u64,
252    trigger_ev: &crate::core::event::Event,
253    sync_ctx: Option<&crate::sync::context::SyncIngestContext>,
254) -> Result<()> {
255    let Some(session) = store.get_session(session_id)? else {
256        return Ok(());
257    };
258    let Some(from_fp) = session.prompt_fingerprint else {
259        return Ok(());
260    };
261    let snap = prompt::snapshot::capture(ws, now_ms).ok();
262    let Some(snap) = snap else { return Ok(()) };
263    if snap.fingerprint == from_fp {
264        return Ok(());
265    }
266    let _ = store.upsert_prompt_snapshot(&snap);
267    let changed_ev = crate::core::event::Event {
268        session_id: session_id.to_string(),
269        seq: trigger_ev.seq + 1,
270        ts_ms: now_ms,
271        ts_exact: true,
272        kind: crate::core::event::EventKind::Hook,
273        source: crate::core::event::EventSource::Hook,
274        tool: None,
275        tool_call_id: None,
276        tokens_in: None,
277        tokens_out: None,
278        reasoning_tokens: None,
279        cost_usd_e6: None,
280        stop_reason: None,
281        latency_ms: None,
282        ttft_ms: None,
283        retry_count: None,
284        context_used_tokens: None,
285        context_max_tokens: None,
286        cache_creation_tokens: None,
287        cache_read_tokens: None,
288        system_prompt_tokens: None,
289        payload: serde_json::json!({
290            "kind": "prompt_changed",
291            "from_fingerprint": from_fp,
292            "to_fingerprint": snap.fingerprint,
293        }),
294    };
295    store.append_event_with_sync(&changed_ev, sync_ctx)?;
296    Ok(())
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fresh temporary workspace that already contains a `.kaizen/` directory.
    fn ws_with_kaizen_dir() -> TempDir {
        let tmp = TempDir::new().unwrap();
        let kaizen_dir = tmp.path().join(".kaizen");
        std::fs::create_dir_all(kaizen_dir).unwrap();
        tmp
    }

    /// Ingest one hook payload into `ws`, panicking if ingestion fails.
    fn ingest(ws: &TempDir, source: IngestSource, payload: &str) {
        ingest_hook_text(source, payload, Some(ws.path().to_path_buf())).unwrap();
    }

    /// Reopen the store that `ingest_hook_text` wrote to.
    fn open_store(ws: &TempDir) -> Store {
        Store::open(&ws.path().join(".kaizen/kaizen.db")).unwrap()
    }

    #[test]
    fn session_start_records_source_as_agent_not_unknown() {
        let ws = ws_with_kaizen_dir();
        ingest(
            &ws,
            IngestSource::Claude,
            r#"{"hook_event_name":"SessionStart","session_id":"s-agent-1","source":"startup"}"#,
        );

        let db = open_store(&ws);
        let sessions = db
            .list_sessions(ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert_eq!(sessions.len(), 1);
        assert_eq!(sessions[0].agent, "claude");
    }

    #[test]
    fn missing_timestamp_falls_back_to_now() {
        let ws = ws_with_kaizen_dir();
        // The payload has no timestamp_ms field — Claude Code never sends one.
        ingest(
            &ws,
            IngestSource::Claude,
            r#"{"hook_event_name":"SessionStart","session_id":"s-ts","source":"startup"}"#,
        );

        let db = open_store(&ws);
        let sessions = db
            .list_sessions(ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert!(sessions[0].started_at_ms > 0, "started_at_ms must not be 0");
    }

    #[test]
    fn post_tool_use_without_session_start_auto_provisions_stub() {
        let ws = ws_with_kaizen_dir();
        // Hooks installed mid-session: the first event seen is PostToolUse,
        // with no preceding SessionStart.
        ingest(
            &ws,
            IngestSource::Cursor,
            r#"{"event":"PostToolUse","session_id":"s-stub","tool_name":"Read","tool_input":{"file_path":"/tmp/x"},"tool_response":{"content":"hi"}}"#,
        );

        let db = open_store(&ws);
        let sessions = db
            .list_sessions(ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert_eq!(sessions.len(), 1);
        assert_eq!(sessions[0].agent, "cursor");
        assert_eq!(sessions[0].id, "s-stub");
    }
}