Skip to main content

kaizen/shell/
ingest.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! `kaizen ingest` — hook ingestion (stdin or explicit payload for MCP).
3
4use crate::collect::hooks::EventKind;
5use crate::core::config;
6use crate::store::Store;
7use crate::{collect, core::event::SessionRecord, prompt};
8use anyhow::Result;
9use serde_json::Value;
10use std::ffi::OsString;
11use std::path::PathBuf;
12
13/// Hook source, aligned with the `kaizen ingest hook --source` CLI.
14#[derive(Clone, Copy, Debug, serde::Deserialize, serde::Serialize)]
15#[serde(rename_all = "lowercase")]
16pub enum IngestSource {
17    Cursor,
18    Claude,
19    Openclaw,
20    Vibe,
21}
22
23impl IngestSource {
24    pub fn parse(s: &str) -> Option<Self> {
25        match s.to_lowercase().as_str() {
26            "cursor" => Some(Self::Cursor),
27            "claude" => Some(Self::Claude),
28            "openclaw" => Some(Self::Openclaw),
29            "vibe" => Some(Self::Vibe),
30            _ => None,
31        }
32    }
33
34    pub fn agent(self) -> &'static str {
35        match self {
36            Self::Cursor => "cursor",
37            Self::Claude => "claude",
38            Self::Openclaw => "openclaw",
39            Self::Vibe => "vibe",
40        }
41    }
42}
43
44/// Process hook JSON (same as stdin for `kaizen ingest hook`). On success, returns empty string (CLI prints nothing).
45pub fn ingest_hook_string(
46    source: IngestSource,
47    input: &str,
48    workspace: Option<PathBuf>,
49) -> Result<String> {
50    ingest_hook_text(source, input, workspace)?;
51    Ok(String::new())
52}
53
54/// Process hook JSON (same as stdin for `kaizen ingest hook`).
55pub fn ingest_hook_text(
56    source: IngestSource,
57    input: &str,
58    workspace: Option<PathBuf>,
59) -> Result<()> {
60    let event = match source {
61        IngestSource::Cursor => collect::hooks::cursor::parse_cursor_hook(input)?,
62        IngestSource::Claude => collect::hooks::claude::parse_claude_hook(input)?,
63        IngestSource::Openclaw => collect::hooks::openclaw::parse_openclaw_hook(input)?,
64        IngestSource::Vibe => collect::hooks::vibe::parse_vibe_hook(input)?,
65    };
66    let ws = workspace.unwrap_or_else(|| std::env::current_dir().expect("cwd"));
67    let cfg = config::load(&ws)?;
68    let sync_ctx = crate::sync::ingest_ctx(&cfg, ws.clone());
69    let db_path = crate::core::workspace::db_path(&ws)?;
70    let store = Store::open(&db_path)?;
71    let now_ms = std::time::SystemTime::now()
72        .duration_since(std::time::UNIX_EPOCH)
73        .map(|d| d.as_millis() as u64)
74        .unwrap_or(0);
75    let ts = if event.ts_ms == 0 {
76        now_ms
77    } else {
78        event.ts_ms
79    };
80    let mut event = event;
81    event.ts_ms = ts;
82    let ev = collect::hooks::normalize::hook_to_event(&event, 0);
83    if let Some(status) = collect::hooks::normalize::hook_to_status(&event.kind) {
84        if matches!(event.kind, collect::hooks::EventKind::SessionStart) {
85            let snap = prompt::snapshot::capture(&ws, now_ms).ok();
86            let fingerprint = snap.as_ref().map(|s| s.fingerprint.clone());
87            if let Some(ref s) = snap {
88                let _ = store.upsert_prompt_snapshot(s);
89            }
90            let model = collect::model_from_json::from_value(&event.payload);
91            let env = session_env_fields(&event.payload);
92            let record = SessionRecord {
93                id: event.session_id.clone(),
94                agent: source.agent().to_string(),
95                model,
96                workspace: ws.to_string_lossy().to_string(),
97                started_at_ms: event.ts_ms,
98                ended_at_ms: None,
99                status: status.clone(),
100                trace_path: String::new(),
101                start_commit: None,
102                end_commit: None,
103                branch: None,
104                dirty_start: None,
105                dirty_end: None,
106                repo_binding_source: None,
107                prompt_fingerprint: fingerprint,
108                parent_session_id: None,
109                agent_version: env.0,
110                os: env.1,
111                arch: env.2,
112                repo_file_count: None,
113                repo_total_loc: None,
114            };
115            store.upsert_session(&record)?;
116        } else {
117            store.ensure_session_stub(
118                &event.session_id,
119                source.agent(),
120                &ws.to_string_lossy(),
121                event.ts_ms,
122            )?;
123            if matches!(event.kind, collect::hooks::EventKind::Stop) {
124                maybe_emit_prompt_changed(
125                    &store,
126                    &event.session_id,
127                    &ws,
128                    now_ms,
129                    &ev,
130                    sync_ctx.as_ref(),
131                )?;
132            }
133            store.update_session_status(&event.session_id, status)?;
134        }
135    } else {
136        store.ensure_session_stub(
137            &event.session_id,
138            source.agent(),
139            &ws.to_string_lossy(),
140            event.ts_ms,
141        )?;
142    }
143    store.append_event_with_sync(&ev, sync_ctx.as_ref())?;
144    post_ingest_detached(&event, &cfg, &ws)?;
145    Ok(())
146}
147
148/// Non-blocking sidecars: outcome worker, sampler child, stop file (hooks stay short).
149fn post_ingest_detached(
150    event: &collect::hooks::HookEvent,
151    cfg: &config::Config,
152    ws: &std::path::Path,
153) -> Result<()> {
154    if matches!(event.kind, EventKind::Stop) {
155        if cfg.collect.outcomes.enabled {
156            spawn_outcome_measure(ws, &event.session_id);
157        }
158        if cfg.collect.system_sampler.enabled {
159            touch_sampler_stop_file(ws, &event.session_id);
160        }
161    }
162    if matches!(event.kind, EventKind::SessionStart)
163        && cfg.collect.system_sampler.enabled
164        && let Some(pid) = payload_pid(&event.payload)
165    {
166        spawn_sampler_run(ws, &event.session_id, pid);
167    }
168    Ok(())
169}
170
171fn payload_pid(v: &Value) -> Option<u32> {
172    v.get("pid")
173        .and_then(|x| x.as_u64().map(|n| n as u32))
174        .or_else(|| {
175            v.get("pid")
176                .and_then(|x| x.as_i64())
177                .and_then(|i| u32::try_from(i).ok())
178        })
179}
180
181fn spawn_outcome_measure(ws: &std::path::Path, session_id: &str) {
182    let args = vec![
183        OsString::from("outcomes"),
184        OsString::from("measure"),
185        OsString::from("--workspace"),
186        ws.as_os_str().to_owned(),
187        OsString::from("--session"),
188        OsString::from(session_id),
189    ];
190    if let Err(e) = super::kaizen_child::spawn_kaizen_detached(&args) {
191        tracing::warn!(?e, "kaizen outcomes measure");
192    }
193}
194
195fn spawn_sampler_run(ws: &std::path::Path, session_id: &str, pid: u32) {
196    let args = vec![
197        OsString::from("__sampler-run"),
198        OsString::from("--workspace"),
199        ws.as_os_str().to_owned(),
200        OsString::from("--session"),
201        OsString::from(session_id),
202        OsString::from("--pid"),
203        OsString::from(pid.to_string()),
204    ];
205    if let Err(e) = super::kaizen_child::spawn_kaizen_detached(&args) {
206        tracing::warn!(?e, "kaizen sampler");
207    }
208}
209
210fn touch_sampler_stop_file(ws: &std::path::Path, session_id: &str) {
211    let dir = match crate::core::paths::project_data_dir(ws) {
212        Ok(d) => d.join("sampler-stop"),
213        Err(e) => {
214            tracing::warn!(?e, "sampler-stop: no data dir");
215            return;
216        }
217    };
218    if let Err(e) = std::fs::create_dir_all(&dir) {
219        tracing::warn!(?e, "sampler-stop mkdir");
220        return;
221    }
222    let path = dir.join(session_id);
223    if let Err(e) = std::fs::File::create(&path) {
224        tracing::warn!(?e, "sampler-stop touch");
225    }
226}
227
228fn session_env_fields(payload: &Value) -> (Option<String>, Option<String>, Option<String>) {
229    let ver = [
230        "cursor_version",
231        "claude_version",
232        "agent_version",
233        "version",
234    ]
235    .into_iter()
236    .find_map(|k| {
237        payload
238            .get(k)
239            .and_then(|v| v.as_str())
240            .map(|s| s.to_string())
241    });
242    let os = payload
243        .get("os")
244        .and_then(|v| v.as_str())
245        .map(|s| s.to_string());
246    let arch = payload
247        .get("arch")
248        .and_then(|v| v.as_str())
249        .map(|s| s.to_string());
250    (ver, os, arch)
251}
252
253fn maybe_emit_prompt_changed(
254    store: &Store,
255    session_id: &str,
256    ws: &std::path::Path,
257    now_ms: u64,
258    trigger_ev: &crate::core::event::Event,
259    sync_ctx: Option<&crate::sync::context::SyncIngestContext>,
260) -> Result<()> {
261    let Some(session) = store.get_session(session_id)? else {
262        return Ok(());
263    };
264    let Some(from_fp) = session.prompt_fingerprint else {
265        return Ok(());
266    };
267    let snap = prompt::snapshot::capture(ws, now_ms).ok();
268    let Some(snap) = snap else { return Ok(()) };
269    if snap.fingerprint == from_fp {
270        return Ok(());
271    }
272    let _ = store.upsert_prompt_snapshot(&snap);
273    let changed_ev = crate::core::event::Event {
274        session_id: session_id.to_string(),
275        seq: trigger_ev.seq + 1,
276        ts_ms: now_ms,
277        ts_exact: true,
278        kind: crate::core::event::EventKind::Hook,
279        source: crate::core::event::EventSource::Hook,
280        tool: None,
281        tool_call_id: None,
282        tokens_in: None,
283        tokens_out: None,
284        reasoning_tokens: None,
285        cost_usd_e6: None,
286        stop_reason: None,
287        latency_ms: None,
288        ttft_ms: None,
289        retry_count: None,
290        context_used_tokens: None,
291        context_max_tokens: None,
292        cache_creation_tokens: None,
293        cache_read_tokens: None,
294        system_prompt_tokens: None,
295        payload: serde_json::json!({
296            "kind": "prompt_changed",
297            "from_fingerprint": from_fp,
298            "to_fingerprint": snap.fingerprint,
299        }),
300    };
301    store.append_event_with_sync(&changed_ev, sync_ctx)?;
302    Ok(())
303}
304
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::paths::test_lock;
    use tempfile::TempDir;

    /// Temporary `KAIZEN_HOME` plus a temporary workspace for a single test.
    ///
    /// `KAIZEN_HOME` is unset again on drop, so it is cleaned up even when the test
    /// panics part-way through.
    struct Fixture {
        _home: TempDir,
        ws: TempDir,
    }

    impl Fixture {
        /// Must be called while holding `test_lock::global()`.
        fn new() -> Self {
            let home = TempDir::new().unwrap();
            let ws = TempDir::new().unwrap();
            // SAFETY: the caller holds `test_lock::global()`, which serialises the tests
            // in this module. Assumes every other test touching the process
            // environment takes the same lock (TODO confirm).
            unsafe { std::env::set_var("KAIZEN_HOME", home.path()) };
            Self { _home: home, ws }
        }
    }

    impl Drop for Fixture {
        fn drop(&mut self) {
            // SAFETY: the fixture is created after, and dropped before, the test's lock
            // guard, so the global test lock is still held here.
            unsafe { std::env::remove_var("KAIZEN_HOME") };
        }
    }

    /// Ingest `payload` into a fresh workspace and reopen its store for inspection.
    fn ingest_into_fresh_ws(source: IngestSource, payload: &str) -> (Fixture, Store) {
        let fx = Fixture::new();
        ingest_hook_text(source, payload, Some(fx.ws.path().to_path_buf())).unwrap();
        let db = Store::open(&crate::core::workspace::db_path(fx.ws.path()).unwrap()).unwrap();
        (fx, db)
    }

    #[test]
    fn session_start_records_source_as_agent_not_unknown() {
        // Recover a poisoned lock so one failing test does not fail all the others.
        let _guard = test_lock::global()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let payload =
            r#"{"hook_event_name":"SessionStart","session_id":"s-agent-1","source":"startup"}"#;
        let (fx, db) = ingest_into_fresh_ws(IngestSource::Claude, payload);
        let sessions = db
            .list_sessions(fx.ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert_eq!(sessions.len(), 1);
        assert_eq!(sessions[0].agent, "claude");
    }

    #[test]
    fn missing_timestamp_falls_back_to_now() {
        let _guard = test_lock::global()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let payload =
            r#"{"hook_event_name":"SessionStart","session_id":"s-ts","source":"startup"}"#;
        let (fx, db) = ingest_into_fresh_ws(IngestSource::Claude, payload);
        let sessions = db
            .list_sessions(fx.ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert!(sessions[0].started_at_ms > 0, "started_at_ms must not be 0");
    }

    #[test]
    fn post_tool_use_without_session_start_auto_provisions_stub() {
        let _guard = test_lock::global()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let payload = r#"{"event":"PostToolUse","session_id":"s-stub","tool_name":"Read","tool_input":{"file_path":"/tmp/x"},"tool_response":{"content":"hi"}}"#;
        let (fx, db) = ingest_into_fresh_ws(IngestSource::Cursor, payload);
        let sessions = db
            .list_sessions(fx.ws.path().to_string_lossy().as_ref())
            .unwrap();
        assert_eq!(sessions.len(), 1);
        assert_eq!(sessions[0].agent, "cursor");
        assert_eq!(sessions[0].id, "s-stub");
    }
}
364}