mi6_cli/commands/
log.rs

1use std::collections::HashMap;
2use std::io::Read;
3use std::path::Path;
4
5use anyhow::{Context, Result};
6use chrono::Utc;
7use mi6_core::{
8    Config, EventBuilder, EventType, FrameworkAdapter, ParsedHookInput, Storage, default_adapter,
9    detect_all_frameworks, detect_framework, get_adapter, get_branch_info, get_github_repo,
10    get_local_git_dir, is_branch_changing_command,
11};
12
13use crate::process::{ClaudeProcessInfo, find_claude_process, get_parent_pid};
14
/// Result of running the log command.
///
/// Carries the identifiers a caller needs to optionally scan the session
/// transcript after the event has been logged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogResult {
    /// Transcript path to scan, if available
    pub transcript_path: Option<String>,
    /// Machine ID for transcript parsing
    pub machine_id: String,
    /// Session ID for potential backfill operations
    pub session_id: String,
}
24
25/// Run the log command. This must be fast (<10ms) and never block the calling framework.
26///
27/// Returns information needed for optional transcript scanning.
28pub fn run_log<S: Storage>(
29    storage: &S,
30    event_type_arg: Option<String>,
31    json_payload: Option<String>,
32    framework_name: Option<String>,
33) -> Result<LogResult> {
34    // Determine the framework adapter:
35    // 1. Use explicit --framework flag if provided
36    // 2. Otherwise, try to auto-detect from environment variables
37    // 3. Fall back to default (claude)
38    let adapter: &dyn FrameworkAdapter = if let Some(ref name) = framework_name {
39        get_adapter(name).ok_or_else(|| anyhow::anyhow!("unknown framework: {}", name))?
40    } else {
41        // Warn if multiple frameworks detected (to stderr, doesn't affect hook)
42        let detected = detect_all_frameworks();
43        if detected.len() > 1 {
44            let names: Vec<_> = detected.iter().map(|a| a.name()).collect();
45            eprintln!(
46                "mi6: warning: multiple frameworks detected ({}), using {}",
47                names.join(", "),
48                detected[0].name()
49            );
50        }
51        detect_framework().unwrap_or_else(default_adapter)
52    };
53
54    // Handle argument parsing:
55    // - Claude/Gemini: `mi6 ingest event SessionStart` with JSON on stdin
56    // - Codex: `mi6 ingest event --framework codex '{"type":"agent-turn-complete",...}'`
57    //
58    // Codex passes JSON as the only positional arg, which clap captures as event_type_arg.
59    // We detect this by checking if event_type_arg looks like JSON (starts with '{').
60    let (actual_event_type, json_str) = if let Some(ref arg) = event_type_arg {
61        if arg.trim().starts_with('{') {
62            // First positional arg is JSON (Codex format) - extract event type from it
63            (None, arg.clone())
64        } else {
65            // First positional arg is event type (Claude/Gemini format)
66            let json = if let Some(payload) = json_payload {
67                payload
68            } else {
69                let mut stdin_data = String::new();
70                std::io::stdin()
71                    .read_to_string(&mut stdin_data)
72                    .context("failed to read stdin")?;
73                stdin_data
74            };
75            (Some(arg.clone()), json)
76        }
77    } else {
78        // No positional args - read JSON from stdin
79        let mut stdin_data = String::new();
80        std::io::stdin()
81            .read_to_string(&mut stdin_data)
82            .context("failed to read stdin")?;
83        (None, stdin_data)
84    };
85
86    // Parse the hook JSON
87    let hook_data: serde_json::Value = if json_str.trim().is_empty() {
88        serde_json::json!({})
89    } else {
90        serde_json::from_str(&json_str).context("failed to parse hook JSON")?
91    };
92
93    // Determine event type:
94    // 1. Use explicit CLI argument if provided
95    // 2. Otherwise extract from JSON payload's "type" field (Codex CLI format)
96    // 3. Fall back to "Unknown" if neither available
97    let event_type_str = if let Some(ref et) = actual_event_type {
98        et.clone()
99    } else {
100        // Extract from JSON "type" field (Codex CLI format)
101        hook_data
102            .get("type")
103            .and_then(|v| v.as_str())
104            .map_or_else(|| "Unknown".to_string(), String::from)
105    };
106
107    // Map the event type using the adapter (handles framework-specific event names)
108    let event_type: EventType = adapter.map_event_type(&event_type_str);
109
110    // Use adapter to parse hook input into normalized fields
111    let parsed = adapter.parse_hook_input(&event_type_str, &hook_data);
112
113    // Extract session_id (required field, default to "unknown")
114    let session_id = parsed
115        .session_id
116        .clone()
117        .unwrap_or_else(|| "unknown".to_string());
118
119    // Find framework process for SessionStart OR frameworks without SessionStart events.
120    // This walks up the process tree to find the actual framework process PID.
121    //
122    // Note: For Cursor, the PID is captured for display purposes but is NOT used for
123    // liveness detection (handled in builder.rs). Cursor is an IDE where multiple
124    // conversation sessions share the same process, so PID-based liveness doesn't work.
125    //
126    // Codex and Cursor don't have SessionStart events, so we capture the process info
127    // on every event to ensure we get the PID on the first event.
128    let process_info =
129        if event_type == EventType::SessionStart || matches!(adapter.name(), "codex" | "cursor") {
130            find_claude_process()
131        } else {
132            None
133        };
134
135    // Capture PID - use cached process info for SessionStart, parent PID otherwise
136    let pid = process_info
137        .as_ref()
138        .map(|info| info.pid)
139        .or_else(get_parent_pid);
140
141    // Build payload: merge hook JSON with environment variables and process info
142    let payload = build_payload(hook_data, &event_type, process_info.as_ref())?;
143
144    // Load config and get machine_id
145    let config = Config::load().unwrap_or_default();
146    let machine_id = config.machine_id();
147
148    // Get timestamp for both git context upsert and event
149    let now = Utc::now();
150    let timestamp_ms = now.timestamp_millis();
151
152    // Capture git context BEFORE inserting the event.
153    // This ensures github_repo is set before branch parsing, so that when
154    // issue/PR numbers are extracted from the branch name, they're associated
155    // with the correct repository. If the repo changes, issue/PR are cleared first.
156    upsert_git_context_if_available(
157        storage,
158        &session_id,
159        &machine_id,
160        adapter.name(),
161        timestamp_ms,
162        &parsed,
163    );
164
165    // Create and insert event
166    let mut builder = EventBuilder::new(&machine_id, event_type.clone(), session_id.clone())
167        .framework(adapter.name())
168        .timestamp(now)
169        .tool_use_id_opt(parsed.tool_use_id.clone())
170        .spawned_agent_id_opt(parsed.spawned_agent_id.clone())
171        .tool_name_opt(parsed.tool_name.clone())
172        .subagent_type_opt(parsed.subagent_type.clone())
173        .permission_mode_opt(parsed.permission_mode.clone())
174        .transcript_path_opt(parsed.transcript_path.clone())
175        .model_opt(parsed.model.clone())
176        .duration_ms_opt(parsed.duration_ms)
177        .pid_opt(pid)
178        .cwd_opt(parsed.cwd.clone())
179        .payload(payload)
180        .source("hook");
181
182    // Add token fields if present (for OpenCode ApiRequest events)
183    if let (Some(input), Some(output)) = (parsed.tokens_input, parsed.tokens_output) {
184        builder = builder.tokens(input, output);
185    }
186    if let (Some(read), Some(write)) = (parsed.tokens_cache_read, parsed.tokens_cache_write) {
187        builder = builder.cache_tokens(read, write);
188    }
189    if let Some(cost) = parsed.cost_usd {
190        builder = builder.cost(cost);
191    }
192
193    let event = builder.build();
194
195    storage.insert(&event).context("failed to insert event")?;
196
197    // Capture git branch info for relevant event types (branch changes, SessionStart)
198    capture_git_branch_if_needed(storage, &event_type, &session_id, &parsed);
199
200    // Opportunistic GC (~1.2% of calls: 3/256)
201    if rand::random::<u8>() < 3 {
202        let _ = storage.gc(config.history_duration());
203    }
204
205    // Output hook response for frameworks with blocking hooks (e.g., Cursor)
206    // This must be written to stdout for the framework to receive it
207    if let Some(response) = adapter.hook_response(&event_type_str) {
208        println!("{}", response);
209    }
210
211    // Return info for optional transcript scanning
212    Ok(LogResult {
213        transcript_path: parsed.transcript_path.clone(),
214        machine_id,
215        session_id,
216    })
217}
218
219/// Build the payload JSON by merging hook data with environment variables
220fn build_payload(
221    mut hook_data: serde_json::Value,
222    event_type: &EventType,
223    process_info: Option<&ClaudeProcessInfo>,
224) -> Result<String> {
225    // Environment variables to capture
226    let env_vars = [
227        ("CLAUDE_PROJECT_DIR", "project_dir"),
228        ("CLAUDE_FILE_PATHS", "file_paths"),
229        ("CLAUDE_TOOL_INPUT", "tool_input_env"),
230        ("CLAUDE_TOOL_OUTPUT", "tool_output_env"),
231        ("CLAUDE_NOTIFICATION", "notification_env"),
232        ("CLAUDE_CODE_REMOTE", "remote"),
233        ("CLAUDE_ENV_FILE", "env_file"),
234    ];
235
236    // Ensure we have an object to work with
237    if !hook_data.is_object() {
238        hook_data = serde_json::json!({ "_raw": hook_data });
239    }
240
241    let Some(obj) = hook_data.as_object_mut() else {
242        // We just ensured it's an object above, so this is unreachable
243        return Ok(serde_json::to_string(&hook_data)?);
244    };
245
246    // Add environment variables to the payload
247    let mut env_data: HashMap<String, String> = HashMap::new();
248    for (env_var, key) in env_vars {
249        if let Ok(value) = std::env::var(env_var) {
250            env_data.insert(key.to_string(), value);
251        }
252    }
253
254    if !env_data.is_empty() {
255        obj.insert("_env".to_string(), serde_json::to_value(env_data)?);
256    }
257
258    // For SessionStart, add process info to payload (using cached info)
259    if *event_type == EventType::SessionStart
260        && let Some(info) = process_info
261    {
262        obj.insert(
263            "_claude_process".to_string(),
264            serde_json::json!({
265                "pid": info.pid,
266                "comm": info.comm
267            }),
268        );
269    }
270
271    Ok(serde_json::to_string(&hook_data)?)
272}
273
274/// Upsert git context (local_git_dir and github_repo) for the session.
275///
276/// This is called BEFORE the event is inserted to ensure git context is set
277/// before branch parsing. When github_repo changes, issue/PR numbers are cleared
278/// because they're only meaningful in the context of a specific repository.
279///
280/// The function is designed to be fast (~50µs with direct file access) and never fail -
281/// errors are silently ignored since git info is supplementary data.
282fn upsert_git_context_if_available<S: Storage>(
283    storage: &S,
284    session_id: &str,
285    machine_id: &str,
286    framework: &str,
287    timestamp_ms: i64,
288    parsed: &ParsedHookInput,
289) {
290    if let Some(ref cwd) = parsed.cwd {
291        let cwd_path = Path::new(cwd);
292
293        // Capture git directory path and GitHub repo
294        let local_git_dir = get_local_git_dir(cwd_path);
295        let github_repo = get_github_repo(cwd_path);
296
297        if local_git_dir.is_some() || github_repo.is_some() {
298            let _ = storage.upsert_session_git_context(
299                session_id,
300                machine_id,
301                framework,
302                timestamp_ms,
303                local_git_dir.as_deref(),
304                github_repo.as_deref(),
305            );
306        }
307    }
308}
309
310/// Capture git branch info for specific event types.
311///
312/// Handles:
313/// - SessionStart: Captures initial git branch
314/// - PostToolUse (Bash): Detects git branch-changing commands and updates branch info
315///
316/// Branch parsing extracts issue/PR numbers from branch names like:
317/// - `feature/issue-123` -> github_issue: 123
318/// - `fix/GH-456` -> github_issue: 456
319/// - `pr-789` -> github_pr: 789
320fn capture_git_branch_if_needed<S: Storage>(
321    storage: &S,
322    event_type: &EventType,
323    session_id: &str,
324    parsed: &ParsedHookInput,
325) {
326    match event_type {
327        EventType::SessionStart => {
328            if let Some(ref cwd) = parsed.cwd {
329                let cwd_path = Path::new(cwd);
330
331                // Capture initial git branch from session's cwd
332                if let Some(git_info) = get_branch_info(cwd_path) {
333                    let _ = storage.update_session_git_info(session_id, &git_info);
334                }
335            }
336        }
337        EventType::PostToolUse => {
338            // Check if this is a Bash tool call with a git branch-changing command
339            if parsed.tool_name.as_deref() == Some("Bash") {
340                // Get command from CLAUDE_TOOL_INPUT environment variable
341                if let Ok(tool_input) = std::env::var("CLAUDE_TOOL_INPUT")
342                    && let Some(cmd) = extract_bash_command(&tool_input)
343                    && is_branch_changing_command(&cmd)
344                    && let Some(ref cwd) = parsed.cwd
345                    && let Some(git_info) = get_branch_info(Path::new(cwd))
346                {
347                    let _ = storage.update_session_git_info(session_id, &git_info);
348                }
349            }
350        }
351        _ => {}
352    }
353}
354
355/// Extract the bash command from tool_input.
356///
357/// The tool_input can be either:
358/// - Raw command string
359/// - JSON object with a "command" field
360fn extract_bash_command(tool_input: &str) -> Option<String> {
361    // First try to parse as JSON
362    if let Ok(json) = serde_json::from_str::<serde_json::Value>(tool_input)
363        && let Some(cmd) = json.get("command").and_then(|v| v.as_str())
364    {
365        return Some(cmd.to_string());
366    }
367    // Otherwise, assume it's a raw command
368    Some(tool_input.to_string())
369}