mi6_cli/commands/
log.rs

1use std::collections::HashMap;
2use std::io::Read;
3use std::path::Path;
4
5use anyhow::{Context, Result};
6use chrono::Utc;
7use mi6_core::{
8    Config, EventBuilder, EventType, FrameworkAdapter, ParsedHookInput, Storage, default_adapter,
9    detect_all_frameworks, detect_framework, get_adapter, get_branch_info, get_github_repo,
10    get_local_git_dir, is_branch_changing_command,
11};
12
13use crate::process::{ClaudeProcessInfo, find_claude_process, get_parent_pid};
14
/// Result of running the log command.
///
/// Carries the identifiers the caller needs for optional follow-up work
/// (transcript scanning, backfill) after the hook event has been stored.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogResult {
    /// Transcript path to scan, if available
    pub transcript_path: Option<String>,
    /// Machine ID for transcript parsing
    pub machine_id: String,
    /// Session ID for potential backfill operations
    pub session_id: String,
}
24
25/// Run the log command. This must be fast (<10ms) and never block the calling framework.
26///
27/// Returns information needed for optional transcript scanning.
28pub fn run_log<S: Storage>(
29    storage: &S,
30    event_type_arg: Option<String>,
31    json_payload: Option<String>,
32    framework_name: Option<String>,
33) -> Result<LogResult> {
34    // Determine the framework adapter:
35    // 1. Use explicit --framework flag if provided
36    // 2. Otherwise, try to auto-detect from environment variables
37    // 3. Fall back to default (claude)
38    let adapter: &dyn FrameworkAdapter = if let Some(ref name) = framework_name {
39        get_adapter(name).ok_or_else(|| anyhow::anyhow!("unknown framework: {}", name))?
40    } else {
41        // Warn if multiple frameworks detected (to stderr, doesn't affect hook)
42        let detected = detect_all_frameworks();
43        if detected.len() > 1 {
44            let names: Vec<_> = detected.iter().map(|a| a.name()).collect();
45            eprintln!(
46                "mi6: warning: multiple frameworks detected ({}), using {}",
47                names.join(", "),
48                detected[0].name()
49            );
50        }
51        detect_framework().unwrap_or_else(default_adapter)
52    };
53
54    // Handle argument parsing:
55    // - Claude/Gemini: `mi6 ingest event SessionStart` with JSON on stdin
56    // - Codex: `mi6 ingest event --framework codex '{"type":"agent-turn-complete",...}'`
57    //
58    // Codex passes JSON as the only positional arg, which clap captures as event_type_arg.
59    // We detect this by checking if event_type_arg looks like JSON (starts with '{').
60    let (actual_event_type, json_str) = if let Some(ref arg) = event_type_arg {
61        if arg.trim().starts_with('{') {
62            // First positional arg is JSON (Codex format) - extract event type from it
63            (None, arg.clone())
64        } else {
65            // First positional arg is event type (Claude/Gemini format)
66            let json = if let Some(payload) = json_payload {
67                payload
68            } else {
69                let mut stdin_data = String::new();
70                std::io::stdin()
71                    .read_to_string(&mut stdin_data)
72                    .context("failed to read stdin")?;
73                stdin_data
74            };
75            (Some(arg.clone()), json)
76        }
77    } else {
78        // No positional args - read JSON from stdin
79        let mut stdin_data = String::new();
80        std::io::stdin()
81            .read_to_string(&mut stdin_data)
82            .context("failed to read stdin")?;
83        (None, stdin_data)
84    };
85
86    // Parse the hook JSON
87    let hook_data: serde_json::Value = if json_str.trim().is_empty() {
88        serde_json::json!({})
89    } else {
90        serde_json::from_str(&json_str).context("failed to parse hook JSON")?
91    };
92
93    // Determine event type:
94    // 1. Use explicit CLI argument if provided
95    // 2. Otherwise extract from JSON payload's "type" field (Codex CLI format)
96    // 3. Fall back to "Unknown" if neither available
97    let event_type_str = if let Some(ref et) = actual_event_type {
98        et.clone()
99    } else {
100        // Extract from JSON "type" field (Codex CLI format)
101        hook_data
102            .get("type")
103            .and_then(|v| v.as_str())
104            .map_or_else(|| "Unknown".to_string(), String::from)
105    };
106
107    // Map the event type using the adapter (handles framework-specific event names)
108    let event_type: EventType = adapter.map_event_type(&event_type_str);
109
110    // Use adapter to parse hook input into normalized fields
111    let parsed = adapter.parse_hook_input(&event_type_str, &hook_data);
112
113    // Extract session_id (required field, default to "unknown")
114    let session_id = parsed
115        .session_id
116        .clone()
117        .unwrap_or_else(|| "unknown".to_string());
118
119    // Find framework process for SessionStart OR frameworks without SessionStart events.
120    // This walks up the process tree to find the actual framework process PID.
121    //
122    // Note: For Cursor, the PID is captured for display purposes but is NOT used for
123    // liveness detection (handled in builder.rs). Cursor is an IDE where multiple
124    // conversation sessions share the same process, so PID-based liveness doesn't work.
125    //
126    // Codex and Cursor don't have SessionStart events, so we capture the process info
127    // on every event to ensure we get the PID on the first event.
128    let process_info =
129        if event_type == EventType::SessionStart || matches!(adapter.name(), "codex" | "cursor") {
130            find_claude_process()
131        } else {
132            None
133        };
134
135    // Capture PID - use cached process info for SessionStart, parent PID otherwise
136    let pid = process_info
137        .as_ref()
138        .map(|info| info.pid)
139        .or_else(get_parent_pid);
140
141    // Capture process start time for PID identity validation.
142    // This creates a globally unique (pid, start_time) identifier that:
143    // - Prevents false positives from PID reuse
144    // - Handles late mi6 installation (process running before mi6 started)
145    let process_start_time = pid.and_then(prock::get_start_time);
146
147    // Build payload: merge hook JSON with environment variables and process info
148    let payload = build_payload(hook_data, &event_type, process_info.as_ref())?;
149
150    // Load config and get machine_id
151    let config = Config::load().unwrap_or_default();
152    let machine_id = config.machine_id();
153
154    // Get timestamp for both git context upsert and event
155    let now = Utc::now();
156    let timestamp_ms = now.timestamp_millis();
157
158    // Capture git context BEFORE inserting the event.
159    // This ensures github_repo is set before branch parsing, so that when
160    // issue/PR numbers are extracted from the branch name, they're associated
161    // with the correct repository. If the repo changes, issue/PR are cleared first.
162    upsert_git_context_if_available(
163        storage,
164        &session_id,
165        &machine_id,
166        adapter.name(),
167        timestamp_ms,
168        &parsed,
169    );
170
171    // Create and insert event
172    let mut builder = EventBuilder::new(&machine_id, event_type.clone(), session_id.clone())
173        .framework(adapter.name())
174        .timestamp(now)
175        .tool_use_id_opt(parsed.tool_use_id.clone())
176        .spawned_agent_id_opt(parsed.spawned_agent_id.clone())
177        .tool_name_opt(parsed.tool_name.clone())
178        .subagent_type_opt(parsed.subagent_type.clone())
179        .permission_mode_opt(parsed.permission_mode.clone())
180        .transcript_path_opt(parsed.transcript_path.clone())
181        .model_opt(parsed.model.clone())
182        .duration_ms_opt(parsed.duration_ms)
183        .pid_opt(pid)
184        .process_start_time_opt(process_start_time)
185        .cwd_opt(parsed.cwd.clone())
186        .payload(payload)
187        .source("hook");
188
189    // Add token fields if present (for OpenCode ApiRequest events)
190    if let (Some(input), Some(output)) = (parsed.tokens_input, parsed.tokens_output) {
191        builder = builder.tokens(input, output);
192    }
193    if let (Some(read), Some(write)) = (parsed.tokens_cache_read, parsed.tokens_cache_write) {
194        builder = builder.cache_tokens(read, write);
195    }
196    if let Some(cost) = parsed.cost_usd {
197        builder = builder.cost(cost);
198    }
199
200    let event = builder.build();
201
202    storage.insert(&event).context("failed to insert event")?;
203
204    // Capture git branch info for relevant event types (branch changes, SessionStart)
205    capture_git_branch_if_needed(storage, &event_type, &session_id, &parsed);
206
207    // Opportunistic GC (~1.2% of calls: 3/256)
208    if rand::random::<u8>() < 3 {
209        let _ = storage.gc(config.history_duration());
210    }
211
212    // Output hook response for frameworks with blocking hooks (e.g., Cursor)
213    // This must be written to stdout for the framework to receive it
214    if let Some(response) = adapter.hook_response(&event_type_str) {
215        println!("{}", response);
216    }
217
218    // Return info for optional transcript scanning
219    Ok(LogResult {
220        transcript_path: parsed.transcript_path.clone(),
221        machine_id,
222        session_id,
223    })
224}
225
226/// Build the payload JSON by merging hook data with environment variables
227fn build_payload(
228    mut hook_data: serde_json::Value,
229    event_type: &EventType,
230    process_info: Option<&ClaudeProcessInfo>,
231) -> Result<String> {
232    // Environment variables to capture
233    let env_vars = [
234        ("CLAUDE_PROJECT_DIR", "project_dir"),
235        ("CLAUDE_FILE_PATHS", "file_paths"),
236        ("CLAUDE_TOOL_INPUT", "tool_input_env"),
237        ("CLAUDE_TOOL_OUTPUT", "tool_output_env"),
238        ("CLAUDE_NOTIFICATION", "notification_env"),
239        ("CLAUDE_CODE_REMOTE", "remote"),
240        ("CLAUDE_ENV_FILE", "env_file"),
241    ];
242
243    // Ensure we have an object to work with
244    if !hook_data.is_object() {
245        hook_data = serde_json::json!({ "_raw": hook_data });
246    }
247
248    let Some(obj) = hook_data.as_object_mut() else {
249        // We just ensured it's an object above, so this is unreachable
250        return Ok(serde_json::to_string(&hook_data)?);
251    };
252
253    // Add environment variables to the payload
254    let mut env_data: HashMap<String, String> = HashMap::new();
255    for (env_var, key) in env_vars {
256        if let Ok(value) = std::env::var(env_var) {
257            env_data.insert(key.to_string(), value);
258        }
259    }
260
261    if !env_data.is_empty() {
262        obj.insert("_env".to_string(), serde_json::to_value(env_data)?);
263    }
264
265    // For SessionStart, add process info to payload (using cached info)
266    if *event_type == EventType::SessionStart
267        && let Some(info) = process_info
268    {
269        obj.insert(
270            "_claude_process".to_string(),
271            serde_json::json!({
272                "pid": info.pid,
273                "comm": info.comm
274            }),
275        );
276    }
277
278    Ok(serde_json::to_string(&hook_data)?)
279}
280
281/// Upsert git context (local_git_dir and github_repo) for the session.
282///
283/// This is called BEFORE the event is inserted to ensure git context is set
284/// before branch parsing. When github_repo changes, issue/PR numbers are cleared
285/// because they're only meaningful in the context of a specific repository.
286///
287/// The function is designed to be fast (~50µs with direct file access) and never fail -
288/// errors are silently ignored since git info is supplementary data.
289fn upsert_git_context_if_available<S: Storage>(
290    storage: &S,
291    session_id: &str,
292    machine_id: &str,
293    framework: &str,
294    timestamp_ms: i64,
295    parsed: &ParsedHookInput,
296) {
297    if let Some(ref cwd) = parsed.cwd {
298        let cwd_path = Path::new(cwd);
299
300        // Capture git directory path and GitHub repo
301        let local_git_dir = get_local_git_dir(cwd_path);
302        let github_repo = get_github_repo(cwd_path);
303
304        if local_git_dir.is_some() || github_repo.is_some() {
305            let _ = storage.upsert_session_git_context(
306                session_id,
307                machine_id,
308                framework,
309                timestamp_ms,
310                local_git_dir.as_deref(),
311                github_repo.as_deref(),
312            );
313        }
314    }
315}
316
317/// Capture git branch info for specific event types.
318///
319/// Handles:
320/// - SessionStart: Captures initial git branch
321/// - PostToolUse (Bash): Detects git branch-changing commands and updates branch info
322///
323/// Branch parsing extracts issue/PR numbers from branch names like:
324/// - `feature/issue-123` -> github_issue: 123
325/// - `fix/GH-456` -> github_issue: 456
326/// - `pr-789` -> github_pr: 789
327fn capture_git_branch_if_needed<S: Storage>(
328    storage: &S,
329    event_type: &EventType,
330    session_id: &str,
331    parsed: &ParsedHookInput,
332) {
333    match event_type {
334        EventType::SessionStart => {
335            if let Some(ref cwd) = parsed.cwd {
336                let cwd_path = Path::new(cwd);
337
338                // Capture initial git branch from session's cwd
339                if let Some(git_info) = get_branch_info(cwd_path) {
340                    let _ = storage.update_session_git_info(session_id, &git_info);
341                }
342            }
343        }
344        EventType::PostToolUse => {
345            // Check if this is a Bash tool call with a git branch-changing command
346            if parsed.tool_name.as_deref() == Some("Bash") {
347                // Get command from CLAUDE_TOOL_INPUT environment variable
348                if let Ok(tool_input) = std::env::var("CLAUDE_TOOL_INPUT")
349                    && let Some(cmd) = extract_bash_command(&tool_input)
350                    && is_branch_changing_command(&cmd)
351                    && let Some(ref cwd) = parsed.cwd
352                    && let Some(git_info) = get_branch_info(Path::new(cwd))
353                {
354                    let _ = storage.update_session_git_info(session_id, &git_info);
355                }
356            }
357        }
358        _ => {}
359    }
360}
361
362/// Extract the bash command from tool_input.
363///
364/// The tool_input can be either:
365/// - Raw command string
366/// - JSON object with a "command" field
367fn extract_bash_command(tool_input: &str) -> Option<String> {
368    // First try to parse as JSON
369    if let Ok(json) = serde_json::from_str::<serde_json::Value>(tool_input)
370        && let Some(cmd) = json.get("command").and_then(|v| v.as_str())
371    {
372        return Some(cmd.to_string());
373    }
374    // Otherwise, assume it's a raw command
375    Some(tool_input.to_string())
376}