mi6_cli/commands/
log.rs

1use std::collections::HashMap;
2use std::io::Read;
3use std::path::Path;
4
5use anyhow::{Context, Result};
6use chrono::Utc;
7use mi6_core::{
8    Config, EventBuilder, EventType, FrameworkAdapter, FrameworkProcessInfo, ParsedHookInput,
9    Storage, default_adapter, detect_all_frameworks, detect_framework, find_framework_process,
10    get_adapter, get_branch_info, get_github_repo, get_local_git_dir, is_branch_changing_command,
11};
12
/// Result of running the log command.
///
/// Carries the identifiers a caller needs to follow up on the logged event
/// (for example to scan the session transcript).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogResult {
    /// Transcript path to scan, if available
    pub transcript_path: Option<String>,
    /// Machine ID for transcript parsing
    pub machine_id: String,
    /// Session ID for potential backfill operations
    pub session_id: String,
}
22
23/// Run the log command. This must be fast (<10ms) and never block the calling framework.
24///
25/// Returns information needed for optional transcript scanning.
26pub fn run_log<S: Storage>(
27    storage: &S,
28    event_type_arg: Option<String>,
29    json_payload: Option<String>,
30    framework_name: Option<String>,
31) -> Result<LogResult> {
32    // Determine the framework adapter:
33    // 1. Use explicit --framework flag if provided
34    // 2. Otherwise, try to auto-detect from environment variables
35    // 3. Fall back to default (claude)
36    let adapter: &dyn FrameworkAdapter = if let Some(ref name) = framework_name {
37        get_adapter(name).ok_or_else(|| anyhow::anyhow!("unknown framework: {}", name))?
38    } else {
39        // Warn if multiple frameworks detected (to stderr, doesn't affect hook)
40        let detected = detect_all_frameworks();
41        if detected.len() > 1 {
42            let names: Vec<_> = detected.iter().map(|a| a.name()).collect();
43            eprintln!(
44                "mi6: warning: multiple frameworks detected ({}), using {}",
45                names.join(", "),
46                detected[0].name()
47            );
48        }
49        detect_framework().unwrap_or_else(default_adapter)
50    };
51
52    // Handle argument parsing:
53    // - Claude/Gemini: `mi6 ingest event SessionStart` with JSON on stdin
54    // - Codex: `mi6 ingest event --framework codex '{"type":"agent-turn-complete",...}'`
55    //
56    // Codex passes JSON as the only positional arg, which clap captures as event_type_arg.
57    // We detect this by checking if event_type_arg looks like JSON (starts with '{').
58    let (actual_event_type, json_str) = if let Some(ref arg) = event_type_arg {
59        if arg.trim().starts_with('{') {
60            // First positional arg is JSON (Codex format) - extract event type from it
61            (None, arg.clone())
62        } else {
63            // First positional arg is event type (Claude/Gemini format)
64            let json = if let Some(payload) = json_payload {
65                payload
66            } else {
67                let mut stdin_data = String::new();
68                std::io::stdin()
69                    .read_to_string(&mut stdin_data)
70                    .context("failed to read stdin")?;
71                stdin_data
72            };
73            (Some(arg.clone()), json)
74        }
75    } else {
76        // No positional args - read JSON from stdin
77        let mut stdin_data = String::new();
78        std::io::stdin()
79            .read_to_string(&mut stdin_data)
80            .context("failed to read stdin")?;
81        (None, stdin_data)
82    };
83
84    // Parse the hook JSON
85    let hook_data: serde_json::Value = if json_str.trim().is_empty() {
86        serde_json::json!({})
87    } else {
88        serde_json::from_str(&json_str).context("failed to parse hook JSON")?
89    };
90
91    // Determine event type:
92    // 1. Use explicit CLI argument if provided
93    // 2. Otherwise extract from JSON payload's "type" field (Codex CLI format)
94    // 3. Fall back to "Unknown" if neither available
95    let event_type_str = if let Some(ref et) = actual_event_type {
96        et.clone()
97    } else {
98        // Extract from JSON "type" field (Codex CLI format)
99        hook_data
100            .get("type")
101            .and_then(|v| v.as_str())
102            .map_or_else(|| "Unknown".to_string(), String::from)
103    };
104
105    // Map the event type using the adapter (handles framework-specific event names)
106    let event_type: EventType = adapter.map_event_type(&event_type_str);
107
108    // Use adapter to parse hook input into normalized fields
109    let parsed = adapter.parse_hook_input(&event_type_str, &hook_data);
110
111    // Extract session_id (required field, default to "unknown")
112    let session_id = parsed
113        .session_id
114        .clone()
115        .unwrap_or_else(|| "unknown".to_string());
116
117    // Find framework process by walking up the process tree.
118    // This correctly finds Claude's PID even on Linux where bash doesn't exec into the command,
119    // creating an intermediate shell process between Claude and mi6.
120    //
121    // We capture PID on every event (not just SessionStart) to handle session continuations
122    // where a dead session is revived with `--continue` and gets a new PID.
123    //
124    // Note: For Cursor, the PID is captured for display purposes but is NOT used for
125    // liveness detection (handled in builder.rs). Cursor is an IDE where multiple
126    // conversation sessions share the same process, so PID-based liveness doesn't work.
127    let process_info = find_framework_process();
128    let pid = process_info.as_ref().map(|info| info.pid);
129
130    // Capture process start time for PID identity validation.
131    // This creates a globally unique (pid, start_time) identifier that:
132    // - Prevents false positives from PID reuse
133    // - Handles late mi6 installation (process running before mi6 started)
134    let process_start_time = pid.and_then(prock::get_start_time);
135
136    // Build payload: merge hook JSON with environment variables and process info
137    let payload = build_payload(hook_data, &event_type, process_info.as_ref())?;
138
139    // Load config and get machine_id
140    let config = Config::load().unwrap_or_default();
141    let machine_id = config.machine_id();
142
143    // Get timestamp for both git context upsert and event
144    let now = Utc::now();
145    let timestamp_ms = now.timestamp_millis();
146
147    // Capture git context BEFORE inserting the event.
148    // This ensures github_repo is set before branch parsing, so that when
149    // issue/PR numbers are extracted from the branch name, they're associated
150    // with the correct repository. If the repo changes, issue/PR are cleared first.
151    upsert_git_context_if_available(
152        storage,
153        &session_id,
154        &machine_id,
155        adapter.name(),
156        timestamp_ms,
157        &parsed,
158    );
159
160    // Create and insert event
161    let mut builder = EventBuilder::new(&machine_id, event_type.clone(), session_id.clone())
162        .framework(adapter.name())
163        .timestamp(now)
164        .tool_use_id_opt(parsed.tool_use_id.clone())
165        .spawned_agent_id_opt(parsed.spawned_agent_id.clone())
166        .tool_name_opt(parsed.tool_name.clone())
167        .subagent_type_opt(parsed.subagent_type.clone())
168        .permission_mode_opt(parsed.permission_mode.clone())
169        .transcript_path_opt(parsed.transcript_path.clone())
170        .model_opt(parsed.model.clone())
171        .duration_ms_opt(parsed.duration_ms)
172        .pid_opt(pid)
173        .process_start_time_opt(process_start_time)
174        .cwd_opt(parsed.cwd.clone())
175        .payload(payload)
176        .source("hook");
177
178    // Add token fields if present (for OpenCode ApiRequest events)
179    if let (Some(input), Some(output)) = (parsed.tokens_input, parsed.tokens_output) {
180        builder = builder.tokens(input, output);
181    }
182    if let (Some(read), Some(write)) = (parsed.tokens_cache_read, parsed.tokens_cache_write) {
183        builder = builder.cache_tokens(read, write);
184    }
185    if let Some(cost) = parsed.cost_usd {
186        builder = builder.cost(cost);
187    }
188
189    let event = builder.build();
190
191    storage.insert(&event).context("failed to insert event")?;
192
193    // Capture git branch info for relevant event types (branch changes, SessionStart)
194    capture_git_branch_if_needed(storage, &event_type, &session_id, &parsed);
195
196    // Opportunistic GC (~1.2% of calls: 3/256)
197    if rand::random::<u8>() < 3 {
198        let _ = storage.gc(config.history_duration());
199    }
200
201    // Output hook response for frameworks with blocking hooks (e.g., Cursor)
202    // This must be written to stdout for the framework to receive it
203    if let Some(response) = adapter.hook_response(&event_type_str) {
204        println!("{}", response);
205    }
206
207    // Return info for optional transcript scanning
208    Ok(LogResult {
209        transcript_path: parsed.transcript_path.clone(),
210        machine_id,
211        session_id,
212    })
213}
214
215/// Build the payload JSON by merging hook data with environment variables
216fn build_payload(
217    mut hook_data: serde_json::Value,
218    event_type: &EventType,
219    process_info: Option<&FrameworkProcessInfo>,
220) -> Result<String> {
221    // Environment variables to capture
222    let env_vars = [
223        ("CLAUDE_PROJECT_DIR", "project_dir"),
224        ("CLAUDE_FILE_PATHS", "file_paths"),
225        ("CLAUDE_TOOL_INPUT", "tool_input_env"),
226        ("CLAUDE_TOOL_OUTPUT", "tool_output_env"),
227        ("CLAUDE_NOTIFICATION", "notification_env"),
228        ("CLAUDE_CODE_REMOTE", "remote"),
229        ("CLAUDE_ENV_FILE", "env_file"),
230    ];
231
232    // Ensure we have an object to work with
233    if !hook_data.is_object() {
234        hook_data = serde_json::json!({ "_raw": hook_data });
235    }
236
237    let Some(obj) = hook_data.as_object_mut() else {
238        // We just ensured it's an object above, so this is unreachable
239        return Ok(serde_json::to_string(&hook_data)?);
240    };
241
242    // Add environment variables to the payload
243    let mut env_data: HashMap<String, String> = HashMap::new();
244    for (env_var, key) in env_vars {
245        if let Ok(value) = std::env::var(env_var) {
246            env_data.insert(key.to_string(), value);
247        }
248    }
249
250    if !env_data.is_empty() {
251        obj.insert("_env".to_string(), serde_json::to_value(env_data)?);
252    }
253
254    // For SessionStart, add process info to payload (using cached info)
255    if *event_type == EventType::SessionStart
256        && let Some(info) = process_info
257    {
258        obj.insert(
259            "_claude_process".to_string(),
260            serde_json::json!({
261                "pid": info.pid,
262                "comm": info.comm
263            }),
264        );
265    }
266
267    Ok(serde_json::to_string(&hook_data)?)
268}
269
270/// Upsert git context (local_git_dir and github_repo) for the session.
271///
272/// This is called BEFORE the event is inserted to ensure git context is set
273/// before branch parsing. When github_repo changes, issue/PR numbers are cleared
274/// because they're only meaningful in the context of a specific repository.
275///
276/// The function is designed to be fast (~50µs with direct file access) and never fail -
277/// errors are silently ignored since git info is supplementary data.
278fn upsert_git_context_if_available<S: Storage>(
279    storage: &S,
280    session_id: &str,
281    machine_id: &str,
282    framework: &str,
283    timestamp_ms: i64,
284    parsed: &ParsedHookInput,
285) {
286    if let Some(ref cwd) = parsed.cwd {
287        let cwd_path = Path::new(cwd);
288
289        // Capture git directory path and GitHub repo
290        let local_git_dir = get_local_git_dir(cwd_path);
291        let github_repo = get_github_repo(cwd_path);
292
293        if local_git_dir.is_some() || github_repo.is_some() {
294            let _ = storage.upsert_session_git_context(
295                session_id,
296                machine_id,
297                framework,
298                timestamp_ms,
299                local_git_dir.as_deref(),
300                github_repo.as_deref(),
301            );
302        }
303    }
304}
305
306/// Capture git branch info for specific event types.
307///
308/// Handles:
309/// - SessionStart: Captures initial git branch
310/// - PostToolUse (Bash): Detects git branch-changing commands and updates branch info
311///
312/// Branch parsing extracts issue/PR numbers from branch names like:
313/// - `feature/issue-123` -> github_issue: 123
314/// - `fix/GH-456` -> github_issue: 456
315/// - `pr-789` -> github_pr: 789
316fn capture_git_branch_if_needed<S: Storage>(
317    storage: &S,
318    event_type: &EventType,
319    session_id: &str,
320    parsed: &ParsedHookInput,
321) {
322    match event_type {
323        EventType::SessionStart => {
324            if let Some(ref cwd) = parsed.cwd {
325                let cwd_path = Path::new(cwd);
326
327                // Capture initial git branch from session's cwd
328                if let Some(git_info) = get_branch_info(cwd_path) {
329                    let _ = storage.update_session_git_info(session_id, &git_info);
330                }
331            }
332        }
333        EventType::PostToolUse => {
334            // Check if this is a Bash tool call with a git branch-changing command
335            if parsed.tool_name.as_deref() == Some("Bash") {
336                // Get command from CLAUDE_TOOL_INPUT environment variable
337                if let Ok(tool_input) = std::env::var("CLAUDE_TOOL_INPUT")
338                    && let Some(cmd) = extract_bash_command(&tool_input)
339                    && is_branch_changing_command(&cmd)
340                    && let Some(ref cwd) = parsed.cwd
341                    && let Some(git_info) = get_branch_info(Path::new(cwd))
342                {
343                    let _ = storage.update_session_git_info(session_id, &git_info);
344                }
345            }
346        }
347        _ => {}
348    }
349}
350
351/// Extract the bash command from tool_input.
352///
353/// The tool_input can be either:
354/// - Raw command string
355/// - JSON object with a "command" field
356fn extract_bash_command(tool_input: &str) -> Option<String> {
357    // First try to parse as JSON
358    if let Ok(json) = serde_json::from_str::<serde_json::Value>(tool_input)
359        && let Some(cmd) = json.get("command").and_then(|v| v.as_str())
360    {
361        return Some(cmd.to_string());
362    }
363    // Otherwise, assume it's a raw command
364    Some(tool_input.to_string())
365}