use std::io::{self, Write};
use std::process::ExitCode;
use super::session::{self, parse_duration_ago, AgentKind, ToolInput, ToolInvocation};
/// Entry point for `skim discover`.
///
/// Prints the help text and returns when `--help` or `-h` appears anywhere in
/// `args`. Otherwise the flags are parsed, tool invocations are collected from
/// the available session providers, and a text or JSON report is printed.
///
/// Returns `ExitCode::SUCCESS` on every non-error path, including the two
/// "nothing found" cases, which only print a short message.
///
/// # Errors
///
/// Propagates failures from argument parsing, invocation collection, and JSON
/// output.
pub(crate) fn run(args: &[String]) -> anyhow::Result<ExitCode> {
    // Help wins over everything else, regardless of where it appears.
    let wants_help = args
        .iter()
        .map(String::as_str)
        .any(|arg| arg == "--help" || arg == "-h");
    if wants_help {
        print_help();
        return Ok(ExitCode::SUCCESS);
    }

    let config = parse_args(args)?;

    let providers = session::get_providers(config.agent_filter);
    if providers.is_empty() {
        println!("No AI agent sessions found.");
        println!("hint: skim discover scans for Claude Code sessions in ~/.claude/projects/");
        return Ok(ExitCode::SUCCESS);
    }

    let time_filter = session::TimeFilter {
        since: config.since,
        latest_only: config.session_latest,
    };
    let invocations = session::collect_invocations(&providers, &time_filter)?;
    if invocations.is_empty() {
        println!("No tool invocations found in the specified time window.");
        return Ok(ExitCode::SUCCESS);
    }

    let analysis = analyze_invocations(&invocations, &config);
    if !config.json_output {
        print_text_report(&analysis, &config);
    } else {
        print_json_report(&analysis, &config)?;
    }

    Ok(ExitCode::SUCCESS)
}
/// Parsed command-line options for `skim discover`, produced by `parse_args`.
#[derive(Debug)]
struct DiscoverConfig {
    /// Lower bound of the time window; forwarded as `TimeFilter::since`.
    /// `parse_args` defaults this to 24 hours before now.
    since: Option<std::time::SystemTime>,
    /// Set by `--session latest`; forwarded as `TimeFilter::latest_only`.
    session_latest: bool,
    /// Agent chosen with `--agent`, if any; passed to `session::get_providers`.
    agent_filter: Option<AgentKind>,
    /// Set by `--json`; selects the JSON report instead of the text report.
    json_output: bool,
    /// Set by `--debug`, or initially by `crate::debug::is_debug_enabled()`.
    /// Enables collection and reporting of non-rewritable commands.
    debug: bool,
}
/// Parses the `skim discover` flags into a [`DiscoverConfig`].
///
/// Starts from the defaults (24-hour window, no agent filter, text output,
/// debug taken from `crate::debug::is_debug_enabled()`) and applies each flag
/// in the order given. `--debug` also calls
/// `crate::debug::force_enable_debug()`.
///
/// # Errors
///
/// Fails on an unknown flag, on a value-taking flag that is the last argument,
/// on a `--session` value other than `latest`, and when `parse_duration_ago`
/// or `AgentKind::parse_cli_arg` rejects the supplied value.
fn parse_args(args: &[String]) -> anyhow::Result<DiscoverConfig> {
    let default_window = std::time::Duration::from_secs(24 * 3600);
    let mut config = DiscoverConfig {
        since: Some(std::time::SystemTime::now() - default_window),
        session_latest: false,
        agent_filter: None,
        json_output: false,
        debug: crate::debug::is_debug_enabled(),
    };

    // Value-taking flags pull their argument from the same iterator, so the
    // value is consumed and never re-examined as a flag.
    let mut remaining = args.iter();
    while let Some(flag) = remaining.next() {
        match flag.as_str() {
            "--since" => {
                let value = match remaining.next() {
                    Some(value) => value,
                    None => anyhow::bail!("--since requires a value (e.g., 7d, 24h, 1w)"),
                };
                config.since = Some(parse_duration_ago(value)?);
            }
            "--session" => match remaining.next().map(String::as_str) {
                Some("latest") => config.session_latest = true,
                Some(_) => anyhow::bail!("--session only supports 'latest'"),
                None => anyhow::bail!("--session requires a value (e.g., latest)"),
            },
            "--agent" => {
                let value = match remaining.next() {
                    Some(value) => value,
                    None => anyhow::bail!("--agent requires a value (e.g., claude-code)"),
                };
                config.agent_filter = Some(AgentKind::parse_cli_arg(value)?);
            }
            "--json" => config.json_output = true,
            "--debug" => {
                config.debug = true;
                crate::debug::force_enable_debug();
            }
            other => {
                anyhow::bail!(
                    "unknown flag: '{other}'\n\nUsage: skim discover [--since <duration>] [--session latest] [--agent <name>] [--json] [--debug]"
                );
            }
        }
    }

    Ok(config)
}
/// Aggregated result of `analyze_invocations`, consumed by the report printers.
struct DiscoverAnalysis {
    /// Number of invocations analyzed, counting every tool kind.
    total_invocations: usize,
    /// One entry per `Read` invocation.
    code_reads: Vec<CodeReadInfo>,
    /// One entry per `Bash` invocation that `classify_bash_command` accepted
    /// (commands starting with `skim ` are left out).
    bash_commands: Vec<BashCommandInfo>,
    /// Sum of estimated tokens over every read result, skimmable or not.
    total_read_tokens: usize,
    /// Sum of the estimated savings over skimmable reads with a non-empty result.
    potential_savings_tokens: usize,
    /// `(command prefix, occurrences)` for commands without a rewrite.
    /// Populated only in debug mode; sorted by count descending, then prefix.
    non_rewritable_commands: Vec<(String, usize)>,
}
/// Statistics for a single `Read` tool invocation.
struct CodeReadInfo {
    /// Path taken from the `Read` input.
    file_path: String,
    // Recorded during analysis but not read by any report visible in this
    // file, hence the lint suppression.
    #[allow(dead_code)]
    /// Byte length of the read result's content (0 when there is no result).
    result_bytes: usize,
    /// Estimated token count of the read result's content.
    result_tokens: usize,
    /// Whether the file's extension is accepted by `is_skimmable_file`.
    could_skim: bool,
}
/// Classification of a single `Bash` tool invocation.
struct BashCommandInfo {
    /// The full command string as it was invoked.
    command: String,
    /// `true` when the rewrite engine returned a target for `command`.
    has_rewrite: bool,
    /// The rewrite engine's replacement command, if any.
    rewrite_target: Option<String>,
}
fn analyze_invocations(
invocations: &[ToolInvocation],
config: &DiscoverConfig,
) -> DiscoverAnalysis {
let mut code_reads = Vec::new();
let mut bash_commands = Vec::new();
let mut total_read_tokens = 0usize;
let mut potential_savings = 0usize;
let mut non_rewritable_counts: Option<std::collections::HashMap<String, usize>> =
if config.debug {
Some(std::collections::HashMap::new())
} else {
None
};
for inv in invocations {
match &inv.input {
ToolInput::Read { file_path } => {
let result_content = inv
.result
.as_ref()
.map(|r| r.content.as_str())
.unwrap_or_default();
let result_bytes = result_content.len();
let result_tokens = estimate_tokens(result_content);
total_read_tokens += result_tokens;
let could_skim = is_skimmable_file(file_path);
if could_skim && !result_content.is_empty() {
let estimated_savings = (result_tokens as f64 * 0.7) as usize;
potential_savings += estimated_savings;
}
code_reads.push(CodeReadInfo {
file_path: file_path.clone(),
result_bytes,
result_tokens,
could_skim,
});
}
ToolInput::Bash { command } => {
if let Some(info) = classify_bash_command(command) {
if let Some(ref mut counts) = non_rewritable_counts {
if !info.has_rewrite {
let prefix: String = command
.split_whitespace()
.take(3)
.collect::<Vec<_>>()
.join(" ");
if !prefix.is_empty() {
*counts.entry(prefix).or_insert(0) += 1;
}
}
}
bash_commands.push(info);
}
}
_ => {}
}
}
let mut non_rewritable_commands: Vec<(String, usize)> = non_rewritable_counts
.map(|m| m.into_iter().collect())
.unwrap_or_default();
non_rewritable_commands.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
DiscoverAnalysis {
total_invocations: invocations.len(),
code_reads,
bash_commands,
total_read_tokens,
potential_savings_tokens: potential_savings,
non_rewritable_commands,
}
}
/// Classifies a shell command by whether the rewrite engine has a target for it.
///
/// Returns `None` for commands that already start with `skim `, so they are
/// left out of the analysis. Every other command yields a
/// [`BashCommandInfo`] carrying the rewrite target, if one exists.
fn classify_bash_command(command: &str) -> Option<BashCommandInfo> {
    let needs_classification = !command.starts_with("skim ");
    needs_classification.then(|| {
        let rewrite_target = super::rewrite::would_rewrite(command);
        BashCommandInfo {
            command: command.to_owned(),
            has_rewrite: rewrite_target.is_some(),
            rewrite_target,
        }
    })
}
/// Estimates how many tokens `content` occupies.
///
/// Uses `crate::tokens::count_tokens` when it yields a count, and otherwise
/// falls back to one token per four bytes.
fn estimate_tokens(content: &str) -> usize {
    let bytes_per_four = content.len() / 4;
    crate::tokens::count_tokens(content).unwrap_or(bytes_per_four)
}
/// Reports whether `path` has an extension that `rskim_core` maps to a language.
///
/// Paths with no extension, or with an extension that is not valid UTF-8, are
/// not skimmable.
fn is_skimmable_file(path: &str) -> bool {
    let extension = std::path::Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str());
    match extension {
        Some(ext) => rskim_core::Language::from_extension(ext).is_some(),
        None => false,
    }
}
/// Prints the human-readable report to stdout.
///
/// Layout: title, invocation count, the code-reads section, the commands
/// section (with debug details when `config.debug` is set), and a closing hint.
fn print_text_report(analysis: &DiscoverAnalysis, config: &DiscoverConfig) {
    println!("skim discover -- optimization opportunities");
    println!();
    println!(
        "Sessions scanned: found {} tool invocations\n",
        analysis.total_invocations
    );

    print_code_reads_section(analysis);
    print_commands_section(analysis, config.debug);

    println!("\nhint: run `skim init` to install the PreToolUse hook for automatic optimization");
}
fn print_code_reads_section(analysis: &DiscoverAnalysis) {
let skimmable_count = analysis.code_reads.iter().filter(|r| r.could_skim).count();
let non_skimmable_count = analysis.code_reads.iter().filter(|r| !r.could_skim).count();
println!(
"Code Reads: {} total ({} skimmable, {} non-code)",
analysis.code_reads.len(),
skimmable_count,
non_skimmable_count,
);
if skimmable_count > 0 {
println!(" Tokens consumed: {}", analysis.total_read_tokens);
println!(
" Estimated savings with skim: ~{} tokens (~{:.0}%)",
analysis.potential_savings_tokens,
if analysis.total_read_tokens > 0 {
(analysis.potential_savings_tokens as f64 / analysis.total_read_tokens as f64)
* 100.0
} else {
0.0
},
);
println!();
let mut sorted_reads: Vec<_> = analysis
.code_reads
.iter()
.filter(|r| r.could_skim)
.collect();
sorted_reads.sort_by(|a, b| b.result_tokens.cmp(&a.result_tokens));
println!(" Top files by token count:");
for read in sorted_reads.iter().take(10) {
println!(" {} ({} tokens)", read.file_path, read.result_tokens);
}
}
println!();
}
fn print_commands_section(analysis: &DiscoverAnalysis, debug: bool) {
let rewritable_count = analysis
.bash_commands
.iter()
.filter(|c| c.has_rewrite)
.count();
println!(
"Commands: {} total ({} rewritable)",
analysis.bash_commands.len(),
rewritable_count,
);
if rewritable_count > 0 {
println!();
println!(" Rewritable commands:");
let mut seen = std::collections::HashSet::new();
for cmd in analysis.bash_commands.iter().filter(|c| c.has_rewrite) {
let prefix: String = cmd
.command
.split_whitespace()
.take(3)
.collect::<Vec<_>>()
.join(" ");
if seen.insert(prefix.clone()) {
println!(
" {} -> {}",
prefix,
cmd.rewrite_target.as_deref().unwrap_or("skim equivalent"),
);
}
}
}
print_debug_section(analysis, debug);
}
/// Prints the list of non-rewritable command prefixes with occurrence counts.
///
/// Prints nothing unless `debug` is set and at least one prefix was collected.
fn print_debug_section(analysis: &DiscoverAnalysis, debug: bool) {
    let entries = &analysis.non_rewritable_commands;
    if !debug || entries.is_empty() {
        return;
    }

    println!();
    println!(" Non-rewritable commands ({}):", entries.len());
    for (prefix, count) in entries {
        let plural_suffix = if *count == 1 { "" } else { "s" };
        println!(" {} ({} occurrence{})", prefix, count, plural_suffix);
    }
}
/// Writes the machine-readable report as pretty-printed JSON to stdout,
/// followed by a newline.
///
/// The document contains `version`, `total_invocations`, a `code_reads` object
/// (totals plus up to ten `top_files`), and a `commands` object (totals plus
/// one `rewrites` entry per rewritable command). When `config.debug` is set
/// and non-rewritable prefixes were collected, `commands` also carries
/// `non_rewritable_commands`.
///
/// `top_files` is ordered by token count, largest first, matching the
/// "Top files by token count" list in the text report.
///
/// # Errors
///
/// Returns an error when serializing to stdout or writing the trailing newline
/// fails.
fn print_json_report(analysis: &DiscoverAnalysis, config: &DiscoverConfig) -> anyhow::Result<()> {
    let mut skimmable_reads: Vec<_> = analysis
        .code_reads
        .iter()
        .filter(|r| r.could_skim)
        .collect();
    // Rank before truncating so the ten entries are really the largest reads,
    // not the first ten encountered. The sort is stable, so equal token counts
    // keep their encounter order.
    skimmable_reads.sort_by_key(|r| std::cmp::Reverse(r.result_tokens));

    let rewritable: Vec<_> = analysis
        .bash_commands
        .iter()
        .filter(|c| c.has_rewrite)
        .collect();

    let mut commands_obj = serde_json::json!({
        "total": analysis.bash_commands.len(),
        "rewritable": rewritable.len(),
        "rewrites": rewritable.iter().filter_map(|c| {
            c.rewrite_target.as_ref().map(|t| {
                serde_json::json!({
                    "original": c.command,
                    "target": t,
                })
            })
        }).collect::<Vec<_>>(),
    });

    // The key is omitted entirely outside debug mode.
    if config.debug && !analysis.non_rewritable_commands.is_empty() {
        let non_rewritable: Vec<_> = analysis
            .non_rewritable_commands
            .iter()
            .map(|(prefix, count)| {
                serde_json::json!({
                    "prefix": prefix,
                    "count": count,
                })
            })
            .collect();
        commands_obj["non_rewritable_commands"] = serde_json::json!(non_rewritable);
    }

    let report = serde_json::json!({
        "version": 1,
        "total_invocations": analysis.total_invocations,
        "code_reads": {
            "total": analysis.code_reads.len(),
            "skimmable": skimmable_reads.len(),
            "total_tokens": analysis.total_read_tokens,
            "potential_savings_tokens": analysis.potential_savings_tokens,
            "top_files": skimmable_reads.iter().take(10).map(|r| {
                serde_json::json!({
                    "path": r.file_path,
                    "tokens": r.result_tokens,
                })
            }).collect::<Vec<_>>(),
        },
        "commands": commands_obj,
    });

    // Lock stdout once for the document and its trailing newline.
    let stdout = io::stdout();
    let mut handle = stdout.lock();
    serde_json::to_writer_pretty(&mut handle, &report)?;
    writeln!(handle)?;
    Ok(())
}
/// Prints the usage text for `skim discover` to stdout, one line at a time.
fn print_help() {
    // An empty entry produces a blank line.
    const HELP_LINES: &[&str] = &[
        "skim discover",
        "",
        " Identify missed optimization opportunities in AI agent sessions",
        "",
        "Usage: skim discover [OPTIONS]",
        "",
        "Options:",
        " --since <duration> Time window (e.g., 24h, 7d, 1w) [default: 24h]",
        " (24h default suits recent-session exploration;",
        " use --since 7d for broader analysis)",
        " --session latest Only scan the most recent session",
        " --agent <name> Only scan sessions from a specific agent",
        " --json Output machine-readable JSON",
        " --debug Show non-rewritable commands (also: SKIM_DEBUG=1)",
        " --help, -h Print help information",
        "",
        "Supported agents:",
        " claude-code Claude Code (~/.claude/projects/)",
        "",
        "Examples:",
        " skim discover Last 24h, all detected agents",
        " skim discover --since 7d Last 7 days",
        " skim discover --session latest Most recent session only",
        " skim discover --agent claude-code Only Claude Code sessions",
        " skim discover --json Machine-readable output",
    ];

    for line in HELP_LINES {
        println!("{line}");
    }
}
// ============================================================================
// Clap command for completions
// ============================================================================
/// Builds the clap [`Command`](clap::Command) describing `skim discover`
/// (used for shell completions).
///
/// SYNC NOTE: the flags declared here mirror the ones handled by
/// [`parse_args()`]. A flag added to either place must be added to the other.
pub(super) fn command() -> clap::Command {
    // Value-taking options.
    let since = clap::Arg::new("since")
        .long("since")
        .value_name("DURATION")
        .help("Time window (e.g., 24h, 7d, 1w) [default: 24h]");
    let session = clap::Arg::new("session")
        .long("session")
        .value_name("VALUE")
        .help("Session filter (e.g., latest)");
    let agent = clap::Arg::new("agent")
        .long("agent")
        .value_name("NAME")
        .help("Only scan sessions from a specific agent");

    // Boolean switches.
    let json = clap::Arg::new("json")
        .long("json")
        .action(clap::ArgAction::SetTrue)
        .help("Output machine-readable JSON");
    let debug = clap::Arg::new("debug")
        .long("debug")
        .action(clap::ArgAction::SetTrue)
        .help("Show non-rewritable commands (also: SKIM_DEBUG=1)");

    clap::Command::new("discover")
        .about("Identify missed optimization opportunities in AI agent sessions")
        .arg(since)
        .arg(session)
        .arg(agent)
        .arg(json)
        .arg(debug)
}
/// Unit tests for argument parsing, command classification, and invocation
/// analysis.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cmd::rewrite::would_rewrite;

    #[test]
    fn test_parse_duration_ago_days() {
        let result = parse_duration_ago("7d");
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_duration_ago_hours() {
        let result = parse_duration_ago("24h");
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_duration_ago_weeks() {
        let result = parse_duration_ago("1w");
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_duration_ago_invalid() {
        assert!(parse_duration_ago("abc").is_err());
        assert!(parse_duration_ago("7x").is_err());
        assert!(parse_duration_ago("").is_err());
    }

    #[test]
    fn test_is_skimmable_file() {
        assert!(is_skimmable_file("/tmp/test.rs"));
        assert!(is_skimmable_file("/tmp/test.ts"));
        assert!(is_skimmable_file("/tmp/test.py"));
        assert!(is_skimmable_file("/tmp/test.go"));
        assert!(is_skimmable_file("/tmp/test.java"));
        assert!(is_skimmable_file("/tmp/test.json"));
        assert!(!is_skimmable_file("/tmp/test.txt"));
        assert!(!is_skimmable_file("/tmp/test"));
        assert!(!is_skimmable_file("/tmp/.env"));
    }

    /// Verify that `would_rewrite` (from the rewrite engine) correctly identifies
    /// rewritable commands — replaces the deleted `check_has_rewrite` heuristic.
    #[test]
    fn test_would_rewrite_basic() {
        // Rewritable commands
        assert!(would_rewrite("cargo test").is_some());
        assert!(would_rewrite("cargo clippy").is_some());
        assert!(would_rewrite("cargo build").is_some());
        assert!(would_rewrite("pytest").is_some());
        assert!(would_rewrite("go test ./...").is_some());
        assert!(would_rewrite("git status").is_some());
        assert!(would_rewrite("git diff").is_some());
        assert!(would_rewrite("tsc").is_some());
        assert!(would_rewrite("cat file.rs").is_some());
        // Non-rewritable commands
        assert!(would_rewrite("cat file.txt").is_none());
        assert!(would_rewrite("ls").is_none());
        assert!(would_rewrite("echo hello").is_none());
        assert!(would_rewrite("cargo run").is_none());
        // Already a skim command
        assert!(would_rewrite("skim test cargo").is_none());
    }

    #[test]
    fn test_would_rewrite_lint_and_infra_tools() {
        // lint tools
        assert!(would_rewrite("prettier --check .").is_some());
        assert!(would_rewrite("rustfmt --check src/main.rs").is_some());
        assert!(would_rewrite("npx prettier --check .").is_some());
        // infra tools
        assert!(would_rewrite("gh pr list").is_some());
        assert!(would_rewrite("gh issue list").is_some());
        assert!(would_rewrite("gh run list").is_some());
        assert!(would_rewrite("gh release list").is_some());
        assert!(would_rewrite("aws s3 ls").is_some());
        // NOTE(review): the curl URL was truncated in the source and has been
        // reconstructed here — confirm it matches the intended case.
        assert!(would_rewrite("curl https://example.com").is_some());
        assert!(would_rewrite("wget https://example.com/file").is_some());
        assert!(would_rewrite("gh auth login").is_none());
    }

    #[test]
    fn test_would_rewrite_targets() {
        assert_eq!(
            would_rewrite("cargo test"),
            Some("skim test cargo".to_string())
        );
        assert_eq!(
            would_rewrite("git status"),
            Some("skim git status".to_string())
        );
        assert_eq!(
            would_rewrite("cat file.rs"),
            Some("skim file.rs --mode=pseudo".to_string())
        );
        assert_eq!(would_rewrite("ls"), None);
    }

    #[test]
    fn test_would_rewrite_file_ops() {
        assert!(would_rewrite("find . -name '*.rs'").is_some());
        assert!(would_rewrite("tree src/").is_some());
        assert!(would_rewrite("rg fn main src/").is_some());
        assert!(would_rewrite("grep -r TODO src/").is_some());
        assert!(would_rewrite("grep -rn TODO src/").is_some());
        assert!(would_rewrite("grep TODO file.rs").is_none());
        assert!(would_rewrite("ls -la").is_some());
        assert!(would_rewrite("ls -R src/").is_some());
        assert!(would_rewrite("ls").is_none());
    }

    #[test]
    fn test_parse_args_defaults() {
        let config = parse_args(&[]).unwrap();
        assert!(config.since.is_some());
        assert!(!config.session_latest);
        assert!(config.agent_filter.is_none());
        assert!(!config.json_output);
    }

    #[test]
    fn test_parse_args_json() {
        let config = parse_args(&["--json".to_string()]).unwrap();
        assert!(config.json_output);
    }

    #[test]
    fn test_parse_args_agent() {
        let config = parse_args(&["--agent".to_string(), "claude-code".to_string()]).unwrap();
        assert_eq!(config.agent_filter, Some(AgentKind::ClaudeCode));
    }

    #[test]
    fn test_parse_args_unknown_agent() {
        let result = parse_args(&["--agent".to_string(), "nonexistent".to_string()]);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("unknown agent"));
    }

    #[test]
    fn test_parse_args_unknown_flag() {
        let result = parse_args(&["--bogus".to_string()]);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("unknown flag"));
    }

    #[test]
    fn test_parse_args_session_latest() {
        let config = parse_args(&["--session".to_string(), "latest".to_string()]).unwrap();
        assert!(config.session_latest);
    }

    #[test]
    fn test_parse_args_since() {
        let config = parse_args(&["--since".to_string(), "7d".to_string()]).unwrap();
        assert!(config.since.is_some());
    }

    #[test]
    fn test_classify_bash_skips_skim_commands() {
        assert!(classify_bash_command("skim test cargo").is_none());
        assert!(classify_bash_command("skim build clippy").is_none());
        assert!(classify_bash_command("skim git status").is_none());
        assert!(classify_bash_command("skim file.rs").is_none());
    }

    #[test]
    fn test_classify_bash_rewritable_commands() {
        let info = classify_bash_command("cargo test").unwrap();
        assert!(info.has_rewrite);
        assert!(info.rewrite_target.is_some());
        assert_eq!(info.command, "cargo test");
        let info = classify_bash_command("git status").unwrap();
        assert!(info.has_rewrite);
        assert!(info.rewrite_target.is_some());
        let info = classify_bash_command("cat file.rs").unwrap();
        assert!(info.has_rewrite);
        assert!(info.rewrite_target.is_some());
    }

    #[test]
    fn test_classify_bash_non_rewritable_commands() {
        let info = classify_bash_command("ls").unwrap();
        assert!(!info.has_rewrite);
        assert!(info.rewrite_target.is_none());
        assert_eq!(info.command, "ls");
        let info = classify_bash_command("echo hello").unwrap();
        assert!(!info.has_rewrite);
        assert!(info.rewrite_target.is_none());
        let info = classify_bash_command("node server.js").unwrap();
        assert!(!info.has_rewrite);
        assert!(info.rewrite_target.is_none());
    }

    /// Config with no time window, no filters, text output, and debug off.
    fn default_config() -> DiscoverConfig {
        DiscoverConfig {
            since: None,
            session_latest: false,
            agent_filter: None,
            json_output: false,
            debug: false,
        }
    }

    /// Builds a successful `Bash` invocation for `command` with fixed metadata.
    fn make_bash_invocation(command: &str) -> ToolInvocation {
        ToolInvocation {
            tool_name: "Bash".to_string(),
            input: ToolInput::Bash {
                command: command.to_string(),
            },
            timestamp: "2024-01-01T00:00:00Z".to_string(),
            session_id: "sess1".to_string(),
            agent: AgentKind::ClaudeCode,
            result: Some(session::ToolResult {
                content: "output".to_string(),
                is_error: false,
            }),
        }
    }

    #[test]
    fn test_analyze_excludes_already_rewritten_commands() {
        let inv1 = make_bash_invocation("skim test cargo --nocapture");
        let inv2 = make_bash_invocation("skim build clippy");
        let inv3 = make_bash_invocation("cargo test");
        let invocations = vec![inv1, inv2, inv3];
        let config = default_config();
        let analysis = analyze_invocations(&invocations, &config);
        assert_eq!(analysis.bash_commands.len(), 1);
        assert_eq!(analysis.bash_commands[0].command, "cargo test");
        assert!(analysis.bash_commands[0].has_rewrite);
    }

    #[test]
    fn test_analyze_counts_non_skim_commands() {
        let inv1 = make_bash_invocation("ls -la");
        let inv2 = make_bash_invocation("cargo test");
        let invocations = vec![inv1, inv2];
        let config = default_config();
        let analysis = analyze_invocations(&invocations, &config);
        assert_eq!(analysis.bash_commands.len(), 2);
    }

    #[test]
    fn test_analyze_collects_non_rewritable_when_debug() {
        let inv1 = make_bash_invocation("node server.js --port 3000");
        let inv2 = make_bash_invocation("node server.js --port 4000");
        let inv3 = make_bash_invocation("cargo run --bin myapp");
        let invocations = vec![inv1, inv2, inv3];
        // Without debug, nothing is collected.
        let analysis = analyze_invocations(&invocations, &default_config());
        assert!(analysis.non_rewritable_commands.is_empty());
        // With debug, prefixes are tallied.
        let config_debug = DiscoverConfig {
            debug: true,
            ..default_config()
        };
        let analysis = analyze_invocations(&invocations, &config_debug);
        assert!(!analysis.non_rewritable_commands.is_empty());
        let node_entry = analysis
            .non_rewritable_commands
            .iter()
            .find(|(p, _)| p.starts_with("node server.js"));
        assert!(node_entry.is_some());
        assert_eq!(node_entry.unwrap().1, 2);
    }

    #[test]
    fn test_parse_args_debug() {
        let config = parse_args(&["--debug".to_string()]).unwrap();
        assert!(config.debug);
        // `--debug` force-enables the debug flag in `crate::debug`; undo that.
        crate::debug::reset_debug_for_tests();
    }

    #[test]
    fn test_parse_args_and_command_are_in_sync() {
        let cmd = command();
        let all_args = [
            "--since",
            "7d",
            "--agent",
            "claude-code",
            "--json",
            "--debug",
            "--session",
            "latest",
        ];
        cmd.clone()
            .try_get_matches_from(std::iter::once("discover").chain(all_args.iter().copied()))
            .expect("clap rejected flags that parse_args accepts — sync is broken");
        let string_args: Vec<String> = all_args.iter().map(|s| s.to_string()).collect();
        parse_args(&string_args)
            .expect("parse_args rejected flags that clap accepts — sync is broken");
        let matches = cmd
            .try_get_matches_from(std::iter::once("discover").chain(all_args.iter().copied()))
            .unwrap();
        assert!(matches.get_flag("json"), "clap should see --json as true");
        assert!(matches.get_flag("debug"), "clap should see --debug as true");
        assert_eq!(
            matches.get_one::<String>("since").map(|s| s.as_str()),
            Some("7d"),
            "clap --since value should be '7d'"
        );
        assert_eq!(
            matches.get_one::<String>("agent").map(|s| s.as_str()),
            Some("claude-code"),
            "clap --agent value should be 'claude-code'"
        );
        assert_eq!(
            matches.get_one::<String>("session").map(|s| s.as_str()),
            Some("latest"),
            "clap --session value should be 'latest'"
        );
        // `parse_args` saw `--debug` above; restore the debug flag.
        crate::debug::reset_debug_for_tests();
    }
}