use std::io::Read as _;
use std::path::Path;
#[cfg(unix)]
use std::path::PathBuf;
#[cfg(unix)]
use std::time::Duration;
use rag_rat_core::config::Config;
use rag_rat_core::index::{CloneCheckInput, IndexDatabase, TextCloneMatch};
use rag_rat_core::language::Language;
use rag_rat_core::locks;
use rag_rat_core::query::grep_augment;
use rag_rat_core::query::orientation::Orientation;
use rag_rat_core::storage::IndexConnection;
use serde::Deserialize;
/// Function-count ceiling for the clone check: `clone_check_skipped_for_size`
/// skips the check when the database has no clone-check index generation and
/// the function count exceeds this value.
const MAX_CLONE_CHECK_FUNCTIONS: u64 = 40_000;
/// Threshold handed to `IndexDatabase::clones_of_texts` by `clone_check`.
// NOTE(review): presumably a minimum similarity in 0.0..=1.0 — confirm against the index API.
const HOOK_NEAR_THRESHOLD: f64 = 0.85;
/// Maximum number of `clone_of` references listed per match in the clone
/// warning; the remainder is summarised as "(+N more)".
const MAX_CLONE_REFS: usize = 5;
/// Read and write timeout applied to the listener socket in `ask_listener`.
#[cfg(unix)]
const SOCKET_BUDGET: Duration = Duration::from_millis(250);
/// Hook payload deserialized from the JSON that `run_inner` reads on stdin.
///
/// Every field is optional on the wire: missing strings default to empty,
/// missing options to `None`, and a missing `tool_input` to the default
/// `serde_json::Value`.
#[derive(Debug, Default, Deserialize)]
pub struct HookInput {
/// Session identifier; forwarded to the listener by `ask_listener`.
#[serde(default)]
pub session_id: String,
/// Working directory of the session; the starting point for `find_config`.
#[serde(default)]
pub cwd: String,
/// Hook event name. `"SessionStart"` routes to `session_start`; any other
/// value (or none) routes to `pretooluse`.
pub hook_event_name: Option<String>,
/// Session-start source. `session_start` only emits output for
/// `"startup"`, `"clear"` and `"compact"`.
pub source: Option<String>,
/// Name of the tool being invoked. This file acts on `"Grep"`, `"Bash"`,
/// `"Write"`, `"Edit"` and `"MultiEdit"`.
#[serde(default)]
pub tool_name: String,
/// Raw tool arguments; the keys read depend on `tool_name`
/// (e.g. `pattern`/`path`, `command`, `file_path`/`content`).
#[serde(default)]
pub tool_input: serde_json::Value,
}
/// A search request recovered from a hook payload by `extract_search`.
pub struct Search {
/// The pattern the tool is about to search for.
pub pattern: String,
/// Path the search is scoped to, when one was given.
pub search_path: Option<String>,
/// Where the search came from: `"grep_tool"` or `"bash"`.
/// Only read when building the listener request on unix, hence the
/// `dead_code` allowance on other targets.
#[cfg_attr(not(unix), allow(dead_code))]
pub source: &'static str,
}
/// Recovers the search a tool call is about to perform, if it is one.
///
/// * `Grep` — reads `pattern` (required) and `path` (optional) from the tool
///   input.
/// * `Bash` — reads `command` and delegates to [`parse_bash_search`].
///
/// Returns `None` for any other tool, when a required key is missing or not a
/// string, or when the shell command is not a recognisable search.
pub fn extract_search(input: &HookInput) -> Option<Search> {
    let args = &input.tool_input;
    let tool = input.tool_name.as_str();

    if tool == "Grep" {
        let pattern = args.get("pattern")?.as_str()?.to_string();
        let search_path = args.get("path").and_then(|v| v.as_str()).map(str::to_string);
        return Some(Search { pattern, search_path, source: "grep_tool" });
    }

    if tool == "Bash" {
        let command = args.get("command")?.as_str()?;
        return parse_bash_search(command)
            .map(|(pattern, search_path)| Search { pattern, search_path, source: "bash" });
    }

    None
}
/// Command basenames that `parse_bash_search` treats as a search tool.
const SEARCH_COMMANDS: &[&str] = &["grep", "rg", "ag"];
/// Flags whose value is the *next* token. `parse_bash_search` skips that
/// following token so it is not mistaken for the pattern or the path.
const ARG_FLAGS: &[&str] = &[
"-A",
"-B",
"-C",
"-m",
"-g",
"-t",
"-T",
"-f",
"-M",
"--glob",
"--type",
"--type-not",
"--include",
"--exclude",
"--exclude-dir",
"--max-count",
"--max-depth",
"--context",
"--after-context",
"--before-context",
"--file",
"--ignore-file",
"--threads",
"--colors",
];
/// Extracts `(pattern, optional path)` from a shell command line that runs one
/// of [`SEARCH_COMMANDS`].
///
/// The command is split into top-level segments (`;`, `&`, `&&`, `|`, `||`)
/// and the first segment whose command word is a search tool decides the
/// result. Leading `VAR=value` assignments are skipped and the command word is
/// compared by basename, so `/usr/bin/rg` counts as `rg`.
///
/// Returns `None` when:
/// * the command contains command substitution (a backtick or `$(`),
/// * a segment has an unterminated quote,
/// * an `xargs` or `find` segment is reached first,
/// * no segment runs a search tool outside the receiving end of a pipe, or
/// * the search tool was given no pattern.
///
/// Option handling: `-e PAT`, `--regexp PAT` and `--regexp=PAT` supply the
/// pattern (first one wins); flags listed in [`ARG_FLAGS`] swallow the next
/// token; any other `-x`/`--long` token is ignored. A bare `--` ends option
/// parsing, so every later token is positional even if it starts with `-`
/// (e.g. `grep -- -foo src` yields pattern `-foo`, path `src`).
pub fn parse_bash_search(command: &str) -> Option<(String, Option<String>)> {
    // Command substitution can hide arbitrary commands; do not guess.
    if command.contains('`') || command.contains("$(") {
        return None;
    }

    for (piped, segment) in split_top_level(command) {
        let tokens = shell_tokens(&segment)?;
        let mut tokens = tokens.as_slice();

        // Skip leading environment assignments such as `LC_ALL=C grep …`.
        while tokens.first().is_some_and(|t| t.contains('=') && !t.starts_with('-')) {
            tokens = &tokens[1..];
        }

        let Some(command_word) = tokens.first() else { continue };
        let base = command_word.rsplit('/').next().unwrap_or(command_word);

        // These feed file lists or patterns to other commands in ways this
        // parser does not model; give up on the whole command line.
        if base == "xargs" || base == "find" {
            return None;
        }
        if !SEARCH_COMMANDS.contains(&base) {
            continue;
        }
        // A search tool on the receiving end of a pipe filters stdin rather
        // than searching files.
        if piped {
            continue;
        }

        let mut pattern: Option<String> = None;
        let mut path: Option<String> = None;
        let mut options_ended = false;
        let mut rest = tokens[1..].iter();

        while let Some(token) = rest.next() {
            if !options_ended {
                if token == "--" {
                    // End-of-options marker: everything after is positional.
                    options_ended = true;
                    continue;
                }
                if let Some(value) = token.strip_prefix("--regexp=") {
                    pattern.get_or_insert_with(|| value.to_string());
                    continue;
                }
                if token == "-e" || token == "--regexp" {
                    if let Some(value) = rest.next() {
                        pattern.get_or_insert_with(|| value.to_string());
                    }
                    continue;
                }
                if ARG_FLAGS.contains(&token.as_str()) {
                    // Discard the flag's value.
                    rest.next();
                    continue;
                }
                // Any other flag. A lone `-` falls through as a positional.
                if token.starts_with('-') && token.len() > 1 {
                    continue;
                }
            }

            // Positional arguments: first the pattern, then the path.
            if pattern.is_none() {
                pattern = Some(token.to_string());
            } else if path.is_none() {
                path = Some(token.to_string());
            }
        }

        return pattern.map(|p| (p, path));
    }

    None
}
/// Splits a command line at unquoted `;`, `&`, `&&`, `|`, `|&` and `||`.
///
/// Each returned pair is `(piped, text)`: `piped` is `true` when the segment
/// follows a `|` or `|&`. Quote characters are kept in the text and
/// separators inside single or double quotes are not split on. Segments are
/// trimmed; empty segments and `cd` invocations (`cd` alone or `cd …`) are
/// dropped.
fn split_top_level(command: &str) -> Vec<(bool, String)> {
    let mut raw: Vec<(bool, String)> = Vec::new();
    let mut buf = String::new();
    let mut open_quote: Option<char> = None;
    let mut fed_by_pipe = false;
    let mut it = command.chars().peekable();

    while let Some(c) = it.next() {
        // Inside quotes everything is literal; only the matching quote closes.
        if let Some(q) = open_quote {
            if c == q {
                open_quote = None;
            }
            buf.push(c);
            continue;
        }

        match c {
            '\'' | '"' => {
                open_quote = Some(c);
                buf.push(c);
            },
            '|' | ';' | '&' => {
                // Consume the second character of a two-character operator
                // and decide whether the NEXT segment reads from a pipe.
                let next_fed = match (c, it.peek().copied()) {
                    ('|', Some('|')) => {
                        it.next();
                        false
                    },
                    ('|', Some('&')) => {
                        it.next();
                        true
                    },
                    ('|', _) => true,
                    ('&', Some('&')) => {
                        it.next();
                        false
                    },
                    _ => false,
                };
                raw.push((fed_by_pipe, std::mem::take(&mut buf)));
                fed_by_pipe = next_fed;
            },
            other => buf.push(other),
        }
    }
    raw.push((fed_by_pipe, buf));

    raw.into_iter()
        .filter_map(|(fed, text)| {
            let text = text.trim();
            let skip = text.is_empty() || text == "cd" || text.starts_with("cd ");
            (!skip).then(|| (fed, text.to_string()))
        })
        .collect()
}
/// Splits one command segment into whitespace-separated words, honouring
/// single and double quotes.
///
/// Quote characters are removed and quoted text joins the surrounding word
/// (`a'b c'd` becomes `ab cd`). An empty quoted string still produces an empty
/// word. Backslash escapes are not interpreted. Returns `None` when a quote is
/// left unterminated.
fn shell_tokens(segment: &str) -> Option<Vec<String>> {
    let mut words: Vec<String> = Vec::new();
    let mut word = String::new();
    let mut open_quote: Option<char> = None;
    // True once the current word has contained a quote, so that `''` is
    // emitted as an empty word instead of being dropped.
    let mut saw_quote = false;

    for c in segment.chars() {
        if let Some(q) = open_quote {
            if c == q {
                open_quote = None;
            } else {
                word.push(c);
            }
        } else if c == '\'' || c == '"' {
            open_quote = Some(c);
            saw_quote = true;
        } else if !c.is_whitespace() {
            word.push(c);
        } else if saw_quote || !word.is_empty() {
            words.push(std::mem::take(&mut word));
            saw_quote = false;
        }
    }

    if open_quote.is_some() {
        return None;
    }
    if saw_quote || !word.is_empty() {
        words.push(word);
    }
    Some(words)
}
/// Hook entry point. Runs the hook and always returns `Ok(())`.
///
/// Any error from `run_inner` is discarded.
// NOTE(review): presumably so a hook failure never blocks the tool call — confirm.
pub fn run() -> anyhow::Result<()> {
    match run_inner() {
        Ok(()) | Err(_) => Ok(()),
    }
}
/// Reads the hook payload from stdin and dispatches on the event name.
///
/// Input that fails to parse is replaced by `HookInput::default()`.
/// `SessionStart` goes to `session_start`; every other event (including a
/// missing event name) goes to `pretooluse`.
///
/// # Errors
/// Fails if stdin cannot be read, or if the selected handler fails.
fn run_inner() -> anyhow::Result<()> {
    let mut payload = String::new();
    std::io::stdin().read_to_string(&mut payload)?;

    let input: HookInput = serde_json::from_str(&payload).unwrap_or_default();

    if input.hook_event_name.as_deref() == Some("SessionStart") {
        session_start(&input)
    } else {
        pretooluse(&input)
    }
}
/// Prints the session-start digest to stdout.
///
/// Does nothing unless `input.source` is `startup`, `clear` or `compact`, and
/// nothing when no `rag-rat.toml` is found above `input.cwd`. When the
/// database file is missing, prints the "index not built" notice instead of
/// the digest. After the digest, the cached version-check line is printed if
/// one is available.
///
/// # Errors
/// Fails if the index cannot be opened read-only or the orientation query
/// fails.
fn session_start(input: &HookInput) -> anyhow::Result<()> {
    if !matches!(input.source.as_deref(), Some("startup" | "clear" | "compact")) {
        return Ok(());
    }

    let cwd = Path::new(&input.cwd);
    let config = match find_config(cwd) {
        Some(config) => config,
        None => return Ok(()),
    };

    if !config.database.is_file() {
        print!("{}", db_absent_notice());
        return Ok(());
    }

    let conn = IndexConnection::open_read_only(&config.database)?;
    let orientation =
        rag_rat_core::query::orientation::orientation(conn.connection(), &config.root, cwd)?;

    let (live, enabled) = watcher_state(&config);
    print!("{}", format_digest(&orientation, live, enabled));

    match version_check_line(&config) {
        Some(line) => print!("{line}"),
        None => {},
    }
    Ok(())
}
/// Returns the version line for the digest, built from the cached
/// version-check status, or `None` when no cached status is available or it
/// carries no latest version.
fn version_check_line(config: &Config) -> Option<String> {
    rag_rat_core::version_check::cached_status(config.version_check.enabled, &config.database)
        .and_then(|status| version_line(&status))
}
/// Formats a one-line version notice from a version-check status.
///
/// Returns `None` when the latest version is unknown. Otherwise the line says
/// either that an update is available (with the update command), that the
/// current version is the latest on crates.io, or that it is ahead of it.
fn version_line(status: &rag_rat_core::version_check::VersionStatus) -> Option<String> {
    let latest = status.latest_version.as_deref()?;
    let current = &status.current_version;

    let line = if status.update_available {
        format!(
            "\n⚠ rag-rat update available: {} → {} — run `{}`\n",
            current, latest, status.update_command
        )
    } else if latest == status.current_version {
        format!("\nrag-rat {current} (latest on crates.io)\n")
    } else {
        format!("\nrag-rat {current} (ahead of crates.io latest {latest})\n")
    };
    Some(line)
}
/// Handles a `PreToolUse` event.
///
/// `Write`, `Edit` and `MultiEdit` go to the clone check. For other tools, a
/// search is extracted (if any), the listener is asked for context, and when
/// the listener gives no usable reply the context is composed locally from the
/// index. If context results, an "allow" decision carrying it as
/// `additionalContext` is printed as JSON on stdout.
fn pretooluse(input: &HookInput) -> anyhow::Result<()> {
    match input.tool_name.as_str() {
        "Write" | "Edit" | "MultiEdit" => return clone_check(input),
        _ => {},
    }

    let search = match extract_search(input) {
        Some(search) => search,
        None => return Ok(()),
    };
    let config = match find_config(Path::new(&input.cwd)) {
        Some(config) => config,
        None => return Ok(()),
    };

    // `None` from the listener means "no usable reply"; only then fall back.
    let context = match ask_listener(&config, &input.session_id, &input.cwd, &search) {
        Some(reply) => reply,
        None => fallback_compose(&config, &input.cwd, &search),
    };
    let Some(context) = context else { return Ok(()) };

    let payload = serde_json::json!({
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "allow",
            "additionalContext": context,
        }
    });
    println!("{payload}");
    Ok(())
}
/// Decides whether the clone check is skipped because of repository size.
///
/// The check is skipped only when the database is not `indexed` for clone
/// checks and `function_count` exceeds `MAX_CLONE_CHECK_FUNCTIONS`.
fn clone_check_skipped_for_size(indexed: bool, function_count: u64) -> bool {
    if indexed {
        return false;
    }
    function_count > MAX_CLONE_CHECK_FUNCTIONS
}
/// Warns when the text a `Write`/`Edit`/`MultiEdit` call is about to write
/// matches functions already in the index.
///
/// Silently does nothing when there is no config, no database file, the
/// database cannot be opened for this config, the size guard trips, or the
/// tool input yields no text to check. A failed function-count query is
/// treated as `u64::MAX`, and a failed index-generation query as "not
/// indexed". When matches are found, an "allow" decision carrying the warning
/// as `additionalContext` is printed as JSON on stdout.
///
/// # Errors
/// Fails if opening the database or the clone query fails.
fn clone_check(input: &HookInput) -> anyhow::Result<()> {
    let config = match find_config(Path::new(&input.cwd)) {
        Some(config) => config,
        None => return Ok(()),
    };
    if !config.database.is_file() {
        return Ok(());
    }
    let db = match IndexDatabase::try_open_config_read_only(&config)? {
        Some(db) => db,
        None => return Ok(()),
    };

    let indexed = matches!(db.clone_check_indexed_generation(), Ok(Some(_)));
    let function_count = db.clone_check_function_count().unwrap_or(u64::MAX);
    if clone_check_skipped_for_size(indexed, function_count) {
        return Ok(());
    }

    let inputs = extract_clone_inputs(input, &config.root);
    if inputs.is_empty() {
        return Ok(());
    }

    let hits = db.clones_of_texts(&inputs, HOOK_NEAR_THRESHOLD)?;
    let Some(context) = format_clone_warning(&hits) else { return Ok(()) };

    let payload = serde_json::json!({
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "allow",
            "additionalContext": context,
        }
    });
    println!("{payload}");
    Ok(())
}
/// Builds the clone-check inputs for a `Write`, `Edit` or `MultiEdit` call.
///
/// Returns an empty vector when `file_path` is missing or not a string, when
/// `Language::from_path` does not recognise the file, or when the tool carries
/// no text. The text comes from `content` (Write), `new_string` (Edit) or each
/// edit's `new_string` (MultiEdit). The recorded path is `file_path` with
/// `root` stripped when it is a prefix, otherwise `file_path` unchanged.
fn extract_clone_inputs(input: &HookInput, root: &Path) -> Vec<CloneCheckInput> {
    let args = &input.tool_input;

    let Some(file_path) = args.get("file_path").and_then(|v| v.as_str()) else {
        return Vec::new();
    };
    let abs = Path::new(file_path);
    let Some(language) = Language::from_path(abs) else {
        return Vec::new();
    };
    let rel = abs.strip_prefix(root).unwrap_or(abs).to_path_buf();

    // Zero-or-one text taken from a single string-valued key.
    let single = |key: &str| -> Vec<String> {
        args.get(key).and_then(|v| v.as_str()).map(str::to_string).into_iter().collect()
    };

    let texts: Vec<String> = match input.tool_name.as_str() {
        "Write" => single("content"),
        "Edit" => single("new_string"),
        "MultiEdit" => match args.get("edits").and_then(|v| v.as_array()) {
            Some(edits) => edits
                .iter()
                .filter_map(|edit| {
                    edit.get("new_string").and_then(|v| v.as_str()).map(str::to_string)
                })
                .collect(),
            None => Vec::new(),
        },
        _ => Vec::new(),
    };

    let mut inputs = Vec::with_capacity(texts.len());
    for text in texts {
        inputs.push(CloneCheckInput { text, language, path: rel.clone() });
    }
    inputs
}
/// Renders clone matches as a warning, or `None` when there are none.
///
/// Each match becomes one bullet naming the new function, its line, and the
/// existing functions it duplicates. Exact matches read "identical to";
/// others show the similarity as a rounded percentage. At most
/// `MAX_CLONE_REFS` references are listed per match; the rest are counted as
/// "(+N more)".
fn format_clone_warning(matches: &[TextCloneMatch]) -> Option<String> {
    // Nothing to report for an empty match list.
    matches.first()?;

    let mut warning = String::from(
        "▶ rag-rat clone check — code you're writing duplicates existing functions:\n",
    );

    for hit in matches {
        let relation = if hit.kind == "exact" {
            String::from("identical to")
        } else {
            format!("~{:.0}% similar to", hit.similarity * 100.0)
        };

        let listed: Vec<_> = hit.clone_of.iter().take(MAX_CLONE_REFS).cloned().collect();
        let listed = listed.join(", ");

        let overflow = match hit.clone_of.len().saturating_sub(MAX_CLONE_REFS) {
            0 => String::new(),
            extra => format!(" (+{extra} more)"),
        };

        warning.push_str(&format!(
            " • `{}` (line {}) is {} {}{}\n",
            hit.name, hit.start_line, relation, listed, overflow,
        ));
    }

    warning.push_str(
        "Prefer reusing the existing function(s) over duplicating — impact_surface / \
         symbol_lookup to inspect them.\n",
    );
    Some(warning)
}
/// Text shown at session start when the index database does not exist: the
/// attribution header (trailing whitespace trimmed) followed by a hint to run
/// `rag-rat index`.
fn db_absent_notice() -> String {
    let mut notice = String::from(ATTRIBUTION_HEADER.trim_end());
    notice.push_str("\nindex not built — run 'rag-rat index'\n");
    notice
}
pub fn watcher_state(config: &Config) -> (bool /* live */, bool /* enabled */) {
let enabled = config.watch.enabled && std::env::var_os("RAG_RAT_NO_WATCH").is_none();
let base_dir =
config.database.parent().map(Path::to_path_buf).unwrap_or_else(|| config.root.clone());
let election_path = locks::election_lock_path(&base_dir, &config.root);
let live = matches!(locks::FileLock::try_acquire(&election_path), Ok(None));
(live, enabled)
}
/// Banner that opens the session-start output: `format_digest` writes it
/// verbatim at the top of the digest, and `db_absent_notice` uses it with
/// trailing whitespace trimmed. The leading `\` drops the first newline so the
/// text starts at the `▶` line.
const ATTRIBUTION_HEADER: &str = "\
▶ rag-rat repo intelligence — injected by the rag-rat MCP server (prefer it over grep/cat)
concept → semantic_search · callers/callees → find_callers/trace_callees
before editing a symbol → impact_surface · exact symbol → symbol_lookup
why/rationale → repo memories ride along; memory_search to dig
";
/// Shortens a workspace path for display.
///
/// `crates/<name>/src/<rest>` becomes `<rest>`; any other path is returned
/// unchanged.
pub fn short_path(p: &str) -> String {
    let mut pieces = p.splitn(4, '/');
    match (pieces.next(), pieces.next(), pieces.next(), pieces.next()) {
        (Some("crates"), Some(_), Some("src"), Some(rest)) => rest.to_string(),
        _ => p.to_string(),
    }
}
pub fn format_digest(o: &Orientation, live: bool, enabled: bool) -> String {
let mut out = String::with_capacity(2048);
out.push_str(ATTRIBUTION_HEADER);
out.push('\n');
if let Some(ref title) = o.tree.root_memory_title {
out.push_str(title);
out.push('\n');
}
out.push_str(&format!("LAYOUT ({} files · ‹…› = directory memory)\n", o.total_files));
for node in &o.tree.nodes {
let indent = " ".repeat(node.depth as usize);
if let Some(ref title) = node.memory_title {
out.push_str(&format!("{}{} ‹{}›\n", indent, node.label, title));
} else {
out.push_str(&format!("{}{}\n", indent, node.label));
}
}
if o.tree.truncated > 0 {
out.push_str(&format!(" … (+{} more)\n", o.tree.truncated));
}
if !o.load_bearing.is_empty() {
let parts: Vec<String> = o
.load_bearing
.iter()
.map(|(p, fi)| format!("{} (fan_in {})", short_path(p), fi))
.collect();
out.push_str(&format!("load-bearing: {}\n", parts.join(" · ")));
}
{
let mut line_parts: Vec<String> = Vec::new();
if !o.recent_commits.is_empty() {
line_parts.push(format!("recent: {}", o.recent_commits.join(" · ")));
}
if !o.hot_files.is_empty() {
let short_hot: Vec<String> = o.hot_files.iter().map(|p| short_path(p)).collect();
line_parts.push(format!("hot: {}", short_hot.join(", ")));
}
if !line_parts.is_empty() {
out.push_str(&format!("{}\n", line_parts.join(" · ")));
}
}
if !o.active_memory_titles.is_empty() {
let mut mem_line = o.active_memory_titles.join(" · ");
let extra = (o.active_memory_total as usize).saturating_sub(o.active_memory_titles.len());
if extra > 0 {
mem_line.push_str(&format!(" (+{extra} more)"));
}
out.push_str(&format!("memories: {mem_line}\n"));
}
let fresh = o.head == o.indexed_head || o.head.is_empty() || o.indexed_head.is_empty();
let health_status = match (live, enabled, fresh) {
(true, _, true) => "index fresh (watcher live)".to_string(),
(true, _, false) => "index syncing (watcher live)".to_string(),
(false, true, false) => "index stale — start the rag-rat MCP server".to_string(),
(false, false, false) => "watcher off; index stale — run 'rag-rat index'".to_string(),
_ => "index fresh".to_string(),
};
let active = o.anchor.current + o.anchor.relocated;
let mut health = format!("health: {} · memories {} active", health_status, active);
if o.anchor.stale > 0 {
health.push_str(&format!("/{} stale", o.anchor.stale));
}
if o.anchor.gone > 0 {
health.push_str(&format!(" · {} gone → run 'rag-rat memory doctor'", o.anchor.gone));
}
if o.parser_failures > 0 {
health.push_str(&format!(" · parser failures: {}", o.parser_failures));
}
out.push_str(&health);
out.push('\n');
out
}
/// Finds and loads the nearest `rag-rat.toml`, searching `start` and then each
/// of its ancestors.
///
/// Returns `None` when no such file exists, or when the first one found fails
/// to load (the search does not continue past it).
fn find_config(start: &Path) -> Option<Config> {
    let candidate =
        start.ancestors().map(|dir| dir.join("rag-rat.toml")).find(|path| path.is_file())?;
    Config::load(&candidate).ok()
}
/// Asks the listener on the hook socket for grep-augment context.
///
/// The outer `Option` is `None` when there is no usable reply: connecting,
/// configuring timeouts, writing, reading or parsing failed; the reply's `v`
/// is missing or not `1`; or the reply has no `context` key. Callers treat
/// that as "fall back". `Some(inner)` is the listener's answer: `inner` is the
/// context string, or `None` when `context` is present but not a string.
///
/// The request and the reply are each one line of JSON. Both directions use
/// `SOCKET_BUDGET` as their timeout. On non-unix targets this always returns
/// `None`.
fn ask_listener(
    config: &Config,
    session_id: &str,
    cwd: &str,
    search: &Search,
) -> Option<Option<String>> {
    #[cfg(unix)]
    {
        use std::io::{BufRead, BufReader, Write as _};
        use std::os::unix::net::UnixStream;

        let socket = socket_path(config);
        let stream = UnixStream::connect(&socket).ok()?;
        stream.set_read_timeout(Some(SOCKET_BUDGET)).ok()?;
        stream.set_write_timeout(Some(SOCKET_BUDGET)).ok()?;

        let request = serde_json::json!({
            "v": 1, "kind": "grep_augment", "session_id": session_id,
            "cwd": cwd,
            "pattern": search.pattern, "search_path": search.search_path,
            "source": search.source,
        });

        // Write through a cloned handle; the original is kept for reading.
        let mut writer = stream.try_clone().ok()?;
        writeln!(writer, "{request}").ok()?;

        let mut reader = BufReader::new(stream);
        let mut reply_line = String::new();
        reader.read_line(&mut reply_line).ok()?;

        let reply: serde_json::Value = serde_json::from_str(&reply_line).ok()?;
        let version = reply.get("v")?.as_u64()?;
        if version != 1 {
            return None;
        }
        let context = reply.get("context")?;
        Some(context.as_str().map(str::to_string))
    }
    #[cfg(not(unix))]
    {
        let _ = (config, session_id, cwd, search);
        None
    }
}
/// Path of the hook listener socket for this config, as computed by
/// `locks::hook_socket_path_for`. Unix only.
#[cfg(unix)]
fn socket_path(config: &Config) -> PathBuf {
locks::hook_socket_path_for(config)
}
/// Composes grep-augment context directly from the index, for use when the
/// listener gives no usable reply.
///
/// Opens the database read-only, installs the worktree scope view for `cwd`,
/// and runs `grep_augment::compose` with a default dedupe filter. Returns
/// `None` if any step fails or if compose produces nothing.
fn fallback_compose(config: &Config, cwd: &str, search: &Search) -> Option<String> {
    let conn = IndexConnection::open_read_only(&config.database).ok()?;

    rag_rat_core::index::install_worktree_scope_view(
        conn.connection(),
        &config.root,
        Path::new(cwd),
    )
    .ok()?;

    let dedupe = grep_augment::DedupeFilter::default();
    let composed = grep_augment::compose(
        conn.connection(),
        &search.pattern,
        search.search_path.as_deref(),
        &dedupe,
    )
    .ok()??;

    Some(composed.context)
}
#[cfg(test)]
mod tests;