hematite/tools/
file_ops.rs

1use serde_json::Value;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6use walkdir::WalkDir;
7
8// ── Ghost Ledger ──────────────────────────────────────────────────────────────
9
/// Maximum number of `.bak` files kept in the ghost directory after pruning.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trims `ghost_dir` down to the newest [`MAX_GHOST_BACKUPS`] `.bak` files and
/// rewrites `ledger.txt` so that it only lists backups that survived.
///
/// The function is best-effort: an unreadable directory or ledger ends the
/// work silently, and failures to delete a backup or rewrite the ledger are
/// ignored.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    // Only files whose extension is `bak` (case-insensitive) are candidates.
    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("bak"))
                .unwrap_or(false)
        })
        .collect();

    // Read each modification time exactly once: the sort then works on a
    // consistent snapshot instead of hitting the filesystem per comparison.
    // Ascending sort + reverse puts the newest first; entries whose mtime is
    // unavailable (`None`) end up last and are therefore pruned first.
    backups.sort_by_cached_key(|entry| entry.metadata().and_then(|meta| meta.modified()).ok());
    backups.reverse();

    // Paths are compared with forward slashes so Windows-style ledger entries match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .take(MAX_GHOST_BACKUPS)
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in backups.into_iter().skip(MAX_GHOST_BACKUPS) {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Ledger lines have the form `target|backup`; lines without a separator
    // or pointing at a pruned backup are dropped.
    let filtered_lines: Vec<&str> = content
        .lines()
        .filter(|line| match line.split_once('|') {
            Some((_, backup)) => retained.contains(&backup.replace('\\', "/")),
            None => false,
        })
        .collect();

    let rewritten = if filtered_lines.is_empty() {
        String::new()
    } else {
        filtered_lines.join("\n") + "\n"
    };
    let _ = fs::write(ledger_path, rewritten);
}
71
72fn save_ghost_backup(target_path: &str, content: &str) {
73    let ws = workspace_root();
74
75    // Phase 1: Try Git Ghost Snapshot
76    if crate::agent::git::is_git_repo(&ws) {
77        let _ = crate::agent::git::create_ghost_snapshot(&ws);
78    }
79
80    // Phase 2: Fallback to local file backup (Ghost Ledger)
81    let ghost_dir = ws.join(".hematite").join("ghost");
82    let _ = fs::create_dir_all(&ghost_dir);
83    let ts = std::time::SystemTime::now()
84        .duration_since(std::time::UNIX_EPOCH)
85        .unwrap()
86        .as_millis();
87    let safe_name = Path::new(target_path)
88        .file_name()
89        .unwrap_or_default()
90        .to_string_lossy();
91    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
92
93    if fs::write(&backup_file, content).is_ok() {
94        use std::io::Write;
95        if let Ok(mut f) = fs::OpenOptions::new()
96            .create(true)
97            .append(true)
98            .open(ghost_dir.join("ledger.txt"))
99        {
100            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
101        }
102        prune_ghost_backups(&ghost_dir);
103    }
104}
105
106pub fn pop_ghost_ledger() -> Result<String, String> {
107    let ws = workspace_root();
108    let ghost_dir = ws.join(".hematite").join("ghost");
109    let ledger_path = ghost_dir.join("ledger.txt");
110
111    if !ledger_path.exists() {
112        return Err("Ghost Ledger is empty — no edits to undo".into());
113    }
114
115    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
116    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
117
118    if lines.is_empty() {
119        return Err("Ghost Ledger is empty".into());
120    }
121
122    let last_line = lines.pop().unwrap();
123    let parts: Vec<&str> = last_line.splitn(2, '|').collect();
124    if parts.len() != 2 {
125        return Err("Corrupted ledger entry".into());
126    }
127
128    let target_path = parts[0];
129    let backup_path = parts[1];
130
131    // Priority 1: Try Git Rollback
132    if crate::agent::git::is_git_repo(&ws) {
133        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
134            let _ = fs::remove_file(backup_path);
135            let new_ledger = lines.join("\n");
136            let _ = fs::write(
137                &ledger_path,
138                if new_ledger.is_empty() {
139                    String::new()
140                } else {
141                    new_ledger + "\n"
142                },
143            );
144            return Ok(msg);
145        }
146    }
147
148    // Priority 2: Standard File Rollback
149    let original_content =
150        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
151    let abs_target = ws.join(target_path);
152    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
153
154    let new_ledger = lines.join("\n");
155    let _ = fs::write(
156        &ledger_path,
157        if new_ledger.is_empty() {
158            String::new()
159        } else {
160            new_ledger + "\n"
161        },
162    );
163    let _ = fs::remove_file(backup_path);
164
165    Ok(format!("Restored {} from Ghost Ledger", target_path))
166}
167
168// ── read_file ─────────────────────────────────────────────────────────────────
169
170pub async fn read_file(args: &Value) -> Result<String, String> {
171    let path = require_str(args, "path")?;
172    let offset = get_usize_arg(args, "offset");
173    let limit = get_usize_arg(args, "limit");
174
175    let abs = safe_path(path)?;
176    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
177
178    let lines: Vec<&str> = raw.lines().collect();
179    let total = lines.len();
180    let start = offset.unwrap_or(0).min(total);
181    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
182
183    let mut content = lines[start..end].join("\n");
184    if end < total {
185        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
186        content.push_str(&format!("This file has {} more lines below. ", total - end));
187        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
188                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
189    }
190
191    Ok(format!(
192        "[{path}  lines {}-{} of {}]\n{}",
193        start + 1,
194        end,
195        total,
196        content
197    ))
198}
199
200// ── inspect_lines ─────────────────────────────────────────────────────────────
201
202pub async fn inspect_lines(args: &Value) -> Result<String, String> {
203    let path = require_str(args, "path")?;
204    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
205    let end_line = get_usize_arg(args, "end_line");
206
207    let abs = safe_path(path)?;
208    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
209
210    let lines: Vec<&str> = raw.lines().collect();
211    let total = lines.len();
212
213    let start = start_line.saturating_sub(1).min(total);
214    let end = end_line.unwrap_or(total).min(total);
215
216    if start >= end && total > 0 {
217        return Err(format!(
218            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
219            end_line.unwrap_or(total)
220        ));
221    }
222
223    let mut output = format!(
224        "[inspect_lines: {path} lines {}-{} of {}]\n",
225        start + 1,
226        end,
227        total
228    );
229    for i in start..end {
230        output.push_str(&format!("[{:>4}] | {}\n", i + 1, lines[i]));
231    }
232
233    Ok(output)
234}
235
236// ── tail_file ─────────────────────────────────────────────────────────────────
237
238pub async fn tail_file(args: &Value) -> Result<String, String> {
239    let path = require_str(args, "path")?;
240    let n = args
241        .get("lines")
242        .and_then(|v| v.as_u64())
243        .unwrap_or(50)
244        .min(500) as usize;
245    let grep_pat = args.get("grep").and_then(|v| v.as_str());
246
247    let abs = safe_path(path)?;
248    let raw = fs::read_to_string(&abs).map_err(|e| format!("tail_file: {e} ({path})"))?;
249
250    let all_lines: Vec<&str> = raw.lines().collect();
251    let total = all_lines.len();
252
253    // Apply optional grep filter before slicing — model asks for the last N
254    // matching lines, not the last N lines containing maybe 0 matches.
255    let filtered: Vec<(usize, &str)> = if let Some(pat) = grep_pat {
256        let re = regex::Regex::new(pat)
257            .map_err(|e| format!("tail_file: invalid grep pattern '{pat}': {e}"))?;
258        all_lines
259            .iter()
260            .enumerate()
261            .filter(|(_, l)| re.is_match(l))
262            .map(|(i, l)| (i, *l))
263            .collect()
264    } else {
265        all_lines.iter().enumerate().map(|(i, l)| (i, *l)).collect()
266    };
267
268    let total_filtered = filtered.len();
269    let skip = total_filtered.saturating_sub(n);
270    let window = &filtered[skip..];
271
272    if window.is_empty() {
273        let note = if grep_pat.is_some() {
274            format!(" matching '{}'", grep_pat.unwrap())
275        } else {
276            String::new()
277        };
278        return Ok(format!(
279            "[tail_file: {path} — no lines{note} found (total {total} lines)]"
280        ));
281    }
282
283    let first_abs = window[0].0 + 1;
284    let last_abs = window[window.len() - 1].0 + 1;
285    let mut out = format!(
286        "[tail_file: {path} — lines {first_abs}–{last_abs} of {total} (last {n} of {total_filtered} matched)]\n"
287    );
288    for (abs_idx, line) in window {
289        out.push_str(&format!("[{:>5}] {}\n", abs_idx + 1, line));
290    }
291
292    Ok(out)
293}
294
295// ── write_file ────────────────────────────────────────────────────────────────
296
297pub async fn write_file(args: &Value) -> Result<String, String> {
298    let path = require_str(args, "path")?;
299    let content = require_str(args, "content")?;
300
301    let abs = safe_path_allow_new(path)?;
302    if let Some(parent) = abs.parent() {
303        fs::create_dir_all(parent)
304            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
305    }
306
307    let existed = abs.exists();
308    if existed {
309        if let Ok(orig) = fs::read_to_string(&abs) {
310            save_ghost_backup(path, &orig);
311        }
312    }
313
314    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
315
316    let action = if existed { "Updated" } else { "Created" };
317    Ok(format!("{action} {path}  ({} bytes)", content.len()))
318}
319
320// ── edit_file ─────────────────────────────────────────────────────────────────
321
322pub async fn edit_file(args: &Value) -> Result<String, String> {
323    let path = require_str(args, "path")?;
324    let search = require_str(args, "search")?;
325    let replace = require_str(args, "replace")?;
326    let replace_all = args
327        .get("replace_all")
328        .and_then(|v| v.as_bool())
329        .unwrap_or(false);
330
331    if search == replace {
332        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
333    }
334
335    let abs = safe_path(path)?;
336    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
337    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
338    let original = raw.replace("\r\n", "\n");
339
340    save_ghost_backup(path, &original);
341
342    let search_trimmed = search.trim();
343    let search_non_ws_len = search_trimmed
344        .chars()
345        .filter(|c| !c.is_whitespace())
346        .count();
347    let search_line_count = search_trimmed.lines().count();
348    if search_non_ws_len < 12 && search_line_count <= 1 {
349        return Err(format!(
350            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
351             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
352        ));
353    }
354
355    // ── Exact match first ────────────────────────────────────────────────────
356    let (effective_search, was_repaired) = if original.contains(search) {
357        let exact_match_count = original.matches(search).count();
358        if exact_match_count > 1 && !replace_all {
359            return Err(format!(
360                "edit_file: search string matched {} times in {path}.\n\
361                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
362                exact_match_count
363            ));
364        }
365        (search.to_string(), false)
366    } else {
367        // ── Fuzzy repair: progressive normalisation ───────────────────────
368        // Level 1: rstrip only — preserves indentation, strips trailing spaces.
369        // Level 2: full strip — corrects indentation mismatches.
370        // Level 3: cross-file hint — tells the model which file has the string.
371        let span =
372            rstrip_find_span(&original, search).or_else(|| fuzzy_find_span(&original, search));
373        match span {
374            Some(span) => {
375                let real_slice = original[span.clone()].to_string();
376                (real_slice, true)
377            }
378            None => {
379                let hint = nearest_lines(&original, search);
380                let cross_hint = find_search_in_workspace(search, path)
381                    .map(|found| format!("\nNote: search string found in '{found}' — did you mean to edit that file?"))
382                    .unwrap_or_default();
383                return Err(format!(
384                    "edit_file: search string not found in {path}.\n\
385                     The 'search' value must match the file content exactly \
386                     (including whitespace/indentation).\n\
387                     {hint}{cross_hint}"
388                ));
389            }
390        }
391    };
392
393    // When a fuzzy match was used, adjust the replace string's indentation to
394    // match the file's actual indent level (not the model's potentially-wrong indent).
395    let effective_replace = if was_repaired {
396        adjust_replace_indent(search, effective_search.as_str(), replace)
397    } else {
398        replace.to_string()
399    };
400
401    let updated = if replace_all {
402        original.replace(effective_search.as_str(), effective_replace.as_str())
403    } else {
404        original.replacen(effective_search.as_str(), effective_replace.as_str(), 1)
405    };
406
407    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
408
409    let removed = original.lines().count();
410    let added = updated.lines().count();
411    let repair_note = if was_repaired {
412        "  [indent auto-corrected]"
413    } else {
414        ""
415    };
416
417    let mut diff_block = String::new();
418    diff_block.push_str("\n--- DIFF \n");
419    for line in effective_search.lines() {
420        diff_block.push_str(&format!("- {}\n", line));
421    }
422    for line in effective_replace.lines() {
423        diff_block.push_str(&format!("+ {}\n", line));
424    }
425
426    Ok(format!(
427        "Edited {path}  ({} -> {} lines){repair_note}{}",
428        removed, added, diff_block
429    ))
430}
431
432// ── patch_hunk ────────────────────────────────────────────────────────────────
433
434pub async fn patch_hunk(args: &Value) -> Result<String, String> {
435    let path = require_str(args, "path")?;
436    let start_line = require_usize(args, "start_line")?;
437    let end_line = require_usize(args, "end_line")?;
438    let replacement = require_str(args, "replacement")?;
439
440    let abs = safe_path(path)?;
441    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
442
443    save_ghost_backup(path, &original);
444
445    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
446    let total = lines.len();
447
448    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
449        return Err(format!(
450            "patch_hunk: invalid line range {}-{} for file with {} lines",
451            start_line, end_line, total
452        ));
453    }
454
455    let mut updated_lines = Vec::new();
456    // 0-indexed adjustment
457    let s_idx = start_line - 1;
458    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
459
460    // 1. Lines before the hunk
461    updated_lines.extend_from_slice(&lines[0..s_idx]);
462
463    // 2. The hunk replacement
464    for line in replacement.lines() {
465        updated_lines.push(line.to_string());
466    }
467
468    // 3. Lines after the hunk
469    if e_idx < total {
470        updated_lines.extend_from_slice(&lines[e_idx..total]);
471    }
472
473    let updated_content = updated_lines.join("\n");
474    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
475
476    let mut diff = String::new();
477    diff.push_str("\n--- HUNK DIFF ---\n");
478    for i in s_idx..e_idx {
479        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
480    }
481    for line in replacement.lines() {
482        diff.push_str(&format!("+ {}\n", line.trim_end()));
483    }
484
485    Ok(format!(
486        "Patched {path} lines {}-{} ({} -> {} lines){}",
487        start_line,
488        end_line,
489        (e_idx - s_idx),
490        replacement.lines().count(),
491        diff
492    ))
493}
494
495// ── multi_search_replace ──────────────────────────────────────────────────────
496
497#[derive(serde::Deserialize)]
498struct SearchReplaceHunk {
499    search: String,
500    replace: String,
501}
502
503pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
504    let path = require_str(args, "path")?;
505    let hunks_val = args
506        .get("hunks")
507        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
508
509    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
510        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
511
512    if hunks.is_empty() {
513        return Err("multi_search_replace: hunks array is empty".to_string());
514    }
515
516    let abs = safe_path(path)?;
517    let raw =
518        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
519    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
520    let original = raw.replace("\r\n", "\n");
521
522    save_ghost_backup(path, &original);
523
524    let mut current_content = original.clone();
525    let mut diff = String::new();
526    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
527
528    let mut patched_hunks = 0;
529
530    for (i, hunk) in hunks.iter().enumerate() {
531        let match_count = current_content.matches(&hunk.search).count();
532
533        let (effective_search, effective_replace) = if match_count == 1 {
534            // Exact match — use as-is.
535            (hunk.search.clone(), hunk.replace.clone())
536        } else if match_count == 0 {
537            // Progressive fuzzy fallback: rstrip → full-strip.
538            let span = rstrip_find_span(&current_content, &hunk.search)
539                .or_else(|| fuzzy_find_span(&current_content, &hunk.search));
540            match span {
541                Some(span) => {
542                    let real_slice = current_content[span].to_string();
543                    let adjusted_replace =
544                        adjust_replace_indent(&hunk.search, &real_slice, &hunk.replace);
545                    (real_slice, adjusted_replace)
546                }
547                None => {
548                    return Err(format!(
549                        "multi_search_replace: hunk {} search string not found in file.",
550                        i
551                    ));
552                }
553            }
554        } else {
555            return Err(format!(
556                "multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.",
557                i, match_count
558            ));
559        };
560
561        diff.push_str(&format!("\n@@ Hunk {} @@\n", i + 1));
562        for line in effective_search.lines() {
563            diff.push_str(&format!("- {}\n", line.trim_end()));
564        }
565        for line in effective_replace.lines() {
566            diff.push_str(&format!("+ {}\n", line.trim_end()));
567        }
568
569        current_content = current_content.replacen(&effective_search, &effective_replace, 1);
570        patched_hunks += 1;
571    }
572
573    fs::write(&abs, &current_content)
574        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
575
576    Ok(format!(
577        "Modified {} hunks in {} using exact search-and-replace.{}",
578        patched_hunks, path, diff
579    ))
580}
581
582// ── list_files ────────────────────────────────────────────────────────────────
583
584pub async fn list_files(args: &Value) -> Result<String, String> {
585    let started = Instant::now();
586    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
587    let ext_filter = args.get("extension").and_then(|v| v.as_str());
588
589    let base = safe_path(base_str)?;
590
591    let mut files: Vec<PathBuf> = Vec::new();
592    let mut scanned_count = 0;
593    for entry in WalkDir::new(&base).follow_links(false) {
594        scanned_count += 1;
595        if scanned_count > 25_000 {
596            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
597        }
598        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
599        if !entry.file_type().is_file() {
600            continue;
601        }
602        let p = entry.path();
603
604        // Skip hidden dirs / target / node_modules
605        if path_has_hidden_segment(p) {
606            continue;
607        }
608
609        if let Some(ext) = ext_filter {
610            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
611                continue;
612            }
613        }
614        files.push(p.to_path_buf());
615    }
616
617    // Sort by modification time (newest first).
618    files.sort_by_key(|p| {
619        fs::metadata(p)
620            .and_then(|m| m.modified())
621            .ok()
622            .map(std::cmp::Reverse)
623    });
624
625    let total = files.len();
626    const LIMIT: usize = 200;
627    let truncated = total > LIMIT;
628    let shown: Vec<String> = files
629        .into_iter()
630        .take(LIMIT)
631        .map(|p| p.display().to_string())
632        .collect();
633
634    let ms = started.elapsed().as_millis();
635    let mut out = format!(
636        "{} file(s) in {}  ({ms}ms){}",
637        total.min(LIMIT),
638        base_str,
639        if truncated {
640            "  [truncated at 200]"
641        } else {
642            ""
643        }
644    );
645    out.push('\n');
646    out.push_str(&shown.join("\n"));
647    Ok(out)
648}
649
650// ── grep_files ────────────────────────────────────────────────────────────────
651
652pub async fn grep_files(args: &Value) -> Result<String, String> {
653    let pattern = require_str(args, "pattern")?;
654    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
655    let ext_filter = args.get("extension").and_then(|v| v.as_str());
656    let case_insensitive = args
657        .get("case_insensitive")
658        .and_then(|v| v.as_bool())
659        .unwrap_or(true);
660    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
661    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
662    let offset = get_usize_arg(args, "offset").unwrap_or(0);
663
664    // Context lines: `context` sets both before+after; `before`/`after` override individually.
665    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
666    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
667    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
668
669    let base = safe_path(base_str)?;
670
671    let regex = regex::RegexBuilder::new(pattern)
672        .case_insensitive(case_insensitive)
673        .build()
674        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
675
676    // ── files_only mode ───────────────────────────────────────────────────────
677    if files_only {
678        let mut matched_files: Vec<String> = Vec::new();
679        let mut scanned_count = 0;
680
681        for entry in WalkDir::new(&base).follow_links(false) {
682            scanned_count += 1;
683            if scanned_count > 25_000 {
684                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
685            }
686            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
687            if !entry.file_type().is_file() {
688                continue;
689            }
690            let p = entry.path();
691            if path_has_hidden_segment(p) {
692                continue;
693            }
694            if let Some(ext) = ext_filter {
695                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
696                    continue;
697                }
698            }
699            let Ok(contents) = fs::read_to_string(p) else {
700                continue;
701            };
702            if contents.lines().any(|line| regex.is_match(line)) {
703                matched_files.push(p.display().to_string());
704            }
705        }
706
707        if matched_files.is_empty() {
708            return Ok(format!("No files matching '{pattern}' in {base_str}"));
709        }
710
711        let total = matched_files.len();
712        let page: Vec<_> = matched_files
713            .into_iter()
714            .skip(offset)
715            .take(head_limit)
716            .collect();
717        let showing = page.len();
718        let mut out = format!("{total} file(s) match '{pattern}'");
719        if offset > 0 || showing < total {
720            out.push_str(&format!(
721                " [showing {}-{} of {total}]",
722                offset + 1,
723                offset + showing
724            ));
725        }
726        out.push('\n');
727        out.push_str(&page.join("\n"));
728        return Ok(out);
729    }
730
731    // ── content mode with optional context lines ──────────────────────────────
732
733    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
734    struct Hunk {
735        path: String,
736        /// (line_number_1_indexed, line_text, is_match)
737        lines: Vec<(usize, String, bool)>,
738    }
739
740    let mut hunks: Vec<Hunk> = Vec::new();
741    let mut total_matches = 0usize;
742    let mut files_matched = 0usize;
743    let mut scanned_count = 0;
744
745    for entry in WalkDir::new(&base).follow_links(false) {
746        scanned_count += 1;
747        if scanned_count > 25_000 {
748            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
749        }
750        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
751        if !entry.file_type().is_file() {
752            continue;
753        }
754        let p = entry.path();
755        if path_has_hidden_segment(p) {
756            continue;
757        }
758        if let Some(ext) = ext_filter {
759            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
760                continue;
761            }
762        }
763        let Ok(contents) = fs::read_to_string(p) else {
764            continue;
765        };
766        let all_lines: Vec<&str> = contents.lines().collect();
767        let n = all_lines.len();
768
769        // Find all match indices in this file.
770        let match_idxs: Vec<usize> = all_lines
771            .iter()
772            .enumerate()
773            .filter(|(_, line)| regex.is_match(line))
774            .map(|(i, _)| i)
775            .collect();
776
777        if match_idxs.is_empty() {
778            continue;
779        }
780        files_matched += 1;
781        total_matches += match_idxs.len();
782
783        // Merge overlapping ranges into hunks.
784        let path_str = p.display().to_string();
785        let mut ranges: Vec<(usize, usize)> = match_idxs
786            .iter()
787            .map(|&i| {
788                (
789                    i.saturating_sub(before),
790                    (i + after).min(n.saturating_sub(1)),
791                )
792            })
793            .collect();
794
795        // Sort and merge overlapping ranges.
796        ranges.sort_unstable();
797        let mut merged: Vec<(usize, usize)> = Vec::new();
798        for (s, e) in ranges {
799            if let Some(last) = merged.last_mut() {
800                if s <= last.1 + 1 {
801                    last.1 = last.1.max(e);
802                    continue;
803                }
804            }
805            merged.push((s, e));
806        }
807
808        // Build hunks from merged ranges.
809        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
810        for (start, end) in merged {
811            let mut hunk_lines = Vec::new();
812            for i in start..=end {
813                hunk_lines.push((i + 1, all_lines[i].to_string(), match_set.contains(&i)));
814            }
815            hunks.push(Hunk {
816                path: path_str.clone(),
817                lines: hunk_lines,
818            });
819        }
820    }
821
822    if hunks.is_empty() {
823        return Ok(format!("No matches for '{pattern}' in {base_str}"));
824    }
825
826    let total_hunks = hunks.len();
827    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
828    let showing = page_hunks.len();
829
830    let mut out =
831        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
832    if offset > 0 || showing < total_hunks {
833        out.push_str(&format!(
834            " [hunks {}-{} of {total_hunks}]",
835            offset + 1,
836            offset + showing
837        ));
838    }
839    out.push('\n');
840
841    for (i, hunk) in page_hunks.iter().enumerate() {
842        if i > 0 {
843            out.push_str("\n--\n");
844        }
845        for (lineno, text, is_match) in &hunk.lines {
846            if *is_match {
847                out.push_str(&format!("{}:{}:{}\n", hunk.path, lineno, text));
848            } else {
849                out.push_str(&format!("{}: {}-{}\n", hunk.path, lineno, text));
850            }
851        }
852    }
853
854    Ok(out.trim_end().to_string())
855}
856
857// ── Argument helpers ──────────────────────────────────────────────────────────
858
859fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
860    args.get(key)
861        .and_then(|v| v.as_str())
862        .ok_or_else(|| format!("Missing required argument: '{key}'"))
863}
864
865fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
866    args.get(key).and_then(value_as_usize)
867}
868
869fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
870    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
871}
872
873fn value_as_usize(value: &Value) -> Option<usize> {
874    if let Some(v) = value.as_u64() {
875        return usize::try_from(v).ok();
876    }
877
878    if let Some(v) = value.as_i64() {
879        return if v >= 0 {
880            usize::try_from(v as u64).ok()
881        } else {
882            None
883        };
884    }
885
886    if let Some(v) = value.as_f64() {
887        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
888            return Some(v as usize);
889        }
890        return None;
891    }
892
893    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
894}
895
896// ── Path helpers ──────────────────────────────────────────────────────────────
897
898/// Resolve a path that must already exist, and check it's inside the workspace.
899fn safe_path(path: &str) -> Result<PathBuf, String> {
900    let candidate = resolve_candidate(path);
901    canonicalize_safe(&candidate, path)
902}
903
904/// Resolve a path that may not exist yet (for write_file).
905fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
906    let candidate = resolve_candidate(path);
907
908    // Try canonical first.
909    if let Ok(abs) = candidate.canonicalize() {
910        check_workspace_bounds(&abs, path)?;
911        return Ok(abs);
912    }
913
914    // File doesn't exist yet — canonicalize the parent, append the filename.
915    let parent = candidate.parent().unwrap_or(Path::new("."));
916    let name = candidate
917        .file_name()
918        .ok_or_else(|| format!("invalid path: {path}"))?;
919    let abs_parent = parent
920        .canonicalize()
921        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
922    let abs = abs_parent.join(name);
923    check_workspace_bounds(&abs, path)?;
924    Ok(abs)
925}
926
/// Turn a user-supplied path into an absolute candidate without touching the
/// filesystem: absolute paths are returned unchanged, relative ones are joined
/// onto the current working directory (or onto `.` if that cannot be read).
fn resolve_candidate(path: &str) -> PathBuf {
    let given = Path::new(path);
    if !given.is_absolute() {
        let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
        return cwd.join(given);
    }
    given.to_path_buf()
}
937
938fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
939    let abs = candidate
940        .canonicalize()
941        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
942    check_workspace_bounds(&abs, original)?;
943    Ok(abs)
944}
945
946fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
947    // Delegate to the existing guard for blacklist + traversal checks.
948    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
949    super::guard::path_is_safe(&workspace, abs)
950        .map(|_| ())
951        .map_err(|e| format!("file access denied for '{original}': {e}"))
952}
953
/// Returns true if any component of `p` marks the path as one to skip.
///
/// A component triggers a skip when it starts with `.` or is exactly
/// `target`, `node_modules` or `__pycache__`. The components `.hematite`,
/// `.git`, `.` and `..` are explicitly exempt and never trigger a skip.
fn path_has_hidden_segment(p: &Path) -> bool {
    const EXEMPT: [&str; 4] = [".hematite", ".git", ".", ".."];
    const SKIPPED_DIRS: [&str; 3] = ["target", "node_modules", "__pycache__"];

    for component in p.components() {
        let lossy = component.as_os_str().to_string_lossy();
        let name: &str = lossy.as_ref();
        if EXEMPT.contains(&name) {
            continue;
        }
        if name.starts_with('.') || SKIPPED_DIRS.contains(&name) {
            return true;
        }
    }
    false
}
964
/// Build a numbered excerpt of `content` around the line that most resembles
/// the start of `search`, so a failed edit can show what is really in the file.
///
/// The probe is the first non-blank line of `search`, trimmed. Each file line
/// is scored by the length of the common prefix between its trimmed text and
/// the probe; the highest score wins, and on ties the *last* such line wins.
/// With an empty probe the first line is used. The excerpt covers up to three
/// lines before and four lines after the chosen line.
///
/// The header reports the chosen line number and the last line shown.
/// An empty file yields the literal text `(file is empty)`.
fn nearest_lines(content: &str, search: &str) -> String {
    let file_lines: Vec<&str> = content.lines().collect();
    if file_lines.is_empty() {
        return "(file is empty)".into();
    }

    let probe = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    // Score = number of leading characters shared with the probe.
    let shared_prefix_len = |candidate: &str| -> usize {
        probe
            .chars()
            .zip(candidate.trim().chars())
            .take_while(|(a, b)| a == b)
            .count()
    };

    let mut best_idx = 0;
    if !probe.is_empty() {
        let mut best_score = 0;
        for (idx, line) in file_lines.iter().copied().enumerate() {
            let score = shared_prefix_len(line);
            // `>=` keeps the latest line among equal scores.
            if score >= best_score {
                best_score = score;
                best_idx = idx;
            }
        }
    }

    let first = best_idx.saturating_sub(3);
    let last = file_lines.len().min(best_idx + 5);

    let mut snippet = String::new();
    for (offset, line) in file_lines[first..last].iter().enumerate() {
        if offset > 0 {
            snippet.push('\n');
        }
        snippet.push_str(&format!("{:>4} | {}", first + offset + 1, line));
    }

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        last,
        snippet
    )
}
1016
/// Core span-mapping logic shared by both fuzzy match levels.
///
/// Applies `normalise` to both `content` and `search`, looks for the
/// normalised search text inside the normalised content, and maps a hit back
/// to a byte range in the original (un-normalised) `content`. The returned
/// range always covers whole lines of `content`, from the start of the first
/// matched line to the end of the last one (excluding its line terminator).
///
/// `normalise` is expected to work line by line and keep the number of lines
/// unchanged; the mapping counts `\n` characters in its output to find the
/// corresponding original lines.
///
/// Details:
/// * Leading and trailing newlines of the normalised search are ignored; an
///   empty result never matches.
/// * Line positions are taken from the real byte offsets in `content`, so
///   both `\n` and `\r\n` terminators map correctly and the range always
///   falls on character boundaries.
/// * Every occurrence is tried in order. An occurrence that starts or ends in
///   the middle of a line is rejected (the whole-line candidate would not
///   normalise to the search text) and the scan continues.
///
/// Returns `None` when no occurrence verifies.
fn find_span_normalised(
    content: &str,
    search: &str,
    normalise: impl Fn(&str) -> String,
) -> Option<std::ops::Range<usize>> {
    let norm_content = normalise(content);
    let norm_search_owned = normalise(search);
    let norm_search = norm_search_owned.trim_matches('\n');
    if norm_search.is_empty() {
        return None;
    }

    // Byte range of every line of `content`, terminator excluded. Mirrors how
    // `str::lines` splits: a trailing "\n" or "\r\n" is not part of the line.
    let mut line_spans: Vec<std::ops::Range<usize>> = Vec::new();
    let mut offset = 0;
    for piece in content.split_inclusive('\n') {
        let line = match piece.strip_suffix('\n') {
            Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
            None => piece,
        };
        line_spans.push(offset..offset + line.len());
        offset += piece.len();
    }

    let search_line_count = norm_search.bytes().filter(|&b| b == b'\n').count() + 1;

    let mut from = 0;
    while let Some(rel) = norm_content[from..].find(norm_search) {
        let norm_pos = from + rel;

        // Index of the line the hit starts on, and of the line it ends on.
        let first_line = norm_content[..norm_pos]
            .bytes()
            .filter(|&b| b == b'\n')
            .count();
        let last_line = first_line + search_line_count - 1;

        if let (Some(first), Some(last)) = (line_spans.get(first_line), line_spans.get(last_line))
        {
            let span = first.start..last.end;
            // Verify against the original text: rejects hits that began or
            // ended mid-line in the normalised content.
            if normalise(&content[span.clone()]).trim_end_matches('\n') == norm_search {
                return Some(span);
            }
        }

        // Move past the first character of this hit and look again.
        let step = norm_content[norm_pos..]
            .chars()
            .next()
            .map_or(1, char::len_utf8);
        from = norm_pos + step;
    }

    None
}
1082
1083/// Level 1 fuzzy: rstrip only — removes trailing whitespace per line but
1084/// preserves leading indentation. Catches trailing-space mismatches where
1085/// the model's indentation is actually correct.
1086fn rstrip_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1087    find_span_normalised(content, search, |s| {
1088        s.lines()
1089            .map(|l| l.trim_end())
1090            .collect::<Vec<_>>()
1091            .join("\n")
1092    })
1093}
1094
1095/// Level 2 fuzzy: full strip — trims all leading and trailing whitespace
1096/// per line. Catches indentation mismatches where the model wrote the
1097/// correct content but with wrong indent level.
1098fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1099    find_span_normalised(content, search, |s| {
1100        s.lines().map(|l| l.trim()).collect::<Vec<_>>().join("\n")
1101    })
1102}
1103
1104/// Scan source files in the workspace for a search string that failed to
1105/// match in the intended target file. Returns the first file path where
1106/// the string is found (after CRLF normalisation), capped at 100 files.
1107/// Used to generate a "did you mean this file?" hint in edit errors.
1108fn find_search_in_workspace(search: &str, skip_path: &str) -> Option<String> {
1109    let root = workspace_root();
1110    let norm_search = search.replace("\r\n", "\n");
1111    let mut checked = 0usize;
1112
1113    let walker = ignore::WalkBuilder::new(&root)
1114        .hidden(true)
1115        .ignore(true)
1116        .git_ignore(true)
1117        .build();
1118
1119    for entry in walker.flatten() {
1120        if checked >= 100 {
1121            break;
1122        }
1123        let path = entry.path();
1124        if !path.is_file() {
1125            continue;
1126        }
1127        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1128        if !matches!(
1129            ext,
1130            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h"
1131        ) {
1132            continue;
1133        }
1134        let rel = path
1135            .strip_prefix(&root)
1136            .unwrap_or(path)
1137            .to_string_lossy()
1138            .replace('\\', "/");
1139        if rel == skip_path {
1140            continue;
1141        }
1142        checked += 1;
1143        if let Ok(content) = std::fs::read_to_string(path) {
1144            let normalised = content.replace("\r\n", "\n");
1145            if normalised.contains(&norm_search) {
1146                return Some(rel);
1147            }
1148        }
1149    }
1150    None
1151}
1152
1153// ── Indent-aware replacement ──────────────────────────────────────────────────
1154
/// Shift the indentation of `replace` by the difference between the
/// indentation the file really uses (`file_span`) and the indentation the
/// model wrote in `search`.
///
/// Indentation is measured in leading spaces on the first non-blank line of
/// each text; tabs are not counted. When both indents are equal, `replace` is
/// returned unchanged. Otherwise every non-blank line of `replace` has its
/// leading spaces adjusted by the delta (never below zero), blank lines are
/// kept as they are, and a trailing newline is preserved if one was present.
///
/// Example: search written with 0 spaces, file uses 8 — every non-blank line
/// of `replace` gains 8 leading spaces.
fn adjust_replace_indent(search: &str, file_span: &str, replace: &str) -> String {
    // Count of leading space characters on a single line.
    fn leading_spaces(line: &str) -> usize {
        line.len() - line.trim_start_matches(' ').len()
    }

    // Indent of the first line that is not blank, or 0 if there is none.
    fn first_indent(text: &str) -> usize {
        text.lines()
            .find(|l| !l.trim().is_empty())
            .map(leading_spaces)
            .unwrap_or(0)
    }

    let wanted = first_indent(file_span);
    let written = first_indent(search);
    if wanted == written {
        return replace.to_string();
    }
    let delta = wanted as i64 - written as i64;

    let mut result = String::new();
    for (n, line) in replace.lines().enumerate() {
        if n > 0 {
            result.push('\n');
        }
        if line.trim().is_empty() {
            result.push_str(line);
            continue;
        }
        let shifted = (leading_spaces(line) as i64 + delta).max(0) as usize;
        result.push_str(&" ".repeat(shifted));
        result.push_str(line.trim_start_matches(' '));
    }

    if replace.ends_with('\n') {
        result.push('\n');
    }
    result
}
1199
1200// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
1201
1202/// Return a formatted diff string for an edit_file operation without applying it.
1203/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1204/// search string cannot be located (caller falls through to normal tool dispatch).
1205pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1206    let path = require_str(args, "path")?;
1207    let search = require_str(args, "search")?;
1208    let replace = require_str(args, "replace")?;
1209
1210    let abs = safe_path(path)?;
1211    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1212    let original = raw.replace("\r\n", "\n");
1213
1214    let (effective_search, effective_replace): (String, String) = if original.contains(search) {
1215        (search.to_string(), replace.to_string())
1216    } else {
1217        let span =
1218            rstrip_find_span(&original, search).or_else(|| fuzzy_find_span(&original, search));
1219        match span {
1220            Some(span) => {
1221                let real_slice = original[span].to_string();
1222                let adjusted = adjust_replace_indent(search, &real_slice, replace);
1223                (real_slice, adjusted)
1224            }
1225            None => return Err("search string not found — diff preview unavailable".into()),
1226        }
1227    };
1228
1229    let mut diff = String::new();
1230    for line in effective_search.lines() {
1231        diff.push_str(&format!("- {}\n", line));
1232    }
1233    for line in effective_replace.lines() {
1234        diff.push_str(&format!("+ {}\n", line));
1235    }
1236    Ok(diff)
1237}
1238
1239/// Return a formatted diff string for a patch_hunk operation without applying it.
1240pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1241    let path = require_str(args, "path")?;
1242    let start_line = require_usize(args, "start_line")?;
1243    let end_line = require_usize(args, "end_line")?;
1244    let replacement = require_str(args, "replacement")?;
1245
1246    let abs = safe_path(path)?;
1247    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1248    let lines: Vec<&str> = original.lines().collect();
1249    let total = lines.len();
1250
1251    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1252        return Err(format!(
1253            "patch_hunk: invalid line range {}-{} for file with {} lines",
1254            start_line, end_line, total
1255        ));
1256    }
1257
1258    let s_idx = start_line - 1;
1259    let e_idx = end_line;
1260
1261    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1262    for i in s_idx..e_idx {
1263        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
1264    }
1265    for line in replacement.lines() {
1266        diff.push_str(&format!("+ {}\n", line.trim_end()));
1267    }
1268    Ok(diff)
1269}
1270
1271/// Return a formatted diff string for a multi_search_replace operation without applying it.
1272pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1273    let hunks_val = args
1274        .get("hunks")
1275        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1276
1277    #[derive(serde::Deserialize)]
1278    struct PreviewHunk {
1279        search: String,
1280        replace: String,
1281    }
1282    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1283        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1284
1285    let mut diff = String::new();
1286    for (i, hunk) in hunks.iter().enumerate() {
1287        if hunks.len() > 1 {
1288            diff.push_str(&format!("@@ hunk {} @@\n", i + 1));
1289        }
1290        for line in hunk.search.lines() {
1291            diff.push_str(&format!("- {}\n", line.trim_end()));
1292        }
1293        for line in hunk.replace.lines() {
1294            diff.push_str(&format!("+ {}\n", line.trim_end()));
1295        }
1296    }
1297    Ok(diff)
1298}
1299
/// Locate the workspace root by walking upward from the current directory.
///
/// The first directory (starting with the current one) that contains `.git`,
/// `Cargo.toml` or `package.json` is returned. If none is found, the current
/// directory itself is returned; if that cannot be read, `.` is used.
pub fn workspace_root() -> PathBuf {
    const ROOT_MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let start = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let found = start
        .ancestors()
        .find(|dir| ROOT_MARKERS.iter().any(|marker| dir.join(marker).exists()));

    match found {
        Some(dir) => dir.to_path_buf(),
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    }
}
1316
1317/// Returns true if the workspace root looks like a real project.
1318/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1319/// count — at least one explicit build/package marker must also be present.
1320pub fn is_project_workspace() -> bool {
1321    let root = workspace_root();
1322    let has_explicit_marker = root.join("Cargo.toml").exists()
1323        || root.join("package.json").exists()
1324        || root.join("pyproject.toml").exists()
1325        || root.join("go.mod").exists()
1326        || root.join("setup.py").exists()
1327        || root.join("pom.xml").exists()
1328        || root.join("build.gradle").exists()
1329        || root.join("CMakeLists.txt").exists();
1330    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1331}
1332
1333// ── open_in_system_editor ───────────────────────────────────────────────────
1334
/// Open `path` with the operating system's launcher for that file.
///
/// The launcher is chosen at compile time: `cmd /c start` on Windows, `open`
/// on macOS and `xdg-open` on other Unix systems. The call waits for the
/// launcher command itself to exit and checks its exit status.
///
/// # Errors
/// Fails when `path` does not exist, when the launcher cannot be spawned, or
/// when it exits unsuccessfully.
pub fn open_in_system_editor(path: &std::path::Path) -> Result<(), String> {
    if !path.exists() {
        return Err(format!("File not found: {}", path.display()));
    }

    #[cfg(target_os = "windows")]
    {
        // The empty string after `start` fills its window-title slot, so the
        // path is not mistaken for a title.
        let exit = std::process::Command::new("cmd")
            .args(["/c", "start", "", &path.to_string_lossy()])
            .status()
            .map_err(|e| format!("Failed to launch editor: {e}"))?;

        if !exit.success() {
            return Err("Editor command failed to start.".into());
        }
    }

    #[cfg(target_os = "macos")]
    {
        let exit = std::process::Command::new("open")
            .arg(path)
            .status()
            .map_err(|e| format!("Failed to launch editor: {e}"))?;

        if !exit.success() {
            return Err("open command failed.".into());
        }
    }

    #[cfg(all(unix, not(target_os = "macos")))]
    {
        let exit = std::process::Command::new("xdg-open")
            .arg(path)
            .status()
            .map_err(|e| format!("Failed to launch editor: {e}"))?;

        if !exit.success() {
            return Err("xdg-open failed.".into());
        }
    }

    Ok(())
}
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs