hematite/tools/
file_ops.rs

1use serde_json::Value;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6use walkdir::WalkDir;
7
8// ── Ghost Ledger ──────────────────────────────────────────────────────────────
9
/// Maximum number of `.bak` snapshots kept in the ghost directory.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trim `ghost_dir` down to the newest [`MAX_GHOST_BACKUPS`] `.bak` files and drop
/// every `ledger.txt` line whose backup file is no longer retained.
///
/// Ledger lines have the form `target|backup_path`; lines without a `|` are discarded.
/// The whole operation is best-effort: I/O failures are ignored and an unreadable
/// directory or ledger simply ends the function early.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| ext.eq_ignore_ascii_case("bak"))
        })
        .collect();

    // Read each file's mtime exactly once. Re-reading metadata inside the comparator
    // costs a syscall per comparison and can yield an inconsistent ordering if a file
    // changes while the sort is running.
    backups.sort_by_cached_key(|entry| entry.metadata().and_then(|meta| meta.modified()).ok());
    // Newest first; entries whose mtime could not be read end up last (pruned first).
    backups.reverse();

    // Paths are compared with forward slashes so Windows-style ledger entries match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .take(MAX_GHOST_BACKUPS)
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for stale in backups.into_iter().skip(MAX_GHOST_BACKUPS) {
        let _ = fs::remove_file(stale.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    let kept_lines: Vec<&str> = content
        .lines()
        .filter(|line| {
            line.split_once('|')
                .map_or(false, |(_, backup)| retained.contains(&backup.replace('\\', "/")))
        })
        .collect();

    let rewritten = if kept_lines.is_empty() {
        String::new()
    } else {
        kept_lines.join("\n") + "\n"
    };

    // Skip the write when pruning left the ledger untouched.
    if rewritten != content {
        let _ = fs::write(ledger_path, rewritten);
    }
}
71
72fn save_ghost_backup(target_path: &str, content: &str) {
73    let ws = workspace_root();
74
75    // Phase 1: Try Git Ghost Snapshot
76    if crate::agent::git::is_git_repo(&ws) {
77        let _ = crate::agent::git::create_ghost_snapshot(&ws);
78    }
79
80    // Phase 2: Fallback to local file backup (Ghost Ledger)
81    let ghost_dir = ws.join(".hematite").join("ghost");
82    let _ = fs::create_dir_all(&ghost_dir);
83    let ts = std::time::SystemTime::now()
84        .duration_since(std::time::UNIX_EPOCH)
85        .unwrap()
86        .as_millis();
87    let safe_name = Path::new(target_path)
88        .file_name()
89        .unwrap_or_default()
90        .to_string_lossy();
91    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
92
93    if fs::write(&backup_file, content).is_ok() {
94        use std::io::Write;
95        if let Ok(mut f) = fs::OpenOptions::new()
96            .create(true)
97            .append(true)
98            .open(ghost_dir.join("ledger.txt"))
99        {
100            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
101        }
102        prune_ghost_backups(&ghost_dir);
103    }
104}
105
106pub fn pop_ghost_ledger() -> Result<String, String> {
107    let ws = workspace_root();
108    let ghost_dir = ws.join(".hematite").join("ghost");
109    let ledger_path = ghost_dir.join("ledger.txt");
110
111    if !ledger_path.exists() {
112        return Err("Ghost Ledger is empty — no edits to undo".into());
113    }
114
115    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
116    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
117
118    if lines.is_empty() {
119        return Err("Ghost Ledger is empty".into());
120    }
121
122    let last_line = lines.pop().unwrap();
123    let parts: Vec<&str> = last_line.splitn(2, '|').collect();
124    if parts.len() != 2 {
125        return Err("Corrupted ledger entry".into());
126    }
127
128    let target_path = parts[0];
129    let backup_path = parts[1];
130
131    // Priority 1: Try Git Rollback
132    if crate::agent::git::is_git_repo(&ws) {
133        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
134            let _ = fs::remove_file(backup_path);
135            let new_ledger = lines.join("\n");
136            let _ = fs::write(
137                &ledger_path,
138                if new_ledger.is_empty() {
139                    String::new()
140                } else {
141                    new_ledger + "\n"
142                },
143            );
144            return Ok(msg);
145        }
146    }
147
148    // Priority 2: Standard File Rollback
149    let original_content =
150        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
151    let abs_target = ws.join(target_path);
152    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
153
154    let new_ledger = lines.join("\n");
155    let _ = fs::write(
156        &ledger_path,
157        if new_ledger.is_empty() {
158            String::new()
159        } else {
160            new_ledger + "\n"
161        },
162    );
163    let _ = fs::remove_file(backup_path);
164
165    Ok(format!("Restored {} from Ghost Ledger", target_path))
166}
167
168// ── read_file ─────────────────────────────────────────────────────────────────
169
170pub async fn read_file(args: &Value) -> Result<String, String> {
171    let path = require_str(args, "path")?;
172    let offset = get_usize_arg(args, "offset");
173    let limit = get_usize_arg(args, "limit");
174
175    let abs = safe_path(path)?;
176    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
177
178    let lines: Vec<&str> = raw.lines().collect();
179    let total = lines.len();
180    let start = offset.unwrap_or(0).min(total);
181    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
182
183    let mut content = lines[start..end].join("\n");
184    if end < total {
185        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
186        content.push_str(&format!("This file has {} more lines below. ", total - end));
187        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
188                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
189    }
190
191    Ok(format!(
192        "[{path}  lines {}-{} of {}]\n{}",
193        start + 1,
194        end,
195        total,
196        content
197    ))
198}
199
200// ── inspect_lines ─────────────────────────────────────────────────────────────
201
202pub async fn inspect_lines(args: &Value) -> Result<String, String> {
203    let path = require_str(args, "path")?;
204    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
205    let end_line = get_usize_arg(args, "end_line");
206
207    let abs = safe_path(path)?;
208    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
209
210    let lines: Vec<&str> = raw.lines().collect();
211    let total = lines.len();
212
213    let start = start_line.saturating_sub(1).min(total);
214    let end = end_line.unwrap_or(total).min(total);
215
216    if start >= end && total > 0 {
217        return Err(format!(
218            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
219            end_line.unwrap_or(total)
220        ));
221    }
222
223    let mut output = format!(
224        "[inspect_lines: {path} lines {}-{} of {}]\n",
225        start + 1,
226        end,
227        total
228    );
229    for i in start..end {
230        output.push_str(&format!("[{:>4}] | {}\n", i + 1, lines[i]));
231    }
232
233    Ok(output)
234}
235
236// ── write_file ────────────────────────────────────────────────────────────────
237
238pub async fn write_file(args: &Value) -> Result<String, String> {
239    let path = require_str(args, "path")?;
240    let content = require_str(args, "content")?;
241
242    let abs = safe_path_allow_new(path)?;
243    if let Some(parent) = abs.parent() {
244        fs::create_dir_all(parent)
245            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
246    }
247
248    let existed = abs.exists();
249    if existed {
250        if let Ok(orig) = fs::read_to_string(&abs) {
251            save_ghost_backup(path, &orig);
252        }
253    }
254
255    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
256
257    let action = if existed { "Updated" } else { "Created" };
258    Ok(format!("{action} {path}  ({} bytes)", content.len()))
259}
260
261// ── edit_file ─────────────────────────────────────────────────────────────────
262
263pub async fn edit_file(args: &Value) -> Result<String, String> {
264    let path = require_str(args, "path")?;
265    let search = require_str(args, "search")?;
266    let replace = require_str(args, "replace")?;
267    let replace_all = args
268        .get("replace_all")
269        .and_then(|v| v.as_bool())
270        .unwrap_or(false);
271
272    if search == replace {
273        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
274    }
275
276    let abs = safe_path(path)?;
277    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
278    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
279    let original = raw.replace("\r\n", "\n");
280
281    save_ghost_backup(path, &original);
282
283    let search_trimmed = search.trim();
284    let search_non_ws_len = search_trimmed
285        .chars()
286        .filter(|c| !c.is_whitespace())
287        .count();
288    let search_line_count = search_trimmed.lines().count();
289    if search_non_ws_len < 12 && search_line_count <= 1 {
290        return Err(format!(
291            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
292             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
293        ));
294    }
295
296    // ── Exact match first ────────────────────────────────────────────────────
297    let (effective_search, was_repaired) = if original.contains(search) {
298        let exact_match_count = original.matches(search).count();
299        if exact_match_count > 1 && !replace_all {
300            return Err(format!(
301                "edit_file: search string matched {} times in {path}.\n\
302                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
303                exact_match_count
304            ));
305        }
306        (search.to_string(), false)
307    } else {
308        // ── Fuzzy repair: try whitespace-normalised match ─────────────────
309        // Local models commonly produce search strings with wrong indentation,
310        // trailing spaces, or CRLF/LF mismatches.  We normalise both sides and
311        // find the real span in the file, then apply the replacement there.
312        match fuzzy_find_span(&original, search) {
313            Some(span) => {
314                // Extract the exact slice from the file so we can replace it.
315                let real_slice = original[span.clone()].to_string();
316                (real_slice, true)
317            }
318            None => {
319                let hint = nearest_lines(&original, search);
320                return Err(format!(
321                    "edit_file: search string not found in {path}.\n\
322                     The 'search' value must match the file content exactly \
323                     (including whitespace/indentation).\n\
324                     {hint}"
325                ));
326            }
327        }
328    };
329
330    let updated = if replace_all {
331        original.replace(effective_search.as_str(), replace)
332    } else {
333        original.replacen(effective_search.as_str(), replace, 1)
334    };
335
336    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
337
338    let removed = original.lines().count();
339    let added = updated.lines().count();
340    let repair_note = if was_repaired {
341        "  [whitespace auto-corrected]"
342    } else {
343        ""
344    };
345
346    let mut diff_block = String::new();
347    diff_block.push_str("\n--- DIFF \n");
348    for line in effective_search.lines() {
349        diff_block.push_str(&format!("- {}\n", line));
350    }
351    for line in replace.lines() {
352        diff_block.push_str(&format!("+ {}\n", line));
353    }
354
355    Ok(format!(
356        "Edited {path}  ({} -> {} lines){repair_note}{}",
357        removed, added, diff_block
358    ))
359}
360
361// ── patch_hunk ────────────────────────────────────────────────────────────────
362
363pub async fn patch_hunk(args: &Value) -> Result<String, String> {
364    let path = require_str(args, "path")?;
365    let start_line = require_usize(args, "start_line")?;
366    let end_line = require_usize(args, "end_line")?;
367    let replacement = require_str(args, "replacement")?;
368
369    let abs = safe_path(path)?;
370    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
371
372    save_ghost_backup(path, &original);
373
374    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
375    let total = lines.len();
376
377    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
378        return Err(format!(
379            "patch_hunk: invalid line range {}-{} for file with {} lines",
380            start_line, end_line, total
381        ));
382    }
383
384    let mut updated_lines = Vec::new();
385    // 0-indexed adjustment
386    let s_idx = start_line - 1;
387    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
388
389    // 1. Lines before the hunk
390    updated_lines.extend_from_slice(&lines[0..s_idx]);
391
392    // 2. The hunk replacement
393    for line in replacement.lines() {
394        updated_lines.push(line.to_string());
395    }
396
397    // 3. Lines after the hunk
398    if e_idx < total {
399        updated_lines.extend_from_slice(&lines[e_idx..total]);
400    }
401
402    let updated_content = updated_lines.join("\n");
403    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
404
405    let mut diff = String::new();
406    diff.push_str("\n--- HUNK DIFF ---\n");
407    for i in s_idx..e_idx {
408        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
409    }
410    for line in replacement.lines() {
411        diff.push_str(&format!("+ {}\n", line.trim_end()));
412    }
413
414    Ok(format!(
415        "Patched {path} lines {}-{} ({} -> {} lines){}",
416        start_line,
417        end_line,
418        (e_idx - s_idx),
419        replacement.lines().count(),
420        diff
421    ))
422}
423
424// ── multi_search_replace ──────────────────────────────────────────────────────
425
426#[derive(serde::Deserialize)]
427struct SearchReplaceHunk {
428    search: String,
429    replace: String,
430}
431
432pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
433    let path = require_str(args, "path")?;
434    let hunks_val = args
435        .get("hunks")
436        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
437
438    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
439        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
440
441    if hunks.is_empty() {
442        return Err("multi_search_replace: hunks array is empty".to_string());
443    }
444
445    let abs = safe_path(path)?;
446    let raw =
447        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
448    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
449    let original = raw.replace("\r\n", "\n");
450
451    save_ghost_backup(path, &original);
452
453    let mut current_content = original.clone();
454    let mut diff = String::new();
455    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
456
457    let mut patched_hunks = 0;
458
459    for (i, hunk) in hunks.iter().enumerate() {
460        let match_count = current_content.matches(&hunk.search).count();
461        if match_count == 0 {
462            return Err(format!("multi_search_replace: hunk {} search string not found in file. Ensure exact whitespace match.", i));
463        }
464        if match_count > 1 {
465            return Err(format!("multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.", i, match_count));
466        }
467
468        diff.push_str(&format!("\n@@ Hunk {} @@\n", i + 1));
469        for line in hunk.search.lines() {
470            diff.push_str(&format!("- {}\n", line.trim_end()));
471        }
472        for line in hunk.replace.lines() {
473            diff.push_str(&format!("+ {}\n", line.trim_end()));
474        }
475
476        current_content = current_content.replace(&hunk.search, &hunk.replace);
477        patched_hunks += 1;
478    }
479
480    fs::write(&abs, &current_content)
481        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
482
483    Ok(format!(
484        "Modified {} hunks in {} using exact search-and-replace.{}",
485        patched_hunks, path, diff
486    ))
487}
488
489// ── list_files ────────────────────────────────────────────────────────────────
490
491pub async fn list_files(args: &Value) -> Result<String, String> {
492    let started = Instant::now();
493    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
494    let ext_filter = args.get("extension").and_then(|v| v.as_str());
495
496    let base = safe_path(base_str)?;
497
498    let mut files: Vec<PathBuf> = Vec::new();
499    for entry in WalkDir::new(&base).follow_links(false) {
500        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
501        if !entry.file_type().is_file() {
502            continue;
503        }
504        let p = entry.path();
505
506        // Skip hidden dirs / target / node_modules
507        if path_has_hidden_segment(p) {
508            continue;
509        }
510
511        if let Some(ext) = ext_filter {
512            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
513                continue;
514            }
515        }
516        files.push(p.to_path_buf());
517    }
518
519    // Sort by modification time (newest first).
520    files.sort_by_key(|p| {
521        fs::metadata(p)
522            .and_then(|m| m.modified())
523            .ok()
524            .map(std::cmp::Reverse)
525    });
526
527    let total = files.len();
528    const LIMIT: usize = 200;
529    let truncated = total > LIMIT;
530    let shown: Vec<String> = files
531        .into_iter()
532        .take(LIMIT)
533        .map(|p| p.display().to_string())
534        .collect();
535
536    let ms = started.elapsed().as_millis();
537    let mut out = format!(
538        "{} file(s) in {}  ({ms}ms){}",
539        total.min(LIMIT),
540        base_str,
541        if truncated {
542            "  [truncated at 200]"
543        } else {
544            ""
545        }
546    );
547    out.push('\n');
548    out.push_str(&shown.join("\n"));
549    Ok(out)
550}
551
552// ── grep_files ────────────────────────────────────────────────────────────────
553
554pub async fn grep_files(args: &Value) -> Result<String, String> {
555    let pattern = require_str(args, "pattern")?;
556    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
557    let ext_filter = args.get("extension").and_then(|v| v.as_str());
558    let case_insensitive = args
559        .get("case_insensitive")
560        .and_then(|v| v.as_bool())
561        .unwrap_or(true);
562    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
563    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
564    let offset = get_usize_arg(args, "offset").unwrap_or(0);
565
566    // Context lines: `context` sets both before+after; `before`/`after` override individually.
567    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
568    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
569    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
570
571    let base = safe_path(base_str)?;
572
573    let regex = regex::RegexBuilder::new(pattern)
574        .case_insensitive(case_insensitive)
575        .build()
576        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
577
578    // ── files_only mode ───────────────────────────────────────────────────────
579    if files_only {
580        let mut matched_files: Vec<String> = Vec::new();
581
582        for entry in WalkDir::new(&base).follow_links(false) {
583            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
584            if !entry.file_type().is_file() {
585                continue;
586            }
587            let p = entry.path();
588            if path_has_hidden_segment(p) {
589                continue;
590            }
591            if let Some(ext) = ext_filter {
592                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
593                    continue;
594                }
595            }
596            let Ok(contents) = fs::read_to_string(p) else {
597                continue;
598            };
599            if contents.lines().any(|line| regex.is_match(line)) {
600                matched_files.push(p.display().to_string());
601            }
602        }
603
604        if matched_files.is_empty() {
605            return Ok(format!("No files matching '{pattern}' in {base_str}"));
606        }
607
608        let total = matched_files.len();
609        let page: Vec<_> = matched_files
610            .into_iter()
611            .skip(offset)
612            .take(head_limit)
613            .collect();
614        let showing = page.len();
615        let mut out = format!("{total} file(s) match '{pattern}'");
616        if offset > 0 || showing < total {
617            out.push_str(&format!(
618                " [showing {}-{} of {total}]",
619                offset + 1,
620                offset + showing
621            ));
622        }
623        out.push('\n');
624        out.push_str(&page.join("\n"));
625        return Ok(out);
626    }
627
628    // ── content mode with optional context lines ──────────────────────────────
629
630    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
631    struct Hunk {
632        path: String,
633        /// (line_number_1_indexed, line_text, is_match)
634        lines: Vec<(usize, String, bool)>,
635    }
636
637    let mut hunks: Vec<Hunk> = Vec::new();
638    let mut total_matches = 0usize;
639    let mut files_matched = 0usize;
640
641    for entry in WalkDir::new(&base).follow_links(false) {
642        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
643        if !entry.file_type().is_file() {
644            continue;
645        }
646        let p = entry.path();
647        if path_has_hidden_segment(p) {
648            continue;
649        }
650        if let Some(ext) = ext_filter {
651            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
652                continue;
653            }
654        }
655        let Ok(contents) = fs::read_to_string(p) else {
656            continue;
657        };
658        let all_lines: Vec<&str> = contents.lines().collect();
659        let n = all_lines.len();
660
661        // Find all match indices in this file.
662        let match_idxs: Vec<usize> = all_lines
663            .iter()
664            .enumerate()
665            .filter(|(_, line)| regex.is_match(line))
666            .map(|(i, _)| i)
667            .collect();
668
669        if match_idxs.is_empty() {
670            continue;
671        }
672        files_matched += 1;
673        total_matches += match_idxs.len();
674
675        // Merge overlapping ranges into hunks.
676        let path_str = p.display().to_string();
677        let mut ranges: Vec<(usize, usize)> = match_idxs
678            .iter()
679            .map(|&i| {
680                (
681                    i.saturating_sub(before),
682                    (i + after).min(n.saturating_sub(1)),
683                )
684            })
685            .collect();
686
687        // Sort and merge overlapping ranges.
688        ranges.sort_unstable();
689        let mut merged: Vec<(usize, usize)> = Vec::new();
690        for (s, e) in ranges {
691            if let Some(last) = merged.last_mut() {
692                if s <= last.1 + 1 {
693                    last.1 = last.1.max(e);
694                    continue;
695                }
696            }
697            merged.push((s, e));
698        }
699
700        // Build hunks from merged ranges.
701        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
702        for (start, end) in merged {
703            let mut hunk_lines = Vec::new();
704            for i in start..=end {
705                hunk_lines.push((i + 1, all_lines[i].to_string(), match_set.contains(&i)));
706            }
707            hunks.push(Hunk {
708                path: path_str.clone(),
709                lines: hunk_lines,
710            });
711        }
712    }
713
714    if hunks.is_empty() {
715        return Ok(format!("No matches for '{pattern}' in {base_str}"));
716    }
717
718    let total_hunks = hunks.len();
719    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
720    let showing = page_hunks.len();
721
722    let mut out =
723        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
724    if offset > 0 || showing < total_hunks {
725        out.push_str(&format!(
726            " [hunks {}-{} of {total_hunks}]",
727            offset + 1,
728            offset + showing
729        ));
730    }
731    out.push('\n');
732
733    for (i, hunk) in page_hunks.iter().enumerate() {
734        if i > 0 {
735            out.push_str("\n--\n");
736        }
737        for (lineno, text, is_match) in &hunk.lines {
738            if *is_match {
739                out.push_str(&format!("{}:{}:{}\n", hunk.path, lineno, text));
740            } else {
741                out.push_str(&format!("{}: {}-{}\n", hunk.path, lineno, text));
742            }
743        }
744    }
745
746    Ok(out.trim_end().to_string())
747}
748
749// ── Argument helpers ──────────────────────────────────────────────────────────
750
751fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
752    args.get(key)
753        .and_then(|v| v.as_str())
754        .ok_or_else(|| format!("Missing required argument: '{key}'"))
755}
756
757fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
758    args.get(key).and_then(value_as_usize)
759}
760
761fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
762    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
763}
764
765fn value_as_usize(value: &Value) -> Option<usize> {
766    if let Some(v) = value.as_u64() {
767        return usize::try_from(v).ok();
768    }
769
770    if let Some(v) = value.as_i64() {
771        return if v >= 0 {
772            usize::try_from(v as u64).ok()
773        } else {
774            None
775        };
776    }
777
778    if let Some(v) = value.as_f64() {
779        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
780            return Some(v as usize);
781        }
782        return None;
783    }
784
785    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
786}
787
788// ── Path helpers ──────────────────────────────────────────────────────────────
789
790/// Resolve a path that must already exist, and check it's inside the workspace.
791fn safe_path(path: &str) -> Result<PathBuf, String> {
792    let candidate = resolve_candidate(path);
793    canonicalize_safe(&candidate, path)
794}
795
796/// Resolve a path that may not exist yet (for write_file).
797fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
798    let candidate = resolve_candidate(path);
799
800    // Try canonical first.
801    if let Ok(abs) = candidate.canonicalize() {
802        check_workspace_bounds(&abs, path)?;
803        return Ok(abs);
804    }
805
806    // File doesn't exist yet — canonicalize the parent, append the filename.
807    let parent = candidate.parent().unwrap_or(Path::new("."));
808    let name = candidate
809        .file_name()
810        .ok_or_else(|| format!("invalid path: {path}"))?;
811    let abs_parent = parent
812        .canonicalize()
813        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
814    let abs = abs_parent.join(name);
815    check_workspace_bounds(&abs, path)?;
816    Ok(abs)
817}
818
/// Make `path` absolute without touching the filesystem.
///
/// Absolute paths are returned unchanged. Relative paths are joined onto the current
/// working directory, or onto `"."` if the working directory cannot be determined.
fn resolve_candidate(path: &str) -> PathBuf {
    let requested = Path::new(path);
    if requested.is_absolute() {
        return requested.to_path_buf();
    }

    let cwd = match std::env::current_dir() {
        Ok(dir) => dir,
        Err(_) => PathBuf::from("."),
    };
    cwd.join(requested)
}
829
830fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
831    let abs = candidate
832        .canonicalize()
833        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
834    check_workspace_bounds(&abs, original)?;
835    Ok(abs)
836}
837
838fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
839    // Delegate to the existing guard for blacklist + traversal checks.
840    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
841    super::guard::path_is_safe(&workspace, abs)
842        .map(|_| ())
843        .map_err(|e| format!("file access denied for '{original}': {e}"))
844}
845
/// Reports whether any component of `p` should be skipped by directory walks.
///
/// A component is skipped when it starts with `.` (other than the special `.` and `..`
/// components) or is named `target`, `node_modules` or `__pycache__`.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let name = component.as_os_str().to_string_lossy();
        let skipped = match name.as_ref() {
            "." | ".." => false,
            "target" | "node_modules" | "__pycache__" => true,
            other => other.starts_with('.'),
        };
        if skipped {
            return true;
        }
    }
    false
}
856
/// Build a hint showing the part of `content` that most resembles `search`, so the
/// model can see the real indentation/content and self-correct.
///
/// The first non-blank line of `search` (trimmed) is compared with every trimmed line
/// of `content` by length of common prefix. The best-scoring line wins; on ties the
/// last such line is chosen. The hint shows up to three lines before and four lines
/// after it, each prefixed with its 1-based line number.
fn nearest_lines(content: &str, search: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return "(file is empty)".into();
    }

    // The probe is the first search line that has any non-whitespace content.
    let probe = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    let mut best_idx = 0usize;
    if !probe.is_empty() {
        let mut best_score = 0usize;
        for (idx, line) in lines.iter().enumerate() {
            // Score: length of longest common prefix after trimming.
            let score = probe
                .chars()
                .zip(line.trim().chars())
                .take_while(|(a, b)| a == b)
                .count();
            // `>=` keeps the last line among equally good candidates.
            if score >= best_score {
                best_score = score;
                best_idx = idx;
            }
        }
    }

    let start = best_idx.saturating_sub(3);
    let end = (best_idx + 5).min(lines.len());

    let mut numbered: Vec<String> = Vec::with_capacity(end - start);
    for (k, text) in lines[start..end].iter().enumerate() {
        numbered.push(format!("{:>4} | {}", start + k + 1, text));
    }

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        end,
        numbered.join("\n")
    )
}
908
/// Fuzzy match: locate `search` in `content` while ignoring leading/trailing
/// whitespace on every line and the LF/CRLF distinction, and return the byte
/// range of the real match in `content`.
///
/// Only whitespace around lines is tolerated — it does NOT tolerate changed
/// content. Blank lines at the very start or end of `search` are ignored;
/// blank lines in the middle must line up with blank lines in `content`.
///
/// The returned range starts at the first byte of the first matched line
/// (including its indentation) and ends at the end of the last matched line's
/// text, excluding that line's terminator. The first matching run of whole
/// lines wins. Returns `None` when `search` has no non-blank line or when no
/// run of whole lines matches.
fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
    // Trim every search line, then drop blank lines at both ends.
    let trimmed: Vec<&str> = search.lines().map(str::trim).collect();
    let first = trimmed.iter().position(|l| !l.is_empty())?;
    let last = trimmed.iter().rposition(|l| !l.is_empty())?;
    let needle = &trimmed[first..=last];

    // Record each content line together with its true byte offset, so the
    // result is correct regardless of terminator width (LF vs CRLF).
    let mut haystack: Vec<(usize, &str)> = Vec::new();
    let mut offset = 0;
    for raw in content.split_inclusive('\n') {
        let line = raw
            .strip_suffix("\r\n")
            .or_else(|| raw.strip_suffix('\n'))
            .unwrap_or(raw);
        haystack.push((offset, line));
        offset += raw.len();
    }

    // Compare whole lines only: a hit that begins or ends mid-line is not a
    // match, and must not prevent a later whole-line match from being found.
    haystack
        .windows(needle.len())
        .find(|window| {
            window
                .iter()
                .zip(needle)
                .all(|((_, line), wanted)| line.trim() == *wanted)
        })
        .map(|window| {
            let (start, _) = window[0];
            let (last_start, last_line) = window[window.len() - 1];
            start..last_start + last_line.len()
        })
}
982
983// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
984
985/// Return a formatted diff string for an edit_file operation without applying it.
986/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
987/// search string cannot be located (caller falls through to normal tool dispatch).
988pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
989    let path = require_str(args, "path")?;
990    let search = require_str(args, "search")?;
991    let replace = require_str(args, "replace")?;
992
993    let abs = safe_path(path)?;
994    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
995    let original = raw.replace("\r\n", "\n");
996
997    let effective_search: String = if original.contains(search) {
998        search.to_string()
999    } else {
1000        match fuzzy_find_span(&original, search) {
1001            Some(span) => original[span].to_string(),
1002            None => return Err("search string not found — diff preview unavailable".into()),
1003        }
1004    };
1005
1006    let mut diff = String::new();
1007    for line in effective_search.lines() {
1008        diff.push_str(&format!("- {}\n", line));
1009    }
1010    for line in replace.lines() {
1011        diff.push_str(&format!("+ {}\n", line));
1012    }
1013    Ok(diff)
1014}
1015
1016/// Return a formatted diff string for a patch_hunk operation without applying it.
1017pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1018    let path = require_str(args, "path")?;
1019    let start_line = require_usize(args, "start_line")?;
1020    let end_line = require_usize(args, "end_line")?;
1021    let replacement = require_str(args, "replacement")?;
1022
1023    let abs = safe_path(path)?;
1024    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1025    let lines: Vec<&str> = original.lines().collect();
1026    let total = lines.len();
1027
1028    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1029        return Err(format!(
1030            "patch_hunk: invalid line range {}-{} for file with {} lines",
1031            start_line, end_line, total
1032        ));
1033    }
1034
1035    let s_idx = start_line - 1;
1036    let e_idx = end_line;
1037
1038    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1039    for i in s_idx..e_idx {
1040        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
1041    }
1042    for line in replacement.lines() {
1043        diff.push_str(&format!("+ {}\n", line.trim_end()));
1044    }
1045    Ok(diff)
1046}
1047
1048/// Return a formatted diff string for a multi_search_replace operation without applying it.
1049pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1050    let hunks_val = args
1051        .get("hunks")
1052        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1053
1054    #[derive(serde::Deserialize)]
1055    struct PreviewHunk {
1056        search: String,
1057        replace: String,
1058    }
1059    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1060        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1061
1062    let mut diff = String::new();
1063    for (i, hunk) in hunks.iter().enumerate() {
1064        if hunks.len() > 1 {
1065            diff.push_str(&format!("@@ hunk {} @@\n", i + 1));
1066        }
1067        for line in hunk.search.lines() {
1068            diff.push_str(&format!("- {}\n", line.trim_end()));
1069        }
1070        for line in hunk.replace.lines() {
1071            diff.push_str(&format!("+ {}\n", line.trim_end()));
1072        }
1073    }
1074    Ok(diff)
1075}
1076
/// Locate the workspace root by walking from the current directory up through
/// its ancestors and returning the first directory that contains `.git`,
/// `Cargo.toml`, or `package.json`.
///
/// Falls back to the current directory when no ancestor has a marker, and to
/// `"."` when the current directory itself cannot be determined.
pub fn workspace_root() -> PathBuf {
    const ROOT_MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    for dir in cwd.ancestors() {
        if ROOT_MARKERS.iter().any(|marker| dir.join(marker).exists()) {
            return dir.to_path_buf();
        }
    }

    // No marker anywhere up the tree: treat the current directory as the root.
    std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
}
1093
1094/// Returns true if the workspace root looks like a real project.
1095/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1096/// count — at least one explicit build/package marker must also be present.
1097pub fn is_project_workspace() -> bool {
1098    let root = workspace_root();
1099    let has_explicit_marker = root.join("Cargo.toml").exists()
1100        || root.join("package.json").exists()
1101        || root.join("pyproject.toml").exists()
1102        || root.join("go.mod").exists()
1103        || root.join("setup.py").exists()
1104        || root.join("pom.xml").exists()
1105        || root.join("build.gradle").exists()
1106        || root.join("CMakeLists.txt").exists();
1107    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1108}
1109
1110/// A "Pre-Flight Scoping" tool that provides a high-level recursive map of the project.
1111/// Returns a directory tree and project configuration overview.
1112pub async fn map_project(_args: &Value) -> Result<String, String> {
1113    let root = workspace_root();
1114    let mut report = String::new();
1115    report.push_str(&format!("Project Root: {}\n", root.display()));
1116
1117    // ── Layer 1: Configuration DNA ───────────────────────────────────────────
1118    report.push_str("\n── Configuration DNA ──\n");
1119    let markers = [
1120        "Cargo.toml",
1121        "package.json",
1122        "go.mod",
1123        "requirements.txt",
1124        "pyproject.toml",
1125        "README.md",
1126        "CLAUDE.md",
1127        "Taskfile.yml",
1128        ".env.example",
1129    ];
1130    for marker in &markers {
1131        let path = root.join(marker);
1132        if path.exists() {
1133            if let Ok(content) = std::fs::read_to_string(&path) {
1134                let snippet = &content[..content.len().min(800)];
1135                report.push_str(&format!("### File: {}\n```\n{}\n```\n", marker, snippet));
1136            }
1137        }
1138    }
1139
1140    // ── Layer 2: Hierarchy Discovery ───────────────────────────────────────
1141    report.push_str("\n── Directory Structure ──\n");
1142    let mut lines = Vec::new();
1143    build_tree(&root, 0, &mut lines)?;
1144    report.push_str(&lines.join("\n"));
1145
1146    Ok(report)
1147}
1148
/// Append an indented listing of `dir` to `lines`, recursing into subdirectories.
///
/// * `dir` — directory to list.
/// * `depth` — current nesting level; each level indents by two spaces.
///   Directories at a depth greater than 4 are not expanded, to keep the
///   output small.
/// * `lines` — output buffer; one entry is pushed per listed file or directory.
///
/// Entries whose type is not a regular file (directories, and anything whose
/// type is something else or unknown) are listed first, then regular files;
/// each group is ordered by file name. Names starting with `.` and the
/// directories `target`, `node_modules` and `vendor` are skipped.
///
/// An entry whose file type cannot be determined is listed as a plain file
/// and is not descended into, instead of aborting the whole listing.
///
/// # Errors
/// Returns `Err` if `dir` (or any directory reached while recursing) cannot
/// be read.
fn build_tree(dir: &Path, depth: usize, lines: &mut Vec<String>) -> Result<(), String> {
    if depth > 4 {
        return Ok(());
    } // Cap depth to prevent token explosion

    /// One directory entry with its type resolved exactly once.
    struct Node {
        is_file: bool,
        is_dir: bool,
        name: std::ffi::OsString,
        path: PathBuf,
    }

    let mut nodes: Vec<Node> = std::fs::read_dir(dir)
        .map_err(|e| format!("Failed to read dir {dir:?}: {e}"))?
        .filter_map(Result::ok)
        .map(|entry| {
            // A failed type lookup is not fatal: treat the entry as "unknown".
            let kind = entry.file_type().ok();
            Node {
                is_file: kind.map_or(false, |k| k.is_file()),
                is_dir: kind.map_or(false, |k| k.is_dir()),
                name: entry.file_name(),
                path: entry.path(),
            }
        })
        .collect();

    // Non-files (directories) first, then files; alphabetical within each group.
    nodes.sort_by(|a, b| {
        a.is_file
            .cmp(&b.is_file)
            .then_with(|| a.name.cmp(&b.name))
    });

    let indent = "  ".repeat(depth);
    for node in nodes {
        let name = node.name.to_string_lossy().into_owned();
        if name.starts_with('.') || name == "target" || name == "node_modules" || name == "vendor" {
            continue;
        }

        let prefix = if node.is_dir { "📁 " } else { "📄 " };
        lines.push(format!("{indent}{prefix}{name}"));

        if node.is_dir {
            build_tree(&node.path, depth + 1, lines)?;
        }
    }
    Ok(())
}
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs