hematite/tools/
file_ops.rs

1use serde_json::Value;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6use walkdir::WalkDir;
7
8// ── Ghost Ledger ──────────────────────────────────────────────────────────────
9
/// Maximum number of `.bak` snapshots kept in the ghost directory.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trim the ghost directory down to the newest [`MAX_GHOST_BACKUPS`] `.bak`
/// files and drop ledger entries that no longer point at a retained backup.
///
/// Everything here is best-effort: an unreadable directory or ledger simply
/// ends the routine, and individual delete/write failures are ignored.
///
/// Backups are ordered newest-first by modification time. Entries whose
/// modification time cannot be read sort as the oldest. Ties are broken by
/// file name, so the result is deterministic even when several backups share
/// one timestamp (backup names start with a millisecond timestamp).
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("bak"))
                .unwrap_or(false)
        })
        .collect();

    // The key needs a metadata syscall, so compute it once per entry instead
    // of once per comparison.
    backups.sort_by_cached_key(|entry| {
        let modified = entry.metadata().and_then(|meta| meta.modified()).ok();
        std::cmp::Reverse((modified, entry.file_name()))
    });

    // `backups` keeps the newest entries; `stale` receives everything beyond the cap.
    let keep = MAX_GHOST_BACKUPS.min(backups.len());
    let stale = backups.split_off(keep);

    // Paths are compared with forward slashes so Windows-style ledger entries match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in stale {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Ledger lines have the form `target|backup`; lines without a `|` or whose
    // backup was not retained are dropped.
    let filtered_lines: Vec<&str> = content
        .lines()
        .filter(|line| {
            line.split_once('|')
                .map(|(_, backup)| retained.contains(&backup.replace('\\', "/")))
                .unwrap_or(false)
        })
        .collect();

    let rewritten = if filtered_lines.is_empty() {
        String::new()
    } else {
        filtered_lines.join("\n") + "\n"
    };
    let _ = fs::write(ledger_path, rewritten);
}
71
72fn save_ghost_backup(target_path: &str, content: &str) {
73    let ws = workspace_root();
74
75    // Phase 1: Try Git Ghost Snapshot
76    if crate::agent::git::is_git_repo(&ws) {
77        let _ = crate::agent::git::create_ghost_snapshot(&ws);
78    }
79
80    // Phase 2: Fallback to local file backup (Ghost Ledger)
81    let ghost_dir = ws.join(".hematite").join("ghost");
82    let _ = fs::create_dir_all(&ghost_dir);
83    let ts = std::time::SystemTime::now()
84        .duration_since(std::time::UNIX_EPOCH)
85        .unwrap()
86        .as_millis();
87    let safe_name = Path::new(target_path)
88        .file_name()
89        .unwrap_or_default()
90        .to_string_lossy();
91    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
92
93    if fs::write(&backup_file, content).is_ok() {
94        use std::io::Write;
95        if let Ok(mut f) = fs::OpenOptions::new()
96            .create(true)
97            .append(true)
98            .open(ghost_dir.join("ledger.txt"))
99        {
100            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
101        }
102        prune_ghost_backups(&ghost_dir);
103    }
104}
105
106pub fn pop_ghost_ledger() -> Result<String, String> {
107    let ws = workspace_root();
108    let ghost_dir = ws.join(".hematite").join("ghost");
109    let ledger_path = ghost_dir.join("ledger.txt");
110
111    if !ledger_path.exists() {
112        return Err("Ghost Ledger is empty — no edits to undo".into());
113    }
114
115    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
116    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
117
118    if lines.is_empty() {
119        return Err("Ghost Ledger is empty".into());
120    }
121
122    let last_line = lines.pop().unwrap();
123    let parts: Vec<&str> = last_line.splitn(2, '|').collect();
124    if parts.len() != 2 {
125        return Err("Corrupted ledger entry".into());
126    }
127
128    let target_path = parts[0];
129    let backup_path = parts[1];
130
131    // Priority 1: Try Git Rollback
132    if crate::agent::git::is_git_repo(&ws) {
133        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
134            let _ = fs::remove_file(backup_path);
135            let new_ledger = lines.join("\n");
136            let _ = fs::write(
137                &ledger_path,
138                if new_ledger.is_empty() {
139                    String::new()
140                } else {
141                    new_ledger + "\n"
142                },
143            );
144            return Ok(msg);
145        }
146    }
147
148    // Priority 2: Standard File Rollback
149    let original_content =
150        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
151    let abs_target = ws.join(target_path);
152    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
153
154    let new_ledger = lines.join("\n");
155    let _ = fs::write(
156        &ledger_path,
157        if new_ledger.is_empty() {
158            String::new()
159        } else {
160            new_ledger + "\n"
161        },
162    );
163    let _ = fs::remove_file(backup_path);
164
165    Ok(format!("Restored {} from Ghost Ledger", target_path))
166}
167
168// ── read_file ─────────────────────────────────────────────────────────────────
169
170pub async fn read_file(args: &Value) -> Result<String, String> {
171    let path = require_str(args, "path")?;
172    let offset = get_usize_arg(args, "offset");
173    let limit = get_usize_arg(args, "limit");
174
175    let abs = safe_path(path)?;
176    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
177
178    let lines: Vec<&str> = raw.lines().collect();
179    let total = lines.len();
180    let start = offset.unwrap_or(0).min(total);
181    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
182
183    let mut content = lines[start..end].join("\n");
184    if end < total {
185        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
186        content.push_str(&format!("This file has {} more lines below. ", total - end));
187        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
188                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
189    }
190
191    Ok(format!(
192        "[{path}  lines {}-{} of {}]\n{}",
193        start + 1,
194        end,
195        total,
196        content
197    ))
198}
199
200// ── inspect_lines ─────────────────────────────────────────────────────────────
201
202pub async fn inspect_lines(args: &Value) -> Result<String, String> {
203    let path = require_str(args, "path")?;
204    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
205    let end_line = get_usize_arg(args, "end_line");
206
207    let abs = safe_path(path)?;
208    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
209
210    let lines: Vec<&str> = raw.lines().collect();
211    let total = lines.len();
212
213    let start = start_line.saturating_sub(1).min(total);
214    let end = end_line.unwrap_or(total).min(total);
215
216    if start >= end && total > 0 {
217        return Err(format!(
218            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
219            end_line.unwrap_or(total)
220        ));
221    }
222
223    let mut output = format!(
224        "[inspect_lines: {path} lines {}-{} of {}]\n",
225        start + 1,
226        end,
227        total
228    );
229    for i in start..end {
230        output.push_str(&format!("[{:>4}] | {}\n", i + 1, lines[i]));
231    }
232
233    Ok(output)
234}
235
236// ── write_file ────────────────────────────────────────────────────────────────
237
238pub async fn write_file(args: &Value) -> Result<String, String> {
239    let path = require_str(args, "path")?;
240    let content = require_str(args, "content")?;
241
242    let abs = safe_path_allow_new(path)?;
243    if let Some(parent) = abs.parent() {
244        fs::create_dir_all(parent)
245            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
246    }
247
248    let existed = abs.exists();
249    if existed {
250        if let Ok(orig) = fs::read_to_string(&abs) {
251            save_ghost_backup(path, &orig);
252        }
253    }
254
255    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
256
257    let action = if existed { "Updated" } else { "Created" };
258    Ok(format!("{action} {path}  ({} bytes)", content.len()))
259}
260
261// ── edit_file ─────────────────────────────────────────────────────────────────
262
263pub async fn edit_file(args: &Value) -> Result<String, String> {
264    let path = require_str(args, "path")?;
265    let search = require_str(args, "search")?;
266    let replace = require_str(args, "replace")?;
267    let replace_all = args
268        .get("replace_all")
269        .and_then(|v| v.as_bool())
270        .unwrap_or(false);
271
272    if search == replace {
273        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
274    }
275
276    let abs = safe_path(path)?;
277    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
278    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
279    let original = raw.replace("\r\n", "\n");
280
281    save_ghost_backup(path, &original);
282
283    let search_trimmed = search.trim();
284    let search_non_ws_len = search_trimmed
285        .chars()
286        .filter(|c| !c.is_whitespace())
287        .count();
288    let search_line_count = search_trimmed.lines().count();
289    if search_non_ws_len < 12 && search_line_count <= 1 {
290        return Err(format!(
291            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
292             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
293        ));
294    }
295
296    // ── Exact match first ────────────────────────────────────────────────────
297    let (effective_search, was_repaired) = if original.contains(search) {
298        let exact_match_count = original.matches(search).count();
299        if exact_match_count > 1 && !replace_all {
300            return Err(format!(
301                "edit_file: search string matched {} times in {path}.\n\
302                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
303                exact_match_count
304            ));
305        }
306        (search.to_string(), false)
307    } else {
308        // ── Fuzzy repair: try whitespace-normalised match ─────────────────
309        // Local models commonly produce search strings with wrong indentation,
310        // trailing spaces, or CRLF/LF mismatches.  We normalise both sides and
311        // find the real span in the file, then apply the replacement there.
312        match fuzzy_find_span(&original, search) {
313            Some(span) => {
314                // Extract the exact slice from the file so we can replace it.
315                let real_slice = original[span.clone()].to_string();
316                (real_slice, true)
317            }
318            None => {
319                let hint = nearest_lines(&original, search);
320                return Err(format!(
321                    "edit_file: search string not found in {path}.\n\
322                     The 'search' value must match the file content exactly \
323                     (including whitespace/indentation).\n\
324                     {hint}"
325                ));
326            }
327        }
328    };
329
330    let updated = if replace_all {
331        original.replace(effective_search.as_str(), replace)
332    } else {
333        original.replacen(effective_search.as_str(), replace, 1)
334    };
335
336    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
337
338    let removed = original.lines().count();
339    let added = updated.lines().count();
340    let repair_note = if was_repaired {
341        "  [whitespace auto-corrected]"
342    } else {
343        ""
344    };
345
346    let mut diff_block = String::new();
347    diff_block.push_str("\n--- DIFF \n");
348    for line in effective_search.lines() {
349        diff_block.push_str(&format!("- {}\n", line));
350    }
351    for line in replace.lines() {
352        diff_block.push_str(&format!("+ {}\n", line));
353    }
354
355    Ok(format!(
356        "Edited {path}  ({} -> {} lines){repair_note}{}",
357        removed, added, diff_block
358    ))
359}
360
361// ── patch_hunk ────────────────────────────────────────────────────────────────
362
363pub async fn patch_hunk(args: &Value) -> Result<String, String> {
364    let path = require_str(args, "path")?;
365    let start_line = require_usize(args, "start_line")?;
366    let end_line = require_usize(args, "end_line")?;
367    let replacement = require_str(args, "replacement")?;
368
369    let abs = safe_path(path)?;
370    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
371
372    save_ghost_backup(path, &original);
373
374    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
375    let total = lines.len();
376
377    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
378        return Err(format!(
379            "patch_hunk: invalid line range {}-{} for file with {} lines",
380            start_line, end_line, total
381        ));
382    }
383
384    let mut updated_lines = Vec::new();
385    // 0-indexed adjustment
386    let s_idx = start_line - 1;
387    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
388
389    // 1. Lines before the hunk
390    updated_lines.extend_from_slice(&lines[0..s_idx]);
391
392    // 2. The hunk replacement
393    for line in replacement.lines() {
394        updated_lines.push(line.to_string());
395    }
396
397    // 3. Lines after the hunk
398    if e_idx < total {
399        updated_lines.extend_from_slice(&lines[e_idx..total]);
400    }
401
402    let updated_content = updated_lines.join("\n");
403    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
404
405    let mut diff = String::new();
406    diff.push_str("\n--- HUNK DIFF ---\n");
407    for i in s_idx..e_idx {
408        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
409    }
410    for line in replacement.lines() {
411        diff.push_str(&format!("+ {}\n", line.trim_end()));
412    }
413
414    Ok(format!(
415        "Patched {path} lines {}-{} ({} -> {} lines){}",
416        start_line,
417        end_line,
418        (e_idx - s_idx),
419        replacement.lines().count(),
420        diff
421    ))
422}
423
424// ── multi_search_replace ──────────────────────────────────────────────────────
425
426#[derive(serde::Deserialize)]
427struct SearchReplaceHunk {
428    search: String,
429    replace: String,
430}
431
432pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
433    let path = require_str(args, "path")?;
434    let hunks_val = args
435        .get("hunks")
436        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
437
438    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
439        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
440
441    if hunks.is_empty() {
442        return Err("multi_search_replace: hunks array is empty".to_string());
443    }
444
445    let abs = safe_path(path)?;
446    let raw =
447        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
448    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
449    let original = raw.replace("\r\n", "\n");
450
451    save_ghost_backup(path, &original);
452
453    let mut current_content = original.clone();
454    let mut diff = String::new();
455    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
456
457    let mut patched_hunks = 0;
458
459    for (i, hunk) in hunks.iter().enumerate() {
460        let match_count = current_content.matches(&hunk.search).count();
461        if match_count == 0 {
462            return Err(format!("multi_search_replace: hunk {} search string not found in file. Ensure exact whitespace match.", i));
463        }
464        if match_count > 1 {
465            return Err(format!("multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.", i, match_count));
466        }
467
468        diff.push_str(&format!("\n@@ Hunk {} @@\n", i + 1));
469        for line in hunk.search.lines() {
470            diff.push_str(&format!("- {}\n", line.trim_end()));
471        }
472        for line in hunk.replace.lines() {
473            diff.push_str(&format!("+ {}\n", line.trim_end()));
474        }
475
476        current_content = current_content.replace(&hunk.search, &hunk.replace);
477        patched_hunks += 1;
478    }
479
480    fs::write(&abs, &current_content)
481        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
482
483    Ok(format!(
484        "Modified {} hunks in {} using exact search-and-replace.{}",
485        patched_hunks, path, diff
486    ))
487}
488
489// ── list_files ────────────────────────────────────────────────────────────────
490
491pub async fn list_files(args: &Value) -> Result<String, String> {
492    let started = Instant::now();
493    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
494    let ext_filter = args.get("extension").and_then(|v| v.as_str());
495
496    let base = safe_path(base_str)?;
497
498    let mut files: Vec<PathBuf> = Vec::new();
499    let mut scanned_count = 0;
500    for entry in WalkDir::new(&base).follow_links(false) {
501        scanned_count += 1;
502        if scanned_count > 25_000 {
503            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
504        }
505        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
506        if !entry.file_type().is_file() {
507            continue;
508        }
509        let p = entry.path();
510
511        // Skip hidden dirs / target / node_modules
512        if path_has_hidden_segment(p) {
513            continue;
514        }
515
516        if let Some(ext) = ext_filter {
517            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
518                continue;
519            }
520        }
521        files.push(p.to_path_buf());
522    }
523
524    // Sort by modification time (newest first).
525    files.sort_by_key(|p| {
526        fs::metadata(p)
527            .and_then(|m| m.modified())
528            .ok()
529            .map(std::cmp::Reverse)
530    });
531
532    let total = files.len();
533    const LIMIT: usize = 200;
534    let truncated = total > LIMIT;
535    let shown: Vec<String> = files
536        .into_iter()
537        .take(LIMIT)
538        .map(|p| p.display().to_string())
539        .collect();
540
541    let ms = started.elapsed().as_millis();
542    let mut out = format!(
543        "{} file(s) in {}  ({ms}ms){}",
544        total.min(LIMIT),
545        base_str,
546        if truncated {
547            "  [truncated at 200]"
548        } else {
549            ""
550        }
551    );
552    out.push('\n');
553    out.push_str(&shown.join("\n"));
554    Ok(out)
555}
556
557// ── grep_files ────────────────────────────────────────────────────────────────
558
559pub async fn grep_files(args: &Value) -> Result<String, String> {
560    let pattern = require_str(args, "pattern")?;
561    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
562    let ext_filter = args.get("extension").and_then(|v| v.as_str());
563    let case_insensitive = args
564        .get("case_insensitive")
565        .and_then(|v| v.as_bool())
566        .unwrap_or(true);
567    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
568    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
569    let offset = get_usize_arg(args, "offset").unwrap_or(0);
570
571    // Context lines: `context` sets both before+after; `before`/`after` override individually.
572    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
573    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
574    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
575
576    let base = safe_path(base_str)?;
577
578    let regex = regex::RegexBuilder::new(pattern)
579        .case_insensitive(case_insensitive)
580        .build()
581        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
582
583    // ── files_only mode ───────────────────────────────────────────────────────
584    if files_only {
585        let mut matched_files: Vec<String> = Vec::new();
586        let mut scanned_count = 0;
587
588        for entry in WalkDir::new(&base).follow_links(false) {
589            scanned_count += 1;
590            if scanned_count > 25_000 {
591                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
592            }
593            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
594            if !entry.file_type().is_file() {
595                continue;
596            }
597            let p = entry.path();
598            if path_has_hidden_segment(p) {
599                continue;
600            }
601            if let Some(ext) = ext_filter {
602                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
603                    continue;
604                }
605            }
606            let Ok(contents) = fs::read_to_string(p) else {
607                continue;
608            };
609            if contents.lines().any(|line| regex.is_match(line)) {
610                matched_files.push(p.display().to_string());
611            }
612        }
613
614        if matched_files.is_empty() {
615            return Ok(format!("No files matching '{pattern}' in {base_str}"));
616        }
617
618        let total = matched_files.len();
619        let page: Vec<_> = matched_files
620            .into_iter()
621            .skip(offset)
622            .take(head_limit)
623            .collect();
624        let showing = page.len();
625        let mut out = format!("{total} file(s) match '{pattern}'");
626        if offset > 0 || showing < total {
627            out.push_str(&format!(
628                " [showing {}-{} of {total}]",
629                offset + 1,
630                offset + showing
631            ));
632        }
633        out.push('\n');
634        out.push_str(&page.join("\n"));
635        return Ok(out);
636    }
637
638    // ── content mode with optional context lines ──────────────────────────────
639
640    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
641    struct Hunk {
642        path: String,
643        /// (line_number_1_indexed, line_text, is_match)
644        lines: Vec<(usize, String, bool)>,
645    }
646
647    let mut hunks: Vec<Hunk> = Vec::new();
648    let mut total_matches = 0usize;
649    let mut files_matched = 0usize;
650    let mut scanned_count = 0;
651
652    for entry in WalkDir::new(&base).follow_links(false) {
653        scanned_count += 1;
654        if scanned_count > 25_000 {
655            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
656        }
657        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
658        if !entry.file_type().is_file() {
659            continue;
660        }
661        let p = entry.path();
662        if path_has_hidden_segment(p) {
663            continue;
664        }
665        if let Some(ext) = ext_filter {
666            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
667                continue;
668            }
669        }
670        let Ok(contents) = fs::read_to_string(p) else {
671            continue;
672        };
673        let all_lines: Vec<&str> = contents.lines().collect();
674        let n = all_lines.len();
675
676        // Find all match indices in this file.
677        let match_idxs: Vec<usize> = all_lines
678            .iter()
679            .enumerate()
680            .filter(|(_, line)| regex.is_match(line))
681            .map(|(i, _)| i)
682            .collect();
683
684        if match_idxs.is_empty() {
685            continue;
686        }
687        files_matched += 1;
688        total_matches += match_idxs.len();
689
690        // Merge overlapping ranges into hunks.
691        let path_str = p.display().to_string();
692        let mut ranges: Vec<(usize, usize)> = match_idxs
693            .iter()
694            .map(|&i| {
695                (
696                    i.saturating_sub(before),
697                    (i + after).min(n.saturating_sub(1)),
698                )
699            })
700            .collect();
701
702        // Sort and merge overlapping ranges.
703        ranges.sort_unstable();
704        let mut merged: Vec<(usize, usize)> = Vec::new();
705        for (s, e) in ranges {
706            if let Some(last) = merged.last_mut() {
707                if s <= last.1 + 1 {
708                    last.1 = last.1.max(e);
709                    continue;
710                }
711            }
712            merged.push((s, e));
713        }
714
715        // Build hunks from merged ranges.
716        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
717        for (start, end) in merged {
718            let mut hunk_lines = Vec::new();
719            for i in start..=end {
720                hunk_lines.push((i + 1, all_lines[i].to_string(), match_set.contains(&i)));
721            }
722            hunks.push(Hunk {
723                path: path_str.clone(),
724                lines: hunk_lines,
725            });
726        }
727    }
728
729    if hunks.is_empty() {
730        return Ok(format!("No matches for '{pattern}' in {base_str}"));
731    }
732
733    let total_hunks = hunks.len();
734    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
735    let showing = page_hunks.len();
736
737    let mut out =
738        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
739    if offset > 0 || showing < total_hunks {
740        out.push_str(&format!(
741            " [hunks {}-{} of {total_hunks}]",
742            offset + 1,
743            offset + showing
744        ));
745    }
746    out.push('\n');
747
748    for (i, hunk) in page_hunks.iter().enumerate() {
749        if i > 0 {
750            out.push_str("\n--\n");
751        }
752        for (lineno, text, is_match) in &hunk.lines {
753            if *is_match {
754                out.push_str(&format!("{}:{}:{}\n", hunk.path, lineno, text));
755            } else {
756                out.push_str(&format!("{}: {}-{}\n", hunk.path, lineno, text));
757            }
758        }
759    }
760
761    Ok(out.trim_end().to_string())
762}
763
764// ── Argument helpers ──────────────────────────────────────────────────────────
765
766fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
767    args.get(key)
768        .and_then(|v| v.as_str())
769        .ok_or_else(|| format!("Missing required argument: '{key}'"))
770}
771
772fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
773    args.get(key).and_then(value_as_usize)
774}
775
776fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
777    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
778}
779
780fn value_as_usize(value: &Value) -> Option<usize> {
781    if let Some(v) = value.as_u64() {
782        return usize::try_from(v).ok();
783    }
784
785    if let Some(v) = value.as_i64() {
786        return if v >= 0 {
787            usize::try_from(v as u64).ok()
788        } else {
789            None
790        };
791    }
792
793    if let Some(v) = value.as_f64() {
794        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
795            return Some(v as usize);
796        }
797        return None;
798    }
799
800    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
801}
802
803// ── Path helpers ──────────────────────────────────────────────────────────────
804
805/// Resolve a path that must already exist, and check it's inside the workspace.
806fn safe_path(path: &str) -> Result<PathBuf, String> {
807    let candidate = resolve_candidate(path);
808    canonicalize_safe(&candidate, path)
809}
810
811/// Resolve a path that may not exist yet (for write_file).
812fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
813    let candidate = resolve_candidate(path);
814
815    // Try canonical first.
816    if let Ok(abs) = candidate.canonicalize() {
817        check_workspace_bounds(&abs, path)?;
818        return Ok(abs);
819    }
820
821    // File doesn't exist yet — canonicalize the parent, append the filename.
822    let parent = candidate.parent().unwrap_or(Path::new("."));
823    let name = candidate
824        .file_name()
825        .ok_or_else(|| format!("invalid path: {path}"))?;
826    let abs_parent = parent
827        .canonicalize()
828        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
829    let abs = abs_parent.join(name);
830    check_workspace_bounds(&abs, path)?;
831    Ok(abs)
832}
833
/// Make `path` absolute without touching the filesystem.
///
/// Absolute paths are returned unchanged. Relative paths are joined onto the
/// current working directory, or onto `"."` if the working directory cannot
/// be determined.
fn resolve_candidate(path: &str) -> PathBuf {
    let requested = Path::new(path);
    if requested.is_absolute() {
        return requested.to_path_buf();
    }

    match std::env::current_dir() {
        Ok(cwd) => cwd.join(requested),
        Err(_) => PathBuf::from(".").join(requested),
    }
}
844
845fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
846    let abs = candidate
847        .canonicalize()
848        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
849    check_workspace_bounds(&abs, original)?;
850    Ok(abs)
851}
852
853fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
854    // Delegate to the existing guard for blacklist + traversal checks.
855    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
856    super::guard::path_is_safe(&workspace, abs)
857        .map(|_| ())
858        .map_err(|e| format!("file access denied for '{original}': {e}"))
859}
860
/// Returns true if any component of `p` should be skipped: a name starting
/// with `.` (other than `.` and `..` themselves), or exactly `target`,
/// `node_modules`, or `__pycache__`.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let name = component.as_os_str().to_string_lossy();
        let skipped = match name.as_ref() {
            "." | ".." => false,
            "target" | "node_modules" | "__pycache__" => true,
            other => other.starts_with('.'),
        };
        if skipped {
            return true;
        }
    }
    false
}
871
/// Build a hint showing the file lines around the best candidate for a failed
/// search, so the caller can see the real indentation/content and retry.
///
/// The first non-blank line of `search` (trimmed) is compared with every
/// trimmed line of `content`. The score is the length of their common
/// prefix, and the last line with the highest score wins. The snippet covers
/// up to 3 lines before and 4 lines after the winner, each prefixed with its
/// 1-based line number. Returns `"(file is empty)"` when `content` has no
/// lines.
fn nearest_lines(content: &str, search: &str) -> String {
    let file_lines: Vec<&str> = content.lines().collect();
    if file_lines.is_empty() {
        return "(file is empty)".into();
    }

    // Anchor: first search line that is not blank once trimmed.
    let anchor = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    let mut best_idx = 0usize;
    if !anchor.is_empty() {
        let mut best_score = 0usize;
        for (idx, line) in file_lines.iter().enumerate() {
            // Score: length of longest common prefix after trimming.
            let score = anchor
                .chars()
                .zip(line.trim().chars())
                .take_while(|(a, b)| a == b)
                .count();
            // `>=` so that, on ties, the later line wins.
            if score >= best_score {
                best_score = score;
                best_idx = idx;
            }
        }
    }

    let start = best_idx.saturating_sub(3);
    let end = (best_idx + 5).min(file_lines.len());

    let mut numbered: Vec<String> = Vec::with_capacity(end - start);
    for (offset, text) in file_lines[start..end].iter().enumerate() {
        numbered.push(format!("{:>4} | {}", start + offset + 1, text));
    }
    let snippet = numbered.join("\n");

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        end,
        snippet
    )
}
923
/// Fuzzy match: locate `search` in `content` while ignoring per-line leading
/// and trailing whitespace and the LF/CRLF distinction, and return the byte
/// range of the real match in `content`.
///
/// Matching is done on whole lines: every line of `search` (after trimming,
/// and with leading/trailing blank lines dropped) must equal the trimmed text
/// of consecutive lines in `content`. Only whitespace differences are
/// tolerated — changed content never matches.
///
/// The returned range starts at the first byte of the first matched line
/// (including its indentation) and ends just before the line terminator of the
/// last matched line. The first matching window wins.
///
/// Returns `None` when `search` is blank or no window of lines matches.
fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
    // Trimmed search lines, without surrounding blank lines.
    let wanted: Vec<&str> = search.lines().map(str::trim).collect();
    let first = wanted.iter().position(|line| !line.is_empty())?;
    let last = wanted.iter().rposition(|line| !line.is_empty())?;
    let wanted = &wanted[first..=last];

    // For every content line record (byte start, byte end excluding the
    // terminator, trimmed text). Offsets are taken from the raw pieces so that
    // both "\n" and "\r\n" terminators are accounted for exactly.
    let mut lines: Vec<(usize, usize, &str)> = Vec::new();
    let mut offset = 0;
    for raw in content.split_inclusive('\n') {
        let body = raw
            .strip_suffix('\n')
            .map_or(raw, |line| line.strip_suffix('\r').unwrap_or(line));
        lines.push((offset, offset + body.len(), body.trim()));
        offset += raw.len();
    }

    // `wanted` is non-empty here, so `windows` cannot be asked for size zero.
    let hit = lines.windows(wanted.len()).position(|window| {
        window
            .iter()
            .zip(wanted)
            .all(|(&(_, _, text), &want)| text == want)
    })?;

    let start = lines[hit].0;
    let end = lines[hit + wanted.len() - 1].1;
    Some(start..end)
}
997
998// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
999
1000/// Return a formatted diff string for an edit_file operation without applying it.
1001/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1002/// search string cannot be located (caller falls through to normal tool dispatch).
1003pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1004    let path = require_str(args, "path")?;
1005    let search = require_str(args, "search")?;
1006    let replace = require_str(args, "replace")?;
1007
1008    let abs = safe_path(path)?;
1009    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1010    let original = raw.replace("\r\n", "\n");
1011
1012    let effective_search: String = if original.contains(search) {
1013        search.to_string()
1014    } else {
1015        match fuzzy_find_span(&original, search) {
1016            Some(span) => original[span].to_string(),
1017            None => return Err("search string not found — diff preview unavailable".into()),
1018        }
1019    };
1020
1021    let mut diff = String::new();
1022    for line in effective_search.lines() {
1023        diff.push_str(&format!("- {}\n", line));
1024    }
1025    for line in replace.lines() {
1026        diff.push_str(&format!("+ {}\n", line));
1027    }
1028    Ok(diff)
1029}
1030
1031/// Return a formatted diff string for a patch_hunk operation without applying it.
1032pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1033    let path = require_str(args, "path")?;
1034    let start_line = require_usize(args, "start_line")?;
1035    let end_line = require_usize(args, "end_line")?;
1036    let replacement = require_str(args, "replacement")?;
1037
1038    let abs = safe_path(path)?;
1039    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1040    let lines: Vec<&str> = original.lines().collect();
1041    let total = lines.len();
1042
1043    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1044        return Err(format!(
1045            "patch_hunk: invalid line range {}-{} for file with {} lines",
1046            start_line, end_line, total
1047        ));
1048    }
1049
1050    let s_idx = start_line - 1;
1051    let e_idx = end_line;
1052
1053    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1054    for i in s_idx..e_idx {
1055        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
1056    }
1057    for line in replacement.lines() {
1058        diff.push_str(&format!("+ {}\n", line.trim_end()));
1059    }
1060    Ok(diff)
1061}
1062
1063/// Return a formatted diff string for a multi_search_replace operation without applying it.
1064pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1065    let hunks_val = args
1066        .get("hunks")
1067        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1068
1069    #[derive(serde::Deserialize)]
1070    struct PreviewHunk {
1071        search: String,
1072        replace: String,
1073    }
1074    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1075        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1076
1077    let mut diff = String::new();
1078    for (i, hunk) in hunks.iter().enumerate() {
1079        if hunks.len() > 1 {
1080            diff.push_str(&format!("@@ hunk {} @@\n", i + 1));
1081        }
1082        for line in hunk.search.lines() {
1083            diff.push_str(&format!("- {}\n", line.trim_end()));
1084        }
1085        for line in hunk.replace.lines() {
1086            diff.push_str(&format!("+ {}\n", line.trim_end()));
1087        }
1088    }
1089    Ok(diff)
1090}
1091
/// Locate the workspace root by walking upward from the current directory.
///
/// The first directory (starting with the current one) that contains `.git`,
/// `Cargo.toml`, or `package.json` is returned. If no ancestor has any of
/// these, the current directory itself is returned. Whenever the current
/// directory cannot be read, `"."` stands in for it.
pub fn workspace_root() -> PathBuf {
    const ROOT_MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let start = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let found = start
        .ancestors()
        .find(|dir| ROOT_MARKERS.iter().any(|marker| dir.join(marker).exists()));

    match found {
        Some(dir) => dir.to_path_buf(),
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    }
}
1108
1109/// Returns true if the workspace root looks like a real project.
1110/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1111/// count — at least one explicit build/package marker must also be present.
1112pub fn is_project_workspace() -> bool {
1113    let root = workspace_root();
1114    let has_explicit_marker = root.join("Cargo.toml").exists()
1115        || root.join("package.json").exists()
1116        || root.join("pyproject.toml").exists()
1117        || root.join("go.mod").exists()
1118        || root.join("setup.py").exists()
1119        || root.join("pom.xml").exists()
1120        || root.join("build.gradle").exists()
1121        || root.join("CMakeLists.txt").exists();
1122    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1123}
1124
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs