hematite/tools/
file_ops.rs

1use serde_json::Value;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6use walkdir::WalkDir;
7
8// ── Ghost Ledger ──────────────────────────────────────────────────────────────
9
/// Maximum number of `.bak` files kept in the ghost directory.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trims `ghost_dir` down to the newest [`MAX_GHOST_BACKUPS`] `.bak` files and rewrites
/// `ledger.txt` so that it only lists backups that still exist.
///
/// Backups are ranked by modification time (newest first); entries whose metadata cannot
/// be read rank last. Ties are broken by file name, which starts with the creation
/// timestamp in milliseconds, so the ordering stays deterministic on filesystems with
/// coarse mtime resolution.
///
/// Everything here is best-effort: an unreadable directory or ledger makes the function
/// return early, and individual delete/write failures are ignored.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| ext.eq_ignore_ascii_case("bak"))
        })
        .collect();

    // Compute every key exactly once: metadata is a syscall, and a file changing while
    // the sort is running must not be able to produce an inconsistent ordering.
    backups.sort_by_cached_key(|entry| {
        let modified = entry.metadata().and_then(|meta| meta.modified()).ok();
        std::cmp::Reverse((modified, entry.file_name()))
    });

    // Everything past the retention limit is stale; `backups` keeps the survivors.
    let stale = backups.split_off(backups.len().min(MAX_GHOST_BACKUPS));

    // Paths are compared with forward slashes so Windows-style ledger entries still match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in stale {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Keep only well-formed `target|backup` lines whose backup file survived the prune.
    let mut rewritten = String::with_capacity(content.len());
    for line in content.lines() {
        let Some((_, backup_path)) = line.split_once('|') else {
            continue;
        };
        if retained.contains(&backup_path.replace('\\', "/")) {
            rewritten.push_str(line);
            rewritten.push('\n');
        }
    }

    let _ = fs::write(ledger_path, rewritten);
}
71
72fn save_ghost_backup(target_path: &str, content: &str) {
73    let ws = workspace_root();
74
75    // Phase 1: Try Git Ghost Snapshot
76    if crate::agent::git::is_git_repo(&ws) {
77        let _ = crate::agent::git::create_ghost_snapshot(&ws);
78    }
79
80    // Phase 2: Fallback to local file backup (Ghost Ledger)
81    let ghost_dir = hematite_dir().join("ghost");
82    let _ = fs::create_dir_all(&ghost_dir);
83    let ts = std::time::SystemTime::now()
84        .duration_since(std::time::UNIX_EPOCH)
85        .unwrap()
86        .as_millis();
87    let safe_name = Path::new(target_path)
88        .file_name()
89        .unwrap_or_default()
90        .to_string_lossy();
91    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
92
93    if fs::write(&backup_file, content).is_ok() {
94        use std::io::Write;
95        if let Ok(mut f) = fs::OpenOptions::new()
96            .create(true)
97            .append(true)
98            .open(ghost_dir.join("ledger.txt"))
99        {
100            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
101        }
102        prune_ghost_backups(&ghost_dir);
103    }
104}
105
106pub fn pop_ghost_ledger() -> Result<String, String> {
107    let ghost_dir = hematite_dir().join("ghost");
108    let ledger_path = ghost_dir.join("ledger.txt");
109
110    if !ledger_path.exists() {
111        return Err("Ghost Ledger is empty — no edits to undo".into());
112    }
113
114    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
115    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
116
117    if lines.is_empty() {
118        return Err("Ghost Ledger is empty".into());
119    }
120
121    let last_line = lines.pop().unwrap();
122    let parts: Vec<&str> = last_line.splitn(2, '|').collect();
123    if parts.len() != 2 {
124        return Err("Corrupted ledger entry".into());
125    }
126
127    let target_path = parts[0];
128    let backup_path = parts[1];
129
130    let ws = workspace_root();
131
132    // Priority 1: Try Git Rollback
133    if crate::agent::git::is_git_repo(&ws) {
134        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
135            let _ = fs::remove_file(backup_path);
136            let new_ledger = lines.join("\n");
137            let _ = fs::write(
138                &ledger_path,
139                if new_ledger.is_empty() {
140                    String::new()
141                } else {
142                    new_ledger + "\n"
143                },
144            );
145            return Ok(msg);
146        }
147    }
148
149    // Priority 2: Standard File Rollback
150    let original_content =
151        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
152    let abs_target = ws.join(target_path);
153    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
154
155    let new_ledger = lines.join("\n");
156    let _ = fs::write(
157        &ledger_path,
158        if new_ledger.is_empty() {
159            String::new()
160        } else {
161            new_ledger + "\n"
162        },
163    );
164    let _ = fs::remove_file(backup_path);
165
166    Ok(format!("Restored {} from Ghost Ledger", target_path))
167}
168
169// ── read_file ─────────────────────────────────────────────────────────────────
170
171pub async fn read_file(args: &Value, budget_tokens: usize) -> Result<String, String> {
172    let path = require_str(args, "path")?;
173    let offset = get_usize_arg(args, "offset");
174    let limit = get_usize_arg(args, "limit");
175
176    let abs = safe_path(path)?;
177    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
178
179    let lines: Vec<&str> = raw.lines().collect();
180    let total = lines.len();
181    let start = offset.unwrap_or(0).min(total);
182    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
183
184    let mut content = lines[start..end].join("\n");
185
186    // Phase 5: Calculate predictive character budget based on remaining context.
187    let budget_chars = budget_tokens.saturating_mul(4);
188    let char_limit = if budget_tokens == 0 {
189        100_000
190    } else {
191        budget_chars.min(100_000).max(2000)
192    };
193
194    if content.len() > char_limit {
195        content.truncate(char_limit);
196        content.push_str("\n\n--- [PREDICTIVE TRUNCATION: CONTEXT BUDGET REACHED] ---\n");
197        content.push_str(&format!(
198            "Output truncated at {} chars to prevent context window flooding. ",
199            char_limit
200        ));
201        content
202            .push_str("To see more, use `read_file` with a higher `offset` and a smaller `limit`.");
203    } else if end < total {
204        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
205        content.push_str(&format!("This file has {} more lines below. ", total - end));
206        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
207                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
208    }
209
210    Ok(format!(
211        "[{path}  lines {}-{} of {}]\n{}",
212        start + 1,
213        end,
214        total,
215        content
216    ))
217}
218
219// ── inspect_lines ─────────────────────────────────────────────────────────────
220
221pub async fn inspect_lines(args: &Value) -> Result<String, String> {
222    let path = require_str(args, "path")?;
223    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
224    let end_line = get_usize_arg(args, "end_line");
225
226    let abs = safe_path(path)?;
227    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
228
229    let lines: Vec<&str> = raw.lines().collect();
230    let total_lines = lines.len();
231
232    // Out-of-bounds check with descriptive feedback.
233    if start_line > total_lines && total_lines > 0 {
234        return Err(format!(
235            "Invalid line range: You requested line {}, but the file only has {} lines. Try `read_file` on a smaller range or the whole file.",
236            start_line, total_lines
237        ));
238    }
239
240    let start = start_line.saturating_sub(1).min(total_lines);
241    let end = end_line.unwrap_or(total_lines).min(total_lines);
242
243    if start >= end && total_lines > 0 {
244        return Err(format!(
245            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
246            end_line.unwrap_or(total_lines)
247        ));
248    }
249
250    let mut output = format!(
251        "[inspect_lines: {path} lines {}-{} of {}]\n",
252        start + 1,
253        end,
254        total_lines
255    );
256    for i in start..end {
257        output.push_str(&format!("[{:>4}] | {}\n", i + 1, lines[i]));
258    }
259
260    Ok(output)
261}
262
263// ── tail_file ─────────────────────────────────────────────────────────────────
264
265pub async fn tail_file(args: &Value) -> Result<String, String> {
266    let path = require_str(args, "path")?;
267    let n = args
268        .get("lines")
269        .and_then(|v| v.as_u64())
270        .unwrap_or(50)
271        .min(500) as usize;
272    let grep_pat = args.get("grep").and_then(|v| v.as_str());
273
274    let abs = safe_path(path)?;
275    let raw = fs::read_to_string(&abs).map_err(|e| format!("tail_file: {e} ({path})"))?;
276
277    let all_lines: Vec<&str> = raw.lines().collect();
278    let total = all_lines.len();
279
280    // Apply optional grep filter before slicing — model asks for the last N
281    // matching lines, not the last N lines containing maybe 0 matches.
282    let filtered: Vec<(usize, &str)> = if let Some(pat) = grep_pat {
283        let re = regex::Regex::new(pat)
284            .map_err(|e| format!("tail_file: invalid grep pattern '{pat}': {e}"))?;
285        all_lines
286            .iter()
287            .enumerate()
288            .filter(|(_, l)| re.is_match(l))
289            .map(|(i, l)| (i, *l))
290            .collect()
291    } else {
292        all_lines.iter().enumerate().map(|(i, l)| (i, *l)).collect()
293    };
294
295    let total_filtered = filtered.len();
296    let skip = total_filtered.saturating_sub(n);
297    let window = &filtered[skip..];
298
299    if window.is_empty() {
300        let note = if grep_pat.is_some() {
301            format!(" matching '{}'", grep_pat.unwrap())
302        } else {
303            String::new()
304        };
305        return Ok(format!(
306            "[tail_file: {path} — no lines{note} found (total {total} lines)]"
307        ));
308    }
309
310    let first_abs = window[0].0 + 1;
311    let last_abs = window[window.len() - 1].0 + 1;
312    let mut out = format!(
313        "[tail_file: {path} — lines {first_abs}–{last_abs} of {total} (last {n} of {total_filtered} matched)]\n"
314    );
315    for (abs_idx, line) in window {
316        out.push_str(&format!("[{:>5}] {}\n", abs_idx + 1, line));
317    }
318
319    Ok(out)
320}
321
322// ── write_file ────────────────────────────────────────────────────────────────
323
324pub async fn write_file(args: &Value) -> Result<String, String> {
325    let path = require_str(args, "path")?;
326    let content = require_str(args, "content")?;
327
328    let abs = safe_path_allow_new(path)?;
329    if let Some(parent) = abs.parent() {
330        fs::create_dir_all(parent)
331            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
332    }
333
334    let existed = abs.exists();
335    if existed {
336        if let Ok(orig) = fs::read_to_string(&abs) {
337            save_ghost_backup(path, &orig);
338        }
339    }
340
341    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
342
343    let action = if existed { "Updated" } else { "Created" };
344    Ok(format!("{action} {path}  ({} bytes)", content.len()))
345}
346
347// ── edit_file ─────────────────────────────────────────────────────────────────
348
349pub async fn edit_file(args: &Value) -> Result<String, String> {
350    let path = require_str(args, "path")?;
351    let search = require_str(args, "search")?;
352    let replace = require_str(args, "replace")?;
353    let replace_all = args
354        .get("replace_all")
355        .and_then(|v| v.as_bool())
356        .unwrap_or(false);
357
358    if search == replace {
359        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
360    }
361
362    let abs = safe_path(path)?;
363    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
364    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
365    let original = raw.replace("\r\n", "\n");
366
367    save_ghost_backup(path, &original);
368
369    let search_trimmed = search.trim();
370    let search_non_ws_len = search_trimmed
371        .chars()
372        .filter(|c| !c.is_whitespace())
373        .count();
374    let search_line_count = search_trimmed.lines().count();
375    if search_non_ws_len < 12 && search_line_count <= 1 {
376        return Err(format!(
377            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
378             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
379        ));
380    }
381
382    // ── Exact match first ────────────────────────────────────────────────────
383    let (effective_search, was_repaired) = if original.contains(search) {
384        let exact_match_count = original.matches(search).count();
385        if exact_match_count > 1 && !replace_all {
386            return Err(format!(
387                "edit_file: search string matched {} times in {path}.\n\
388                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
389                exact_match_count
390            ));
391        }
392        (search.to_string(), false)
393    } else {
394        // ── Fuzzy repair: progressive normalisation ───────────────────────
395        // Level 1: rstrip only — preserves indentation, strips trailing spaces.
396        // Level 2: indent-flexible — dedent both sides, preserve relative structure.
397        // Level 3: full strip — last resort before cross-file hint.
398        let span = rstrip_find_span(&original, search)
399            .or_else(|| indent_flexible_find_span(&original, search))
400            .or_else(|| fuzzy_find_span(&original, search));
401        match span {
402            Some(span) => {
403                let real_slice = original[span.clone()].to_string();
404                (real_slice, true)
405            }
406            None => {
407                let hint = nearest_lines(&original, search);
408                let cross_hint = find_search_in_workspace(search, path)
409                    .map(|found| format!("\nNote: search string found in '{found}' — did you mean to edit that file?"))
410                    .unwrap_or_default();
411                return Err(format!(
412                    "edit_file: search string not found in {path}.\n\
413                     The 'search' value must match the file content exactly \
414                     (including whitespace/indentation).\n\
415                     {hint}{cross_hint}"
416                ));
417            }
418        }
419    };
420
421    // When a fuzzy match was used, adjust the replace string's indentation to
422    // match the file's actual indent level (not the model's potentially-wrong indent).
423    let effective_replace = if was_repaired {
424        adjust_replace_indent(search, effective_search.as_str(), replace)
425    } else {
426        replace.to_string()
427    };
428
429    let updated = if replace_all {
430        original.replace(effective_search.as_str(), effective_replace.as_str())
431    } else {
432        original.replacen(effective_search.as_str(), effective_replace.as_str(), 1)
433    };
434
435    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
436
437    let removed = original.lines().count();
438    let added = updated.lines().count();
439    let repair_note = if was_repaired {
440        "  [indent auto-corrected]"
441    } else {
442        ""
443    };
444
445    let mut diff_block = String::new();
446    diff_block.push_str("\n--- DIFF \n");
447    for line in effective_search.lines() {
448        diff_block.push_str(&format!("- {}\n", line));
449    }
450    for line in effective_replace.lines() {
451        diff_block.push_str(&format!("+ {}\n", line));
452    }
453
454    Ok(format!(
455        "Edited {path}  ({} -> {} lines){repair_note}{}",
456        removed, added, diff_block
457    ))
458}
459
460// ── patch_hunk ────────────────────────────────────────────────────────────────
461
462pub async fn patch_hunk(args: &Value) -> Result<String, String> {
463    let path = require_str(args, "path")?;
464    let start_line = require_usize(args, "start_line")?;
465    let end_line = require_usize(args, "end_line")?;
466    let replacement = require_str(args, "replacement")?;
467
468    let abs = safe_path(path)?;
469    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
470
471    save_ghost_backup(path, &original);
472
473    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
474    let total = lines.len();
475
476    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
477        return Err(format!(
478            "patch_hunk: invalid line range {}-{} for file with {} lines",
479            start_line, end_line, total
480        ));
481    }
482
483    let mut updated_lines = Vec::new();
484    // 0-indexed adjustment
485    let s_idx = start_line - 1;
486    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
487
488    // 1. Lines before the hunk
489    updated_lines.extend_from_slice(&lines[0..s_idx]);
490
491    // 2. The hunk replacement
492    for line in replacement.lines() {
493        updated_lines.push(line.to_string());
494    }
495
496    // 3. Lines after the hunk
497    if e_idx < total {
498        updated_lines.extend_from_slice(&lines[e_idx..total]);
499    }
500
501    let updated_content = updated_lines.join("\n");
502    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
503
504    let mut diff = String::new();
505    diff.push_str("\n--- HUNK DIFF ---\n");
506    for i in s_idx..e_idx {
507        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
508    }
509    for line in replacement.lines() {
510        diff.push_str(&format!("+ {}\n", line.trim_end()));
511    }
512
513    Ok(format!(
514        "Patched {path} lines {}-{} ({} -> {} lines){}",
515        start_line,
516        end_line,
517        (e_idx - s_idx),
518        replacement.lines().count(),
519        diff
520    ))
521}
522
523// ── multi_search_replace ──────────────────────────────────────────────────────
524
525#[derive(serde::Deserialize)]
526struct SearchReplaceHunk {
527    search: String,
528    replace: String,
529}
530
531pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
532    let path = require_str(args, "path")?;
533    let hunks_val = args
534        .get("hunks")
535        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
536
537    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
538        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
539
540    if hunks.is_empty() {
541        return Err("multi_search_replace: hunks array is empty".to_string());
542    }
543
544    let abs = safe_path(path)?;
545    let raw =
546        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
547    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
548    let original = raw.replace("\r\n", "\n");
549
550    save_ghost_backup(path, &original);
551
552    let mut current_content = original.clone();
553    let mut diff = String::new();
554    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
555
556    let mut patched_hunks = 0;
557
558    for (i, hunk) in hunks.iter().enumerate() {
559        let match_count = current_content.matches(&hunk.search).count();
560
561        let (effective_search, effective_replace) = if match_count == 1 {
562            // Exact match — use as-is.
563            (hunk.search.clone(), hunk.replace.clone())
564        } else if match_count == 0 {
565            // Progressive fuzzy fallback: rstrip → indent-flexible → full-strip.
566            let span = rstrip_find_span(&current_content, &hunk.search)
567                .or_else(|| indent_flexible_find_span(&current_content, &hunk.search))
568                .or_else(|| fuzzy_find_span(&current_content, &hunk.search));
569            match span {
570                Some(span) => {
571                    let real_slice = current_content[span].to_string();
572                    let adjusted_replace =
573                        adjust_replace_indent(&hunk.search, &real_slice, &hunk.replace);
574                    (real_slice, adjusted_replace)
575                }
576                None => {
577                    return Err(format!(
578                        "multi_search_replace: hunk {} search string not found in file.",
579                        i
580                    ));
581                }
582            }
583        } else {
584            return Err(format!(
585                "multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.",
586                i, match_count
587            ));
588        };
589
590        diff.push_str(&format!("\n@@ Hunk {} @@\n", i + 1));
591        for line in effective_search.lines() {
592            diff.push_str(&format!("- {}\n", line.trim_end()));
593        }
594        for line in effective_replace.lines() {
595            diff.push_str(&format!("+ {}\n", line.trim_end()));
596        }
597
598        current_content = current_content.replacen(&effective_search, &effective_replace, 1);
599        patched_hunks += 1;
600    }
601
602    fs::write(&abs, &current_content)
603        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
604
605    Ok(format!(
606        "Modified {} hunks in {} using exact search-and-replace.{}",
607        patched_hunks, path, diff
608    ))
609}
610
611// ── list_files ────────────────────────────────────────────────────────────────
612
613pub async fn list_files(args: &Value, budget: usize) -> Result<String, String> {
614    let char_budget = budget * 4; // Approx tokens to chars
615    let started = Instant::now();
616    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
617    let ext_filter = args.get("extension").and_then(|v| v.as_str());
618
619    let base = safe_path(base_str)?;
620
621    let mut files: Vec<PathBuf> = Vec::new();
622    let mut scanned_count = 0;
623    for entry in WalkDir::new(&base).follow_links(false) {
624        scanned_count += 1;
625        if scanned_count > 25_000 {
626            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
627        }
628        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
629        if !entry.file_type().is_file() {
630            continue;
631        }
632        let p = entry.path();
633
634        // Skip hidden dirs / target / node_modules
635        if path_has_hidden_segment(p) {
636            continue;
637        }
638
639        if let Some(ext) = ext_filter {
640            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
641                continue;
642            }
643        }
644        files.push(p.to_path_buf());
645    }
646
647    // Sort by modification time (newest first).
648    files.sort_by_key(|p| {
649        fs::metadata(p)
650            .and_then(|m| m.modified())
651            .ok()
652            .map(std::cmp::Reverse)
653    });
654
655    let mut current_chars = 0;
656    let mut shown = Vec::new();
657    let mut truncated_by_budget = false;
658
659    let total_scanned = files.len();
660    for f in files {
661        let f_str = f.display().to_string();
662        if current_chars + f_str.len() + 1 > char_budget {
663            truncated_by_budget = true;
664            break;
665        }
666        current_chars += f_str.len() + 1;
667        shown.push(f_str);
668        if shown.len() >= 200 {
669            break;
670        }
671    }
672
673    let truncated = total_scanned > shown.len();
674
675    let ms = started.elapsed().as_millis();
676    let mut out = format!(
677        "{} file(s) in {}  ({ms}ms){}",
678        shown.len(),
679        base_str,
680        if truncated {
681            if truncated_by_budget {
682                "  [truncated by token budget]"
683            } else {
684                "  [truncated at 200]"
685            }
686        } else {
687            ""
688        }
689    );
690    out.push('\n');
691    out.push_str(&shown.join("\n"));
692    Ok(out)
693}
694
695// ── create_directory ──────────────────────────────────────────────────────────
696
697pub async fn create_directory(args: &Value) -> Result<String, String> {
698    let path = require_str(args, "path")?;
699    let abs = safe_path_allow_new(path)?;
700
701    if abs.exists() {
702        if abs.is_dir() {
703            return Ok(format!("Directory already exists: {path}"));
704        } else {
705            return Err(format!("A file already exists at this path: {path}"));
706        }
707    }
708
709    fs::create_dir_all(&abs).map_err(|e| format!("create_directory: {e} ({path})"))?;
710    Ok(format!("Created directory: {path}"))
711}
712
713// ── grep_files ────────────────────────────────────────────────────────────────
714
715pub async fn grep_files(args: &Value, budget: usize) -> Result<String, String> {
716    let char_budget = budget * 4;
717    let pattern = require_str(args, "pattern")?;
718    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
719    let ext_filter = args.get("extension").and_then(|v| v.as_str());
720    let case_insensitive = args
721        .get("case_insensitive")
722        .and_then(|v| v.as_bool())
723        .unwrap_or(true);
724    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
725    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
726    let offset = get_usize_arg(args, "offset").unwrap_or(0);
727
728    // Context lines: `context` sets both before+after; `before`/`after` override individually.
729    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
730    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
731    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
732
733    let base = safe_path(base_str)?;
734
735    let regex = regex::RegexBuilder::new(pattern)
736        .case_insensitive(case_insensitive)
737        .build()
738        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
739
740    // ── files_only mode ───────────────────────────────────────────────────────
741    if files_only {
742        let mut matched_files: Vec<String> = Vec::new();
743        let mut scanned_count = 0;
744
745        for entry in WalkDir::new(&base).follow_links(false) {
746            scanned_count += 1;
747            if scanned_count > 25_000 {
748                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
749            }
750            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
751            if !entry.file_type().is_file() {
752                continue;
753            }
754            let p = entry.path();
755            if path_has_hidden_segment(p) {
756                continue;
757            }
758            if let Some(ext) = ext_filter {
759                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
760                    continue;
761                }
762            }
763            let Ok(contents) = fs::read_to_string(p) else {
764                continue;
765            };
766            if contents.lines().any(|line| regex.is_match(line)) {
767                matched_files.push(p.display().to_string());
768            }
769        }
770
771        if matched_files.is_empty() {
772            return Ok(format!("No files matching '{pattern}' in {base_str}"));
773        }
774
775        let total = matched_files.len();
776        let page: Vec<_> = matched_files
777            .into_iter()
778            .skip(offset)
779            .take(head_limit)
780            .collect();
781        let showing = page.len();
782
783        let mut out = format!("{total} file(s) match '{pattern}'");
784        if offset > 0 || showing < total {
785            out.push_str(&format!(
786                " [showing {}-{} of {total}]",
787                offset + 1,
788                offset + showing
789            ));
790        }
791        out.push('\n');
792
793        let mut current_chars = out.len();
794        let mut shown_pages = Vec::new();
795        for p in page {
796            if current_chars + p.len() + 1 > char_budget {
797                out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
798                break;
799            }
800            current_chars += p.len() + 1;
801            shown_pages.push(p);
802        }
803        out.push_str(&shown_pages.join("\n"));
804        return Ok(out);
805    }
806
807    // ── content mode with optional context lines ──────────────────────────────
808
809    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
810    struct Hunk {
811        path: String,
812        /// (line_number_1_indexed, line_text, is_match)
813        lines: Vec<(usize, String, bool)>,
814    }
815
816    let mut hunks: Vec<Hunk> = Vec::new();
817    let mut total_matches = 0usize;
818    let mut files_matched = 0usize;
819    let mut scanned_count = 0;
820
821    for entry in WalkDir::new(&base).follow_links(false) {
822        scanned_count += 1;
823        if scanned_count > 25_000 {
824            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
825        }
826        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
827        if !entry.file_type().is_file() {
828            continue;
829        }
830        let p = entry.path();
831        if path_has_hidden_segment(p) {
832            continue;
833        }
834        if let Some(ext) = ext_filter {
835            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
836                continue;
837            }
838        }
839        let Ok(contents) = fs::read_to_string(p) else {
840            continue;
841        };
842        let all_lines: Vec<&str> = contents.lines().collect();
843        let n = all_lines.len();
844
845        // Find all match indices in this file.
846        let match_idxs: Vec<usize> = all_lines
847            .iter()
848            .enumerate()
849            .filter(|(_, line)| regex.is_match(line))
850            .map(|(i, _)| i)
851            .collect();
852
853        if match_idxs.is_empty() {
854            continue;
855        }
856        files_matched += 1;
857        total_matches += match_idxs.len();
858
859        // Merge overlapping ranges into hunks.
860        let path_str = p.display().to_string();
861        let mut ranges: Vec<(usize, usize)> = match_idxs
862            .iter()
863            .map(|&i| {
864                (
865                    i.saturating_sub(before),
866                    (i + after).min(n.saturating_sub(1)),
867                )
868            })
869            .collect();
870
871        // Sort and merge overlapping ranges.
872        ranges.sort_unstable();
873        let mut merged: Vec<(usize, usize)> = Vec::new();
874        for (s, e) in ranges {
875            if let Some(last) = merged.last_mut() {
876                if s <= last.1 + 1 {
877                    last.1 = last.1.max(e);
878                    continue;
879                }
880            }
881            merged.push((s, e));
882        }
883
884        // Build hunks from merged ranges.
885        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
886        for (start, end) in merged {
887            let mut hunk_lines = Vec::new();
888            for i in start..=end {
889                hunk_lines.push((i + 1, all_lines[i].to_string(), match_set.contains(&i)));
890            }
891            hunks.push(Hunk {
892                path: path_str.clone(),
893                lines: hunk_lines,
894            });
895        }
896    }
897
898    if hunks.is_empty() {
899        return Ok(format!("No matches for '{pattern}' in {base_str}"));
900    }
901
902    let total_hunks = hunks.len();
903    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
904    let showing = page_hunks.len();
905
906    let mut out =
907        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
908    if offset > 0 || showing < total_hunks {
909        out.push_str(&format!(
910            " [hunks {}-{} of {total_hunks}]",
911            offset + 1,
912            offset + showing
913        ));
914    }
915    out.push('\n');
916
917    let mut current_chars = out.len();
918    let mut truncated_by_budget = false;
919
920    for (i, hunk) in page_hunks.iter().enumerate() {
921        let mut hunk_out = String::new();
922        if i > 0 {
923            hunk_out.push_str("\n--\n");
924        }
925        for (lineno, text, is_match) in &hunk.lines {
926            if *is_match {
927                hunk_out.push_str(&format!("{}:{}:{}\n", hunk.path, lineno, text));
928            } else {
929                hunk_out.push_str(&format!("{}: {}-{}\n", hunk.path, lineno, text));
930            }
931        }
932
933        if current_chars + hunk_out.len() > char_budget {
934            truncated_by_budget = true;
935            break;
936        }
937        current_chars += hunk_out.len();
938        out.push_str(&hunk_out);
939    }
940
941    if truncated_by_budget {
942        out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
943    }
944
945    Ok(out.trim_end().to_string())
946}
947
948// ── Argument helpers ──────────────────────────────────────────────────────────
949
950fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
951    args.get(key)
952        .and_then(|v| v.as_str())
953        .ok_or_else(|| format!("Missing required argument: '{key}'"))
954}
955
956fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
957    args.get(key).and_then(value_as_usize)
958}
959
960fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
961    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
962}
963
964fn value_as_usize(value: &Value) -> Option<usize> {
965    if let Some(v) = value.as_u64() {
966        return usize::try_from(v).ok();
967    }
968
969    if let Some(v) = value.as_i64() {
970        return if v >= 0 {
971            usize::try_from(v as u64).ok()
972        } else {
973            None
974        };
975    }
976
977    if let Some(v) = value.as_f64() {
978        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
979            return Some(v as usize);
980        }
981        return None;
982    }
983
984    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
985}
986
987// ── Path helpers ──────────────────────────────────────────────────────────────
988
989/// Resolve a path that must already exist, and check it's inside the workspace.
990fn safe_path(path: &str) -> Result<PathBuf, String> {
991    let candidate = resolve_candidate(path);
992    match canonicalize_safe(&candidate, path) {
993        Ok(abs) => Ok(abs),
994        Err(e) => {
995            if e.contains("The system cannot find the file specified") || e.contains("os error 2") {
996                if let Some(suggestion) = suggest_better_path(path) {
997                    return Err(format!("{e}. Did you mean '{suggestion}'?"));
998                }
999            }
1000            Err(e)
1001        }
1002    }
1003}
1004
1005fn suggest_better_path(original: &str) -> Option<String> {
1006    let path = Path::new(original);
1007    let filename = path.file_name()?.to_str()?.to_lowercase();
1008    let parent = path.parent().unwrap_or_else(|| Path::new("."));
1009
1010    // Use resolve_candidate to handle sovereign tokens like @DESKTOP/
1011    let abs_parent = resolve_candidate(&parent.to_string_lossy())
1012        .canonicalize()
1013        .ok()?;
1014
1015    let mut best_match = None;
1016    let mut best_score = 0;
1017
1018    if let Ok(entries) = fs::read_dir(abs_parent) {
1019        for entry in entries.flatten() {
1020            if let Some(candidate_name) = entry.file_name().to_str() {
1021                let lower_candidate = candidate_name.to_lowercase();
1022                if lower_candidate == filename {
1023                    continue;
1024                }
1025
1026                let mut score = 0;
1027                if lower_candidate.starts_with(&filename) || filename.starts_with(&lower_candidate)
1028                {
1029                    score += 10;
1030                }
1031                // Catch style.css vs styles.css
1032                if (filename.ends_with('s') && filename[..filename.len() - 1] == lower_candidate)
1033                    || (lower_candidate.ends_with('s')
1034                        && lower_candidate[..lower_candidate.len() - 1] == filename)
1035                {
1036                    score += 20;
1037                }
1038
1039                if score > best_score {
1040                    best_score = score;
1041                    best_match = Some(candidate_name.to_string());
1042                }
1043            }
1044        }
1045    }
1046
1047    if best_score >= 10 {
1048        best_match
1049    } else {
1050        None
1051    }
1052}
1053
1054/// Resolve a path that may not exist yet (for write_file).
1055fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
1056    let candidate = resolve_candidate(path);
1057
1058    // Try canonical first.
1059    if let Ok(abs) = candidate.canonicalize() {
1060        check_workspace_bounds(&abs, path)?;
1061        return Ok(abs);
1062    }
1063
1064    // File doesn't exist yet — canonicalize the parent, append the filename.
1065    let parent = candidate.parent().unwrap_or(Path::new("."));
1066    let name = candidate
1067        .file_name()
1068        .ok_or_else(|| format!("invalid path: {path}"))?;
1069    let abs_parent = parent
1070        .canonicalize()
1071        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
1072    let abs = abs_parent.join(name);
1073    check_workspace_bounds(&abs, path)?;
1074    Ok(abs)
1075}
1076
1077pub(crate) fn resolve_candidate(path: &str) -> PathBuf {
1078    // 1. Handle Special Sovereign Tokens
1079    let upper = path.to_uppercase();
1080
1081    // Bare token support — matches exact names with or without @ prefix, with or without
1082    // trailing slash. Enables /cd downloads, /cd @DESKTOP, /cd ~ etc.
1083    let bare = upper.trim_end_matches('/').trim_start_matches('@');
1084    let bare_resolved = match bare {
1085        "DESKTOP" => dirs::desktop_dir(),
1086        "DOWNLOADS" | "DOWNLOAD" => dirs::download_dir(),
1087        "DOCUMENTS" | "DOCS" => dirs::document_dir(),
1088        "PICTURES" | "IMAGES" => dirs::picture_dir(),
1089        "VIDEOS" | "MOVIES" => dirs::video_dir(),
1090        "MUSIC" | "AUDIO" => dirs::audio_dir(),
1091        "HOME" => dirs::home_dir(),
1092        "TEMP" | "TMP" => Some(std::env::temp_dir()),
1093        "CACHE" => dirs::cache_dir(),
1094        "CONFIG" => dirs::config_dir(),
1095        "DATA" => dirs::data_dir(),
1096        _ => None,
1097    };
1098    // Also handle bare ~ and ~/ as home
1099    let bare_resolved = bare_resolved.or_else(|| {
1100        if path == "~" || path == "~/" {
1101            dirs::home_dir()
1102        } else {
1103            None
1104        }
1105    });
1106    if let Some(p) = bare_resolved {
1107        return p;
1108    }
1109
1110    // Helper to resolve via dirs crate
1111    let resolved = if upper.starts_with("@DESKTOP/") {
1112        dirs::desktop_dir().map(|p| p.join(&path[9..]))
1113    } else if upper.starts_with("@DOCUMENTS/") {
1114        dirs::document_dir().map(|p| p.join(&path[11..]))
1115    } else if upper.starts_with("@DOWNLOADS/") {
1116        dirs::download_dir().map(|p| p.join(&path[11..]))
1117    } else if upper.starts_with("@PICTURES/") || upper.starts_with("@IMAGES/") {
1118        let offset = if upper.starts_with("@PICTURES/") {
1119            10
1120        } else {
1121            8
1122        };
1123        dirs::picture_dir().map(|p| p.join(&path[offset..]))
1124    } else if upper.starts_with("@VIDEOS/") || upper.starts_with("@MOVIES/") {
1125        let offset = if upper.starts_with("@VIDEOS/") { 8 } else { 8 };
1126        dirs::video_dir().map(|p| p.join(&path[offset..]))
1127    } else if upper.starts_with("@MUSIC/") || upper.starts_with("@AUDIO/") {
1128        let offset = if upper.starts_with("@MUSIC/") { 7 } else { 7 };
1129        dirs::audio_dir().map(|p| p.join(&path[offset..]))
1130    } else if upper.starts_with("@HOME/") || upper.starts_with("~/") {
1131        let offset = if upper.starts_with("@HOME/") { 6 } else { 2 };
1132        dirs::home_dir().map(|p| p.join(&path[offset..]))
1133    } else if upper.starts_with("@TEMP/") {
1134        Some(std::env::temp_dir().join(&path[6..]))
1135    } else if upper.starts_with("@CACHE/") {
1136        dirs::cache_dir().map(|p| p.join(&path[7..]))
1137    } else if upper.starts_with("@CONFIG/") {
1138        dirs::config_dir().map(|p| p.join(&path[8..]))
1139    } else if upper.starts_with("@DATA/") {
1140        dirs::data_dir().map(|p| p.join(&path[6..]))
1141    } else {
1142        None
1143    };
1144
1145    if let Some(p) = resolved {
1146        return p;
1147    }
1148
1149    // 2. Fallback to Standard Resolution
1150    let p = Path::new(path);
1151    if p.is_absolute() {
1152        p.to_path_buf()
1153    } else {
1154        std::env::current_dir()
1155            .unwrap_or_else(|_| PathBuf::from("."))
1156            .join(p)
1157    }
1158}
1159
1160fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
1161    let abs = candidate
1162        .canonicalize()
1163        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
1164    check_workspace_bounds(&abs, original)?;
1165    Ok(abs)
1166}
1167
/// Returns true if `abs` is one of the plan sidecar files
/// (`.hematite/task.md`, `.hematite/plan.md`, `.hematite/walkthrough.md`)
/// located inside `workspace`.
///
/// Both paths are compared in a normalised form: a leading `\\?\` is removed,
/// the text is lower-cased and backslashes become forward slashes.
/// Containment is checked on a path-segment boundary, so a sibling directory
/// that merely shares the workspace's name as a prefix (`/ws2` for workspace
/// `/ws`) is not treated as inside.
fn is_allowed_plan_sidecar(workspace: &Path, abs: &Path) -> bool {
    fn normalise(p: &Path) -> String {
        p.to_string_lossy()
            .trim_start_matches(r"\\?\")
            .to_lowercase()
            .replace('\\', "/")
    }

    let normalized = normalise(abs);
    let workspace_norm = normalise(workspace);
    // Drop trailing separators so a root workspace ("/" or "c:/") still
    // leaves a '/' at the start of the remainder.
    let workspace_norm = workspace_norm.trim_end_matches('/');

    let inside_workspace = match normalized.strip_prefix(workspace_norm) {
        // The remainder must begin a new segment; otherwise only part of a
        // directory name matched.
        Some(rest) => rest.starts_with('/'),
        None => false,
    };
    if !inside_workspace {
        return false;
    }

    normalized.ends_with("/.hematite/task.md")
        || normalized.ends_with("/.hematite/plan.md")
        || normalized.ends_with("/.hematite/walkthrough.md")
}
1188
1189fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
1190    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
1191    if is_allowed_plan_sidecar(&workspace, abs) {
1192        return Ok(());
1193    }
1194
1195    // Delegate to the existing guard for blacklist + traversal checks.
1196    super::guard::path_is_safe(&workspace, abs)
1197        .map(|_| ())
1198        .map_err(|e| format!("file access denied for '{original}': {e}"))
1199}
1200
/// Returns true if any component of `p` marks the path as one to skip:
/// a dot-prefixed name, `target`, `node_modules` or `__pycache__`.
///
/// `.hematite`, `.git`, `.` and `..` are explicitly exempt and never cause a
/// path to be skipped.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let name = component.as_os_str().to_string_lossy();
        match name.as_ref() {
            // Exempt names: look at the remaining components.
            ".hematite" | ".git" | "." | ".." => continue,
            "target" | "node_modules" | "__pycache__" => return true,
            other if other.starts_with('.') => return true,
            _ => {}
        }
    }
    false
}
1211
/// Render a numbered excerpt of `content` around the line that best resembles
/// the start of `search`, so the model can see the real text and self-correct.
///
/// The probe is the first non-blank line of `search`, trimmed. Each file line
/// is scored by the length of the common prefix between the probe and the
/// trimmed line; among equal scores the LAST line wins. With an empty probe
/// the first line is used. The excerpt covers up to 3 lines before and 4
/// lines after the chosen line.
///
/// The header shows the chosen line number followed by the last line number
/// displayed. An empty file yields `"(file is empty)"`.
fn nearest_lines(content: &str, search: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return "(file is empty)".into();
    }

    let probe = search
        .lines()
        .map(str::trim)
        .find(|candidate| !candidate.is_empty())
        .unwrap_or("");

    let mut best_idx = 0usize;
    if !probe.is_empty() {
        let mut best_score = 0usize;
        for (idx, line) in lines.iter().enumerate() {
            // Score: length of longest common prefix after trimming.
            let score = probe
                .chars()
                .zip(line.trim().chars())
                .take_while(|(a, b)| a == b)
                .count();
            // `>=` so that later lines replace earlier ones on a tie.
            if score >= best_score {
                best_score = score;
                best_idx = idx;
            }
        }
    }

    let start = best_idx.saturating_sub(3);
    let end = (best_idx + 5).min(lines.len());

    let mut snippet = String::new();
    for (offset, line) in lines[start..end].iter().enumerate() {
        if offset > 0 {
            snippet.push('\n');
        }
        snippet.push_str(&format!("{:>4} | {}", start + offset + 1, line));
    }

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        end,
        snippet
    )
}
1263
/// Shared span-mapping routine behind the normalising fuzzy matchers.
///
/// `normalise` is applied to both `content` and `search`; the normalised
/// search (with leading/trailing newlines removed) is located in the
/// normalised content, and the hit is translated back to a byte range over
/// whole lines of the original `content` by counting newlines.
///
/// The mapping assumes every line of `content` is followed by a single `\n`
/// byte. Only the first textual hit is considered; the mapped lines are
/// re-normalised and compared to the search, so a hit that starts mid-line
/// yields `None`.
///
/// Returns `None` when the normalised search is empty, not found, or the
/// mapped range fails that re-check.
fn find_span_normalised(
    content: &str,
    search: &str,
    normalise: impl Fn(&str) -> String,
) -> Option<std::ops::Range<usize>> {
    let haystack = normalise(content);
    let needle_owned = normalise(search);
    let needle = needle_owned.trim_matches('\n');
    if needle.is_empty() {
        return None;
    }

    let hit = haystack.find(needle)?;

    // Line index of the hit, and how many lines the needle spans.
    let skipped = haystack[..hit].matches('\n').count();
    let span_lines = needle.matches('\n').count() + 1;

    let source_lines: Vec<&str> = content.lines().collect();

    // Bytes occupied by the skipped lines, each with its newline.
    let byte_start: usize = source_lines
        .iter()
        .take(skipped)
        .map(|line| line.len() + 1)
        .sum();

    // Bytes of the spanned lines; the newline after the final needle line is
    // not part of the span.
    let byte_len: usize = source_lines
        .iter()
        .skip(skipped)
        .take(span_lines)
        .enumerate()
        .map(|(i, line)| line.len() + usize::from(i + 1 < span_lines))
        .sum();

    let byte_end = byte_start + byte_len;
    if byte_end > content.len() {
        return None;
    }

    let candidate = &content[byte_start..byte_end];
    if normalise(candidate).trim_end_matches('\n') == needle {
        Some(byte_start..byte_end)
    } else {
        None
    }
}
1329
1330/// Level 1 fuzzy: rstrip only — removes trailing whitespace per line but
1331/// preserves leading indentation. Catches trailing-space mismatches where
1332/// the model's indentation is actually correct.
1333fn rstrip_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1334    find_span_normalised(content, search, |s| {
1335        s.lines()
1336            .map(|l| l.trim_end())
1337            .collect::<Vec<_>>()
1338            .join("\n")
1339    })
1340}
1341
1342/// Level 2 fuzzy: indent-flexible — strips the minimum common leading whitespace
1343/// (dedent) from both search and candidate windows before comparing. Preserves
1344/// relative indentation structure so nested code remains distinguishable. Also
1345/// normalises tabs → 4 spaces so tab/space mismatches are tolerated.
1346fn indent_flexible_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1347    let norm_search = dedent(search.trim_matches('\n'));
1348    if norm_search.trim().is_empty() {
1349        return None;
1350    }
1351    let search_line_count = norm_search.lines().count();
1352    let content_lines: Vec<&str> = content.lines().collect();
1353    if content_lines.len() < search_line_count {
1354        return None;
1355    }
1356
1357    // Precompute byte start of each line (content is already LF-normalised).
1358    let mut line_starts: Vec<usize> = Vec::with_capacity(content_lines.len() + 1);
1359    let mut pos = 0usize;
1360    for line in &content_lines {
1361        line_starts.push(pos);
1362        pos += line.len() + 1; // +1 for '\n'
1363    }
1364    line_starts.push(pos);
1365
1366    for start in 0..=(content_lines.len() - search_line_count) {
1367        let window = content_lines[start..start + search_line_count].join("\n");
1368        if dedent(&window) == norm_search {
1369            let byte_start = line_starts[start];
1370            let end_line = start + search_line_count;
1371            let byte_end = if end_line < content_lines.len() {
1372                line_starts[end_line] - 1 // exclude trailing '\n'
1373            } else {
1374                content.len()
1375            };
1376            return Some(byte_start..byte_end);
1377        }
1378    }
1379    None
1380}
1381
1382/// Level 3 fuzzy: full strip — trims all leading and trailing whitespace
1383/// per line. Last resort before the cross-file hint error.
1384fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1385    find_span_normalised(content, search, |s| {
1386        s.lines().map(|l| l.trim()).collect::<Vec<_>>().join("\n")
1387    })
1388}
1389
1390/// Scan source files in the workspace for a search string that failed to
1391/// match in the intended target file. Returns the first file path where
1392/// the string is found (after CRLF normalisation), capped at 100 files.
1393/// Used to generate a "did you mean this file?" hint in edit errors.
1394fn find_search_in_workspace(search: &str, skip_path: &str) -> Option<String> {
1395    let root = workspace_root();
1396    let norm_search = search.replace("\r\n", "\n");
1397    let mut checked = 0usize;
1398
1399    let walker = ignore::WalkBuilder::new(&root)
1400        .hidden(true)
1401        .ignore(true)
1402        .git_ignore(true)
1403        .build();
1404
1405    for entry in walker.flatten() {
1406        if checked >= 100 {
1407            break;
1408        }
1409        let path = entry.path();
1410        if !path.is_file() {
1411            continue;
1412        }
1413        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1414        if !matches!(
1415            ext,
1416            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h"
1417        ) {
1418            continue;
1419        }
1420        let rel = path
1421            .strip_prefix(&root)
1422            .unwrap_or(path)
1423            .to_string_lossy()
1424            .replace('\\', "/");
1425        if rel == skip_path {
1426            continue;
1427        }
1428        checked += 1;
1429        if let Ok(content) = std::fs::read_to_string(path) {
1430            let normalised = content.replace("\r\n", "\n");
1431            if normalised.contains(&norm_search) {
1432                return Some(rel);
1433            }
1434        }
1435    }
1436    None
1437}
1438
1439// ── Indent-aware replacement ──────────────────────────────────────────────────
1440
/// Canonicalise indentation for comparison (used by indent_flexible_find_span).
///
/// Every tab is replaced by 4 spaces, then the smallest leading-space count
/// found on any non-blank line is removed from all non-blank lines, which are
/// also stripped of trailing whitespace. Blank lines become empty strings.
/// Lines are re-joined with `\n`, without a trailing newline.
fn dedent(s: &str) -> String {
    let rows: Vec<String> = s.lines().map(|row| row.replace('\t', "    ")).collect();

    // Smallest indent over the non-blank rows; 0 when there are none.
    let mut common: Option<usize> = None;
    for row in &rows {
        if row.trim().is_empty() {
            continue;
        }
        let lead = row.len() - row.trim_start_matches(' ').len();
        common = Some(match common {
            Some(current) => current.min(lead),
            None => lead,
        });
    }
    let common = common.unwrap_or(0);

    let mut out: Vec<String> = Vec::with_capacity(rows.len());
    for row in &rows {
        if row.trim().is_empty() {
            out.push(String::new());
            continue;
        }
        let tail = match row.get(common..) {
            Some(rest) => rest,
            None => row.as_str(),
        };
        out.push(tail.trim_end().to_string());
    }
    out.join("\n")
}
1464
/// Shift the indentation of `replace` by the difference between the file's
/// indentation and the search string's indentation.
///
/// The indentation of `search` and of `file_span` is taken from the leading
/// spaces of their first non-blank line. If they are equal, `replace` is
/// returned unchanged. Otherwise every non-blank line of `replace` has its
/// leading-space count moved by that delta (never below zero); blank lines
/// are kept as they are, and a trailing newline on `replace` is preserved.
///
/// Example: search written with 0 spaces, file uses 8 — each non-blank line
/// of `replace` gains 8 spaces.
fn adjust_replace_indent(search: &str, file_span: &str, replace: &str) -> String {
    fn leading_spaces(line: &str) -> usize {
        line.len() - line.trim_start_matches(' ').len()
    }

    fn first_indent(text: &str) -> usize {
        text.lines()
            .find(|line| !line.trim().is_empty())
            .map(leading_spaces)
            .unwrap_or(0)
    }

    let delta = first_indent(file_span) as i64 - first_indent(search) as i64;
    if delta == 0 {
        return replace.to_string();
    }

    let mut result = String::with_capacity(replace.len());
    for (row, line) in replace.lines().enumerate() {
        if row > 0 {
            result.push('\n');
        }
        if line.trim().is_empty() {
            // Preserve blank lines as-is
            result.push_str(line);
            continue;
        }
        let body = line.trim_start_matches(' ');
        let current = (line.len() - body.len()) as i64;
        let width = (current + delta).max(0) as usize;
        result.push_str(&" ".repeat(width));
        result.push_str(body);
    }

    if replace.ends_with('\n') {
        result.push('\n');
    }
    result
}
1509
1510// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
1511
1512/// Return a formatted diff string for an edit_file operation without applying it.
1513/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1514/// search string cannot be located (caller falls through to normal tool dispatch).
1515pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1516    let path = require_str(args, "path")?;
1517    let search = require_str(args, "search")?;
1518    let replace = require_str(args, "replace")?;
1519
1520    let abs = safe_path(path)?;
1521    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1522    let original = raw.replace("\r\n", "\n");
1523
1524    let (effective_search, effective_replace): (String, String) = if original.contains(search) {
1525        (search.to_string(), replace.to_string())
1526    } else {
1527        let span = rstrip_find_span(&original, search)
1528            .or_else(|| indent_flexible_find_span(&original, search))
1529            .or_else(|| fuzzy_find_span(&original, search));
1530        match span {
1531            Some(span) => {
1532                let real_slice = original[span].to_string();
1533                let adjusted = adjust_replace_indent(search, &real_slice, replace);
1534                (real_slice, adjusted)
1535            }
1536            None => return Err("search string not found — diff preview unavailable".into()),
1537        }
1538    };
1539
1540    let mut diff = String::new();
1541    for line in effective_search.lines() {
1542        diff.push_str(&format!("- {}\n", line));
1543    }
1544    for line in effective_replace.lines() {
1545        diff.push_str(&format!("+ {}\n", line));
1546    }
1547    Ok(diff)
1548}
1549
1550/// Return a formatted diff string for a patch_hunk operation without applying it.
1551pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1552    let path = require_str(args, "path")?;
1553    let start_line = require_usize(args, "start_line")?;
1554    let end_line = require_usize(args, "end_line")?;
1555    let replacement = require_str(args, "replacement")?;
1556
1557    let abs = safe_path(path)?;
1558    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1559    let lines: Vec<&str> = original.lines().collect();
1560    let total = lines.len();
1561
1562    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1563        return Err(format!(
1564            "patch_hunk: invalid line range {}-{} for file with {} lines",
1565            start_line, end_line, total
1566        ));
1567    }
1568
1569    let s_idx = start_line - 1;
1570    let e_idx = end_line;
1571
1572    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1573    for i in s_idx..e_idx {
1574        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
1575    }
1576    for line in replacement.lines() {
1577        diff.push_str(&format!("+ {}\n", line.trim_end()));
1578    }
1579    Ok(diff)
1580}
1581
1582/// Return a formatted diff string for a multi_search_replace operation without applying it.
1583pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1584    let hunks_val = args
1585        .get("hunks")
1586        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1587
1588    #[derive(serde::Deserialize)]
1589    struct PreviewHunk {
1590        search: String,
1591        replace: String,
1592    }
1593    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1594        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1595
1596    let mut diff = String::new();
1597    for (i, hunk) in hunks.iter().enumerate() {
1598        if hunks.len() > 1 {
1599            diff.push_str(&format!("@@ hunk {} @@\n", i + 1));
1600        }
1601        for line in hunk.search.lines() {
1602            diff.push_str(&format!("- {}\n", line.trim_end()));
1603        }
1604        for line in hunk.replace.lines() {
1605            diff.push_str(&format!("+ {}\n", line.trim_end()));
1606        }
1607    }
1608    Ok(diff)
1609}
1610
1611/// Compute a preview diff for write_file — shows the full new content as additions,
1612/// and any existing file content as removals. New files show only `+` lines.
1613pub fn compute_write_file_diff(args: &Value) -> Result<String, String> {
1614    let path = require_str(args, "path")?;
1615    let new_content = require_str(args, "content")?;
1616
1617    let abs = safe_path(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
1618    let old_content = fs::read_to_string(&abs)
1619        .map(|s| s.replace("\r\n", "\n"))
1620        .unwrap_or_default();
1621
1622    let mut diff = String::new();
1623    if !old_content.is_empty() {
1624        for line in old_content.lines() {
1625            diff.push_str(&format!("- {}\n", line));
1626        }
1627    }
1628    for line in new_content.lines() {
1629        diff.push_str(&format!("+ {}\n", line));
1630    }
1631    if diff.is_empty() {
1632        return Err("empty content — diff preview unavailable".into());
1633    }
1634    Ok(diff)
1635}
1636
/// Locate the workspace root by walking up from the current directory.
///
/// The first directory (starting with the current one) that contains `.git`,
/// `Cargo.toml` or `package.json` is returned. If no ancestor has a marker,
/// the current directory itself is returned; `.` stands in whenever the
/// current directory cannot be read.
pub fn workspace_root() -> PathBuf {
    const MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let origin = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let marked = origin
        .ancestors()
        .find(|dir| MARKERS.iter().any(|marker| dir.join(marker).exists()))
        .map(|dir| dir.to_path_buf());

    match marked {
        Some(root) => root,
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    }
}
1653
1654/// Returns true if `path` is a known OS shortcut directory (Desktop, Downloads,
1655/// Documents, Pictures, Videos, Music). These directories should not accumulate
1656/// `.hematite/` workspace state — they use the global `~/.hematite/` instead.
1657pub fn is_os_shortcut_directory(path: &Path) -> bool {
1658    let candidates = [
1659        dirs::desktop_dir(),
1660        dirs::download_dir(),
1661        dirs::document_dir(),
1662        dirs::picture_dir(),
1663        dirs::video_dir(),
1664        dirs::audio_dir(),
1665    ];
1666    candidates
1667        .iter()
1668        .filter_map(|d| d.as_deref())
1669        .any(|d| d == path)
1670}
1671
1672/// Returns the directory where Hematite's runtime state (`.hematite/`) should live.
1673///
1674/// - In sovereign OS directories (Desktop, Downloads, Documents, Pictures, Videos,
1675///   Music): returns `~/.hematite/` so no workspace folder is created there.
1676/// - Everywhere else: returns `workspace_root()/.hematite/` as normal.
1677pub fn hematite_dir() -> PathBuf {
1678    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1679    if is_os_shortcut_directory(&cwd) {
1680        if let Some(home) = dirs::home_dir() {
1681            return home.join(".hematite");
1682        }
1683    }
1684    workspace_root().join(".hematite")
1685}
1686
1687/// Returns true if the workspace root looks like a real project.
1688/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1689/// count — at least one explicit build/package marker must also be present.
1690pub fn is_project_workspace() -> bool {
1691    let root = workspace_root();
1692    let has_explicit_marker = root.join("Cargo.toml").exists()
1693        || root.join("package.json").exists()
1694        || root.join("pyproject.toml").exists()
1695        || root.join("go.mod").exists()
1696        || root.join("setup.py").exists()
1697        || root.join("pom.xml").exists()
1698        || root.join("build.gradle").exists()
1699        || root.join("CMakeLists.txt").exists()
1700        || root.join("index.html").exists()
1701        || root.join("style.css").exists()
1702        || root.join("script.js").exists();
1703    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1704}
1705
1706// ── open_in_system_editor ───────────────────────────────────────────────────
1707
/// Opens `path` with the platform's default handler for that file.
///
/// The launcher depends on the compile target:
/// - Windows: `cmd /c start "" <path>`
/// - macOS: `open <path>`
/// - other Unix: `xdg-open <path>`
///
/// The launcher is run via `Command::status`, so this call waits for the
/// launcher process itself to exit and then inspects its exit status. On a
/// target matching none of the above, only the existence check is performed.
///
/// # Errors
///
/// - `"File not found: …"` when `path` does not exist.
/// - `"Failed to launch editor: …"` when the launcher process cannot be run.
/// - A launcher-specific message when the launcher exits unsuccessfully.
pub fn open_in_system_editor(path: &std::path::Path) -> Result<(), String> {
    if !path.exists() {
        return Err(format!("File not found: {}", path.display()));
    }

    #[cfg(target_os = "windows")]
    {
        // `start` opens the file with its associated application. The empty
        // string is passed ahead of the path, as in the usual
        // `start "" <file>` form, so the path is not taken as a title.
        let target = path.to_string_lossy();
        match std::process::Command::new("cmd")
            .args(["/c", "start", "", &*target])
            .status()
        {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(exit) if !exit.success() => {
                return Err("Editor command failed to start.".into());
            }
            Ok(_) => {}
        }
    }

    #[cfg(target_os = "macos")]
    {
        match std::process::Command::new("open").arg(path).status() {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(exit) if !exit.success() => return Err("open command failed.".into()),
            Ok(_) => {}
        }
    }

    #[cfg(all(unix, not(target_os = "macos")))]
    {
        // Linux and other non-macOS Unix targets go through xdg-open.
        match std::process::Command::new("xdg-open").arg(path).status() {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(exit) if !exit.success() => return Err("xdg-open failed.".into()),
            Ok(_) => {}
        }
    }

    Ok(())
}
1754
#[cfg(test)]
mod tests {
    use super::*;

    /// `safe_path` must accept a `.hematite/` sidecar file that lives inside
    /// the current workspace and resolve it to a path ending in
    /// `.hematite/TASK.md`.
    #[test]
    fn safe_path_allows_plan_sidecars_inside_workspace() {
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        let sidecar_dir = root.join(".hematite");
        std::fs::create_dir_all(&sidecar_dir).unwrap();
        std::fs::write(sidecar_dir.join("TASK.md"), "# Task Ledger\n").unwrap();

        let previous = std::env::current_dir().unwrap();
        std::env::set_current_dir(root).unwrap();
        // The working directory is process-wide. Keep the result un-unwrapped
        // until the cwd is restored, so an `Err` from `safe_path` fails this
        // test without leaving the process inside a temp dir that is about to
        // be deleted (which would break unrelated tests in the same process).
        let outcome = safe_path(".hematite/TASK.md");
        std::env::set_current_dir(previous).unwrap();

        let resolved = outcome.unwrap();
        assert!(resolved.ends_with(Path::new(".hematite").join("TASK.md")));
    }
}
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs