hematite/tools/
file_ops.rs

1use std::fmt::Write as _;
2
3use crate::agent::truncation::safe_head;
4use serde_json::Value;
5use std::fs;
6use std::io;
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9use walkdir::WalkDir;
10
11// ── Ghost Ledger ──────────────────────────────────────────────────────────────
12
/// Maximum number of `.bak` files kept in the ghost directory; older ones are pruned.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trims the ghost directory down to the `MAX_GHOST_BACKUPS` most recently modified
/// `.bak` files and rewrites `ledger.txt` so it only lists backups that survived.
///
/// This is best-effort housekeeping: an unreadable directory or ledger makes the
/// function return early, and individual delete/write failures are ignored.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    // Only files with a (case-insensitive) `.bak` extension count as backups.
    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| ext.eq_ignore_ascii_case("bak"))
        })
        .collect();

    // Sort oldest-first, then flip so the newest backups lead. The cached-key sort
    // stats every file exactly once instead of once per comparison. Entries whose
    // modification time cannot be read sort as oldest and are pruned first.
    backups.sort_by_cached_key(|entry| entry.metadata().and_then(|meta| meta.modified()).ok());
    backups.reverse();

    // Everything past the retention limit is deleted; the rest is remembered by path
    // (with `\` normalised to `/`) so ledger lines can be matched against it.
    let keep = backups.len().min(MAX_GHOST_BACKUPS);
    let expired = backups.split_off(keep);

    let retained: std::collections::HashSet<String> = backups
        .iter()
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in expired {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Ledger lines have the form `target|backup`. Lines without a separator, or whose
    // backup file is no longer retained, are dropped.
    let mut rewritten = String::with_capacity(content.len());
    for line in content.lines() {
        let Some((_, backup)) = line.split_once('|') else {
            continue;
        };
        if retained.contains(&backup.replace('\\', "/")) {
            rewritten.push_str(line);
            rewritten.push('\n');
        }
    }
    let _ = fs::write(ledger_path, rewritten);
}
65
66fn save_ghost_backup(target_path: &str, content: &str) {
67    let ws = workspace_root();
68
69    // Phase 1: Try Git Ghost Snapshot
70    if crate::agent::git::is_git_repo(&ws) {
71        let _ = crate::agent::git::create_ghost_snapshot(&ws);
72    }
73
74    // Phase 2: Fallback to local file backup (Ghost Ledger)
75    let ghost_dir = hematite_dir().join("ghost");
76    let _ = fs::create_dir_all(&ghost_dir);
77    let ts = std::time::SystemTime::now()
78        .duration_since(std::time::UNIX_EPOCH)
79        .unwrap()
80        .as_millis();
81    let safe_name = Path::new(target_path)
82        .file_name()
83        .unwrap_or_default()
84        .to_string_lossy();
85    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
86
87    if fs::write(&backup_file, content).is_ok() {
88        use std::io::Write;
89        if let Ok(mut f) = fs::OpenOptions::new()
90            .create(true)
91            .append(true)
92            .open(ghost_dir.join("ledger.txt"))
93        {
94            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
95        }
96        prune_ghost_backups(&ghost_dir);
97    }
98}
99
100pub fn pop_ghost_ledger() -> Result<String, String> {
101    let ghost_dir = hematite_dir().join("ghost");
102    let ledger_path = ghost_dir.join("ledger.txt");
103
104    if !ledger_path.exists() {
105        return Err("Ghost Ledger is empty — no edits to undo".into());
106    }
107
108    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
109    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
110
111    if lines.is_empty() {
112        return Err("Ghost Ledger is empty".into());
113    }
114
115    let last_line = lines.pop().unwrap();
116    let Some((target_path, backup_path)) = last_line.split_once('|') else {
117        return Err("Corrupted ledger entry".into());
118    };
119
120    let ws = workspace_root();
121
122    // Priority 1: Try Git Rollback
123    if crate::agent::git::is_git_repo(&ws) {
124        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
125            let _ = fs::remove_file(backup_path);
126            let new_ledger = lines.join("\n");
127            let _ = fs::write(
128                &ledger_path,
129                if new_ledger.is_empty() {
130                    String::new()
131                } else {
132                    new_ledger + "\n"
133                },
134            );
135            return Ok(msg);
136        }
137    }
138
139    // Priority 2: Standard File Rollback
140    let original_content =
141        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
142    let abs_target = ws.join(target_path);
143    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
144
145    let new_ledger = lines.join("\n");
146    let _ = fs::write(
147        &ledger_path,
148        if new_ledger.is_empty() {
149            String::new()
150        } else {
151            new_ledger + "\n"
152        },
153    );
154    let _ = fs::remove_file(backup_path);
155
156    Ok(format!("Restored {} from Ghost Ledger", target_path))
157}
158
159// ── read_file ─────────────────────────────────────────────────────────────────
160
161pub async fn read_file(args: &Value, budget_tokens: usize) -> Result<String, String> {
162    let path = require_str(args, "path")?;
163    let offset = get_usize_arg(args, "offset");
164    let limit = get_usize_arg(args, "limit");
165
166    let abs = safe_path(path)?;
167    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
168
169    let lines: Vec<&str> = raw.lines().collect();
170    let total = lines.len();
171    let start = offset.unwrap_or(0).min(total);
172    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
173
174    let mut content = lines[start..end].join("\n");
175
176    // Phase 5: Calculate predictive character budget based on remaining context.
177    let budget_chars = budget_tokens.saturating_mul(4);
178    let char_limit = if budget_tokens == 0 {
179        100_000
180    } else {
181        budget_chars.clamp(2000, 100_000)
182    };
183
184    if content.len() > char_limit {
185        let safe_end = safe_head(&content, char_limit).len();
186        content.truncate(safe_end);
187        content.push_str("\n\n--- [PREDICTIVE TRUNCATION: CONTEXT BUDGET REACHED] ---\n");
188        let _ = write!(
189            content,
190            "Output truncated at {} chars to prevent context window flooding. ",
191            char_limit
192        );
193        content
194            .push_str("To see more, use `read_file` with a higher `offset` and a smaller `limit`.");
195    } else if end < total {
196        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
197        let _ = write!(content, "This file has {} more lines below. ", total - end);
198        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
199                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
200    }
201
202    Ok(format!(
203        "[{path}  lines {}-{} of {}]\n{}",
204        start + 1,
205        end,
206        total,
207        content
208    ))
209}
210
211// ── inspect_lines ─────────────────────────────────────────────────────────────
212
213pub async fn inspect_lines(args: &Value) -> Result<String, String> {
214    let path = require_str(args, "path")?;
215    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
216    let end_line = get_usize_arg(args, "end_line");
217
218    let abs = safe_path(path)?;
219    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
220
221    let lines: Vec<&str> = raw.lines().collect();
222    let total_lines = lines.len();
223
224    // Out-of-bounds check with descriptive feedback.
225    if start_line > total_lines && total_lines > 0 {
226        return Err(format!(
227            "Invalid line range: You requested line {}, but the file only has {} lines. Try `read_file` on a smaller range or the whole file.",
228            start_line, total_lines
229        ));
230    }
231
232    let start = start_line.saturating_sub(1).min(total_lines);
233    let end = end_line.unwrap_or(total_lines).min(total_lines);
234
235    if start >= end && total_lines > 0 {
236        return Err(format!(
237            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
238            end_line.unwrap_or(total_lines)
239        ));
240    }
241
242    let mut output = format!(
243        "[inspect_lines: {path} lines {}-{} of {}]\n",
244        start + 1,
245        end,
246        total_lines
247    );
248    for (offset, line) in lines[start..end].iter().enumerate() {
249        let _ = writeln!(output, "[{:>4}] | {}", start + offset + 1, line);
250    }
251
252    Ok(output)
253}
254
255// ── tail_file ─────────────────────────────────────────────────────────────────
256
257pub async fn tail_file(args: &Value) -> Result<String, String> {
258    let path = require_str(args, "path")?;
259    let n = args
260        .get("lines")
261        .and_then(|v| v.as_u64())
262        .unwrap_or(50)
263        .min(500) as usize;
264    let grep_pat = args.get("grep").and_then(|v| v.as_str());
265
266    let abs = safe_path(path)?;
267    let raw = fs::read_to_string(&abs).map_err(|e| format!("tail_file: {e} ({path})"))?;
268
269    let all_lines: Vec<&str> = raw.lines().collect();
270    let total = all_lines.len();
271
272    // Apply optional grep filter before slicing — model asks for the last N
273    // matching lines, not the last N lines containing maybe 0 matches.
274    let filtered: Vec<(usize, &str)> = if let Some(pat) = grep_pat {
275        let re = regex::Regex::new(pat)
276            .map_err(|e| format!("tail_file: invalid grep pattern '{pat}': {e}"))?;
277        all_lines
278            .iter()
279            .enumerate()
280            .filter(|(_, l)| re.is_match(l))
281            .map(|(i, l)| (i, *l))
282            .collect()
283    } else {
284        all_lines.iter().enumerate().map(|(i, l)| (i, *l)).collect()
285    };
286
287    let total_filtered = filtered.len();
288    let skip = total_filtered.saturating_sub(n);
289    let window = &filtered[skip..];
290
291    if window.is_empty() {
292        let note = if let Some(pat) = grep_pat {
293            format!(" matching '{pat}'")
294        } else {
295            String::new()
296        };
297        return Ok(format!(
298            "[tail_file: {path} — no lines{note} found (total {total} lines)]"
299        ));
300    }
301
302    let first_abs = window[0].0 + 1;
303    let last_abs = window[window.len() - 1].0 + 1;
304    let mut out = format!(
305        "[tail_file: {path} — lines {first_abs}–{last_abs} of {total} (last {n} of {total_filtered} matched)]\n"
306    );
307    for (abs_idx, line) in window {
308        let _ = writeln!(out, "[{:>5}] {}", abs_idx + 1, line);
309    }
310
311    Ok(out)
312}
313
314// ── write_file ────────────────────────────────────────────────────────────────
315
316pub async fn write_file(args: &Value) -> Result<String, String> {
317    let path = require_str(args, "path")?;
318    let content = require_str(args, "content")?;
319
320    let abs = safe_path_allow_new(path)?;
321    if let Some(parent) = abs.parent() {
322        fs::create_dir_all(parent)
323            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
324    }
325
326    let existed = abs.exists();
327    if existed {
328        if let Ok(orig) = fs::read_to_string(&abs) {
329            save_ghost_backup(path, &orig);
330        }
331    }
332
333    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
334
335    let action = if existed { "Updated" } else { "Created" };
336    Ok(format!("{action} {path}  ({} bytes)", content.len()))
337}
338
339// ── edit_file ─────────────────────────────────────────────────────────────────
340
341pub async fn edit_file(args: &Value) -> Result<String, String> {
342    let path = require_str(args, "path")?;
343    let search = require_str(args, "search")?;
344    let replace = require_str(args, "replace")?;
345    let replace_all = args
346        .get("replace_all")
347        .and_then(|v| v.as_bool())
348        .unwrap_or(false);
349
350    if search == replace {
351        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
352    }
353
354    let abs = safe_path(path)?;
355    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
356    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
357    let original = raw.replace("\r\n", "\n");
358
359    save_ghost_backup(path, &original);
360
361    let search_trimmed = search.trim();
362    let search_non_ws_len = search_trimmed
363        .chars()
364        .filter(|c| !c.is_whitespace())
365        .count();
366    let search_line_count = search_trimmed.lines().count();
367    if search_non_ws_len < 12 && search_line_count <= 1 {
368        return Err(format!(
369            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
370             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
371        ));
372    }
373
374    // ── Exact match first ────────────────────────────────────────────────────
375    let (effective_search, was_repaired) = if original.contains(search) {
376        let exact_match_count = original.matches(search).count();
377        if exact_match_count > 1 && !replace_all {
378            return Err(format!(
379                "edit_file: search string matched {} times in {path}.\n\
380                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
381                exact_match_count
382            ));
383        }
384        (search.to_string(), false)
385    } else {
386        // ── Fuzzy repair: progressive normalisation ───────────────────────
387        // Level 1: rstrip only — preserves indentation, strips trailing spaces.
388        // Level 2: indent-flexible — dedent both sides, preserve relative structure.
389        // Level 3: full strip — last resort before cross-file hint.
390        let span = rstrip_find_span(&original, search)
391            .or_else(|| indent_flexible_find_span(&original, search))
392            .or_else(|| fuzzy_find_span(&original, search));
393        match span {
394            Some(span) => {
395                let real_slice = original[span.clone()].to_string();
396                (real_slice, true)
397            }
398            None => {
399                let hint = nearest_lines(&original, search);
400                let cross_hint = find_search_in_workspace(search, path)
401                    .map(|found| format!("\nNote: search string found in '{found}' — did you mean to edit that file?"))
402                    .unwrap_or_default();
403                return Err(format!(
404                    "edit_file: search string not found in {path}.\n\
405                     The 'search' value must match the file content exactly \
406                     (including whitespace/indentation).\n\
407                     {hint}{cross_hint}"
408                ));
409            }
410        }
411    };
412
413    // When a fuzzy match was used, adjust the replace string's indentation to
414    // match the file's actual indent level (not the model's potentially-wrong indent).
415    let effective_replace = if was_repaired {
416        adjust_replace_indent(search, effective_search.as_str(), replace)
417    } else {
418        replace.to_string()
419    };
420
421    let updated = if replace_all {
422        original.replace(effective_search.as_str(), effective_replace.as_str())
423    } else {
424        original.replacen(effective_search.as_str(), effective_replace.as_str(), 1)
425    };
426
427    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
428
429    let removed = original.lines().count();
430    let added = updated.lines().count();
431    let repair_note = if was_repaired {
432        "  [indent auto-corrected]"
433    } else {
434        ""
435    };
436
437    let mut diff_block =
438        String::with_capacity(effective_search.len() + effective_replace.len() + 32);
439    diff_block.push_str("\n--- DIFF \n");
440    for line in effective_search.lines() {
441        let _ = writeln!(diff_block, "- {}", line);
442    }
443    for line in effective_replace.lines() {
444        let _ = writeln!(diff_block, "+ {}", line);
445    }
446
447    Ok(format!(
448        "Edited {path}  ({} -> {} lines){repair_note}{}",
449        removed, added, diff_block
450    ))
451}
452
453// ── patch_hunk ────────────────────────────────────────────────────────────────
454
455pub async fn patch_hunk(args: &Value) -> Result<String, String> {
456    let path = require_str(args, "path")?;
457    let start_line = require_usize(args, "start_line")?;
458    let end_line = require_usize(args, "end_line")?;
459    let replacement = require_str(args, "replacement")?;
460
461    let abs = safe_path(path)?;
462    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
463
464    save_ghost_backup(path, &original);
465
466    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
467    let total = lines.len();
468
469    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
470        return Err(format!(
471            "patch_hunk: invalid line range {}-{} for file with {} lines",
472            start_line, end_line, total
473        ));
474    }
475
476    let mut updated_lines = Vec::with_capacity(total);
477    // 0-indexed adjustment
478    let s_idx = start_line - 1;
479    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
480
481    // 1. Lines before the hunk
482    updated_lines.extend_from_slice(&lines[0..s_idx]);
483
484    // 2. The hunk replacement
485    for line in replacement.lines() {
486        updated_lines.push(line.to_string());
487    }
488
489    // 3. Lines after the hunk
490    if e_idx < total {
491        updated_lines.extend_from_slice(&lines[e_idx..total]);
492    }
493
494    let updated_content = updated_lines.join("\n");
495    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
496
497    let mut diff = String::with_capacity(replacement.len() + (e_idx - s_idx) * 64 + 32);
498    diff.push_str("\n--- HUNK DIFF ---\n");
499    for line in &lines[s_idx..e_idx] {
500        let _ = writeln!(diff, "- {}", line.trim_end());
501    }
502    for line in replacement.lines() {
503        let _ = writeln!(diff, "+ {}", line.trim_end());
504    }
505
506    Ok(format!(
507        "Patched {path} lines {}-{} ({} -> {} lines){}",
508        start_line,
509        end_line,
510        (e_idx - s_idx),
511        replacement.lines().count(),
512        diff
513    ))
514}
515
516// ── multi_search_replace ──────────────────────────────────────────────────────
517
518#[derive(serde::Deserialize)]
519struct SearchReplaceHunk {
520    search: String,
521    replace: String,
522}
523
524pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
525    let path = require_str(args, "path")?;
526    let hunks_val = args
527        .get("hunks")
528        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
529
530    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
531        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
532
533    if hunks.is_empty() {
534        return Err("multi_search_replace: hunks array is empty".to_string());
535    }
536
537    let abs = safe_path(path)?;
538    let raw =
539        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
540    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
541    let original = raw.replace("\r\n", "\n");
542
543    save_ghost_backup(path, &original);
544
545    let mut current_content = original.clone();
546    let mut diff = String::with_capacity(hunks.len() * 128 + 32);
547    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
548
549    let mut patched_hunks = 0;
550
551    for (i, hunk) in hunks.iter().enumerate() {
552        let match_count = current_content.matches(&hunk.search).count();
553
554        let (effective_search, effective_replace) = if match_count == 1 {
555            // Exact match — use as-is.
556            (hunk.search.clone(), hunk.replace.clone())
557        } else if match_count == 0 {
558            // Progressive fuzzy fallback: rstrip → indent-flexible → full-strip.
559            let span = rstrip_find_span(&current_content, &hunk.search)
560                .or_else(|| indent_flexible_find_span(&current_content, &hunk.search))
561                .or_else(|| fuzzy_find_span(&current_content, &hunk.search));
562            match span {
563                Some(span) => {
564                    let real_slice = current_content[span].to_string();
565                    let adjusted_replace =
566                        adjust_replace_indent(&hunk.search, &real_slice, &hunk.replace);
567                    (real_slice, adjusted_replace)
568                }
569                None => {
570                    return Err(format!(
571                        "multi_search_replace: hunk {} search string not found in file.",
572                        i
573                    ));
574                }
575            }
576        } else {
577            return Err(format!(
578                "multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.",
579                i, match_count
580            ));
581        };
582
583        let _ = write!(diff, "\n@@ Hunk {} @@\n", i + 1);
584        for line in effective_search.lines() {
585            let _ = writeln!(diff, "- {}", line.trim_end());
586        }
587        for line in effective_replace.lines() {
588            let _ = writeln!(diff, "+ {}", line.trim_end());
589        }
590
591        current_content = current_content.replacen(&effective_search, &effective_replace, 1);
592        patched_hunks += 1;
593    }
594
595    fs::write(&abs, &current_content)
596        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
597
598    Ok(format!(
599        "Modified {} hunks in {} using exact search-and-replace.{}",
600        patched_hunks, path, diff
601    ))
602}
603
604// ── list_files ────────────────────────────────────────────────────────────────
605
606pub async fn list_files(args: &Value, budget: usize) -> Result<String, String> {
607    let char_budget = budget * 4; // Approx tokens to chars
608    let started = Instant::now();
609    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
610    let ext_filter = args.get("extension").and_then(|v| v.as_str());
611
612    let base = safe_path(base_str)?;
613
614    let mut files: Vec<PathBuf> = Vec::new();
615    let mut scanned_count = 0;
616    for entry in WalkDir::new(&base).follow_links(false) {
617        scanned_count += 1;
618        if scanned_count > 25_000 {
619            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
620        }
621        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
622        if !entry.file_type().is_file() {
623            continue;
624        }
625        let p = entry.path();
626
627        // Skip hidden dirs / target / node_modules
628        if path_has_hidden_segment(p) {
629            continue;
630        }
631
632        if let Some(ext) = ext_filter {
633            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
634                continue;
635            }
636        }
637        files.push(p.to_path_buf());
638    }
639
640    // Sort by modification time (newest first).
641    files.sort_by_key(|p| {
642        fs::metadata(p)
643            .and_then(|m| m.modified())
644            .ok()
645            .map(std::cmp::Reverse)
646    });
647
648    let mut current_chars = 0;
649    let mut shown = Vec::with_capacity(files.len().min(200));
650    let mut truncated_by_budget = false;
651
652    let total_scanned = files.len();
653    for f in files {
654        let f_str = f.display().to_string();
655        if current_chars + f_str.len() + 1 > char_budget {
656            truncated_by_budget = true;
657            break;
658        }
659        current_chars += f_str.len() + 1;
660        shown.push(f_str);
661        if shown.len() >= 200 {
662            break;
663        }
664    }
665
666    let truncated = total_scanned > shown.len();
667
668    let ms = started.elapsed().as_millis();
669    let mut out = format!(
670        "{} file(s) in {}  ({ms}ms){}",
671        shown.len(),
672        base_str,
673        if truncated {
674            if truncated_by_budget {
675                "  [truncated by token budget]"
676            } else {
677                "  [truncated at 200]"
678            }
679        } else {
680            ""
681        }
682    );
683    out.push('\n');
684    out.push_str(&shown.join("\n"));
685    Ok(out)
686}
687
688// ── create_directory ──────────────────────────────────────────────────────────
689
690pub async fn create_directory(args: &Value) -> Result<String, String> {
691    let path = require_str(args, "path")?;
692    let abs = safe_path_allow_new(path)?;
693
694    if abs.exists() {
695        if abs.is_dir() {
696            return Ok(format!("Directory already exists: {path}"));
697        } else {
698            return Err(format!("A file already exists at this path: {path}"));
699        }
700    }
701
702    fs::create_dir_all(&abs).map_err(|e| format!("create_directory: {e} ({path})"))?;
703    Ok(format!("Created directory: {path}"))
704}
705
706// ── grep_files ────────────────────────────────────────────────────────────────
707
708pub async fn grep_files(args: &Value, budget: usize) -> Result<String, String> {
709    let char_budget = budget * 4;
710    let pattern = require_str(args, "pattern")?;
711    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
712    let ext_filter = args.get("extension").and_then(|v| v.as_str());
713    let case_insensitive = args
714        .get("case_insensitive")
715        .and_then(|v| v.as_bool())
716        .unwrap_or(true);
717    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
718    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
719    let offset = get_usize_arg(args, "offset").unwrap_or(0);
720
721    // Context lines: `context` sets both before+after; `before`/`after` override individually.
722    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
723    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
724    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
725
726    let base = safe_path(base_str)?;
727
728    let regex = regex::RegexBuilder::new(pattern)
729        .case_insensitive(case_insensitive)
730        .build()
731        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
732
733    // ── files_only mode ───────────────────────────────────────────────────────
734    if files_only {
735        let mut matched_files: Vec<String> = Vec::new();
736        let mut scanned_count = 0;
737
738        for entry in WalkDir::new(&base).follow_links(false) {
739            scanned_count += 1;
740            if scanned_count > 25_000 {
741                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
742            }
743            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
744            if !entry.file_type().is_file() {
745                continue;
746            }
747            let p = entry.path();
748            if path_has_hidden_segment(p) {
749                continue;
750            }
751            if let Some(ext) = ext_filter {
752                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
753                    continue;
754                }
755            }
756            let Ok(contents) = fs::read_to_string(p) else {
757                continue;
758            };
759            if contents.lines().any(|line| regex.is_match(line)) {
760                matched_files.push(p.display().to_string());
761            }
762        }
763
764        if matched_files.is_empty() {
765            return Ok(format!("No files matching '{pattern}' in {base_str}"));
766        }
767
768        let total = matched_files.len();
769        let page: Vec<_> = matched_files
770            .into_iter()
771            .skip(offset)
772            .take(head_limit)
773            .collect();
774        let showing = page.len();
775
776        let mut out = format!("{total} file(s) match '{pattern}'");
777        if offset > 0 || showing < total {
778            let _ = write!(
779                out,
780                " [showing {}-{} of {total}]",
781                offset + 1,
782                offset + showing
783            );
784        }
785        out.push('\n');
786
787        let mut current_chars = out.len();
788        let mut shown_pages = Vec::with_capacity(page.len());
789        for p in page {
790            if current_chars + p.len() + 1 > char_budget {
791                out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
792                break;
793            }
794            current_chars += p.len() + 1;
795            shown_pages.push(p);
796        }
797        out.push_str(&shown_pages.join("\n"));
798        return Ok(out);
799    }
800
801    // ── content mode with optional context lines ──────────────────────────────
802
803    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
804    struct Hunk {
805        path: String,
806        /// (line_number_1_indexed, line_text, is_match)
807        lines: Vec<(usize, String, bool)>,
808    }
809
810    let mut hunks: Vec<Hunk> = Vec::new();
811    let mut total_matches = 0usize;
812    let mut files_matched = 0usize;
813    let mut scanned_count = 0;
814
815    for entry in WalkDir::new(&base).follow_links(false) {
816        scanned_count += 1;
817        if scanned_count > 25_000 {
818            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
819        }
820        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
821        if !entry.file_type().is_file() {
822            continue;
823        }
824        let p = entry.path();
825        if path_has_hidden_segment(p) {
826            continue;
827        }
828        if let Some(ext) = ext_filter {
829            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
830                continue;
831            }
832        }
833        let Ok(contents) = fs::read_to_string(p) else {
834            continue;
835        };
836        let all_lines: Vec<&str> = contents.lines().collect();
837        let n = all_lines.len();
838
839        // Find all match indices in this file.
840        let match_idxs: Vec<usize> = all_lines
841            .iter()
842            .enumerate()
843            .filter(|(_, line)| regex.is_match(line))
844            .map(|(i, _)| i)
845            .collect();
846
847        if match_idxs.is_empty() {
848            continue;
849        }
850        files_matched += 1;
851        total_matches += match_idxs.len();
852
853        // Merge overlapping ranges into hunks.
854        let path_str = p.display().to_string();
855        let mut ranges: Vec<(usize, usize)> = match_idxs
856            .iter()
857            .map(|&i| {
858                (
859                    i.saturating_sub(before),
860                    (i + after).min(n.saturating_sub(1)),
861                )
862            })
863            .collect();
864
865        // Sort and merge overlapping ranges.
866        ranges.sort_unstable();
867        let mut merged: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
868        for (s, e) in ranges {
869            if let Some(last) = merged.last_mut() {
870                if s <= last.1 + 1 {
871                    last.1 = last.1.max(e);
872                    continue;
873                }
874            }
875            merged.push((s, e));
876        }
877
878        // Build hunks from merged ranges.
879        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
880        for (start, end) in merged {
881            let mut hunk_lines = Vec::with_capacity(end - start + 1);
882            for (offset, line) in all_lines[start..=end].iter().enumerate() {
883                hunk_lines.push((
884                    start + offset + 1,
885                    line.to_string(),
886                    match_set.contains(&(start + offset)),
887                ));
888            }
889            hunks.push(Hunk {
890                path: path_str.clone(),
891                lines: hunk_lines,
892            });
893        }
894    }
895
896    if hunks.is_empty() {
897        return Ok(format!("No matches for '{pattern}' in {base_str}"));
898    }
899
900    let total_hunks = hunks.len();
901    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
902    let showing = page_hunks.len();
903
904    let mut out =
905        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
906    if offset > 0 || showing < total_hunks {
907        let _ = write!(
908            out,
909            " [hunks {}-{} of {total_hunks}]",
910            offset + 1,
911            offset + showing
912        );
913    }
914    out.push('\n');
915
916    let mut current_chars = out.len();
917    let mut truncated_by_budget = false;
918
919    for (i, hunk) in page_hunks.iter().enumerate() {
920        let mut hunk_out = String::with_capacity(hunk.lines.len() * 64 + 8);
921        if i > 0 {
922            hunk_out.push_str("\n--\n");
923        }
924        for (lineno, text, is_match) in &hunk.lines {
925            if *is_match {
926                let _ = writeln!(hunk_out, "{}:{}:{}", hunk.path, lineno, text);
927            } else {
928                let _ = writeln!(hunk_out, "{}: {}-{}", hunk.path, lineno, text);
929            }
930        }
931
932        if current_chars + hunk_out.len() > char_budget {
933            truncated_by_budget = true;
934            break;
935        }
936        current_chars += hunk_out.len();
937        out.push_str(&hunk_out);
938    }
939
940    if truncated_by_budget {
941        out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
942    }
943
944    Ok(out.trim_end().to_string())
945}
946
947// ── Argument helpers ──────────────────────────────────────────────────────────
948
949fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
950    args.get(key)
951        .and_then(|v| v.as_str())
952        .ok_or_else(|| format!("Missing required argument: '{key}'"))
953}
954
955fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
956    args.get(key).and_then(value_as_usize)
957}
958
959fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
960    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
961}
962
963fn value_as_usize(value: &Value) -> Option<usize> {
964    if let Some(v) = value.as_u64() {
965        return usize::try_from(v).ok();
966    }
967
968    if let Some(v) = value.as_i64() {
969        return if v >= 0 {
970            usize::try_from(v as u64).ok()
971        } else {
972            None
973        };
974    }
975
976    if let Some(v) = value.as_f64() {
977        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
978            return Some(v as usize);
979        }
980        return None;
981    }
982
983    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
984}
985
986// ── Path helpers ──────────────────────────────────────────────────────────────
987
988/// Resolve a path that must already exist, and check it's inside the workspace.
989fn safe_path(path: &str) -> Result<PathBuf, String> {
990    let candidate = resolve_candidate(path);
991    match canonicalize_safe(&candidate, path) {
992        Ok(abs) => Ok(abs),
993        Err(e) => {
994            if e.contains("The system cannot find the file specified") || e.contains("os error 2") {
995                if let Some(suggestion) = suggest_better_path(path) {
996                    return Err(format!("{e}. Did you mean '{suggestion}'?"));
997                }
998            }
999            Err(e)
1000        }
1001    }
1002}
1003
1004fn suggest_better_path(original: &str) -> Option<String> {
1005    let path = Path::new(original);
1006    let filename = path.file_name()?.to_str()?.to_lowercase();
1007    let parent = path.parent().unwrap_or_else(|| Path::new("."));
1008
1009    // Use resolve_candidate to handle sovereign tokens like @DESKTOP/
1010    let abs_parent = resolve_candidate(&parent.to_string_lossy())
1011        .canonicalize()
1012        .ok()?;
1013
1014    let mut best_match = None;
1015    let mut best_score = 0;
1016
1017    if let Ok(entries) = fs::read_dir(abs_parent) {
1018        for entry in entries.flatten() {
1019            if let Some(candidate_name) = entry.file_name().to_str() {
1020                let lower_candidate = candidate_name.to_lowercase();
1021                if lower_candidate == filename {
1022                    continue;
1023                }
1024
1025                let mut score = 0;
1026                if lower_candidate.starts_with(&filename) || filename.starts_with(&lower_candidate)
1027                {
1028                    score += 10;
1029                }
1030                // Catch style.css vs styles.css
1031                if (filename.ends_with('s') && filename[..filename.len() - 1] == lower_candidate)
1032                    || (lower_candidate.ends_with('s')
1033                        && lower_candidate[..lower_candidate.len() - 1] == filename)
1034                {
1035                    score += 20;
1036                }
1037
1038                if score > best_score {
1039                    best_score = score;
1040                    best_match = Some(candidate_name.to_string());
1041                }
1042            }
1043        }
1044    }
1045
1046    if best_score >= 10 {
1047        best_match
1048    } else {
1049        None
1050    }
1051}
1052
1053/// Resolve a path that may not exist yet (for write_file).
1054fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
1055    let candidate = resolve_candidate(path);
1056
1057    // Try canonical first.
1058    if let Ok(abs) = candidate.canonicalize() {
1059        check_workspace_bounds(&abs, path)?;
1060        return Ok(abs);
1061    }
1062
1063    // File doesn't exist yet — canonicalize the parent, append the filename.
1064    let parent = candidate.parent().unwrap_or(Path::new("."));
1065    let name = candidate
1066        .file_name()
1067        .ok_or_else(|| format!("invalid path: {path}"))?;
1068    let abs_parent = parent
1069        .canonicalize()
1070        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
1071    let abs = abs_parent.join(name);
1072    check_workspace_bounds(&abs, path)?;
1073    Ok(abs)
1074}
1075
1076pub(crate) fn resolve_candidate(path: &str) -> PathBuf {
1077    // 1. Handle Special Sovereign Tokens
1078    let upper = path.to_uppercase();
1079
1080    // Bare token support — matches exact names with or without @ prefix, with or without
1081    // trailing slash. Enables /cd downloads, /cd @DESKTOP, /cd ~ etc.
1082    let bare = upper.trim_end_matches('/').trim_start_matches('@');
1083    let bare_resolved = match bare {
1084        "DESKTOP" => dirs::desktop_dir(),
1085        "DOWNLOADS" | "DOWNLOAD" => dirs::download_dir(),
1086        "DOCUMENTS" | "DOCS" => dirs::document_dir(),
1087        "PICTURES" | "IMAGES" => dirs::picture_dir(),
1088        "VIDEOS" | "MOVIES" => dirs::video_dir(),
1089        "MUSIC" | "AUDIO" => dirs::audio_dir(),
1090        "HOME" => dirs::home_dir(),
1091        "TEMP" | "TMP" => Some(std::env::temp_dir()),
1092        "CACHE" => dirs::cache_dir(),
1093        "CONFIG" => dirs::config_dir(),
1094        "DATA" => dirs::data_dir(),
1095        _ => None,
1096    };
1097    // Also handle bare ~ and ~/ as home
1098    let bare_resolved = bare_resolved.or_else(|| {
1099        if path == "~" || path == "~/" {
1100            dirs::home_dir()
1101        } else {
1102            None
1103        }
1104    });
1105    if let Some(p) = bare_resolved {
1106        return p;
1107    }
1108
1109    // Helper to resolve via dirs crate
1110    let resolved = if upper.starts_with("@DESKTOP/") {
1111        dirs::desktop_dir().map(|p| p.join(&path[9..]))
1112    } else if upper.starts_with("@DOCUMENTS/") {
1113        dirs::document_dir().map(|p| p.join(&path[11..]))
1114    } else if upper.starts_with("@DOWNLOADS/") {
1115        dirs::download_dir().map(|p| p.join(&path[11..]))
1116    } else if upper.starts_with("@PICTURES/") || upper.starts_with("@IMAGES/") {
1117        let offset = if upper.starts_with("@PICTURES/") {
1118            10
1119        } else {
1120            8
1121        };
1122        dirs::picture_dir().map(|p| p.join(&path[offset..]))
1123    } else if upper.starts_with("@VIDEOS/") || upper.starts_with("@MOVIES/") {
1124        let offset = 8;
1125        dirs::video_dir().map(|p| p.join(&path[offset..]))
1126    } else if upper.starts_with("@MUSIC/") || upper.starts_with("@AUDIO/") {
1127        let offset = 7;
1128        dirs::audio_dir().map(|p| p.join(&path[offset..]))
1129    } else if upper.starts_with("@HOME/") || upper.starts_with("~/") {
1130        let offset = if upper.starts_with("@HOME/") { 6 } else { 2 };
1131        dirs::home_dir().map(|p| p.join(&path[offset..]))
1132    } else if upper.starts_with("@TEMP/") {
1133        Some(std::env::temp_dir().join(&path[6..]))
1134    } else if upper.starts_with("@CACHE/") {
1135        dirs::cache_dir().map(|p| p.join(&path[7..]))
1136    } else if upper.starts_with("@CONFIG/") {
1137        dirs::config_dir().map(|p| p.join(&path[8..]))
1138    } else if upper.starts_with("@DATA/") {
1139        dirs::data_dir().map(|p| p.join(&path[6..]))
1140    } else {
1141        None
1142    };
1143
1144    if let Some(p) = resolved {
1145        return p;
1146    }
1147
1148    // 2. Fallback to Standard Resolution
1149    let p = Path::new(path);
1150    if p.is_absolute() {
1151        p.to_path_buf()
1152    } else {
1153        std::env::current_dir()
1154            .unwrap_or_else(|_| PathBuf::from("."))
1155            .join(p)
1156    }
1157}
1158
1159fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
1160    let abs = candidate
1161        .canonicalize()
1162        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
1163    check_workspace_bounds(&abs, original)?;
1164    Ok(abs)
1165}
1166
/// Returns true when `abs` lies under `workspace` and is one of the
/// `.hematite/` plan sidecar files (`task.md`, `plan.md`, `walkthrough.md`).
/// The file-name comparison ignores case and path-separator style.
fn is_allowed_plan_sidecar(workspace: &Path, abs: &Path) -> bool {
    const SIDECAR_SUFFIXES: [&str; 3] = [
        "/.hematite/task.md",
        "/.hematite/plan.md",
        "/.hematite/walkthrough.md",
    ];

    // Compare against the canonical workspace: on Windows, canonicalize()
    // prepends the \\?\ extended-path prefix, which is a path component of its
    // own, so a component-wise starts_with against a non-canonical workspace
    // would report false even for paths inside the same tree.
    let root = match workspace.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => workspace.to_path_buf(),
    };
    if !abs.starts_with(&root) {
        return false;
    }

    let normalised = abs.to_string_lossy().to_lowercase().replace('\\', "/");
    SIDECAR_SUFFIXES
        .iter()
        .any(|suffix| normalised.ends_with(*suffix))
}
1186
1187fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
1188    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
1189    if is_allowed_plan_sidecar(&workspace, abs) {
1190        return Ok(());
1191    }
1192
1193    // Delegate to the existing guard for blacklist + traversal checks.
1194    super::guard::path_is_safe(&workspace, abs)
1195        .map(|_| ())
1196        .map_err(|e| format!("file access denied for '{original}': {e}"))
1197}
1198
/// Returns true if any component of the path marks it as one to skip:
/// a dot-prefixed name, `target`, `node_modules`, or `__pycache__`.
///
/// The components `.hematite`, `.git`, `.` and `..` are explicitly exempt and
/// never cause a path to be skipped.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let name = component.as_os_str().to_string_lossy();
        match name.as_ref() {
            ".hematite" | ".git" | "." | ".." => continue,
            "target" | "node_modules" | "__pycache__" => return true,
            other if other.starts_with('.') => return true,
            _ => {}
        }
    }
    false
}
1209
/// Build a numbered excerpt of `content` around the line that most resembles
/// the start of `search`, so a failed match can be inspected and corrected.
///
/// The probe is the first non-blank line of `search`, trimmed. Each file line
/// is scored by the length of the prefix it shares with the probe after
/// trimming; among equal scores the LAST line wins. The excerpt spans from
/// three lines before the best line to four lines after it, clamped to the
/// file. An empty file yields `"(file is empty)"`.
fn nearest_lines(content: &str, search: &str) -> String {
    let file_lines: Vec<&str> = content.lines().collect();
    if file_lines.is_empty() {
        return "(file is empty)".into();
    }

    let probe = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    let mut best_idx = 0usize;
    if !probe.is_empty() {
        let mut best_score = 0usize;
        for (idx, raw) in file_lines.iter().enumerate() {
            let shared = probe
                .chars()
                .zip(raw.trim().chars())
                .take_while(|(a, b)| a == b)
                .count();
            // `>=` keeps the last of several equally good lines.
            if shared >= best_score {
                best_score = shared;
                best_idx = idx;
            }
        }
    }

    let start = best_idx.saturating_sub(3);
    let end = (best_idx + 5).min(file_lines.len());
    let snippet = file_lines[start..end]
        .iter()
        .enumerate()
        .map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
        .collect::<Vec<_>>()
        .join("\n");

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        end,
        snippet
    )
}
1263
/// Core span-mapping logic shared by the rstrip and full-strip fuzzy levels.
///
/// Both `content` and `search` are passed through `normalise`; the normalised
/// search (with leading/trailing newlines removed) must then match one or more
/// WHOLE consecutive lines of the normalised content. The first such match is
/// mapped back to a byte range in the original `content`. The range starts at
/// the first matched line and ends at the end of the last matched line,
/// excluding that line's newline.
///
/// Every line start is tried as a candidate, so an earlier occurrence of the
/// search text in the middle of a line does not hide a later whole-line match.
///
/// `normalise` is expected to work line by line and preserve the number of
/// lines; `content` is expected to use `\n` line endings. Returns `None` when
/// the normalised search is empty or no whole-line match exists.
fn find_span_normalised(
    content: &str,
    search: &str,
    normalise: impl Fn(&str) -> String,
) -> Option<std::ops::Range<usize>> {
    let norm_content = normalise(content);
    let norm_search = normalise(search).trim_matches('\n').to_string();

    if norm_search.is_empty() {
        return None;
    }
    let search_lines = norm_search.matches('\n').count() + 1;

    // (start, end) byte offsets of every line in the ORIGINAL content,
    // end exclusive and not counting the newline.
    let mut orig_bounds: Vec<(usize, usize)> = Vec::new();
    let mut offset = 0usize;
    for line in content.lines() {
        orig_bounds.push((offset, offset + line.len()));
        offset += line.len() + 1;
    }

    let norm_bytes = norm_content.as_bytes();
    let mut next_line_start = 0usize;
    for (line_idx, norm_line) in norm_content.split('\n').enumerate() {
        let line_start = next_line_start;
        next_line_start += norm_line.len() + 1;

        // The match must begin exactly at this line start…
        if !norm_content[line_start..].starts_with(norm_search.as_str()) {
            continue;
        }
        // …and finish exactly at a line end.
        let norm_end = line_start + norm_search.len();
        if norm_end != norm_bytes.len() && norm_bytes[norm_end] != b'\n' {
            continue;
        }

        // Map the matched line numbers back onto the original text.
        let last_idx = line_idx + search_lines - 1;
        let (Some(&(byte_start, _)), Some(&(_, byte_end))) =
            (orig_bounds.get(line_idx), orig_bounds.get(last_idx))
        else {
            continue;
        };
        // `get` (rather than indexing) so misaligned offsets cannot panic.
        let Some(candidate) = content.get(byte_start..byte_end) else {
            continue;
        };

        // Final safety net: the original slice must normalise to the search.
        if normalise(candidate).trim_end_matches('\n') == norm_search.as_str() {
            return Some(byte_start..byte_end);
        }
    }

    None
}
1328
1329/// Level 1 fuzzy: rstrip only — removes trailing whitespace per line but
1330/// preserves leading indentation. Catches trailing-space mismatches where
1331/// the model's indentation is actually correct.
1332fn rstrip_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1333    find_span_normalised(content, search, |s| {
1334        let mut out = String::with_capacity(s.len());
1335        for (i, l) in s.lines().enumerate() {
1336            if i > 0 {
1337                out.push('\n');
1338            }
1339            out.push_str(l.trim_end());
1340        }
1341        out
1342    })
1343}
1344
1345/// Level 2 fuzzy: indent-flexible — strips the minimum common leading whitespace
1346/// (dedent) from both search and candidate windows before comparing. Preserves
1347/// relative indentation structure so nested code remains distinguishable. Also
1348/// normalises tabs → 4 spaces so tab/space mismatches are tolerated.
1349fn indent_flexible_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1350    let norm_search = dedent(search.trim_matches('\n'));
1351    if norm_search.trim().is_empty() {
1352        return None;
1353    }
1354    let search_line_count = norm_search.lines().count();
1355    let content_lines: Vec<&str> = content.lines().collect();
1356    if content_lines.len() < search_line_count {
1357        return None;
1358    }
1359
1360    // Precompute byte start of each line (content is already LF-normalised).
1361    let mut line_starts: Vec<usize> = Vec::with_capacity(content_lines.len() + 1);
1362    let mut pos = 0usize;
1363    for line in &content_lines {
1364        line_starts.push(pos);
1365        pos += line.len() + 1; // +1 for '\n'
1366    }
1367    line_starts.push(pos);
1368
1369    for start in 0..=(content_lines.len() - search_line_count) {
1370        let window = content_lines[start..start + search_line_count].join("\n");
1371        if dedent(&window) == norm_search {
1372            let byte_start = line_starts[start];
1373            let end_line = start + search_line_count;
1374            let byte_end = if end_line < content_lines.len() {
1375                line_starts[end_line] - 1 // exclude trailing '\n'
1376            } else {
1377                content.len()
1378            };
1379            return Some(byte_start..byte_end);
1380        }
1381    }
1382    None
1383}
1384
1385/// Level 3 fuzzy: full strip — trims all leading and trailing whitespace
1386/// per line. Last resort before the cross-file hint error.
1387fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1388    find_span_normalised(content, search, |s| {
1389        let mut result = String::with_capacity(s.len());
1390        for (i, l) in s.lines().enumerate() {
1391            if i > 0 {
1392                result.push('\n');
1393            }
1394            result.push_str(l.trim());
1395        }
1396        result
1397    })
1398}
1399
1400/// Scan source files in the workspace for a search string that failed to
1401/// match in the intended target file. Returns the first file path where
1402/// the string is found (after CRLF normalisation), capped at 100 files.
1403/// Used to generate a "did you mean this file?" hint in edit errors.
1404fn find_search_in_workspace(search: &str, skip_path: &str) -> Option<String> {
1405    let root = workspace_root();
1406    let norm_search = search.replace("\r\n", "\n");
1407    let mut checked = 0usize;
1408
1409    let walker = ignore::WalkBuilder::new(&root)
1410        .hidden(true)
1411        .ignore(true)
1412        .git_ignore(true)
1413        .build();
1414
1415    for entry in walker.flatten() {
1416        if checked >= 100 {
1417            break;
1418        }
1419        let path = entry.path();
1420        if !path.is_file() {
1421            continue;
1422        }
1423        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1424        if !matches!(
1425            ext,
1426            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h"
1427        ) {
1428            continue;
1429        }
1430        let rel = path
1431            .strip_prefix(&root)
1432            .unwrap_or(path)
1433            .to_string_lossy()
1434            .replace('\\', "/");
1435        if rel == skip_path {
1436            continue;
1437        }
1438        checked += 1;
1439        if let Ok(content) = std::fs::read_to_string(path) {
1440            let normalised = content.replace("\r\n", "\n");
1441            if normalised.contains(&norm_search) {
1442                return Some(rel);
1443            }
1444        }
1445    }
1446    None
1447}
1448
1449// ── Indent-aware replacement ──────────────────────────────────────────────────
1450
/// Canonicalise a block of text for indentation-insensitive comparison.
///
/// Each tab is first expanded to four spaces. The smallest leading-space count
/// among non-blank lines is then removed from every non-blank line, which is
/// also right-trimmed. Blank (whitespace-only) lines become empty. Lines are
/// re-joined with `\n`; a trailing newline in the input is not preserved.
/// Used by indent_flexible_find_span.
fn dedent(s: &str) -> String {
    let expanded: Vec<String> = s.lines().map(|l| l.replace('\t', "    ")).collect();

    let mut min_indent: Option<usize> = None;
    for line in &expanded {
        if line.trim().is_empty() {
            continue;
        }
        let indent = line.len() - line.trim_start_matches(' ').len();
        min_indent = Some(match min_indent {
            Some(current) => current.min(indent),
            None => indent,
        });
    }
    let min_indent = min_indent.unwrap_or(0);

    let pieces: Vec<&str> = expanded
        .iter()
        .map(|line| {
            if line.trim().is_empty() {
                ""
            } else {
                line.get(min_indent..)
                    .unwrap_or(line.as_str())
                    .trim_end()
            }
        })
        .collect();
    pieces.join("\n")
}
1475
/// Re-indent `replace` so it lands at the indentation the file actually uses.
///
/// After a fuzzy match, the model's `search` text and the matched `file_span`
/// may differ in indentation. The difference between the leading-space count
/// of their first non-blank lines is applied to every non-blank line of
/// `replace` (never going below zero). Blank lines are kept as they are, and a
/// trailing newline on `replace` is preserved. When the two indents already
/// agree, `replace` is returned unchanged.
///
/// Example: search/replace written with 0-space indent, file uses 8 spaces —
/// every non-blank replacement line gains 8 spaces.
fn adjust_replace_indent(search: &str, file_span: &str, replace: &str) -> String {
    fn leading_spaces(line: &str) -> usize {
        line.len() - line.trim_start_matches(' ').len()
    }

    fn block_indent(text: &str) -> usize {
        text.lines()
            .find(|l| !l.trim().is_empty())
            .map(leading_spaces)
            .unwrap_or(0)
    }

    let from = block_indent(search);
    let to = block_indent(file_span);
    if from == to {
        return replace.to_string();
    }
    let shift = to as i64 - from as i64;

    let mut result = String::with_capacity(replace.len());
    for (n, line) in replace.lines().enumerate() {
        if n > 0 {
            result.push('\n');
        }
        if line.trim().is_empty() {
            // Blank lines pass through untouched.
            result.push_str(line);
            continue;
        }
        let body = line.trim_start_matches(' ');
        let current = (line.len() - body.len()) as i64;
        let width = (current + shift).max(0) as usize;
        result.extend(std::iter::repeat(' ').take(width));
        result.push_str(body);
    }
    if replace.ends_with('\n') {
        result.push('\n');
    }
    result
}
1520
1521// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
1522
1523/// Return a formatted diff string for an edit_file operation without applying it.
1524/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1525/// search string cannot be located (caller falls through to normal tool dispatch).
1526pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1527    let path = require_str(args, "path")?;
1528    let search = require_str(args, "search")?;
1529    let replace = require_str(args, "replace")?;
1530
1531    let abs = safe_path(path)?;
1532    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1533    let original = raw.replace("\r\n", "\n");
1534
1535    let (effective_search, effective_replace): (String, String) = if original.contains(search) {
1536        (search.to_string(), replace.to_string())
1537    } else {
1538        let span = rstrip_find_span(&original, search)
1539            .or_else(|| indent_flexible_find_span(&original, search))
1540            .or_else(|| fuzzy_find_span(&original, search));
1541        match span {
1542            Some(span) => {
1543                let real_slice = original[span].to_string();
1544                let adjusted = adjust_replace_indent(search, &real_slice, replace);
1545                (real_slice, adjusted)
1546            }
1547            None => return Err("search string not found — diff preview unavailable".into()),
1548        }
1549    };
1550
1551    let mut diff = String::with_capacity(effective_search.len() + effective_replace.len() + 16);
1552    for line in effective_search.lines() {
1553        let _ = writeln!(diff, "- {}", line);
1554    }
1555    for line in effective_replace.lines() {
1556        let _ = writeln!(diff, "+ {}", line);
1557    }
1558    Ok(diff)
1559}
1560
1561/// Return a formatted diff string for a patch_hunk operation without applying it.
1562pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1563    let path = require_str(args, "path")?;
1564    let start_line = require_usize(args, "start_line")?;
1565    let end_line = require_usize(args, "end_line")?;
1566    let replacement = require_str(args, "replacement")?;
1567
1568    let abs = safe_path(path)?;
1569    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1570    let lines: Vec<&str> = original.lines().collect();
1571    let total = lines.len();
1572
1573    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1574        return Err(format!(
1575            "patch_hunk: invalid line range {}-{} for file with {} lines",
1576            start_line, end_line, total
1577        ));
1578    }
1579
1580    let s_idx = start_line - 1;
1581    let e_idx = end_line;
1582
1583    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1584    for line in &lines[s_idx..e_idx] {
1585        let _ = writeln!(diff, "- {}", line.trim_end());
1586    }
1587    for line in replacement.lines() {
1588        let _ = writeln!(diff, "+ {}", line.trim_end());
1589    }
1590    Ok(diff)
1591}
1592
1593/// Return a formatted diff string for a multi_search_replace operation without applying it.
1594pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1595    let hunks_val = args
1596        .get("hunks")
1597        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1598
1599    #[derive(serde::Deserialize)]
1600    struct PreviewHunk {
1601        search: String,
1602        replace: String,
1603    }
1604    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1605        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1606
1607    let mut diff = String::with_capacity(hunks.len() * 128 + 16);
1608    for (i, hunk) in hunks.iter().enumerate() {
1609        if hunks.len() > 1 {
1610            let _ = writeln!(diff, "@@ hunk {} @@", i + 1);
1611        }
1612        for line in hunk.search.lines() {
1613            let _ = writeln!(diff, "- {}", line.trim_end());
1614        }
1615        for line in hunk.replace.lines() {
1616            let _ = writeln!(diff, "+ {}", line.trim_end());
1617        }
1618    }
1619    Ok(diff)
1620}
1621
1622/// Compute a preview diff for write_file — shows the full new content as additions,
1623/// and any existing file content as removals. New files show only `+` lines.
1624pub fn compute_write_file_diff(args: &Value) -> Result<String, String> {
1625    let path = require_str(args, "path")?;
1626    let new_content = require_str(args, "content")?;
1627
1628    let abs = safe_path(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
1629    let old_content = fs::read_to_string(&abs)
1630        .map(|s| s.replace("\r\n", "\n"))
1631        .unwrap_or_default();
1632
1633    let mut diff = String::with_capacity(old_content.len() + new_content.len() + 16);
1634    if !old_content.is_empty() {
1635        for line in old_content.lines() {
1636            let _ = writeln!(diff, "- {}", line);
1637        }
1638    }
1639    for line in new_content.lines() {
1640        let _ = writeln!(diff, "+ {}", line);
1641    }
1642    if diff.is_empty() {
1643        return Err("empty content — diff preview unavailable".into());
1644    }
1645    Ok(diff)
1646}
1647
/// Find the workspace root: the nearest directory, starting at the current
/// directory and walking up through its ancestors, that contains `.git`,
/// `Cargo.toml` or `package.json`.
///
/// Falls back to the current directory (or `.` if it cannot be read) when no
/// ancestor carries a marker.
pub fn workspace_root() -> PathBuf {
    const ROOT_MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let marked = cwd
        .ancestors()
        .find(|dir| ROOT_MARKERS.iter().any(|marker| dir.join(marker).exists()))
        .map(Path::to_path_buf);
    marked.unwrap_or(cwd)
}
1664
1665/// Returns true if `path` is a known OS shortcut directory (Desktop, Downloads,
1666/// Documents, Pictures, Videos, Music). These directories should not accumulate
1667/// `.hematite/` workspace state — they use the global `~/.hematite/` instead.
1668pub fn is_os_shortcut_directory(path: &Path) -> bool {
1669    let candidates = [
1670        dirs::desktop_dir(),
1671        dirs::download_dir(),
1672        dirs::document_dir(),
1673        dirs::picture_dir(),
1674        dirs::video_dir(),
1675        dirs::audio_dir(),
1676    ];
1677    candidates
1678        .iter()
1679        .filter_map(|d| d.as_deref())
1680        .any(|d| d == path)
1681}
1682
1683/// Returns the directory where Hematite's runtime state (`.hematite/`) should live.
1684///
1685/// - In sovereign OS directories (Desktop, Downloads, Documents, Pictures, Videos,
1686///   Music): returns `~/.hematite/` so no workspace folder is created there.
1687/// - Everywhere else: returns `workspace_root()/.hematite/` as normal.
1688pub fn hematite_dir() -> PathBuf {
1689    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1690    if is_os_shortcut_directory(&cwd) {
1691        if let Some(home) = dirs::home_dir() {
1692            return home.join(".hematite");
1693        }
1694    }
1695    workspace_root().join(".hematite")
1696}
1697
1698/// Returns true if the workspace root looks like a real project.
1699/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1700/// count — at least one explicit build/package marker must also be present.
1701pub fn is_project_workspace() -> bool {
1702    let root = workspace_root();
1703    let has_explicit_marker = root.join("Cargo.toml").exists()
1704        || root.join("package.json").exists()
1705        || root.join("pyproject.toml").exists()
1706        || root.join("go.mod").exists()
1707        || root.join("setup.py").exists()
1708        || root.join("pom.xml").exists()
1709        || root.join("build.gradle").exists()
1710        || root.join("CMakeLists.txt").exists()
1711        || root.join("index.html").exists()
1712        || root.join("style.css").exists()
1713        || root.join("script.js").exists();
1714    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1715}
1716
1717// ── open_in_system_editor ───────────────────────────────────────────────────
1718
/// Opens `path` with the platform's default handler for that file.
///
/// The launcher is chosen at compile time: `cmd /c start` on Windows, `open`
/// on macOS, and `xdg-open` on other Unix targets. The call waits for the
/// launcher process to exit and inspects its exit status. On any other target
/// nothing is launched and `Ok(())` is returned.
///
/// # Errors
///
/// Returns an error string when `path` does not exist, when the launcher
/// process cannot be spawned, or when it exits with a non-success status.
pub fn open_in_system_editor(path: &std::path::Path) -> Result<(), String> {
    /// Runs `launcher` to completion, mapping a spawn failure or a non-success
    /// exit status to an error string (`failure` is used for the latter).
    #[cfg(any(target_os = "windows", unix))]
    fn run_launcher(mut launcher: std::process::Command, failure: &str) -> Result<(), String> {
        let exit = launcher
            .status()
            .map_err(|e| format!("Failed to launch editor: {e}"))?;
        if exit.success() {
            Ok(())
        } else {
            Err(failure.into())
        }
    }

    if !path.exists() {
        return Err(format!("File not found: {}", path.display()));
    }

    #[cfg(target_os = "windows")]
    {
        // `start` opens the file with its associated application. The empty
        // string occupies `start`'s window-title slot so the path is not
        // mistaken for a title.
        // NOTE(review): cmd.exe may interpret metacharacters such as `&` in an
        // unquoted path — verify that callers only pass trusted paths.
        let mut launcher = std::process::Command::new("cmd");
        launcher.args(["/c", "start", "", &path.to_string_lossy()]);
        run_launcher(launcher, "Editor command failed to start.")?;
    }

    #[cfg(target_os = "macos")]
    {
        let mut launcher = std::process::Command::new("open");
        launcher.arg(path);
        run_launcher(launcher, "open command failed.")?;
    }

    #[cfg(all(unix, not(target_os = "macos")))]
    {
        // Linux and other non-macOS Unix targets go through xdg-open.
        let mut launcher = std::process::Command::new("xdg-open");
        launcher.arg(path);
        run_launcher(launcher, "xdg-open failed.")?;
    }

    Ok(())
}
1765
#[cfg(test)]
mod tests {
    use super::*;

    /// `safe_path` must accept a relative path to a plan sidecar file that
    /// lives under `.hematite/` of the current working directory.
    #[test]
    fn safe_path_allows_plan_sidecars_inside_workspace() {
        // The test changes the process-wide cwd. `TEST_CWD_LOCK` presumably
        // serializes such tests (confirm against its definition); a poisoned
        // lock is recovered rather than treated as a failure.
        let _cwd_lock = crate::TEST_CWD_LOCK
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        std::fs::create_dir_all(root.join(".hematite")).unwrap();
        std::fs::write(root.join(".hematite").join("TASK.md"), "# Task Ledger\n").unwrap();

        let previous = env!("CARGO_MANIFEST_DIR");
        std::env::set_current_dir(root).unwrap();
        // Hold the outcome as a `Result` until the cwd has been restored. If we
        // unwrapped here, an `Err` would panic with the cwd still inside the
        // temp dir, which is deleted on drop — leaving later cwd-dependent
        // tests running from a directory that no longer exists.
        let outcome = safe_path(".hematite/TASK.md");
        std::env::set_current_dir(previous).unwrap();

        let resolved = outcome.unwrap();
        assert!(resolved.ends_with(Path::new(".hematite").join("TASK.md")));
    }
}
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs