Skip to main content

hematite/tools/
file_ops.rs

1use serde_json::Value;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6use walkdir::WalkDir;
7
8// ── Ghost Ledger ──────────────────────────────────────────────────────────────
9
/// Maximum number of `.bak` snapshots kept in the ghost directory.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trims `ghost_dir` down to the newest [`MAX_GHOST_BACKUPS`] `.bak` files and rewrites
/// `ledger.txt` so that it only lists backups that were retained.
///
/// "Newest" is decided by file modification time; entries whose metadata cannot be read
/// sort as oldest. Only files with a `bak` extension (case-insensitive) are considered.
///
/// The whole routine is best-effort: a missing directory, a missing ledger, or any failed
/// delete/write is silently ignored.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("bak"))
                .unwrap_or(false)
        })
        .collect();

    // Stat every file exactly once. A plain `sort_by_key` would re-read the metadata on each
    // comparison, which is slow and can hand the sort an inconsistent ordering if a file's
    // mtime changes mid-sort.
    backups.sort_by_cached_key(|entry| entry.metadata().and_then(|meta| meta.modified()).ok());
    backups.reverse(); // newest first

    // Everything past the first MAX_GHOST_BACKUPS entries is expired.
    let expired = backups.split_off(MAX_GHOST_BACKUPS.min(backups.len()));

    // Paths are compared with forward slashes so ledger entries written with either
    // separator style still match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in expired {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Ledger lines have the form `target|backup_path`; malformed lines and lines pointing at
    // a backup that was not retained are dropped.
    let kept: Vec<&str> = content
        .lines()
        .filter(|line| {
            line.split_once('|')
                .map(|(_, backup)| retained.contains(&backup.replace('\\', "/")))
                .unwrap_or(false)
        })
        .collect();

    let rewritten = if kept.is_empty() {
        String::new()
    } else {
        kept.join("\n") + "\n"
    };
    let _ = fs::write(ledger_path, rewritten);
}
71
72fn save_ghost_backup(target_path: &str, content: &str) {
73    let ws = workspace_root();
74
75    // Phase 1: Try Git Ghost Snapshot
76    if crate::agent::git::is_git_repo(&ws) {
77        let _ = crate::agent::git::create_ghost_snapshot(&ws);
78    }
79
80    // Phase 2: Fallback to local file backup (Ghost Ledger)
81    let ghost_dir = hematite_dir().join("ghost");
82    let _ = fs::create_dir_all(&ghost_dir);
83    let ts = std::time::SystemTime::now()
84        .duration_since(std::time::UNIX_EPOCH)
85        .unwrap()
86        .as_millis();
87    let safe_name = Path::new(target_path)
88        .file_name()
89        .unwrap_or_default()
90        .to_string_lossy();
91    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
92
93    if fs::write(&backup_file, content).is_ok() {
94        use std::io::Write;
95        if let Ok(mut f) = fs::OpenOptions::new()
96            .create(true)
97            .append(true)
98            .open(ghost_dir.join("ledger.txt"))
99        {
100            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
101        }
102        prune_ghost_backups(&ghost_dir);
103    }
104}
105
106pub fn pop_ghost_ledger() -> Result<String, String> {
107    let ghost_dir = hematite_dir().join("ghost");
108    let ledger_path = ghost_dir.join("ledger.txt");
109
110    if !ledger_path.exists() {
111        return Err("Ghost Ledger is empty — no edits to undo".into());
112    }
113
114    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
115    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
116
117    if lines.is_empty() {
118        return Err("Ghost Ledger is empty".into());
119    }
120
121    let last_line = lines.pop().unwrap();
122    let parts: Vec<&str> = last_line.splitn(2, '|').collect();
123    if parts.len() != 2 {
124        return Err("Corrupted ledger entry".into());
125    }
126
127    let target_path = parts[0];
128    let backup_path = parts[1];
129
130    let ws = workspace_root();
131
132    // Priority 1: Try Git Rollback
133    if crate::agent::git::is_git_repo(&ws) {
134        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
135            let _ = fs::remove_file(backup_path);
136            let new_ledger = lines.join("\n");
137            let _ = fs::write(
138                &ledger_path,
139                if new_ledger.is_empty() {
140                    String::new()
141                } else {
142                    new_ledger + "\n"
143                },
144            );
145            return Ok(msg);
146        }
147    }
148
149    // Priority 2: Standard File Rollback
150    let original_content =
151        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
152    let abs_target = ws.join(target_path);
153    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
154
155    let new_ledger = lines.join("\n");
156    let _ = fs::write(
157        &ledger_path,
158        if new_ledger.is_empty() {
159            String::new()
160        } else {
161            new_ledger + "\n"
162        },
163    );
164    let _ = fs::remove_file(backup_path);
165
166    Ok(format!("Restored {} from Ghost Ledger", target_path))
167}
168
169// ── read_file ─────────────────────────────────────────────────────────────────
170
171pub async fn read_file(args: &Value) -> Result<String, String> {
172    let path = require_str(args, "path")?;
173    let offset = get_usize_arg(args, "offset");
174    let limit = get_usize_arg(args, "limit");
175
176    let abs = safe_path(path)?;
177    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
178
179    let lines: Vec<&str> = raw.lines().collect();
180    let total = lines.len();
181    let start = offset.unwrap_or(0).min(total);
182    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
183
184    let mut content = lines[start..end].join("\n");
185    if end < total {
186        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
187        content.push_str(&format!("This file has {} more lines below. ", total - end));
188        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
189                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
190    }
191
192    Ok(format!(
193        "[{path}  lines {}-{} of {}]\n{}",
194        start + 1,
195        end,
196        total,
197        content
198    ))
199}
200
201// ── inspect_lines ─────────────────────────────────────────────────────────────
202
203pub async fn inspect_lines(args: &Value) -> Result<String, String> {
204    let path = require_str(args, "path")?;
205    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
206    let end_line = get_usize_arg(args, "end_line");
207
208    let abs = safe_path(path)?;
209    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
210
211    let lines: Vec<&str> = raw.lines().collect();
212    let total = lines.len();
213
214    let start = start_line.saturating_sub(1).min(total);
215    let end = end_line.unwrap_or(total).min(total);
216
217    if start >= end && total > 0 {
218        return Err(format!(
219            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
220            end_line.unwrap_or(total)
221        ));
222    }
223
224    let mut output = format!(
225        "[inspect_lines: {path} lines {}-{} of {}]\n",
226        start + 1,
227        end,
228        total
229    );
230    for i in start..end {
231        output.push_str(&format!("[{:>4}] | {}\n", i + 1, lines[i]));
232    }
233
234    Ok(output)
235}
236
237// ── tail_file ─────────────────────────────────────────────────────────────────
238
239pub async fn tail_file(args: &Value) -> Result<String, String> {
240    let path = require_str(args, "path")?;
241    let n = args
242        .get("lines")
243        .and_then(|v| v.as_u64())
244        .unwrap_or(50)
245        .min(500) as usize;
246    let grep_pat = args.get("grep").and_then(|v| v.as_str());
247
248    let abs = safe_path(path)?;
249    let raw = fs::read_to_string(&abs).map_err(|e| format!("tail_file: {e} ({path})"))?;
250
251    let all_lines: Vec<&str> = raw.lines().collect();
252    let total = all_lines.len();
253
254    // Apply optional grep filter before slicing — model asks for the last N
255    // matching lines, not the last N lines containing maybe 0 matches.
256    let filtered: Vec<(usize, &str)> = if let Some(pat) = grep_pat {
257        let re = regex::Regex::new(pat)
258            .map_err(|e| format!("tail_file: invalid grep pattern '{pat}': {e}"))?;
259        all_lines
260            .iter()
261            .enumerate()
262            .filter(|(_, l)| re.is_match(l))
263            .map(|(i, l)| (i, *l))
264            .collect()
265    } else {
266        all_lines.iter().enumerate().map(|(i, l)| (i, *l)).collect()
267    };
268
269    let total_filtered = filtered.len();
270    let skip = total_filtered.saturating_sub(n);
271    let window = &filtered[skip..];
272
273    if window.is_empty() {
274        let note = if grep_pat.is_some() {
275            format!(" matching '{}'", grep_pat.unwrap())
276        } else {
277            String::new()
278        };
279        return Ok(format!(
280            "[tail_file: {path} — no lines{note} found (total {total} lines)]"
281        ));
282    }
283
284    let first_abs = window[0].0 + 1;
285    let last_abs = window[window.len() - 1].0 + 1;
286    let mut out = format!(
287        "[tail_file: {path} — lines {first_abs}–{last_abs} of {total} (last {n} of {total_filtered} matched)]\n"
288    );
289    for (abs_idx, line) in window {
290        out.push_str(&format!("[{:>5}] {}\n", abs_idx + 1, line));
291    }
292
293    Ok(out)
294}
295
296// ── write_file ────────────────────────────────────────────────────────────────
297
298pub async fn write_file(args: &Value) -> Result<String, String> {
299    let path = require_str(args, "path")?;
300    let content = require_str(args, "content")?;
301
302    let abs = safe_path_allow_new(path)?;
303    if let Some(parent) = abs.parent() {
304        fs::create_dir_all(parent)
305            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
306    }
307
308    let existed = abs.exists();
309    if existed {
310        if let Ok(orig) = fs::read_to_string(&abs) {
311            save_ghost_backup(path, &orig);
312        }
313    }
314
315    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
316
317    let action = if existed { "Updated" } else { "Created" };
318    Ok(format!("{action} {path}  ({} bytes)", content.len()))
319}
320
321// ── edit_file ─────────────────────────────────────────────────────────────────
322
323pub async fn edit_file(args: &Value) -> Result<String, String> {
324    let path = require_str(args, "path")?;
325    let search = require_str(args, "search")?;
326    let replace = require_str(args, "replace")?;
327    let replace_all = args
328        .get("replace_all")
329        .and_then(|v| v.as_bool())
330        .unwrap_or(false);
331
332    if search == replace {
333        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
334    }
335
336    let abs = safe_path(path)?;
337    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
338    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
339    let original = raw.replace("\r\n", "\n");
340
341    save_ghost_backup(path, &original);
342
343    let search_trimmed = search.trim();
344    let search_non_ws_len = search_trimmed
345        .chars()
346        .filter(|c| !c.is_whitespace())
347        .count();
348    let search_line_count = search_trimmed.lines().count();
349    if search_non_ws_len < 12 && search_line_count <= 1 {
350        return Err(format!(
351            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
352             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
353        ));
354    }
355
356    // ── Exact match first ────────────────────────────────────────────────────
357    let (effective_search, was_repaired) = if original.contains(search) {
358        let exact_match_count = original.matches(search).count();
359        if exact_match_count > 1 && !replace_all {
360            return Err(format!(
361                "edit_file: search string matched {} times in {path}.\n\
362                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
363                exact_match_count
364            ));
365        }
366        (search.to_string(), false)
367    } else {
368        // ── Fuzzy repair: progressive normalisation ───────────────────────
369        // Level 1: rstrip only — preserves indentation, strips trailing spaces.
370        // Level 2: full strip — corrects indentation mismatches.
371        // Level 3: cross-file hint — tells the model which file has the string.
372        let span =
373            rstrip_find_span(&original, search).or_else(|| fuzzy_find_span(&original, search));
374        match span {
375            Some(span) => {
376                let real_slice = original[span.clone()].to_string();
377                (real_slice, true)
378            }
379            None => {
380                let hint = nearest_lines(&original, search);
381                let cross_hint = find_search_in_workspace(search, path)
382                    .map(|found| format!("\nNote: search string found in '{found}' — did you mean to edit that file?"))
383                    .unwrap_or_default();
384                return Err(format!(
385                    "edit_file: search string not found in {path}.\n\
386                     The 'search' value must match the file content exactly \
387                     (including whitespace/indentation).\n\
388                     {hint}{cross_hint}"
389                ));
390            }
391        }
392    };
393
394    // When a fuzzy match was used, adjust the replace string's indentation to
395    // match the file's actual indent level (not the model's potentially-wrong indent).
396    let effective_replace = if was_repaired {
397        adjust_replace_indent(search, effective_search.as_str(), replace)
398    } else {
399        replace.to_string()
400    };
401
402    let updated = if replace_all {
403        original.replace(effective_search.as_str(), effective_replace.as_str())
404    } else {
405        original.replacen(effective_search.as_str(), effective_replace.as_str(), 1)
406    };
407
408    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
409
410    let removed = original.lines().count();
411    let added = updated.lines().count();
412    let repair_note = if was_repaired {
413        "  [indent auto-corrected]"
414    } else {
415        ""
416    };
417
418    let mut diff_block = String::new();
419    diff_block.push_str("\n--- DIFF \n");
420    for line in effective_search.lines() {
421        diff_block.push_str(&format!("- {}\n", line));
422    }
423    for line in effective_replace.lines() {
424        diff_block.push_str(&format!("+ {}\n", line));
425    }
426
427    Ok(format!(
428        "Edited {path}  ({} -> {} lines){repair_note}{}",
429        removed, added, diff_block
430    ))
431}
432
433// ── patch_hunk ────────────────────────────────────────────────────────────────
434
435pub async fn patch_hunk(args: &Value) -> Result<String, String> {
436    let path = require_str(args, "path")?;
437    let start_line = require_usize(args, "start_line")?;
438    let end_line = require_usize(args, "end_line")?;
439    let replacement = require_str(args, "replacement")?;
440
441    let abs = safe_path(path)?;
442    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
443
444    save_ghost_backup(path, &original);
445
446    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
447    let total = lines.len();
448
449    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
450        return Err(format!(
451            "patch_hunk: invalid line range {}-{} for file with {} lines",
452            start_line, end_line, total
453        ));
454    }
455
456    let mut updated_lines = Vec::new();
457    // 0-indexed adjustment
458    let s_idx = start_line - 1;
459    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
460
461    // 1. Lines before the hunk
462    updated_lines.extend_from_slice(&lines[0..s_idx]);
463
464    // 2. The hunk replacement
465    for line in replacement.lines() {
466        updated_lines.push(line.to_string());
467    }
468
469    // 3. Lines after the hunk
470    if e_idx < total {
471        updated_lines.extend_from_slice(&lines[e_idx..total]);
472    }
473
474    let updated_content = updated_lines.join("\n");
475    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
476
477    let mut diff = String::new();
478    diff.push_str("\n--- HUNK DIFF ---\n");
479    for i in s_idx..e_idx {
480        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
481    }
482    for line in replacement.lines() {
483        diff.push_str(&format!("+ {}\n", line.trim_end()));
484    }
485
486    Ok(format!(
487        "Patched {path} lines {}-{} ({} -> {} lines){}",
488        start_line,
489        end_line,
490        (e_idx - s_idx),
491        replacement.lines().count(),
492        diff
493    ))
494}
495
496// ── multi_search_replace ──────────────────────────────────────────────────────
497
498#[derive(serde::Deserialize)]
499struct SearchReplaceHunk {
500    search: String,
501    replace: String,
502}
503
504pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
505    let path = require_str(args, "path")?;
506    let hunks_val = args
507        .get("hunks")
508        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
509
510    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
511        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
512
513    if hunks.is_empty() {
514        return Err("multi_search_replace: hunks array is empty".to_string());
515    }
516
517    let abs = safe_path(path)?;
518    let raw =
519        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
520    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
521    let original = raw.replace("\r\n", "\n");
522
523    save_ghost_backup(path, &original);
524
525    let mut current_content = original.clone();
526    let mut diff = String::new();
527    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
528
529    let mut patched_hunks = 0;
530
531    for (i, hunk) in hunks.iter().enumerate() {
532        let match_count = current_content.matches(&hunk.search).count();
533
534        let (effective_search, effective_replace) = if match_count == 1 {
535            // Exact match — use as-is.
536            (hunk.search.clone(), hunk.replace.clone())
537        } else if match_count == 0 {
538            // Progressive fuzzy fallback: rstrip → full-strip.
539            let span = rstrip_find_span(&current_content, &hunk.search)
540                .or_else(|| fuzzy_find_span(&current_content, &hunk.search));
541            match span {
542                Some(span) => {
543                    let real_slice = current_content[span].to_string();
544                    let adjusted_replace =
545                        adjust_replace_indent(&hunk.search, &real_slice, &hunk.replace);
546                    (real_slice, adjusted_replace)
547                }
548                None => {
549                    return Err(format!(
550                        "multi_search_replace: hunk {} search string not found in file.",
551                        i
552                    ));
553                }
554            }
555        } else {
556            return Err(format!(
557                "multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.",
558                i, match_count
559            ));
560        };
561
562        diff.push_str(&format!("\n@@ Hunk {} @@\n", i + 1));
563        for line in effective_search.lines() {
564            diff.push_str(&format!("- {}\n", line.trim_end()));
565        }
566        for line in effective_replace.lines() {
567            diff.push_str(&format!("+ {}\n", line.trim_end()));
568        }
569
570        current_content = current_content.replacen(&effective_search, &effective_replace, 1);
571        patched_hunks += 1;
572    }
573
574    fs::write(&abs, &current_content)
575        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
576
577    Ok(format!(
578        "Modified {} hunks in {} using exact search-and-replace.{}",
579        patched_hunks, path, diff
580    ))
581}
582
583// ── list_files ────────────────────────────────────────────────────────────────
584
585pub async fn list_files(args: &Value) -> Result<String, String> {
586    let started = Instant::now();
587    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
588    let ext_filter = args.get("extension").and_then(|v| v.as_str());
589
590    let base = safe_path(base_str)?;
591
592    let mut files: Vec<PathBuf> = Vec::new();
593    let mut scanned_count = 0;
594    for entry in WalkDir::new(&base).follow_links(false) {
595        scanned_count += 1;
596        if scanned_count > 25_000 {
597            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
598        }
599        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
600        if !entry.file_type().is_file() {
601            continue;
602        }
603        let p = entry.path();
604
605        // Skip hidden dirs / target / node_modules
606        if path_has_hidden_segment(p) {
607            continue;
608        }
609
610        if let Some(ext) = ext_filter {
611            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
612                continue;
613            }
614        }
615        files.push(p.to_path_buf());
616    }
617
618    // Sort by modification time (newest first).
619    files.sort_by_key(|p| {
620        fs::metadata(p)
621            .and_then(|m| m.modified())
622            .ok()
623            .map(std::cmp::Reverse)
624    });
625
626    let total = files.len();
627    const LIMIT: usize = 200;
628    let truncated = total > LIMIT;
629    let shown: Vec<String> = files
630        .into_iter()
631        .take(LIMIT)
632        .map(|p| p.display().to_string())
633        .collect();
634
635    let ms = started.elapsed().as_millis();
636    let mut out = format!(
637        "{} file(s) in {}  ({ms}ms){}",
638        total.min(LIMIT),
639        base_str,
640        if truncated {
641            "  [truncated at 200]"
642        } else {
643            ""
644        }
645    );
646    out.push('\n');
647    out.push_str(&shown.join("\n"));
648    Ok(out)
649}
650
651// ── create_directory ──────────────────────────────────────────────────────────
652
653pub async fn create_directory(args: &Value) -> Result<String, String> {
654    let path = require_str(args, "path")?;
655    let abs = safe_path_allow_new(path)?;
656
657    if abs.exists() {
658        if abs.is_dir() {
659            return Ok(format!("Directory already exists: {path}"));
660        } else {
661            return Err(format!("A file already exists at this path: {path}"));
662        }
663    }
664
665    fs::create_dir_all(&abs).map_err(|e| format!("create_directory: {e} ({path})"))?;
666    Ok(format!("Created directory: {path}"))
667}
668
669// ── grep_files ────────────────────────────────────────────────────────────────
670
671pub async fn grep_files(args: &Value) -> Result<String, String> {
672    let pattern = require_str(args, "pattern")?;
673    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
674    let ext_filter = args.get("extension").and_then(|v| v.as_str());
675    let case_insensitive = args
676        .get("case_insensitive")
677        .and_then(|v| v.as_bool())
678        .unwrap_or(true);
679    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
680    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
681    let offset = get_usize_arg(args, "offset").unwrap_or(0);
682
683    // Context lines: `context` sets both before+after; `before`/`after` override individually.
684    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
685    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
686    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
687
688    let base = safe_path(base_str)?;
689
690    let regex = regex::RegexBuilder::new(pattern)
691        .case_insensitive(case_insensitive)
692        .build()
693        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
694
695    // ── files_only mode ───────────────────────────────────────────────────────
696    if files_only {
697        let mut matched_files: Vec<String> = Vec::new();
698        let mut scanned_count = 0;
699
700        for entry in WalkDir::new(&base).follow_links(false) {
701            scanned_count += 1;
702            if scanned_count > 25_000 {
703                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
704            }
705            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
706            if !entry.file_type().is_file() {
707                continue;
708            }
709            let p = entry.path();
710            if path_has_hidden_segment(p) {
711                continue;
712            }
713            if let Some(ext) = ext_filter {
714                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
715                    continue;
716                }
717            }
718            let Ok(contents) = fs::read_to_string(p) else {
719                continue;
720            };
721            if contents.lines().any(|line| regex.is_match(line)) {
722                matched_files.push(p.display().to_string());
723            }
724        }
725
726        if matched_files.is_empty() {
727            return Ok(format!("No files matching '{pattern}' in {base_str}"));
728        }
729
730        let total = matched_files.len();
731        let page: Vec<_> = matched_files
732            .into_iter()
733            .skip(offset)
734            .take(head_limit)
735            .collect();
736        let showing = page.len();
737        let mut out = format!("{total} file(s) match '{pattern}'");
738        if offset > 0 || showing < total {
739            out.push_str(&format!(
740                " [showing {}-{} of {total}]",
741                offset + 1,
742                offset + showing
743            ));
744        }
745        out.push('\n');
746        out.push_str(&page.join("\n"));
747        return Ok(out);
748    }
749
750    // ── content mode with optional context lines ──────────────────────────────
751
752    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
753    struct Hunk {
754        path: String,
755        /// (line_number_1_indexed, line_text, is_match)
756        lines: Vec<(usize, String, bool)>,
757    }
758
759    let mut hunks: Vec<Hunk> = Vec::new();
760    let mut total_matches = 0usize;
761    let mut files_matched = 0usize;
762    let mut scanned_count = 0;
763
764    for entry in WalkDir::new(&base).follow_links(false) {
765        scanned_count += 1;
766        if scanned_count > 25_000 {
767            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
768        }
769        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
770        if !entry.file_type().is_file() {
771            continue;
772        }
773        let p = entry.path();
774        if path_has_hidden_segment(p) {
775            continue;
776        }
777        if let Some(ext) = ext_filter {
778            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
779                continue;
780            }
781        }
782        let Ok(contents) = fs::read_to_string(p) else {
783            continue;
784        };
785        let all_lines: Vec<&str> = contents.lines().collect();
786        let n = all_lines.len();
787
788        // Find all match indices in this file.
789        let match_idxs: Vec<usize> = all_lines
790            .iter()
791            .enumerate()
792            .filter(|(_, line)| regex.is_match(line))
793            .map(|(i, _)| i)
794            .collect();
795
796        if match_idxs.is_empty() {
797            continue;
798        }
799        files_matched += 1;
800        total_matches += match_idxs.len();
801
802        // Merge overlapping ranges into hunks.
803        let path_str = p.display().to_string();
804        let mut ranges: Vec<(usize, usize)> = match_idxs
805            .iter()
806            .map(|&i| {
807                (
808                    i.saturating_sub(before),
809                    (i + after).min(n.saturating_sub(1)),
810                )
811            })
812            .collect();
813
814        // Sort and merge overlapping ranges.
815        ranges.sort_unstable();
816        let mut merged: Vec<(usize, usize)> = Vec::new();
817        for (s, e) in ranges {
818            if let Some(last) = merged.last_mut() {
819                if s <= last.1 + 1 {
820                    last.1 = last.1.max(e);
821                    continue;
822                }
823            }
824            merged.push((s, e));
825        }
826
827        // Build hunks from merged ranges.
828        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
829        for (start, end) in merged {
830            let mut hunk_lines = Vec::new();
831            for i in start..=end {
832                hunk_lines.push((i + 1, all_lines[i].to_string(), match_set.contains(&i)));
833            }
834            hunks.push(Hunk {
835                path: path_str.clone(),
836                lines: hunk_lines,
837            });
838        }
839    }
840
841    if hunks.is_empty() {
842        return Ok(format!("No matches for '{pattern}' in {base_str}"));
843    }
844
845    let total_hunks = hunks.len();
846    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
847    let showing = page_hunks.len();
848
849    let mut out =
850        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
851    if offset > 0 || showing < total_hunks {
852        out.push_str(&format!(
853            " [hunks {}-{} of {total_hunks}]",
854            offset + 1,
855            offset + showing
856        ));
857    }
858    out.push('\n');
859
860    for (i, hunk) in page_hunks.iter().enumerate() {
861        if i > 0 {
862            out.push_str("\n--\n");
863        }
864        for (lineno, text, is_match) in &hunk.lines {
865            if *is_match {
866                out.push_str(&format!("{}:{}:{}\n", hunk.path, lineno, text));
867            } else {
868                out.push_str(&format!("{}: {}-{}\n", hunk.path, lineno, text));
869            }
870        }
871    }
872
873    Ok(out.trim_end().to_string())
874}
875
876// ── Argument helpers ──────────────────────────────────────────────────────────
877
878fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
879    args.get(key)
880        .and_then(|v| v.as_str())
881        .ok_or_else(|| format!("Missing required argument: '{key}'"))
882}
883
884fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
885    args.get(key).and_then(value_as_usize)
886}
887
888fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
889    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
890}
891
892fn value_as_usize(value: &Value) -> Option<usize> {
893    if let Some(v) = value.as_u64() {
894        return usize::try_from(v).ok();
895    }
896
897    if let Some(v) = value.as_i64() {
898        return if v >= 0 {
899            usize::try_from(v as u64).ok()
900        } else {
901            None
902        };
903    }
904
905    if let Some(v) = value.as_f64() {
906        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
907            return Some(v as usize);
908        }
909        return None;
910    }
911
912    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
913}
914
915// ── Path helpers ──────────────────────────────────────────────────────────────
916
917/// Resolve a path that must already exist, and check it's inside the workspace.
918fn safe_path(path: &str) -> Result<PathBuf, String> {
919    let candidate = resolve_candidate(path);
920    canonicalize_safe(&candidate, path)
921}
922
923/// Resolve a path that may not exist yet (for write_file).
924fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
925    let candidate = resolve_candidate(path);
926
927    // Try canonical first.
928    if let Ok(abs) = candidate.canonicalize() {
929        check_workspace_bounds(&abs, path)?;
930        return Ok(abs);
931    }
932
933    // File doesn't exist yet — canonicalize the parent, append the filename.
934    let parent = candidate.parent().unwrap_or(Path::new("."));
935    let name = candidate
936        .file_name()
937        .ok_or_else(|| format!("invalid path: {path}"))?;
938    let abs_parent = parent
939        .canonicalize()
940        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
941    let abs = abs_parent.join(name);
942    check_workspace_bounds(&abs, path)?;
943    Ok(abs)
944}
945
946pub(crate) fn resolve_candidate(path: &str) -> PathBuf {
947    // 1. Handle Special Sovereign Tokens
948    let upper = path.to_uppercase();
949
950    // Bare token support — matches exact names with or without @ prefix, with or without
951    // trailing slash. Enables /cd downloads, /cd @DESKTOP, /cd ~ etc.
952    let bare = upper.trim_end_matches('/').trim_start_matches('@');
953    let bare_resolved = match bare {
954        "DESKTOP" => dirs::desktop_dir(),
955        "DOWNLOADS" | "DOWNLOAD" => dirs::download_dir(),
956        "DOCUMENTS" | "DOCS" => dirs::document_dir(),
957        "PICTURES" | "IMAGES" => dirs::picture_dir(),
958        "VIDEOS" | "MOVIES" => dirs::video_dir(),
959        "MUSIC" | "AUDIO" => dirs::audio_dir(),
960        "HOME" => dirs::home_dir(),
961        "TEMP" | "TMP" => Some(std::env::temp_dir()),
962        "CACHE" => dirs::cache_dir(),
963        "CONFIG" => dirs::config_dir(),
964        "DATA" => dirs::data_dir(),
965        _ => None,
966    };
967    // Also handle bare ~ and ~/ as home
968    let bare_resolved = bare_resolved.or_else(|| {
969        if path == "~" || path == "~/" {
970            dirs::home_dir()
971        } else {
972            None
973        }
974    });
975    if let Some(p) = bare_resolved {
976        return p;
977    }
978
979    // Helper to resolve via dirs crate
980    let resolved = if upper.starts_with("@DESKTOP/") {
981        dirs::desktop_dir().map(|p| p.join(&path[9..]))
982    } else if upper.starts_with("@DOCUMENTS/") {
983        dirs::document_dir().map(|p| p.join(&path[11..]))
984    } else if upper.starts_with("@DOWNLOADS/") {
985        dirs::download_dir().map(|p| p.join(&path[11..]))
986    } else if upper.starts_with("@PICTURES/") || upper.starts_with("@IMAGES/") {
987        let offset = if upper.starts_with("@PICTURES/") {
988            10
989        } else {
990            8
991        };
992        dirs::picture_dir().map(|p| p.join(&path[offset..]))
993    } else if upper.starts_with("@VIDEOS/") || upper.starts_with("@MOVIES/") {
994        let offset = if upper.starts_with("@VIDEOS/") { 8 } else { 8 };
995        dirs::video_dir().map(|p| p.join(&path[offset..]))
996    } else if upper.starts_with("@MUSIC/") || upper.starts_with("@AUDIO/") {
997        let offset = if upper.starts_with("@MUSIC/") { 7 } else { 7 };
998        dirs::audio_dir().map(|p| p.join(&path[offset..]))
999    } else if upper.starts_with("@HOME/") || upper.starts_with("~/") {
1000        let offset = if upper.starts_with("@HOME/") { 6 } else { 2 };
1001        dirs::home_dir().map(|p| p.join(&path[offset..]))
1002    } else if upper.starts_with("@TEMP/") {
1003        Some(std::env::temp_dir().join(&path[6..]))
1004    } else if upper.starts_with("@CACHE/") {
1005        dirs::cache_dir().map(|p| p.join(&path[7..]))
1006    } else if upper.starts_with("@CONFIG/") {
1007        dirs::config_dir().map(|p| p.join(&path[8..]))
1008    } else if upper.starts_with("@DATA/") {
1009        dirs::data_dir().map(|p| p.join(&path[6..]))
1010    } else {
1011        None
1012    };
1013
1014    if let Some(p) = resolved {
1015        return p;
1016    }
1017
1018    // 2. Fallback to Standard Resolution
1019    let p = Path::new(path);
1020    if p.is_absolute() {
1021        p.to_path_buf()
1022    } else {
1023        std::env::current_dir()
1024            .unwrap_or_else(|_| PathBuf::from("."))
1025            .join(p)
1026    }
1027}
1028
1029fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
1030    let abs = candidate
1031        .canonicalize()
1032        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
1033    check_workspace_bounds(&abs, original)?;
1034    Ok(abs)
1035}
1036
1037fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
1038    // Delegate to the existing guard for blacklist + traversal checks.
1039    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
1040    super::guard::path_is_safe(&workspace, abs)
1041        .map(|_| ())
1042        .map_err(|e| format!("file access denied for '{original}': {e}"))
1043}
1044
/// Returns true if any component of `p` marks the path as one to skip:
/// a dot-prefixed name, `target`, `node_modules`, or `__pycache__`.
///
/// `.hematite`, `.git`, `.` and `..` are explicitly exempt and never count
/// as hidden on their own.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let name = component.as_os_str().to_string_lossy();
        match name.as_ref() {
            ".hematite" | ".git" | "." | ".." => continue,
            "target" | "node_modules" | "__pycache__" => return true,
            other if other.starts_with('.') => return true,
            _ => {}
        }
    }
    false
}
1055
/// Show the lines nearest to where the search string *almost* matched,
/// so the model can see the real indentation/content and self-correct.
///
/// The "best" line is the one sharing the longest common prefix (after
/// trimming) with the first non-blank line of `search`; on ties the last such
/// line wins. The snippet covers up to three lines before and four lines
/// after it, each prefixed with its 1-based line number, and the header
/// reports the 1-based range of the lines actually shown.
///
/// Returns `"(file is empty)"` when `content` has no lines.
fn nearest_lines(content: &str, search: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return "(file is empty)".into();
    }

    // Anchor on the first non-empty search line.
    let first_search_line = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    let best_idx = if first_search_line.is_empty() {
        0
    } else {
        lines
            .iter()
            .enumerate()
            .max_by_key(|(_, l)| {
                // Score: length of longest common prefix after trimming.
                first_search_line
                    .chars()
                    .zip(l.trim().chars())
                    .take_while(|(a, b)| a == b)
                    .count()
            })
            .map(|(i, _)| i)
            .unwrap_or(0)
    };

    let start = best_idx.saturating_sub(3);
    let end = (best_idx + 5).min(lines.len());
    let snippet = lines[start..end]
        .iter()
        .enumerate()
        .map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
        .collect::<Vec<_>>()
        .join("\n");

    // The header previously printed `best_idx + 1` as the lower bound, which
    // disagreed with the snippet whenever context lines preceded the best
    // line. Report the range that is really displayed.
    format!(
        "Nearest matching lines ({}:{}):\n{}",
        start + 1,
        end,
        snippet
    )
}
1107
/// Core span-mapping logic shared by both fuzzy match levels.
///
/// Applies `normalise` to both `content` and `search`, looks for the
/// normalised search text inside the normalised content, and maps a hit back
/// to a byte range in the original (un-normalised) `content`. The returned
/// range always covers whole original lines, from the start of the first
/// matched line to the end of the last one (line terminator excluded).
///
/// `normalise` is expected to work line by line and keep the number of lines
/// unchanged (as the rstrip / full-strip callers do); the final re-check of
/// the candidate slice guards against normalisers that do not.
///
/// Returns `None` when the normalised search text is empty or when no
/// occurrence lines up with whole lines of `content`.
fn find_span_normalised(
    content: &str,
    search: &str,
    normalise: impl Fn(&str) -> String,
) -> Option<std::ops::Range<usize>> {
    let norm_content = normalise(content);
    let norm_search = normalise(search).trim_matches('\n').to_string();

    if norm_search.is_empty() {
        return None;
    }

    let search_lines = norm_search.bytes().filter(|&b| b == b'\n').count() + 1;

    // Byte span (terminator excluded) of every original line. Measuring the
    // real terminators keeps offsets right for CRLF content as well, instead
    // of assuming every line ends in a single '\n' byte.
    let mut line_spans: Vec<(usize, usize)> = Vec::new();
    let mut offset = 0;
    for piece in content.split_inclusive('\n') {
        let text = match piece.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => piece,
        };
        line_spans.push((offset, offset + text.len()));
        offset += piece.len();
    }

    let norm_bytes = norm_content.as_bytes();
    let mut from = 0; // where the next search in `norm_content` begins
    let mut counted = 0; // newlines in `norm_content[..counted]` == `line_no`
    let mut line_no = 0;

    // Walk every occurrence rather than only the first: an early hit in the
    // middle of a line must not hide a later whole-line match.
    while let Some(rel) = norm_content[from..].find(norm_search.as_str()) {
        let pos = from + rel;
        let end = pos + norm_search.len();

        line_no += norm_bytes[counted..pos]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        counted = pos;

        let starts_line = pos == 0 || norm_bytes[pos - 1] == b'\n';
        let ends_line = end == norm_bytes.len() || norm_bytes[end] == b'\n';

        if starts_line && ends_line {
            let first = line_spans.get(line_no);
            let last = line_spans.get(line_no + search_lines - 1);
            if let (Some(&(start, _)), Some(&(_, stop))) = (first, last) {
                let candidate = &content[start..stop];
                if normalise(candidate).trim_end_matches('\n') == norm_search.as_str() {
                    return Some(start..stop);
                }
            }
        }

        // A valid match must begin at a line start, so resume at the next line.
        match norm_content[pos..].find('\n') {
            Some(nl) => from = pos + nl + 1,
            None => break,
        }
    }

    None
}
1173
1174/// Level 1 fuzzy: rstrip only — removes trailing whitespace per line but
1175/// preserves leading indentation. Catches trailing-space mismatches where
1176/// the model's indentation is actually correct.
1177fn rstrip_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1178    find_span_normalised(content, search, |s| {
1179        s.lines()
1180            .map(|l| l.trim_end())
1181            .collect::<Vec<_>>()
1182            .join("\n")
1183    })
1184}
1185
1186/// Level 2 fuzzy: full strip — trims all leading and trailing whitespace
1187/// per line. Catches indentation mismatches where the model wrote the
1188/// correct content but with wrong indent level.
1189fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1190    find_span_normalised(content, search, |s| {
1191        s.lines().map(|l| l.trim()).collect::<Vec<_>>().join("\n")
1192    })
1193}
1194
1195/// Scan source files in the workspace for a search string that failed to
1196/// match in the intended target file. Returns the first file path where
1197/// the string is found (after CRLF normalisation), capped at 100 files.
1198/// Used to generate a "did you mean this file?" hint in edit errors.
1199fn find_search_in_workspace(search: &str, skip_path: &str) -> Option<String> {
1200    let root = workspace_root();
1201    let norm_search = search.replace("\r\n", "\n");
1202    let mut checked = 0usize;
1203
1204    let walker = ignore::WalkBuilder::new(&root)
1205        .hidden(true)
1206        .ignore(true)
1207        .git_ignore(true)
1208        .build();
1209
1210    for entry in walker.flatten() {
1211        if checked >= 100 {
1212            break;
1213        }
1214        let path = entry.path();
1215        if !path.is_file() {
1216            continue;
1217        }
1218        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1219        if !matches!(
1220            ext,
1221            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h"
1222        ) {
1223            continue;
1224        }
1225        let rel = path
1226            .strip_prefix(&root)
1227            .unwrap_or(path)
1228            .to_string_lossy()
1229            .replace('\\', "/");
1230        if rel == skip_path {
1231            continue;
1232        }
1233        checked += 1;
1234        if let Ok(content) = std::fs::read_to_string(path) {
1235            let normalised = content.replace("\r\n", "\n");
1236            if normalised.contains(&norm_search) {
1237                return Some(rel);
1238            }
1239        }
1240    }
1241    None
1242}
1243
1244// ── Indent-aware replacement ──────────────────────────────────────────────────
1245
/// Shift the indentation of `replace` by the difference between the file's
/// real indentation and the indentation the model used in `search`.
///
/// The indent of a block is the number of leading spaces on its first
/// non-blank line (tabs are not counted). If `search` and `file_span` agree,
/// `replace` is returned unchanged. Otherwise every non-blank line of
/// `replace` has its leading-space count moved by the delta (clamped at
/// zero); blank lines are kept as they are, and a trailing newline on
/// `replace` is preserved.
///
/// Example: model wrote search/replace with 0-space indent, file uses 8
/// spaces. Delta = +8, so every line of replace gains 8 leading spaces.
fn adjust_replace_indent(search: &str, file_span: &str, replace: &str) -> String {
    let leading_spaces = |line: &str| line.len() - line.trim_start_matches(' ').len();
    let block_indent = |text: &str| {
        text.lines()
            .find(|l| !l.trim().is_empty())
            .map_or(0, |l| leading_spaces(l))
    };

    let wanted = block_indent(file_span);
    let given = block_indent(search);
    if wanted == given {
        return replace.to_string();
    }
    let delta = wanted as i64 - given as i64;

    let mut out = String::with_capacity(replace.len());
    for (n, line) in replace.lines().enumerate() {
        if n > 0 {
            out.push('\n');
        }
        if line.trim().is_empty() {
            // Blank lines pass through untouched.
            out.push_str(line);
            continue;
        }
        let body = line.trim_start_matches(' ');
        let width = ((line.len() - body.len()) as i64 + delta).max(0) as usize;
        out.extend(std::iter::repeat(' ').take(width));
        out.push_str(body);
    }

    if replace.ends_with('\n') {
        out.push('\n');
    }
    out
}
1290
1291// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
1292
1293/// Return a formatted diff string for an edit_file operation without applying it.
1294/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1295/// search string cannot be located (caller falls through to normal tool dispatch).
1296pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1297    let path = require_str(args, "path")?;
1298    let search = require_str(args, "search")?;
1299    let replace = require_str(args, "replace")?;
1300
1301    let abs = safe_path(path)?;
1302    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1303    let original = raw.replace("\r\n", "\n");
1304
1305    let (effective_search, effective_replace): (String, String) = if original.contains(search) {
1306        (search.to_string(), replace.to_string())
1307    } else {
1308        let span =
1309            rstrip_find_span(&original, search).or_else(|| fuzzy_find_span(&original, search));
1310        match span {
1311            Some(span) => {
1312                let real_slice = original[span].to_string();
1313                let adjusted = adjust_replace_indent(search, &real_slice, replace);
1314                (real_slice, adjusted)
1315            }
1316            None => return Err("search string not found — diff preview unavailable".into()),
1317        }
1318    };
1319
1320    let mut diff = String::new();
1321    for line in effective_search.lines() {
1322        diff.push_str(&format!("- {}\n", line));
1323    }
1324    for line in effective_replace.lines() {
1325        diff.push_str(&format!("+ {}\n", line));
1326    }
1327    Ok(diff)
1328}
1329
1330/// Return a formatted diff string for a patch_hunk operation without applying it.
1331pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1332    let path = require_str(args, "path")?;
1333    let start_line = require_usize(args, "start_line")?;
1334    let end_line = require_usize(args, "end_line")?;
1335    let replacement = require_str(args, "replacement")?;
1336
1337    let abs = safe_path(path)?;
1338    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1339    let lines: Vec<&str> = original.lines().collect();
1340    let total = lines.len();
1341
1342    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1343        return Err(format!(
1344            "patch_hunk: invalid line range {}-{} for file with {} lines",
1345            start_line, end_line, total
1346        ));
1347    }
1348
1349    let s_idx = start_line - 1;
1350    let e_idx = end_line;
1351
1352    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1353    for i in s_idx..e_idx {
1354        diff.push_str(&format!("- {}\n", lines[i].trim_end()));
1355    }
1356    for line in replacement.lines() {
1357        diff.push_str(&format!("+ {}\n", line.trim_end()));
1358    }
1359    Ok(diff)
1360}
1361
1362/// Return a formatted diff string for a multi_search_replace operation without applying it.
1363pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1364    let hunks_val = args
1365        .get("hunks")
1366        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1367
1368    #[derive(serde::Deserialize)]
1369    struct PreviewHunk {
1370        search: String,
1371        replace: String,
1372    }
1373    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1374        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1375
1376    let mut diff = String::new();
1377    for (i, hunk) in hunks.iter().enumerate() {
1378        if hunks.len() > 1 {
1379            diff.push_str(&format!("@@ hunk {} @@\n", i + 1));
1380        }
1381        for line in hunk.search.lines() {
1382            diff.push_str(&format!("- {}\n", line.trim_end()));
1383        }
1384        for line in hunk.replace.lines() {
1385            diff.push_str(&format!("+ {}\n", line.trim_end()));
1386        }
1387    }
1388    Ok(diff)
1389}
1390
/// Find the workspace root: the nearest directory, starting at the current
/// working directory and walking upward, that contains `.git`, `Cargo.toml`
/// or `package.json`.
///
/// Falls back to the current working directory (or `.` if that cannot be
/// read) when no ancestor carries a marker.
pub fn workspace_root() -> PathBuf {
    const MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let start = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let found = start
        .ancestors()
        .find(|dir| MARKERS.iter().any(|marker| dir.join(marker).exists()));

    match found {
        Some(dir) => dir.to_path_buf(),
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    }
}
1407
1408/// Returns true if `path` is a known sovereign OS directory (Desktop, Downloads,
1409/// Documents, Pictures, Videos, Music). These directories should not accumulate
1410/// `.hematite/` workspace state — they use the global `~/.hematite/` instead.
1411pub fn is_sovereign_directory(path: &Path) -> bool {
1412    let candidates = [
1413        dirs::desktop_dir(),
1414        dirs::download_dir(),
1415        dirs::document_dir(),
1416        dirs::picture_dir(),
1417        dirs::video_dir(),
1418        dirs::audio_dir(),
1419    ];
1420    candidates
1421        .iter()
1422        .filter_map(|d| d.as_deref())
1423        .any(|d| d == path)
1424}
1425
1426/// Returns the directory where Hematite's runtime state (`.hematite/`) should live.
1427///
1428/// - In sovereign OS directories (Desktop, Downloads, Documents, Pictures, Videos,
1429///   Music): returns `~/.hematite/` so no workspace folder is created there.
1430/// - Everywhere else: returns `workspace_root()/.hematite/` as normal.
1431pub fn hematite_dir() -> PathBuf {
1432    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1433    if is_sovereign_directory(&cwd) {
1434        if let Some(home) = dirs::home_dir() {
1435            return home.join(".hematite");
1436        }
1437    }
1438    workspace_root().join(".hematite")
1439}
1440
1441/// Returns true if the workspace root looks like a real project.
1442/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1443/// count — at least one explicit build/package marker must also be present.
1444pub fn is_project_workspace() -> bool {
1445    let root = workspace_root();
1446    let has_explicit_marker = root.join("Cargo.toml").exists()
1447        || root.join("package.json").exists()
1448        || root.join("pyproject.toml").exists()
1449        || root.join("go.mod").exists()
1450        || root.join("setup.py").exists()
1451        || root.join("pom.xml").exists()
1452        || root.join("build.gradle").exists()
1453        || root.join("CMakeLists.txt").exists();
1454    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1455}
1456
1457// ── open_in_system_editor ───────────────────────────────────────────────────
1458
/// Open `path` with the operating system's default handler and wait for the
/// launcher command to finish.
///
/// - Windows: `cmd /c start "" <path>`
/// - macOS: `open <path>`
/// - other Unix: `xdg-open <path>`
///
/// # Errors
/// Returns a message when the file does not exist, when the launcher cannot
/// be spawned, or when it exits unsuccessfully.
pub fn open_in_system_editor(path: &std::path::Path) -> Result<(), String> {
    if !path.exists() {
        return Err(format!("File not found: {}", path.display()));
    }

    #[cfg(target_os = "windows")]
    {
        // `start` opens the file in its associated application; the empty
        // string is the window title so a quoted path is not mistaken for it.
        let target = path.to_string_lossy();
        let mut launcher = std::process::Command::new("cmd");
        launcher.args(["/c", "start", "", &*target]);
        match launcher.status() {
            Ok(exit) if exit.success() => {}
            Ok(_) => return Err("Editor command failed to start.".into()),
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
        }
    }

    #[cfg(target_os = "macos")]
    {
        let mut launcher = std::process::Command::new("open");
        launcher.arg(path);
        match launcher.status() {
            Ok(exit) if exit.success() => {}
            Ok(_) => return Err("open command failed.".into()),
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
        }
    }

    #[cfg(all(unix, not(target_os = "macos")))]
    {
        // Linux and other Unix desktops: defer to xdg-open.
        let mut launcher = std::process::Command::new("xdg-open");
        launcher.arg(path);
        match launcher.status() {
            Ok(exit) if exit.success() => {}
            Ok(_) => return Err("xdg-open failed.".into()),
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
        }
    }

    Ok(())
}