hematite/tools/
file_ops.rs

1use std::fmt::Write as _;
2
3use crate::agent::truncation::safe_head;
4use serde_json::Value;
5use std::fs;
6use std::io;
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9use walkdir::WalkDir;
10
11// ── Symlink-safe write ────────────────────────────────────────────────────────
12
/// Writes `content` to `path` unless `path` itself is a symbolic link.
///
/// The check uses [`fs::symlink_metadata`], which describes the link rather
/// than the file it points at, so a symlink planted in the workspace cannot
/// redirect the write to some other file. When the metadata lookup fails
/// (for example because nothing exists at `path` yet) the check is skipped
/// and the call behaves exactly like [`fs::write`].
///
/// # Errors
/// Returns an [`io::ErrorKind::InvalidInput`] error when `path` is a symlink,
/// otherwise whatever [`fs::write`] reports.
pub fn safe_write(path: &Path, content: impl AsRef<[u8]>) -> io::Result<()> {
    // A failed lookup counts as "not a symlink" so brand-new files can be created.
    let target_is_symlink = fs::symlink_metadata(path)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);

    if target_is_symlink {
        let reason = format!(
            "Refused to write to '{}': path is a symlink",
            path.display()
        );
        return Err(io::Error::new(io::ErrorKind::InvalidInput, reason));
    }

    fs::write(path, content)
}
33
34// ── Ghost Ledger ──────────────────────────────────────────────────────────────
35
/// Maximum number of `.bak` files kept in the ghost directory.
const MAX_GHOST_BACKUPS: usize = 8;

/// Trims `ghost_dir` down to the newest [`MAX_GHOST_BACKUPS`] `.bak` files and
/// rewrites `ledger.txt` so it only references backups that were kept.
///
/// Everything here is best effort: an unreadable directory or ledger simply
/// ends the function, and individual delete/write failures are ignored.
/// Ledger lines that contain no `|` separator are dropped during the rewrite.
fn prune_ghost_backups(ghost_dir: &Path) {
    let Ok(entries) = fs::read_dir(ghost_dir) else {
        return;
    };

    // Only files whose extension is `bak` (case-insensitive) are backups.
    let mut backups: Vec<_> = entries
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("bak"))
                .unwrap_or(false)
        })
        .collect();

    // Newest first. `sort_by_cached_key` stats every file exactly once, so the
    // sort sees a consistent key per entry even if a timestamp changes while
    // we are sorting, and we avoid a metadata syscall per comparison.
    // Entries whose mtime cannot be read sort as oldest and are pruned first.
    backups.sort_by_cached_key(|entry| entry.metadata().and_then(|meta| meta.modified()).ok());
    backups.reverse();

    // Paths are compared with forward slashes so Windows-style ledger entries
    // still match.
    let retained: std::collections::HashSet<String> = backups
        .iter()
        .take(MAX_GHOST_BACKUPS)
        .map(|entry| entry.path().to_string_lossy().replace('\\', "/"))
        .collect();

    for entry in backups.into_iter().skip(MAX_GHOST_BACKUPS) {
        let _ = fs::remove_file(entry.path());
    }

    let ledger_path = ghost_dir.join("ledger.txt");
    let Ok(content) = fs::read_to_string(&ledger_path) else {
        return;
    };

    // Ledger format is `target|backup_path`; keep a line only when its backup
    // survived the pruning above.
    let mut rewritten = String::with_capacity(content.len());
    for line in content.lines() {
        let Some((_, backup)) = line.split_once('|') else {
            continue;
        };
        if retained.contains(&backup.replace('\\', "/")) {
            rewritten.push_str(line);
            rewritten.push('\n');
        }
    }
    let _ = fs::write(ledger_path, rewritten);
}
88
89fn save_ghost_backup(target_path: &str, content: &str) {
90    let ws = workspace_root();
91
92    // Phase 1: Try Git Ghost Snapshot
93    if crate::agent::git::is_git_repo(&ws) {
94        let _ = crate::agent::git::create_ghost_snapshot(&ws);
95    }
96
97    // Phase 2: Fallback to local file backup (Ghost Ledger)
98    let ghost_dir = hematite_dir().join("ghost");
99    let _ = fs::create_dir_all(&ghost_dir);
100    let ts = std::time::SystemTime::now()
101        .duration_since(std::time::UNIX_EPOCH)
102        .unwrap()
103        .as_millis();
104    let safe_name = Path::new(target_path)
105        .file_name()
106        .unwrap_or_default()
107        .to_string_lossy();
108    let backup_file = ghost_dir.join(format!("{}_{}.bak", ts, safe_name));
109
110    if fs::write(&backup_file, content).is_ok() {
111        use std::io::Write;
112        if let Ok(mut f) = fs::OpenOptions::new()
113            .create(true)
114            .append(true)
115            .open(ghost_dir.join("ledger.txt"))
116        {
117            let _ = writeln!(f, "{}|{}", target_path, backup_file.display());
118        }
119        prune_ghost_backups(&ghost_dir);
120    }
121}
122
123pub fn pop_ghost_ledger() -> Result<String, String> {
124    let ghost_dir = hematite_dir().join("ghost");
125    let ledger_path = ghost_dir.join("ledger.txt");
126
127    if !ledger_path.exists() {
128        return Err("Ghost Ledger is empty — no edits to undo".into());
129    }
130
131    let content = fs::read_to_string(&ledger_path).map_err(|e| e.to_string())?;
132    let mut lines: Vec<&str> = content.lines().filter(|l| !l.is_empty()).collect();
133
134    if lines.is_empty() {
135        return Err("Ghost Ledger is empty".into());
136    }
137
138    let last_line = lines.pop().unwrap();
139    let Some((target_path, backup_path)) = last_line.split_once('|') else {
140        return Err("Corrupted ledger entry".into());
141    };
142
143    let ws = workspace_root();
144
145    // Priority 1: Try Git Rollback
146    if crate::agent::git::is_git_repo(&ws) {
147        if let Ok(msg) = crate::agent::git::revert_from_ghost(&ws, target_path) {
148            let _ = fs::remove_file(backup_path);
149            let new_ledger = lines.join("\n");
150            let _ = fs::write(
151                &ledger_path,
152                if new_ledger.is_empty() {
153                    String::new()
154                } else {
155                    new_ledger + "\n"
156                },
157            );
158            return Ok(msg);
159        }
160    }
161
162    // Priority 2: Standard File Rollback
163    let original_content =
164        fs::read_to_string(backup_path).map_err(|e| format!("Failed to read backup: {e}"))?;
165    let abs_target = ws.join(target_path);
166    fs::write(&abs_target, original_content).map_err(|e| format!("Failed to restore file: {e}"))?;
167
168    let new_ledger = lines.join("\n");
169    let _ = fs::write(
170        &ledger_path,
171        if new_ledger.is_empty() {
172            String::new()
173        } else {
174            new_ledger + "\n"
175        },
176    );
177    let _ = fs::remove_file(backup_path);
178
179    Ok(format!("Restored {} from Ghost Ledger", target_path))
180}
181
182// ── read_file ─────────────────────────────────────────────────────────────────
183
184pub async fn read_file(args: &Value, budget_tokens: usize) -> Result<String, String> {
185    let path = require_str(args, "path")?;
186    let offset = get_usize_arg(args, "offset");
187    let limit = get_usize_arg(args, "limit");
188
189    let abs = safe_path(path)?;
190    let raw = fs::read_to_string(&abs).map_err(|e| format!("read_file: {e} ({path})"))?;
191
192    let lines: Vec<&str> = raw.lines().collect();
193    let total = lines.len();
194    let start = offset.unwrap_or(0).min(total);
195    let end = limit.map(|n| (start + n).min(total)).unwrap_or(total);
196
197    let mut content = lines[start..end].join("\n");
198
199    // Phase 5: Calculate predictive character budget based on remaining context.
200    let budget_chars = budget_tokens.saturating_mul(4);
201    let char_limit = if budget_tokens == 0 {
202        100_000
203    } else {
204        budget_chars.clamp(2000, 100_000)
205    };
206
207    if content.len() > char_limit {
208        let safe_end = safe_head(&content, char_limit).len();
209        content.truncate(safe_end);
210        content.push_str("\n\n--- [PREDICTIVE TRUNCATION: CONTEXT BUDGET REACHED] ---\n");
211        let _ = write!(
212            content,
213            "Output truncated at {} chars to prevent context window flooding. ",
214            char_limit
215        );
216        content
217            .push_str("To see more, use `read_file` with a higher `offset` and a smaller `limit`.");
218    } else if end < total {
219        content.push_str("\n\n--- [TRUNCATION WARNING] ---\n");
220        let _ = write!(content, "This file has {} more lines below. ", total - end);
221        content.push_str("To read more, use `read_file` with a higher `offset` OR use `inspect_lines` to find relevant blocks. \
222                         Do NOT attempt to read the entire large file at once if it keeps truncating.");
223    }
224
225    Ok(format!(
226        "[{path}  lines {}-{} of {}]\n{}",
227        start + 1,
228        end,
229        total,
230        content
231    ))
232}
233
234// ── inspect_lines ─────────────────────────────────────────────────────────────
235
236pub async fn inspect_lines(args: &Value) -> Result<String, String> {
237    let path = require_str(args, "path")?;
238    let start_line = get_usize_arg(args, "start_line").unwrap_or(1);
239    let end_line = get_usize_arg(args, "end_line");
240
241    let abs = safe_path(path)?;
242    let raw = fs::read_to_string(&abs).map_err(|e| format!("inspect_lines: {e} ({path})"))?;
243
244    let lines: Vec<&str> = raw.lines().collect();
245    let total_lines = lines.len();
246
247    // Out-of-bounds check with descriptive feedback.
248    if start_line > total_lines && total_lines > 0 {
249        return Err(format!(
250            "Invalid line range: You requested line {}, but the file only has {} lines. Try `read_file` on a smaller range or the whole file.",
251            start_line, total_lines
252        ));
253    }
254
255    let start = start_line.saturating_sub(1).min(total_lines);
256    let end = end_line.unwrap_or(total_lines).min(total_lines);
257
258    if start >= end && total_lines > 0 {
259        return Err(format!(
260            "inspect_lines: start_line ({start_line}) must be <= end_line ({})",
261            end_line.unwrap_or(total_lines)
262        ));
263    }
264
265    let mut output = format!(
266        "[inspect_lines: {path} lines {}-{} of {}]\n",
267        start + 1,
268        end,
269        total_lines
270    );
271    for (offset, line) in lines[start..end].iter().enumerate() {
272        let _ = writeln!(output, "[{:>4}] | {}", start + offset + 1, line);
273    }
274
275    Ok(output)
276}
277
278// ── tail_file ─────────────────────────────────────────────────────────────────
279
280pub async fn tail_file(args: &Value) -> Result<String, String> {
281    let path = require_str(args, "path")?;
282    let n = args
283        .get("lines")
284        .and_then(|v| v.as_u64())
285        .unwrap_or(50)
286        .min(500) as usize;
287    let grep_pat = args.get("grep").and_then(|v| v.as_str());
288
289    let abs = safe_path(path)?;
290    let raw = fs::read_to_string(&abs).map_err(|e| format!("tail_file: {e} ({path})"))?;
291
292    let all_lines: Vec<&str> = raw.lines().collect();
293    let total = all_lines.len();
294
295    // Apply optional grep filter before slicing — model asks for the last N
296    // matching lines, not the last N lines containing maybe 0 matches.
297    let filtered: Vec<(usize, &str)> = if let Some(pat) = grep_pat {
298        let re = regex::Regex::new(pat)
299            .map_err(|e| format!("tail_file: invalid grep pattern '{pat}': {e}"))?;
300        all_lines
301            .iter()
302            .enumerate()
303            .filter(|(_, l)| re.is_match(l))
304            .map(|(i, l)| (i, *l))
305            .collect()
306    } else {
307        all_lines.iter().enumerate().map(|(i, l)| (i, *l)).collect()
308    };
309
310    let total_filtered = filtered.len();
311    let skip = total_filtered.saturating_sub(n);
312    let window = &filtered[skip..];
313
314    if window.is_empty() {
315        let note = if let Some(pat) = grep_pat {
316            format!(" matching '{pat}'")
317        } else {
318            String::new()
319        };
320        return Ok(format!(
321            "[tail_file: {path} — no lines{note} found (total {total} lines)]"
322        ));
323    }
324
325    let first_abs = window[0].0 + 1;
326    let last_abs = window[window.len() - 1].0 + 1;
327    let mut out = format!(
328        "[tail_file: {path} — lines {first_abs}–{last_abs} of {total} (last {n} of {total_filtered} matched)]\n"
329    );
330    for (abs_idx, line) in window {
331        let _ = writeln!(out, "[{:>5}] {}", abs_idx + 1, line);
332    }
333
334    Ok(out)
335}
336
337// ── write_file ────────────────────────────────────────────────────────────────
338
339pub async fn write_file(args: &Value) -> Result<String, String> {
340    let path = require_str(args, "path")?;
341    let content = require_str(args, "content")?;
342
343    let abs = safe_path_allow_new(path)?;
344    if let Some(parent) = abs.parent() {
345        fs::create_dir_all(parent)
346            .map_err(|e| format!("write_file: could not create dirs: {e}"))?;
347    }
348
349    let existed = abs.exists();
350    if existed {
351        if let Ok(orig) = fs::read_to_string(&abs) {
352            save_ghost_backup(path, &orig);
353        }
354    }
355
356    fs::write(&abs, content).map_err(|e| format!("write_file: {e} ({path})"))?;
357
358    let action = if existed { "Updated" } else { "Created" };
359    Ok(format!("{action} {path}  ({} bytes)", content.len()))
360}
361
362// ── edit_file ─────────────────────────────────────────────────────────────────
363
364pub async fn edit_file(args: &Value) -> Result<String, String> {
365    let path = require_str(args, "path")?;
366    let search = require_str(args, "search")?;
367    let replace = require_str(args, "replace")?;
368    let replace_all = args
369        .get("replace_all")
370        .and_then(|v| v.as_bool())
371        .unwrap_or(false);
372
373    if search == replace {
374        return Err("edit_file: 'search' and 'replace' are identical — no change needed".into());
375    }
376
377    let abs = safe_path(path)?;
378    let raw = fs::read_to_string(&abs).map_err(|e| format!("edit_file: {e} ({path})"))?;
379    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
380    let original = raw.replace("\r\n", "\n");
381
382    save_ghost_backup(path, &original);
383
384    let search_trimmed = search.trim();
385    let search_non_ws_len = search_trimmed
386        .chars()
387        .filter(|c| !c.is_whitespace())
388        .count();
389    let search_line_count = search_trimmed.lines().count();
390    if search_non_ws_len < 12 && search_line_count <= 1 {
391        return Err(format!(
392            "edit_file: search string is too short or generic for a safe mutation in {path}.\n\
393             Provide a more specific anchor (prefer a full line, multiple lines, or use `inspect_lines` + `patch_hunk`)."
394        ));
395    }
396
397    // ── Exact match first ────────────────────────────────────────────────────
398    let (effective_search, was_repaired) = if original.contains(search) {
399        let exact_match_count = original.matches(search).count();
400        if exact_match_count > 1 && !replace_all {
401            return Err(format!(
402                "edit_file: search string matched {} times in {path}.\n\
403                 Provide a more specific unique anchor or use `inspect_lines` + `patch_hunk`.",
404                exact_match_count
405            ));
406        }
407        (search.to_string(), false)
408    } else {
409        // ── Fuzzy repair: progressive normalisation ───────────────────────
410        // Level 1: rstrip only — preserves indentation, strips trailing spaces.
411        // Level 2: indent-flexible — dedent both sides, preserve relative structure.
412        // Level 3: full strip — last resort before cross-file hint.
413        let span = rstrip_find_span(&original, search)
414            .or_else(|| indent_flexible_find_span(&original, search))
415            .or_else(|| fuzzy_find_span(&original, search));
416        match span {
417            Some(span) => {
418                let real_slice = original[span.clone()].to_string();
419                (real_slice, true)
420            }
421            None => {
422                let hint = nearest_lines(&original, search);
423                let cross_hint = find_search_in_workspace(search, path)
424                    .map(|found| format!("\nNote: search string found in '{found}' — did you mean to edit that file?"))
425                    .unwrap_or_default();
426                return Err(format!(
427                    "edit_file: search string not found in {path}.\n\
428                     The 'search' value must match the file content exactly \
429                     (including whitespace/indentation).\n\
430                     {hint}{cross_hint}"
431                ));
432            }
433        }
434    };
435
436    // When a fuzzy match was used, adjust the replace string's indentation to
437    // match the file's actual indent level (not the model's potentially-wrong indent).
438    let effective_replace = if was_repaired {
439        adjust_replace_indent(search, effective_search.as_str(), replace)
440    } else {
441        replace.to_string()
442    };
443
444    let updated = if replace_all {
445        original.replace(effective_search.as_str(), effective_replace.as_str())
446    } else {
447        original.replacen(effective_search.as_str(), effective_replace.as_str(), 1)
448    };
449
450    fs::write(&abs, &updated).map_err(|e| format!("edit_file: write failed: {e}"))?;
451
452    let removed = original.lines().count();
453    let added = updated.lines().count();
454    let repair_note = if was_repaired {
455        "  [indent auto-corrected]"
456    } else {
457        ""
458    };
459
460    let mut diff_block =
461        String::with_capacity(effective_search.len() + effective_replace.len() + 32);
462    diff_block.push_str("\n--- DIFF \n");
463    for line in effective_search.lines() {
464        let _ = writeln!(diff_block, "- {}", line);
465    }
466    for line in effective_replace.lines() {
467        let _ = writeln!(diff_block, "+ {}", line);
468    }
469
470    Ok(format!(
471        "Edited {path}  ({} -> {} lines){repair_note}{}",
472        removed, added, diff_block
473    ))
474}
475
476// ── patch_hunk ────────────────────────────────────────────────────────────────
477
478pub async fn patch_hunk(args: &Value) -> Result<String, String> {
479    let path = require_str(args, "path")?;
480    let start_line = require_usize(args, "start_line")?;
481    let end_line = require_usize(args, "end_line")?;
482    let replacement = require_str(args, "replacement")?;
483
484    let abs = safe_path(path)?;
485    let original = fs::read_to_string(&abs).map_err(|e| format!("patch_hunk: {e} ({path})"))?;
486
487    save_ghost_backup(path, &original);
488
489    let lines: Vec<String> = original.lines().map(|s| s.to_string()).collect();
490    let total = lines.len();
491
492    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
493        return Err(format!(
494            "patch_hunk: invalid line range {}-{} for file with {} lines",
495            start_line, end_line, total
496        ));
497    }
498
499    let mut updated_lines = Vec::with_capacity(total);
500    // 0-indexed adjustment
501    let s_idx = start_line - 1;
502    let e_idx = end_line; // inclusive in current logic from 1-based start_line..end_line
503
504    // 1. Lines before the hunk
505    updated_lines.extend_from_slice(&lines[0..s_idx]);
506
507    // 2. The hunk replacement
508    for line in replacement.lines() {
509        updated_lines.push(line.to_string());
510    }
511
512    // 3. Lines after the hunk
513    if e_idx < total {
514        updated_lines.extend_from_slice(&lines[e_idx..total]);
515    }
516
517    let updated_content = updated_lines.join("\n");
518    fs::write(&abs, &updated_content).map_err(|e| format!("patch_hunk: write failed: {e}"))?;
519
520    let mut diff = String::with_capacity(replacement.len() + (e_idx - s_idx) * 64 + 32);
521    diff.push_str("\n--- HUNK DIFF ---\n");
522    for line in &lines[s_idx..e_idx] {
523        let _ = writeln!(diff, "- {}", line.trim_end());
524    }
525    for line in replacement.lines() {
526        let _ = writeln!(diff, "+ {}", line.trim_end());
527    }
528
529    Ok(format!(
530        "Patched {path} lines {}-{} ({} -> {} lines){}",
531        start_line,
532        end_line,
533        (e_idx - s_idx),
534        replacement.lines().count(),
535        diff
536    ))
537}
538
539// ── multi_search_replace ──────────────────────────────────────────────────────
540
541#[derive(serde::Deserialize)]
542struct SearchReplaceHunk {
543    search: String,
544    replace: String,
545}
546
547pub async fn multi_search_replace(args: &Value) -> Result<String, String> {
548    let path = require_str(args, "path")?;
549    let hunks_val = args
550        .get("hunks")
551        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
552
553    let hunks: Vec<SearchReplaceHunk> = serde_json::from_value(hunks_val.clone())
554        .map_err(|e| format!("multi_search_replace: invalid hunks array: {e}"))?;
555
556    if hunks.is_empty() {
557        return Err("multi_search_replace: hunks array is empty".to_string());
558    }
559
560    let abs = safe_path(path)?;
561    let raw =
562        fs::read_to_string(&abs).map_err(|e| format!("multi_search_replace: {e} ({path})"))?;
563    // Normalize CRLF → LF so search strings from the model (always LF) match on Windows.
564    let original = raw.replace("\r\n", "\n");
565
566    save_ghost_backup(path, &original);
567
568    let mut current_content = original.clone();
569    let mut diff = String::with_capacity(hunks.len() * 128 + 32);
570    diff.push_str("\n--- SEARCH & REPLACE DIFF ---\n");
571
572    let mut patched_hunks = 0;
573
574    for (i, hunk) in hunks.iter().enumerate() {
575        let match_count = current_content.matches(&hunk.search).count();
576
577        let (effective_search, effective_replace) = if match_count == 1 {
578            // Exact match — use as-is.
579            (hunk.search.clone(), hunk.replace.clone())
580        } else if match_count == 0 {
581            // Progressive fuzzy fallback: rstrip → indent-flexible → full-strip.
582            let span = rstrip_find_span(&current_content, &hunk.search)
583                .or_else(|| indent_flexible_find_span(&current_content, &hunk.search))
584                .or_else(|| fuzzy_find_span(&current_content, &hunk.search));
585            match span {
586                Some(span) => {
587                    let real_slice = current_content[span].to_string();
588                    let adjusted_replace =
589                        adjust_replace_indent(&hunk.search, &real_slice, &hunk.replace);
590                    (real_slice, adjusted_replace)
591                }
592                None => {
593                    return Err(format!(
594                        "multi_search_replace: hunk {} search string not found in file.",
595                        i
596                    ));
597                }
598            }
599        } else {
600            return Err(format!(
601                "multi_search_replace: hunk {} search string matched {} times. Provide more context to make it unique.",
602                i, match_count
603            ));
604        };
605
606        let _ = write!(diff, "\n@@ Hunk {} @@\n", i + 1);
607        for line in effective_search.lines() {
608            let _ = writeln!(diff, "- {}", line.trim_end());
609        }
610        for line in effective_replace.lines() {
611            let _ = writeln!(diff, "+ {}", line.trim_end());
612        }
613
614        current_content = current_content.replacen(&effective_search, &effective_replace, 1);
615        patched_hunks += 1;
616    }
617
618    fs::write(&abs, &current_content)
619        .map_err(|e| format!("multi_search_replace: write failed: {e}"))?;
620
621    Ok(format!(
622        "Modified {} hunks in {} using exact search-and-replace.{}",
623        patched_hunks, path, diff
624    ))
625}
626
627// ── list_files ────────────────────────────────────────────────────────────────
628
629pub async fn list_files(args: &Value, budget: usize) -> Result<String, String> {
630    let char_budget = budget * 4; // Approx tokens to chars
631    let started = Instant::now();
632    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
633    let ext_filter = args.get("extension").and_then(|v| v.as_str());
634
635    let base = safe_path(base_str)?;
636
637    let mut files: Vec<PathBuf> = Vec::new();
638    let mut scanned_count = 0;
639    for entry in WalkDir::new(&base).follow_links(false) {
640        scanned_count += 1;
641        if scanned_count > 25_000 {
642            return Err("list_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
643        }
644        let entry = entry.map_err(|e| format!("list_files: {e}"))?;
645        if !entry.file_type().is_file() {
646            continue;
647        }
648        let p = entry.path();
649
650        // Skip hidden dirs / target / node_modules
651        if path_has_hidden_segment(p) {
652            continue;
653        }
654
655        if let Some(ext) = ext_filter {
656            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
657                continue;
658            }
659        }
660        files.push(p.to_path_buf());
661    }
662
663    // Sort by modification time (newest first).
664    files.sort_by_key(|p| {
665        fs::metadata(p)
666            .and_then(|m| m.modified())
667            .ok()
668            .map(std::cmp::Reverse)
669    });
670
671    let mut current_chars = 0;
672    let mut shown = Vec::with_capacity(files.len().min(200));
673    let mut truncated_by_budget = false;
674
675    let total_scanned = files.len();
676    for f in files {
677        let f_str = f.display().to_string();
678        if current_chars + f_str.len() + 1 > char_budget {
679            truncated_by_budget = true;
680            break;
681        }
682        current_chars += f_str.len() + 1;
683        shown.push(f_str);
684        if shown.len() >= 200 {
685            break;
686        }
687    }
688
689    let truncated = total_scanned > shown.len();
690
691    let ms = started.elapsed().as_millis();
692    let mut out = format!(
693        "{} file(s) in {}  ({ms}ms){}",
694        shown.len(),
695        base_str,
696        if truncated {
697            if truncated_by_budget {
698                "  [truncated by token budget]"
699            } else {
700                "  [truncated at 200]"
701            }
702        } else {
703            ""
704        }
705    );
706    out.push('\n');
707    out.push_str(&shown.join("\n"));
708    Ok(out)
709}
710
711// ── create_directory ──────────────────────────────────────────────────────────
712
713pub async fn create_directory(args: &Value) -> Result<String, String> {
714    let path = require_str(args, "path")?;
715    let abs = safe_path_allow_new(path)?;
716
717    if abs.exists() {
718        if abs.is_dir() {
719            return Ok(format!("Directory already exists: {path}"));
720        } else {
721            return Err(format!("A file already exists at this path: {path}"));
722        }
723    }
724
725    fs::create_dir_all(&abs).map_err(|e| format!("create_directory: {e} ({path})"))?;
726    Ok(format!("Created directory: {path}"))
727}
728
729// ── grep_files ────────────────────────────────────────────────────────────────
730
731pub async fn grep_files(args: &Value, budget: usize) -> Result<String, String> {
732    let char_budget = budget * 4;
733    let pattern = require_str(args, "pattern")?;
734    let base_str = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
735    let ext_filter = args.get("extension").and_then(|v| v.as_str());
736    let case_insensitive = args
737        .get("case_insensitive")
738        .and_then(|v| v.as_bool())
739        .unwrap_or(true);
740    let files_only = args.get("mode").and_then(|v| v.as_str()) == Some("files_only");
741    let head_limit = get_usize_arg(args, "head_limit").unwrap_or(50);
742    let offset = get_usize_arg(args, "offset").unwrap_or(0);
743
744    // Context lines: `context` sets both before+after; `before`/`after` override individually.
745    let ctx_default = get_usize_arg(args, "context").unwrap_or(0);
746    let before = get_usize_arg(args, "before").unwrap_or(ctx_default);
747    let after = get_usize_arg(args, "after").unwrap_or(ctx_default);
748
749    let base = safe_path(base_str)?;
750
751    let regex = regex::RegexBuilder::new(pattern)
752        .case_insensitive(case_insensitive)
753        .build()
754        .map_err(|e| format!("grep_files: invalid pattern '{pattern}': {e}"))?;
755
756    // ── files_only mode ───────────────────────────────────────────────────────
757    if files_only {
758        let mut matched_files: Vec<String> = Vec::new();
759        let mut scanned_count = 0;
760
761        for entry in WalkDir::new(&base).follow_links(false) {
762            scanned_count += 1;
763            if scanned_count > 25_000 {
764                return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
765            }
766            let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
767            if !entry.file_type().is_file() {
768                continue;
769            }
770            let p = entry.path();
771            if path_has_hidden_segment(p) {
772                continue;
773            }
774            if let Some(ext) = ext_filter {
775                if p.extension().and_then(|s| s.to_str()) != Some(ext) {
776                    continue;
777                }
778            }
779            let Ok(contents) = fs::read_to_string(p) else {
780                continue;
781            };
782            if contents.lines().any(|line| regex.is_match(line)) {
783                matched_files.push(p.display().to_string());
784            }
785        }
786
787        if matched_files.is_empty() {
788            return Ok(format!("No files matching '{pattern}' in {base_str}"));
789        }
790
791        let total = matched_files.len();
792        let page: Vec<_> = matched_files
793            .into_iter()
794            .skip(offset)
795            .take(head_limit)
796            .collect();
797        let showing = page.len();
798
799        let mut out = format!("{total} file(s) match '{pattern}'");
800        if offset > 0 || showing < total {
801            let _ = write!(
802                out,
803                " [showing {}-{} of {total}]",
804                offset + 1,
805                offset + showing
806            );
807        }
808        out.push('\n');
809
810        let mut current_chars = out.len();
811        let mut shown_pages = Vec::with_capacity(page.len());
812        for p in page {
813            if current_chars + p.len() + 1 > char_budget {
814                out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
815                break;
816            }
817            current_chars += p.len() + 1;
818            shown_pages.push(p);
819        }
820        out.push_str(&shown_pages.join("\n"));
821        return Ok(out);
822    }
823
824    // ── content mode with optional context lines ──────────────────────────────
825
826    // A "hunk" is a contiguous run of lines to display for one or more nearby matches.
827    struct Hunk {
828        path: String,
829        /// (line_number_1_indexed, line_text, is_match)
830        lines: Vec<(usize, String, bool)>,
831    }
832
833    let mut hunks: Vec<Hunk> = Vec::new();
834    let mut total_matches = 0usize;
835    let mut files_matched = 0usize;
836    let mut scanned_count = 0;
837
838    for entry in WalkDir::new(&base).follow_links(false) {
839        scanned_count += 1;
840        if scanned_count > 25_000 {
841            return Err("grep_files: Too many files scanned (>25,000). The path is too broad. Narrow your search path or run Hematite directly in a project directory.".into());
842        }
843        let entry = entry.map_err(|e| format!("grep_files: {e}"))?;
844        if !entry.file_type().is_file() {
845            continue;
846        }
847        let p = entry.path();
848        if path_has_hidden_segment(p) {
849            continue;
850        }
851        if let Some(ext) = ext_filter {
852            if p.extension().and_then(|s| s.to_str()) != Some(ext) {
853                continue;
854            }
855        }
856        let Ok(contents) = fs::read_to_string(p) else {
857            continue;
858        };
859        let all_lines: Vec<&str> = contents.lines().collect();
860        let n = all_lines.len();
861
862        // Find all match indices in this file.
863        let match_idxs: Vec<usize> = all_lines
864            .iter()
865            .enumerate()
866            .filter(|(_, line)| regex.is_match(line))
867            .map(|(i, _)| i)
868            .collect();
869
870        if match_idxs.is_empty() {
871            continue;
872        }
873        files_matched += 1;
874        total_matches += match_idxs.len();
875
876        // Merge overlapping ranges into hunks.
877        let path_str = p.display().to_string();
878        let mut ranges: Vec<(usize, usize)> = match_idxs
879            .iter()
880            .map(|&i| {
881                (
882                    i.saturating_sub(before),
883                    (i + after).min(n.saturating_sub(1)),
884                )
885            })
886            .collect();
887
888        // Sort and merge overlapping ranges.
889        ranges.sort_unstable();
890        let mut merged: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
891        for (s, e) in ranges {
892            if let Some(last) = merged.last_mut() {
893                if s <= last.1 + 1 {
894                    last.1 = last.1.max(e);
895                    continue;
896                }
897            }
898            merged.push((s, e));
899        }
900
901        // Build hunks from merged ranges.
902        let match_set: std::collections::HashSet<usize> = match_idxs.into_iter().collect();
903        for (start, end) in merged {
904            let mut hunk_lines = Vec::with_capacity(end - start + 1);
905            for (offset, line) in all_lines[start..=end].iter().enumerate() {
906                hunk_lines.push((
907                    start + offset + 1,
908                    line.to_string(),
909                    match_set.contains(&(start + offset)),
910                ));
911            }
912            hunks.push(Hunk {
913                path: path_str.clone(),
914                lines: hunk_lines,
915            });
916        }
917    }
918
919    if hunks.is_empty() {
920        return Ok(format!("No matches for '{pattern}' in {base_str}"));
921    }
922
923    let total_hunks = hunks.len();
924    let page_hunks: Vec<_> = hunks.into_iter().skip(offset).take(head_limit).collect();
925    let showing = page_hunks.len();
926
927    let mut out =
928        format!("{total_matches} match(es) across {files_matched} file(s), {total_hunks} hunk(s)");
929    if offset > 0 || showing < total_hunks {
930        let _ = write!(
931            out,
932            " [hunks {}-{} of {total_hunks}]",
933            offset + 1,
934            offset + showing
935        );
936    }
937    out.push('\n');
938
939    let mut current_chars = out.len();
940    let mut truncated_by_budget = false;
941
942    for (i, hunk) in page_hunks.iter().enumerate() {
943        let mut hunk_out = String::with_capacity(hunk.lines.len() * 64 + 8);
944        if i > 0 {
945            hunk_out.push_str("\n--\n");
946        }
947        for (lineno, text, is_match) in &hunk.lines {
948            if *is_match {
949                let _ = writeln!(hunk_out, "{}:{}:{}", hunk.path, lineno, text);
950            } else {
951                let _ = writeln!(hunk_out, "{}: {}-{}", hunk.path, lineno, text);
952            }
953        }
954
955        if current_chars + hunk_out.len() > char_budget {
956            truncated_by_budget = true;
957            break;
958        }
959        current_chars += hunk_out.len();
960        out.push_str(&hunk_out);
961    }
962
963    if truncated_by_budget {
964        out.push_str("\n[TRUNCATED BY TOKEN BUDGET]");
965    }
966
967    Ok(out.trim_end().to_string())
968}
969
970// ── Argument helpers ──────────────────────────────────────────────────────────
971
972fn require_str<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
973    args.get(key)
974        .and_then(|v| v.as_str())
975        .ok_or_else(|| format!("Missing required argument: '{key}'"))
976}
977
978fn get_usize_arg(args: &Value, key: &str) -> Option<usize> {
979    args.get(key).and_then(value_as_usize)
980}
981
982fn require_usize(args: &Value, key: &str) -> Result<usize, String> {
983    get_usize_arg(args, key).ok_or_else(|| format!("Missing required numeric argument: '{key}'"))
984}
985
986fn value_as_usize(value: &Value) -> Option<usize> {
987    if let Some(v) = value.as_u64() {
988        return usize::try_from(v).ok();
989    }
990
991    if let Some(v) = value.as_i64() {
992        return if v >= 0 {
993            usize::try_from(v as u64).ok()
994        } else {
995            None
996        };
997    }
998
999    if let Some(v) = value.as_f64() {
1000        if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= (usize::MAX as f64) {
1001            return Some(v as usize);
1002        }
1003        return None;
1004    }
1005
1006    value.as_str().and_then(|s| s.trim().parse::<usize>().ok())
1007}
1008
1009// ── Path helpers ──────────────────────────────────────────────────────────────
1010
1011/// Resolve a path that must already exist, and check it's inside the workspace.
1012fn safe_path(path: &str) -> Result<PathBuf, String> {
1013    let candidate = resolve_candidate(path);
1014    match canonicalize_safe(&candidate, path) {
1015        Ok(abs) => Ok(abs),
1016        Err(e) => {
1017            if e.contains("The system cannot find the file specified") || e.contains("os error 2") {
1018                if let Some(suggestion) = suggest_better_path(path) {
1019                    return Err(format!("{e}. Did you mean '{suggestion}'?"));
1020                }
1021            }
1022            Err(e)
1023        }
1024    }
1025}
1026
1027fn suggest_better_path(original: &str) -> Option<String> {
1028    let path = Path::new(original);
1029    let filename = path.file_name()?.to_str()?.to_lowercase();
1030    let parent = path.parent().unwrap_or_else(|| Path::new("."));
1031
1032    // Use resolve_candidate to handle sovereign tokens like @DESKTOP/
1033    let abs_parent = resolve_candidate(&parent.to_string_lossy())
1034        .canonicalize()
1035        .ok()?;
1036
1037    let mut best_match = None;
1038    let mut best_score = 0;
1039
1040    if let Ok(entries) = fs::read_dir(abs_parent) {
1041        for entry in entries.flatten() {
1042            if let Some(candidate_name) = entry.file_name().to_str() {
1043                let lower_candidate = candidate_name.to_lowercase();
1044                if lower_candidate == filename {
1045                    continue;
1046                }
1047
1048                let mut score = 0;
1049                if lower_candidate.starts_with(&filename) || filename.starts_with(&lower_candidate)
1050                {
1051                    score += 10;
1052                }
1053                // Catch style.css vs styles.css
1054                if (filename.ends_with('s') && filename[..filename.len() - 1] == lower_candidate)
1055                    || (lower_candidate.ends_with('s')
1056                        && lower_candidate[..lower_candidate.len() - 1] == filename)
1057                {
1058                    score += 20;
1059                }
1060
1061                if score > best_score {
1062                    best_score = score;
1063                    best_match = Some(candidate_name.to_string());
1064                }
1065            }
1066        }
1067    }
1068
1069    if best_score >= 10 {
1070        best_match
1071    } else {
1072        None
1073    }
1074}
1075
1076/// Resolve a path that may not exist yet (for write_file).
1077fn safe_path_allow_new(path: &str) -> Result<PathBuf, String> {
1078    let candidate = resolve_candidate(path);
1079
1080    // Try canonical first.
1081    if let Ok(abs) = candidate.canonicalize() {
1082        check_workspace_bounds(&abs, path)?;
1083        return Ok(abs);
1084    }
1085
1086    // File doesn't exist yet — canonicalize the parent, append the filename.
1087    let parent = candidate.parent().unwrap_or(Path::new("."));
1088    let name = candidate
1089        .file_name()
1090        .ok_or_else(|| format!("invalid path: {path}"))?;
1091    let abs_parent = parent
1092        .canonicalize()
1093        .map_err(|_| format!("safe_path: parent dir doesn't exist for {path}"))?;
1094    let abs = abs_parent.join(name);
1095    check_workspace_bounds(&abs, path)?;
1096    Ok(abs)
1097}
1098
1099pub(crate) fn resolve_candidate(path: &str) -> PathBuf {
1100    // 1. Handle Special Sovereign Tokens
1101    let upper = path.to_uppercase();
1102
1103    // Bare token support — matches exact names with or without @ prefix, with or without
1104    // trailing slash. Enables /cd downloads, /cd @DESKTOP, /cd ~ etc.
1105    let bare = upper.trim_end_matches('/').trim_start_matches('@');
1106    let bare_resolved = match bare {
1107        "DESKTOP" => dirs::desktop_dir(),
1108        "DOWNLOADS" | "DOWNLOAD" => dirs::download_dir(),
1109        "DOCUMENTS" | "DOCS" => dirs::document_dir(),
1110        "PICTURES" | "IMAGES" => dirs::picture_dir(),
1111        "VIDEOS" | "MOVIES" => dirs::video_dir(),
1112        "MUSIC" | "AUDIO" => dirs::audio_dir(),
1113        "HOME" => dirs::home_dir(),
1114        "TEMP" | "TMP" => Some(std::env::temp_dir()),
1115        "CACHE" => dirs::cache_dir(),
1116        "CONFIG" => dirs::config_dir(),
1117        "DATA" => dirs::data_dir(),
1118        _ => None,
1119    };
1120    // Also handle bare ~ and ~/ as home
1121    let bare_resolved = bare_resolved.or_else(|| {
1122        if path == "~" || path == "~/" {
1123            dirs::home_dir()
1124        } else {
1125            None
1126        }
1127    });
1128    if let Some(p) = bare_resolved {
1129        return p;
1130    }
1131
1132    // Helper to resolve via dirs crate
1133    let resolved = if upper.starts_with("@DESKTOP/") {
1134        dirs::desktop_dir().map(|p| p.join(&path[9..]))
1135    } else if upper.starts_with("@DOCUMENTS/") {
1136        dirs::document_dir().map(|p| p.join(&path[11..]))
1137    } else if upper.starts_with("@DOWNLOADS/") {
1138        dirs::download_dir().map(|p| p.join(&path[11..]))
1139    } else if upper.starts_with("@PICTURES/") || upper.starts_with("@IMAGES/") {
1140        let offset = if upper.starts_with("@PICTURES/") {
1141            10
1142        } else {
1143            8
1144        };
1145        dirs::picture_dir().map(|p| p.join(&path[offset..]))
1146    } else if upper.starts_with("@VIDEOS/") || upper.starts_with("@MOVIES/") {
1147        let offset = 8;
1148        dirs::video_dir().map(|p| p.join(&path[offset..]))
1149    } else if upper.starts_with("@MUSIC/") || upper.starts_with("@AUDIO/") {
1150        let offset = 7;
1151        dirs::audio_dir().map(|p| p.join(&path[offset..]))
1152    } else if upper.starts_with("@HOME/") || upper.starts_with("~/") {
1153        let offset = if upper.starts_with("@HOME/") { 6 } else { 2 };
1154        dirs::home_dir().map(|p| p.join(&path[offset..]))
1155    } else if upper.starts_with("@TEMP/") {
1156        Some(std::env::temp_dir().join(&path[6..]))
1157    } else if upper.starts_with("@CACHE/") {
1158        dirs::cache_dir().map(|p| p.join(&path[7..]))
1159    } else if upper.starts_with("@CONFIG/") {
1160        dirs::config_dir().map(|p| p.join(&path[8..]))
1161    } else if upper.starts_with("@DATA/") {
1162        dirs::data_dir().map(|p| p.join(&path[6..]))
1163    } else {
1164        None
1165    };
1166
1167    if let Some(p) = resolved {
1168        return p;
1169    }
1170
1171    // 2. Fallback to Standard Resolution
1172    let p = Path::new(path);
1173    if p.is_absolute() {
1174        p.to_path_buf()
1175    } else {
1176        std::env::current_dir()
1177            .unwrap_or_else(|_| PathBuf::from("."))
1178            .join(p)
1179    }
1180}
1181
1182fn canonicalize_safe(candidate: &Path, original: &str) -> Result<PathBuf, String> {
1183    let abs = candidate
1184        .canonicalize()
1185        .map_err(|e: io::Error| format!("safe_path: {e} ({original})"))?;
1186    check_workspace_bounds(&abs, original)?;
1187    Ok(abs)
1188}
1189
/// Returns true when `abs` lies under `workspace` and ends in one of the
/// `.hematite` plan sidecar files (`task.md`, `plan.md`, `walkthrough.md`).
///
/// The suffix comparison is case-insensitive and treats `\` as `/`.
fn is_allowed_plan_sidecar(workspace: &Path, abs: &Path) -> bool {
    const SIDECAR_SUFFIXES: [&str; 3] = [
        "/.hematite/task.md",
        "/.hematite/plan.md",
        "/.hematite/walkthrough.md",
    ];

    // Compare against the canonicalized workspace with the component-aware
    // `Path::starts_with`. On Windows, `canonicalize()` prepends the `\\?\`
    // extended-path prefix as its own component, so testing a canonical `abs`
    // against a non-canonical workspace would return false even for the same tree.
    let root = match workspace.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => workspace.to_path_buf(),
    };
    if !abs.starts_with(&root) {
        return false;
    }

    let normalised = abs.to_string_lossy().to_lowercase().replace('\\', "/");
    SIDECAR_SUFFIXES
        .iter()
        .any(|suffix| normalised.ends_with(*suffix))
}
1209
1210fn check_workspace_bounds(abs: &Path, original: &str) -> Result<(), String> {
1211    let workspace = std::env::current_dir().map_err(|e| format!("could not read cwd: {e}"))?;
1212    if is_allowed_plan_sidecar(&workspace, abs) {
1213        return Ok(());
1214    }
1215
1216    // Delegate to the existing guard for blacklist + traversal checks.
1217    super::guard::path_is_safe(&workspace, abs)
1218        .map(|_| ())
1219        .map_err(|e| format!("file access denied for '{original}': {e}"))
1220}
1221
/// Returns true when any component of `p` should cause the path to be skipped:
/// a dot-prefixed segment, or one of `target`, `node_modules`, `__pycache__`.
///
/// `.hematite`, `.git`, `.` and `..` are explicitly exempt — on their own they
/// never cause a skip.
fn path_has_hidden_segment(p: &Path) -> bool {
    for component in p.components() {
        let segment = component.as_os_str().to_string_lossy();
        match segment.as_ref() {
            ".hematite" | ".git" | "." | ".." => continue,
            "target" | "node_modules" | "__pycache__" => return true,
            other if other.starts_with('.') => return true,
            _ => {}
        }
    }
    false
}
1232
/// Build a numbered excerpt of `content` around the line that most resembles
/// the first non-blank line of `search`, so the model can see the real
/// indentation/content and self-correct.
///
/// Resemblance is the length of the common prefix after trimming both lines;
/// on ties the last such line wins. The excerpt spans up to 3 lines before and
/// 4 lines after the chosen line. An empty file yields `"(file is empty)"`.
fn nearest_lines(content: &str, search: &str) -> String {
    /// Number of leading characters `probe` shares with the trimmed `line`.
    fn shared_prefix_len(probe: &str, line: &str) -> usize {
        probe
            .chars()
            .zip(line.trim().chars())
            .take_while(|(a, b)| a == b)
            .count()
    }

    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return "(file is empty)".into();
    }

    // The probe is the first search line that still has content after trimming.
    let probe = search
        .lines()
        .map(str::trim)
        .find(|l| !l.is_empty())
        .unwrap_or("");

    let best_idx = if probe.is_empty() {
        0
    } else {
        // `max_by_key` returns the last maximum, which fixes the tie-break.
        lines
            .iter()
            .enumerate()
            .max_by_key(|(_, line)| shared_prefix_len(probe, line))
            .map_or(0, |(idx, _)| idx)
    };

    let first = best_idx.saturating_sub(3);
    let last = lines.len().min(best_idx + 5);

    let mut snippet = String::with_capacity((last - first) * 60);
    for (lineno, text) in (first + 1..).zip(&lines[first..last]) {
        if lineno > first + 1 {
            snippet.push('\n');
        }
        let _ = write!(snippet, "{:>4} | {}", lineno, text);
    }

    format!(
        "Nearest matching lines ({}:{}):\n{}",
        best_idx + 1,
        last,
        snippet
    )
}
1286
/// Span-mapping core shared by the `find_span_normalised`-based fuzzy matchers.
///
/// Both `content` and `search` are passed through `normalise`; the search is
/// additionally stripped of leading/trailing newlines. The first occurrence of
/// the normalised search in the normalised content is converted to a line
/// index and line count, which are mapped back onto the original `content` as
/// a byte range covering whole lines (without the final newline).
///
/// Returns `None` when the normalised search is empty, is not found, the
/// mapped range overruns `content`, or re-normalising the mapped slice does
/// not reproduce the search (e.g. the hit started in the middle of a line).
///
/// The byte arithmetic counts one byte per line terminator, so it assumes
/// `content` uses `\n` line endings.
fn find_span_normalised(
    content: &str,
    search: &str,
    normalise: impl Fn(&str) -> String,
) -> Option<std::ops::Range<usize>> {
    let haystack = normalise(content);
    let normalised_search = normalise(search);
    let needle = normalised_search.trim_matches('\n');
    if needle.is_empty() {
        return None;
    }

    let hit = haystack.find(needle)?;

    // Line index of the hit, and how many lines the needle spans.
    let first_line = haystack[..hit].bytes().filter(|&b| b == b'\n').count();
    let line_span = needle.bytes().filter(|&b| b == b'\n').count() + 1;

    let source_lines: Vec<&str> = content.lines().collect();

    // Bytes occupied by the lines before the hit, each with its '\n'.
    let byte_start: usize = source_lines
        .iter()
        .take(first_line)
        .map(|line| line.len() + 1)
        .sum();

    // Bytes of the covered lines; every covered line except the needle's
    // final one also contributes its '\n'.
    let byte_len: usize = source_lines
        .iter()
        .skip(first_line)
        .take(line_span)
        .enumerate()
        .map(|(i, line)| line.len() + usize::from(i + 1 < line_span))
        .sum();

    let byte_end = byte_start + byte_len;
    if byte_end > content.len() {
        return None;
    }

    // Re-check on the original text: rejects hits that only matched part of a line.
    let candidate = &content[byte_start..byte_end];
    if normalise(candidate).trim_end_matches('\n') == needle {
        Some(byte_start..byte_end)
    } else {
        None
    }
}
1351
1352/// Level 1 fuzzy: rstrip only — removes trailing whitespace per line but
1353/// preserves leading indentation. Catches trailing-space mismatches where
1354/// the model's indentation is actually correct.
1355fn rstrip_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1356    find_span_normalised(content, search, |s| {
1357        let mut out = String::with_capacity(s.len());
1358        for (i, l) in s.lines().enumerate() {
1359            if i > 0 {
1360                out.push('\n');
1361            }
1362            out.push_str(l.trim_end());
1363        }
1364        out
1365    })
1366}
1367
1368/// Level 2 fuzzy: indent-flexible — strips the minimum common leading whitespace
1369/// (dedent) from both search and candidate windows before comparing. Preserves
1370/// relative indentation structure so nested code remains distinguishable. Also
1371/// normalises tabs → 4 spaces so tab/space mismatches are tolerated.
1372fn indent_flexible_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1373    let norm_search = dedent(search.trim_matches('\n'));
1374    if norm_search.trim().is_empty() {
1375        return None;
1376    }
1377    let search_line_count = norm_search.lines().count();
1378    let content_lines: Vec<&str> = content.lines().collect();
1379    if content_lines.len() < search_line_count {
1380        return None;
1381    }
1382
1383    // Precompute byte start of each line (content is already LF-normalised).
1384    let mut line_starts: Vec<usize> = Vec::with_capacity(content_lines.len() + 1);
1385    let mut pos = 0usize;
1386    for line in &content_lines {
1387        line_starts.push(pos);
1388        pos += line.len() + 1; // +1 for '\n'
1389    }
1390    line_starts.push(pos);
1391
1392    for start in 0..=(content_lines.len() - search_line_count) {
1393        let window = content_lines[start..start + search_line_count].join("\n");
1394        if dedent(&window) == norm_search {
1395            let byte_start = line_starts[start];
1396            let end_line = start + search_line_count;
1397            let byte_end = if end_line < content_lines.len() {
1398                line_starts[end_line] - 1 // exclude trailing '\n'
1399            } else {
1400                content.len()
1401            };
1402            return Some(byte_start..byte_end);
1403        }
1404    }
1405    None
1406}
1407
1408/// Level 3 fuzzy: full strip — trims all leading and trailing whitespace
1409/// per line. Last resort before the cross-file hint error.
1410fn fuzzy_find_span(content: &str, search: &str) -> Option<std::ops::Range<usize>> {
1411    find_span_normalised(content, search, |s| {
1412        let mut result = String::with_capacity(s.len());
1413        for (i, l) in s.lines().enumerate() {
1414            if i > 0 {
1415                result.push('\n');
1416            }
1417            result.push_str(l.trim());
1418        }
1419        result
1420    })
1421}
1422
1423/// Scan source files in the workspace for a search string that failed to
1424/// match in the intended target file. Returns the first file path where
1425/// the string is found (after CRLF normalisation), capped at 100 files.
1426/// Used to generate a "did you mean this file?" hint in edit errors.
1427fn find_search_in_workspace(search: &str, skip_path: &str) -> Option<String> {
1428    let root = workspace_root();
1429    let norm_search = search.replace("\r\n", "\n");
1430    let mut checked = 0usize;
1431
1432    let walker = ignore::WalkBuilder::new(&root)
1433        .hidden(true)
1434        .ignore(true)
1435        .git_ignore(true)
1436        .build();
1437
1438    for entry in walker.flatten() {
1439        if checked >= 100 {
1440            break;
1441        }
1442        let path = entry.path();
1443        if !path.is_file() {
1444            continue;
1445        }
1446        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1447        if !matches!(
1448            ext,
1449            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h"
1450        ) {
1451            continue;
1452        }
1453        let rel = path
1454            .strip_prefix(&root)
1455            .unwrap_or(path)
1456            .to_string_lossy()
1457            .replace('\\', "/");
1458        if rel == skip_path {
1459            continue;
1460        }
1461        checked += 1;
1462        if let Ok(content) = std::fs::read_to_string(path) {
1463            let normalised = content.replace("\r\n", "\n");
1464            if normalised.contains(&norm_search) {
1465                return Some(rel);
1466            }
1467        }
1468    }
1469    None
1470}
1471
1472// ── Indent-aware replacement ──────────────────────────────────────────────────
1473
/// Canonicalise indentation for comparison (used by indent_flexible_find_span).
///
/// Every tab becomes 4 spaces, the smallest leading-space count among
/// non-blank lines is removed from each non-blank line, trailing whitespace is
/// trimmed, and blank lines collapse to empty strings. Lines are re-joined
/// with `\n`; a trailing newline in the input is not reproduced.
fn dedent(s: &str) -> String {
    let expanded: Vec<String> = s.lines().map(|line| line.replace('\t', "    ")).collect();

    let common = expanded
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.len() - line.trim_start_matches(' ').len())
        .min()
        .unwrap_or(0);

    let rebuilt: Vec<&str> = expanded
        .iter()
        .map(|line| {
            if line.trim().is_empty() {
                ""
            } else {
                line.get(common..).unwrap_or(line).trim_end()
            }
        })
        .collect();

    rebuilt.join("\n")
}
1498
/// Re-indent `replace` so it lands at the file's indentation after a fuzzy match.
///
/// The indentation delta is measured between the first non-blank line of
/// `search` and the first non-blank line of `file_span` (leading spaces only),
/// then applied to every non-blank line of `replace`, clamped at zero.
/// Blank lines are kept verbatim and a trailing newline on `replace` is
/// preserved. When the two indents are equal, `replace` is returned unchanged.
///
/// Example: search/replace written with 0-space indent, file uses 8 spaces.
/// Delta = +8, so every non-blank line of replace gains 8 spaces.
fn adjust_replace_indent(search: &str, file_span: &str, replace: &str) -> String {
    /// Count of leading space characters on `line`.
    fn space_indent(line: &str) -> usize {
        line.len() - line.trim_start_matches(' ').len()
    }

    /// Indent of the first non-blank line of `block`, or 0 if there is none.
    fn leading_indent(block: &str) -> usize {
        block
            .lines()
            .find(|line| !line.trim().is_empty())
            .map_or(0, space_indent)
    }

    let from = leading_indent(search);
    let to = leading_indent(file_span);
    if from == to {
        return replace.to_string();
    }
    let delta = to as i64 - from as i64;

    let mut out = String::with_capacity(replace.len());
    for (i, line) in replace.lines().enumerate() {
        if i > 0 {
            out.push('\n');
        }
        if line.trim().is_empty() {
            // Preserve blank lines as-is
            out.push_str(line);
            continue;
        }
        let body = line.trim_start_matches(' ');
        let shifted = ((line.len() - body.len()) as i64 + delta).max(0) as usize;
        out.push_str(&" ".repeat(shifted));
        out.push_str(body);
    }

    if replace.ends_with('\n') {
        out.push('\n');
    }
    out
}
1543
1544// ── Diff preview helpers (read-only, no writes) ───────────────────────────────
1545
1546/// Return a formatted diff string for an edit_file operation without applying it.
1547/// Lines prefixed "- " are removals, "+ " are additions.  Returns Err if the
1548/// search string cannot be located (caller falls through to normal tool dispatch).
1549pub fn compute_edit_file_diff(args: &Value) -> Result<String, String> {
1550    let path = require_str(args, "path")?;
1551    let search = require_str(args, "search")?;
1552    let replace = require_str(args, "replace")?;
1553
1554    let abs = safe_path(path)?;
1555    let raw = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1556    let original = raw.replace("\r\n", "\n");
1557
1558    let (effective_search, effective_replace): (String, String) = if original.contains(search) {
1559        (search.to_string(), replace.to_string())
1560    } else {
1561        let span = rstrip_find_span(&original, search)
1562            .or_else(|| indent_flexible_find_span(&original, search))
1563            .or_else(|| fuzzy_find_span(&original, search));
1564        match span {
1565            Some(span) => {
1566                let real_slice = original[span].to_string();
1567                let adjusted = adjust_replace_indent(search, &real_slice, replace);
1568                (real_slice, adjusted)
1569            }
1570            None => return Err("search string not found — diff preview unavailable".into()),
1571        }
1572    };
1573
1574    let mut diff = String::with_capacity(effective_search.len() + effective_replace.len() + 16);
1575    for line in effective_search.lines() {
1576        let _ = writeln!(diff, "- {}", line);
1577    }
1578    for line in effective_replace.lines() {
1579        let _ = writeln!(diff, "+ {}", line);
1580    }
1581    Ok(diff)
1582}
1583
1584/// Return a formatted diff string for a patch_hunk operation without applying it.
1585pub fn compute_patch_hunk_diff(args: &Value) -> Result<String, String> {
1586    let path = require_str(args, "path")?;
1587    let start_line = require_usize(args, "start_line")?;
1588    let end_line = require_usize(args, "end_line")?;
1589    let replacement = require_str(args, "replacement")?;
1590
1591    let abs = safe_path(path)?;
1592    let original = fs::read_to_string(&abs).map_err(|e| format!("diff preview read: {e}"))?;
1593    let lines: Vec<&str> = original.lines().collect();
1594    let total = lines.len();
1595
1596    if start_line < 1 || start_line > total || end_line < start_line || end_line > total {
1597        return Err(format!(
1598            "patch_hunk: invalid line range {}-{} for file with {} lines",
1599            start_line, end_line, total
1600        ));
1601    }
1602
1603    let s_idx = start_line - 1;
1604    let e_idx = end_line;
1605
1606    let mut diff = format!("@@ lines {}-{} @@\n", start_line, end_line);
1607    for line in &lines[s_idx..e_idx] {
1608        let _ = writeln!(diff, "- {}", line.trim_end());
1609    }
1610    for line in replacement.lines() {
1611        let _ = writeln!(diff, "+ {}", line.trim_end());
1612    }
1613    Ok(diff)
1614}
1615
1616/// Return a formatted diff string for a multi_search_replace operation without applying it.
1617pub fn compute_msr_diff(args: &Value) -> Result<String, String> {
1618    let hunks_val = args
1619        .get("hunks")
1620        .ok_or_else(|| "multi_search_replace requires 'hunks' array".to_string())?;
1621
1622    #[derive(serde::Deserialize)]
1623    struct PreviewHunk {
1624        search: String,
1625        replace: String,
1626    }
1627    let hunks: Vec<PreviewHunk> = serde_json::from_value(hunks_val.clone())
1628        .map_err(|e| format!("compute_msr_diff: invalid hunks: {e}"))?;
1629
1630    let mut diff = String::with_capacity(hunks.len() * 128 + 16);
1631    for (i, hunk) in hunks.iter().enumerate() {
1632        if hunks.len() > 1 {
1633            let _ = writeln!(diff, "@@ hunk {} @@", i + 1);
1634        }
1635        for line in hunk.search.lines() {
1636            let _ = writeln!(diff, "- {}", line.trim_end());
1637        }
1638        for line in hunk.replace.lines() {
1639            let _ = writeln!(diff, "+ {}", line.trim_end());
1640        }
1641    }
1642    Ok(diff)
1643}
1644
1645/// Compute a preview diff for write_file — shows the full new content as additions,
1646/// and any existing file content as removals. New files show only `+` lines.
1647pub fn compute_write_file_diff(args: &Value) -> Result<String, String> {
1648    let path = require_str(args, "path")?;
1649    let new_content = require_str(args, "content")?;
1650
1651    let abs = safe_path(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
1652    let old_content = fs::read_to_string(&abs)
1653        .map(|s| s.replace("\r\n", "\n"))
1654        .unwrap_or_default();
1655
1656    let mut diff = String::with_capacity(old_content.len() + new_content.len() + 16);
1657    if !old_content.is_empty() {
1658        for line in old_content.lines() {
1659            let _ = writeln!(diff, "- {}", line);
1660        }
1661    }
1662    for line in new_content.lines() {
1663        let _ = writeln!(diff, "+ {}", line);
1664    }
1665    if diff.is_empty() {
1666        return Err("empty content — diff preview unavailable".into());
1667    }
1668    Ok(diff)
1669}
1670
/// Resolve the workspace root by walking upward from the current directory.
///
/// The first directory (starting with the current one) that contains `.git`,
/// `Cargo.toml` or `package.json` is returned. When no ancestor has a marker,
/// the current directory itself is returned; `.` stands in if the current
/// directory cannot be read.
pub fn workspace_root() -> PathBuf {
    const MARKERS: [&str; 3] = [".git", "Cargo.toml", "package.json"];

    let start = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let marked = start
        .ancestors()
        .find(|dir| MARKERS.iter().any(|marker| dir.join(marker).exists()));

    match marked {
        Some(dir) => dir.to_path_buf(),
        None => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
    }
}
1687
1688/// Returns true if `path` is a known OS shortcut directory (Desktop, Downloads,
1689/// Documents, Pictures, Videos, Music). These directories should not accumulate
1690/// `.hematite/` workspace state — they use the global `~/.hematite/` instead.
1691pub fn is_os_shortcut_directory(path: &Path) -> bool {
1692    let candidates = [
1693        dirs::desktop_dir(),
1694        dirs::download_dir(),
1695        dirs::document_dir(),
1696        dirs::picture_dir(),
1697        dirs::video_dir(),
1698        dirs::audio_dir(),
1699    ];
1700    candidates
1701        .iter()
1702        .filter_map(|d| d.as_deref())
1703        .any(|d| d == path)
1704}
1705
1706/// Returns the directory where Hematite's runtime state (`.hematite/`) should live.
1707///
1708/// - In sovereign OS directories (Desktop, Downloads, Documents, Pictures, Videos,
1709///   Music): returns `~/.hematite/` so no workspace folder is created there.
1710/// - Everywhere else: returns `workspace_root()/.hematite/` as normal.
1711pub fn hematite_dir() -> PathBuf {
1712    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1713    if is_os_shortcut_directory(&cwd) {
1714        if let Some(home) = dirs::home_dir() {
1715            return home.join(".hematite");
1716        }
1717    }
1718    workspace_root().join(".hematite")
1719}
1720
1721/// Returns true if the workspace root looks like a real project.
1722/// A bare `.git` alone (e.g. accidental `git init` in the home folder) doesn't
1723/// count — at least one explicit build/package marker must also be present.
1724pub fn is_project_workspace() -> bool {
1725    let root = workspace_root();
1726    let has_explicit_marker = root.join("Cargo.toml").exists()
1727        || root.join("package.json").exists()
1728        || root.join("pyproject.toml").exists()
1729        || root.join("go.mod").exists()
1730        || root.join("setup.py").exists()
1731        || root.join("pom.xml").exists()
1732        || root.join("build.gradle").exists()
1733        || root.join("CMakeLists.txt").exists()
1734        || root.join("index.html").exists()
1735        || root.join("style.css").exists()
1736        || root.join("script.js").exists();
1737    has_explicit_marker || (root.join(".git").exists() && root.join("src").exists())
1738}
1739
1740// ── open_in_system_editor ───────────────────────────────────────────────────
1741
/// Opens `path` with the platform's default handler for that file.
///
/// Windows uses `cmd /c start`, macOS uses `open`, and other Unix targets use
/// `xdg-open`. The call waits for the launcher command to return and checks
/// its exit status.
///
/// # Errors
///
/// Returns an error string when `path` does not exist, when the launcher
/// cannot be spawned, or when it exits unsuccessfully.
pub fn open_in_system_editor(path: &std::path::Path) -> Result<(), String> {
    if !path.exists() {
        return Err(format!("File not found: {}", path.display()));
    }

    #[cfg(target_os = "windows")]
    {
        // `start` opens the file with its associated application. The empty
        // string is the window-title argument, so a quoted path is not taken
        // for the title.
        let target = path.to_string_lossy();
        let outcome = std::process::Command::new("cmd")
            .args(["/c", "start", "", target.as_ref()])
            .status();
        match outcome {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(code) if !code.success() => {
                return Err("Editor command failed to start.".into());
            }
            Ok(_) => {}
        }
    }

    #[cfg(target_os = "macos")]
    {
        let outcome = std::process::Command::new("open").arg(path).status();
        match outcome {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(code) if !code.success() => return Err("open command failed.".into()),
            Ok(_) => {}
        }
    }

    #[cfg(all(unix, not(target_os = "macos")))]
    {
        // Linux and other non-macOS Unix targets go through xdg-open.
        let outcome = std::process::Command::new("xdg-open").arg(path).status();
        match outcome {
            Err(e) => return Err(format!("Failed to launch editor: {e}")),
            Ok(code) if !code.success() => return Err("xdg-open failed.".into()),
            Ok(_) => {}
        }
    }

    Ok(())
}
1788
#[cfg(test)]
mod tests {
    use super::*;

    /// `safe_path` must accept a relative path to a plan sidecar that lives
    /// under `.hematite/` inside the current working directory.
    #[test]
    fn safe_path_allows_plan_sidecars_inside_workspace() {
        // This test changes the process-wide cwd, so it takes the shared lock
        // (presumably held by every cwd-mutating test). A poisoned lock is
        // still usable: an earlier test panicking must not cascade into this one.
        let _cwd_lock = crate::TEST_CWD_LOCK
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        std::fs::create_dir_all(root.join(".hematite")).unwrap();
        std::fs::write(root.join(".hematite").join("TASK.md"), "# Task Ledger\n").unwrap();

        // The crate's manifest directory is the fixed location the cwd is
        // returned to afterwards (it is not the cwd observed at test start).
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        std::env::set_current_dir(root).unwrap();
        // Keep the result un-unwrapped until the cwd has been restored, so an
        // `Err` from `safe_path` cannot strand the process inside a temp
        // directory that is about to be deleted.
        let outcome = safe_path(".hematite/TASK.md");
        std::env::set_current_dir(manifest_dir).unwrap();

        let resolved = outcome.unwrap();
        assert!(resolved.ends_with(Path::new(".hematite").join("TASK.md")));
    }
}
hematite/tools/file_ops.rs

hematite/tools/
file_ops.rs