pi/
tools.rs

1//! Built-in tool implementations.
2//!
3//! Pi provides 8 built-in tools: read, bash, edit, write, grep, find, ls, hashline_edit.
4//!
5//! Tools are exposed to the model via JSON Schema (see [`crate::provider::ToolDef`]) and executed
6//! locally by the agent loop. Each tool returns structured [`ContentBlock`] output suitable for
7//! rendering in the TUI and for inclusion in provider messages as tool results.
8
9use crate::agent_cx::AgentCx;
10use crate::config::Config;
11use crate::error::{Error, Result};
12use crate::extensions::{safe_canonicalize, strip_unc_prefix};
13use crate::model::{ContentBlock, ImageContent, TextContent};
14use asupersync::io::{AsyncRead, AsyncReadExt, AsyncWriteExt, ReadBuf, SeekFrom};
15use asupersync::time::{sleep, wall_now};
16use async_trait::async_trait;
17use serde::{Deserialize, Serialize};
18use sha2::Digest as _;
19use std::cmp::Ordering;
20use std::collections::{HashMap, VecDeque};
21use std::ffi::{OsStr, OsString};
22use std::fmt::Write as _;
23use std::io::{BufRead, Read, Write};
24use std::path::{Path, PathBuf};
25use std::process::{Command, Stdio};
26use std::sync::{Mutex, OnceLock, mpsc};
27use std::thread;
28use std::time::{Duration, SystemTime, UNIX_EPOCH};
29use unicode_normalization::UnicodeNormalization;
30use uuid::Uuid;
31
32// ============================================================================
33// Tool Trait
34// ============================================================================
35
/// Coarse side-effect declaration for tool scheduling.
///
/// Effects are kept as a small bit set so that several declarations can be
/// merged with [`ToolEffects::union`] and inspected cheaply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ToolEffects {
    bits: u8,
}

impl ToolEffects {
    const READ: u8 = 0b0000_0001;
    const WRITE: u8 = 0b0000_0010;
    const APPEND: u8 = 0b0000_0100;
    const NETWORK: u8 = 0b0000_1000;
    const PROCESS: u8 = 0b0001_0000;
    /// Effects that rule out concurrent execution.
    const BARRIER: u8 = Self::WRITE | Self::APPEND | Self::PROCESS;

    /// Whether at least one bit of `mask` is set in this declaration.
    const fn has_any(self, mask: u8) -> bool {
        self.bits & mask != 0
    }

    /// Declaration for a tool that only reads local state.
    #[must_use]
    pub const fn read() -> Self {
        Self { bits: Self::READ }
    }

    /// Declaration for a tool that may create, replace, or otherwise mutate
    /// local state.
    #[must_use]
    pub const fn write() -> Self {
        Self { bits: Self::WRITE }
    }

    /// Declaration for a tool that appends to existing local state.
    #[must_use]
    pub const fn append() -> Self {
        Self { bits: Self::APPEND }
    }

    /// Declaration for a tool that performs network I/O without mutating
    /// local state.
    #[must_use]
    pub const fn network() -> Self {
        Self {
            bits: Self::NETWORK,
        }
    }

    /// Declaration for a tool that starts a local process. Process effects
    /// are part of the barrier set and therefore never parallel-safe.
    #[must_use]
    pub const fn process() -> Self {
        Self {
            bits: Self::PROCESS,
        }
    }

    /// Merge two declarations; the result carries every effect of both.
    #[must_use]
    pub const fn union(self, other: Self) -> Self {
        Self {
            bits: self.bits | other.bits,
        }
    }

    /// Whether the read effect is declared.
    #[must_use]
    pub const fn reads(self) -> bool {
        self.has_any(Self::READ)
    }

    /// Whether the write (replace/mutate) effect is declared.
    #[must_use]
    pub const fn writes(self) -> bool {
        self.has_any(Self::WRITE)
    }

    /// Whether the append effect is declared.
    #[must_use]
    pub const fn appends(self) -> bool {
        self.has_any(Self::APPEND)
    }

    /// Whether the network effect is declared.
    #[must_use]
    pub const fn networks(self) -> bool {
        self.has_any(Self::NETWORK)
    }

    /// Whether the process effect is declared.
    #[must_use]
    pub const fn processes(self) -> bool {
        self.has_any(Self::PROCESS)
    }

    /// Stable labels for machine-readable scheduling evidence.
    ///
    /// Labels are always emitted in the fixed order
    /// `read`, `write`, `append`, `network`, `process`.
    #[must_use]
    pub fn labels(self) -> Vec<&'static str> {
        let table: [(u8, &'static str); 5] = [
            (Self::READ, "read"),
            (Self::WRITE, "write"),
            (Self::APPEND, "append"),
            (Self::NETWORK, "network"),
            (Self::PROCESS, "process"),
        ];
        let mut labels = Vec::with_capacity(table.len());
        labels.extend(
            table
                .iter()
                .filter(|&&(mask, _)| self.has_any(mask))
                .map(|&(_, label)| label),
        );
        labels
    }

    /// Whether this effect set can run in a compatible concurrent batch.
    ///
    /// An empty declaration is not parallel-safe, and neither is any
    /// declaration containing a write, append, or process effect.
    #[must_use]
    pub const fn parallel_safe(self) -> bool {
        self.bits != 0 && !self.has_any(Self::BARRIER)
    }

    /// Whether two effect sets can share a concurrent batch, i.e. both are
    /// individually parallel-safe.
    #[must_use]
    pub const fn compatible_with(self, other: Self) -> bool {
        self.parallel_safe() && other.parallel_safe()
    }
}
156
/// A tool that can be executed by the agent.
///
/// Implementors describe themselves through [`Tool::name`], [`Tool::label`],
/// [`Tool::description`] and [`Tool::parameters`], do their work in
/// [`Tool::execute`], and declare their side effects through
/// [`Tool::effects`]. The `Send + Sync` bound is required of every
/// implementation.
#[async_trait]
pub trait Tool: Send + Sync {
    /// Get the tool name.
    fn name(&self) -> &str;

    /// Get the tool label (display name).
    fn label(&self) -> &str;

    /// Get the tool description.
    fn description(&self) -> &str;

    /// Get the tool parameters as JSON Schema.
    fn parameters(&self) -> serde_json::Value;

    /// Execute the tool.
    ///
    /// Tools may call `on_update` to stream incremental results (e.g. while a long-running `bash`
    /// command is still producing output). The final return value is a [`ToolOutput`] which is
    /// persisted into the session as a tool result message.
    ///
    /// * `tool_call_id` - identifier of the call being executed.
    /// * `input` - JSON arguments for the call (presumably shaped by the schema from
    ///   [`Tool::parameters`]; validation is up to the implementation).
    /// * `on_update` - optional callback for incremental [`ToolUpdate`]s; `None` disables streaming.
    async fn execute(
        &self,
        tool_call_id: &str,
        input: serde_json::Value,
        on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
    ) -> Result<ToolOutput>;

    /// Declare the coarse side effects used by the agent scheduler.
    ///
    /// Defaults to local write effects so undeclared tools are serialized fail-closed:
    /// [`ToolEffects::write`] belongs to the barrier set, so
    /// [`ToolEffects::parallel_safe`] is `false` for the default.
    #[must_use]
    fn effects(&self) -> ToolEffects {
        ToolEffects::write()
    }
}
192
/// Tool execution output.
///
/// Field names are (de)serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolOutput {
    /// Content blocks produced by the tool.
    pub content: Vec<ContentBlock>,
    /// Optional free-form JSON metadata accompanying the content.
    pub details: Option<serde_json::Value>,
    /// Error flag. Defaults to `false` when missing on deserialization and is
    /// left out of the serialized form when `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub is_error: bool,
}
202
/// Serde `skip_serializing_if` predicate: `true` exactly when the flag is unset.
#[allow(clippy::trivially_copy_pass_by_ref)] // serde requires `fn(&bool) -> bool` for `skip_serializing_if`
const fn is_false(value: &bool) -> bool {
    let &flag = value;
    !flag
}
207
/// Incremental update during tool execution.
///
/// Serialize-only; field names are emitted in camelCase.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolUpdate {
    /// Content blocks carried by this update.
    pub content: Vec<ContentBlock>,
    /// Optional free-form JSON metadata accompanying the update.
    pub details: Option<serde_json::Value>,
}
215
216// ============================================================================
217// Truncation
218// ============================================================================
219
/// Default maximum lines for truncation.
pub const DEFAULT_MAX_LINES: usize = 2000;

/// Default maximum bytes for truncation (1 MB, decimal).
pub const DEFAULT_MAX_BYTES: usize = 1_000_000; // 1MB

/// Maximum line length for grep results.
pub const GREP_MAX_LINE_LENGTH: usize = 500;

/// Default grep result limit.
pub const DEFAULT_GREP_LIMIT: usize = 100;

/// Default find result limit.
pub const DEFAULT_FIND_LIMIT: usize = 1000;

/// Default ls result limit.
pub const DEFAULT_LS_LIMIT: usize = 500;

/// Hard limit for directory scanning in ls tool to prevent OOM/hangs.
pub const LS_SCAN_HARD_LIMIT: usize = 20_000;

/// Hard limit for read tool file size (100 MiB) to prevent OOM.
pub const READ_TOOL_MAX_BYTES: u64 = 100 * 1024 * 1024;

/// Hard limit for write/edit tool file size (100 MiB) to prevent OOM.
pub const WRITE_TOOL_MAX_BYTES: usize = 100 * 1024 * 1024;

/// Maximum size for an image to be sent to the API (4.5 MiB = 4_718_592 bytes).
pub const IMAGE_MAX_BYTES: usize = 4_718_592;

/// Default timeout (in seconds) for bash tool execution.
pub const DEFAULT_BASH_TIMEOUT_SECS: u64 = 120;

/// Grace period, in seconds, associated with terminating a bash command.
// NOTE(review): presumably the wait between a polite terminate and a forced
// kill; confirm at the use site, which is outside this section.
const BASH_TERMINATE_GRACE_SECS: u64 = 5;
/// Schema identifier string for bash cancellation records (version 1).
const BASH_CANCELLATION_SCHEMA_V1: &str = "pi.tool.bash.cancellation.v1";

/// Hard limit for bash output file size (1 GiB) to prevent disk exhaustion DoS.
pub(crate) const BASH_FILE_LIMIT_BYTES: usize = 1024 * 1024 * 1024; // 1 GiB

/// Schema identifier string for tool output artifact references (version 1).
const TOOL_OUTPUT_ARTIFACT_SCHEMA_V1: &str = "pi.tool_output_artifact.v1";
/// Identifier string of the redaction policy applied to tool output artifacts (version 1).
const TOOL_OUTPUT_ARTIFACT_REDACTION_POLICY_V1: &str = "pi.tool_output_artifact.redaction.v1";
/// Retention class label recorded for tool output artifacts.
const TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS: &str = "session_scoped_temp_evidence";
/// Spillover reason label recorded for tool output artifacts.
const TOOL_OUTPUT_ARTIFACT_SPILLOVER_REASON: &str = "sourceBytesExceededPreviewThreshold";
/// Byte threshold for tool output artifacts; kept equal to [`DEFAULT_MAX_BYTES`].
const TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES: usize = DEFAULT_MAX_BYTES;
/// Redaction size limit (64 MiB) as `usize`; must stay equal to the `u64` twin below.
const TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES_USIZE: usize = 64 * 1024 * 1024;
/// Redaction size limit (64 MiB) as `u64`.
const TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES: u64 = 64 * 1024 * 1024;
/// Artifact size limit (1 GiB) as `usize`; must stay equal to the `u64` twin below.
const TOOL_OUTPUT_ARTIFACT_MAX_BYTES_USIZE: usize = 1024 * 1024 * 1024;
/// Artifact size limit (1 GiB) as `u64`.
const TOOL_OUTPUT_ARTIFACT_MAX_BYTES: u64 = 1024 * 1024 * 1024;
268
/// Result of truncation operation.
///
/// Serialize-only; field names are emitted in camelCase.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TruncationResult {
    /// The retained text (possibly empty).
    pub content: String,
    /// Whether anything was dropped from the input.
    pub truncated: bool,
    /// Which limit caused the truncation; `None` (and omitted from the
    /// serialized form) when nothing was truncated.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncated_by: Option<TruncatedBy>,
    /// Line count of the original input.
    pub total_lines: usize,
    /// Byte length of the original input.
    pub total_bytes: usize,
    /// Line count of `content`.
    pub output_lines: usize,
    /// Byte length of `content`.
    pub output_bytes: usize,
    /// Whether the last line of `content` was cut short by the byte limit.
    pub last_line_partial: bool,
    /// Set by head truncation when the first line alone exceeds `max_bytes`;
    /// `truncate_head` also sets it for any non-empty input when `max_bytes`
    /// is 0. `truncate_tail` always reports `false`.
    pub first_line_exceeds_limit: bool,
    /// Line limit that was applied.
    pub max_lines: usize,
    /// Byte limit that was applied.
    pub max_bytes: usize,
}
286
/// The limit responsible for a truncation (serialized in camelCase).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum TruncatedBy {
    /// The line limit was hit.
    Lines,
    /// The byte limit was hit.
    Bytes,
}
293
/// Reason attached to a cancelled bash invocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BashCancellationReason {
    // NOTE(review): presumably raised when the command exceeds its timeout;
    // confirm at the use site, which is outside this section.
    Timeout,
    // NOTE(review): presumably raised when the surrounding context is
    // cancelled rather than the command itself; confirm at the use site.
    AmbientCancellation,
}

impl BashCancellationReason {
    /// Stable snake_case label for the reason: `"timeout"` or
    /// `"ambient_cancellation"`.
    const fn as_str(self) -> &'static str {
        match self {
            Self::Timeout => "timeout",
            Self::AmbientCancellation => "ambient_cancellation",
        }
    }
}
308
309/// Truncate from the beginning (keep first N lines).
310///
311/// Takes ownership of the input `String` to avoid allocation in the common
312/// no-truncation case (content moved, zero-copy) and to enable in-place
313/// truncation when the content exceeds limits (`String::truncate`, no new
314/// allocation).
315#[allow(clippy::too_many_lines)]
316pub fn truncate_head(
317    content: impl Into<String>,
318    max_lines: usize,
319    max_bytes: usize,
320) -> TruncationResult {
321    let mut content = content.into();
322    let total_bytes = content.len();
323
324    let total_lines = {
325        let nl = memchr::memchr_iter(b'\n', content.as_bytes()).count();
326        if content.is_empty() {
327            0
328        } else if content.ends_with('\n') {
329            nl
330        } else {
331            nl + 1
332        }
333    };
334
335    if max_lines == 0 {
336        let truncated = !content.is_empty();
337        content.clear();
338        return TruncationResult {
339            content,
340            truncated,
341            truncated_by: if truncated {
342                Some(TruncatedBy::Lines)
343            } else {
344                None
345            },
346            total_lines,
347            total_bytes,
348            output_lines: 0,
349            output_bytes: 0,
350            last_line_partial: false,
351            first_line_exceeds_limit: false,
352            max_lines,
353            max_bytes,
354        };
355    }
356
357    if max_bytes == 0 {
358        let truncated = !content.is_empty();
359        let first_line_exceeds_limit = !content.is_empty();
360        content.clear();
361        return TruncationResult {
362            content,
363            truncated,
364            truncated_by: if truncated {
365                Some(TruncatedBy::Bytes)
366            } else {
367                None
368            },
369            total_lines,
370            total_bytes,
371            output_lines: 0,
372            output_bytes: 0,
373            last_line_partial: false,
374            first_line_exceeds_limit,
375            max_lines,
376            max_bytes,
377        };
378    }
379
380    if total_lines <= max_lines && total_bytes <= max_bytes {
381        return TruncationResult {
382            content,
383            truncated: false,
384            truncated_by: None,
385            total_lines,
386            total_bytes,
387            output_lines: total_lines,
388            output_bytes: total_bytes,
389            last_line_partial: false,
390            first_line_exceeds_limit: false,
391            max_lines,
392            max_bytes,
393        };
394    }
395
396    let first_newline = memchr::memchr(b'\n', content.as_bytes());
397    let first_line_bytes = first_newline.unwrap_or(content.len());
398
399    if first_line_bytes > max_bytes {
400        let mut valid_bytes = max_bytes;
401        while valid_bytes > 0 && !content.is_char_boundary(valid_bytes) {
402            valid_bytes -= 1;
403        }
404        content.truncate(valid_bytes);
405        return TruncationResult {
406            content,
407            truncated: true,
408            truncated_by: Some(TruncatedBy::Bytes),
409            total_lines,
410            total_bytes,
411            output_lines: usize::from(valid_bytes > 0),
412            output_bytes: valid_bytes,
413            last_line_partial: true,
414            first_line_exceeds_limit: true,
415            max_lines,
416            max_bytes,
417        };
418    }
419
420    let mut line_count = 0;
421    let mut byte_count = 0;
422    let mut truncated_by = None;
423    let mut current_offset = 0;
424    let mut last_line_partial = false;
425
426    while current_offset < content.len() {
427        if line_count >= max_lines {
428            truncated_by = Some(TruncatedBy::Lines);
429            break;
430        }
431
432        let next_newline = memchr::memchr(b'\n', &content.as_bytes()[current_offset..]);
433        let line_end_without_nl = next_newline.map_or(content.len(), |idx| current_offset + idx);
434        let line_end_with_nl = next_newline.map_or(content.len(), |idx| current_offset + idx + 1);
435
436        if line_end_without_nl > max_bytes {
437            let mut byte_limit = max_bytes.min(content.len());
438            if byte_limit < current_offset {
439                truncated_by = Some(TruncatedBy::Bytes);
440                break;
441            }
442            while byte_limit > current_offset && !content.is_char_boundary(byte_limit) {
443                byte_limit -= 1;
444            }
445            if byte_limit > current_offset {
446                byte_count = byte_limit;
447                line_count += 1;
448                last_line_partial = true;
449            }
450            truncated_by = Some(TruncatedBy::Bytes);
451            break;
452        }
453
454        if line_end_with_nl > max_bytes {
455            if line_end_without_nl > current_offset {
456                byte_count = line_end_without_nl;
457                line_count += 1;
458            }
459            truncated_by = Some(TruncatedBy::Bytes);
460            break;
461        }
462
463        byte_count = line_end_with_nl;
464        line_count += 1;
465        current_offset = line_end_with_nl;
466    }
467
468    content.truncate(byte_count);
469
470    TruncationResult {
471        truncated: truncated_by.is_some(),
472        truncated_by,
473        total_lines,
474        total_bytes,
475        output_lines: line_count,
476        output_bytes: byte_count,
477        last_line_partial,
478        first_line_exceeds_limit: false,
479        max_lines,
480        max_bytes,
481        content,
482    }
483}
484
485/// Truncate from the end (keep last N lines).
486///
487/// Takes ownership of the input `String` to avoid allocation in the common
488/// no-truncation case (content moved, zero-copy). When truncation is needed,
489/// the prefix is drained in-place, reusing the original buffer.
490#[allow(clippy::too_many_lines)]
491pub fn truncate_tail(
492    content: impl Into<String>,
493    max_lines: usize,
494    max_bytes: usize,
495) -> TruncationResult {
496    let mut content = content.into();
497    let total_bytes = content.len();
498
499    // Count lines correctly: trailing newline terminates the last line, it doesn't start a new one.
500    // "a\n" -> 1 line. "a\nb" -> 2 lines. "a" -> 1 line. "" -> 0 lines (handled below).
501    let mut total_lines = memchr::memchr_iter(b'\n', content.as_bytes()).count();
502    if !content.ends_with('\n') && !content.is_empty() {
503        total_lines += 1;
504    }
505    if content.is_empty() {
506        total_lines = 0;
507    }
508
509    // Explicitly handle zero-line budgets. Keeping any line would violate the
510    // contract (`output_lines <= max_lines`) and proptest invariants.
511    if max_lines == 0 {
512        let truncated = !content.is_empty();
513        return TruncationResult {
514            content: String::new(),
515            truncated,
516            truncated_by: if truncated {
517                Some(TruncatedBy::Lines)
518            } else {
519                None
520            },
521            total_lines,
522            total_bytes,
523            output_lines: 0,
524            output_bytes: 0,
525            last_line_partial: false,
526            first_line_exceeds_limit: false,
527            max_lines,
528            max_bytes,
529        };
530    }
531
532    // No truncation needed — reuse the owned String (zero-copy move).
533    if total_lines <= max_lines && total_bytes <= max_bytes {
534        return TruncationResult {
535            content,
536            truncated: false,
537            truncated_by: None,
538            total_lines,
539            total_bytes,
540            output_lines: total_lines,
541            output_bytes: total_bytes,
542            last_line_partial: false,
543            first_line_exceeds_limit: false,
544            max_lines,
545            max_bytes,
546        };
547    }
548
549    let mut line_count = 0usize;
550    let mut byte_count = 0usize;
551    let mut start_idx = content.len();
552    let mut partial_output: Option<String> = None;
553    let mut partial_line_truncated = false;
554    let mut truncated_by = None;
555    let mut last_line_partial = false;
556
557    // Scope the immutable borrow so we can mutate `content` afterwards.
558    {
559        let bytes = content.as_bytes();
560        // Initialize search_limit outside the loop to track progress backwards.
561        // If the file ends with a newline, we skip it for the purpose of finding
562        // the *start* of the last line, but start_idx (at len) includes it.
563        let mut search_limit = bytes.len();
564        if search_limit > 0 && bytes[search_limit - 1] == b'\n' {
565            search_limit -= 1;
566        }
567
568        loop {
569            // Find the *previous* newline.
570            let prev_newline = memchr::memrchr(b'\n', &bytes[..search_limit]);
571            let line_start = prev_newline.map_or(0, |idx| idx + 1);
572
573            // Bytes for this line (including its newline if it's not the last one,
574            // or if the file ends with newline). start_idx is the end of the
575            // segment we are accumulating.
576            let added_bytes = start_idx - line_start;
577
578            if byte_count + added_bytes > max_bytes {
579                // Try to take a partial line if byte budget remains. This
580                // preserves suffix stability under prepends while staying on a
581                // valid UTF-8 boundary.
582                let remaining = max_bytes.saturating_sub(byte_count);
583                if remaining > 0 {
584                    let chunk = &content[line_start..start_idx];
585                    let truncated_chunk = truncate_string_to_bytes_from_end(chunk, remaining);
586                    if !truncated_chunk.is_empty() {
587                        partial_output = Some(truncated_chunk);
588                        partial_line_truncated = true;
589                        if line_count == 0 {
590                            last_line_partial = true;
591                        }
592                    }
593                }
594                truncated_by = Some(TruncatedBy::Bytes);
595                break;
596            }
597
598            line_count += 1;
599            byte_count += added_bytes;
600            start_idx = line_start;
601
602            if line_count >= max_lines {
603                truncated_by = Some(TruncatedBy::Lines);
604                break;
605            }
606
607            if line_start == 0 {
608                break;
609            }
610
611            // Prepare for next iter.
612            // We just consumed line starting at `line_start`.
613            // The separator before it is at `line_start - 1`.
614            // That separator is the `\n` of the *previous* line.
615            // We want to search *before* it.
616            search_limit = line_start - 1;
617        }
618    } // immutable borrow of `content` released
619
620    // Extract the suffix: drain the prefix in-place (reuses the buffer),
621    // or use the partial output from the byte-truncation path.
622    let partial_suffix = if partial_line_truncated {
623        Some(content[start_idx..].to_string())
624    } else {
625        None
626    };
627
628    let mut output = partial_output.unwrap_or_else(|| {
629        drop(content.drain(..start_idx));
630        content
631    });
632
633    // If we have a partial last line, we need to append the *rest* of the content
634    // that we successfully kept (the `byte_count` lines).
635    // Wait, `partial_output` replaces the *current line*.
636    // The previous successful lines are in `content[old_start_idx..]`.
637    // My logic above for partial output:
638    // `truncated_chunk` is the partial tail of the *current line*.
639    // We need to prepend it to the lines we already collected?
640    // Actually, `content` is the full string.
641    // We are scanning backwards.
642    // `start_idx` tracks the start of the valid suffix so far.
643    // When we hit the byte limit, we are at `line_start..start_idx`.
644    // `truncated_chunk` is the tail of *that* segment.
645    // So final output = `truncated_chunk` + `content[start_idx..]`.
646
647    if let Some(suffix) = partial_suffix {
648        // Need to reconstruct.
649        // `output` is currently just the truncated chunk.
650        // We need to append the previously accumulated suffix.
651        // `content` still holds everything.
652        // `start_idx` points to the start of the *valid* suffix from previous iters.
653        output.push_str(&suffix);
654        // Recalculate line count from the final output.
655        // Since truncated output is bounded (<= max_bytes), this scan is cheap.
656        let mut count = memchr::memchr_iter(b'\n', output.as_bytes()).count();
657        if !output.ends_with('\n') && !output.is_empty() {
658            count += 1;
659        }
660        if output.is_empty() {
661            count = 0;
662        }
663        line_count = count;
664    }
665
666    let output_bytes = output.len();
667
668    TruncationResult {
669        content: output,
670        truncated: truncated_by.is_some(),
671        truncated_by,
672        total_lines,
673        total_bytes,
674        output_lines: line_count,
675        output_bytes,
676        last_line_partial,
677        first_line_exceeds_limit: false,
678        max_lines,
679        max_bytes,
680    }
681}
682
/// Return the longest suffix of `s` that is at most `max_bytes` long and starts
/// on a UTF-8 character boundary.
///
/// The whole string is returned when it already fits; the result may be empty
/// when not even the last character fits.
fn truncate_string_to_bytes_from_end(s: &str, max_bytes: usize) -> String {
    let len = s.len();
    if len <= max_bytes {
        return s.to_owned();
    }

    // First boundary at or after the naive cut point; `len` itself is always a
    // boundary, so the search cannot fail.
    let cut = (len - max_bytes..=len)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(len);
    s[cut..].to_owned()
}
699
700struct HeadTruncatingLineWriter {
701    content: String,
702    max_bytes: usize,
703    total_lines: usize,
704    total_bytes: usize,
705    output_lines: usize,
706    truncated: bool,
707    last_line_partial: bool,
708    first_line_exceeds_limit: bool,
709}
710
711impl HeadTruncatingLineWriter {
712    fn new(max_bytes: usize) -> Self {
713        Self {
714            content: String::with_capacity(max_bytes.min(8192)),
715            max_bytes,
716            total_lines: 0,
717            total_bytes: 0,
718            output_lines: 0,
719            truncated: false,
720            last_line_partial: false,
721            first_line_exceeds_limit: false,
722        }
723    }
724
725    fn push_line(&mut self, line: &str) {
726        debug_assert!(!line.contains('\n'));
727
728        let line_index = self.total_lines;
729        let separator_len = usize::from(line_index > 0);
730        let piece_bytes = separator_len.saturating_add(line.len());
731        self.total_lines = self.total_lines.saturating_add(1);
732        self.total_bytes = self.total_bytes.saturating_add(piece_bytes);
733
734        if self.truncated {
735            return;
736        }
737
738        if self.max_bytes == 0 {
739            self.truncated = true;
740            self.first_line_exceeds_limit = line_index == 0 && !line.is_empty();
741            return;
742        }
743
744        let remaining = self.max_bytes.saturating_sub(self.content.len());
745        if piece_bytes <= remaining {
746            if separator_len > 0 {
747                self.content.push('\n');
748            }
749            self.content.push_str(line);
750            self.output_lines = self.output_lines.saturating_add(1);
751            return;
752        }
753
754        self.truncated = true;
755        if line_index == 0 && line.len() > self.max_bytes {
756            self.first_line_exceeds_limit = true;
757        }
758
759        let line_budget = if separator_len > 0 {
760            if remaining == 0 {
761                return;
762            }
763            self.content.push('\n');
764            remaining - 1
765        } else {
766            remaining
767        };
768
769        let valid_bytes = utf8_prefix_len(line, line_budget);
770        if valid_bytes > 0 {
771            self.content.push_str(&line[..valid_bytes]);
772            self.output_lines = self.output_lines.saturating_add(1);
773            self.last_line_partial = valid_bytes < line.len();
774        }
775    }
776
777    fn finish(self) -> TruncationResult {
778        let output_bytes = self.content.len();
779        TruncationResult {
780            content: self.content,
781            truncated: self.truncated,
782            truncated_by: if self.truncated {
783                Some(TruncatedBy::Bytes)
784            } else {
785                None
786            },
787            total_lines: self.total_lines,
788            total_bytes: self.total_bytes,
789            output_lines: self.output_lines,
790            output_bytes,
791            last_line_partial: self.last_line_partial,
792            first_line_exceeds_limit: self.first_line_exceeds_limit,
793            max_lines: usize::MAX,
794            max_bytes: self.max_bytes,
795        }
796    }
797}
798
/// Length in bytes of the longest prefix of `s` that is at most `max_bytes`
/// long and ends on a UTF-8 character boundary.
fn utf8_prefix_len(s: &str, max_bytes: usize) -> usize {
    let cap = s.len().min(max_bytes);
    // Index 0 is always a boundary, so the search cannot fail.
    (0..=cap)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
806
/// Serializable reference to a tool output artifact.
///
/// Serialize-only; field names are emitted in camelCase.
// NOTE(review): the field notes below are inferred from names and types only;
// the code that fills this struct is outside this section — confirm there.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct ToolOutputArtifactRef {
    // Schema identifier string for this record.
    schema: &'static str,
    id: String,
    tool_name: String,
    source_kind: String,
    // Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    session_id: Option<String>,
    path: String,
    metadata_path: String,
    // Presumably the hex SHA-256 digest of the artifact bytes — TODO confirm.
    sha256: String,
    byte_count: u64,
    line_count: usize,
    preview_bytes: usize,
    content_type: &'static str,
    retention_class: &'static str,
    spillover_reason: &'static str,
    // Summary of the redaction pass applied to the artifact.
    redaction_summary: ToolOutputArtifactRedactionSummary,
    safe_delete_candidate: bool,
}
828
/// Serializable summary of a redaction pass over a tool output artifact.
///
/// Serialize-only; field names are emitted in camelCase.
// NOTE(review): the field notes below are inferred from names and types only;
// the redaction code itself is outside this section — confirm there.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct ToolOutputArtifactRedactionSummary {
    // Identifier string of the redaction policy.
    policy: &'static str,
    status: &'static str,
    redacted_count: usize,
    // Presumably the normalized names of the redacted fields — TODO confirm.
    fields: Vec<String>,
    raw_secret_bytes_emitted: usize,
    binary_suspect: bool,
    max_redaction_bytes: u64,
}
840
/// Artifact bytes paired with the redaction summary that describes them.
// NOTE(review): presumably `bytes` is the output after redaction; the code
// that builds this struct is outside this section — confirm there.
struct RedactedToolOutputArtifact {
    bytes: Vec<u8>,
    summary: ToolOutputArtifactRedactionSummary,
}
845
846fn tool_output_artifact_root() -> PathBuf {
847    std::env::var_os("PI_TOOL_OUTPUT_ARTIFACT_DIR").map_or_else(
848        || Config::global_dir().join("tool-output-artifacts"),
849        PathBuf::from,
850    )
851}
852
/// Process-wide map from tool call id to the session id registered for it.
static TOOL_OUTPUT_ARTIFACT_SESSIONS: OnceLock<Mutex<HashMap<String, String>>> = OnceLock::new();

/// Shared session map, created empty on first access.
fn tool_output_artifact_sessions() -> &'static Mutex<HashMap<String, String>> {
    TOOL_OUTPUT_ARTIFACT_SESSIONS.get_or_init(Mutex::default)
}
858
859pub(crate) struct ToolOutputArtifactSessionGuard {
860    tool_call_id: String,
861    previous_session_id: Option<String>,
862    active: bool,
863}
864
865impl Drop for ToolOutputArtifactSessionGuard {
866    fn drop(&mut self) {
867        if !self.active {
868            return;
869        }
870        let Ok(mut sessions) = tool_output_artifact_sessions().lock() else {
871            return;
872        };
873        if let Some(previous) = self.previous_session_id.take() {
874            sessions.insert(self.tool_call_id.clone(), previous);
875        } else {
876            sessions.remove(&self.tool_call_id);
877        }
878    }
879}
880
881pub(crate) fn register_tool_output_artifact_session(
882    tool_call_id: &str,
883    session_id: &str,
884) -> ToolOutputArtifactSessionGuard {
885    if session_id.is_empty() {
886        return ToolOutputArtifactSessionGuard {
887            tool_call_id: String::new(),
888            previous_session_id: None,
889            active: false,
890        };
891    }
892    let previous_session_id = tool_output_artifact_sessions()
893        .lock()
894        .ok()
895        .and_then(|mut sessions| sessions.insert(tool_call_id.to_string(), session_id.to_string()));
896    ToolOutputArtifactSessionGuard {
897        tool_call_id: tool_call_id.to_string(),
898        previous_session_id,
899        active: true,
900    }
901}
902
903fn tool_output_artifact_session_id(tool_call_id: &str) -> Option<String> {
904    tool_output_artifact_sessions()
905        .lock()
906        .ok()
907        .and_then(|sessions| sessions.get(tool_call_id).cloned())
908}
909
910fn tool_output_artifact_scope_dir(root: &Path, tool_call_id: &str) -> (PathBuf, Option<String>) {
911    let call_scope = sanitize_artifact_scope(tool_call_id);
912    if let Some(session_id) = tool_output_artifact_session_id(tool_call_id) {
913        (
914            root.join(sanitize_artifact_scope(&session_id))
915                .join(call_scope),
916            Some(session_id),
917        )
918    } else {
919        (root.join(call_scope), None)
920    }
921}
922
/// Turns an arbitrary scope string into a single safe path component.
///
/// Only the first 96 characters are considered. ASCII alphanumerics, `-` and
/// `_` are kept; every other character becomes `_`. If the result would consist
/// solely of underscores (or be empty), the fixed name `tool-call` is returned.
fn sanitize_artifact_scope(scope: &str) -> String {
    let sanitized: String = scope
        .chars()
        .take(96)
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' {
                ch
            } else {
                '_'
            }
        })
        .collect();

    if sanitized.chars().all(|ch| ch == '_') {
        String::from("tool-call")
    } else {
        sanitized
    }
}
938
939fn artifact_line_count(bytes: &[u8]) -> usize {
940    if bytes.is_empty() {
941        0
942    } else {
943        memchr::memchr_iter(b'\n', bytes).count() + usize::from(!bytes.ends_with(b"\n"))
944    }
945}
946
947fn artifact_details_object(
948    details: &mut Option<serde_json::Value>,
949) -> &mut serde_json::Map<String, serde_json::Value> {
950    let value = details.get_or_insert_with(|| serde_json::Value::Object(serde_json::Map::new()));
951    if !value.is_object() {
952        *value = serde_json::Value::Object(serde_json::Map::new());
953    }
954    value
955        .as_object_mut()
956        .expect("details value forced to object")
957}
958
/// Canonicalizes a field name for the redaction summary.
///
/// Runs of ASCII alphanumerics are lowercased and joined with single
/// underscores; every other character acts as a separator, and leading or
/// trailing separators are dropped.
fn normalize_redaction_field(field: &str) -> String {
    field
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("_")
}
976
977fn record_redacted_field(fields: &mut Vec<String>, field: &str) {
978    let field = normalize_redaction_field(field);
979    if !field.is_empty() && !fields.iter().any(|existing| existing == &field) {
980        fields.push(field);
981    }
982}
983
984fn artifact_sensitive_key_value_regex() -> &'static regex::Regex {
985    static RE: OnceLock<regex::Regex> = OnceLock::new();
986    RE.get_or_init(|| {
987        regex::Regex::new(
988            r#"(?i)\b([A-Za-z_][A-Za-z0-9_.-]*(?:api[_-]?key|token|secret|password|passwd|credential|authorization)[A-Za-z0-9_.-]*)(\s*[:=]\s*)("[^"\r\n]*"|'[^'\r\n]*'|[^\s,;}]+)"#,
989        )
990        .expect("valid artifact key-value redaction regex")
991    })
992}
993
994fn artifact_bearer_token_regex() -> &'static regex::Regex {
995    static RE: OnceLock<regex::Regex> = OnceLock::new();
996    RE.get_or_init(|| {
997        regex::Regex::new(r"(?i)\b(Bearer\s+)([A-Za-z0-9._~+/=-]{8,})")
998            .expect("valid artifact bearer redaction regex")
999    })
1000}
1001
1002fn artifact_token_value_regex() -> &'static regex::Regex {
1003    static RE: OnceLock<regex::Regex> = OnceLock::new();
1004    RE.get_or_init(|| {
1005        regex::Regex::new(
1006            r"\b(sk-[A-Za-z0-9][A-Za-z0-9_-]{10,}|gh[pousr]_[A-Za-z0-9_]{10,}|AKIA[0-9A-Z]{12,})\b",
1007        )
1008        .expect("valid artifact token value redaction regex")
1009    })
1010}
1011
/// Picks the redaction placeholder that keeps the quoting style of `value`:
/// double-quoted, single-quoted, or bare.
fn redacted_literal_for_value(value: &str) -> &'static str {
    let wrapped_in = |quote: char| value.starts_with(quote) && value.ends_with(quote);
    match (wrapped_in('"'), wrapped_in('\'')) {
        (true, _) => "\"[REDACTED]\"",
        (false, true) => "'[REDACTED]'",
        (false, false) => "[REDACTED]",
    }
}
1021
1022fn redact_tool_output_artifact_text(
1023    text: &str,
1024    binary_suspect: bool,
1025) -> RedactedToolOutputArtifact {
1026    let mut fields = Vec::new();
1027    let mut redacted_count = 0usize;
1028
1029    let redacted = artifact_sensitive_key_value_regex()
1030        .replace_all(text, |caps: &regex::Captures<'_>| {
1031            let key = caps.get(1).map_or("", |m| m.as_str());
1032            let sep = caps.get(2).map_or("", |m| m.as_str());
1033            let value = caps.get(3).map_or("", |m| m.as_str());
1034            if value == "[REDACTED]" || value == "\"[REDACTED]\"" || value == "'[REDACTED]'" {
1035                caps.get(0).map_or("", |m| m.as_str()).to_string()
1036            } else {
1037                redacted_count = redacted_count.saturating_add(1);
1038                record_redacted_field(&mut fields, key);
1039                format!("{key}{sep}{}", redacted_literal_for_value(value))
1040            }
1041        })
1042        .to_string();
1043
1044    let redacted = artifact_bearer_token_regex()
1045        .replace_all(&redacted, |caps: &regex::Captures<'_>| {
1046            redacted_count = redacted_count.saturating_add(1);
1047            record_redacted_field(&mut fields, "authorization");
1048            let prefix = caps.get(1).map_or("", |m| m.as_str());
1049            format!("{prefix}[REDACTED]")
1050        })
1051        .to_string();
1052
1053    let redacted = artifact_token_value_regex()
1054        .replace_all(&redacted, |_caps: &regex::Captures<'_>| {
1055            redacted_count = redacted_count.saturating_add(1);
1056            record_redacted_field(&mut fields, "tokenValue");
1057            "[REDACTED]".to_string()
1058        })
1059        .to_string();
1060
1061    fields.sort();
1062    let raw_secret_bytes_emitted = estimate_raw_secret_bytes(&redacted);
1063    let summary = ToolOutputArtifactRedactionSummary {
1064        policy: TOOL_OUTPUT_ARTIFACT_REDACTION_POLICY_V1,
1065        status: if raw_secret_bytes_emitted > 0 {
1066            "unsafe"
1067        } else if redacted_count > 0 {
1068            "redacted"
1069        } else {
1070            "clean"
1071        },
1072        redacted_count,
1073        fields,
1074        raw_secret_bytes_emitted,
1075        binary_suspect,
1076        max_redaction_bytes: TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES,
1077    };
1078
1079    RedactedToolOutputArtifact {
1080        bytes: redacted.into_bytes(),
1081        summary,
1082    }
1083}
1084
1085fn estimate_raw_secret_bytes(text: &str) -> usize {
1086    let key_value_bytes = artifact_sensitive_key_value_regex()
1087        .captures_iter(text)
1088        .filter_map(|caps| {
1089            let value = caps.get(3)?.as_str();
1090            if value == "[REDACTED]" || value == "\"[REDACTED]\"" || value == "'[REDACTED]'" {
1091                None
1092            } else {
1093                caps.get(0).map(|m| m.as_str().len())
1094            }
1095        })
1096        .sum::<usize>();
1097    let bearer_bytes = artifact_bearer_token_regex()
1098        .find_iter(text)
1099        .map(|m| m.as_str().len())
1100        .sum::<usize>();
1101    let token_bytes = artifact_token_value_regex()
1102        .find_iter(text)
1103        .map(|m| m.as_str().len())
1104        .sum::<usize>();
1105    key_value_bytes
1106        .saturating_add(bearer_bytes)
1107        .saturating_add(token_bytes)
1108}
1109
1110fn redact_tool_output_artifact_bytes(bytes: &[u8]) -> std::io::Result<RedactedToolOutputArtifact> {
1111    let binary_suspect =
1112        memchr::memchr(b'\0', bytes).is_some() || std::str::from_utf8(bytes).is_err();
1113    let text = String::from_utf8_lossy(bytes);
1114    let redacted = redact_tool_output_artifact_text(text.as_ref(), binary_suspect);
1115    if redacted.summary.raw_secret_bytes_emitted > 0 {
1116        return Err(std::io::Error::new(
1117            std::io::ErrorKind::InvalidData,
1118            "artifact redaction failed closed: raw secret-looking bytes remain",
1119        ));
1120    }
1121    Ok(redacted)
1122}
1123
/// Verifies that `path` stays inside `root`.
///
/// The check is lexical (no filesystem access): `path` must have `root` as a
/// component-wise prefix, and the remainder must not contain any `..`
/// component. A bare prefix test would accept `root/../elsewhere`, which
/// resolves outside the root.
///
/// # Errors
/// Returns `PermissionDenied` naming both paths when the check fails.
fn ensure_artifact_path_under_root(root: &Path, path: &Path) -> std::io::Result<()> {
    let contained = path.strip_prefix(root).is_ok_and(|relative| {
        relative
            .components()
            .all(|component| !matches!(component, std::path::Component::ParentDir))
    });

    if contained {
        Ok(())
    } else {
        Err(std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            format!(
                "artifact path {} is outside artifact root {}",
                path.display(),
                root.display()
            ),
        ))
    }
}
1138
/// Creates `path` with `bytes` as its content unless the file already exists.
///
/// An existing file is left untouched and reported as success. A newly created
/// file is synced to disk before returning.
///
/// If writing or syncing fails, the partially written file is removed
/// (best-effort). Otherwise a later call would see it as "already exists" and
/// accept truncated content.
///
/// # Errors
/// Propagates any I/O error from creating, writing or syncing the file, other
/// than `AlreadyExists`.
fn write_artifact_file_if_absent(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
    let mut file = match std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(path)
    {
        Ok(file) => file,
        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => return Ok(()),
        Err(err) => return Err(err),
    };

    let outcome = match file.write_all(bytes) {
        Ok(()) => file.sync_all(),
        Err(err) => Err(err),
    };
    if outcome.is_err() {
        // We created this file in this call, so removing it cannot discard
        // anyone else's content. Close the handle first so removal also works
        // on platforms that refuse to delete open files.
        drop(file);
        let _ = std::fs::remove_file(path);
    }
    outcome
}
1154
1155fn write_text_tool_output_artifact_at_root(
1156    root: &Path,
1157    tool_name: &str,
1158    tool_call_id: &str,
1159    source_kind: &str,
1160    full_text: &str,
1161    preview_bytes: usize,
1162) -> std::io::Result<ToolOutputArtifactRef> {
1163    let bytes = full_text.as_bytes();
1164    if bytes.len() > TOOL_OUTPUT_ARTIFACT_MAX_BYTES_USIZE {
1165        return Err(std::io::Error::new(
1166            std::io::ErrorKind::InvalidData,
1167            format!(
1168                "artifact source exceeds {} hard limit",
1169                format_size(TOOL_OUTPUT_ARTIFACT_MAX_BYTES_USIZE)
1170            ),
1171        ));
1172    }
1173    if bytes.len() > TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES_USIZE {
1174        return Err(std::io::Error::new(
1175            std::io::ErrorKind::InvalidData,
1176            format!(
1177                "artifact source exceeds {} redaction limit",
1178                format_size(TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES_USIZE)
1179            ),
1180        ));
1181    }
1182    let redacted = redact_tool_output_artifact_bytes(bytes)?;
1183    let bytes = redacted.bytes.as_slice();
1184    let sha256 = format!("{:x}", sha2::Sha256::digest(bytes));
1185    let (scope_dir, session_id) = tool_output_artifact_scope_dir(root, tool_call_id);
1186    std::fs::create_dir_all(&scope_dir)?;
1187
1188    let id = format!("tool-artifact-{}", &sha256[..16]);
1189    let content_path = scope_dir.join(format!("{sha256}.txt"));
1190    let metadata_path = scope_dir.join(format!("{sha256}.json"));
1191    ensure_artifact_path_under_root(root, &content_path)?;
1192    ensure_artifact_path_under_root(root, &metadata_path)?;
1193    write_artifact_file_if_absent(&content_path, bytes)?;
1194
1195    let artifact = ToolOutputArtifactRef {
1196        schema: TOOL_OUTPUT_ARTIFACT_SCHEMA_V1,
1197        id,
1198        tool_name: tool_name.to_string(),
1199        source_kind: source_kind.to_string(),
1200        session_id,
1201        path: content_path.display().to_string(),
1202        metadata_path: metadata_path.display().to_string(),
1203        sha256,
1204        byte_count: bytes.len().try_into().unwrap_or(u64::MAX),
1205        line_count: artifact_line_count(bytes),
1206        preview_bytes,
1207        content_type: "text/plain; charset=utf-8",
1208        retention_class: TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS,
1209        spillover_reason: TOOL_OUTPUT_ARTIFACT_SPILLOVER_REASON,
1210        redaction_summary: redacted.summary,
1211        safe_delete_candidate: true,
1212    };
1213    let metadata = serde_json::to_vec_pretty(&artifact).map_err(std::io::Error::other)?;
1214    write_artifact_file_if_absent(&metadata_path, &metadata)?;
1215    Ok(artifact)
1216}
1217
1218fn copy_text_tool_output_artifact_from_path_at_root(
1219    root: &Path,
1220    tool_name: &str,
1221    tool_call_id: &str,
1222    source_kind: &str,
1223    source_path: &Path,
1224    preview_bytes: usize,
1225) -> std::io::Result<ToolOutputArtifactRef> {
1226    let metadata = std::fs::metadata(source_path)?;
1227    if metadata.len() > TOOL_OUTPUT_ARTIFACT_MAX_BYTES {
1228        return Err(std::io::Error::new(
1229            std::io::ErrorKind::InvalidData,
1230            format!(
1231                "artifact source exceeds {} hard limit",
1232                format_size(TOOL_OUTPUT_ARTIFACT_MAX_BYTES_USIZE)
1233            ),
1234        ));
1235    }
1236    if metadata.len() > TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES {
1237        return Err(std::io::Error::new(
1238            std::io::ErrorKind::InvalidData,
1239            format!(
1240                "artifact source exceeds {} redaction limit",
1241                format_size(TOOL_OUTPUT_ARTIFACT_REDACTION_MAX_BYTES_USIZE)
1242            ),
1243        ));
1244    }
1245
1246    let mut source = std::fs::File::open(source_path)?;
1247    let mut source_bytes = Vec::with_capacity(usize::try_from(metadata.len()).unwrap_or(0));
1248    source.read_to_end(&mut source_bytes)?;
1249    let redacted = redact_tool_output_artifact_bytes(&source_bytes)?;
1250    let bytes = redacted.bytes.as_slice();
1251
1252    let sha256 = format!("{:x}", sha2::Sha256::digest(bytes));
1253    let (scope_dir, session_id) = tool_output_artifact_scope_dir(root, tool_call_id);
1254    std::fs::create_dir_all(&scope_dir)?;
1255    let id = format!("tool-artifact-{}", &sha256[..16]);
1256    let content_path = scope_dir.join(format!("{sha256}.txt"));
1257    let metadata_path = scope_dir.join(format!("{sha256}.json"));
1258    ensure_artifact_path_under_root(root, &content_path)?;
1259    ensure_artifact_path_under_root(root, &metadata_path)?;
1260    write_artifact_file_if_absent(&content_path, bytes)?;
1261
1262    let artifact = ToolOutputArtifactRef {
1263        schema: TOOL_OUTPUT_ARTIFACT_SCHEMA_V1,
1264        id,
1265        tool_name: tool_name.to_string(),
1266        source_kind: source_kind.to_string(),
1267        session_id,
1268        path: content_path.display().to_string(),
1269        metadata_path: metadata_path.display().to_string(),
1270        sha256,
1271        byte_count: bytes.len().try_into().unwrap_or(u64::MAX),
1272        line_count: artifact_line_count(bytes),
1273        preview_bytes,
1274        content_type: "text/plain; charset=utf-8",
1275        retention_class: TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS,
1276        spillover_reason: TOOL_OUTPUT_ARTIFACT_SPILLOVER_REASON,
1277        redaction_summary: redacted.summary,
1278        safe_delete_candidate: true,
1279    };
1280    let metadata = serde_json::to_vec_pretty(&artifact).map_err(std::io::Error::other)?;
1281    write_artifact_file_if_absent(&metadata_path, &metadata)?;
1282    Ok(artifact)
1283}
1284
1285fn append_tool_output_artifact_notice(output_text: &mut String, artifact: &ToolOutputArtifactRef) {
1286    let _ = write!(
1287        output_text,
1288        "\n\n[Full tool output artifact: {} ({} bytes, {} lines, sha256 {}). Use read on this path to inspect more.]",
1289        artifact.path, artifact.byte_count, artifact.line_count, artifact.sha256,
1290    );
1291}
1292
/// Appends `line` to `full_text`, inserting a newline separator first unless
/// `full_text` is still empty.
fn append_artifact_source_line(full_text: &mut String, line: &str) {
    let separator = if full_text.is_empty() { "" } else { "\n" };
    full_text.push_str(separator);
    full_text.push_str(line);
}
1299
1300fn record_tool_output_artifact_error(
1301    output_text: &mut String,
1302    details: &mut Option<serde_json::Value>,
1303    error: &std::io::Error,
1304) {
1305    let _ = write!(
1306        output_text,
1307        "\n\n[Tool output artifact persistence failed: {error}. Showing the bounded preview only.]"
1308    );
1309    artifact_details_object(details).insert(
1310        "artifactError".to_string(),
1311        serde_json::json!({
1312            "schema": TOOL_OUTPUT_ARTIFACT_SCHEMA_V1,
1313            "message": error.to_string(),
1314        }),
1315    );
1316}
1317
1318fn attach_text_artifact_if_needed_at_root(
1319    root: &Path,
1320    output_text: &mut String,
1321    details: &mut Option<serde_json::Value>,
1322    tool_name: &str,
1323    tool_call_id: &str,
1324    source_kind: &str,
1325    full_text: &str,
1326) -> bool {
1327    if full_text.len() <= TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES {
1328        return false;
1329    }
1330    match write_text_tool_output_artifact_at_root(
1331        root,
1332        tool_name,
1333        tool_call_id,
1334        source_kind,
1335        full_text,
1336        output_text.len(),
1337    ) {
1338        Ok(artifact) => {
1339            append_tool_output_artifact_notice(output_text, &artifact);
1340            artifact_details_object(details).insert(
1341                "artifact".to_string(),
1342                serde_json::to_value(&artifact).expect("artifact ref serializes"),
1343            );
1344            true
1345        }
1346        Err(err) => {
1347            record_tool_output_artifact_error(output_text, details, &err);
1348            false
1349        }
1350    }
1351}
1352
1353fn attach_text_artifact_if_needed(
1354    output_text: &mut String,
1355    details: &mut Option<serde_json::Value>,
1356    tool_name: &str,
1357    tool_call_id: &str,
1358    source_kind: &str,
1359    full_text: &str,
1360) -> bool {
1361    let root = tool_output_artifact_root();
1362    attach_text_artifact_if_needed_at_root(
1363        &root,
1364        output_text,
1365        details,
1366        tool_name,
1367        tool_call_id,
1368        source_kind,
1369        full_text,
1370    )
1371}
1372
1373fn attach_text_artifact_if_needed_with_root(
1374    root: Option<&Path>,
1375    output_text: &mut String,
1376    details: &mut Option<serde_json::Value>,
1377    tool_name: &str,
1378    tool_call_id: &str,
1379    source_kind: &str,
1380    full_text: &str,
1381) -> bool {
1382    if let Some(root) = root {
1383        attach_text_artifact_if_needed_at_root(
1384            root,
1385            output_text,
1386            details,
1387            tool_name,
1388            tool_call_id,
1389            source_kind,
1390            full_text,
1391        )
1392    } else {
1393        attach_text_artifact_if_needed(
1394            output_text,
1395            details,
1396            tool_name,
1397            tool_call_id,
1398            source_kind,
1399            full_text,
1400        )
1401    }
1402}
1403
1404fn attach_text_artifact_from_path_if_needed_at_root(
1405    root: &Path,
1406    output_text: &mut String,
1407    details: &mut Option<serde_json::Value>,
1408    tool_name: &str,
1409    tool_call_id: &str,
1410    source_kind: &str,
1411    source_path: &Path,
1412) -> bool {
1413    let Ok(metadata) = std::fs::metadata(source_path) else {
1414        return false;
1415    };
1416    if metadata.len() <= u64::try_from(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES).unwrap_or(u64::MAX) {
1417        return false;
1418    }
1419    match copy_text_tool_output_artifact_from_path_at_root(
1420        root,
1421        tool_name,
1422        tool_call_id,
1423        source_kind,
1424        source_path,
1425        output_text.len(),
1426    ) {
1427        Ok(artifact) => {
1428            append_tool_output_artifact_notice(output_text, &artifact);
1429            artifact_details_object(details).insert(
1430                "artifact".to_string(),
1431                serde_json::to_value(&artifact).expect("artifact ref serializes"),
1432            );
1433            true
1434        }
1435        Err(err) => {
1436            record_tool_output_artifact_error(output_text, details, &err);
1437            false
1438        }
1439    }
1440}
1441
1442fn attach_text_artifact_from_path_if_needed(
1443    output_text: &mut String,
1444    details: &mut Option<serde_json::Value>,
1445    tool_name: &str,
1446    tool_call_id: &str,
1447    source_kind: &str,
1448    source_path: &Path,
1449) -> bool {
1450    let root = tool_output_artifact_root();
1451    attach_text_artifact_from_path_if_needed_at_root(
1452        &root,
1453        output_text,
1454        details,
1455        tool_name,
1456        tool_call_id,
1457        source_kind,
1458        source_path,
1459    )
1460}
1461
1462fn attach_text_artifact_from_path_if_needed_with_root(
1463    root: Option<&Path>,
1464    output_text: &mut String,
1465    details: &mut Option<serde_json::Value>,
1466    tool_name: &str,
1467    tool_call_id: &str,
1468    source_kind: &str,
1469    source_path: &Path,
1470) -> bool {
1471    if let Some(root) = root {
1472        attach_text_artifact_from_path_if_needed_at_root(
1473            root,
1474            output_text,
1475            details,
1476            tool_name,
1477            tool_call_id,
1478            source_kind,
1479            source_path,
1480        )
1481    } else {
1482        attach_text_artifact_from_path_if_needed(
1483            output_text,
1484            details,
1485            tool_name,
1486            tool_call_id,
1487            source_kind,
1488            source_path,
1489        )
1490    }
1491}
1492
1493const TOOL_OUTPUT_CACHE_MAX_ENTRIES: usize = 128;
1494const TOOL_OUTPUT_CACHE_MAX_BYTES: usize = 8 * 1024 * 1024;
1495const TOOL_OUTPUT_CACHE_MAX_ENTRY_BYTES: usize = DEFAULT_MAX_BYTES + 64 * 1024;
1496const TOOL_OUTPUT_CACHE_MAX_FINGERPRINT_FILES: usize = 2048;
1497const TOOL_OUTPUT_CACHE_MAX_FINGERPRINT_BYTES: u64 = 8 * 1024 * 1024;
1498const TOOL_OUTPUT_CACHE_MAX_FILE_HASH_BYTES: u64 = 2 * 1024 * 1024;
1499
/// A filesystem path together with the fingerprint it had when it was recorded.
///
/// A cached output is only served while the recorded dependencies compare equal
/// to freshly computed ones.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ToolCacheDependency {
    /// Path the fingerprint was computed for.
    path: PathBuf,
    /// SHA-256 fingerprint of the path's state.
    fingerprint: [u8; 32],
}
1505
/// Selects how a cache dependency path is fingerprinted.
#[derive(Clone, Copy, Debug)]
enum ToolCacheFingerprintMode {
    /// Fingerprint a single regular file via `fingerprint_file_content`.
    FileContent,
    /// Fingerprint a directory's direct children via
    /// `fingerprint_directory_immediate`.
    DirectoryImmediate,
    /// Fingerprint a whole tree via `fingerprint_directory_recursive`.
    DirectoryRecursive,
}
1512
1513#[derive(Debug, Clone)]
1514struct CachedToolOutput {
1515    deps: Vec<ToolCacheDependency>,
1516    output: ToolOutput,
1517    weight: usize,
1518    generation: u64,
1519}
1520
/// Test-only counters describing how the tool output cache was exercised.
#[cfg(test)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct ToolOutputCacheStats {
    /// Lookups answered from the cache.
    hits: usize,
    /// Lookups for keys that had no entry.
    misses: usize,
    /// Successful insertions.
    inserts: usize,
    /// Lookups that found an entry with different dependencies and removed it.
    invalidations: usize,
    /// Insertions rejected because of the output's weight.
    disabled: usize,
    /// Lookups whose key belongs to a side-effecting tool.
    side_effect_accesses: usize,
    /// Insertions attempted for a side-effecting tool's key.
    side_effect_insert_attempts: usize,
}
1532
1533#[derive(Debug, Default)]
1534struct ToolOutputCache {
1535    entries: HashMap<String, CachedToolOutput>,
1536    order: VecDeque<(String, u64)>,
1537    total_bytes: usize,
1538    generation: u64,
1539    #[cfg(test)]
1540    stats: ToolOutputCacheStats,
1541}
1542
1543impl ToolOutputCache {
1544    fn get(&mut self, key: &str, deps: &[ToolCacheDependency]) -> Option<ToolOutput> {
1545        self.generation = self.generation.saturating_add(1);
1546        let generation = self.generation;
1547        #[cfg(test)]
1548        {
1549            if is_side_effect_tool_cache_key(key) {
1550                self.stats.side_effect_accesses = self.stats.side_effect_accesses.saturating_add(1);
1551            }
1552        }
1553
1554        if self
1555            .entries
1556            .get(key)
1557            .is_some_and(|entry| entry.deps == deps)
1558        {
1559            let entry = self.entries.get_mut(key)?;
1560            entry.generation = generation;
1561            self.order.push_back((key.to_string(), generation));
1562            #[cfg(test)]
1563            {
1564                self.stats.hits = self.stats.hits.saturating_add(1);
1565            }
1566            return Some(entry.output.clone());
1567        }
1568
1569        if let Some(removed) = self.entries.remove(key) {
1570            self.total_bytes = self.total_bytes.saturating_sub(removed.weight);
1571            #[cfg(test)]
1572            {
1573                self.stats.invalidations = self.stats.invalidations.saturating_add(1);
1574            }
1575        } else {
1576            #[cfg(test)]
1577            {
1578                self.stats.misses = self.stats.misses.saturating_add(1);
1579            }
1580        }
1581
1582        None
1583    }
1584
1585    fn insert(
1586        &mut self,
1587        key: String,
1588        deps: Vec<ToolCacheDependency>,
1589        output: ToolOutput,
1590        weight: usize,
1591    ) {
1592        if weight == 0 || weight > TOOL_OUTPUT_CACHE_MAX_ENTRY_BYTES {
1593            #[cfg(test)]
1594            {
1595                self.stats.disabled = self.stats.disabled.saturating_add(1);
1596            }
1597            return;
1598        }
1599
1600        #[cfg(test)]
1601        {
1602            if is_side_effect_tool_cache_key(&key) {
1603                self.stats.side_effect_insert_attempts =
1604                    self.stats.side_effect_insert_attempts.saturating_add(1);
1605            }
1606        }
1607
1608        if let Some(removed) = self.entries.remove(&key) {
1609            self.total_bytes = self.total_bytes.saturating_sub(removed.weight);
1610        }
1611
1612        self.generation = self.generation.saturating_add(1);
1613        let generation = self.generation;
1614        self.total_bytes = self.total_bytes.saturating_add(weight);
1615        self.order.push_back((key.clone(), generation));
1616        self.entries.insert(
1617            key,
1618            CachedToolOutput {
1619                deps,
1620                output,
1621                weight,
1622                generation,
1623            },
1624        );
1625        #[cfg(test)]
1626        {
1627            self.stats.inserts = self.stats.inserts.saturating_add(1);
1628        }
1629        self.evict_to_limits();
1630    }
1631
1632    fn evict_to_limits(&mut self) {
1633        while self.entries.len() > TOOL_OUTPUT_CACHE_MAX_ENTRIES
1634            || self.total_bytes > TOOL_OUTPUT_CACHE_MAX_BYTES
1635        {
1636            let Some((key, generation)) = self.order.pop_front() else {
1637                break;
1638            };
1639            if self
1640                .entries
1641                .get(&key)
1642                .is_some_and(|entry| entry.generation == generation)
1643                && let Some(removed) = self.entries.remove(&key)
1644            {
1645                self.total_bytes = self.total_bytes.saturating_sub(removed.weight);
1646            }
1647        }
1648    }
1649}
1650
1651fn tool_output_cache() -> &'static Mutex<ToolOutputCache> {
1652    static CACHE: OnceLock<Mutex<ToolOutputCache>> = OnceLock::new();
1653    CACHE.get_or_init(|| Mutex::new(ToolOutputCache::default()))
1654}
1655
1656fn lock_tool_output_cache() -> std::sync::MutexGuard<'static, ToolOutputCache> {
1657    tool_output_cache()
1658        .lock()
1659        .unwrap_or_else(std::sync::PoisonError::into_inner)
1660}
1661
1662fn tool_cache_key(tool: &str, cwd: &Path, input: &serde_json::Value) -> String {
1663    let input_json = serde_json::to_string(input).unwrap_or_else(|_| input.to_string());
1664    format!("{tool}\0{}\0{input_json}", cwd.display())
1665}
1666
/// Test-only: reports whether `key` was built for one of the side-effecting
/// tools (`write`, `edit`, `bash`), judged by its NUL-terminated tool prefix.
#[cfg(test)]
fn is_side_effect_tool_cache_key(key: &str) -> bool {
    const SIDE_EFFECT_PREFIXES: [&str; 3] = ["write\0", "edit\0", "bash\0"];
    SIDE_EFFECT_PREFIXES
        .iter()
        .any(|prefix| key.starts_with(*prefix))
}
1671
1672fn cached_tool_output(key: &str, deps: Option<&[ToolCacheDependency]>) -> Option<ToolOutput> {
1673    let deps = deps?;
1674    lock_tool_output_cache().get(key, deps)
1675}
1676
1677fn cache_tool_output(key: String, deps: Option<Vec<ToolCacheDependency>>, output: &ToolOutput) {
1678    let Some(deps) = deps else {
1679        return;
1680    };
1681    if output.details.as_ref().is_some_and(|details| {
1682        details.as_object().is_some_and(|details| {
1683            details.contains_key("artifact") || details.contains_key("artifactError")
1684        })
1685    }) {
1686        return;
1687    }
1688    let Some(weight) = cacheable_tool_output_weight(output) else {
1689        return;
1690    };
1691    lock_tool_output_cache().insert(key, deps, output.clone(), weight);
1692}
1693
1694fn stable_cache_dependency_for_path(
1695    path: &Path,
1696    mode: ToolCacheFingerprintMode,
1697    before_deps: Option<&[ToolCacheDependency]>,
1698) -> Option<Vec<ToolCacheDependency>> {
1699    let before_deps = before_deps?;
1700    let after_deps = cache_dependency_for_path(path, mode)?;
1701    (before_deps == after_deps.as_slice()).then_some(after_deps)
1702}
1703
1704fn cacheable_tool_output_weight(output: &ToolOutput) -> Option<usize> {
1705    let mut weight = output
1706        .details
1707        .as_ref()
1708        .and_then(|details| serde_json::to_vec(details).ok())
1709        .map_or(0, |details| details.len());
1710
1711    for block in &output.content {
1712        match block {
1713            ContentBlock::Text(text) => {
1714                weight = weight.saturating_add(text.text.len());
1715                if let Some(signature) = &text.text_signature {
1716                    weight = weight.saturating_add(signature.len());
1717                }
1718            }
1719            ContentBlock::Image(_)
1720            | ContentBlock::Thinking(_)
1721            | ContentBlock::RedactedThinking(_)
1722            | ContentBlock::ToolCall(_) => return None,
1723        }
1724    }
1725
1726    Some(weight)
1727}
1728
1729fn cache_dependency_for_path(
1730    path: &Path,
1731    mode: ToolCacheFingerprintMode,
1732) -> Option<Vec<ToolCacheDependency>> {
1733    let fingerprint = match mode {
1734        ToolCacheFingerprintMode::FileContent => fingerprint_file_content(path)?,
1735        ToolCacheFingerprintMode::DirectoryImmediate => fingerprint_directory_immediate(path)?,
1736        ToolCacheFingerprintMode::DirectoryRecursive => fingerprint_directory_recursive(path)?,
1737    };
1738
1739    Some(vec![ToolCacheDependency {
1740        path: path.to_path_buf(),
1741        fingerprint,
1742    }])
1743}
1744
1745fn fingerprint_file_content(path: &Path) -> Option<[u8; 32]> {
1746    let metadata = std::fs::symlink_metadata(path).ok()?;
1747    if !metadata.is_file() || metadata.len() > TOOL_OUTPUT_CACHE_MAX_FILE_HASH_BYTES {
1748        return None;
1749    }
1750
1751    let bytes = std::fs::read(path).ok()?;
1752    let mut hasher = sha2::Sha256::new();
1753    update_fingerprint_metadata(&mut hasher, Path::new(""), &metadata);
1754    hasher.update(sha2::Sha256::digest(&bytes));
1755    Some(hasher.finalize().into())
1756}
1757
1758fn fingerprint_directory_immediate(path: &Path) -> Option<[u8; 32]> {
1759    let metadata = std::fs::symlink_metadata(path).ok()?;
1760    if !metadata.is_dir() {
1761        return None;
1762    }
1763
1764    let mut entries = std::fs::read_dir(path)
1765        .ok()?
1766        .collect::<std::result::Result<Vec<_>, _>>()
1767        .ok()?;
1768    if entries.len() > TOOL_OUTPUT_CACHE_MAX_FINGERPRINT_FILES {
1769        return None;
1770    }
1771    entries.sort_by_key(std::fs::DirEntry::file_name);
1772
1773    let mut hasher = sha2::Sha256::new();
1774    update_fingerprint_metadata(&mut hasher, Path::new(""), &metadata);
1775    for entry in entries {
1776        let entry_path = entry.path();
1777        let rel = entry.file_name();
1778        let rel = Path::new(&rel);
1779        let entry_metadata = std::fs::symlink_metadata(&entry_path).ok()?;
1780        update_fingerprint_metadata(&mut hasher, rel, &entry_metadata);
1781        if entry_metadata.file_type().is_symlink() {
1782            update_symlink_target(&mut hasher, &entry_path);
1783        }
1784    }
1785
1786    Some(hasher.finalize().into())
1787}
1788
1789fn fingerprint_directory_recursive(path: &Path) -> Option<[u8; 32]> {
1790    let metadata = std::fs::symlink_metadata(path).ok()?;
1791    if metadata.is_file() {
1792        return fingerprint_file_content(path);
1793    }
1794    if !metadata.is_dir() {
1795        return None;
1796    }
1797
1798    let mut budget = FingerprintBudget::default();
1799    let mut hasher = sha2::Sha256::new();
1800    update_fingerprint_metadata(&mut hasher, Path::new(""), &metadata);
1801    fingerprint_tree(path, path, &mut budget, &mut hasher)?;
1802    Some(hasher.finalize().into())
1803}
1804
/// Running totals for one recursive fingerprint walk, used to enforce the
/// entry-count and byte limits.
#[derive(Debug, Default)]
struct FingerprintBudget {
    /// Number of directory entries visited so far.
    entries: usize,
    /// Cumulative size of the regular files accepted for hashing so far.
    bytes: u64,
}
1810
/// Recursively feed the contents of `dir` into `hasher`, visiting entries in
/// sorted path order.
///
/// `root` is the directory the walk started from; each entry is hashed under
/// its path relative to `root` (falling back to the full path if the prefix
/// cannot be stripped). `budget` carries the running entry and byte counts
/// across the whole recursion.
///
/// Returns `None` — aborting the fingerprint — when a directory cannot be
/// listed, an entry's metadata or file contents cannot be read, or any of the
/// entry-count, per-file-size, or total-byte limits is exceeded.
fn fingerprint_tree(
    root: &Path,
    dir: &Path,
    budget: &mut FingerprintBudget,
    hasher: &mut sha2::Sha256,
) -> Option<()> {
    let mut entries = std::fs::read_dir(dir)
        .ok()?
        .collect::<std::result::Result<Vec<_>, _>>()
        .ok()?;
    // Sort so the hash does not depend on the order `read_dir` yields entries.
    entries.sort_by_key(std::fs::DirEntry::path);

    for entry in entries {
        // Every entry (file, directory, symlink, or other) counts against the limit.
        budget.entries = budget.entries.saturating_add(1);
        if budget.entries > TOOL_OUTPUT_CACHE_MAX_FINGERPRINT_FILES {
            return None;
        }

        let entry_path = entry.path();
        let rel = entry_path.strip_prefix(root).unwrap_or(&entry_path);
        // `symlink_metadata` does not follow symlinks, so links are hashed as links.
        let metadata = std::fs::symlink_metadata(&entry_path).ok()?;
        update_fingerprint_metadata(hasher, rel, &metadata);

        if metadata.file_type().is_symlink() {
            update_symlink_target(hasher, &entry_path);
        } else if metadata.is_dir() {
            fingerprint_tree(root, &entry_path, budget, hasher)?;
        } else if metadata.is_file() {
            // Check the size limits before reading so oversized files are never loaded.
            if metadata.len() > TOOL_OUTPUT_CACHE_MAX_FILE_HASH_BYTES {
                return None;
            }
            budget.bytes = budget.bytes.saturating_add(metadata.len());
            if budget.bytes > TOOL_OUTPUT_CACHE_MAX_FINGERPRINT_BYTES {
                return None;
            }
            let bytes = std::fs::read(&entry_path).ok()?;
            // Mix in a digest of the contents rather than the raw bytes.
            hasher.update(sha2::Sha256::digest(&bytes));
        }
    }

    Some(())
}
1853
/// Mix one entry's path and metadata into `hasher`.
///
/// The record consists of, in order: the lossy UTF-8 form of `path`, a `0`
/// separator byte, three flag bytes (is-file, is-dir, is-symlink), the length
/// as little-endian bytes, the modification time in nanoseconds since the Unix
/// epoch as little-endian bytes, and a `0xff` terminator byte.
fn update_fingerprint_metadata(
    hasher: &mut sha2::Sha256,
    path: &Path,
    metadata: &std::fs::Metadata,
) {
    hasher.update(path.to_string_lossy().as_bytes());
    hasher.update([0]);
    let file_type = metadata.file_type();
    hasher.update([
        u8::from(metadata.is_file()),
        u8::from(metadata.is_dir()),
        u8::from(file_type.is_symlink()),
    ]);
    hasher.update(metadata.len().to_le_bytes());
    // Falls back to 0 when the mtime is unavailable or predates the Unix epoch.
    let modified_nanos = metadata
        .modified()
        .ok()
        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
        .map_or(0, |duration| duration.as_nanos());
    hasher.update(modified_nanos.to_le_bytes());
    hasher.update([0xff]);
}
1876
/// Mix the target of the symlink at `path` into `hasher`.
///
/// The lossy UTF-8 form of the link target is hashed when it can be read; a
/// `0xfe` terminator byte is always appended, so an unreadable link still
/// contributes to the hash.
fn update_symlink_target(hasher: &mut sha2::Sha256, path: &Path) {
    if let Ok(target) = std::fs::read_link(path) {
        hasher.update(target.to_string_lossy().as_bytes());
    }
    hasher.update([0xfe]);
}
1883
/// Test-only helper: replace the shared tool output cache with a default
/// (empty) instance.
#[cfg(test)]
fn reset_tool_output_cache_for_tests() {
    *lock_tool_output_cache() = ToolOutputCache::default();
}
1888
/// Test-only helper: return a copy of the shared tool output cache's current
/// statistics.
#[cfg(test)]
fn tool_output_cache_stats_for_tests() -> ToolOutputCacheStats {
    lock_tool_output_cache().stats
}
1893
/// Render a byte count as a short human-readable string.
///
/// Counts below 1024 are printed as whole bytes (`"512B"`). Larger counts are
/// printed with one decimal place in binary kilobytes (`"1.5KB"`) or, from
/// 1024 * 1024 bytes upward, in megabytes (`"2.0MB"`).
#[allow(clippy::cast_precision_loss)]
fn format_size(bytes: usize) -> String {
    const KB: usize = 1024;
    const MB: usize = 1024 * 1024;

    // Largest unit first: the first guard that matches decides the suffix.
    match bytes {
        count if count >= MB => format!("{:.1}MB", count as f64 / MB as f64),
        count if count >= KB => format!("{:.1}KB", count as f64 / KB as f64),
        _ => format!("{bytes}B"),
    }
}
1908
/// Length of `s` as JavaScript's `String.length` would report it: the number
/// of UTF-16 code units, not UTF-8 bytes or Unicode scalar values.
#[cfg(test)]
fn js_string_length(s: &str) -> usize {
    // Each char contributes one code unit, or two when it needs a surrogate pair.
    s.chars().map(char::len_utf16).sum()
}
1914
1915// ============================================================================
1916// Path Utilities (port of pi-mono path-utils.ts)
1917// ============================================================================
1918
/// Whether `c` is one of the non-ASCII space characters that path handling
/// treats as a plain space: U+00A0, U+2000 through U+200A, U+202F, U+205F and
/// U+3000.
fn is_special_unicode_space(c: char) -> bool {
    matches!(
        c,
        '\u{00A0}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}'
    )
}
1923
1924fn normalize_unicode_spaces(s: &str) -> String {
1925    s.chars()
1926        .map(|c| if is_special_unicode_space(c) { ' ' } else { c })
1927        .collect()
1928}
1929
/// Fold typographic variants to their ASCII equivalents in a single pass:
/// special Unicode spaces become `' '`, curly single quotes become `'\''`,
/// curly double quotes become `'"'`, and the various dashes (U+2010..=U+2015
/// and U+2212) become `'-'`. All other characters are kept as-is.
#[cfg(test)]
fn normalize_for_match(s: &str) -> String {
    // The character classes are disjoint, so arm order does not affect the result.
    let fold = |c: char| match c {
        '\u{2018}' | '\u{2019}' => '\'',
        '\u{201C}'..='\u{201F}' => '"',
        '\u{2010}'..='\u{2015}' | '\u{2212}' => '-',
        space if is_special_unicode_space(space) => ' ',
        other => other,
    };

    // Reserve up front: every replacement is no longer than what it replaces.
    let mut folded = String::with_capacity(s.len());
    folded.extend(s.chars().map(fold));
    folded
}
1952
1953fn expand_path(file_path: &str) -> String {
1954    let normalized = normalize_unicode_spaces(file_path);
1955    if normalized == "~" {
1956        return dirs::home_dir()
1957            .unwrap_or_else(|| PathBuf::from("~"))
1958            .to_string_lossy()
1959            .to_string();
1960    }
1961    if let Some(rest) = normalized.strip_prefix("~/") {
1962        let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("~"));
1963        return home.join(rest).to_string_lossy().to_string();
1964    }
1965    normalized
1966}
1967
1968/// Resolve a path relative to `cwd`. Handles `~` expansion and absolute paths.
1969fn resolve_to_cwd(file_path: &str, cwd: &Path) -> PathBuf {
1970    let expanded = expand_path(file_path);
1971    let expanded_path = PathBuf::from(expanded);
1972    if expanded_path.is_absolute() {
1973        expanded_path
1974    } else {
1975        cwd.join(expanded_path)
1976    }
1977}
1978
/// Produce the macOS screenshot spelling of `file_path`, in which the space
/// before `AM.` / `PM.` is U+202F (narrow no-break space) instead of an ASCII
/// space. Input without those sequences is returned unchanged.
fn try_mac_os_screenshot_path(file_path: &str) -> String {
    let morning_fixed = file_path.replace(" AM.", "\u{202F}AM.");
    morning_fixed.replace(" PM.", "\u{202F}PM.")
}
1985
/// Produce the variant of `file_path` in which every straight apostrophe is
/// replaced by U+2019 (right single quotation mark), the form macOS uses in
/// screenshot file names.
fn try_curly_quote_variant(file_path: &str) -> String {
    file_path
        .chars()
        .map(|c| if c == '\'' { '\u{2019}' } else { c })
        .collect()
}
1990
/// Return `file_path` in Unicode Normalization Form D (canonical
/// decomposition).
fn try_nfd_variant(file_path: &str) -> String {
    // NFD normalization - decompose characters into base + combining marks
    // This handles macOS HFS+ filesystem normalization differences
    use unicode_normalization::UnicodeNormalization;
    file_path.nfd().collect::<String>()
}
1997
/// Whether metadata can be read for `path` (following symlinks). Any error,
/// including a permission failure, is reported as `false`.
fn file_exists(path: &Path) -> bool {
    path.exists()
}
2001
2002/// Resolve a file path for reading, including macOS screenshot name variants.
2003pub(crate) fn resolve_read_path(file_path: &str, cwd: &Path) -> PathBuf {
2004    let resolved = normalize_dot_segments(&resolve_to_cwd(file_path, cwd));
2005    let normalized_cwd = normalize_dot_segments(cwd);
2006    let within_cwd = resolved.starts_with(&normalized_cwd);
2007    if within_cwd && file_exists(&resolved) {
2008        return resolved;
2009    }
2010    if !within_cwd {
2011        // Avoid probing the filesystem outside the working directory.
2012        return resolved;
2013    }
2014
2015    let Some(resolved_str) = resolved.to_str() else {
2016        return resolved;
2017    };
2018
2019    let am_pm_variant = try_mac_os_screenshot_path(resolved_str);
2020    if am_pm_variant.ne(resolved_str) {
2021        let candidate = PathBuf::from(&am_pm_variant);
2022        if candidate.starts_with(&normalized_cwd) && file_exists(&candidate) {
2023            return candidate;
2024        }
2025    }
2026
2027    let nfd_variant = try_nfd_variant(resolved_str);
2028    if nfd_variant.ne(resolved_str) {
2029        let candidate = PathBuf::from(&nfd_variant);
2030        if candidate.starts_with(&normalized_cwd) && file_exists(&candidate) {
2031            return candidate;
2032        }
2033    }
2034
2035    let curly_variant = try_curly_quote_variant(resolved_str);
2036    if curly_variant.ne(resolved_str) {
2037        let candidate = PathBuf::from(&curly_variant);
2038        if candidate.starts_with(&normalized_cwd) && file_exists(&candidate) {
2039            return candidate;
2040        }
2041    }
2042
2043    let nfd_curly_variant = try_curly_quote_variant(&nfd_variant);
2044    if nfd_curly_variant.ne(resolved_str) {
2045        let candidate = PathBuf::from(&nfd_curly_variant);
2046        if candidate.starts_with(&normalized_cwd) && file_exists(&candidate) {
2047            return candidate;
2048        }
2049    }
2050
2051    resolved
2052}
2053
2054fn enforce_cwd_scope(path: &Path, cwd: &Path, action: &str) -> Result<PathBuf> {
2055    let canonical_path = crate::extensions::safe_canonicalize(path);
2056    let canonical_cwd = crate::extensions::safe_canonicalize(cwd);
2057    if !canonical_path.starts_with(&canonical_cwd) {
2058        return Err(Error::validation(format!(
2059            "Cannot {action} outside the working directory (resolved: {}, cwd: {})",
2060            canonical_path.display(),
2061            canonical_cwd.display()
2062        )));
2063    }
2064    Ok(canonical_path)
2065}
2066
2067/// Same scoping contract as `enforce_cwd_scope`, but also accepts paths under
2068/// the configured pi-agent directory (`Config::global_dir()`, default
2069/// `~/.pi/agent/`, override via `PI_CODING_AGENT_DIR`).
2070///
2071/// Read access is broadened so the model can fetch the bodies of skill files,
2072/// prompt templates, and other resources that ship under the agent dir
2073/// without needing to fall back to a `bash cat`. Write/edit/grep/find/list
2074/// stay strictly cwd-only — broadening write access would let a misbehaving
2075/// model persist instructions into the agent dir, which is a much higher-
2076/// risk surface than the read case warrants. See pi_agent_rust#71.
2077///
2078/// Symlink escapes remain blocked because `safe_canonicalize` resolves
2079/// symlinks before the prefix check, so e.g. `~/.pi/agent/skills/foo/SKILL.md`
2080/// pointing at `/etc/passwd` resolves to `/etc/passwd` and fails the prefix
2081/// test against both cwd and agent dir.
2082fn enforce_read_scope_with_roots(path: &Path, cwd: &Path, agent_dir: &Path) -> Result<PathBuf> {
2083    let canonical_path = crate::extensions::safe_canonicalize(path);
2084    let canonical_cwd = crate::extensions::safe_canonicalize(cwd);
2085    if canonical_path.starts_with(&canonical_cwd) {
2086        return Ok(canonical_path);
2087    }
2088
2089    let canonical_agent = crate::extensions::safe_canonicalize(agent_dir);
2090    if canonical_path.starts_with(&canonical_agent) {
2091        return Ok(canonical_path);
2092    }
2093
2094    Err(Error::validation(format!(
2095        "Cannot read outside the working directory or agent dir \
2096         (resolved: {}, cwd: {}, agent dir: {})",
2097        canonical_path.display(),
2098        canonical_cwd.display(),
2099        canonical_agent.display(),
2100    )))
2101}
2102
/// Convenience wrapper around `enforce_read_scope_with_roots` that uses
/// `Config::global_dir()` as the agent dir.
///
/// # Errors
///
/// Propagates the validation error from `enforce_read_scope_with_roots`.
fn enforce_read_scope(path: &Path, cwd: &Path) -> Result<PathBuf> {
    let agent_dir = crate::config::Config::global_dir();
    enforce_read_scope_with_roots(path, cwd, &agent_dir)
}
2108
2109// ============================================================================
2110// CLI @file Processor (used by src/main.rs)
2111// ============================================================================
2112
/// Result of processing `@file` CLI arguments.
#[derive(Debug, Clone, Default)]
pub struct ProcessedFiles {
    /// Concatenated `<file name="...">` blocks: embedded text file contents,
    /// notices, and references to attached images.
    pub text: String,
    /// Base64-encoded image attachments, in the order they were processed.
    pub images: Vec<ImageContent>,
}
2119
/// Lexically collapse `.` and `..` components of `path` without touching the
/// filesystem (symlinks are not resolved).
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - A `..` that cannot be resolved is discarded when the path has a root or a
///   prefix, and kept for purely relative paths (so `../a` stays `../a`).
fn normalize_dot_segments(path: &Path) -> PathBuf {
    use std::ffi::{OsStr, OsString};
    use std::path::Component;

    // `anchor` collects the prefix/root; `stack` holds the remaining components.
    let mut anchor = PathBuf::new();
    let mut anchored = false;
    let mut stack: Vec<OsString> = Vec::new();

    for component in path.components() {
        match component {
            Component::Prefix(_) | Component::RootDir => {
                anchor.push(component.as_os_str());
                anchored = true;
            }
            Component::CurDir => {}
            Component::Normal(segment) => stack.push(segment.to_os_string()),
            Component::ParentDir => {
                // Only a real (non-"..") component can be cancelled out.
                let can_pop = stack
                    .last()
                    .is_some_and(|top| top.as_os_str() != OsStr::new(".."));
                if can_pop {
                    stack.pop();
                } else if !anchored {
                    stack.push(OsString::from(".."));
                }
            }
        }
    }

    anchor.extend(stack);
    anchor
}
2160
/// Fuzzing entry point: exposes the private `normalize_dot_segments` when the
/// `fuzzing` feature is enabled.
#[cfg(feature = "fuzzing")]
pub fn fuzz_normalize_dot_segments(path: &Path) -> PathBuf {
    normalize_dot_segments(path)
}
2165
/// Sync the directory containing `path` to disk.
///
/// Opens the parent directory of `path` and calls `sync_all` on it. A path
/// with no parent (for example `/` or the empty path) is a no-op. A bare
/// relative file name has an empty parent, which is treated as the current
/// directory.
///
/// # Errors
///
/// Returns the I/O error from opening or syncing the parent directory.
#[cfg(unix)]
fn sync_parent_dir(path: &Path) -> std::io::Result<()> {
    let Some(parent) = path.parent() else {
        return Ok(());
    };
    // `Path::new("file.txt").parent()` is `Some("")`; opening "" fails with
    // NotFound, so map the empty parent to "." explicitly.
    let dir = if parent.as_os_str().is_empty() {
        Path::new(".")
    } else {
        parent
    };
    std::fs::File::open(dir)?.sync_all()
}
2173
/// Non-Unix stand-in for `sync_parent_dir`: does nothing and always succeeds.
#[cfg(not(unix))]
fn sync_parent_dir(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
2178
/// Escape `value` for use inside a double-quoted `<file name="...">`
/// attribute.
///
/// `&`, `"`, `<` and `>` become named entities; newline, carriage return and
/// tab become numeric character references. Everything else is copied
/// through unchanged.
fn escape_file_tag_attribute(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let entity = match ch {
            '&' => Some("&amp;"),
            '"' => Some("&quot;"),
            '<' => Some("&lt;"),
            '>' => Some("&gt;"),
            '\n' => Some("&#10;"),
            '\r' => Some("&#13;"),
            '\t' => Some("&#9;"),
            _ => None,
        };
        match entity {
            Some(replacement) => escaped.push_str(replacement),
            None => escaped.push(ch),
        }
    }
    escaped
}
2195
/// Format `path` with `Path::display` and escape the result for use as a
/// `<file name="...">` attribute value.
fn escaped_file_tag_name(path: &Path) -> String {
    escape_file_tag_attribute(&path.display().to_string())
}
2199
/// Append a `<file name="...">` block to `out` whose body is `notice` on its
/// own line, followed by a trailing newline after the closing tag.
///
/// The path is attribute-escaped; `notice` is written verbatim.
fn append_file_notice_block(out: &mut String, path: &Path, notice: &str) {
    let path_str = escaped_file_tag_name(path);
    // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
    let _ = writeln!(out, "<file name=\"{path_str}\">\n{notice}\n</file>");
}
2204
2205fn append_image_file_ref(out: &mut String, path: &Path, note: Option<&str>) {
2206    let path_str = escaped_file_tag_name(path);
2207    match note {
2208        Some(text) => {
2209            let _ = writeln!(out, "<file name=\"{path_str}\">{text}</file>");
2210        }
2211        None => {
2212            let _ = writeln!(out, "<file name=\"{path_str}\"></file>");
2213        }
2214    }
2215}
2216
/// Append a `<file name="...">` block containing the (lossily decoded) text of
/// `bytes` to `out`.
///
/// The content is passed through `truncate_head` with `DEFAULT_MAX_LINES` and
/// `DEFAULT_MAX_BYTES`. When truncation happened, a
/// `... [Truncated: ...]` summary line is appended after the content;
/// otherwise a newline is added if the content does not already end with one.
fn append_text_file_block(out: &mut String, path: &Path, bytes: &[u8]) {
    // Invalid UTF-8 sequences are replaced rather than rejected.
    let content = String::from_utf8_lossy(bytes);
    let path_str = escaped_file_tag_name(path);
    let _ = writeln!(out, "<file name=\"{path_str}\">");

    let truncation = truncate_head(content.into_owned(), DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES);
    // Only untruncated content may need a newline so the closing tag starts a new line.
    let needs_trailing_newline = !truncation.truncated && !truncation.content.ends_with('\n');
    out.push_str(&truncation.content);

    if truncation.truncated {
        // Note: no newline follows the summary, so `</file>` lands on the same line.
        let _ = write!(
            out,
            "\n... [Truncated: showing {}/{} lines, {}/{} bytes]",
            truncation.output_lines,
            truncation.total_lines,
            format_size(truncation.output_bytes),
            format_size(truncation.total_bytes)
        );
    } else if needs_trailing_newline {
        out.push('\n');
    }
    let _ = writeln!(out, "</file>");
}
2240
2241fn maybe_append_image_argument(
2242    out: &mut ProcessedFiles,
2243    absolute_path: &Path,
2244    bytes: &[u8],
2245    auto_resize_images: bool,
2246) -> Result<bool> {
2247    let Some(mime_type) = detect_supported_image_mime_type_from_bytes(bytes) else {
2248        return Ok(false);
2249    };
2250
2251    let resized = if auto_resize_images {
2252        resize_image_if_needed(bytes, mime_type)?
2253    } else {
2254        ResizedImage::original(bytes.to_vec(), mime_type)
2255    };
2256
2257    if resized.bytes.len() > IMAGE_MAX_BYTES {
2258        let msg = if resized.resized {
2259            format!(
2260                "[Image is too large ({} bytes) after resizing. Max allowed is {} bytes.]",
2261                resized.bytes.len(),
2262                IMAGE_MAX_BYTES
2263            )
2264        } else {
2265            format!(
2266                "[Image is too large ({} bytes). Max allowed is {} bytes.]",
2267                resized.bytes.len(),
2268                IMAGE_MAX_BYTES
2269            )
2270        };
2271        append_file_notice_block(&mut out.text, absolute_path, &msg);
2272        return Ok(true);
2273    }
2274
2275    let base64_data =
2276        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &resized.bytes);
2277    out.images.push(ImageContent {
2278        data: base64_data,
2279        mime_type: resized.mime_type.to_string(),
2280    });
2281
2282    let note = if resized.resized {
2283        if let (Some(ow), Some(oh), Some(w), Some(h)) = (
2284            resized.original_width,
2285            resized.original_height,
2286            resized.width,
2287            resized.height,
2288        ) {
2289            if w > 0 {
2290                let scale = f64::from(ow) / f64::from(w);
2291                Some(format!(
2292                    "[Image: original {ow}x{oh}, displayed at {w}x{h}. Multiply coordinates by {scale:.2} to map to original image.]"
2293                ))
2294            } else {
2295                Some(format!(
2296                    "[Image: original {ow}x{oh}, displayed at {w}x{h}.]"
2297                ))
2298            }
2299        } else {
2300            None
2301        }
2302    } else {
2303        None
2304    };
2305    append_image_file_ref(&mut out.text, absolute_path, note.as_deref());
2306    Ok(true)
2307}
2308
/// Process `@file` arguments into a single text prefix and image attachments.
///
/// Matches the legacy TypeScript behavior:
/// - Resolves paths (including `~` expansion + macOS screenshot variants)
/// - Skips empty files
/// - For images: attaches image blocks and appends `<file name="...">...</file>` references
/// - For text: embeds the file contents inside `<file>` tags
///
/// Directories and files larger than `READ_TOOL_MAX_BYTES` are not read; a
/// notice block is appended for them instead.
///
/// # Errors
///
/// Returns an error — abandoning all remaining arguments — when a path fails
/// the read-scope check, its metadata cannot be read, its contents cannot be
/// read, or image processing fails.
pub fn process_file_arguments(
    file_args: &[String],
    cwd: &Path,
    auto_resize_images: bool,
) -> Result<ProcessedFiles> {
    let mut out = ProcessedFiles::default();

    for file_arg in file_args {
        let resolved = resolve_read_path(file_arg, cwd);
        let absolute_path = normalize_dot_segments(&resolved);
        // Canonicalizes the path and rejects anything outside cwd / the agent dir.
        let absolute_path = enforce_read_scope(&absolute_path, cwd)?;

        let meta = std::fs::metadata(&absolute_path).map_err(|e| {
            Error::tool(
                "read",
                format!("Cannot access file {}: {e}", absolute_path.display()),
            )
        })?;
        if meta.is_dir() {
            append_file_notice_block(
                &mut out.text,
                &absolute_path,
                "[Path is a directory, not a file. Use the list tool to view its contents.]",
            );
            continue;
        }

        // Empty files contribute nothing to the output.
        if meta.len() == 0 {
            continue;
        }

        // Check the size from metadata so oversized files are never loaded into memory.
        if meta.len() > READ_TOOL_MAX_BYTES {
            append_file_notice_block(
                &mut out.text,
                &absolute_path,
                &format!(
                    "[File is too large ({} bytes). Max allowed is {} bytes.]",
                    meta.len(),
                    READ_TOOL_MAX_BYTES
                ),
            );
            continue;
        }

        let bytes = std::fs::read(&absolute_path).map_err(|e| {
            Error::tool(
                "read",
                format!("Could not read file {}: {e}", absolute_path.display()),
            )
        })?;

        // Images are detected from the file's bytes; anything else is treated as text.
        if maybe_append_image_argument(&mut out, &absolute_path, &bytes, auto_resize_images)? {
            continue;
        }

        append_text_file_block(&mut out.text, &absolute_path, &bytes);
    }

    Ok(out)
}
2376
/// Resolve a file path relative to the current working directory.
///
/// Applies `resolve_to_cwd` (`~` expansion, absolute-path passthrough) and
/// then lexically collapses `.` / `..` segments with `normalize_dot_segments`.
/// No filesystem access is performed by this function itself.
fn resolve_path(file_path: &str, cwd: &Path) -> PathBuf {
    normalize_dot_segments(&resolve_to_cwd(file_path, cwd))
}
2382
/// Fuzzing entry point: exposes the private `resolve_path` when the `fuzzing`
/// feature is enabled.
#[cfg(feature = "fuzzing")]
pub fn fuzz_resolve_path(file_path: &str, cwd: &Path) -> PathBuf {
    resolve_path(file_path, cwd)
}
2387
/// Sniff the MIME type of a supported image format from its leading bytes.
///
/// Recognizes PNG, JPEG, GIF (87a/89a) and WebP (a `RIFF` container whose
/// form type at bytes 8..12 is `WEBP`), matching the legacy tool. Returns
/// `None` for anything else, including inputs too short to hold a signature.
pub(crate) fn detect_supported_image_mime_type_from_bytes(bytes: &[u8]) -> Option<&'static str> {
    const PNG_SIGNATURE: &[u8] = b"\x89PNG\r\n\x1A\n";
    const JPEG_SIGNATURE: &[u8] = &[0xFF, 0xD8, 0xFF];

    // `starts_with` / `get` are false / `None` on short input, so no length checks are needed.
    if bytes.starts_with(PNG_SIGNATURE) {
        Some("image/png")
    } else if bytes.starts_with(JPEG_SIGNATURE) {
        Some("image/jpeg")
    } else if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        Some("image/gif")
    } else if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        Some("image/webp")
    } else {
        None
    }
}
2404
/// Outcome of `resize_image_if_needed`: the image bytes to use plus what is
/// known about its dimensions.
#[derive(Debug, Clone)]
pub(crate) struct ResizedImage {
    /// Encoded image data (the original bytes when `resized` is `false`).
    pub(crate) bytes: Vec<u8>,
    /// MIME type describing `bytes`.
    pub(crate) mime_type: &'static str,
    /// `true` when `bytes` was produced by re-encoding rather than passed through.
    pub(crate) resized: bool,
    /// Width of the image in `bytes`, when known.
    pub(crate) width: Option<u32>,
    /// Height of the image in `bytes`, when known.
    pub(crate) height: Option<u32>,
    /// Width of the source image, when known.
    pub(crate) original_width: Option<u32>,
    /// Height of the source image, when known.
    pub(crate) original_height: Option<u32>,
}
2415
impl ResizedImage {
    /// Wrap untouched image bytes: `resized` is `false` and all dimension
    /// fields are `None`.
    pub(crate) const fn original(bytes: Vec<u8>, mime_type: &'static str) -> Self {
        Self {
            bytes,
            mime_type,
            resized: false,
            width: None,
            height: None,
            original_width: None,
            original_height: None,
        }
    }
}
2429
/// Downscale and/or re-encode an image so it fits the dimension and byte
/// limits for model input.
///
/// The original bytes are returned untouched (`resized: false`) when the
/// image cannot be decoded, or when it already fits within 2000x2000 pixels
/// and `IMAGE_MAX_BYTES`. Otherwise the image is re-encoded, trying
/// progressively lower JPEG quality and then progressively smaller
/// dimensions, and the first encoding that fits is returned. If nothing fits,
/// the last attempted encoding is returned even though it exceeds
/// `IMAGE_MAX_BYTES`; callers must check the size themselves.
///
/// The reported `width`/`height` always describe the image in the returned
/// bytes.
///
/// # Errors
///
/// Returns a tool error if the image format cannot be probed or if PNG/JPEG
/// encoding fails.
#[cfg(feature = "image-resize")]
#[allow(clippy::too_many_lines)]
pub(crate) fn resize_image_if_needed(
    bytes: &[u8],
    mime_type: &'static str,
) -> Result<ResizedImage> {
    // Match legacy behavior from pi-mono `utils/image-resize.ts`.
    //
    // Strategy:
    // 1) If image already fits within max dims AND max bytes: return original
    // 2) Otherwise resize to maxWidth/maxHeight (2000x2000)
    // 3) Encode as PNG and JPEG, pick smaller
    // 4) If still too large, try JPEG with different quality steps
    // 5) If still too large, progressively scale down dimensions
    //
    // Note: even if dimensions don't change, an oversized image may be re-encoded to fit max bytes.
    use image::codecs::jpeg::JpegEncoder;
    use image::codecs::png::PngEncoder;
    use image::imageops::FilterType;
    use image::{GenericImageView, ImageEncoder, ImageReader, Limits};
    use std::io::Cursor;

    const MAX_WIDTH: u32 = 2000;
    const MAX_HEIGHT: u32 = 2000;
    const DEFAULT_JPEG_QUALITY: u8 = 80;
    const QUALITY_STEPS: [u8; 4] = [85, 70, 55, 40];
    const SCALE_STEPS: [f64; 5] = [1.0, 0.75, 0.5, 0.35, 0.25];

    /// Compute `value * numerator / denominator`, rounded to nearest, in
    /// 64-bit arithmetic; saturates at `u32::MAX` and treats a zero
    /// denominator as 1.
    fn scale_u32(value: u32, numerator: u32, denominator: u32) -> u32 {
        let den = u64::from(denominator).max(1);
        let num = u64::from(value) * u64::from(numerator);
        let rounded = (num + den / 2) / den;
        u32::try_from(rounded).unwrap_or(u32::MAX)
    }

    /// Encode `img` as an RGBA8 PNG.
    fn encode_png(img: &image::DynamicImage) -> Result<Vec<u8>> {
        let rgba = img.to_rgba8();
        let mut out = Vec::new();
        PngEncoder::new(&mut out)
            .write_image(
                rgba.as_raw(),
                rgba.width(),
                rgba.height(),
                image::ExtendedColorType::Rgba8,
            )
            .map_err(|e| Error::tool("read", format!("Failed to encode PNG: {e}")))?;
        Ok(out)
    }

    /// Encode `img` as an RGB8 JPEG at the given quality.
    fn encode_jpeg(img: &image::DynamicImage, quality: u8) -> Result<Vec<u8>> {
        let rgb = img.to_rgb8();
        let mut out = Vec::new();
        JpegEncoder::new_with_quality(&mut out, quality)
            .write_image(
                rgb.as_raw(),
                rgb.width(),
                rgb.height(),
                image::ExtendedColorType::Rgb8,
            )
            .map_err(|e| Error::tool("read", format!("Failed to encode JPEG: {e}")))?;
        Ok(out)
    }

    /// Resize `img` to exactly `width` x `height`, encode it as both PNG and
    /// JPEG, and return the smaller encoding (PNG wins ties) with its MIME type.
    fn try_both_formats(
        img: &image::DynamicImage,
        width: u32,
        height: u32,
        jpeg_quality: u8,
    ) -> Result<(Vec<u8>, &'static str)> {
        let resized = img.resize_exact(width, height, FilterType::Lanczos3);
        let png = encode_png(&resized)?;
        let jpeg = encode_jpeg(&resized, jpeg_quality)?;
        if png.len() <= jpeg.len() {
            Ok((png, "image/png"))
        } else {
            Ok((jpeg, "image/jpeg"))
        }
    }

    // Use ImageReader with explicit limits to prevent decompression bomb attacks.
    // 128MB allocation limit allows reasonable images but stops massive expansions.
    let mut limits = Limits::default();
    limits.max_alloc = Some(128 * 1024 * 1024);

    let mut reader = ImageReader::new(Cursor::new(bytes))
        .with_guessed_format()
        .map_err(|e| Error::tool("read", format!("Failed to detect image format: {e}")))?;
    reader.limits(limits);

    // An undecodable image is passed through untouched rather than rejected.
    // ubs:ignore false positive: image decode, not JWT processing.
    let Ok(img) = reader.decode() else {
        return Ok(ResizedImage::original(bytes.to_vec(), mime_type));
    };

    let (original_width, original_height) = img.dimensions();
    let original_size = bytes.len();

    if original_width <= MAX_WIDTH
        && original_height <= MAX_HEIGHT
        && original_size <= IMAGE_MAX_BYTES
    {
        return Ok(ResizedImage {
            bytes: bytes.to_vec(),
            mime_type,
            resized: false,
            width: Some(original_width),
            height: Some(original_height),
            original_width: Some(original_width),
            original_height: Some(original_height),
        });
    }

    // Fit within MAX_WIDTH x MAX_HEIGHT while preserving the aspect ratio.
    let mut target_width = original_width;
    let mut target_height = original_height;

    if target_width > MAX_WIDTH {
        target_height = scale_u32(target_height, MAX_WIDTH, target_width);
        target_width = MAX_WIDTH;
    }
    if target_height > MAX_HEIGHT {
        target_width = scale_u32(target_width, MAX_HEIGHT, target_height);
        target_height = MAX_HEIGHT;
    }

    // Build the result for a re-encoded image with the given final dimensions.
    let finish = |(encoded, encoded_mime): (Vec<u8>, &'static str), width: u32, height: u32| {
        ResizedImage {
            bytes: encoded,
            mime_type: encoded_mime,
            resized: true,
            width: Some(width),
            height: Some(height),
            original_width: Some(original_width),
            original_height: Some(original_height),
        }
    };

    // `final_width`/`final_height` always describe the dimensions `best` was encoded at.
    let mut best = try_both_formats(&img, target_width, target_height, DEFAULT_JPEG_QUALITY)?;
    let mut final_width = target_width;
    let mut final_height = target_height;

    if best.0.len() <= IMAGE_MAX_BYTES {
        return Ok(finish(best, final_width, final_height));
    }

    for quality in QUALITY_STEPS {
        best = try_both_formats(&img, target_width, target_height, quality)?;
        if best.0.len() <= IMAGE_MAX_BYTES {
            return Ok(finish(best, final_width, final_height));
        }
    }

    for scale in SCALE_STEPS {
        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let (scaled_width, scaled_height) = (
            (f64::from(target_width) * scale).round() as u32,
            (f64::from(target_height) * scale).round() as u32,
        );

        // Stop before committing dimensions that are never encoded; otherwise the
        // fallback below would report a size that does not match `best`.
        if scaled_width < 100 || scaled_height < 100 {
            break;
        }
        final_width = scaled_width;
        final_height = scaled_height;

        for quality in QUALITY_STEPS {
            best = try_both_formats(&img, final_width, final_height, quality)?;
            if best.0.len() <= IMAGE_MAX_BYTES {
                return Ok(finish(best, final_width, final_height));
            }
        }
    }

    // Nothing fit: hand back the last attempt and let the caller enforce the byte limit.
    Ok(finish(best, final_width, final_height))
}
2624
/// Stand-in used when the `image-resize` feature is disabled: returns the
/// input bytes unchanged as a non-resized image with no dimension data.
#[cfg(not(feature = "image-resize"))]
#[expect(
    clippy::unnecessary_wraps,
    reason = "The no-feature stub preserves the feature-enabled Result API at shared call sites."
)]
pub(crate) fn resize_image_if_needed(
    bytes: &[u8],
    mime_type: &'static str,
) -> Result<ResizedImage> {
    Ok(ResizedImage::original(bytes.to_vec(), mime_type))
}
2636
2637// ============================================================================
2638// Tool Registry
2639// ============================================================================
2640
/// Registry of enabled tools for a Pi run.
///
/// The registry is constructed from configuration (enabled tool names + settings) and is used for:
/// - Looking up a tool implementation by name during tool-call execution.
/// - Enumerating tool schemas when building provider requests.
pub struct ToolRegistry {
    /// Registered tools, kept in insertion order.
    tools: Vec<Box<dyn Tool>>,
}
2649
2650impl ToolRegistry {
2651    /// Create a new registry with the specified tools enabled.
2652    pub fn new(enabled: &[&str], cwd: &Path, config: Option<&Config>) -> Self {
2653        let mut tools: Vec<Box<dyn Tool>> = Vec::new();
2654        let shell_path = config.and_then(|c| c.shell_path.clone());
2655        let shell_command_prefix = config.and_then(|c| c.shell_command_prefix.clone());
2656        let image_auto_resize = config.is_none_or(Config::image_auto_resize);
2657        let block_images = config
2658            .and_then(|c| c.images.as_ref().and_then(|i| i.block_images))
2659            .unwrap_or(false);
2660
2661        for name in enabled {
2662            match *name {
2663                "read" => tools.push(Box::new(ReadTool::with_settings(
2664                    cwd,
2665                    image_auto_resize,
2666                    block_images,
2667                ))),
2668                "bash" => tools.push(Box::new(BashTool::with_shell(
2669                    cwd,
2670                    shell_path.clone(),
2671                    shell_command_prefix.clone(),
2672                ))),
2673                "edit" => tools.push(Box::new(EditTool::new(cwd))),
2674                "write" => tools.push(Box::new(WriteTool::new(cwd))),
2675                "grep" => tools.push(Box::new(GrepTool::new(cwd))),
2676                "find" => tools.push(Box::new(FindTool::new(cwd))),
2677                "ls" => tools.push(Box::new(LsTool::new(cwd))),
2678                "hashline_edit" => tools.push(Box::new(HashlineEditTool::new(cwd))),
2679                _ => {}
2680            }
2681        }
2682
2683        Self { tools }
2684    }
2685
2686    /// Construct a registry from a pre-built tool list.
2687    pub fn from_tools(tools: Vec<Box<dyn Tool>>) -> Self {
2688        Self { tools }
2689    }
2690
2691    /// Convert the registry into the owned tool list.
2692    pub fn into_tools(self) -> Vec<Box<dyn Tool>> {
2693        self.tools
2694    }
2695
2696    /// Append a tool.
2697    pub fn push(&mut self, tool: Box<dyn Tool>) {
2698        self.tools.push(tool);
2699    }
2700
2701    /// Extend the registry with additional tools.
2702    pub fn extend<I>(&mut self, tools: I)
2703    where
2704        I: IntoIterator<Item = Box<dyn Tool>>,
2705    {
2706        self.tools.extend(tools);
2707    }
2708
2709    /// Get all tools.
2710    pub fn tools(&self) -> &[Box<dyn Tool>] {
2711        &self.tools
2712    }
2713
2714    /// Find a tool by name.
2715    pub fn get(&self, name: &str) -> Option<&dyn Tool> {
2716        self.tools
2717            .iter()
2718            .find(|t| t.name() == name)
2719            .map(std::convert::AsRef::as_ref)
2720    }
2721}
2722
2723// ============================================================================
2724// Read Tool
2725// ============================================================================
2726
/// Input parameters for the read tool.
///
/// Deserialized from the tool-call JSON; field names use camelCase on the wire.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ReadInput {
    /// Path to the file to read (relative or absolute).
    path: String,
    /// 1-indexed line to start reading from. Absent or `0` starts at the first line;
    /// negative values are rejected during execution.
    offset: Option<i64>,
    /// Maximum number of lines to read. Must be greater than 0 when present.
    limit: Option<i64>,
    /// When true, each output line is prefixed with a hashline tag instead of a
    /// right-aligned line number. Defaults to false when omitted.
    #[serde(default)]
    hashline: bool,
}
2737
/// State for the built-in `read` tool.
pub struct ReadTool {
    /// Base directory used to resolve the requested path and to enforce the read scope.
    cwd: PathBuf,
    /// Whether to auto-resize images to fit token limits.
    auto_resize: bool,
    /// When true, reading a file detected as a supported image fails with a tool error.
    block_images: bool,
    /// Optional root passed to `attach_text_artifact_if_needed_with_root` for large text
    /// outputs. The constructors visible next to this struct leave it `None` outside tests.
    artifact_root: Option<PathBuf>,
}
2745
2746impl ReadTool {
2747    pub fn new(cwd: &Path) -> Self {
2748        Self {
2749            cwd: cwd.to_path_buf(),
2750            auto_resize: true,
2751            block_images: false,
2752            artifact_root: None,
2753        }
2754    }
2755
2756    pub fn with_settings(cwd: &Path, auto_resize: bool, block_images: bool) -> Self {
2757        Self {
2758            cwd: cwd.to_path_buf(),
2759            auto_resize,
2760            block_images,
2761            artifact_root: None,
2762        }
2763    }
2764
2765    #[cfg(test)]
2766    fn with_artifact_root(cwd: &Path, artifact_root: &Path) -> Self {
2767        Self {
2768            cwd: cwd.to_path_buf(),
2769            auto_resize: true,
2770            block_images: false,
2771            artifact_root: Some(artifact_root.to_path_buf()),
2772        }
2773    }
2774}
2775
2776async fn read_some<R>(reader: &mut R, dst: &mut [u8]) -> std::io::Result<usize>
2777where
2778    R: AsyncRead + Unpin,
2779{
2780    if dst.is_empty() {
2781        return Ok(0);
2782    }
2783
2784    futures::future::poll_fn(|cx| {
2785        let mut read_buf = ReadBuf::new(dst);
2786        match std::pin::Pin::new(&mut *reader).poll_read(cx, &mut read_buf) {
2787            std::task::Poll::Ready(Ok(())) => std::task::Poll::Ready(Ok(read_buf.filled().len())),
2788            std::task::Poll::Ready(Err(err)) => std::task::Poll::Ready(Err(err)),
2789            std::task::Poll::Pending => std::task::Poll::Pending,
2790        }
2791    })
2792    .await
2793}
2794
#[async_trait]
#[allow(clippy::unnecessary_literal_bound)]
impl Tool for ReadTool {
    /// Name of the tool; this is the string `ToolRegistry::get` compares against.
    fn name(&self) -> &str {
        "read"
    }
    /// Label of the tool; identical to its name.
    fn label(&self) -> &str {
        "read"
    }
    /// Description text for the tool, including its documented truncation limits.
    fn description(&self) -> &str {
        "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to 2000 lines or 1MB (whichever is hit first). Use offset/limit for large files. When you need the full file, continue with offset until complete."
    }

    /// JSON Schema for the tool input. Only `path` is required; `offset`, `limit` and
    /// `hashline` are optional.
    fn parameters(&self) -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file to read (relative or absolute)"
                },
                "offset": {
                    "type": "integer",
                    "description": "Line number to start reading from (1-indexed)"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of lines to read"
                },
                "hashline": {
                    "type": "boolean",
                    "description": "When true, output each line as N#AB:content where N is the line number and AB is a content hash. Use with hashline_edit tool for precise edits."
                }
            },
            "required": ["path"]
        })
    }

    /// Declares this tool as read-only.
    fn effects(&self) -> ToolEffects {
        ToolEffects::read()
    }

    /// Read a file and return it either as an image attachment or as a text window.
    ///
    /// Steps, in order:
    /// 1. Deserialize and validate the input (`limit > 0`, `offset >= 0`).
    /// 2. Resolve the path against `cwd`, enforce the read scope, and reject paths whose
    ///    metadata says they are not regular files.
    /// 3. Return a cached output if one exists for this input and file dependency.
    /// 4. Read up to 8192 bytes to detect a supported image type. Images are size-checked,
    ///    optionally resized, base64-encoded and returned (this path does not store its
    ///    result in the tool-output cache).
    /// 5. Otherwise stream the whole file in 64 KiB chunks, collecting only the requested
    ///    line window (bounded by `DEFAULT_MAX_BYTES`) while counting every line, then
    ///    format, truncate, annotate with continuation hints, and cache the output.
    ///
    /// # Errors
    ///
    /// Returns a validation error for malformed input or out-of-range `limit`/`offset`, and
    /// a `read` tool error for non-regular files, I/O failures, blocked or oversized
    /// images, and offsets beyond the end of the file. Errors from `enforce_read_scope`
    /// and `resize_image_if_needed` are propagated unchanged.
    #[allow(clippy::too_many_lines)]
    async fn execute(
        &self,
        tool_call_id: &str,
        input: serde_json::Value,
        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
    ) -> Result<ToolOutput> {
        // Keep the raw JSON around: it is part of the cache key below.
        let input_value = input.clone();
        let input: ReadInput =
            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;

        if matches!(input.limit, Some(limit) if limit <= 0) {
            return Err(Error::validation(
                "`limit` must be greater than 0".to_string(),
            ));
        }
        if matches!(input.offset, Some(offset) if offset < 0) {
            return Err(Error::validation(
                "`offset` must be non-negative".to_string(),
            ));
        }

        let path = resolve_read_path(&input.path, &self.cwd);
        let path = enforce_read_scope(&path, &self.cwd)?;

        // Metadata failures are tolerated here; a missing/unreadable file surfaces as an
        // error from `File::open` further down.
        let meta = asupersync::fs::metadata(&path).await.ok();
        if let Some(meta) = &meta {
            if !meta.is_file() {
                return Err(Error::tool(
                    "read",
                    format!("Path {} is not a regular file", path.display()),
                ));
            }
        }

        // Cache lookup keyed on tool name, cwd and raw input, with a file-content dependency.
        let cache_key = tool_cache_key("read", &self.cwd, &input_value);
        let cache_mode = ToolCacheFingerprintMode::FileContent;
        let cache_deps = cache_dependency_for_path(&path, cache_mode);
        if let Some(output) = cached_tool_output(&cache_key, cache_deps.as_deref()) {
            return Ok(output);
        }

        let mut file = asupersync::fs::File::open(&path)
            .await
            .map_err(|e| Error::tool("read", e.to_string()))?;

        // Read initial chunk for mime detection.
        // Loop until the 8192-byte buffer is full or EOF, since one read may return fewer bytes.
        let mut buffer = [0u8; 8192];
        let mut initial_read = 0;
        loop {
            let n = read_some(&mut file, &mut buffer[initial_read..])
                .await
                .map_err(|e| Error::tool("read", format!("Failed to read file: {e}")))?;
            if n == 0 {
                break;
            }
            initial_read += n;
            if initial_read == buffer.len() {
                break;
            }
        }
        let initial_bytes = &buffer[..initial_read];

        // Image path: taken when the leading bytes match a supported image format.
        if let Some(mime_type) = detect_supported_image_mime_type_from_bytes(initial_bytes) {
            if self.block_images {
                return Err(Error::tool(
                    "read",
                    "Images are blocked by configuration".to_string(),
                ));
            }

            // For images, allow a larger on-disk source as long as it stays
            // within the read-tool input bound; resize/re-encode may still
            // bring the API payload under IMAGE_MAX_BYTES.
            let max_image_input_bytes = usize::try_from(READ_TOOL_MAX_BYTES).unwrap_or(usize::MAX);
            // Fast rejection based on metadata, before reading the rest of the file.
            if let Some(meta) = &meta {
                if meta.len() > READ_TOOL_MAX_BYTES {
                    return Err(Error::tool(
                        "read",
                        format!(
                            "Image is too large ({} bytes). Max allowed is {} bytes.",
                            meta.len(),
                            READ_TOOL_MAX_BYTES
                        ),
                    ));
                }
            }
            let mut all_bytes = Vec::with_capacity(initial_read);
            all_bytes.extend_from_slice(initial_bytes);

            // Read at most one byte past the limit so an oversized file is detectable
            // below without reading it in full (covers the case where metadata was
            // unavailable or the file grew).
            let remaining_limit = max_image_input_bytes.saturating_sub(initial_read);
            let mut limiter = file.take((remaining_limit as u64).saturating_add(1));
            limiter
                .read_to_end(&mut all_bytes)
                .await
                .map_err(|e| Error::tool("read", format!("Failed to read image: {e}")))?;

            if all_bytes.len() > max_image_input_bytes {
                return Err(Error::tool(
                    "read",
                    format!(
                        "Image is too large ({} bytes). Max allowed is {} bytes.",
                        all_bytes.len(),
                        READ_TOOL_MAX_BYTES
                    ),
                ));
            }

            let resized = if self.auto_resize {
                resize_image_if_needed(&all_bytes, mime_type)?
            } else {
                ResizedImage::original(all_bytes, mime_type)
            };

            // The payload that is actually sent must fit IMAGE_MAX_BYTES, resized or not.
            if resized.bytes.len() > IMAGE_MAX_BYTES {
                let message = if resized.resized {
                    format!(
                        "Image is too large ({} bytes) after resizing. Max allowed is {} bytes.",
                        resized.bytes.len(),
                        IMAGE_MAX_BYTES
                    )
                } else {
                    format!(
                        "Image is too large ({} bytes). Max allowed is {} bytes.",
                        resized.bytes.len(),
                        IMAGE_MAX_BYTES
                    )
                };
                return Err(Error::tool("read", message));
            }

            let base64_data =
                base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &resized.bytes);

            // Text note accompanying the image; when resized, it includes the factor for
            // mapping displayed coordinates back to the original image.
            let mut note = format!("Read image file [{}]", resized.mime_type);
            if resized.resized {
                if let (Some(ow), Some(oh), Some(w), Some(h)) = (
                    resized.original_width,
                    resized.original_height,
                    resized.width,
                    resized.height,
                ) {
                    // Guard against division by zero when computing the scale.
                    if w > 0 {
                        let scale = f64::from(ow) / f64::from(w);
                        let _ = write!(
                            note,
                            "\n[Image: original {ow}x{oh}, displayed at {w}x{h}. Multiply coordinates by {scale:.2} to map to original image.]"
                        );
                    } else {
                        let _ =
                            write!(note, "\n[Image: original {ow}x{oh}, displayed at {w}x{h}.]");
                    }
                }
            }

            return Ok(ToolOutput {
                content: vec![
                    ContentBlock::Text(TextContent::new(note)),
                    ContentBlock::Image(ImageContent {
                        data: base64_data,
                        mime_type: resized.mime_type.to_string(),
                    }),
                ],
                details: None,
                is_error: false,
            });
        }

        // Text path: optimized streaming read.
        // We need:
        // 1. Total line count.
        // 2. Content for the requested range (offset/limit) OR head/tail if no range.

        // Reset file to start if we read some bytes
        if initial_read > 0 {
            file.seek(SeekFrom::Start(0))
                .await
                .map_err(|e| Error::tool("read", format!("Failed to seek: {e}")))?;
        }

        // Bytes of the requested window only, capped at DEFAULT_MAX_BYTES.
        let mut raw_content = Vec::new();
        // Number of line terminators seen so far across the whole file.
        let mut newlines_seen = 0usize;

        // Input offset is 1-based. Convert to 0-based index.
        let start_line_idx = match input.offset {
            Some(n) if n > 0 => n.saturating_sub(1).try_into().unwrap_or(usize::MAX),
            _ => 0,
        };
        let limit_lines = input
            .limit
            .map_or(usize::MAX, |l| l.try_into().unwrap_or(usize::MAX));
        // Exclusive 0-based end of the window; saturates to "no end" without a limit.
        let end_line_idx = start_line_idx.saturating_add(limit_lines);

        // True while the scan position is inside the requested window.
        let mut collecting = start_line_idx == 0;
        let mut buf = vec![0u8; 64 * 1024].into_boxed_slice(); // 64KB chunks
        let mut last_byte_was_newline = false;
        // State threaded through `normalize_line_endings_chunk` across chunks.
        // NOTE(review): presumably marks a trailing `\r` whose following byte is not yet known — confirm in that helper.
        let mut pending_cr = false;

        // We need to track total_lines accurately for the output.
        // We will respect MAX_BYTES for *collected* content, but continue scanning for line counts
        // so pagination metadata is correct.
        let mut total_bytes_read = 0u64;

        loop {
            let n = read_some(&mut file, &mut buf)
                .await
                .map_err(|e| Error::tool("read", e.to_string()))?;
            if n == 0 {
                break;
            }
            total_bytes_read = total_bytes_read.saturating_add(n as u64);

            let chunk = normalize_line_endings_chunk(&buf[..n], &mut pending_cr);
            if chunk.is_empty() {
                continue;
            }
            last_byte_was_newline = chunk.last().is_some_and(|byte| *byte == b'\n');
            // Start of the not-yet-copied part of this chunk that belongs to the window.
            let mut chunk_cursor = 0;

            for pos in memchr::memchr_iter(b'\n', &chunk) {
                // Check if this newline marks the end of a line we are collecting
                if collecting {
                    // newlines_seen is the index of the line ending at this newline
                    if newlines_seen + 1 == end_line_idx {
                        // We reached the limit. Collect up to this newline.
                        if raw_content.len() < DEFAULT_MAX_BYTES {
                            let remaining = DEFAULT_MAX_BYTES - raw_content.len();
                            let slice_len = (pos + 1 - chunk_cursor).min(remaining);
                            raw_content
                                .extend_from_slice(&chunk[chunk_cursor..chunk_cursor + slice_len]);
                        }
                        collecting = false;
                        chunk_cursor = pos + 1;
                    }
                }

                newlines_seen += 1;

                // Check if this newline marks the start of the window
                if !collecting && newlines_seen == start_line_idx {
                    collecting = true;
                    chunk_cursor = pos + 1;
                }
            }

            // Append remainder of chunk if collecting
            if collecting && chunk_cursor < chunk.len() && raw_content.len() < DEFAULT_MAX_BYTES {
                let remaining = DEFAULT_MAX_BYTES - raw_content.len();
                let slice_len = (chunk.len() - chunk_cursor).min(remaining);
                raw_content.extend_from_slice(&chunk[chunk_cursor..chunk_cursor + slice_len]);
            }
        }

        // If `pending_cr` is still set at EOF, count it as one final line terminator.
        if pending_cr {
            last_byte_was_newline = true;
            if collecting && raw_content.len() < DEFAULT_MAX_BYTES {
                raw_content.push(b'\n');
            }
            newlines_seen += 1;
        }

        // A trailing newline terminates the last line rather than starting a new one.
        // Also keep empty files at 0 lines so explicit positive offsets can error correctly.
        let total_lines = if total_bytes_read == 0 {
            0
        } else if last_byte_was_newline {
            newlines_seen
        } else {
            newlines_seen + 1
        };
        // Lossy conversion: invalid UTF-8 sequences become replacement characters.
        let text_content = String::from_utf8_lossy(&raw_content).into_owned();

        // Handle empty file.
        // Offset=0 behaves like "start from beginning", but positive offsets should fail.
        if total_lines == 0 {
            if input.offset.unwrap_or(0) > 0 {
                let offset_display = input.offset.unwrap_or(0);
                return Err(Error::tool(
                    "read",
                    format!(
                        "Offset {offset_display} is beyond end of file ({total_lines} lines total)"
                    ),
                ));
            }
            let output = ToolOutput {
                content: vec![ContentBlock::Text(TextContent::new(""))],
                details: None,
                is_error: false,
            };
            cache_tool_output(
                cache_key,
                stable_cache_dependency_for_path(&path, cache_mode, cache_deps.as_deref()),
                &output,
            );
            return Ok(output);
        }

        // Now we have the content (up to safety limit) in memory, but only for the requested window.
        // `text_content` starts at `start_line_idx`.

        let start_line = start_line_idx;
        let start_line_display = start_line.saturating_add(1);

        if start_line >= total_lines {
            let offset_display = input.offset.unwrap_or(0);
            return Err(Error::tool(
                "read",
                format!(
                    "Offset {offset_display} is beyond end of file ({total_lines} lines total)"
                ),
            ));
        }

        // Line cap handed to `truncate_head`: the explicit limit, else DEFAULT_MAX_LINES.
        let max_lines_for_truncation = input
            .limit
            .and_then(|l| usize::try_from(l).ok())
            .unwrap_or(DEFAULT_MAX_LINES);
        // One line more than the cap, so `truncate_head` can observe that more content exists.
        let display_limit = max_lines_for_truncation.saturating_add(1);

        // We calculate lines to take based on the limit, but since we already filtered
        // during read, we can mostly trust `text_content`, except for `DEFAULT_MAX_BYTES` truncation.

        let lines_to_take = limit_lines.min(display_limit);

        let mut selected_content = String::new();
        let line_iter = text_content.split('\n');

        // No skip is needed here: `text_content` already starts at the requested offset.
        // Both arms yield a `Take`; the loop below enforces the actual line bounds.
        let effective_iter = if text_content.ends_with('\n') {
            line_iter.take(lines_to_take)
        } else {
            line_iter.take(usize::MAX)
        };

        // Width of the line-number column: wide enough for the largest number, at least 5.
        let max_line_num = start_line.saturating_add(lines_to_take).min(total_lines);
        let line_num_width = max_line_num.to_string().len().max(5);

        for (i, line) in effective_iter.enumerate() {
            // Stop at the requested count or at the real end of file; the latter also drops
            // the empty piece `split` yields after a final '\n'.
            if i >= lines_to_take || start_line + i >= total_lines {
                break;
            }
            if i > 0 {
                selected_content.push('\n');
            }
            let line_idx = start_line + i; // 0-indexed
            let line = line.strip_suffix('\r').unwrap_or(line);
            if input.hashline {
                let tag = format_hashline_tag(line_idx, line);
                let _ = write!(selected_content, "{tag}:{line}");
            } else {
                let line_num = line_idx + 1;
                let _ = write!(selected_content, "{line_num:>line_num_width$}→{line}");
            }

            // Safety valve: stop formatting once the output is far past the byte limit.
            if selected_content.len() > DEFAULT_MAX_BYTES * 2 {
                break;
            }
        }

        // Keep a copy of the untruncated window when it is large enough to be offered as
        // an artifact (attached after truncation below).
        let artifact_source = (selected_content.len() > TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES)
            .then(|| selected_content.clone());

        let mut truncation = truncate_head(
            selected_content,
            max_lines_for_truncation,
            DEFAULT_MAX_BYTES,
        );
        // Report the file's real line count rather than the size of the selected window.
        truncation.total_lines = total_lines;

        let mut output_text = std::mem::take(&mut truncation.content);
        let mut details: Option<serde_json::Value> = None;

        if truncation.first_line_exceeds_limit {
            // The first requested line alone is over the limit: replace the output with a
            // hint showing how to fetch it via bash (single quotes in the path are escaped).
            let first_line = text_content.split('\n').next().unwrap_or("");
            let first_line = first_line.strip_suffix('\r').unwrap_or(first_line);
            let first_line_size = format_size(first_line.len());
            output_text = format!(
                "[Line {start_line_display} is {first_line_size}, exceeds {} limit. Use bash: sed -n '{start_line_display}p' '{}' | head -c {DEFAULT_MAX_BYTES}]",
                format_size(DEFAULT_MAX_BYTES),
                input.path.replace('\'', "'\\''")
            );
            details = Some(serde_json::json!({ "truncation": truncation }));
        } else if truncation.truncated {
            // Output was cut by line or byte limits: append the shown range and the next offset.
            let end_line_display = start_line_display
                .saturating_add(truncation.output_lines)
                .saturating_sub(1);
            let next_offset = end_line_display.saturating_add(1);

            if truncation.truncated_by == Some(TruncatedBy::Lines) {
                let _ = write!(
                    output_text,
                    "\n\n[Showing lines {start_line_display}-{end_line_display} of {total_lines}. Use offset={next_offset} to continue.]"
                );
            } else {
                let _ = write!(
                    output_text,
                    "\n\n[Showing lines {start_line_display}-{end_line_display} of {total_lines} ({} limit). Use offset={next_offset} to continue.]",
                    format_size(DEFAULT_MAX_BYTES)
                );
            }

            details = Some(serde_json::json!({ "truncation": truncation }));
        } else {
            // Calculate how many lines we actually displayed
            let displayed_lines = truncation.output_lines;
            let end_line_display = start_line_display
                .saturating_add(displayed_lines)
                .saturating_sub(1);

            // Not truncated, but the window ended before EOF (e.g. an explicit limit):
            // tell the caller how to continue.
            if end_line_display < total_lines {
                let remaining = total_lines.saturating_sub(end_line_display);
                let next_offset = end_line_display.saturating_add(1);
                let _ = write!(
                    output_text,
                    "\n\n[{remaining} more lines in file. Use offset={next_offset} to continue.]"
                );
            }
        }

        if let Some(artifact_source) = artifact_source.as_deref() {
            attach_text_artifact_if_needed_with_root(
                self.artifact_root.as_deref(),
                &mut output_text,
                &mut details,
                "read",
                tool_call_id,
                "selectedTextWindow",
                artifact_source,
            );
        }

        let output = ToolOutput {
            content: vec![ContentBlock::Text(TextContent::new(output_text))],
            details,
            is_error: false,
        };
        // Store the successful text output for later identical requests.
        cache_tool_output(
            cache_key,
            stable_cache_dependency_for_path(&path, cache_mode, cache_deps.as_deref()),
            &output,
        );
        Ok(output)
    }
}
3281
3282// ============================================================================
3283// Bash Tool
3284// ============================================================================
3285
/// Input parameters for the bash tool.
///
/// Deserialized from the tool-call JSON; field names use camelCase on the wire.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct BashInput {
    /// Shell command text to run.
    command: String,
    /// Optional timeout.
    /// NOTE(review): presumably seconds, forwarded as `timeout_secs` to `run_bash_command`
    /// (where `None` selects the default timeout and `0` disables it) — confirm at the call site.
    timeout: Option<u64>,
}
3293
/// State for the built-in `bash` tool.
pub struct BashTool {
    /// Working directory for the tool.
    /// NOTE(review): presumably the directory commands are executed in — confirm in `BashTool::execute`.
    cwd: PathBuf,
    /// Optional shell executable override; the registry fills it from `Config::shell_path`.
    shell_path: Option<String>,
    /// Optional command prefix; the registry fills it from `Config::shell_command_prefix`.
    /// NOTE(review): presumably prepended to each command as in `run_bash_command` — confirm.
    command_prefix: Option<String>,
    /// Optional artifact root.
    /// NOTE(review): presumably used like `ReadTool::artifact_root` for large outputs — confirm.
    artifact_root: Option<PathBuf>,
}
3300
/// Result of running a shell command via `run_bash_command`.
///
/// NOTE(review): the code that fills this struct is outside the reviewed section, so the
/// field notes below describe the apparent intent and should be confirmed there.
#[derive(Debug, Clone)]
pub struct BashRunResult {
    /// Command output text (presumably combined stdout/stderr, possibly truncated).
    pub output: String,
    /// Exit code of the command (presumably as computed by `exit_status_code`).
    pub exit_code: i32,
    /// Whether the run was cancelled.
    pub cancelled: bool,
    /// Why the run was cancelled, when it was.
    pub cancellation_reason: Option<BashCancellationReason>,
    /// Timeout in milliseconds, if one applied.
    pub timeout_ms: Option<u64>,
    /// Whether `output` was truncated.
    pub truncated: bool,
    /// Path to the full, untruncated output, when one was written.
    pub full_output_path: Option<String>,
    /// Truncation metadata, when available.
    pub truncation: Option<TruncationResult>,
}
3312
/// Message sent over the bounded channel from the stdout/stderr pump threads to the
/// drain loop in `run_bash_command`.
#[derive(Debug)]
enum BashPipeFrame {
    /// A chunk of bytes (presumably read from one of the child's pipes — confirm in `pump_stream`).
    Chunk(Vec<u8>),
    /// An error description (presumably a pipe read failure — confirm in `pump_stream`).
    Error(String),
}
3318
/// Collapse a process exit status into a single integer.
///
/// Returns the exit code when the process exited normally. Otherwise, on Unix, returns
/// the negated number of the terminating signal, or `-1` when no signal is reported; on
/// other platforms it returns `-1`.
fn exit_status_code(status: std::process::ExitStatus) -> i32 {
    if let Some(code) = status.code() {
        return code;
    }

    // No exit code: fall back to signal information where the platform provides it.
    #[cfg(unix)]
    {
        use std::os::unix::process::ExitStatusExt as _;
        status.signal().map_or(-1, |signal| -signal)
    }
    #[cfg(not(unix))]
    {
        -1
    }
}
3333
3334fn bash_cancellation_details(
3335    reason: BashCancellationReason,
3336    timeout_ms: Option<u64>,
3337    exit_code: i32,
3338) -> serde_json::Value {
3339    serde_json::json!({
3340        "schema": BASH_CANCELLATION_SCHEMA_V1,
3341        "status": "cancelled",
3342        "reason": reason.as_str(),
3343        "cleanup": "process_group_tree_terminated",
3344        "exitCode": exit_code,
3345        "timeoutMs": timeout_ms,
3346    })
3347}
3348
3349#[allow(clippy::too_many_lines)]
3350pub(crate) async fn run_bash_command(
3351    cwd: &Path,
3352    shell_path: Option<&str>,
3353    command_prefix: Option<&str>,
3354    command: &str,
3355    timeout_secs: Option<u64>,
3356    on_update: Option<&(dyn Fn(ToolUpdate) + Send + Sync)>,
3357) -> Result<BashRunResult> {
3358    let timeout_secs = match timeout_secs {
3359        None => Some(DEFAULT_BASH_TIMEOUT_SECS),
3360        Some(0) => None,
3361        Some(value) => Some(value),
3362    };
3363    let command = command_prefix.filter(|p| !p.trim().is_empty()).map_or_else(
3364        || command.to_string(),
3365        |prefix| format!("{prefix}\n{command}"),
3366    );
3367    let command = format!("trap 'code=$?; wait; exit $code' EXIT\n{command}");
3368
3369    if !cwd.exists() {
3370        return Err(Error::tool(
3371            "bash",
3372            format!(
3373                "Working directory does not exist: {}\nCannot execute bash commands.",
3374                cwd.display()
3375            ),
3376        ));
3377    }
3378
3379    let shell = shell_path.unwrap_or_else(|| {
3380        for path in ["/bin/bash", "/usr/bin/bash", "/usr/local/bin/bash"] {
3381            if Path::new(path).exists() {
3382                return path;
3383            }
3384        }
3385        "sh"
3386    });
3387
3388    let mut cmd = command_with_default_sigpipe_in_dir(shell, cwd)
3389        .map_err(|e| Error::tool("bash", format!("Failed to prepare shell: {e}")))?;
3390    cmd.arg("-c")
3391        .arg(&command)
3392        .current_dir(cwd)
3393        .stdin(Stdio::null())
3394        .stdout(Stdio::piped())
3395        .stderr(Stdio::piped());
3396
3397    // Place the shell in its own process group so background children
3398    // can be killed reliably even if the shell exits first.
3399    isolate_command_process_group(&mut cmd);
3400
3401    let mut child = cmd
3402        .spawn()
3403        .map_err(|e| Error::tool("bash", format!("Failed to spawn shell: {e}")))?;
3404
3405    let stdout = child
3406        .stdout
3407        .take()
3408        .ok_or_else(|| Error::tool("bash", "Missing stdout".to_string()))?;
3409    let stderr = child
3410        .stderr
3411        .take()
3412        .ok_or_else(|| Error::tool("bash", "Missing stderr".to_string()))?;
3413
3414    // Wrap in ProcessGuard for cleanup (including tree kill)
3415    let mut guard = ProcessGuard::new(child, ProcessCleanupMode::ProcessGroupTree);
3416
3417    // We use a bounded channel to provide backpressure. If the child process
3418    // produces output faster than the async loop can drain it (and spill to disk),
3419    // the pump threads will block on send(), which stops them from reading from the OS pipe.
3420    // The OS pipe buffer will fill up, causing the child's `write()` calls to block.
3421    // This correctly pauses the child until we catch up, preventing unbounded memory growth (OOM).
3422    let (tx, rx) = mpsc::sync_channel::<BashPipeFrame>(1024);
3423    let tx_stdout = tx.clone();
3424
3425    // Design Decision (bd-xdcrh.4.3):
3426    // We intentionally use raw dedicated OS threads here rather than `asupersync::runtime::spawn_blocking`.
3427    // The `pump_stream` loop blocks indefinitely on `read()` until the subprocess closes the pipe (EOF).
3428    // If we used the runtime's blocking pool, concurrently running long-lived bash tools (like compilers
3429    // or servers) could easily exhaust the pool's thread limit, starving the rest of the application
3430    // of threads needed for short-lived blocking I/O (e.g., SQLite transactions or filesystem metadata).
3431    // Dedicated threads cleanly isolate this unbounded blocking risk.
3432    let stdout_thread = thread::spawn(move || pump_stream(stdout, "stdout", &tx_stdout));
3433    let stderr_thread = thread::spawn(move || pump_stream(stderr, "stderr", &tx));
3434
3435    let max_chunks_bytes = DEFAULT_MAX_BYTES.saturating_mul(2);
3436    let mut bash_output = BashOutputState::new(max_chunks_bytes);
3437    bash_output.timeout_ms = timeout_secs.map(|s| s.saturating_mul(1000));
3438
3439    let cx = AgentCx::for_current_or_request();
3440    let mut timed_out = false;
3441    let mut cancelled = false;
3442    let mut cancellation_reason: Option<BashCancellationReason> = None;
3443    let mut exit_code: Option<i32> = None;
3444    let start = cx
3445        .cx()
3446        .timer_driver()
3447        .map_or_else(wall_now, |timer| timer.now());
3448    let timeout = timeout_secs.map(Duration::from_secs);
3449    let mut terminate_deadline: Option<asupersync::Time> = None;
3450
3451    let tick = Duration::from_millis(10);
3452    loop {
3453        let mut updated = false;
3454        while let Ok(frame) = rx.try_recv() {
3455            if let Err(err) = ingest_bash_pipe_frame(frame, &mut bash_output).await {
3456                let _ = guard.kill();
3457                return Err(err);
3458            }
3459            updated = true;
3460        }
3461
3462        if updated {
3463            emit_bash_update(&bash_output, on_update)?;
3464        }
3465
3466        match guard.try_wait_child() {
3467            Ok(Some(status)) => {
3468                exit_code = Some(exit_status_code(status));
3469                break;
3470            }
3471            Ok(None) => {}
3472            Err(err) => return Err(Error::tool("bash", err.to_string())),
3473        }
3474
3475        let now = cx
3476            .cx()
3477            .timer_driver()
3478            .map_or_else(wall_now, |timer| timer.now());
3479
3480        if let Some(deadline) = terminate_deadline {
3481            if now >= deadline {
3482                if let Some(status) = guard.kill() {
3483                    exit_code = Some(exit_status_code(status));
3484                }
3485                break; // Guard now owns no child after kill()
3486            }
3487        } else if let Some(timeout) = timeout {
3488            let elapsed = std::time::Duration::from_nanos(now.duration_since(start));
3489            if elapsed >= timeout {
3490                timed_out = true;
3491                cancellation_reason = Some(BashCancellationReason::Timeout);
3492                let pid = guard.child.as_ref().map(std::process::Child::id);
3493                terminate_process_group_tree(pid);
3494                terminate_deadline = Some(now + Duration::from_secs(BASH_TERMINATE_GRACE_SECS));
3495            }
3496        }
3497
3498        if terminate_deadline.is_none() && cx.checkpoint().is_err() {
3499            cancelled = true;
3500            cancellation_reason = Some(BashCancellationReason::AmbientCancellation);
3501            let _ = guard.kill();
3502            exit_code = Some(-1);
3503            break;
3504        }
3505
3506        sleep(now, tick).await;
3507    }
3508
3509    // Drain any remaining channel frames while waiting for the pump threads
3510    // to observe EOF and exit. Because the channel is bounded, they may still
3511    // be blocked on send() until we consume the buffered output after the child
3512    // closes its pipe ends. The 5-second cap is a safety net for pathological
3513    // cases (e.g. the child spawned a grandchild that inherited the pipe fd
3514    // and is still running).
3515    {
3516        let drain_start = cx
3517            .cx()
3518            .timer_driver()
3519            .map_or_else(wall_now, |timer| timer.now());
3520        let drain_deadline = drain_start + Duration::from_secs(5);
3521        let allow_drain_cancellation = !cancelled && !timed_out && exit_code.is_none();
3522        loop {
3523            // Drain everything currently available in the channel.
3524            let mut got_data = false;
3525            while let Ok(frame) = rx.try_recv() {
3526                if let Err(err) = ingest_bash_pipe_frame(frame, &mut bash_output).await {
3527                    let _ = guard.kill();
3528                    return Err(err);
3529                }
3530                got_data = true;
3531            }
3532            if got_data {
3533                emit_bash_update(&bash_output, on_update)?;
3534            }
3535
3536            // If both pump threads have finished, all data is in the channel
3537            // and we've drained it above, so we're done.
3538            if stdout_thread.is_finished() && stderr_thread.is_finished() {
3539                // One final drain in case they sent items between our last
3540                // try_recv loop and the is_finished check.
3541                while let Ok(frame) = rx.try_recv() {
3542                    if let Err(err) = ingest_bash_pipe_frame(frame, &mut bash_output).await {
3543                        let _ = guard.kill();
3544                        return Err(err);
3545                    }
3546                }
3547                break;
3548            }
3549
3550            let now = cx
3551                .cx()
3552                .timer_driver()
3553                .map_or_else(wall_now, |timer| timer.now());
3554            if now >= drain_deadline {
3555                break;
3556            }
3557            if allow_drain_cancellation && cx.checkpoint().is_err() {
3558                cancelled = true;
3559                cancellation_reason.get_or_insert(BashCancellationReason::AmbientCancellation);
3560                break;
3561            }
3562            sleep(now, tick).await;
3563        }
3564    }
3565
3566    // Explicitly reap the child process to prevent zombies. try_wait_child()
3567    // uses WNOHANG which *should* reap the zombie on the first successful
3568    // return, but calling wait() as a belt-and-suspenders ensures the zombie
3569    // is cleaned up even if try_wait missed it (observed on macOS when the
3570    // child is in its own process group).
3571    if guard.child.is_some() {
3572        if let Ok(status) = guard.wait() {
3573            exit_code.get_or_insert_with(|| exit_status_code(status));
3574        }
3575    }
3576
3577    drop(bash_output.temp_file.take());
3578
3579    let raw_output = concat_chunks(&bash_output.chunks);
3580    let full_output = String::from_utf8_lossy(&raw_output).into_owned();
3581    let full_output_last_line_len = full_output.split('\n').next_back().map_or(0, str::len);
3582
3583    let mut truncation = truncate_tail(full_output, DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES);
3584    if bash_output.total_bytes > bash_output.chunks_bytes {
3585        truncation.truncated = true;
3586        truncation.truncated_by = Some(TruncatedBy::Bytes);
3587        truncation.total_bytes = bash_output.total_bytes;
3588        truncation.total_lines = line_count_from_newline_count(
3589            bash_output.total_bytes,
3590            bash_output.line_count,
3591            bash_output.last_byte_was_newline,
3592        );
3593    }
3594
3595    let mut output_text = if truncation.content.is_empty() {
3596        "(no output)".to_string()
3597    } else {
3598        std::mem::take(&mut truncation.content)
3599    };
3600
3601    let mut full_output_path = None;
3602    if truncation.truncated {
3603        if let Some(path) = bash_output.temp_file_path.as_ref() {
3604            full_output_path = Some(path.display().to_string());
3605        }
3606
3607        let start_line = truncation
3608            .total_lines
3609            .saturating_sub(truncation.output_lines)
3610            .saturating_add(1);
3611        let end_line = truncation.total_lines;
3612
3613        let display_path = full_output_path.as_deref().unwrap_or("undefined");
3614        let file_limit_hit = bash_output.total_bytes > BASH_FILE_LIMIT_BYTES;
3615        let output_qualifier = if file_limit_hit {
3616            format!(
3617                "Partial output (capped at {})",
3618                format_size(BASH_FILE_LIMIT_BYTES)
3619            )
3620        } else {
3621            "Full output".to_string()
3622        };
3623
3624        if truncation.last_line_partial {
3625            let last_line_size = format_size(full_output_last_line_len);
3626            let _ = write!(
3627                output_text,
3628                "\n\n[Showing last {} of line {end_line} (line is {last_line_size}). {output_qualifier}: {display_path}]",
3629                format_size(truncation.output_bytes)
3630            );
3631        } else if truncation.truncated_by == Some(TruncatedBy::Lines) {
3632            let _ = write!(
3633                output_text,
3634                "\n\n[Showing lines {start_line}-{end_line} of {}. {output_qualifier}: {display_path}]",
3635                truncation.total_lines
3636            );
3637        } else {
3638            let _ = write!(
3639                output_text,
3640                "\n\n[Showing lines {start_line}-{end_line} of {} ({} limit). {output_qualifier}: {display_path}]",
3641                truncation.total_lines,
3642                format_size(DEFAULT_MAX_BYTES)
3643            );
3644        }
3645    }
3646
3647    if timed_out {
3648        cancelled = true;
3649        if !output_text.is_empty() {
3650            output_text.push_str("\n\n");
3651        }
3652        let timeout_display = timeout_secs.unwrap_or(0);
3653        let _ = write!(
3654            output_text,
3655            "Command timed out after {timeout_display} seconds"
3656        );
3657    }
3658
3659    let exit_code = exit_code.unwrap_or(-1);
3660    if !cancelled && exit_code != 0 {
3661        let _ = write!(output_text, "\n\nCommand exited with code {exit_code}");
3662    }
3663
3664    Ok(BashRunResult {
3665        output: output_text,
3666        exit_code,
3667        cancelled,
3668        cancellation_reason,
3669        timeout_ms: timeout_secs.map(|s| s.saturating_mul(1000)),
3670        truncated: truncation.truncated,
3671        full_output_path,
3672        truncation: if truncation.truncated {
3673            Some(truncation)
3674        } else {
3675            None
3676        },
3677    })
3678}
3679
3680impl BashTool {
3681    pub fn new(cwd: &Path) -> Self {
3682        Self {
3683            cwd: cwd.to_path_buf(),
3684            shell_path: None,
3685            command_prefix: None,
3686            artifact_root: None,
3687        }
3688    }
3689
3690    pub fn with_shell(
3691        cwd: &Path,
3692        shell_path: Option<String>,
3693        command_prefix: Option<String>,
3694    ) -> Self {
3695        Self {
3696            cwd: cwd.to_path_buf(),
3697            shell_path,
3698            command_prefix,
3699            artifact_root: None,
3700        }
3701    }
3702
3703    #[cfg(test)]
3704    fn with_artifact_root(cwd: &Path, artifact_root: &Path) -> Self {
3705        Self {
3706            cwd: cwd.to_path_buf(),
3707            shell_path: None,
3708            command_prefix: None,
3709            artifact_root: Some(artifact_root.to_path_buf()),
3710        }
3711    }
3712}
3713
3714#[async_trait]
3715#[allow(clippy::unnecessary_literal_bound)]
3716impl Tool for BashTool {
3717    fn name(&self) -> &str {
3718        "bash"
3719    }
3720    fn label(&self) -> &str {
3721        "bash"
3722    }
3723    fn description(&self) -> &str {
3724        "Execute a bash command in the current working directory. Returns stdout and stderr. Output is truncated to last 2000 lines or 1MB (whichever is hit first). If truncated, full output is saved to a temp file. `timeout` defaults to 120 seconds; set `timeout: 0` to disable."
3725    }
3726
3727    fn parameters(&self) -> serde_json::Value {
3728        serde_json::json!({
3729            "type": "object",
3730            "properties": {
3731                "command": {
3732                    "type": "string",
3733                    "description": "Bash command to execute"
3734                },
3735                "timeout": {
3736                    "type": "integer",
3737                    "description": "Timeout in seconds (default 120; set 0 to disable)"
3738                }
3739            },
3740            "required": ["command"]
3741        })
3742    }
3743
3744    fn effects(&self) -> ToolEffects {
3745        ToolEffects::process().union(ToolEffects::write())
3746    }
3747
3748    #[allow(clippy::too_many_lines)]
3749    async fn execute(
3750        &self,
3751        tool_call_id: &str,
3752        input: serde_json::Value,
3753        on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
3754    ) -> Result<ToolOutput> {
3755        let input: BashInput =
3756            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;
3757
3758        let result = run_bash_command(
3759            &self.cwd,
3760            self.shell_path.as_deref(),
3761            self.command_prefix.as_deref(),
3762            &input.command,
3763            input.timeout,
3764            on_update.as_deref(),
3765        )
3766        .await?;
3767
3768        let mut details_map = serde_json::Map::new();
3769        if let Some(truncation) = result.truncation.as_ref() {
3770            details_map.insert("truncation".to_string(), serde_json::to_value(truncation)?);
3771        }
3772        if let Some(path) = result.full_output_path.as_ref() {
3773            details_map.insert(
3774                "fullOutputPath".to_string(),
3775                serde_json::Value::String(path.clone()),
3776            );
3777        }
3778        if let Some(reason) = result.cancellation_reason {
3779            details_map.insert(
3780                "cancellation".to_string(),
3781                bash_cancellation_details(reason, result.timeout_ms, result.exit_code),
3782            );
3783        }
3784
3785        let details = if details_map.is_empty() {
3786            None
3787        } else {
3788            Some(serde_json::Value::Object(details_map))
3789        };
3790        let mut details = details;
3791        let mut output_text = result.output;
3792
3793        if let Some(path) = result.full_output_path.as_deref() {
3794            attach_text_artifact_from_path_if_needed_with_root(
3795                self.artifact_root.as_deref(),
3796                &mut output_text,
3797                &mut details,
3798                "bash",
3799                tool_call_id,
3800                "fullCommandOutput",
3801                Path::new(path),
3802            );
3803        }
3804
3805        let is_error = result.cancelled || result.exit_code != 0;
3806
3807        Ok(ToolOutput {
3808            content: vec![ContentBlock::Text(TextContent::new(output_text))],
3809            details,
3810            is_error,
3811        })
3812    }
3813}
3814
3815// ============================================================================
3816// Edit Tool
3817// ============================================================================
3818
3819/// Input parameters for the edit tool.
3820#[derive(Debug, Deserialize)]
3821#[serde(rename_all = "camelCase")]
3822struct EditInput {
3823    path: String,
3824    old_text: String,
3825    new_text: String,
3826}
3827
/// The edit tool.
pub struct EditTool {
    /// Working directory captured when the tool was constructed.
    cwd: PathBuf,
}

impl EditTool {
    /// Creates an edit tool that keeps an owned copy of `cwd`.
    pub fn new(cwd: &Path) -> Self {
        Self {
            cwd: cwd.to_path_buf(),
        }
    }
}
3839
/// Removes a single leading UTF-8 byte-order mark (`U+FEFF`) from `s`.
///
/// Returns the remaining text and whether a BOM was removed. Only the first
/// BOM is stripped; any further `U+FEFF` characters are left in place.
fn strip_bom(s: &str) -> (&str, bool) {
    match s.strip_prefix('\u{FEFF}') {
        Some(rest) => (rest, true),
        None => (s, false),
    }
}
3844
/// Detects the line-ending convention of `content` from its first line break.
///
/// Returns `"\r\n"`, `"\r"`, or `"\n"` according to the first `\r` or `\n`
/// encountered. Content without any line break reports `"\n"`.
fn detect_line_ending(content: &str) -> &'static str {
    let bytes = content.as_bytes();
    let first_break = bytes.iter().position(|&b| b == b'\r' || b == b'\n');

    match first_break {
        // A CR immediately followed by LF is a Windows line ending.
        Some(pos) if bytes[pos] == b'\r' => {
            if bytes.get(pos + 1).copied() == Some(b'\n') {
                "\r\n"
            } else {
                "\r"
            }
        }
        // Either the first break is a bare LF or there is no break at all.
        _ => "\n",
    }
}
3863
/// Converts every `\r\n` pair and every lone `\r` in `text` to `\n`.
///
/// Text without any `\r` is returned as a plain copy.
fn normalize_to_lf(text: &str) -> String {
    if !text.contains('\r') {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    // True when the previous character was a CR that has already been
    // emitted as LF, so a directly following LF must be dropped.
    let mut after_cr = false;
    for ch in text.chars() {
        match ch {
            '\r' => {
                out.push('\n');
                after_cr = true;
            }
            '\n' if after_cr => after_cr = false,
            other => {
                out.push(other);
                after_cr = false;
            }
        }
    }
    out
}
3882
3883fn normalize_line_endings_chunk<'a>(
3884    chunk: &'a [u8],
3885    pending_cr: &mut bool,
3886) -> std::borrow::Cow<'a, [u8]> {
3887    if !*pending_cr && memchr::memchr(b'\r', chunk).is_none() {
3888        return std::borrow::Cow::Borrowed(chunk);
3889    }
3890
3891    let mut normalized = Vec::with_capacity(chunk.len().saturating_add(usize::from(*pending_cr)));
3892    let mut idx = 0;
3893
3894    if *pending_cr {
3895        normalized.push(b'\n');
3896        if chunk.first() == Some(&b'\n') {
3897            idx = 1;
3898        }
3899        *pending_cr = false;
3900    }
3901
3902    while idx < chunk.len() {
3903        match chunk[idx] {
3904            b'\r' => {
3905                if chunk.get(idx + 1) == Some(&b'\n') {
3906                    normalized.push(b'\n');
3907                    idx += 2;
3908                } else if idx + 1 < chunk.len() {
3909                    normalized.push(b'\n');
3910                    idx += 1;
3911                } else {
3912                    *pending_cr = true;
3913                    idx += 1;
3914                }
3915            }
3916            byte => {
3917                normalized.push(byte);
3918                idx += 1;
3919            }
3920        }
3921    }
3922
3923    std::borrow::Cow::Owned(normalized)
3924}
3925
/// Re-applies a line-ending convention to LF-normalized `text`.
///
/// Every `\n` is replaced by `ending` when `ending` is `"\r\n"` or `"\r"`.
/// For any other value the text is returned as a plain copy.
fn restore_line_endings(text: &str, ending: &str) -> String {
    if matches!(ending, "\r\n" | "\r") {
        text.replace('\n', ending)
    } else {
        text.to_string()
    }
}
3933
/// Outcome of searching for a piece of text inside file content, first
/// exactly and then after normalization.
#[derive(Debug, Clone)]
struct FuzzyMatchResult {
    /// Whether any match (exact or normalized) was located.
    found: bool,
    /// Byte offset of the match start in the original content; `0` when
    /// nothing was found.
    index: usize,
    /// Byte length of the matched span in the original content; `0` when
    /// nothing was found.
    match_length: usize,
    /// `true` only when the match came from the direct substring search.
    exact_match: bool,
}
3941
3942/// Map a range in normalized content back to byte offsets in the original text.
3943///
3944/// Returns `(original_start_byte_idx, original_match_byte_len)`.
3945fn map_normalized_range_to_original(
3946    content: &str,
3947    norm_match_start: usize,
3948    norm_match_len: usize,
3949) -> (usize, usize) {
3950    let mut norm_idx = 0;
3951    let mut orig_idx = 0;
3952    let mut match_start = None;
3953    let mut match_end = None;
3954    let norm_match_end = norm_match_start + norm_match_len;
3955    let mut last_trimmed_end = 0;
3956    let mut last_has_newline = false;
3957
3958    for line in content.split_inclusive('\n') {
3959        let line_content = line.strip_suffix('\n').unwrap_or(line);
3960        let has_newline = line.ends_with('\n');
3961        let trimmed_len = line_content
3962            .trim_end_matches(|c: char| c.is_whitespace() || is_special_unicode_space(c))
3963            .len();
3964        let trimmed_end = orig_idx + trimmed_len;
3965        last_trimmed_end = trimmed_end;
3966        last_has_newline = has_newline;
3967
3968        for (char_offset, c) in line_content.char_indices() {
3969            // match_end can be detected at any position including trailing
3970            // whitespace — it correctly points to right after the last content char.
3971            if norm_idx == norm_match_end && match_end.is_none() {
3972                match_end = Some(orig_idx + char_offset);
3973            }
3974
3975            if char_offset >= trimmed_len {
3976                continue;
3977            }
3978
3979            // match_start must only be detected at non-trailing-whitespace positions.
3980            // During trailing whitespace, norm_idx is "frozen" at the value after the
3981            // last real char, which corresponds to the newline in normalized content —
3982            // not the trailing space. The post-loop newline check handles that case.
3983            if norm_idx == norm_match_start && match_start.is_none() {
3984                match_start = Some(orig_idx + char_offset);
3985            }
3986            if match_start.is_some() && match_end.is_some() {
3987                break;
3988            }
3989
3990            let normalized_char = if is_special_unicode_space(c) {
3991                ' '
3992            } else if matches!(c, '\u{2018}' | '\u{2019}') {
3993                '\''
3994            } else if matches!(c, '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}') {
3995                '"'
3996            } else if matches!(
3997                c,
3998                '\u{2010}'
3999                    | '\u{2011}'
4000                    | '\u{2012}'
4001                    | '\u{2013}'
4002                    | '\u{2014}'
4003                    | '\u{2015}'
4004                    | '\u{2212}'
4005            ) {
4006                '-'
4007            } else {
4008                c
4009            };
4010
4011            norm_idx += normalized_char.len_utf8();
4012        }
4013
4014        orig_idx += line_content.len();
4015
4016        if has_newline {
4017            if norm_idx == norm_match_start && match_start.is_none() {
4018                match_start = Some(orig_idx);
4019            }
4020            if norm_idx == norm_match_end && match_end.is_none() {
4021                match_end = Some(trimmed_end);
4022            }
4023
4024            norm_idx += 1;
4025            orig_idx += 1;
4026        }
4027
4028        if match_start.is_some() && match_end.is_some() {
4029            break;
4030        }
4031    }
4032
4033    if norm_idx == norm_match_end && match_end.is_none() {
4034        match_end = Some(if last_has_newline {
4035            orig_idx
4036        } else {
4037            last_trimmed_end
4038        });
4039    }
4040
4041    let start = match_start.unwrap_or(0);
4042    let end = match_end.unwrap_or(content.len());
4043    (start, end.saturating_sub(start))
4044}
4045
4046fn build_normalized_content(content: &str) -> String {
4047    let mut normalized = String::with_capacity(content.len());
4048    let mut lines = content.split('\n').peekable();
4049
4050    while let Some(line) = lines.next() {
4051        let trimmed_len = line
4052            .trim_end_matches(|c: char| c.is_whitespace() || is_special_unicode_space(c))
4053            .len();
4054        for (char_offset, c) in line.char_indices() {
4055            if char_offset >= trimmed_len {
4056                continue;
4057            }
4058            let normalized_char = if is_special_unicode_space(c) {
4059                ' '
4060            } else if matches!(c, '\u{2018}' | '\u{2019}') {
4061                '\''
4062            } else if matches!(c, '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}') {
4063                '"'
4064            } else if matches!(
4065                c,
4066                '\u{2010}'
4067                    | '\u{2011}'
4068                    | '\u{2012}'
4069                    | '\u{2013}'
4070                    | '\u{2014}'
4071                    | '\u{2015}'
4072                    | '\u{2212}'
4073            ) {
4074                '-'
4075            } else {
4076                c
4077            };
4078            normalized.push(normalized_char);
4079        }
4080        if lines.peek().is_some() {
4081            normalized.push('\n');
4082        }
4083    }
4084    normalized
4085}
4086
/// Test-only convenience wrapper: searches `content` for `old_text` without
/// any pre-computed normalized inputs.
#[cfg(test)]
fn fuzzy_find_text(content: &str, old_text: &str) -> FuzzyMatchResult {
    fuzzy_find_text_with_normalized(content, old_text, None, None)
}
4091
4092/// Like [`fuzzy_find_text`], but accepts optional pre-computed normalized
4093/// versions.
4094fn fuzzy_find_text_with_normalized(
4095    content: &str,
4096    old_text: &str,
4097    precomputed_content: Option<&str>,
4098    precomputed_old: Option<&str>,
4099) -> FuzzyMatchResult {
4100    use std::borrow::Cow;
4101
4102    // First, try exact match (fastest path)
4103    if let Some(index) = content.find(old_text) {
4104        return FuzzyMatchResult {
4105            found: true,
4106            index,
4107            match_length: old_text.len(),
4108            exact_match: true,
4109        };
4110    }
4111
4112    // Build normalized versions (reuse pre-computed if available)
4113    let normalized_content = precomputed_content.map_or_else(
4114        || Cow::Owned(build_normalized_content(content)),
4115        Cow::Borrowed,
4116    );
4117    let normalized_old_text = precomputed_old.map_or_else(
4118        || Cow::Owned(build_normalized_content(old_text)),
4119        Cow::Borrowed,
4120    );
4121
4122    // Try to find the normalized old_text in normalized content
4123    if let Some(normalized_index) = normalized_content.find(normalized_old_text.as_ref()) {
4124        let (original_start, original_match_len) =
4125            map_normalized_range_to_original(content, normalized_index, normalized_old_text.len());
4126
4127        return FuzzyMatchResult {
4128            found: true,
4129            index: original_start,
4130            match_length: original_match_len,
4131            exact_match: false,
4132        };
4133    }
4134
4135    FuzzyMatchResult {
4136        found: false,
4137        index: 0,
4138        match_length: 0,
4139        exact_match: false,
4140    }
4141}
4142
/// Counts how many times `needle` occurs in `haystack`, including
/// occurrences that overlap each other.
///
/// An empty `needle` counts as zero occurrences.
fn count_overlapping_occurrences(haystack: &str, needle: &str) -> usize {
    if needle.is_empty() {
        return 0;
    }

    let mut count = 0;
    let mut search_from = 0;
    while let Some(relative) = haystack[search_from..].find(needle) {
        count += 1;
        let hit = search_from + relative;
        // Resume one character after the start of this hit so overlapping
        // occurrences are still seen. `hit` is a char boundary and the
        // remainder is non-empty because `needle` is non-empty.
        let step = haystack[hit..].chars().next().map_or(1, char::len_utf8);
        search_from = hit + step;
    }
    count
}
4153
/// Classification of a run of lines in a line-based diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DiffTag {
    /// Lines present in both the old and the new content.
    Equal,
    /// Lines present only in the new content.
    Added,
    /// Lines present only in the old content.
    Removed,
}
4160
4161#[derive(Debug, Clone)]
4162struct DiffPart {
4163    tag: DiffTag,
4164    value: String,
4165}
4166
4167fn diff_parts(old_content: &str, new_content: &str) -> Vec<DiffPart> {
4168    use similar::ChangeTag;
4169
4170    let diff = similar::TextDiff::from_lines(old_content, new_content);
4171
4172    let mut parts: Vec<DiffPart> = Vec::new();
4173    let mut current_tag: Option<DiffTag> = None;
4174    let mut current_lines: Vec<&str> = Vec::new();
4175
4176    for change in diff.iter_all_changes() {
4177        let tag = match change.tag() {
4178            ChangeTag::Equal => DiffTag::Equal,
4179            ChangeTag::Insert => DiffTag::Added,
4180            ChangeTag::Delete => DiffTag::Removed,
4181        };
4182
4183        let mut line = change.value();
4184        if let Some(stripped) = line.strip_suffix('\n') {
4185            line = stripped;
4186        }
4187
4188        if current_tag == Some(tag) {
4189            current_lines.push(line);
4190        } else {
4191            if let Some(prev_tag) = current_tag {
4192                parts.push(DiffPart {
4193                    tag: prev_tag,
4194                    value: current_lines.join("\n"),
4195                });
4196            }
4197            current_tag = Some(tag);
4198            current_lines = vec![line];
4199        }
4200    }
4201
4202    if let Some(tag) = current_tag {
4203        parts.push(DiffPart {
4204            tag,
4205            value: current_lines.join("\n"),
4206        });
4207    }
4208
4209    parts
4210}
4211
4212fn diff_line_num_width(old_content: &str, new_content: &str) -> usize {
4213    // Count newlines with memchr (avoids iterator-item overhead of split().count())
4214    let old_line_count = memchr::memchr_iter(b'\n', old_content.as_bytes()).count() + 1;
4215    let new_line_count = memchr::memchr_iter(b'\n', new_content.as_bytes()).count() + 1;
4216    let max_line_num = old_line_count.max(new_line_count).max(1);
4217    max_line_num.ilog10() as usize + 1
4218}
4219
/// Splits a diff part's `value` back into its individual lines.
///
/// `value` is produced by joining lines with `\n`, so it carries no spurious
/// trailing newline and can be split exactly. An empty `value` yields a
/// single empty line, which is correct because a part always holds at least
/// one line.
fn split_diff_lines(value: &str) -> Vec<&str> {
    value.split('\n').collect()
}
4228
4229#[inline]
4230const fn is_change_tag(tag: DiffTag) -> bool {
4231    matches!(tag, DiffTag::Added | DiffTag::Removed)
4232}
4233
/// Accumulates the rendered diff text together with the running line
/// counters needed to number each emitted line.
#[derive(Debug)]
struct DiffRenderState {
    /// Rendered diff so far; lines are separated by `\n` with no trailing newline.
    output: String,
    /// 1-based number of the next line in the old content.
    old_line_num: usize,
    /// 1-based number of the next line in the new content.
    new_line_num: usize,
    /// Whether the most recently rendered part was an addition or removal.
    last_was_change: bool,
    /// New-content line number at which the first change was marked, if any.
    first_changed_line: Option<usize>,
    /// Minimum column width used for right-aligned line numbers.
    line_num_width: usize,
    /// Number of unchanged lines shown on each side of a change.
    context_lines: usize,
}

impl DiffRenderState {
    /// Creates an empty state with both line counters starting at 1.
    const fn new(line_num_width: usize, context_lines: usize) -> Self {
        Self {
            output: String::new(),
            old_line_num: 1,
            new_line_num: 1,
            last_was_change: false,
            first_changed_line: None,
            line_num_width,
            context_lines,
        }
    }

    /// Starts a new output line unless the output is still empty.
    #[inline]
    fn ensure_line_break(&mut self) {
        if !self.output.is_empty() {
            self.output.push('\n');
        }
    }

    /// Records the current new-content line number as the first changed
    /// line; later calls leave the recorded value untouched.
    const fn mark_first_change(&mut self) {
        if self.first_changed_line.is_none() {
            self.first_changed_line = Some(self.new_line_num);
        }
    }

    /// Appends one line as `<marker><right-aligned number> <text>`.
    fn push_numbered_line(&mut self, marker: char, line_num: usize, line: &str) {
        self.ensure_line_break();
        let width = self.line_num_width;
        // Writing into a `String` cannot fail; the result is ignored on purpose.
        let _ = write!(self.output, "{marker}{line_num:>width$} {line}");
    }

    /// Appends a `+` line numbered by the new-content counter and advances it.
    fn push_added_line(&mut self, line: &str) {
        let line_num = self.new_line_num;
        self.push_numbered_line('+', line_num, line);
        self.new_line_num = line_num.saturating_add(1);
    }

    /// Appends a `-` line numbered by the old-content counter and advances it.
    fn push_removed_line(&mut self, line: &str) {
        let line_num = self.old_line_num;
        self.push_numbered_line('-', line_num, line);
        self.old_line_num = line_num.saturating_add(1);
    }

    /// Appends an unchanged line numbered by the old-content counter and
    /// advances both counters.
    fn push_context_line(&mut self, line: &str) {
        let line_num = self.old_line_num;
        self.push_numbered_line(' ', line_num, line);
        self.old_line_num = line_num.saturating_add(1);
        self.new_line_num = self.new_line_num.saturating_add(1);
    }

    /// Appends a `...` marker standing in for `skip` omitted unchanged lines
    /// and advances both counters by `skip`. Does nothing when `skip` is 0.
    fn push_skip_marker(&mut self, skip: usize) {
        if skip == 0 {
            return;
        }
        self.ensure_line_break();
        let width = self.line_num_width;
        // A blank padded to the number column keeps the marker aligned.
        let _ = write!(self.output, " {blank:>width$} ...", blank = " ");
        self.old_line_num = self.old_line_num.saturating_add(skip);
        self.new_line_num = self.new_line_num.saturating_add(skip);
    }
}
4320
4321fn render_changed_part(tag: DiffTag, raw: &[&str], state: &mut DiffRenderState) {
4322    state.mark_first_change();
4323    for line in raw {
4324        match tag {
4325            DiffTag::Added => state.push_added_line(line),
4326            DiffTag::Removed => state.push_removed_line(line),
4327            DiffTag::Equal => {}
4328        }
4329    }
4330    state.last_was_change = true;
4331}
4332
4333fn render_equal_part(raw: &[&str], next_part_is_change: bool, state: &mut DiffRenderState) {
4334    if !(state.last_was_change || next_part_is_change) {
4335        let raw_len = raw.len();
4336        state.old_line_num = state.old_line_num.saturating_add(raw_len);
4337        state.new_line_num = state.new_line_num.saturating_add(raw_len);
4338        state.last_was_change = false;
4339        return;
4340    }
4341
4342    if state.last_was_change
4343        && next_part_is_change
4344        && raw.len() > state.context_lines.saturating_mul(2)
4345    {
4346        for line in raw.iter().take(state.context_lines) {
4347            state.push_context_line(line);
4348        }
4349
4350        let skip = raw.len().saturating_sub(state.context_lines * 2);
4351        state.push_skip_marker(skip);
4352
4353        for line in raw
4354            .iter()
4355            .skip(raw.len().saturating_sub(state.context_lines))
4356        {
4357            state.push_context_line(line);
4358        }
4359    } else {
4360        // Compute slice bounds directly instead of cloning Vecs
4361        let start = if state.last_was_change {
4362            0
4363        } else {
4364            raw.len().saturating_sub(state.context_lines)
4365        };
4366        let lines_after_start = raw.len().saturating_sub(start);
4367        let (end, skip_end) = if !next_part_is_change && lines_after_start > state.context_lines {
4368            (
4369                start + state.context_lines,
4370                lines_after_start - state.context_lines,
4371            )
4372        } else {
4373            (raw.len(), 0)
4374        };
4375
4376        state.push_skip_marker(start);
4377        for line in &raw[start..end] {
4378            state.push_context_line(line);
4379        }
4380        state.push_skip_marker(skip_end);
4381    }
4382
4383    state.last_was_change = false;
4384}
4385
4386fn generate_diff_string(old_content: &str, new_content: &str) -> (String, Option<usize>) {
4387    let parts = diff_parts(old_content, new_content);
4388    let mut state = DiffRenderState::new(diff_line_num_width(old_content, new_content), 4);
4389
4390    for (i, part) in parts.iter().enumerate() {
4391        let raw = split_diff_lines(&part.value);
4392        let next_part_is_change = parts.get(i + 1).is_some_and(|next| is_change_tag(next.tag));
4393
4394        match part.tag {
4395            DiffTag::Added | DiffTag::Removed => render_changed_part(part.tag, &raw, &mut state),
4396            DiffTag::Equal => render_equal_part(&raw, next_part_is_change, &mut state),
4397        }
4398    }
4399
4400    (state.output, state.first_changed_line)
4401}
4402
4403#[async_trait]
4404#[allow(clippy::unnecessary_literal_bound)]
4405impl Tool for EditTool {
4406    fn name(&self) -> &str {
4407        "edit"
4408    }
4409    fn label(&self) -> &str {
4410        "edit"
4411    }
4412    fn description(&self) -> &str {
4413        "Edit a file by replacing text. The oldText must match a unique region; matching is exact but normalizes line endings, Unicode spaces/quotes/dashes, and ignores trailing whitespace."
4414    }
4415
4416    fn parameters(&self) -> serde_json::Value {
4417        serde_json::json!({
4418            "type": "object",
4419            "properties": {
4420                "path": {
4421                    "type": "string",
4422                    "description": "Path to the file to edit (relative or absolute)"
4423                },
4424                "oldText": {
4425                    "type": "string",
4426                    "minLength": 1,
4427                    "description": "Text to find and replace (must match uniquely; matching normalizes line endings, Unicode spaces/quotes/dashes, and ignores trailing whitespace)"
4428                },
4429                "newText": {
4430                    "type": "string",
4431                    "description": "New text to replace the old text with"
4432                }
4433            },
4434            "required": ["path", "oldText", "newText"]
4435        })
4436    }
4437
4438    #[allow(clippy::too_many_lines)]
4439    async fn execute(
4440        &self,
4441        _tool_call_id: &str,
4442        input: serde_json::Value,
4443        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
4444    ) -> Result<ToolOutput> {
4445        let input: EditInput =
4446            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;
4447
4448        if input.new_text.len() > WRITE_TOOL_MAX_BYTES {
4449            return Err(Error::validation(format!(
4450                "New text size exceeds maximum allowed ({} > {} bytes)",
4451                input.new_text.len(),
4452                WRITE_TOOL_MAX_BYTES
4453            )));
4454        }
4455
4456        let absolute_path = resolve_read_path(&input.path, &self.cwd);
4457        let absolute_path = enforce_cwd_scope(&absolute_path, &self.cwd, "edit")?;
4458
4459        let meta = asupersync::fs::metadata(&absolute_path)
4460            .await
4461            .map_err(|err| {
4462                let message = match err.kind() {
4463                    std::io::ErrorKind::NotFound => format!("File not found: {}", input.path),
4464                    std::io::ErrorKind::PermissionDenied => {
4465                        format!("Permission denied: {}", input.path)
4466                    }
4467                    _ => format!("Failed to access file {}: {err}", input.path),
4468                };
4469                Error::tool("edit", message)
4470            })?;
4471
4472        if !meta.is_file() {
4473            return Err(Error::tool(
4474                "edit",
4475                format!("Path {} is not a regular file", absolute_path.display()),
4476            ));
4477        }
4478        if meta.len() > READ_TOOL_MAX_BYTES {
4479            return Err(Error::tool(
4480                "edit",
4481                format!(
4482                    "File is too large ({} bytes). Max allowed for editing is {} bytes.",
4483                    meta.len(),
4484                    READ_TOOL_MAX_BYTES
4485                ),
4486            ));
4487        }
4488
4489        if let Err(err) = asupersync::fs::OpenOptions::new()
4490            .read(true)
4491            .write(true)
4492            .open(&absolute_path)
4493            .await
4494        {
4495            let message = match err.kind() {
4496                std::io::ErrorKind::NotFound => format!("File not found: {}", input.path),
4497                std::io::ErrorKind::PermissionDenied => {
4498                    format!("Permission denied: {}", input.path)
4499                }
4500                _ => format!("Failed to open file for editing: {err}"),
4501            };
4502            return Err(Error::tool("edit", message));
4503        }
4504
4505        // Read bytes strictly up to the limit to prevent OOM if metadata failed or file grows.
4506        let file = asupersync::fs::File::open(&absolute_path)
4507            .await
4508            .map_err(|e| Error::tool("edit", format!("Failed to open file: {e}")))?;
4509        let mut raw = Vec::new();
4510        let mut limiter = file.take(READ_TOOL_MAX_BYTES.saturating_add(1));
4511        limiter
4512            .read_to_end(&mut raw)
4513            .await
4514            .map_err(|e| Error::tool("edit", format!("Failed to read file: {e}")))?;
4515
4516        if raw.len() > usize::try_from(READ_TOOL_MAX_BYTES).unwrap_or(usize::MAX) {
4517            return Err(Error::tool(
4518                "edit",
4519                format!("File is too large (> {READ_TOOL_MAX_BYTES} bytes)."),
4520            ));
4521        }
4522
4523        let raw_content = String::from_utf8(raw).map_err(|_| {
4524            Error::tool(
4525                "edit",
4526                "File contains invalid UTF-8 characters and cannot be safely edited as text."
4527                    .to_string(),
4528            )
4529        })?;
4530
4531        // Strip BOM before matching (LLM won't include invisible BOM in oldText).
4532        let (content_no_bom, had_bom) = strip_bom(&raw_content);
4533
4534        let original_ending = detect_line_ending(content_no_bom);
4535        let normalized_content = normalize_to_lf(content_no_bom);
4536        let content_for_matching =
4537            if content_no_bom.contains('\r') && !content_no_bom.contains('\n') {
4538                std::borrow::Cow::Owned(content_no_bom.replace('\r', "\n"))
4539            } else {
4540                std::borrow::Cow::Borrowed(content_no_bom)
4541            };
4542        let normalized_old_text = normalize_to_lf(&input.old_text);
4543
4544        if normalized_old_text.is_empty() {
4545            return Err(Error::tool(
4546                "edit",
4547                "The old text cannot be empty. To prepend text, include the first line's content in oldText and newText.".to_string(),
4548            ));
4549        }
4550        if build_normalized_content(&normalized_old_text).is_empty() {
4551            return Err(Error::tool(
4552                "edit",
4553                "The old text must include at least one non-whitespace character.".to_string(),
4554            ));
4555        }
4556
4557        // Try variants of old_text to handle Unicode normalization differences (NFC vs NFD)
4558        // and potential input normalization (clipboard, LLM output).
4559        //
4560        // Note: normalized_content is already LF-normalized but preserves Unicode form
4561        // (from String::from_utf8).
4562
4563        let mut variants = Vec::with_capacity(3);
4564        variants.push(normalized_old_text.clone());
4565
4566        let nfc = normalized_old_text.nfc().collect::<String>();
4567        if nfc != normalized_old_text {
4568            variants.push(nfc);
4569        }
4570
4571        let nfd = normalized_old_text.nfd().collect::<String>();
4572        if nfd != normalized_old_text {
4573            variants.push(nfd);
4574        }
4575
4576        // Pre-compute normalized versions once and reuse for both matching and
4577        // occurrence counting (avoids 2x redundant O(n) normalization).
4578        let precomputed_content = build_normalized_content(content_for_matching.as_ref());
4579
4580        let mut best_match: Option<(FuzzyMatchResult, String, String)> = None;
4581
4582        for variant in variants {
4583            let precomputed_variant = build_normalized_content(&variant);
4584            let match_result = fuzzy_find_text_with_normalized(
4585                content_for_matching.as_ref(),
4586                &variant,
4587                Some(precomputed_content.as_str()),
4588                Some(precomputed_variant.as_str()),
4589            );
4590
4591            if match_result.found {
4592                best_match = Some((match_result, precomputed_variant, variant));
4593                break;
4594            }
4595        }
4596
4597        let Some((match_result, normalized_old_text, matched_variant)) = best_match else {
4598            return Err(Error::tool(
4599                "edit",
4600                format!(
4601                    "Could not find the exact text in {}. The old text must match exactly including all whitespace and newlines.",
4602                    input.path
4603                ),
4604            ));
4605        };
4606
4607        // Count occurrences in the same matching mode to avoid false ambiguity
4608        // when normalized matching collapses distinct trailing whitespace.
4609        let occurrences = if match_result.exact_match {
4610            count_overlapping_occurrences(content_for_matching.as_ref(), &matched_variant)
4611        } else {
4612            count_overlapping_occurrences(&precomputed_content, &normalized_old_text)
4613        };
4614
4615        if occurrences > 1 {
4616            return Err(Error::tool(
4617                "edit",
4618                format!(
4619                    "Found {occurrences} occurrences of the text in {}. The text must be unique. Please provide more context to make it unique.",
4620                    input.path
4621                ),
4622            ));
4623        }
4624
4625        // Perform replacement in the original coordinate space to preserve
4626        // line endings and unmatched content exactly.
4627        let idx = match_result.index;
4628        let match_len = match_result.match_length;
4629
4630        // Adapt new_text to match the file's line endings.
4631        // normalize_to_lf ensures we start from a known state (LF), then
4632        // restore_line_endings converts LFs to the target ending (e.g. CRLF).
4633        let adapted_new_text =
4634            restore_line_endings(&normalize_to_lf(&input.new_text), original_ending);
4635
4636        let new_len = content_no_bom.len() - match_len + adapted_new_text.len();
4637        let mut new_content = String::with_capacity(new_len);
4638        new_content.push_str(&content_no_bom[..idx]);
4639        new_content.push_str(&adapted_new_text);
4640        new_content.push_str(&content_no_bom[idx + match_len..]);
4641
4642        if content_no_bom.eq(&new_content) {
4643            return Err(Error::tool(
4644                "edit",
4645                format!(
4646                    "No changes made to {}. The replacement produced identical content. This might indicate an issue with special characters or the text not existing as expected.",
4647                    input.path
4648                ),
4649            ));
4650        }
4651
4652        let new_content_for_diff = normalize_to_lf(&new_content);
4653
4654        // Re-add BOM if present.
4655        let mut final_content = new_content;
4656        if had_bom {
4657            final_content = format!("\u{FEFF}{final_content}");
4658        }
4659
4660        // Atomic write (safe improvement vs legacy, behavior-equivalent).
4661        let absolute_path_clone = absolute_path.clone();
4662        let final_content_bytes = final_content.into_bytes();
4663        asupersync::runtime::spawn_blocking_io(move || {
4664            // Capture original permissions before the file is replaced.
4665            let original_perms = std::fs::metadata(&absolute_path_clone)
4666                .ok()
4667                .map(|m| m.permissions());
4668            let parent = absolute_path_clone
4669                .parent()
4670                .unwrap_or_else(|| Path::new("."));
4671            let mut temp_file = tempfile::NamedTempFile::new_in(parent)?;
4672
4673            temp_file.as_file_mut().write_all(&final_content_bytes)?;
4674            temp_file.as_file_mut().sync_all()?;
4675
4676            // Restore original file permissions (tempfile defaults to 0o600) before persisting.
4677            if let Some(perms) = original_perms {
4678                let _ = temp_file.as_file().set_permissions(perms);
4679            } else {
4680                // Default to 0644 (rw-r--r--) instead of tempfile's 0600 if we couldn't read original perms.
4681                #[cfg(unix)]
4682                {
4683                    use std::os::unix::fs::PermissionsExt;
4684                    let _ = temp_file
4685                        .as_file()
4686                        .set_permissions(std::fs::Permissions::from_mode(0o644));
4687                }
4688            }
4689
4690            temp_file
4691                .persist(&absolute_path_clone)
4692                .map_err(|e| e.error)?;
4693            sync_parent_dir(&absolute_path_clone)?;
4694            Ok(())
4695        })
4696        .await
4697        .map_err(|e| Error::tool("edit", format!("Failed to write file: {e}")))?;
4698
4699        let (diff, first_changed_line) =
4700            generate_diff_string(&normalized_content, &new_content_for_diff);
4701        let mut details = serde_json::Map::new();
4702        details.insert("diff".to_string(), serde_json::Value::String(diff));
4703        if let Some(line) = first_changed_line {
4704            details.insert(
4705                "firstChangedLine".to_string(),
4706                serde_json::Value::Number(serde_json::Number::from(line)),
4707            );
4708        }
4709
4710        Ok(ToolOutput {
4711            content: vec![ContentBlock::Text(TextContent::new(format!(
4712                "Successfully replaced text in {}.",
4713                input.path
4714            )))],
4715            details: Some(serde_json::Value::Object(details)),
4716            is_error: false,
4717        })
4718    }
4719}
4720
4721// ============================================================================
4722// Write Tool
4723// ============================================================================
4724
/// Input parameters for the write tool.
///
/// Deserialized from the tool-call JSON; keys use camelCase.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct WriteInput {
    /// Path to the file to write (relative or absolute).
    path: String,
    /// Content to write to the file.
    content: String,
}
4732
/// Built-in tool that writes content to a file, creating or overwriting it.
pub struct WriteTool {
    /// Directory against which input paths are resolved and scope-checked.
    cwd: PathBuf,
}

impl WriteTool {
    /// Creates a write tool that keeps its own copy of `cwd`.
    pub fn new(cwd: &Path) -> Self {
        let cwd = PathBuf::from(cwd);
        Self { cwd }
    }
}
4744
4745#[async_trait]
4746#[allow(clippy::unnecessary_literal_bound)]
4747impl Tool for WriteTool {
4748    fn name(&self) -> &str {
4749        "write"
4750    }
4751    fn label(&self) -> &str {
4752        "write"
4753    }
4754    fn description(&self) -> &str {
4755        "Write content to a file. Creates the file if it doesn't exist, overwrites if it does. Automatically creates parent directories."
4756    }
4757
4758    fn parameters(&self) -> serde_json::Value {
4759        serde_json::json!({
4760            "type": "object",
4761            "properties": {
4762                "path": {
4763                    "type": "string",
4764                    "description": "Path to the file to write (relative or absolute)"
4765                },
4766                "content": {
4767                    "type": "string",
4768                    "description": "Content to write to the file"
4769                }
4770            },
4771            "required": ["path", "content"]
4772        })
4773    }
4774
4775    #[allow(clippy::too_many_lines)]
4776    async fn execute(
4777        &self,
4778        _tool_call_id: &str,
4779        input: serde_json::Value,
4780        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
4781    ) -> Result<ToolOutput> {
4782        let input: WriteInput =
4783            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;
4784
4785        if input.content.len() > WRITE_TOOL_MAX_BYTES {
4786            return Err(Error::validation(format!(
4787                "Content size exceeds maximum allowed ({} > {} bytes)",
4788                input.content.len(),
4789                WRITE_TOOL_MAX_BYTES
4790            )));
4791        }
4792
4793        let path = resolve_path(&input.path, &self.cwd);
4794        let path = enforce_cwd_scope(&path, &self.cwd, "write")?;
4795
4796        if let Ok(meta) = asupersync::fs::metadata(&path).await {
4797            if !meta.is_file() {
4798                return Err(Error::tool(
4799                    "write",
4800                    format!("Path {} is not a regular file", path.display()),
4801                ));
4802            }
4803            if let Err(err) = asupersync::fs::OpenOptions::new()
4804                .write(true)
4805                .open(&path)
4806                .await
4807            {
4808                let message = match err.kind() {
4809                    std::io::ErrorKind::PermissionDenied => {
4810                        format!("Permission denied: {}", input.path)
4811                    }
4812                    _ => format!("Failed to open file for writing: {err}"),
4813                };
4814                return Err(Error::tool("write", message));
4815            }
4816        }
4817
4818        // Create parent directories if needed
4819        if let Some(parent) = path.parent() {
4820            asupersync::fs::create_dir_all(parent)
4821                .await
4822                .map_err(|e| Error::tool("write", format!("Failed to create directories: {e}")))?;
4823        }
4824
4825        // Parity with legacy pi-mono: report JS string length (UTF-16 code units) as "bytes".
4826        let bytes_written = input.content.encode_utf16().count();
4827
4828        // Write atomically using tempfile on a blocking thread
4829        let path_clone = path.clone();
4830        let content_bytes = input.content.into_bytes();
4831        asupersync::runtime::spawn_blocking_io(move || {
4832            // Capture original permissions before the file is replaced (new files get None).
4833            let original_perms = std::fs::metadata(&path_clone).ok().map(|m| m.permissions());
4834            let parent = path_clone.parent().unwrap_or_else(|| Path::new("."));
4835            let mut temp_file = tempfile::NamedTempFile::new_in(parent)?;
4836
4837            temp_file.as_file_mut().write_all(&content_bytes)?;
4838            temp_file.as_file_mut().sync_all()?;
4839
4840            // Restore original file permissions (tempfile defaults to 0o600) before persisting.
4841            if let Some(perms) = original_perms {
4842                let _ = temp_file.as_file().set_permissions(perms);
4843            } else {
4844                // New file: default to 0644 (rw-r--r--) instead of tempfile's 0600.
4845                #[cfg(unix)]
4846                {
4847                    use std::os::unix::fs::PermissionsExt;
4848                    let _ = temp_file
4849                        .as_file()
4850                        .set_permissions(std::fs::Permissions::from_mode(0o644));
4851                }
4852            }
4853
4854            // Persist (atomic rename)
4855            temp_file.persist(&path_clone).map_err(|e| e.error)?;
4856            sync_parent_dir(&path_clone)?;
4857            Ok(())
4858        })
4859        .await
4860        .map_err(|e| Error::tool("write", format!("Failed to write file: {e}")))?;
4861
4862        Ok(ToolOutput {
4863            content: vec![ContentBlock::Text(TextContent::new(format!(
4864                "Successfully wrote {} bytes to {}",
4865                bytes_written, input.path
4866            )))],
4867            details: None,
4868            is_error: false,
4869        })
4870    }
4871}
4872
4873// ============================================================================
4874// Grep Tool
4875// ============================================================================
4876
/// Input parameters for the grep tool.
///
/// Deserialized from the tool-call JSON; keys use camelCase (e.g. `ignoreCase`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct GrepInput {
    /// Search pattern (a regex, or a literal string when `literal` is set).
    pattern: String,
    /// Directory or file to search; `"."` is used when omitted.
    path: Option<String>,
    /// Optional glob pattern used to filter files, e.g. `*.ts`.
    glob: Option<String>,
    /// Case-insensitive search; treated as `false` when omitted.
    ignore_case: Option<bool>,
    /// Treat `pattern` as a literal string instead of a regex; `false` when omitted.
    literal: Option<bool>,
    /// Number of lines to show before and after each match; `0` when omitted.
    context: Option<usize>,
    /// Maximum number of matches to return; `DEFAULT_GREP_LIMIT` when omitted.
    /// An explicit `0` is rejected as a validation error.
    limit: Option<usize>,
    /// When true, request `N#AB:content` hashline output; `false` when the key is absent.
    #[serde(default)]
    hashline: bool,
}
4891
/// Built-in tool that searches file contents for a pattern.
pub struct GrepTool {
    /// Directory against which the search path is resolved and scope-checked.
    cwd: PathBuf,
    /// Optional artifact root; left unset by [`GrepTool::new`].
    /// NOTE(review): its consumer is outside this excerpt — confirm the intended use.
    artifact_root: Option<PathBuf>,
}

impl GrepTool {
    /// Creates a grep tool for `cwd` with no artifact root.
    pub fn new(cwd: &Path) -> Self {
        let cwd = PathBuf::from(cwd);
        Self {
            cwd,
            artifact_root: None,
        }
    }

    /// Test-only constructor that also sets the artifact root.
    #[cfg(test)]
    fn with_artifact_root(cwd: &Path, artifact_root: &Path) -> Self {
        let cwd = PathBuf::from(cwd);
        let artifact_root = Some(PathBuf::from(artifact_root));
        Self { cwd, artifact_root }
    }
}
4913
/// Outcome of shortening one grep output line.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TruncateLineResult {
    /// The line as it should be displayed (with the marker suffix if shortened).
    text: String,
    /// Whether any characters were cut off.
    was_truncated: bool,
}

/// Limits a line to `max_chars` characters, appending `... [truncated]` when cut.
///
/// Lengths are counted in Unicode scalar values (`char`s). A line with at most
/// `max_chars` characters is returned unchanged.
///
/// Mirrors pi-mono: `${line.slice(0, maxChars)}... [truncated]`.
fn truncate_line(line: &str, max_chars: usize) -> TruncateLineResult {
    // The character at index `max_chars`, if any, is the first one to drop;
    // its byte offset is exactly where the kept prefix ends.
    match line.char_indices().nth(max_chars) {
        Some((cut, _)) => {
            let prefix = &line[..cut];
            TruncateLineResult {
                text: format!("{prefix}... [truncated]"),
                was_truncated: true,
            }
        }
        None => TruncateLineResult {
            text: line.to_string(),
            was_truncated: false,
        },
    }
}
4939
4940fn process_rg_json_match_line(
4941    line_res: std::io::Result<String>,
4942    matches: &mut Vec<(PathBuf, usize)>,
4943    match_count: &mut usize,
4944    match_limit_reached: &mut bool,
4945    scan_limit: usize,
4946) {
4947    if *match_limit_reached {
4948        return;
4949    }
4950
4951    let line = match line_res {
4952        Ok(l) => l,
4953        Err(e) => {
4954            tracing::debug!("Skipping ripgrep output line due to read error: {e}");
4955            return;
4956        }
4957    };
4958    if line.trim().is_empty() {
4959        return;
4960    }
4961
4962    let Ok(event) = serde_json::from_str::<serde_json::Value>(&line) else {
4963        return;
4964    };
4965
4966    if event.get("type").and_then(serde_json::Value::as_str) != Some("match") {
4967        return;
4968    }
4969
4970    let file_path = event
4971        .pointer("/data/path/text")
4972        .and_then(serde_json::Value::as_str)
4973        .map(PathBuf::from);
4974    let line_number = event
4975        .pointer("/data/line_number")
4976        .and_then(serde_json::Value::as_u64)
4977        .and_then(|n| usize::try_from(n).ok());
4978
4979    if let (Some(fp), Some(ln)) = (file_path, line_number) {
4980        matches.push((fp, ln));
4981        *match_count += 1;
4982        if *match_count >= scan_limit {
4983            *match_limit_reached = true;
4984        }
4985    }
4986}
4987
4988fn drain_rg_stdout(
4989    stdout_rx: &std::sync::mpsc::Receiver<std::io::Result<String>>,
4990    matches: &mut Vec<(PathBuf, usize)>,
4991    match_count: &mut usize,
4992    match_limit_reached: &mut bool,
4993    scan_limit: usize,
4994) {
4995    while let Ok(line_res) = stdout_rx.try_recv() {
4996        process_rg_json_match_line(
4997            line_res,
4998            matches,
4999            match_count,
5000            match_limit_reached,
5001            scan_limit,
5002        );
5003        if *match_limit_reached {
5004            break;
5005        }
5006    }
5007}
5008
5009fn drain_rg_stderr(
5010    stderr_rx: &std::sync::mpsc::Receiver<std::result::Result<Vec<u8>, String>>,
5011    stderr_bytes: &mut Vec<u8>,
5012) -> Result<()> {
5013    while let Ok(chunk_result) = stderr_rx.try_recv() {
5014        let chunk = chunk_result
5015            .map_err(|err| Error::tool("grep", format!("Failed to read stderr: {err}")))?;
5016        stderr_bytes.extend_from_slice(&chunk);
5017    }
5018    Ok(())
5019}
5020
5021#[async_trait]
5022#[allow(clippy::unnecessary_literal_bound)]
5023impl Tool for GrepTool {
5024    fn name(&self) -> &str {
5025        "grep"
5026    }
5027    fn label(&self) -> &str {
5028        "grep"
5029    }
5030    fn description(&self) -> &str {
5031        "Search file contents for a pattern. Returns matching lines with file paths and line numbers. Respects .gitignore. Output is truncated to 100 matches or 1MB (whichever is hit first). Long lines are truncated to 500 chars. Use hashline=true to get N#AB content-hash tags for use with hashline_edit."
5032    }
5033
5034    fn parameters(&self) -> serde_json::Value {
5035        serde_json::json!({
5036            "type": "object",
5037            "properties": {
5038                "pattern": {
5039                    "type": "string",
5040                    "description": "Search pattern (regex or literal string)"
5041                },
5042                "path": {
5043                    "type": "string",
5044                    "description": "Directory or file to search (default: current directory)"
5045                },
5046                "glob": {
5047                    "type": "string",
5048                    "description": "Filter files by glob pattern, e.g. '*.ts' or '**/*.spec.ts'"
5049                },
5050                "ignoreCase": {
5051                    "type": "boolean",
5052                    "description": "Case-insensitive search (default: false)"
5053                },
5054                "literal": {
5055                    "type": "boolean",
5056                    "description": "Treat pattern as literal string instead of regex (default: false)"
5057                },
5058                "context": {
5059                    "type": "integer",
5060                    "description": "Number of lines to show before and after each match (default: 0)"
5061                },
5062                "limit": {
5063                    "type": "integer",
5064                    "description": "Maximum number of matches to return (default: 100)"
5065                },
5066                "hashline": {
5067                    "type": "boolean",
5068                    "description": "When true, output each line as N#AB:content where N is the line number and AB is a content hash. Use with hashline_edit tool for precise edits."
5069                }
5070            },
5071            "required": ["pattern"]
5072        })
5073    }
5074
5075    fn effects(&self) -> ToolEffects {
5076        ToolEffects::read()
5077    }
5078
5079    #[allow(clippy::too_many_lines)]
5080    async fn execute(
5081        &self,
5082        tool_call_id: &str,
5083        input: serde_json::Value,
5084        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
5085    ) -> Result<ToolOutput> {
5086        let input_value = input.clone();
5087        let input: GrepInput =
5088            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;
5089
5090        if matches!(input.limit, Some(0)) {
5091            return Err(Error::validation(
5092                "`limit` must be greater than 0".to_string(),
5093            ));
5094        }
5095
5096        if !rg_available() {
5097            return Err(Error::tool(
5098                "grep",
5099                "ripgrep (rg) is not available (please install ripgrep)".to_string(),
5100            ));
5101        }
5102
5103        let search_dir = input.path.as_deref().unwrap_or(".");
5104        let search_path = resolve_read_path(search_dir, &self.cwd);
5105        let search_path = enforce_cwd_scope(&search_path, &self.cwd, "grep")?;
5106
5107        let is_directory = asupersync::fs::metadata(&search_path)
5108            .await
5109            .map_err(|e| {
5110                Error::tool(
5111                    "grep",
5112                    format!("Cannot access path {}: {e}", search_path.display()),
5113                )
5114            })?
5115            .is_dir();
5116
5117        let context_value = input.context.unwrap_or(0);
5118        let effective_limit = input.limit.unwrap_or(DEFAULT_GREP_LIMIT).max(1);
5119        // Overfetch one match so limit notices only appear after confirmed overflow.
5120        let scan_limit = effective_limit.saturating_add(1);
5121        let cache_key = tool_cache_key("grep", &self.cwd, &input_value);
5122        let cache_mode = if is_directory {
5123            ToolCacheFingerprintMode::DirectoryRecursive
5124        } else {
5125            ToolCacheFingerprintMode::FileContent
5126        };
5127        let cache_deps = cache_dependency_for_path(&search_path, cache_mode);
5128        if let Some(output) = cached_tool_output(&cache_key, cache_deps.as_deref()) {
5129            return Ok(output);
5130        }
5131
5132        let mut args: Vec<String> = vec![
5133            "--json".to_string(),
5134            "--line-number".to_string(),
5135            "--color=never".to_string(),
5136            "--hidden".to_string(),
5137            // Prevent massive JSON lines from minified files causing OOM
5138            "--max-columns=10000".to_string(),
5139        ];
5140
5141        if input.ignore_case.unwrap_or(false) {
5142            args.push("--ignore-case".to_string());
5143        }
5144        if input.literal.unwrap_or(false) {
5145            args.push("--fixed-strings".to_string());
5146        }
5147        if let Some(glob) = &input.glob {
5148            args.push("--glob".to_string());
5149            args.push(glob.clone());
5150        }
5151
5152        // Mirror find-tool behavior: explicitly pass root/nested .gitignore files
5153        // so ignore rules apply consistently even outside a git worktree.
5154        let ignore_root = if is_directory {
5155            search_path.clone()
5156        } else {
5157            search_path
5158                .parent()
5159                .unwrap_or_else(|| Path::new("."))
5160                .to_path_buf()
5161        };
5162        // NOTE: We rely on rg's native .gitignore discovery. We only explicitly pass
5163        // the root .gitignore if it exists, to ensure it's respected even if the
5164        // search path logic might otherwise miss it (e.g. searching a subdir).
5165        // We do NOT perform a blocking `glob("**/.gitignore")` here, as that stalls
5166        // the async runtime on large repos.
5167        let workspace_gitignore = self.cwd.join(".gitignore");
5168        if workspace_gitignore.exists() {
5169            args.push("--ignore-file".to_string());
5170            args.push(workspace_gitignore.display().to_string());
5171        }
5172        let root_gitignore = ignore_root.join(".gitignore");
5173        if root_gitignore != workspace_gitignore && root_gitignore.exists() {
5174            args.push("--ignore-file".to_string());
5175            args.push(root_gitignore.display().to_string());
5176        }
5177
5178        args.push("--".to_string());
5179        args.push(input.pattern.clone());
5180        args.push(search_path.display().to_string());
5181
5182        let rg_cmd = find_rg_binary().ok_or_else(|| {
5183            Error::tool(
5184                "grep",
5185                "rg is not available (please install ripgrep or rg)".to_string(),
5186            )
5187        })?;
5188
5189        let mut child = command_with_default_sigpipe(rg_cmd)
5190            .map_err(|e| Error::tool("grep", format!("Failed to prepare ripgrep: {e}")))?
5191            .args(args)
5192            .stdout(Stdio::piped())
5193            .stderr(Stdio::piped())
5194            .spawn()
5195            .map_err(|e| Error::tool("grep", format!("Failed to run ripgrep: {e}")))?;
5196
5197        let stdout = child
5198            .stdout
5199            .take()
5200            .ok_or_else(|| Error::tool("grep", "Missing stdout".to_string()))?;
5201        let stderr = child
5202            .stderr
5203            .take()
5204            .ok_or_else(|| Error::tool("grep", "Missing stderr".to_string()))?;
5205
5206        let mut guard = ProcessGuard::new(child, ProcessCleanupMode::ChildOnly);
5207
5208        let (stdout_tx, stdout_rx) = std::sync::mpsc::sync_channel(1024);
5209        let (stderr_tx, stderr_rx) =
5210            std::sync::mpsc::sync_channel::<std::result::Result<Vec<u8>, String>>(1024);
5211
5212        let stdout_thread = std::thread::spawn(move || {
5213            let reader = std::io::BufReader::new(stdout);
5214            for line in reader.lines() {
5215                if stdout_tx.send(line).is_err() {
5216                    break;
5217                }
5218            }
5219        });
5220
5221        let stderr_thread = std::thread::spawn(move || {
5222            let reader = std::io::BufReader::new(stderr);
5223            let _ = stderr_tx.send(read_to_end_capped_and_drain(reader, READ_TOOL_MAX_BYTES));
5224        });
5225
5226        let mut matches: Vec<(PathBuf, usize)> = Vec::new();
5227        let mut match_count: usize = 0;
5228        let mut match_scan_limit_reached = false;
5229        let mut stderr_bytes = Vec::new();
5230
5231        let tick = Duration::from_millis(10);
5232        let mut cx_cancelled = false;
5233
5234        let exit_status = loop {
5235            let agent_cx = AgentCx::for_current_or_request();
5236            let cx = agent_cx.cx();
5237            if cx.checkpoint().is_err() {
5238                cx_cancelled = true;
5239                break None;
5240            }
5241
5242            drain_rg_stdout(
5243                &stdout_rx,
5244                &mut matches,
5245                &mut match_count,
5246                &mut match_scan_limit_reached,
5247                scan_limit,
5248            );
5249            drain_rg_stderr(&stderr_rx, &mut stderr_bytes)?;
5250
5251            if match_scan_limit_reached {
5252                break None;
5253            }
5254
5255            match guard.try_wait_child() {
5256                Ok(Some(status)) => break Some(status),
5257                Ok(None) => {
5258                    let now = cx.timer_driver().map_or_else(wall_now, |timer| timer.now());
5259                    sleep(now, tick).await;
5260                }
5261                Err(e) => return Err(Error::tool("grep", e.to_string())),
5262            }
5263        };
5264
5265        drain_rg_stdout(
5266            &stdout_rx,
5267            &mut matches,
5268            &mut match_count,
5269            &mut match_scan_limit_reached,
5270            scan_limit,
5271        );
5272
5273        let code = if match_scan_limit_reached || cx_cancelled {
5274            // Avoid buffering unbounded stdout/stderr once we've hit the match limit.
5275            // `kill()` terminates the process, and we reap it in a background thread
5276            // so the stdout reader threads can exit promptly without blocking this task.
5277            let _ = guard.kill();
5278            // Drop any buffered stdout/stderr lines that were queued before termination.
5279            while stdout_rx.try_recv().is_ok() {}
5280            while stderr_rx.try_recv().is_ok() {}
5281            0
5282        } else {
5283            let status = exit_status.expect("rg exit status");
5284            status.code().unwrap_or(0)
5285        };
5286
5287        // Keep draining while waiting for reader threads to finish; otherwise a
5288        // bounded channel can fill and block the sender thread, causing join()
5289        // to hang after ripgrep has already exited.
5290        while !stdout_thread.is_finished() || !stderr_thread.is_finished() {
5291            if match_scan_limit_reached || cx_cancelled {
5292                while stdout_rx.try_recv().is_ok() {}
5293            } else {
5294                drain_rg_stdout(
5295                    &stdout_rx,
5296                    &mut matches,
5297                    &mut match_count,
5298                    &mut match_scan_limit_reached,
5299                    scan_limit,
5300                );
5301            }
5302            drain_rg_stderr(&stderr_rx, &mut stderr_bytes)?;
5303            sleep(wall_now(), Duration::from_millis(1)).await;
5304        }
5305
5306        if cx_cancelled {
5307            return Err(Error::tool("grep", "Command cancelled"));
5308        }
5309
5310        // Ensure stdout/stderr reader threads have fully drained the pipes before
5311        // we decide whether matches were found. Without this, fast ripgrep runs can
5312        // exit before the reader thread has delivered JSON match lines, causing
5313        // false "No matches found" results.
5314        stdout_thread
5315            .join()
5316            .map_err(|_| Error::tool("grep", "ripgrep stdout reader thread panicked"))?;
5317        stderr_thread
5318            .join()
5319            .map_err(|_| Error::tool("grep", "ripgrep stderr reader thread panicked"))?;
5320
5321        // Drain any remaining stdout/stderr produced after the last poll.
5322        if match_scan_limit_reached {
5323            while stdout_rx.try_recv().is_ok() {}
5324        } else {
5325            drain_rg_stdout(
5326                &stdout_rx,
5327                &mut matches,
5328                &mut match_count,
5329                &mut match_scan_limit_reached,
5330                scan_limit,
5331            );
5332        }
5333        drain_rg_stderr(&stderr_rx, &mut stderr_bytes)?;
5334
5335        let mut stderr_text = String::from_utf8_lossy(&stderr_bytes).trim().to_string();
5336        if stderr_bytes.len() as u64 > READ_TOOL_MAX_BYTES {
5337            stderr_text.push_str("\n... [stderr truncated] ...");
5338        }
5339        if !match_scan_limit_reached && code != 0 && code != 1 {
5340            let msg = if stderr_text.is_empty() {
5341                format!("ripgrep exited with code {code}")
5342            } else {
5343                stderr_text
5344            };
5345            return Err(Error::tool("grep", msg));
5346        }
5347
5348        let match_limit_reached = match_count > effective_limit;
5349        if match_limit_reached {
5350            matches.truncate(effective_limit);
5351            match_count = effective_limit;
5352        }
5353
5354        if match_count == 0 {
5355            let output = ToolOutput {
5356                content: vec![ContentBlock::Text(TextContent::new("No matches found"))],
5357                details: None,
5358                is_error: false,
5359            };
5360            cache_tool_output(
5361                cache_key,
5362                stable_cache_dependency_for_path(&search_path, cache_mode, cache_deps.as_deref()),
5363                &output,
5364            );
5365            return Ok(output);
5366        }
5367
5368        let mut file_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
5369        let mut output_builder = HeadTruncatingLineWriter::new(DEFAULT_MAX_BYTES);
5370        let mut artifact_source = String::new();
5371        let mut lines_truncated = false;
5372
5373        // Group matches by file to merge overlapping context windows
5374        let mut file_order: Vec<PathBuf> = Vec::new();
5375        let mut matches_by_file: HashMap<PathBuf, Vec<usize>> = HashMap::new();
5376        for (file_path, line_number) in &matches {
5377            if !matches_by_file.contains_key(file_path) {
5378                file_order.push(file_path.clone());
5379            }
5380            matches_by_file
5381                .entry(file_path.clone())
5382                .or_default()
5383                .push(*line_number);
5384        }
5385
5386        for file_path in file_order {
5387            let Some(mut match_lines) = matches_by_file.remove(&file_path) else {
5388                continue;
5389            };
5390            let relative_path = format_grep_path(&file_path, &self.cwd);
5391            let lines = get_file_lines_async(&file_path, &mut file_cache).await;
5392
5393            if lines.is_empty() {
5394                if let Some(first_match) = match_lines.first() {
5395                    let line = format!(
5396                        "{relative_path}:{first_match}: (unable to read file or too large)"
5397                    );
5398                    output_builder.push_line(&line);
5399                    append_artifact_source_line(&mut artifact_source, &line);
5400                }
5401                continue;
5402            }
5403
5404            match_lines.sort_unstable();
5405            match_lines.dedup();
5406
5407            let mut blocks: Vec<(usize, usize)> = Vec::new();
5408            for &line_number in &match_lines {
5409                let start = if context_value > 0 {
5410                    line_number.saturating_sub(context_value).max(1)
5411                } else {
5412                    line_number
5413                };
5414                let end = if context_value > 0 {
5415                    line_number.saturating_add(context_value).min(lines.len())
5416                } else {
5417                    line_number
5418                };
5419
5420                if let Some(last_block) = blocks.last_mut() {
5421                    if start <= last_block.1.saturating_add(1) {
5422                        last_block.1 = last_block.1.max(end);
5423                        continue;
5424                    }
5425                }
5426                blocks.push((start, end));
5427            }
5428
5429            for (i, (start, end)) in blocks.into_iter().enumerate() {
5430                if i > 0 {
5431                    output_builder.push_line("--");
5432                    append_artifact_source_line(&mut artifact_source, "--");
5433                }
5434                for current in start..=end {
5435                    let line_text = lines.get(current - 1).map_or("", String::as_str);
5436                    let sanitized = line_text.replace('\r', "");
5437                    let truncated = truncate_line(&sanitized, GREP_MAX_LINE_LENGTH);
5438                    if truncated.was_truncated {
5439                        lines_truncated = true;
5440                    }
5441
5442                    if input.hashline {
5443                        let line_idx = current - 1; // 0-indexed for hashline
5444                        let tag = format_hashline_tag(line_idx, &sanitized);
5445                        let line = if match_lines.binary_search(&current).is_ok() {
5446                            format!("{relative_path}:{tag}: {}", truncated.text)
5447                        } else {
5448                            format!("{relative_path}-{tag}- {}", truncated.text)
5449                        };
5450                        output_builder.push_line(&line);
5451                        append_artifact_source_line(&mut artifact_source, &line);
5452                    } else if match_lines.binary_search(&current).is_ok() {
5453                        let line = format!("{relative_path}:{current}: {}", truncated.text);
5454                        output_builder.push_line(&line);
5455                        append_artifact_source_line(&mut artifact_source, &line);
5456                    } else {
5457                        let line = format!("{relative_path}-{current}- {}", truncated.text);
5458                        output_builder.push_line(&line);
5459                        append_artifact_source_line(&mut artifact_source, &line);
5460                    }
5461                }
5462            }
5463        }
5464
5465        // Apply byte truncation while writing, avoiding a second joined copy.
5466        let mut truncation = output_builder.finish();
5467
5468        let mut output = std::mem::take(&mut truncation.content);
5469        let mut notices: Vec<String> = Vec::new();
5470        let mut details_map = serde_json::Map::new();
5471
5472        if match_limit_reached {
5473            notices.push(format!(
5474                "{effective_limit} matches limit reached. Use limit={} for more, or refine pattern",
5475                effective_limit * 2
5476            ));
5477            details_map.insert(
5478                "matchLimitReached".to_string(),
5479                serde_json::Value::Number(serde_json::Number::from(effective_limit)),
5480            );
5481        }
5482
5483        if truncation.truncated {
5484            notices.push(format!("{} limit reached", format_size(DEFAULT_MAX_BYTES)));
5485            details_map.insert("truncation".to_string(), serde_json::to_value(truncation)?);
5486        }
5487
5488        if lines_truncated {
5489            notices.push(format!(
5490                "Some lines truncated to {GREP_MAX_LINE_LENGTH} chars. Use read tool to see full lines"
5491            ));
5492            details_map.insert("linesTruncated".to_string(), serde_json::Value::Bool(true));
5493        }
5494
5495        if !notices.is_empty() {
5496            let _ = write!(output, "\n\n[{}]", notices.join(". "));
5497        }
5498
5499        let mut details = if details_map.is_empty() {
5500            None
5501        } else {
5502            Some(serde_json::Value::Object(details_map))
5503        };
5504
5505        attach_text_artifact_if_needed_with_root(
5506            self.artifact_root.as_deref(),
5507            &mut output,
5508            &mut details,
5509            "grep",
5510            tool_call_id,
5511            "searchResults",
5512            &artifact_source,
5513        );
5514
5515        let output = ToolOutput {
5516            content: vec![ContentBlock::Text(TextContent::new(output))],
5517            details,
5518            is_error: false,
5519        };
5520        cache_tool_output(
5521            cache_key,
5522            stable_cache_dependency_for_path(&search_path, cache_mode, cache_deps.as_deref()),
5523            &output,
5524        );
5525        Ok(output)
5526    }
5527}
5528
5529// ============================================================================
5530// Find Tool
5531// ============================================================================
5532
/// Input parameters for the find tool.
///
/// Deserialized from the tool-call JSON; keys are matched using camelCase names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct FindInput {
    /// Glob pattern handed to `fd`. This is the only required field in the tool schema.
    pattern: String,
    /// Directory to search. `"."` (resolved against the tool's cwd) is used when absent.
    path: Option<String>,
    /// Maximum number of results. `DEFAULT_FIND_LIMIT` applies when absent; `0` is rejected
    /// with a validation error.
    limit: Option<usize>,
}
5541
/// One path reported by `fd`, together with the timestamp used to order results.
#[derive(Debug)]
struct FindEntry {
    /// Path as shown to the caller: the search directory prefix is stripped from absolute
    /// paths when possible, and a trailing `/` is appended for directories.
    rel: String,
    /// Last-modified time, or `None` when the metadata (or its mtime) could not be read.
    modified: Option<SystemTime>,
}
5547
/// File-search tool backed by the external `fd` binary.
pub struct FindTool {
    /// Working directory: relative search paths are resolved against it and `fd` is
    /// launched from it.
    cwd: PathBuf,
    /// Optional root handed to the artifact helper when results are attached as a text
    /// artifact. [`FindTool::new`] leaves it unset.
    artifact_root: Option<PathBuf>,
}

impl FindTool {
    /// Builds a find tool rooted at `cwd`, with no artifact root configured.
    pub fn new(cwd: &Path) -> Self {
        Self {
            cwd: PathBuf::from(cwd),
            artifact_root: None,
        }
    }
}
5561
// `Tool` implementation for `find`: shells out to `fd` with a glob pattern and
// reports the matching paths, newest modification time first.
#[async_trait]
#[allow(clippy::unnecessary_literal_bound)]
impl Tool for FindTool {
    /// Identifier under which the tool is exposed.
    fn name(&self) -> &str {
        "find"
    }
    /// Human-readable label; identical to the name.
    fn label(&self) -> &str {
        "find"
    }
    /// Description string advertised alongside the tool schema.
    fn description(&self) -> &str {
        "Search for files by glob pattern. Returns matching file paths relative to the search directory. Sorted by modification time (newest first). Respects .gitignore. Output is truncated to 1000 results or 1MB (whichever is hit first)."
    }

    /// JSON Schema for the tool input: `pattern` (required), `path` and `limit` (optional).
    fn parameters(&self) -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "pattern": {
                    "type": "string",
                    "description": "Glob pattern to match files, e.g. '*.ts', '**/*.json', or 'src/**/*.spec.ts'"
                },
                "path": {
                    "type": "string",
                    "description": "Directory to search in (default: current directory)"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 1000)"
                }
            },
            "required": ["pattern"]
        })
    }

    /// Declares the tool as read-only for scheduling purposes.
    fn effects(&self) -> ToolEffects {
        ToolEffects::read()
    }

    /// Runs `fd` for the requested glob and returns the matching paths as a text block.
    ///
    /// Results are ordered newest-modified first and capped at the effective limit; notices
    /// about the limit, byte truncation, or a non-zero `fd` exit are appended in brackets.
    /// `_on_update` is accepted for the trait signature but never invoked here.
    ///
    /// # Errors
    ///
    /// Returns a validation error when `input` does not deserialize into [`FindInput`] or
    /// `limit` is `0`. Returns a tool error when the search path fails the scope check or
    /// does not exist, `fd` cannot be located or spawned, a pipe reader fails, the call is
    /// cancelled, `fd` runs longer than 60 seconds, or `fd` exits unsuccessfully without
    /// producing output (exit code 1 with empty stderr is treated as "no matches").
    #[allow(clippy::too_many_lines)]
    async fn execute(
        &self,
        tool_call_id: &str,
        input: serde_json::Value,
        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
    ) -> Result<ToolOutput> {
        // Keep the raw JSON for the cache key; `input` itself is consumed by deserialization.
        let input_value = input.clone();
        let input: FindInput =
            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;

        if matches!(input.limit, Some(0)) {
            return Err(Error::validation(
                "`limit` must be greater than 0".to_string(),
            ));
        }

        // Resolve the requested directory against cwd, apply the cwd scope check, and
        // normalize away any UNC prefix before the path is handed to `fd`.
        let search_dir = input.path.as_deref().unwrap_or(".");
        let search_path = resolve_read_path(search_dir, &self.cwd);
        let search_path = enforce_cwd_scope(&search_path, &self.cwd, "find")?;
        let search_path = strip_unc_prefix(search_path);
        let effective_limit = input.limit.unwrap_or(DEFAULT_FIND_LIMIT);
        // Overfetch one result so limit notices only appear after confirmed overflow.
        let scan_limit = effective_limit.saturating_add(1);

        if !search_path.exists() {
            return Err(Error::tool(
                "find",
                format!("Path not found: {}", search_path.display()),
            ));
        }

        // Return a cached result when one is available for this key and dependency
        // fingerprint; the fingerprint mode depends on whether the target is a directory.
        let cache_key = tool_cache_key("find", &self.cwd, &input_value);
        let cache_mode = if search_path.is_dir() {
            ToolCacheFingerprintMode::DirectoryRecursive
        } else {
            ToolCacheFingerprintMode::FileContent
        };
        let cache_deps = cache_dependency_for_path(&search_path, cache_mode);
        if let Some(output) = cached_tool_output(&cache_key, cache_deps.as_deref()) {
            return Ok(output);
        }

        let fd_cmd = find_fd_binary().ok_or_else(|| {
            Error::tool(
                "find",
                "fd is not available (please install fd-find or fd)".to_string(),
            )
        })?;

        // Build fd arguments
        let mut args: Vec<String> = vec![
            "--glob".to_string(),
            "--color=never".to_string(),
            "--hidden".to_string(),
            "--max-results".to_string(),
            scan_limit.to_string(),
        ];

        // NOTE: We rely on fd's native .gitignore discovery. We only explicitly pass
        // the root .gitignore if it exists, to ensure it's respected even if the
        // search path logic might otherwise miss it.
        // We do NOT perform a blocking `glob("**/.gitignore")` here.
        let workspace_gitignore = self.cwd.join(".gitignore");
        if workspace_gitignore.exists() {
            args.push("--ignore-file".to_string());
            args.push(workspace_gitignore.display().to_string());
        }
        // Also pass the search directory's own .gitignore unless it is the same file.
        let root_gitignore = search_path.join(".gitignore");
        if root_gitignore != workspace_gitignore && root_gitignore.exists() {
            args.push("--ignore-file".to_string());
            args.push(root_gitignore.display().to_string());
        }

        // `--` ends option parsing so a pattern starting with `-` is not read as a flag.
        args.push("--".to_string());
        args.push(input.pattern.clone());
        args.push(search_path.display().to_string());

        let mut child = command_with_default_sigpipe_in_dir(fd_cmd, &self.cwd)
            .map_err(|e| Error::tool("find", format!("Failed to prepare fd: {e}")))?
            .args(args)
            .current_dir(&self.cwd)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .map_err(|e| Error::tool("find", format!("Failed to run fd: {e}")))?;

        // Take ownership of both pipes before the child is moved into the guard.
        let stdout_pipe = child
            .stdout
            .take()
            .ok_or_else(|| Error::tool("find", "Missing stdout"))?;
        let stderr_pipe = child
            .stderr
            .take()
            .ok_or_else(|| Error::tool("find", "Missing stderr"))?;

        let mut guard = ProcessGuard::new(child, ProcessCleanupMode::ChildOnly);

        // Each pipe is read on its own OS thread so the polling loop below never blocks on
        // pipe I/O. Reads are capped at `READ_TOOL_MAX_BYTES`.
        // NOTE(review): the helper name suggests the remainder is drained and discarded
        // after the cap — confirm against `read_to_end_capped_and_drain`.
        let stdout_handle = std::thread::spawn(move || -> std::result::Result<Vec<u8>, String> {
            read_to_end_capped_and_drain(stdout_pipe, READ_TOOL_MAX_BYTES)
        });

        let stderr_handle = std::thread::spawn(move || -> std::result::Result<Vec<u8>, String> {
            read_to_end_capped_and_drain(stderr_pipe, READ_TOOL_MAX_BYTES)
        });

        let tick = Duration::from_millis(10);
        let start_time = std::time::Instant::now();
        let timeout_ms = 60_000; // 60 seconds
        let mut timed_out = false;
        let mut cx_cancelled = false;

        // Poll the child every `tick`. The loop yields `Some(status)` on a normal exit and
        // `None` after the child was killed because of cancellation or timeout.
        let status = loop {
            let agent_cx = AgentCx::for_current_or_request();
            let cx = agent_cx.cx();
            if cx.checkpoint().is_err() {
                cx_cancelled = true;
                let _ = guard.kill();
                break None;
            }

            // Check if process is done
            match guard.try_wait_child() {
                Ok(Some(status)) => break Some(status),
                Ok(None) => {
                    if start_time.elapsed().as_millis() > timeout_ms {
                        timed_out = true;
                        let _ = guard.kill();
                        break None;
                    }
                    // Sleep relative to the context's timer when it has one, else wall time.
                    let now = cx.timer_driver().map_or_else(wall_now, |timer| timer.now());
                    sleep(now, tick).await;
                }
                Err(e) => return Err(Error::tool("find", e.to_string())),
            }
        };

        // Join the readers in every case (including cancel/timeout) so the threads are not
        // left running; a reader failure is reported before the cancel/timeout errors below.
        let stdout_bytes = stdout_handle
            .join()
            .map_err(|_| Error::tool("find", "fd stdout reader thread panicked"))?
            .map_err(|err| Error::tool("find", format!("Failed to read fd stdout: {err}")))?;
        let stderr_bytes = stderr_handle
            .join()
            .map_err(|_| Error::tool("find", "fd stderr reader thread panicked"))?
            .map_err(|err| Error::tool("find", format!("Failed to read fd stderr: {err}")))?;

        if cx_cancelled {
            return Err(Error::tool("find", "Command cancelled"));
        }
        if timed_out {
            return Err(Error::tool("find", "Command timed out after 60 seconds"));
        }
        // Both `None` outcomes of the loop returned just above, so a status is present.
        let status = status.expect("fd exit status after successful completion");

        // NOTE(review): the truncation marker is appended to the same string that is split
        // into path lines further down, so it would be parsed as an entry — confirm intended.
        let mut stdout = String::from_utf8_lossy(&stdout_bytes).trim().to_string();
        if stdout_bytes.len() as u64 > READ_TOOL_MAX_BYTES {
            stdout.push_str("\n... [stdout truncated] ...");
        }
        let mut stderr = String::from_utf8_lossy(&stderr_bytes).trim().to_string();
        if stderr_bytes.len() as u64 > READ_TOOL_MAX_BYTES {
            stderr.push_str("\n... [stderr truncated] ...");
        }

        // A failing exit only becomes an error when fd printed nothing; partial output is
        // kept and the exit code is surfaced later as a notice instead.
        if !status.success() && stdout.is_empty() {
            if status.code() == Some(1) && stderr.is_empty() {
                // fd uses exit code 1 for "no matches"; treat as empty result.
            } else {
                let code = status.code().unwrap_or(1);
                let msg = if stderr.is_empty() {
                    format!("fd exited with code {code}")
                } else {
                    stderr
                };
                return Err(Error::tool("find", msg));
            }
        }

        if stdout.is_empty() {
            let output = ToolOutput {
                content: vec![ContentBlock::Text(TextContent::new(
                    "No files found matching pattern",
                ))],
                details: None,
                is_error: false,
            };
            cache_tool_output(
                cache_key,
                stable_cache_dependency_for_path(&search_path, cache_mode, cache_deps.as_deref()),
                &output,
            );
            return Ok(output);
        }

        // Turn each non-blank output line into an entry with a display path and an mtime.
        let mut entries: Vec<FindEntry> = Vec::new();
        for raw_line in stdout.lines() {
            let line = raw_line.trim_end_matches('\r').trim();
            if line.is_empty() {
                continue;
            }

            // On Windows, fd may emit `//?/…` or `\\?\…` extended-length
            // paths. Strip the prefix so relativization works correctly.
            let clean = strip_unc_prefix(PathBuf::from(line));
            let line_path = clean.as_path();
            // Absolute paths are shown relative to the search directory when they live
            // under it; anything else is shown as fd printed it.
            let mut rel = if line_path.is_absolute() {
                line_path.strip_prefix(&search_path).map_or_else(
                    |_| line_path.to_string_lossy().to_string(),
                    |stripped| stripped.to_string_lossy().to_string(),
                )
            } else {
                line_path.to_string_lossy().to_string()
            };

            let full_path = if line_path.is_absolute() {
                line_path.to_path_buf()
            } else {
                search_path.join(line_path)
            };
            // Mark directories with a trailing slash.
            if full_path.is_dir() && !rel.ends_with('/') {
                rel.push('/');
            }

            let modified = std::fs::metadata(&full_path)
                .and_then(|meta| meta.modified())
                .ok();
            entries.push(FindEntry { rel, modified });
        }

        // Newest first; entries without an mtime sort after those with one. Ties fall back
        // to a case-insensitive path comparison, then an exact one, for a stable order.
        entries.sort_by(|a, b| {
            let ordering = match (&a.modified, &b.modified) {
                (Some(a_time), Some(b_time)) => b_time.cmp(a_time),
                (Some(_), None) => Ordering::Less,
                (None, Some(_)) => Ordering::Greater,
                (None, None) => Ordering::Equal,
            };
            ordering.then_with(|| {
                let a_lower = a.rel.to_lowercase();
                let b_lower = b.rel.to_lowercase();
                a_lower.cmp(&b_lower).then_with(|| a.rel.cmp(&b.rel))
            })
        });

        // Reached when stdout was non-empty but every line was blank after trimming.
        if entries.is_empty() {
            let output = ToolOutput {
                content: vec![ContentBlock::Text(TextContent::new(
                    "No files found matching pattern",
                ))],
                details: None,
                is_error: false,
            };
            cache_tool_output(
                cache_key,
                stable_cache_dependency_for_path(&search_path, cache_mode, cache_deps.as_deref()),
                &output,
            );
            return Ok(output);
        }

        // More entries than the limit means the overfetched extra result was present.
        let result_limit_reached = entries.len() > effective_limit;
        let mut output_builder = HeadTruncatingLineWriter::new(DEFAULT_MAX_BYTES);
        let mut artifact_source = String::new();
        for entry in entries.into_iter().take(effective_limit) {
            output_builder.push_line(&entry.rel);
            append_artifact_source_line(&mut artifact_source, &entry.rel);
        }
        let mut truncation = output_builder.finish();

        // Move the text out of the truncation record; the record itself is serialized into
        // `details` below when truncation happened.
        let mut result_output = std::mem::take(&mut truncation.content);
        let mut notices: Vec<String> = Vec::new();
        let mut details_map = serde_json::Map::new();

        if !status.success() {
            let code = status.code().unwrap_or(1);
            notices.push(format!("fd exited with code {code}"));
        }

        if result_limit_reached {
            notices.push(format!(
                "{effective_limit} results limit reached. Use limit={} for more, or refine pattern",
                effective_limit * 2
            ));
            details_map.insert(
                "resultLimitReached".to_string(),
                serde_json::Value::Number(serde_json::Number::from(effective_limit)),
            );
        }

        if truncation.truncated {
            notices.push(format!("{} limit reached", format_size(DEFAULT_MAX_BYTES)));
            details_map.insert("truncation".to_string(), serde_json::to_value(truncation)?);
        }

        // Notices are appended to the text as a single bracketed trailer.
        if !notices.is_empty() {
            let _ = write!(result_output, "\n\n[{}]", notices.join(". "));
        }

        let mut details = if details_map.is_empty() {
            None
        } else {
            Some(serde_json::Value::Object(details_map))
        };

        // May modify both the text and the details; it receives the untruncated line list.
        attach_text_artifact_if_needed_with_root(
            self.artifact_root.as_deref(),
            &mut result_output,
            &mut details,
            "find",
            tool_call_id,
            "fileResults",
            &artifact_source,
        );

        let output = ToolOutput {
            content: vec![ContentBlock::Text(TextContent::new(result_output))],
            details,
            is_error: false,
        };
        cache_tool_output(
            cache_key,
            stable_cache_dependency_for_path(&search_path, cache_mode, cache_deps.as_deref()),
            &output,
        );
        Ok(output)
    }
}
5926
5927// ============================================================================
5928// Ls Tool
5929// ============================================================================
5930
/// Input parameters for the ls tool.
///
/// Deserialized from the tool-call JSON with camelCase field names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LsInput {
    /// Directory to list. When absent, the tool's own `cwd` is listed.
    path: Option<String>,
    /// Maximum number of entries to emit. `Some(0)` is rejected as a
    /// validation error; `None` falls back to `DEFAULT_LS_LIMIT`.
    limit: Option<usize>,
}
5938
/// The built-in `ls` tool: lists the entries of a directory.
pub struct LsTool {
    /// Directory used when no `path` is supplied, and the base that
    /// user-supplied paths are resolved and scope-checked against.
    cwd: PathBuf,
    /// Optional root handed to `attach_text_artifact_if_needed_with_root`;
    /// `None` for tools built with [`LsTool::new`].
    artifact_root: Option<PathBuf>,
}
5943
5944impl LsTool {
5945    pub fn new(cwd: &Path) -> Self {
5946        Self {
5947            cwd: cwd.to_path_buf(),
5948            artifact_root: None,
5949        }
5950    }
5951
5952    #[cfg(test)]
5953    fn with_artifact_root(cwd: &Path, artifact_root: &Path) -> Self {
5954        Self {
5955            cwd: cwd.to_path_buf(),
5956            artifact_root: Some(artifact_root.to_path_buf()),
5957        }
5958    }
5959}
5960
#[async_trait]
#[allow(clippy::unnecessary_literal_bound, clippy::too_many_lines)]
impl Tool for LsTool {
    /// Tool identifier exposed to the model.
    fn name(&self) -> &str {
        "ls"
    }
    /// Human-readable label (same as the name).
    fn label(&self) -> &str {
        "ls"
    }
    /// Description of the tool's behavior shown to the model.
    fn description(&self) -> &str {
        "List directory contents. Returns entries sorted alphabetically, with '/' suffix for directories. Includes dotfiles. Output is truncated to 500 entries or 1MB (whichever is hit first)."
    }

    /// JSON Schema for the accepted input: optional `path` and `limit`.
    fn parameters(&self) -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Directory to list (default: current directory)"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of entries to return (default: 500)"
                }
            }
        })
    }

    /// Declares this tool as read-only for scheduling purposes.
    fn effects(&self) -> ToolEffects {
        ToolEffects::read()
    }

    /// Lists a directory and returns one entry per line, directories suffixed
    /// with `/`.
    ///
    /// Steps, in order: validate the input, resolve and scope-check the target
    /// directory, consult the tool-output cache, scan the directory (at most
    /// `LS_SCAN_HARD_LIMIT` entries), sort case-insensitively, emit up to
    /// `limit` lines through a byte-capped writer, append bracketed notices
    /// for any limit that was hit, optionally attach a text artifact, and
    /// store the result in the cache.
    ///
    /// # Errors
    ///
    /// Returns a validation error for malformed input or `limit == 0`, and a
    /// tool error when the path does not exist, is not a directory, or cannot
    /// be read. Errors from `enforce_cwd_scope` and from serializing the
    /// truncation details are propagated as-is.
    async fn execute(
        &self,
        tool_call_id: &str,
        input: serde_json::Value,
        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
    ) -> Result<ToolOutput> {
        // Keep the raw JSON around: it is part of the cache key below.
        let input_value = input.clone();
        let input: LsInput =
            serde_json::from_value(input).map_err(|e| Error::validation(e.to_string()))?;

        if matches!(input.limit, Some(0)) {
            return Err(Error::validation(
                "`limit` must be greater than 0".to_string(),
            ));
        }

        // Default to the tool's cwd when no path was given.
        let dir_path = input
            .path
            .as_ref()
            .map_or_else(|| self.cwd.clone(), |p| resolve_read_path(p, &self.cwd));
        let dir_path = enforce_cwd_scope(&dir_path, &self.cwd, "list")?;

        let effective_limit = input.limit.unwrap_or(DEFAULT_LS_LIMIT);

        if !dir_path.exists() {
            return Err(Error::tool(
                "ls",
                format!("Path not found: {}", dir_path.display()),
            ));
        }
        if !dir_path.is_dir() {
            return Err(Error::tool(
                "ls",
                format!("Not a directory: {}", dir_path.display()),
            ));
        }

        // Return a cached result when one is available for this input and
        // the dependency computed for the directory.
        let cache_key = tool_cache_key("ls", &self.cwd, &input_value);
        let cache_mode = ToolCacheFingerprintMode::DirectoryImmediate;
        let cache_deps = cache_dependency_for_path(&dir_path, cache_mode);
        if let Some(output) = cached_tool_output(&cache_key, cache_deps.as_deref()) {
            return Ok(output);
        }

        let mut entries = Vec::new();
        let mut read_dir = asupersync::fs::read_dir(&dir_path)
            .await
            .map_err(|e| Error::tool("ls", format!("Cannot read directory: {e}")))?;

        let mut scan_limit_reached = false;
        while let Some(entry) = read_dir
            .next_entry()
            .await
            .map_err(|e| Error::tool("ls", format!("Cannot read directory entry: {e}")))?
        {
            // Stop collecting once the hard cap is hit; the entry that was
            // just fetched is not recorded.
            if entries.len() >= LS_SCAN_HARD_LIMIT {
                scan_limit_reached = true;
                break;
            }
            let name = entry.file_name().to_string_lossy().to_string();
            // Handle broken symlinks or permission errors by treating them as non-directories
            // Optimization: use file_type() first to avoid stat overhead on every file.
            let is_dir = match entry.file_type().await {
                Ok(ft) => {
                    if ft.is_dir() {
                        true
                    } else if ft.is_symlink() {
                        // Only stat if it's a symlink to see if it points to a directory
                        entry.metadata().await.is_ok_and(|meta| meta.is_dir())
                    } else {
                        false
                    }
                }
                Err(_) => entry.metadata().await.is_ok_and(|meta| meta.is_dir()),
            };
            entries.push((name, is_dir));
        }

        // Sort alphabetically (case-insensitive).
        entries.sort_by_cached_key(|(a, _)| a.to_lowercase());

        let mut output_builder = HeadTruncatingLineWriter::new(DEFAULT_MAX_BYTES);
        let mut artifact_source = String::new();
        let mut emitted_entries = 0usize;
        let mut entry_limit_reached = false;

        for (entry, is_dir) in entries {
            // The limit flag is only set when at least one more entry exists
            // beyond the ones already emitted.
            if emitted_entries >= effective_limit {
                entry_limit_reached = true;
                break;
            }
            let line = if is_dir { format!("{entry}/") } else { entry };
            output_builder.push_line(&line);
            append_artifact_source_line(&mut artifact_source, &line);
            emitted_entries = emitted_entries.saturating_add(1);
        }

        // Nothing was emitted: report an empty directory and cache that.
        if emitted_entries == 0 {
            let output = ToolOutput {
                content: vec![ContentBlock::Text(TextContent::new("(empty directory)"))],
                details: None,
                is_error: false,
            };
            cache_tool_output(
                cache_key,
                stable_cache_dependency_for_path(&dir_path, cache_mode, cache_deps.as_deref()),
                &output,
            );
            return Ok(output);
        }

        // Apply byte truncation while writing, avoiding a second joined copy.
        let mut truncation = output_builder.finish();

        // Move the text out of the truncation record so the record itself can
        // still be serialized into `details` below.
        let mut output = std::mem::take(&mut truncation.content);
        let mut details_map = serde_json::Map::new();
        let mut notices: Vec<String> = Vec::new();

        if entry_limit_reached {
            notices.push(format!(
                "{effective_limit} entries limit reached. Use limit={} for more",
                effective_limit * 2
            ));
            details_map.insert(
                "entryLimitReached".to_string(),
                serde_json::Value::Number(serde_json::Number::from(effective_limit)),
            );
        }

        if scan_limit_reached {
            notices.push(format!(
                "Directory scan limited to {LS_SCAN_HARD_LIMIT} entries to prevent system overload"
            ));
            details_map.insert(
                "scanLimitReached".to_string(),
                serde_json::Value::Number(serde_json::Number::from(LS_SCAN_HARD_LIMIT)),
            );
        }

        if truncation.truncated {
            notices.push(format!("{} limit reached", format_size(DEFAULT_MAX_BYTES)));
            details_map.insert("truncation".to_string(), serde_json::to_value(truncation)?);
        }

        // All notices are appended to the text as a single bracketed trailer.
        if !notices.is_empty() {
            let _ = write!(output, "\n\n[{}]", notices.join(". "));
        }

        let mut details = if details_map.is_empty() {
            None
        } else {
            Some(serde_json::Value::Object(details_map))
        };

        // NOTE(review): presumably stores the untruncated listing as an
        // artifact when the helper deems it necessary — confirm in the helper.
        attach_text_artifact_if_needed_with_root(
            self.artifact_root.as_deref(),
            &mut output,
            &mut details,
            "ls",
            tool_call_id,
            "directoryEntries",
            &artifact_source,
        );

        let output = ToolOutput {
            content: vec![ContentBlock::Text(TextContent::new(output))],
            details,
            is_error: false,
        };
        cache_tool_output(
            cache_key,
            stable_cache_dependency_for_path(&dir_path, cache_mode, cache_deps.as_deref()),
            &output,
        );
        Ok(output)
    }
}
6171
6172// ============================================================================
6173// Cleanup
6174// ============================================================================
6175
6176/// Clean up old temporary files created by the bash tool.
6177///
6178/// Scans the system temporary directory for files matching `pi-bash-*.log`
6179/// that are older than 24 hours and deletes them. This prevents indefinite
6180/// accumulation of log files from long-running sessions.
6181pub fn cleanup_temp_files() {
6182    // Run in a detached thread to avoid blocking startup/shutdown.
6183    std::thread::spawn(|| {
6184        let temp_dir = std::env::temp_dir();
6185        let Ok(entries) = std::fs::read_dir(&temp_dir) else {
6186            return;
6187        };
6188
6189        for entry in entries.flatten() {
6190            let path = entry.path();
6191            if !path.is_file() {
6192                continue;
6193            }
6194
6195            let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
6196                continue;
6197            };
6198
6199            // Match "pi-bash-" or "pi-rpc-bash-" prefix and ".log" suffix.
6200            if (file_name.starts_with("pi-bash-") || file_name.starts_with("pi-rpc-bash-"))
6201                && std::path::Path::new(file_name)
6202                    .extension()
6203                    .is_some_and(|ext| ext.eq_ignore_ascii_case("log"))
6204                && let Ok(metadata) = entry.metadata()
6205                && metadata.modified().is_ok_and(|modified| {
6206                    modified
6207                        .elapsed()
6208                        .is_ok_and(|age| age > Duration::from_secs(24 * 60 * 60))
6209                })
6210                && let Err(e) = std::fs::remove_file(&path)
6211            {
6212                // Log but don't panic on cleanup failure
6213                tracing::debug!("Failed to remove temp file {}: {}", path.display(), e);
6214            }
6215        }
6216    });
6217}
6218
6219// ============================================================================
6220// Helper functions
6221// ============================================================================
6222
/// Reports whether an `rg` binary could be located, i.e. whether
/// `find_rg_binary()` returns `Some`.
fn rg_available() -> bool {
    find_rg_binary().is_some()
}
6226
6227fn pump_stream<R: Read + Send + 'static>(
6228    mut reader: R,
6229    stream_name: &'static str,
6230    tx: &mpsc::SyncSender<BashPipeFrame>,
6231) {
6232    let mut buf = vec![0u8; 8192];
6233    loop {
6234        match reader.read(&mut buf) {
6235            Ok(0) => break,
6236            Ok(n) => {
6237                if tx.send(BashPipeFrame::Chunk(buf[..n].to_vec())).is_err() {
6238                    break;
6239                }
6240            }
6241            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
6242            Err(err) => {
6243                let _ = tx.send(BashPipeFrame::Error(format!(
6244                    "Failed to read bash {stream_name}: {err}"
6245                )));
6246                break;
6247            }
6248        }
6249    }
6250}
6251
6252async fn ingest_bash_pipe_frame(frame: BashPipeFrame, state: &mut BashOutputState) -> Result<()> {
6253    match frame {
6254        BashPipeFrame::Chunk(chunk) => ingest_bash_chunk(chunk, state).await,
6255        BashPipeFrame::Error(message) => {
6256            let error_message = bash_capture_error_message(&message, state);
6257            state.abandon_spill_file();
6258            Err(Error::tool("bash", error_message))
6259        }
6260    }
6261}
6262
6263fn bash_capture_error_message(message: &str, state: &BashOutputState) -> String {
6264    let raw = concat_chunks(&state.chunks);
6265    if raw.is_empty() {
6266        return message.to_string();
6267    }
6268
6269    let full_text = String::from_utf8_lossy(&raw).into_owned();
6270    let truncation = truncate_tail(full_text, DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES);
6271    let mut error_message = message.to_string();
6272    let partial_output = if truncation.content.is_empty() {
6273        "(no output)".to_string()
6274    } else {
6275        truncation.content
6276    };
6277    let _ = write!(
6278        error_message,
6279        "\n\nPartial output before failure:\n{partial_output}"
6280    );
6281    if truncation.truncated || state.total_bytes > state.chunks_bytes {
6282        let _ = write!(
6283            error_message,
6284            "\n\n[Partial output truncated before failure]"
6285        );
6286    }
6287    error_message
6288}
6289
/// Consume `reader` until EOF, keeping at most the first `max_bytes + 1`
/// bytes.
///
/// The one extra byte lets callers tell "exactly `max_bytes`" apart from
/// "more than `max_bytes`". Reading continues after the cap is reached so the
/// pipe is never closed early, which would change how the child process
/// behaves. Reads interrupted by a signal are retried.
///
/// # Errors
///
/// Any other read error is returned as its display string.
pub(crate) fn read_to_end_capped_and_drain<R: Read>(
    mut reader: R,
    max_bytes: u64,
) -> std::result::Result<Vec<u8>, String> {
    // Saturate to `usize::MAX` when `max_bytes + 1` does not fit.
    let cap = max_bytes
        .checked_add(1)
        .and_then(|limit| usize::try_from(limit).ok())
        .unwrap_or(usize::MAX);
    let mut kept: Vec<u8> = Vec::with_capacity(cap.min(8192));
    let mut scratch = [0u8; 8192];

    loop {
        let filled = match reader.read(&mut scratch) {
            Ok(0) => return Ok(kept),
            Ok(filled) => filled,
            Err(err) if matches!(err.kind(), std::io::ErrorKind::Interrupted) => continue,
            Err(err) => return Err(err.to_string()),
        };

        // Once the cap is reached `room` is zero and the data is discarded.
        let room = cap.saturating_sub(kept.len());
        kept.extend_from_slice(&scratch[..filled.min(room)]);
    }
}
6318
// `rx` is deliberately `&mut Receiver`: `std::sync::mpsc::Receiver` is `Send`
// but not `Sync`, and since this helper awaits between polls, taking
// `&Receiver` would make the surrounding future non-Send.
/// Test helper: drain pending bash pipe frames into `bash_output`.
///
/// Returns `Ok(true)` only when cancellation was observed (and
/// `allow_cancellation` is set); returns `Ok(false)` when the channel is
/// disconnected or `drain_deadline` has passed. Errors from ingesting a frame
/// are propagated.
#[allow(clippy::needless_pass_by_ref_mut)]
#[cfg(test)]
async fn drain_bash_output(
    rx: &mut mpsc::Receiver<BashPipeFrame>,
    bash_output: &mut BashOutputState,
    cx: &AgentCx,
    drain_deadline: asupersync::Time,
    tick: Duration,
    allow_cancellation: bool,
) -> Result<bool> {
    loop {
        let frame = match rx.try_recv() {
            Ok(frame) => frame,
            Err(mpsc::TryRecvError::Disconnected) => return Ok(false),
            Err(mpsc::TryRecvError::Empty) => {
                // Nothing queued: check the deadline, then cancellation,
                // then wait one tick before polling again.
                let now = cx
                    .cx()
                    .timer_driver()
                    .map_or_else(wall_now, |timer| timer.now());
                if now >= drain_deadline {
                    return Ok(false);
                }
                if allow_cancellation && cx.checkpoint().is_err() {
                    return Ok(true);
                }
                sleep(now, tick).await;
                continue;
            }
        };

        ingest_bash_pipe_frame(frame, bash_output).await?;
    }
}
6352
/// Join all buffered chunks, front to back, into one contiguous byte vector.
///
/// The result is allocated once with the exact combined length.
fn concat_chunks(chunks: &VecDeque<Vec<u8>>) -> Vec<u8> {
    let combined_len = chunks.iter().map(Vec::len).sum::<usize>();
    let mut joined = Vec::with_capacity(combined_len);
    chunks
        .iter()
        .for_each(|part| joined.extend_from_slice(part));
    joined
}
6361
/// Running state of a bash command's captured output: counters, an in-memory
/// window of recent chunks, and an optional spill file on disk.
struct BashOutputState {
    /// Total number of bytes ingested so far (saturating).
    total_bytes: usize,
    /// Number of `\n` bytes seen so far; converted to a line count with
    /// `line_count_from_newline_count` when reporting progress.
    line_count: usize,
    /// Whether the most recently ingested non-empty chunk ended with `\n`.
    last_byte_was_newline: bool,
    /// Instant captured when the state was created; used for `elapsedMs`.
    start_time: std::time::Instant,
    /// Optional timeout, reported as `timeoutMs` in progress updates.
    timeout_ms: Option<u64>,
    /// Path of the spill file, reported as `fullOutputPath`. Stays set after
    /// the file handle is closed at the hard size limit; cleared when the
    /// spill file is abandoned.
    temp_file_path: Option<PathBuf>,
    /// Open handle to the spill file while chunks are still being written.
    temp_file: Option<asupersync::fs::File>,
    /// Most recent chunks kept in memory; the oldest are dropped once
    /// `chunks_bytes` exceeds `max_chunks_bytes` (at least one chunk is kept).
    chunks: VecDeque<Vec<u8>>,
    /// Combined length in bytes of everything currently in `chunks`.
    chunks_bytes: usize,
    /// Byte budget for `chunks`.
    max_chunks_bytes: usize,
    /// Set once spilling to disk has failed or been abandoned, so that no
    /// new spill file is attempted.
    spill_failed: bool,
}
6375
6376impl BashOutputState {
6377    fn new(max_chunks_bytes: usize) -> Self {
6378        Self {
6379            total_bytes: 0,
6380            line_count: 0,
6381            last_byte_was_newline: false,
6382            start_time: std::time::Instant::now(),
6383            timeout_ms: None,
6384            temp_file_path: None,
6385            temp_file: None,
6386            chunks: VecDeque::new(),
6387            chunks_bytes: 0,
6388            max_chunks_bytes,
6389            spill_failed: false,
6390        }
6391    }
6392
6393    fn abandon_spill_file(&mut self) {
6394        self.spill_failed = true;
6395        self.temp_file = None;
6396        if let Some(path) = self.temp_file_path.take() {
6397            if let Err(e) = std::fs::remove_file(&path)
6398                && e.kind() != std::io::ErrorKind::NotFound
6399            {
6400                tracing::debug!(
6401                    "Failed to remove incomplete bash spill file {}: {}",
6402                    path.display(),
6403                    e
6404                );
6405            }
6406        }
6407    }
6408}
6409
/// Record one chunk of bash output in `state`.
///
/// Updates the byte/newline counters, lazily creates a spill file in the
/// system temp directory the first time the total exceeds
/// `DEFAULT_MAX_BYTES`, appends the chunk to that file while the total stays
/// within `BASH_FILE_LIMIT_BYTES`, and finally pushes the chunk onto the
/// in-memory window, evicting the oldest chunks when the window is over
/// budget. Empty chunks are ignored. Spill-file problems are logged and
/// disable spilling; this function itself always returns `Ok(())`.
#[allow(clippy::too_many_lines)]
async fn ingest_bash_chunk(chunk: Vec<u8>, state: &mut BashOutputState) -> Result<()> {
    if chunk.is_empty() {
        return Ok(());
    }

    state.last_byte_was_newline = chunk.last().is_some_and(|byte| *byte == b'\n');
    state.total_bytes = state.total_bytes.saturating_add(chunk.len());
    state.line_count = state
        .line_count
        .saturating_add(memchr::memchr_iter(b'\n', &chunk).count());

    // Create the spill file at most once: only when no file/path exists yet
    // and a previous attempt has not already failed.
    if state.total_bytes > DEFAULT_MAX_BYTES
        && state.temp_file.is_none()
        && state.temp_file_path.is_none()
        && !state.spill_failed
    {
        let id_full = Uuid::new_v4().simple().to_string();
        let id = &id_full[..16];
        let path = std::env::temp_dir().join(format!("pi-bash-{id}.log"));

        // Create the file synchronously with restricted permissions to avoid
        // a race condition where the file is world-readable before we fix it.
        // We also capture the inode (on Unix) to verify identity later.
        let path_clone = path.clone();
        let expected_inode: Option<u64> =
            asupersync::runtime::spawn_blocking_io(move || -> std::io::Result<Option<u64>> {
                let mut options = std::fs::OpenOptions::new();
                // `create_new` fails if the path already exists.
                options.write(true).create_new(true);

                #[cfg(unix)]
                {
                    use std::os::unix::fs::OpenOptionsExt;
                    options.mode(0o600);
                }

                match options.open(&path_clone) {
                    Ok(file) => {
                        #[cfg(unix)]
                        {
                            use std::os::unix::fs::MetadataExt;
                            Ok(file.metadata().ok().map(|m| m.ino()))
                        }
                        #[cfg(not(unix))]
                        {
                            drop(file);
                            Ok(None)
                        }
                    }
                    Err(e) => {
                        tracing::warn!("Failed to create bash temp file: {e}");
                        Ok(None)
                    }
                }
            })
            .await
            .unwrap_or(None);

        // On Unix a missing inode means creation (or the stat) failed; on
        // other platforms no inode is ever captured, so always try to open.
        if expected_inode.is_some() || !cfg!(unix) {
            match asupersync::fs::OpenOptions::new()
                .append(true)
                .open(&path)
                .await
            {
                Ok(mut file) => {
                    #[cfg_attr(not(unix), allow(unused_mut))]
                    let mut identity_match = true;
                    #[cfg(unix)]
                    if let Some(expected) = expected_inode {
                        use std::os::unix::fs::MetadataExt;
                        match file.metadata().await {
                            Ok(meta) => {
                                if !meta.ino().eq(&expected) {
                                    tracing::warn!(
                                        "Temp file identity mismatch (possible TOCTOU attack)"
                                    );
                                    identity_match = false;
                                }
                            }
                            Err(e) => {
                                tracing::warn!("Failed to stat temp file: {e}");
                                identity_match = false;
                            }
                        }
                    }

                    if identity_match {
                        // Write buffered chunks to file first so it contains output from the beginning.
                        let mut failed_flush = false;
                        for existing in &state.chunks {
                            if let Err(e) = file.write_all(existing).await {
                                tracing::warn!("Failed to flush bash chunk to temp file: {e}");
                                failed_flush = true;
                                break;
                            }
                        }

                        // Record the path first so `abandon_spill_file` can
                        // delete the file if the flush failed.
                        state.temp_file_path = Some(path);
                        if failed_flush {
                            state.abandon_spill_file();
                        } else {
                            state.temp_file = Some(file);
                        }
                    } else {
                        state.temp_file_path = Some(path);
                        state.abandon_spill_file();
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to open temp file async: {e}");
                    state.temp_file_path = Some(path);
                    state.abandon_spill_file();
                }
            }
        } else {
            state.spill_failed = true;
        }
    }

    // Append the current chunk to the spill file, if one is open.
    let mut close_spill_file = false;
    if let Some(file) = state.temp_file.as_mut() {
        let mut abandon_spill_file = false;
        if state.total_bytes <= BASH_FILE_LIMIT_BYTES {
            if let Err(e) = file.write_all(&chunk).await {
                tracing::warn!("Failed to write bash chunk to temp file: {e}");
                abandon_spill_file = true;
            }
        } else {
            // Hard limit reached. Stop writing and close the file to release the FD.
            if !state.spill_failed {
                tracing::warn!("Bash output exceeded hard limit; stopping file log");
                close_spill_file = true;
            }
        }
        if abandon_spill_file {
            state.abandon_spill_file();
        }
    }
    // Closing only drops the handle; `temp_file_path` is kept.
    if close_spill_file {
        state.temp_file = None;
    }

    // Keep the chunk in memory and evict from the front while over budget,
    // always retaining at least the newest chunk.
    state.chunks_bytes = state.chunks_bytes.saturating_add(chunk.len());
    state.chunks.push_back(chunk);
    while state.chunks_bytes > state.max_chunks_bytes && state.chunks.len() > 1 {
        if let Some(front) = state.chunks.pop_front() {
            state.chunks_bytes = state.chunks_bytes.saturating_sub(front.len());
        }
    }
    Ok(())
}
6561
/// Convert a newline count into a line count.
///
/// No bytes means no lines. Otherwise every newline terminates one line, and
/// a trailing unterminated line (output not ending in `\n`) counts as one
/// more, saturating at `usize::MAX`.
const fn line_count_from_newline_count(
    total_bytes: usize,
    newline_count: usize,
    last_byte_was_newline: bool,
) -> usize {
    match (total_bytes, last_byte_was_newline) {
        (0, _) => 0,
        (_, true) => newline_count,
        (_, false) => newline_count.saturating_add(1),
    }
}
6575
6576fn emit_bash_update(
6577    state: &BashOutputState,
6578    on_update: Option<&(dyn Fn(ToolUpdate) + Send + Sync)>,
6579) -> Result<()> {
6580    if let Some(callback) = on_update {
6581        let raw = concat_chunks(&state.chunks);
6582        let full_text = String::from_utf8_lossy(&raw);
6583        let truncation =
6584            truncate_tail(full_text.into_owned(), DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES);
6585
6586        // Build the progress + details JSON using the json! macro instead of
6587        // manual Map::insert calls.  This eliminates 7+ String heap
6588        // allocations per update for the constant field-name keys
6589        // ("elapsedMs", "lineCount", …) that the manual path required.
6590        let elapsed_ms = state.start_time.elapsed().as_millis();
6591        let line_count = line_count_from_newline_count(
6592            state.total_bytes,
6593            state.line_count,
6594            state.last_byte_was_newline,
6595        );
6596        let mut details = serde_json::json!({
6597            "progress": {
6598                "elapsedMs": elapsed_ms,
6599                "lineCount": line_count,
6600                "byteCount": state.total_bytes
6601            }
6602        });
6603        let Some(details_map) = details.as_object_mut() else {
6604            return Ok(());
6605        };
6606
6607        if let Some(timeout) = state.timeout_ms {
6608            if let Some(progress) = details_map
6609                .get_mut("progress")
6610                .and_then(|v| v.as_object_mut())
6611            {
6612                progress.insert("timeoutMs".into(), serde_json::json!(timeout));
6613            }
6614        }
6615        if truncation.truncated {
6616            details_map.insert("truncation".into(), serde_json::to_value(&truncation)?);
6617        }
6618        if let Some(path) = state.temp_file_path.as_ref() {
6619            details_map.insert(
6620                "fullOutputPath".into(),
6621                serde_json::Value::String(path.display().to_string()),
6622            );
6623        }
6624
6625        callback(ToolUpdate {
6626            content: vec![ContentBlock::Text(TextContent::new(truncation.content))],
6627            details: Some(details),
6628        });
6629    }
6630    Ok(())
6631}
6632
/// Owns a spawned child process and cleans it up when dropped.
///
/// If the guard is dropped while the child is still running, the child is
/// killed (and, depending on `cleanup_mode`, its process group/tree as well)
/// on a background thread.
pub(crate) struct ProcessGuard {
    /// The guarded child; `None` once it has been waited on or killed.
    child: Option<std::process::Child>,
    /// How much to clean up besides the child itself.
    cleanup_mode: ProcessCleanupMode,
}
6637
/// Selects what `cleanup_child` does before the child itself is killed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ProcessCleanupMode {
    /// No extra cleanup: only the child process is killed.
    ChildOnly,
    /// Additionally kill the child's process group and descendant tree via
    /// `kill_process_group_tree`.
    ProcessGroupTree,
}
6643
6644impl ProcessGuard {
6645    pub(crate) const fn new(child: std::process::Child, cleanup_mode: ProcessCleanupMode) -> Self {
6646        Self {
6647            child: Some(child),
6648            cleanup_mode,
6649        }
6650    }
6651
6652    pub(crate) fn try_wait_child(&mut self) -> std::io::Result<Option<std::process::ExitStatus>> {
6653        self.child
6654            .as_mut()
6655            .map_or(Ok(None), std::process::Child::try_wait)
6656    }
6657
6658    pub(crate) fn kill(&mut self) -> Option<std::process::ExitStatus> {
6659        if let Some(mut child) = self.child.take() {
6660            cleanup_child(Some(child.id()), self.cleanup_mode);
6661            let _ = child.kill();
6662            std::thread::spawn(move || {
6663                let _ = child.wait();
6664            });
6665            // We cannot return the exit status synchronously without blocking,
6666            // so we return None to indicate the process was forcefully killed.
6667            return None;
6668        }
6669        None
6670    }
6671
6672    pub(crate) fn wait(&mut self) -> std::io::Result<std::process::ExitStatus> {
6673        if let Some(mut child) = self.child.take() {
6674            return child.wait();
6675        }
6676        Err(std::io::Error::other("Already waited"))
6677    }
6678}
6679
6680impl Drop for ProcessGuard {
6681    fn drop(&mut self) {
6682        if let Some(mut child) = self.child.take() {
6683            match child.try_wait() {
6684                Ok(None) => {}
6685                Ok(Some(_)) | Err(_) => return,
6686            }
6687            let cleanup_mode = self.cleanup_mode;
6688            std::thread::spawn(move || {
6689                cleanup_child(Some(child.id()), cleanup_mode);
6690                let _ = child.kill();
6691                let _ = child.wait();
6692            });
6693        }
6694    }
6695}
6696
6697fn cleanup_child(pid: Option<u32>, cleanup_mode: ProcessCleanupMode) {
6698    if cleanup_mode == ProcessCleanupMode::ProcessGroupTree {
6699        kill_process_group_tree(pid);
6700    }
6701}
6702
/// Sends `Signal::Kill` to `pid` and to every descendant found in a process
/// snapshot, without signalling the process group. Does nothing for `None`.
pub fn kill_process_tree(pid: Option<u32>) {
    kill_process_tree_with(pid, sysinfo::Signal::Kill, false);
}
6706
/// Like [`kill_process_tree`], but on Unix also sends the kill signal to the
/// process group whose id is `pid`. Does nothing for `None`.
pub(crate) fn kill_process_group_tree(pid: Option<u32>) {
    kill_process_tree_with(pid, sysinfo::Signal::Kill, true);
}
6710
/// Sends `Signal::Term` to `pid`, its descendants and (on Unix) the process
/// group whose id is `pid`. Does nothing for `None`.
fn terminate_process_group_tree(pid: Option<u32>) {
    kill_process_tree_with(pid, sysinfo::Signal::Term, true);
}
6714
6715fn kill_process_tree_with(pid: Option<u32>, signal: sysinfo::Signal, include_process_group: bool) {
6716    let Some(pid) = pid else {
6717        return;
6718    };
6719
6720    let root = sysinfo::Pid::from_u32(pid);
6721
6722    let mut sys = sysinfo::System::new();
6723    sys.refresh_processes(sysinfo::ProcessesToUpdate::All, true);
6724
6725    let mut children_map: HashMap<sysinfo::Pid, Vec<sysinfo::Pid>> = HashMap::new();
6726    for (p, proc_) in sys.processes() {
6727        if let Some(parent) = proc_.parent() {
6728            children_map.entry(parent).or_default().push(*p);
6729        }
6730    }
6731
6732    let mut to_kill = Vec::new();
6733    let mut visited = std::collections::HashSet::new();
6734    collect_process_tree(root, &children_map, &mut to_kill, &mut visited);
6735
6736    if include_process_group {
6737        // Some subprocess surfaces isolate the child into its own process group.
6738        // When they do, killing the group first catches background children even
6739        // if they have already been reparented away from the original root PID.
6740        #[cfg(unix)]
6741        {
6742            let sig_num = match signal {
6743                sysinfo::Signal::Kill => "9",
6744                _ => "15",
6745            };
6746            let _ = Command::new("kill")
6747                .arg(format!("-{sig_num}"))
6748                .arg("--")
6749                .arg(format!("-{pid}"))
6750                .stdin(Stdio::null())
6751                .stdout(Stdio::null())
6752                .stderr(Stdio::null())
6753                .status();
6754        }
6755    }
6756
6757    // Kill children first.
6758    for pid in to_kill.into_iter().rev() {
6759        if let Some(proc_) = sys.process(pid) {
6760            match proc_.kill_with(signal) {
6761                Some(true) => {}
6762                Some(false) | None => {
6763                    let _ = proc_.kill();
6764                }
6765            }
6766        }
6767    }
6768}
6769
6770fn collect_process_tree(
6771    pid: sysinfo::Pid,
6772    children_map: &HashMap<sysinfo::Pid, Vec<sysinfo::Pid>>,
6773    out: &mut Vec<sysinfo::Pid>,
6774    visited: &mut std::collections::HashSet<sysinfo::Pid>,
6775) {
6776    if !visited.insert(pid) {
6777        return;
6778    }
6779    out.push(pid);
6780    if let Some(children) = children_map.get(&pid) {
6781        for child in children {
6782            collect_process_tree(*child, children_map, out, visited);
6783        }
6784    }
6785}
6786
6787/// Build a child command whose Unix process image starts with SIGPIPE restored
6788/// to the platform default, without using `Command::pre_exec`.
6789///
6790/// Rust binaries ignore SIGPIPE by default, and POSIX inherits that disposition
6791/// across `exec(2)`. The tiny `/bin/sh` trampoline resets PIPE and then `exec`s
6792/// the requested program, preserving argv, cwd, stdio, and the process id that
6793/// later becomes the isolated process-group leader.
6794pub(crate) const SIGPIPE_TRAMPOLINE_EXEC_FAILURE_PREFIX: &str = "pi-sigpipe-reset: exec failed:";
6795
6796pub(crate) fn command_with_default_sigpipe(program: impl AsRef<OsStr>) -> std::io::Result<Command> {
6797    command_with_default_sigpipe_for_cwd(program.as_ref(), None)
6798}
6799
6800/// Variant of [`command_with_default_sigpipe`] for commands that will run with
6801/// `current_dir(cwd)`. This preserves relative `./program` lookup semantics.
6802pub(crate) fn command_with_default_sigpipe_in_dir(
6803    program: impl AsRef<OsStr>,
6804    cwd: &Path,
6805) -> std::io::Result<Command> {
6806    command_with_default_sigpipe_for_cwd(program.as_ref(), Some(cwd))
6807}
6808
6809#[cfg(unix)]
6810fn command_with_default_sigpipe_for_cwd(
6811    program: &OsStr,
6812    cwd: Option<&Path>,
6813) -> std::io::Result<Command> {
6814    let program = resolve_executable_for_shell_trampoline(program, cwd)?;
6815    let mut command = Command::new("/bin/sh");
6816    command
6817        .arg("-c")
6818        .arg(
6819            "trap - PIPE\n\
6820             exec \"$@\"\n\
6821             status=$?\n\
6822             printf 'pi-sigpipe-reset: exec failed: %s\\n' \"$1\" >&2\n\
6823             exit \"$status\"",
6824        )
6825        .arg("pi-sigpipe-reset")
6826        .arg(program);
6827    Ok(command)
6828}
6829
/// Non-Unix implementation: no trampoline is used, so the command is built
/// directly from `program` and `_cwd` is ignored.
#[cfg(not(unix))]
fn command_with_default_sigpipe_for_cwd(
    program: &OsStr,
    _cwd: Option<&Path>,
) -> std::io::Result<Command> {
    Ok(Command::new(program)) // ubs:ignore policy-checked non-Unix command runner
}
6838
/// Resolve `program` to the absolute path of an executable regular file.
///
/// * A `program` containing `/` is treated as a path: it is made absolute
///   (relative to `cwd` when given, otherwise to the current directory) and
///   must be an executable file.
/// * Otherwise each `PATH` entry is probed in order (falling back to
///   `/bin:/usr/bin` when `PATH` is unset) and the first executable wins.
///
/// # Errors
///
/// * Path form: the `metadata` error is propagated, or `PermissionDenied` when
///   the path exists but is not an executable file.
/// * Name form: `PermissionDenied` when at least one candidate existed but was
///   unusable (or was unreadable), otherwise `NotFound`.
/// * Any failure to read the current directory is propagated.
#[cfg(unix)]
fn resolve_executable_for_shell_trampoline(
    program: &OsStr,
    cwd: Option<&Path>,
) -> std::io::Result<OsString> {
    use std::io::ErrorKind;
    use std::os::unix::ffi::OsStrExt as _;
    use std::os::unix::fs::PermissionsExt as _;

    /// Whether `path` is a regular file with at least one execute bit set.
    fn is_executable_file(path: &Path) -> std::io::Result<bool> {
        let meta = std::fs::metadata(path)?;
        let any_exec_bit = meta.permissions().mode() & 0o111 != 0;
        Ok(meta.is_file() && any_exec_bit)
    }

    /// Anchor a relative `path` at `<current dir>/<cwd>`; absolute paths pass through.
    fn to_absolute(path: &Path, cwd: Option<&Path>) -> std::io::Result<PathBuf> {
        if path.is_absolute() {
            return Ok(path.to_path_buf());
        }

        let mut anchored = std::env::current_dir()?;
        if let Some(cwd) = cwd {
            anchored.push(cwd);
        }
        anchored.push(path);
        Ok(anchored)
    }

    // Explicit path: no PATH search, exactly one candidate.
    if program.as_bytes().contains(&b'/') {
        let candidate = to_absolute(Path::new(program), cwd)?;
        return if is_executable_file(&candidate)? {
            Ok(candidate.into_os_string())
        } else {
            Err(std::io::Error::new(
                ErrorKind::PermissionDenied,
                format!("not an executable file: {}", candidate.display()),
            ))
        };
    }

    let search_path = std::env::var_os("PATH").unwrap_or_else(|| OsString::from("/bin:/usr/bin"));
    let mut saw_unusable_candidate = false;
    for dir in std::env::split_paths(&search_path) {
        let candidate = to_absolute(&dir.join(program), cwd)?;
        match is_executable_file(&candidate) {
            Ok(true) => return Ok(candidate.into_os_string()),
            Ok(false) => saw_unusable_candidate = true,
            Err(err) => {
                // Missing entries and other I/O errors are skipped silently;
                // only a permission failure influences the final error kind.
                if err.kind() == ErrorKind::PermissionDenied {
                    saw_unusable_candidate = true;
                }
            }
        }
    }

    let (kind, message) = if saw_unusable_candidate {
        (
            ErrorKind::PermissionDenied,
            format!("command is not executable: {}", program.to_string_lossy()),
        )
    } else {
        (
            ErrorKind::NotFound,
            format!("command not found: {}", program.to_string_lossy()),
        )
    };
    Err(std::io::Error::new(kind, message))
}
6900
/// Make the child the leader of its own process group.
///
/// On Unix this requests process group id `0` for the spawned child, which
/// uses the child's own pid as its group id, so it no longer shares pi's
/// process group. On other platforms this is a no-op.
pub(crate) fn isolate_command_process_group(command: &mut Command) {
    #[cfg(unix)]
    {
        std::os::unix::process::CommandExt::process_group(command, 0);
    }

    #[cfg(not(unix))]
    {
        let _ = command;
    }
}
6914
6915fn format_grep_path(file_path: &Path, cwd: &Path) -> String {
6916    if let Ok(rel) = file_path.strip_prefix(cwd) {
6917        let rel_str = rel.display().to_string().replace('\\', "/");
6918        if !rel_str.is_empty() {
6919            return rel_str;
6920        }
6921    }
6922
6923    let canonical_file = safe_canonicalize(file_path);
6924    let canonical_cwd = safe_canonicalize(cwd);
6925    if let Ok(rel) = canonical_file.strip_prefix(&canonical_cwd) {
6926        let rel_str = rel.display().to_string().replace('\\', "/");
6927        if !rel_str.is_empty() {
6928            return rel_str;
6929        }
6930    }
6931
6932    file_path.display().to_string().replace('\\', "/")
6933}
6934
6935async fn get_file_lines_async<'a>(
6936    path: &Path,
6937    cache: &'a mut HashMap<PathBuf, Vec<String>>,
6938) -> &'a [String] {
6939    if !cache.contains_key(path) {
6940        // Prevent OOM on huge files and hangs on pipes
6941        if let Ok(meta) = asupersync::fs::metadata(path).await {
6942            if !meta.is_file() || meta.len() > 10 * 1024 * 1024 {
6943                cache.insert(path.to_path_buf(), Vec::new());
6944                return &[];
6945            }
6946        } else {
6947            cache.insert(path.to_path_buf(), Vec::new());
6948            return &[];
6949        }
6950
6951        // Match Node's `readFileSync(..., "utf-8")` behavior: decode lossily rather than failing.
6952        let bytes = match asupersync::fs::read(path).await {
6953            Ok(bytes) => bytes,
6954            Err(err) => {
6955                tracing::debug!("Failed to read grep file {}: {err}", path.display());
6956                cache.insert(path.to_path_buf(), Vec::new());
6957                return &[];
6958            }
6959        };
6960        let content = String::from_utf8_lossy(&bytes);
6961        let mut lines = Vec::new();
6962        for line in content.split('\n') {
6963            let trimmed = line.strip_suffix('\r').unwrap_or(line);
6964            for piece in trimmed.split('\r') {
6965                lines.push(piece.to_string());
6966            }
6967        }
6968        if content.ends_with('\n') && lines.last().is_some_and(std::string::String::is_empty) {
6969            lines.pop();
6970        }
6971        cache.insert(path.to_path_buf(), lines);
6972    }
6973    if let Some(lines) = cache.get(path) {
6974        lines.as_slice()
6975    } else {
6976        &[]
6977    }
6978}
6979
/// Name of the `fd` executable available on this machine, if any.
///
/// Probes `fd` and then `fdfind` by spawning `<name> --version`; a name counts
/// as available when the process could be started (its exit status is not
/// inspected). The answer is computed once and cached for the process lifetime.
fn find_fd_binary() -> Option<&'static str> {
    const CANDIDATES: [&str; 2] = ["fd", "fdfind"];
    static BINARY: OnceLock<Option<&'static str>> = OnceLock::new();

    /// Whether `<name> --version` can be spawned at all.
    fn can_spawn(name: &str) -> bool {
        Command::new(name)
            .arg("--version")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .is_ok()
    }

    *BINARY.get_or_init(|| CANDIDATES.iter().copied().find(|name| can_spawn(name)))
}
7004
/// Name of the ripgrep executable available on this machine, if any.
///
/// Probes `rg` and then `ripgrep` by spawning `<name> --version`; a name counts
/// as available when the process could be started (its exit status is not
/// inspected). The answer is computed once and cached for the process lifetime.
fn find_rg_binary() -> Option<&'static str> {
    const CANDIDATES: [&str; 2] = ["rg", "ripgrep"];
    static BINARY: OnceLock<Option<&'static str>> = OnceLock::new();

    /// Whether `<name> --version` can be spawned at all.
    fn can_spawn(name: &str) -> bool {
        Command::new(name)
            .arg("--version")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .is_ok()
    }

    *BINARY.get_or_init(|| CANDIDATES.iter().copied().find(|name| can_spawn(name)))
}
7029
7030// ============================================================================
7031// Hashline Edit Tool
7032// ============================================================================
7033
/// Alphabet used to spell one nibble (4 bits) of a hashline tag; the nibble
/// value is the index into this string.
const NIBBLE_STR: &[u8; 16] = b"ZPMQVRWSNKTXJBYH";

/// Lazily built table with the 2-character `NIBBLE_STR` spelling of every
/// byte value.
static HASHLINE_DICT: OnceLock<[[u8; 2]; 256]> = OnceLock::new();

/// Byte value -> 2-character tag table, built on first use.
///
/// Each entry holds the character for the low nibble first and the character
/// for the high nibble second.
fn hashline_dict() -> &'static [[u8; 2]; 256] {
    HASHLINE_DICT.get_or_init(|| {
        std::array::from_fn(|byte| {
            let low = byte & 0x0F;
            let high = (byte >> 4) & 0x0F;
            [NIBBLE_STR[low], NIBBLE_STR[high]]
        })
    })
}
7050
7051/// Compute a 2-character hash tag for a line at the given 0-indexed position.
7052///
7053/// The algorithm:
7054/// 1. Strip trailing `\r`
7055/// 2. Remove all whitespace to get a "significant" string
7056/// 3. If the significant string contains at least one letter or digit, seed = 0;
7057///    otherwise seed = line index (to disambiguate punctuation-only or blank lines)
7058/// 4. Compute `xxh32(significant_bytes, seed) & 0xFF`
7059/// 5. Encode the low byte as 2 nibble chars from `NIBBLE_STR`
7060fn compute_line_hash(line_idx: usize, line: &str) -> [u8; 2] {
7061    let line = line.strip_suffix('\r').unwrap_or(line);
7062    // Remove all whitespace
7063    let significant: String = line.chars().filter(|c| !c.is_whitespace()).collect();
7064    let has_alnum = significant.chars().any(char::is_alphanumeric);
7065    let seed = if has_alnum {
7066        0
7067    } else {
7068        #[allow(clippy::cast_possible_truncation)]
7069        let s = line_idx as u32;
7070        s
7071    };
7072    let hash = xxhash_rust::xxh32::xxh32(significant.as_bytes(), seed);
7073    let byte = (hash & 0xFF) as usize;
7074    hashline_dict()[byte]
7075}
7076
7077/// Format a hashline tag as `"N#AB"` where N is the 1-indexed line number.
7078fn format_hashline_tag(line_idx: usize, line: &str) -> String {
7079    let h = compute_line_hash(line_idx, line);
7080    format!("{}#{}{}", line_idx + 1, h[0] as char, h[1] as char)
7081}
7082
7083/// Compute a hashline tag, reapplying a stripped BOM for the first line if needed.
7084fn format_hashline_tag_with_bom(line_idx: usize, line: &str, had_bom: bool) -> String {
7085    let h = compute_line_hash_with_bom(line_idx, line, had_bom);
7086    format!("{}#{}{}", line_idx + 1, h[0] as char, h[1] as char)
7087}
7088
7089fn compute_line_hash_with_bom(line_idx: usize, line: &str, had_bom: bool) -> [u8; 2] {
7090    if had_bom && line_idx == 0 {
7091        let mut with_bom = String::with_capacity(line.len().saturating_add(1));
7092        with_bom.push('\u{FEFF}');
7093        with_bom.push_str(line);
7094        compute_line_hash(line_idx, &with_bom)
7095    } else {
7096        compute_line_hash(line_idx, line)
7097    }
7098}
7099
7100/// Regex for parsing hashline references like `5#KJ` or ` > +  5 # KJ `.
7101/// Tolerates leading whitespace, diff markers (`>`, `+`, `-`), and spaces around `#`.
7102static HASHLINE_TAG_RE: OnceLock<regex::Regex> = OnceLock::new();
7103
7104fn hashline_tag_regex() -> &'static regex::Regex {
7105    HASHLINE_TAG_RE.get_or_init(|| {
7106        regex::Regex::new(r"^[\s>+\-]*(\d+)\s*#\s*([ZPMQVRWSNKTXJBYH]{2})")
7107            .expect("valid hashline regex")
7108    })
7109}
7110
7111/// Parse a hashline tag reference string into (1-indexed line number, 2-byte hash).
7112fn parse_hashline_tag(ref_str: &str) -> std::result::Result<(usize, [u8; 2]), String> {
7113    let re = hashline_tag_regex();
7114    let caps = re
7115        .captures(ref_str)
7116        .ok_or_else(|| format!("Invalid hashline reference: {ref_str:?}"))?;
7117    let line_num: usize = caps[1]
7118        .parse()
7119        .map_err(|e| format!("Invalid line number in {ref_str:?}: {e}"))?;
7120    if line_num == 0 {
7121        return Err(format!("Line number must be >= 1, got 0 in {ref_str:?}"));
7122    }
7123    let hash_bytes = caps[2].as_bytes();
7124    Ok((line_num, [hash_bytes[0], hash_bytes[1]]))
7125}
7126
7127/// Strip hashline tag prefixes that models sometimes copy into replacement content.
7128/// Matches patterns like `5#KJ:content` and returns just `content`.
7129static HASHLINE_PREFIX_RE: OnceLock<regex::Regex> = OnceLock::new();
7130
7131fn strip_hashline_prefix(line: &str) -> &str {
7132    let re = HASHLINE_PREFIX_RE.get_or_init(|| {
7133        regex::Regex::new(r"^[\s>+\-]*\d+\s*#\s*[ZPMQVRWSNKTXJBYH]{2}\s*:")
7134            .expect("valid hashline prefix regex")
7135    });
7136    re.find(line).map_or(line, |m| &line[m.end()..])
7137}
7138
/// Input parameters for the hashline edit tool.
///
/// Deserialized from the tool call's JSON arguments; field names are expected
/// in camelCase.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct HashlineEditInput {
    /// Path of the file to edit, as supplied by the caller.
    path: String,
    /// Edit operations to apply to that file.
    edits: Vec<HashlineOp>,
}
7146
/// A single hashline edit operation, exactly as received from the caller.
///
/// Anchors are kept as raw strings here; they are parsed and validated against
/// the file contents later.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
struct HashlineOp {
    /// Operation type: "replace", "prepend", or "append"
    op: String,
    /// Start anchor in "LINE#HASH" format (optional for BOF prepend / EOF append)
    pos: Option<String>,
    /// End anchor for range replace (inclusive)
    end: Option<String>,
    /// Replacement / insertion lines: a string, an array, or null/absent
    /// (see `get_lines` for how each shape is interpreted)
    lines: Option<serde_json::Value>,
}
7160
7161impl HashlineOp {
7162    /// Extract lines from the `lines` field, handling string, array, and null variants.
7163    fn get_lines(&self) -> Vec<String> {
7164        match &self.lines {
7165            None | Some(serde_json::Value::Null) => vec![],
7166            Some(serde_json::Value::String(s)) => {
7167                normalize_to_lf(s).split('\n').map(String::from).collect()
7168            }
7169            Some(serde_json::Value::Array(arr)) => arr
7170                .iter()
7171                .map(|v| match v {
7172                    serde_json::Value::String(s) => normalize_to_lf(s),
7173                    other => normalize_to_lf(&other.to_string()),
7174                })
7175                .collect(),
7176            Some(other) => vec![normalize_to_lf(&other.to_string())],
7177        }
7178    }
7179}
7180
/// A resolved hashline edit operation ready for application.
///
/// Anchors have already been validated and converted to 0-indexed lines.
struct ResolvedEdit<'a> {
    /// Effective operation: "replace", "prepend", or "append". An unanchored
    /// prepend/append on a file consisting of a single empty line is stored as
    /// "replace" so that the empty line is overwritten.
    op: &'a str,
    /// 0-indexed anchor line. Without a `pos` anchor this is 0 for prepend
    /// (BOF), and for append (EOF) the index of the last line, skipping the
    /// trailing empty line when the file has more than one line.
    start: usize,
    /// 0-indexed end line (inclusive, same as start for single-line ops)
    end: usize,
    /// Replacement / insertion lines with any copied hashline prefixes stripped.
    lines: Vec<String>,
}
7190
/// Built-in tool that applies hash-validated line edits to a file.
pub struct HashlineEditTool {
    /// Base directory used to resolve the `path` argument and to enforce the
    /// tool's working-directory scope.
    cwd: PathBuf,
}

impl HashlineEditTool {
    /// Create a tool instance bound to the working directory `cwd`.
    pub fn new(cwd: &Path) -> Self {
        let cwd = PathBuf::from(cwd);
        Self { cwd }
    }
}
7202
7203/// Validate a hashline tag reference against actual file lines.
7204/// Returns `Ok(0-indexed line)` or `Err(message)` with context.
7205fn validate_line_ref(
7206    ref_str: &str,
7207    file_lines: &[&str],
7208    had_bom: bool,
7209) -> std::result::Result<usize, String> {
7210    let (line_num, expected_hash) = parse_hashline_tag(ref_str)?;
7211    let line_idx = line_num - 1;
7212    if line_idx >= file_lines.len() {
7213        return Err(format!(
7214            "Line {line_num} out of range (file has {} lines)",
7215            file_lines.len()
7216        ));
7217    }
7218    let actual_hash = compute_line_hash_with_bom(line_idx, file_lines[line_idx], had_bom);
7219    if actual_hash != expected_hash {
7220        let tag = format_hashline_tag_with_bom(line_idx, file_lines[line_idx], had_bom);
7221        return Err(format!(
7222            "Hash mismatch at line {line_num}: expected {}#{}{}, actual is {tag}",
7223            line_num, expected_hash[0] as char, expected_hash[1] as char,
7224        ));
7225    }
7226    Ok(line_idx)
7227}
7228
7229/// Build a context snippet around a mismatched line for error reporting.
7230fn mismatch_context(file_lines: &[&str], line_idx: usize, context: usize, had_bom: bool) -> String {
7231    let start = line_idx.saturating_sub(context);
7232    let end = (line_idx + context + 1).min(file_lines.len());
7233    let mut out = String::new();
7234    for (i, &file_line) in file_lines.iter().enumerate().take(end).skip(start) {
7235        let tag = format_hashline_tag_with_bom(i, file_line, had_bom);
7236        if i == line_idx {
7237            let _ = writeln!(out, ">>> {tag}:{file_line}");
7238        } else {
7239            let _ = writeln!(out, "    {tag}:{file_line}");
7240        }
7241    }
7242    out
7243}
7244
7245/// Collect all hash mismatches from a set of edits, returning a combined error message.
7246fn collect_mismatches(
7247    edits: &[HashlineOp],
7248    file_lines: &[&str],
7249    had_bom: bool,
7250) -> std::result::Result<(), String> {
7251    let mut errors = Vec::new();
7252    for edit in edits {
7253        if let Some(ref pos) = edit.pos {
7254            if let Err(e) = validate_line_ref(pos, file_lines, had_bom) {
7255                // Find the line index for context
7256                if let Ok((line_num, _)) = parse_hashline_tag(pos) {
7257                    let idx = (line_num - 1).min(file_lines.len().saturating_sub(1));
7258                    errors.push(format!(
7259                        "{e}\n{}",
7260                        mismatch_context(file_lines, idx, 2, had_bom)
7261                    ));
7262                } else {
7263                    errors.push(e);
7264                }
7265            }
7266        }
7267        if let Some(ref end) = edit.end {
7268            if let Err(e) = validate_line_ref(end, file_lines, had_bom) {
7269                if let Ok((line_num, _)) = parse_hashline_tag(end) {
7270                    let idx = (line_num - 1).min(file_lines.len().saturating_sub(1));
7271                    errors.push(format!(
7272                        "{e}\n{}",
7273                        mismatch_context(file_lines, idx, 2, had_bom)
7274                    ));
7275                } else {
7276                    errors.push(e);
7277                }
7278            }
7279        }
7280    }
7281    if errors.is_empty() {
7282        Ok(())
7283    } else {
7284        Err(errors.join("\n"))
7285    }
7286}
7287
/// Normalized representation of an edit for deduplication.
///
/// Two edits with equal values here are treated as the same edit; only the
/// first one is kept.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct NormalizedEdit {
    /// Operation name as given by the caller.
    op: String,
    /// 1-indexed line number parsed from the `pos` anchor (hash ignored);
    /// `None` when the anchor is absent or unparsable.
    pos_line: Option<usize>,
    /// 1-indexed line number parsed from the `end` anchor (hash ignored);
    /// `None` when the anchor is absent or unparsable.
    end_line: Option<usize>,
    /// Replacement / insertion lines of the edit.
    lines: Vec<String>,
}
7296
/// Tie-break rank for edits anchored at the same line; lower ranks sort first.
///
/// `replace` = 0, `append` = 1, `prepend` = 2, anything else = 3.
fn op_precedence(op: &str) -> u8 {
    /// Known operations, listed in ascending rank.
    const RANKED_OPS: [&str; 3] = ["replace", "append", "prepend"];

    RANKED_OPS
        .iter()
        .zip(0u8..)
        .find(|(known, _)| **known == op)
        .map_or(3, |(_, rank)| rank)
}
7306
7307#[async_trait]
7308#[allow(clippy::unnecessary_literal_bound)]
7309impl Tool for HashlineEditTool {
    /// Identifier of this tool: `hashline_edit`.
    fn name(&self) -> &str {
        "hashline_edit"
    }
    /// Human-readable label of this tool.
    fn label(&self) -> &str {
        "hashline edit"
    }
    /// Usage description of the tool: what an edit consists of and how edits
    /// are validated and applied.
    fn description(&self) -> &str {
        "Apply precise file edits using LINE#HASH tags from a prior read with hashline=true. \
         Each edit specifies an op (replace/prepend/append), a pos anchor (\"N#AB\"), an optional \
         end anchor for range replace, and replacement lines. Edits are validated against current \
         file hashes and applied bottom-up to avoid index invalidation."
    }
7322
    /// JSON Schema of the tool's arguments.
    ///
    /// The top-level object requires `path` and `edits`. Each edit requires
    /// `op` and may carry `pos`, `end`, and `lines` (array of strings, single
    /// string, or null).
    fn parameters(&self) -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file to edit (relative or absolute)"
                },
                "edits": {
                    "type": "array",
                    "description": "Array of edit operations to apply",
                    "items": {
                        "type": "object",
                        "properties": {
                            "op": {
                                "type": "string",
                                "enum": ["replace", "prepend", "append"],
                                "description": "Operation type"
                            },
                            "pos": {
                                "type": "string",
                                "description": "Anchor line reference in LINE#HASH format (e.g. \"5#KJ\")"
                            },
                            "end": {
                                "type": "string",
                                "description": "End anchor for range replace (inclusive)"
                            },
                            "lines": {
                                "description": "Replacement/insertion content as array of strings, single string, or null for deletion",
                                "oneOf": [
                                    { "type": "array", "items": { "type": "string" } },
                                    { "type": "string" },
                                    { "type": "null" }
                                ]
                            }
                        },
                        // Only `op` is mandatory: anchors are optional for BOF/EOF inserts.
                        "required": ["op"]
                    }
                }
            },
            "required": ["path", "edits"]
        })
    }
7366
7367    #[allow(clippy::too_many_lines)]
7368    async fn execute(
7369        &self,
7370        _tool_call_id: &str,
7371        input: serde_json::Value,
7372        _on_update: Option<Box<dyn Fn(ToolUpdate) + Send + Sync>>,
7373    ) -> Result<ToolOutput> {
7374        let input: HashlineEditInput = serde_json::from_value(input)
7375            .map_err(|e| Error::tool("hashline_edit", format!("Invalid input: {e}")))?;
7376
7377        if input.edits.is_empty() {
7378            return Err(Error::tool("hashline_edit", "No edits provided"));
7379        }
7380
7381        // Resolve file path and enforce scope before touching the filesystem.
7382        let resolved = resolve_read_path(&input.path, &self.cwd);
7383        let absolute_path = enforce_cwd_scope(&resolved, &self.cwd, "hashline_edit")?;
7384
7385        // Check file size
7386        let metadata = asupersync::fs::metadata(&absolute_path)
7387            .await
7388            .map_err(|err| {
7389                let message = match err.kind() {
7390                    std::io::ErrorKind::NotFound => format!("File not found: {}", input.path),
7391                    std::io::ErrorKind::PermissionDenied => {
7392                        format!("Permission denied: {}", input.path)
7393                    }
7394                    _ => format!("Cannot read file metadata: {err}"),
7395                };
7396                Error::tool("hashline_edit", message)
7397            })?;
7398        if !metadata.is_file() {
7399            return Err(Error::tool(
7400                "hashline_edit",
7401                format!("Path {} is not a regular file", absolute_path.display()),
7402            ));
7403        }
7404        if metadata.len() > READ_TOOL_MAX_BYTES {
7405            return Err(Error::tool(
7406                "hashline_edit",
7407                format!(
7408                    "File too large ({} bytes, max {} bytes)",
7409                    metadata.len(),
7410                    READ_TOOL_MAX_BYTES
7411                ),
7412            ));
7413        }
7414
7415        // Read file content
7416        let file = asupersync::fs::File::open(&absolute_path)
7417            .await
7418            .map_err(|e| Error::tool("hashline_edit", format!("Cannot open file: {e}")))?;
7419        let mut raw = Vec::new();
7420        let mut limiter = file.take(READ_TOOL_MAX_BYTES.saturating_add(1));
7421        limiter
7422            .read_to_end(&mut raw)
7423            .await
7424            .map_err(|e| Error::tool("hashline_edit", format!("Cannot read file: {e}")))?;
7425
7426        if raw.len() as u64 > READ_TOOL_MAX_BYTES {
7427            return Err(Error::tool(
7428                "hashline_edit",
7429                format!("File too large (> {READ_TOOL_MAX_BYTES} bytes)"),
7430            ));
7431        }
7432
7433        let raw_content = String::from_utf8(raw).map_err(|_| {
7434            Error::tool(
7435                "hashline_edit",
7436                "File contains invalid UTF-8 characters and cannot be safely edited as text."
7437                    .to_string(),
7438            )
7439        })?;
7440
7441        let (content_no_bom, had_bom) = strip_bom(&raw_content);
7442        let original_ending = detect_line_ending(content_no_bom);
7443        let normalized = normalize_to_lf(content_no_bom);
7444        let file_lines: Vec<&str> = normalized.split('\n').collect();
7445
7446        // Validate all hash references before making any changes
7447        if let Err(e) = collect_mismatches(&input.edits, &file_lines, had_bom) {
7448            return Err(Error::tool(
7449                "hashline_edit",
7450                format!("Hash validation failed — re-read the file to get current tags.\n\n{e}"),
7451            ));
7452        }
7453
7454        // Deduplicate edits
7455        let mut seen = std::collections::HashSet::new();
7456        let mut deduped_edits: Vec<&HashlineOp> = Vec::new();
7457        for edit in &input.edits {
7458            let pos_line = edit
7459                .pos
7460                .as_ref()
7461                .and_then(|p| parse_hashline_tag(p).ok())
7462                .map(|(n, _)| n);
7463            let end_line = edit
7464                .end
7465                .as_ref()
7466                .and_then(|e| parse_hashline_tag(e).ok())
7467                .map(|(n, _)| n);
7468            let key = NormalizedEdit {
7469                op: edit.op.clone(),
7470                pos_line,
7471                end_line,
7472                lines: edit.get_lines(),
7473            };
7474            if seen.insert(key) {
7475                deduped_edits.push(edit);
7476            }
7477        }
7478
7479        // Resolve line indices and sort bottom-up
7480        let mut resolved: Vec<ResolvedEdit<'_>> = Vec::new();
7481        for edit in &deduped_edits {
7482            let replacement_lines: Vec<String> = edit
7483                .get_lines()
7484                .into_iter()
7485                .map(|l| strip_hashline_prefix(&l).to_string())
7486                .collect();
7487
7488            match edit.op.as_str() {
7489                "replace" => {
7490                    let start_idx = match &edit.pos {
7491                        Some(pos) => validate_line_ref(pos, &file_lines, had_bom)
7492                            .map_err(|e| Error::tool("hashline_edit", e))?,
7493                        None => {
7494                            return Err(Error::tool(
7495                                "hashline_edit",
7496                                "replace operation requires a pos anchor",
7497                            ));
7498                        }
7499                    };
7500                    let end_idx = match &edit.end {
7501                        Some(end) => validate_line_ref(end, &file_lines, had_bom)
7502                            .map_err(|e| Error::tool("hashline_edit", e))?,
7503                        None => start_idx,
7504                    };
7505                    if end_idx < start_idx {
7506                        return Err(Error::tool(
7507                            "hashline_edit",
7508                            format!(
7509                                "End anchor (line {}) is before start anchor (line {})",
7510                                end_idx + 1,
7511                                start_idx + 1
7512                            ),
7513                        ));
7514                    }
7515                    resolved.push(ResolvedEdit {
7516                        op: "replace",
7517                        start: start_idx,
7518                        end: end_idx,
7519                        lines: replacement_lines,
7520                    });
7521                }
7522                "prepend" => {
7523                    let idx = match &edit.pos {
7524                        Some(pos) => validate_line_ref(pos, &file_lines, had_bom)
7525                            .map_err(|e| Error::tool("hashline_edit", e))?,
7526                        None => 0, // BOF
7527                    };
7528                    let end_idx = if file_lines == [""] && edit.pos.is_none() {
7529                        0 // replace the empty line
7530                    } else {
7531                        idx
7532                    };
7533                    resolved.push(ResolvedEdit {
7534                        op: if file_lines == [""] && edit.pos.is_none() {
7535                            "replace"
7536                        } else {
7537                            "prepend"
7538                        },
7539                        start: idx,
7540                        end: end_idx,
7541                        lines: replacement_lines,
7542                    });
7543                }
7544                "append" => {
7545                    let idx = match &edit.pos {
7546                        Some(pos) => validate_line_ref(pos, &file_lines, had_bom)
7547                            .map_err(|e| Error::tool("hashline_edit", e))?,
7548                        None => {
7549                            if file_lines.len() > 1 && file_lines.last() == Some(&"") {
7550                                file_lines.len() - 2
7551                            } else {
7552                                file_lines.len().saturating_sub(1)
7553                            }
7554                        }
7555                    };
7556                    let end_idx = if file_lines == [""] && edit.pos.is_none() {
7557                        0 // replace the empty line
7558                    } else {
7559                        idx
7560                    };
7561                    resolved.push(ResolvedEdit {
7562                        op: if file_lines == [""] && edit.pos.is_none() {
7563                            "replace"
7564                        } else {
7565                            "append"
7566                        },
7567                        start: idx,
7568                        end: end_idx,
7569                        lines: replacement_lines,
7570                    });
7571                }
7572                other => {
7573                    return Err(Error::tool(
7574                        "hashline_edit",
7575                        format!("Unknown op: {other:?}. Must be replace, prepend, or append."),
7576                    ));
7577                }
7578            }
7579        }
7580
7581        // Sort bottom-up: highest line first, then by precedence (replace < append < prepend)
7582        resolved.sort_by(|a, b| {
7583            b.start
7584                .cmp(&a.start)
7585                .then_with(|| op_precedence(a.op).cmp(&op_precedence(b.op)))
7586        });
7587
7588        // Detect overlapping edit ranges (undefined behavior if applied bottom-up)
7589        for i in 0..resolved.len() {
7590            for j in (i + 1)..resolved.len() {
7591                let a = &resolved[i];
7592                let b = &resolved[j];
7593                if a.start <= b.end && b.start <= a.end {
7594                    return Err(Error::tool(
7595                        "hashline_edit",
7596                        format!(
7597                            "Overlapping edits detected: {} at line {}-{} and {} at line {}-{}. \
7598                             Please combine overlapping edits into a single operation.",
7599                            a.op,
7600                            a.start + 1,
7601                            a.end + 1,
7602                            b.op,
7603                            b.start + 1,
7604                            b.end + 1
7605                        ),
7606                    ));
7607                }
7608            }
7609        }
7610
7611        // Apply splices bottom-up on a mutable Vec of lines
7612        let mut lines: Vec<String> = file_lines.iter().map(|s| (*s).to_string()).collect();
7613        let mut any_change = false;
7614
7615        for edit in &resolved {
7616            match edit.op {
7617                "replace" => {
7618                    // Check if it's a no-op
7619                    let existing: Vec<&str> = lines[edit.start..=edit.end]
7620                        .iter()
7621                        .map(String::as_str)
7622                        .collect();
7623                    if existing.eq(&edit.lines.iter().map(String::as_str).collect::<Vec<&str>>()) {
7624                        continue; // no-op
7625                    }
7626                    // Splice: remove old range, insert new lines
7627                    lines.splice(edit.start..=edit.end, edit.lines.iter().cloned());
7628                    any_change = true;
7629                }
7630                "prepend" => {
7631                    // Insert before the target line
7632                    lines.splice(edit.start..edit.start, edit.lines.iter().cloned());
7633                    if !edit.lines.is_empty() {
7634                        any_change = true;
7635                    }
7636                }
7637                "append" => {
7638                    // Insert after the target line
7639                    let insert_at = edit.start + 1;
7640                    lines.splice(insert_at..insert_at, edit.lines.iter().cloned());
7641                    if !edit.lines.is_empty() {
7642                        any_change = true;
7643                    }
7644                }
7645                _ => {} // unreachable due to earlier validation
7646            }
7647        }
7648
7649        if !any_change {
7650            return Err(Error::tool(
7651                "hashline_edit",
7652                format!(
7653                    "No changes made to {}. All edits were no-ops (replacement identical to existing content).",
7654                    input.path
7655                ),
7656            ));
7657        }
7658
7659        // Reconstruct content
7660        let new_normalized = lines.join("\n");
7661        let new_content = restore_line_endings(&new_normalized, original_ending);
7662        let mut final_content = new_content;
7663        if had_bom {
7664            final_content = format!("\u{FEFF}{final_content}");
7665        }
7666
7667        // Atomic write (same pattern as EditTool)
7668        let absolute_path_clone = absolute_path.clone();
7669        let final_content_bytes = final_content.into_bytes();
7670        asupersync::runtime::spawn_blocking_io(move || {
7671            let original_perms = std::fs::metadata(&absolute_path_clone)
7672                .ok()
7673                .map(|m| m.permissions());
7674            let parent = absolute_path_clone
7675                .parent()
7676                .unwrap_or_else(|| Path::new("."));
7677            let mut temp_file = tempfile::NamedTempFile::new_in(parent)?;
7678
7679            temp_file.as_file_mut().write_all(&final_content_bytes)?;
7680            temp_file.as_file_mut().sync_all()?;
7681
7682            if let Some(perms) = original_perms {
7683                let _ = temp_file.as_file().set_permissions(perms);
7684            } else {
7685                #[cfg(unix)]
7686                {
7687                    use std::os::unix::fs::PermissionsExt;
7688                    let _ = temp_file
7689                        .as_file()
7690                        .set_permissions(std::fs::Permissions::from_mode(0o644));
7691                }
7692            }
7693
7694            temp_file
7695                .persist(&absolute_path_clone)
7696                .map_err(|e| e.error)?;
7697            Ok(())
7698        })
7699        .await
7700        .map_err(|e| Error::tool("hashline_edit", format!("Failed to write file: {e}")))?;
7701
7702        // Generate diff
7703        let (diff, first_changed_line) = generate_diff_string(&normalized, &new_normalized);
7704        let mut details = serde_json::Map::new();
7705        details.insert("diff".to_string(), serde_json::Value::String(diff));
7706        if let Some(line) = first_changed_line {
7707            details.insert(
7708                "firstChangedLine".to_string(),
7709                serde_json::Value::Number(serde_json::Number::from(line)),
7710            );
7711        }
7712
7713        Ok(ToolOutput {
7714            content: vec![ContentBlock::Text(TextContent::new(format!(
7715                "Successfully applied hashline edits to {}.",
7716                input.path
7717            )))],
7718            details: Some(serde_json::Value::Object(details)),
7719            is_error: false,
7720        })
7721    }
7722}
7723
7724// ============================================================================
7725// Tests
7726// ============================================================================
7727
7728#[cfg(test)]
7729mod tests {
7730    use super::*;
7731    use proptest::prelude::*;
7732    #[cfg(target_os = "linux")]
7733    use std::time::Duration;
7734
7735    #[test]
7736    fn test_truncate_head() {
7737        let content = "line1\nline2\nline3\nline4\nline5".to_string();
7738        let result = truncate_head(content, 3, 1000);
7739
7740        assert_eq!(result.content, "line1\nline2\nline3\n");
7741        assert!(result.truncated);
7742        assert_eq!(result.truncated_by, Some(TruncatedBy::Lines));
7743        assert_eq!(result.total_lines, 5);
7744        assert_eq!(result.output_lines, 3);
7745    }
7746
7747    #[test]
7748    fn test_truncate_tail() {
7749        let content = "line1\nline2\nline3\nline4\nline5".to_string();
7750        let result = truncate_tail(content, 3, 1000);
7751
7752        assert_eq!(result.content, "line3\nline4\nline5");
7753        assert!(result.truncated);
7754        assert_eq!(result.truncated_by, Some(TruncatedBy::Lines));
7755        assert_eq!(result.total_lines, 5);
7756        assert_eq!(result.output_lines, 3);
7757    }
7758
7759    fn assert_same_head_truncation(actual: &TruncationResult, expected: &TruncationResult) {
7760        assert_eq!(actual.content, expected.content);
7761        assert_eq!(actual.truncated, expected.truncated);
7762        assert_eq!(actual.truncated_by, expected.truncated_by);
7763        assert_eq!(actual.total_lines, expected.total_lines);
7764        assert_eq!(actual.total_bytes, expected.total_bytes);
7765        assert_eq!(actual.output_lines, expected.output_lines);
7766        assert_eq!(actual.output_bytes, expected.output_bytes);
7767        assert_eq!(actual.last_line_partial, expected.last_line_partial);
7768        assert_eq!(
7769            actual.first_line_exceeds_limit,
7770            expected.first_line_exceeds_limit
7771        );
7772        assert_eq!(actual.max_lines, expected.max_lines);
7773        assert_eq!(actual.max_bytes, expected.max_bytes);
7774    }
7775
7776    fn write_lines_with_builder(lines: &[&str], max_bytes: usize) -> TruncationResult {
7777        let mut writer = HeadTruncatingLineWriter::new(max_bytes);
7778        for line in lines {
7779            writer.push_line(line);
7780        }
7781        writer.finish()
7782    }
7783
7784    #[test]
7785    fn head_truncating_line_writer_matches_join_without_truncation() {
7786        let lines = ["alpha", "beta", "gamma"];
7787        let expected = truncate_head(lines.join("\n"), usize::MAX, 1000);
7788        let actual = write_lines_with_builder(&lines, 1000);
7789
7790        assert_same_head_truncation(&actual, &expected);
7791    }
7792
7793    #[test]
7794    fn head_truncating_line_writer_matches_join_at_byte_boundary() {
7795        let lines = ["alpha", "beta", "gamma"];
7796        let expected = truncate_head(lines.join("\n"), usize::MAX, 8);
7797        let actual = write_lines_with_builder(&lines, 8);
7798
7799        assert_same_head_truncation(&actual, &expected);
7800        assert_eq!(actual.content, "alpha\nbe");
7801    }
7802
7803    #[test]
7804    fn head_truncating_line_writer_preserves_utf8_boundary_and_order() {
7805        let lines = ["alpha", "βeta", "gamma"];
7806        let expected = truncate_head(lines.join("\n"), usize::MAX, 8);
7807        let actual = write_lines_with_builder(&lines, 8);
7808
7809        assert_same_head_truncation(&actual, &expected);
7810        assert_eq!(actual.content, "alpha\nβ");
7811    }
7812
7813    fn first_text(output: &ToolOutput) -> &str {
7814        output
7815            .content
7816            .first()
7817            .and_then(|block| match block {
7818                ContentBlock::Text(text) => Some(text.text.as_str()),
7819                _ => None,
7820            })
7821            .unwrap_or("")
7822    }
7823
7824    fn artifact_json(details: Option<&serde_json::Value>) -> &serde_json::Value {
7825        details
7826            .and_then(|value| value.get("artifact"))
7827            .expect("artifact details")
7828    }
7829
7830    fn artifact_str_field<'a>(artifact: &'a serde_json::Value, field: &str) -> &'a str {
7831        artifact
7832            .get(field)
7833            .and_then(serde_json::Value::as_str)
7834            .unwrap_or("")
7835    }
7836
7837    #[test]
7838    fn tool_output_artifact_respects_spill_threshold() {
7839        let tmp = tempfile::tempdir().expect("artifact root");
7840        let mut output = "small preview".to_string();
7841        let mut details = None;
7842        let spilled = attach_text_artifact_if_needed_at_root(
7843            tmp.path(),
7844            &mut output,
7845            &mut details,
7846            "read",
7847            "call-small",
7848            "selectedTextWindow",
7849            "small body",
7850        );
7851
7852        assert!(!spilled);
7853        assert_eq!(output, "small preview");
7854        assert!(details.is_none());
7855    }
7856
7857    #[test]
7858    fn tool_output_artifact_writes_content_addressed_text_and_metadata()
7859    -> std::result::Result<(), Box<dyn std::error::Error>> {
7860        let tmp = tempfile::tempdir().expect("artifact root");
7861        let full = "a".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES + 1);
7862        let mut output = "bounded preview".to_string();
7863        let mut details = None;
7864        let _session_guard =
7865            register_tool_output_artifact_session("call/text:1", "session/artifacts:one");
7866        let spilled = attach_text_artifact_if_needed_at_root(
7867            tmp.path(),
7868            &mut output,
7869            &mut details,
7870            "read",
7871            "call/text:1",
7872            "selectedTextWindow",
7873            &full,
7874        );
7875
7876        assert!(spilled);
7877        assert!(output.contains("Full tool output artifact:"));
7878        let artifact = artifact_json(details.as_ref());
7879        assert_eq!(artifact["schema"], TOOL_OUTPUT_ARTIFACT_SCHEMA_V1);
7880        assert_eq!(artifact["toolName"], "read");
7881        assert_eq!(artifact["sourceKind"], "selectedTextWindow");
7882        assert_eq!(artifact["sessionId"], "session/artifacts:one");
7883        assert_eq!(
7884            artifact["byteCount"].as_u64().unwrap(),
7885            u64::try_from(full.len()).unwrap()
7886        );
7887
7888        let path_value = artifact_str_field(artifact, "path");
7889        let metadata_path_value = artifact_str_field(artifact, "metadataPath");
7890        assert!(!path_value.is_empty(), "artifact path must be a string");
7891        assert!(
7892            !metadata_path_value.is_empty(),
7893            "artifact metadataPath must be a string"
7894        );
7895        let path = PathBuf::from(path_value);
7896        let metadata_path = PathBuf::from(metadata_path_value);
7897        assert!(path.starts_with(tmp.path().join("session_artifacts_one").join("call_text_1")));
7898        assert_eq!(std::fs::read_to_string(path)?, full);
7899        let metadata_bytes = std::fs::read(metadata_path)?;
7900        let metadata: serde_json::Value = serde_json::from_slice(&metadata_bytes)?;
7901        assert_eq!(metadata["sha256"], artifact["sha256"]);
7902        assert_eq!(
7903            metadata["retentionClass"],
7904            TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS
7905        );
7906        assert_eq!(
7907            metadata["spilloverReason"],
7908            TOOL_OUTPUT_ARTIFACT_SPILLOVER_REASON
7909        );
7910        assert_eq!(metadata["safeDeleteCandidate"], true);
7911        assert_eq!(
7912            metadata["redactionSummary"]["policy"],
7913            TOOL_OUTPUT_ARTIFACT_REDACTION_POLICY_V1
7914        );
7915        assert_eq!(metadata["redactionSummary"]["status"], "clean");
7916        assert_eq!(metadata["redactionSummary"]["rawSecretBytesEmitted"], 0);
7917        Ok(())
7918    }
7919
7920    #[test]
7921    fn tool_output_artifact_redacts_sensitive_text_before_persisting()
7922    -> std::result::Result<(), Box<dyn std::error::Error>> {
7923        let tmp = tempfile::tempdir().expect("artifact root");
7924        let leaked_token = "sk-redactionfixture1234567890";
7925        let leaked_bearer = "ghp_redactionfixture1234567890";
7926        let full = format!(
7927            "API_TOKEN={leaked_token}\nAuthorization: Bearer {leaked_bearer}\n{}",
7928            "x".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES + 1)
7929        );
7930        let mut output = "bounded preview".to_string();
7931        let mut details = None;
7932
7933        let spilled = attach_text_artifact_if_needed_at_root(
7934            tmp.path(),
7935            &mut output,
7936            &mut details,
7937            "read",
7938            "call-secret",
7939            "selectedTextWindow",
7940            &full,
7941        );
7942
7943        assert!(spilled);
7944        let artifact = artifact_json(details.as_ref());
7945        let path = PathBuf::from(artifact_str_field(artifact, "path"));
7946        let metadata_path = PathBuf::from(artifact_str_field(artifact, "metadataPath"));
7947        let persisted = std::fs::read_to_string(path)?;
7948        let metadata: serde_json::Value = serde_json::from_slice(&std::fs::read(metadata_path)?)?;
7949
7950        assert!(!persisted.contains(leaked_token));
7951        assert!(!persisted.contains(leaked_bearer));
7952        assert!(persisted.contains("API_TOKEN=[REDACTED]"));
7953        assert_eq!(artifact["redactionSummary"]["status"], "redacted");
7954        assert_eq!(artifact["redactionSummary"]["rawSecretBytesEmitted"], 0);
7955        assert_eq!(metadata["redactionSummary"], artifact["redactionSummary"]);
7956        let fields = artifact["redactionSummary"]["fields"]
7957            .as_array()
7958            .expect("redaction fields");
7959        assert!(fields.iter().any(|field| field == "api_token"));
7960        assert!(fields.iter().any(|field| field == "authorization"));
7961        Ok(())
7962    }
7963
7964    #[test]
7965    fn tool_output_artifact_marks_binaryish_payloads_in_lifecycle_manifest() {
7966        let tmp = tempfile::tempdir().expect("artifact root");
7967        let full = format!(
7968            "{}\0{}",
7969            "z".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES / 2),
7970            "z".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES / 2 + 2)
7971        );
7972        let mut output = "bounded preview".to_string();
7973        let mut details = None;
7974
7975        let spilled = attach_text_artifact_if_needed_at_root(
7976            tmp.path(),
7977            &mut output,
7978            &mut details,
7979            "read",
7980            "call-binaryish",
7981            "selectedTextWindow",
7982            &full,
7983        );
7984
7985        assert!(spilled);
7986        let artifact = artifact_json(details.as_ref());
7987        assert_eq!(artifact["redactionSummary"]["binarySuspect"], true);
7988        assert_eq!(artifact["redactionSummary"]["rawSecretBytesEmitted"], 0);
7989        assert_eq!(artifact["safeDeleteCandidate"], true);
7990    }
7991
7992    #[test]
7993    fn tool_output_artifact_failure_records_degraded_preview() {
7994        let tmp = tempfile::tempdir().expect("artifact root parent");
7995        let root_file = tmp.path().join("not-a-directory");
7996        std::fs::write(&root_file, "not a directory").expect("root file");
7997        let full = "b".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES + 1);
7998        let mut output = "bounded preview".to_string();
7999        let mut details = None;
8000
8001        let spilled = attach_text_artifact_if_needed_at_root(
8002            &root_file,
8003            &mut output,
8004            &mut details,
8005            "read",
8006            "call-fail",
8007            "selectedTextWindow",
8008            &full,
8009        );
8010
8011        assert!(!spilled);
8012        assert!(output.contains("Tool output artifact persistence failed"));
8013        assert!(
8014            details
8015                .as_ref()
8016                .and_then(|value| value.get("artifactError"))
8017                .is_some()
8018        );
8019    }
8020
8021    #[test]
8022    fn read_tool_spills_oversized_selected_text_window_to_artifact() {
8023        asupersync::test_utils::run_test(|| async {
8024            let tmp = tempfile::tempdir().expect("workspace");
8025            let artifact_root = tempfile::tempdir().expect("artifact root");
8026
8027            let body = "r".repeat(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES + 8);
8028            std::fs::write(tmp.path().join("large.txt"), &body).expect("large file");
8029            let read_tool = ReadTool::with_artifact_root(tmp.path(), artifact_root.path());
8030            let output = read_tool
8031                .execute(
8032                    "read-artifact-call",
8033                    serde_json::json!({ "path": "large.txt" }),
8034                    None,
8035                )
8036                .await
8037                .expect("read large file");
8038
8039            assert!(first_text(&output).contains("Full tool output artifact:"));
8040            let artifact = artifact_json(output.details.as_ref());
8041            assert_eq!(artifact["toolName"], "read");
8042            assert_eq!(artifact["sourceKind"], "selectedTextWindow");
8043            let path_value = artifact_str_field(artifact, "path");
8044            assert!(!path_value.is_empty(), "artifact path must be a string");
8045            let path = PathBuf::from(path_value);
8046            let spilled = match std::fs::read_to_string(&path) {
8047                Ok(spilled) => spilled,
8048                Err(err) => {
8049                    assert!(false, "read spilled artifact {}: {err}", path.display());
8050                    return;
8051                }
8052            };
8053            let prefix = "    1→";
8054            assert_eq!(spilled.len(), prefix.len() + DEFAULT_MAX_BYTES);
8055            assert_eq!(
8056                artifact["byteCount"].as_u64().unwrap(),
8057                u64::try_from(spilled.len()).unwrap()
8058            );
8059            assert!(spilled.starts_with(prefix));
8060            assert!(spilled[prefix.len()..].bytes().all(|byte| byte == b'r'));
8061            assert_eq!(
8062                artifact["retentionClass"],
8063                TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS
8064            );
8065            assert_eq!(
8066                artifact["spilloverReason"],
8067                TOOL_OUTPUT_ARTIFACT_SPILLOVER_REASON
8068            );
8069            assert_eq!(artifact["safeDeleteCandidate"], true);
8070        });
8071    }
8072
8073    #[test]
8074    fn bash_tool_spills_truncated_full_output_to_artifact() {
8075        asupersync::test_utils::run_test(|| async {
8076            if !Path::new("/dev/zero").exists() {
8077                return;
8078            }
8079
8080            let tmp = tempfile::tempdir().expect("workspace");
8081            let artifact_root = tempfile::tempdir().expect("artifact root");
8082
8083            let bash_tool = BashTool::with_artifact_root(tmp.path(), artifact_root.path());
8084            let output = bash_tool
8085                .execute(
8086                    "bash-artifact-call",
8087                    serde_json::json!({
8088                        "command": "head -c 1001000 /dev/zero | tr '\\0' x",
8089                        "timeout": 10
8090                    }),
8091                    None,
8092                )
8093                .await
8094                .expect("bash large output");
8095
8096            assert!(first_text(&output).contains("Full tool output artifact:"));
8097            let artifact = artifact_json(output.details.as_ref());
8098            assert_eq!(artifact["toolName"], "bash");
8099            assert_eq!(artifact["sourceKind"], "fullCommandOutput");
8100            let path = PathBuf::from(artifact_str_field(artifact, "path"));
8101            assert_eq!(std::fs::metadata(path).unwrap().len(), 1_001_000);
8102            assert_eq!(artifact["redactionSummary"]["status"], "clean");
8103            assert_eq!(artifact["safeDeleteCandidate"], true);
8104        });
8105    }
8106
8107    #[test]
8108    fn bash_tool_redacts_secret_like_full_output_artifacts() {
8109        asupersync::test_utils::run_test(|| async {
8110            if !Path::new("/dev/zero").exists() {
8111                return;
8112            }
8113
8114            let tmp = tempfile::tempdir().expect("workspace");
8115            let artifact_root = tempfile::tempdir().expect("artifact root");
8116            let leaked_token = "sk-bashredactionfixture1234567890";
8117
8118            let bash_tool = BashTool::with_artifact_root(tmp.path(), artifact_root.path());
8119            let output = bash_tool
8120                .execute(
8121                    "bash-secret-artifact-call",
8122                    serde_json::json!({
8123                        "command": format!("printf 'API_TOKEN={leaked_token}\\n'; head -c 1001000 /dev/zero | tr '\\0' x"),
8124                        "timeout": 10
8125                    }),
8126                    None,
8127                )
8128                .await
8129                .expect("bash large output");
8130
8131            assert!(first_text(&output).contains("Full tool output artifact:"));
8132            let artifact = artifact_json(output.details.as_ref());
8133            assert_eq!(artifact["toolName"], "bash");
8134            assert_eq!(artifact["redactionSummary"]["status"], "redacted");
8135            assert_eq!(artifact["redactionSummary"]["rawSecretBytesEmitted"], 0);
8136            let path = PathBuf::from(artifact_str_field(artifact, "path"));
8137            let persisted = std::fs::read_to_string(path).expect("read redacted bash artifact");
8138            assert!(!persisted.contains(leaked_token));
8139            assert!(persisted.contains("API_TOKEN=[REDACTED]"));
8140        });
8141    }
8142
8143    #[test]
8144    fn grep_tool_spills_large_search_results_with_lifecycle_manifest() {
8145        asupersync::test_utils::run_test(|| async {
8146            if !rg_available() {
8147                return;
8148            }
8149
8150            let tmp = tempfile::tempdir().expect("workspace");
8151            let artifact_root = tempfile::tempdir().expect("artifact root");
8152            let mut body = String::new();
8153            let suffix = "g".repeat(560);
8154            for idx in 0..2200 {
8155                let _ = writeln!(body, "target {idx:04} {suffix}");
8156            }
8157            std::fs::write(tmp.path().join("large-grep.txt"), body).expect("write grep fixture");
8158
8159            let grep_tool = GrepTool::with_artifact_root(tmp.path(), artifact_root.path());
8160            let output = grep_tool
8161                .execute(
8162                    "grep-artifact-call",
8163                    serde_json::json!({
8164                        "pattern": "target",
8165                        "path": "large-grep.txt",
8166                        "literal": true,
8167                        "limit": 2200
8168                    }),
8169                    None,
8170                )
8171                .await
8172                .expect("grep large output");
8173
8174            assert!(first_text(&output).contains("Full tool output artifact:"));
8175            let artifact = artifact_json(output.details.as_ref());
8176            assert_eq!(artifact["toolName"], "grep");
8177            assert_eq!(artifact["sourceKind"], "searchResults");
8178            assert_eq!(
8179                artifact["retentionClass"],
8180                TOOL_OUTPUT_ARTIFACT_RETENTION_CLASS
8181            );
8182            assert_eq!(artifact["safeDeleteCandidate"], true);
8183            assert_eq!(artifact["redactionSummary"]["status"], "clean");
8184            let path = PathBuf::from(artifact_str_field(artifact, "path"));
8185            let persisted = std::fs::read_to_string(path).expect("read grep artifact");
8186            assert!(persisted.contains("large-grep.txt:1: target 0000"));
8187            assert!(
8188                artifact["byteCount"].as_u64().unwrap()
8189                    > u64::try_from(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES).unwrap()
8190            );
8191        });
8192    }
8193
8194    #[test]
8195    fn read_tool_denied_path_does_not_emit_lifecycle_artifact() {
8196        asupersync::test_utils::run_test(|| async {
8197            let cwd = tempfile::tempdir().expect("workspace");
8198            let outside = tempfile::tempdir().expect("outside");
8199            let artifact_root = tempfile::tempdir().expect("artifact root");
8200            let outside_path = outside.path().join("secret.txt");
8201            std::fs::write(&outside_path, "API_TOKEN=sk-deniedpathfixture1234567890")
8202                .expect("outside secret");
8203
8204            let read_tool = ReadTool::with_artifact_root(cwd.path(), artifact_root.path());
8205            let err = read_tool
8206                .execute(
8207                    "read-denied-artifact-call",
8208                    serde_json::json!({ "path": outside_path }),
8209                    None,
8210                )
8211                .await
8212                .expect_err("outside read should be denied");
8213
8214            assert!(
8215                err.to_string()
8216                    .contains("Cannot read outside the working directory or agent dir")
8217            );
8218            let mut entries = std::fs::read_dir(artifact_root.path()).expect("artifact root");
8219            assert!(
8220                entries.next().is_none(),
8221                "denied reads must not write artifacts"
8222            );
8223        });
8224    }
8225
8226    #[test]
8227    fn ls_tool_spills_oversized_directory_listing_to_artifact() {
8228        asupersync::test_utils::run_test(|| async {
8229            let tmp = tempfile::tempdir().expect("workspace");
8230            let artifact_root = tempfile::tempdir().expect("artifact root");
8231            let suffix = "x".repeat(224);
8232            for i in 0..4_500 {
8233                let name = format!("entry-{i:04}-{suffix}.txt");
8234                std::fs::write(tmp.path().join(name), "").expect("write listing fixture");
8235            }
8236
8237            let ls_tool = LsTool::with_artifact_root(tmp.path(), artifact_root.path());
8238            let output = ls_tool
8239                .execute(
8240                    "ls-artifact-call",
8241                    serde_json::json!({ "path": ".", "limit": 4500 }),
8242                    None,
8243                )
8244                .await
8245                .expect("ls large directory");
8246
8247            assert!(first_text(&output).contains("Full tool output artifact:"));
8248            let artifact = artifact_json(output.details.as_ref());
8249            assert_eq!(artifact["toolName"], "ls");
8250            assert_eq!(artifact["sourceKind"], "directoryEntries");
8251            assert!(
8252                artifact["byteCount"].as_u64().unwrap()
8253                    > u64::try_from(TOOL_OUTPUT_ARTIFACT_THRESHOLD_BYTES).unwrap()
8254            );
8255            let path = PathBuf::from(artifact_str_field(artifact, "path"));
8256            assert!(
8257                std::fs::read_to_string(path)
8258                    .unwrap()
8259                    .contains("entry-0000-")
8260            );
8261        });
8262    }
8263
8264    async fn assert_read_cache_hit_and_stale(tmp: &Path) {
8265        let note = tmp.join("note.txt");
8266        std::fs::write(&note, "alpha\n").expect("write note");
8267
8268        let read_tool = ReadTool::new(tmp);
8269        let read_input = serde_json::json!({ "path": "note.txt" });
8270        let first = read_tool
8271            .execute("read-1", read_input.clone(), None)
8272            .await
8273            .expect("first read");
8274        assert!(first_text(&first).contains("alpha"));
8275
8276        let hits_before = tool_output_cache_stats_for_tests().hits;
8277        let second = read_tool
8278            .execute("read-2", read_input.clone(), None)
8279            .await
8280            .expect("cached read");
8281        assert_eq!(first_text(&first), first_text(&second));
8282        assert!(tool_output_cache_stats_for_tests().hits > hits_before);
8283
8284        let invalidations_before = tool_output_cache_stats_for_tests().invalidations;
8285        std::fs::write(&note, "beta\n").expect("rewrite note");
8286        let third = read_tool
8287            .execute("read-3", read_input.clone(), None)
8288            .await
8289            .expect("invalidated read");
8290        assert!(first_text(&third).contains("beta"));
8291        assert!(!first_text(&third).contains("alpha"));
8292        assert!(tool_output_cache_stats_for_tests().invalidations > invalidations_before);
8293    }
8294
8295    async fn assert_ls_cache_hit_and_stale(tmp: &Path) {
8296        let ls_tool = LsTool::new(tmp);
8297        let ls_input = serde_json::json!({ "path": "." });
8298        let ls_first = ls_tool
8299            .execute("ls-1", ls_input.clone(), None)
8300            .await
8301            .expect("first ls");
8302        assert!(first_text(&ls_first).contains("note.txt"));
8303
8304        let hits_before = tool_output_cache_stats_for_tests().hits;
8305        let ls_second = ls_tool
8306            .execute("ls-2", ls_input.clone(), None)
8307            .await
8308            .expect("cached ls");
8309        assert_eq!(first_text(&ls_first), first_text(&ls_second));
8310        assert!(tool_output_cache_stats_for_tests().hits > hits_before);
8311
8312        let invalidations_before = tool_output_cache_stats_for_tests().invalidations;
8313        std::fs::write(tmp.join("new.txt"), "new\n").expect("write new file");
8314        let ls_third = ls_tool
8315            .execute("ls-3", ls_input.clone(), None)
8316            .await
8317            .expect("invalidated ls");
8318        assert!(first_text(&ls_third).contains("new.txt"));
8319        assert!(tool_output_cache_stats_for_tests().invalidations > invalidations_before);
8320    }
8321
8322    async fn assert_grep_cache_hit_and_stale_when_available(tmp: &Path) {
8323        if find_rg_binary().is_none() {
8324            return;
8325        }
8326
8327        let grep_tool = GrepTool::new(tmp);
8328        let grep_input = serde_json::json!({ "pattern": "needle", "path": "." });
8329        std::fs::write(tmp.join("a.txt"), "needle\n").expect("write grep file");
8330
8331        let grep_first = grep_tool
8332            .execute("grep-1", grep_input.clone(), None)
8333            .await
8334            .expect("first grep");
8335        assert!(first_text(&grep_first).contains("a.txt"));
8336
8337        let hits_before = tool_output_cache_stats_for_tests().hits;
8338        let grep_second = grep_tool
8339            .execute("grep-2", grep_input.clone(), None)
8340            .await
8341            .expect("cached grep");
8342        assert_eq!(first_text(&grep_first), first_text(&grep_second));
8343        assert!(tool_output_cache_stats_for_tests().hits > hits_before);
8344
8345        let invalidations_before = tool_output_cache_stats_for_tests().invalidations;
8346        std::fs::write(tmp.join("b.txt"), "needle\n").expect("write new match");
8347        let grep_third = grep_tool
8348            .execute("grep-3", grep_input.clone(), None)
8349            .await
8350            .expect("invalidated grep");
8351        assert!(first_text(&grep_third).contains("b.txt"));
8352        assert!(tool_output_cache_stats_for_tests().invalidations > invalidations_before);
8353    }
8354
8355    async fn assert_find_cache_hit_and_stale_when_available(tmp: &Path) {
8356        if find_fd_binary().is_none() {
8357            return;
8358        }
8359
8360        let find_tool = FindTool::new(tmp);
8361        let find_input = serde_json::json!({ "pattern": "*find*.txt", "path": "." });
8362        std::fs::write(tmp.join("find-a.txt"), "find\n").expect("write first find file");
8363
8364        let find_first = find_tool
8365            .execute("find-1", find_input.clone(), None)
8366            .await
8367            .expect("first find");
8368        assert!(first_text(&find_first).contains("find-a.txt"));
8369
8370        let hits_before = tool_output_cache_stats_for_tests().hits;
8371        let find_second = find_tool
8372            .execute("find-2", find_input.clone(), None)
8373            .await
8374            .expect("cached find");
8375        assert_eq!(first_text(&find_first), first_text(&find_second));
8376        assert!(tool_output_cache_stats_for_tests().hits > hits_before);
8377
8378        let invalidations_before = tool_output_cache_stats_for_tests().invalidations;
8379        std::fs::write(tmp.join("find-b.txt"), "find\n").expect("write second find file");
8380        let find_third = find_tool
8381            .execute("find-3", find_input.clone(), None)
8382            .await
8383            .expect("invalidated find");
8384        assert!(first_text(&find_third).contains("find-b.txt"));
8385        assert!(tool_output_cache_stats_for_tests().invalidations > invalidations_before);
8386    }
8387
8388    async fn assert_side_effect_tools_remain_uncached(tmp: &Path) {
8389        let side_effect_stats_before = tool_output_cache_stats_for_tests();
8390        let write_tool = WriteTool::new(tmp);
8391        write_tool
8392            .execute(
8393                "write-1",
8394                serde_json::json!({
8395                    "path": "side-effect.txt",
8396                    "content": "one\n"
8397                }),
8398                None,
8399            )
8400            .await
8401            .expect("write side-effect file");
8402
8403        let edit_tool = EditTool::new(tmp);
8404        edit_tool
8405            .execute(
8406                "edit-1",
8407                serde_json::json!({
8408                    "path": "side-effect.txt",
8409                    "oldText": "one",
8410                    "newText": "two"
8411                }),
8412                None,
8413            )
8414            .await
8415            .expect("edit side-effect file");
8416
8417        let bash_tool = BashTool::new(tmp);
8418        bash_tool
8419            .execute(
8420                "bash-1",
8421                serde_json::json!({
8422                    "command": "printf 'cache-uncached\\n'",
8423                    "timeout": 5
8424                }),
8425                None,
8426            )
8427            .await
8428            .expect("run uncached bash");
8429
8430        let side_effect_stats_after = tool_output_cache_stats_for_tests();
8431        assert_eq!(
8432            (
8433                side_effect_stats_after.side_effect_accesses,
8434                side_effect_stats_after.side_effect_insert_attempts
8435            ),
8436            (
8437                side_effect_stats_before.side_effect_accesses,
8438                side_effect_stats_before.side_effect_insert_attempts
8439            ),
8440            "write, edit, and bash must not consult or populate the read-only output cache"
8441        );
8442    }
8443
8444    #[test]
8445    fn tool_output_cache_reuses_and_invalidates_read_only_tool_outputs() {
8446        asupersync::test_utils::run_test(|| async {
8447            reset_tool_output_cache_for_tests();
8448
8449            let tmp = tempfile::tempdir().expect("create temp dir");
8450            assert_read_cache_hit_and_stale(tmp.path()).await;
8451            assert_ls_cache_hit_and_stale(tmp.path()).await;
8452            assert_grep_cache_hit_and_stale_when_available(tmp.path()).await;
8453            assert_find_cache_hit_and_stale_when_available(tmp.path()).await;
8454            assert_side_effect_tools_remain_uncached(tmp.path()).await;
8455        });
8456    }
8457
8458    #[test]
8459    fn tool_output_context_cache_evidence_jsonl_covers_required_decisions()
8460    -> std::result::Result<(), String> {
8461        let evidence = include_str!("../docs/evidence/tool-output-context-cache.jsonl");
8462        let mut saw_read_hit = false;
8463        let mut saw_grep_stale = false;
8464        let mut saw_find_stale = false;
8465        let mut saw_ls_stale = false;
8466        let mut saw_write_uncached = false;
8467        let mut saw_edit_uncached = false;
8468        let mut saw_bash_uncached = false;
8469
8470        for (line_number, line) in evidence.lines().enumerate() {
8471            if line.trim().is_empty() {
8472                continue;
8473            }
8474
8475            let event: serde_json::Value = serde_json::from_str(line).map_err(|err| {
8476                format!(
8477                    "invalid context-cache JSONL at line {}: {err}",
8478                    line_number + 1
8479                )
8480            })?;
8481            assert_eq!(
8482                event.get("schema").and_then(serde_json::Value::as_str),
8483                Some("pi.tool_output_context_cache.evidence.v1")
8484            );
8485            assert_eq!(
8486                event.get("bead").and_then(serde_json::Value::as_str),
8487                Some("bd-dklqn.1")
8488            );
8489            let related_beads = event
8490                .get("related_beads")
8491                .and_then(serde_json::Value::as_array)
8492                .ok_or_else(|| format!("missing related_beads at line {}", line_number + 1))?;
8493            assert!(
8494                related_beads
8495                    .iter()
8496                    .any(|bead| bead.as_str() == Some("bd-dklqn.2")),
8497                "evidence line {} must cover bd-dklqn.2",
8498                line_number + 1
8499            );
8500
8501            let tool = event
8502                .get("tool")
8503                .and_then(serde_json::Value::as_str)
8504                .expect("tool");
8505            let outcome = event
8506                .get("outcome")
8507                .and_then(serde_json::Value::as_str)
8508                .expect("outcome");
8509            let reason = event
8510                .get("reason")
8511                .and_then(serde_json::Value::as_str)
8512                .expect("reason");
8513
8514            match (tool, outcome, reason) {
8515                ("read", "hit", "unchanged_file_fingerprint") => saw_read_hit = true,
8516                ("grep", "stale", "recursive_directory_fingerprint_changed") => {
8517                    saw_grep_stale = true;
8518                }
8519                ("find", "stale", "recursive_directory_fingerprint_changed") => {
8520                    saw_find_stale = true;
8521                }
8522                ("ls", "stale", "directory_entry_fingerprint_changed") => saw_ls_stale = true,
8523                ("write", "uncached", "write_effect_tool") => saw_write_uncached = true,
8524                ("edit", "uncached", "write_effect_tool") => saw_edit_uncached = true,
8525                ("bash", "uncached", "process_effect_tool") => saw_bash_uncached = true,
8526                _ => {}
8527            }
8528        }
8529
8530        assert!(saw_read_hit, "evidence must include a read cache hit");
8531        assert!(saw_grep_stale, "evidence must include grep stale bypass");
8532        assert!(saw_find_stale, "evidence must include find stale bypass");
8533        assert!(saw_ls_stale, "evidence must include ls stale bypass");
8534        assert!(saw_write_uncached, "evidence must include write uncached");
8535        assert!(saw_edit_uncached, "evidence must include edit uncached");
8536        assert!(saw_bash_uncached, "evidence must include bash uncached");
8537        Ok(())
8538    }
8539
8540    #[test]
8541    fn test_truncate_tail_zero_lines_returns_empty_output() {
8542        let result = truncate_tail("line1\nline2".to_string(), 0, 1000);
8543
8544        assert!(result.truncated);
8545        assert_eq!(result.truncated_by, Some(TruncatedBy::Lines));
8546        assert_eq!(result.output_lines, 0);
8547        assert_eq!(result.output_bytes, 0);
8548        assert!(result.content.is_empty());
8549    }
8550
8551    #[test]
8552    fn test_line_count_from_newline_count_matches_trailing_newline_semantics() {
8553        assert_eq!(line_count_from_newline_count(0, 0, false), 0);
8554        assert_eq!(line_count_from_newline_count(2, 1, true), 1);
8555        assert_eq!(line_count_from_newline_count(1, 0, false), 1);
8556        assert_eq!(line_count_from_newline_count(3, 1, false), 2);
8557    }
8558
8559    #[test]
8560    fn test_rg_match_requires_path_and_line_number() {
8561        let mut matches = Vec::new();
8562        let mut match_count = 0usize;
8563        let mut match_limit_reached = false;
8564        let scan_limit = 1;
8565
8566        let missing_line =
8567            Ok(r#"{"type":"match","data":{"path":{"text":"file.txt"}}}"#.to_string());
8568        process_rg_json_match_line(
8569            missing_line,
8570            &mut matches,
8571            &mut match_count,
8572            &mut match_limit_reached,
8573            scan_limit,
8574        );
8575        assert!(matches.is_empty());
8576        assert_eq!(match_count, 0);
8577        assert!(!match_limit_reached);
8578
8579        let valid_line = Ok(
8580            r#"{"type":"match","data":{"path":{"text":"file.txt"},"line_number":3}}"#.to_string(),
8581        );
8582        process_rg_json_match_line(
8583            valid_line,
8584            &mut matches,
8585            &mut match_count,
8586            &mut match_limit_reached,
8587            scan_limit,
8588        );
8589        assert_eq!(matches.len(), 1);
8590        assert_eq!(matches[0].1, 3);
8591        assert_eq!(match_count, 1);
8592        assert!(match_limit_reached);
8593    }
8594
8595    #[test]
8596    fn test_truncate_by_bytes() {
8597        let content = "short\nthis is a longer line\nanother".to_string();
8598        let result = truncate_head(content, 100, 15);
8599
8600        assert!(result.truncated);
8601        assert_eq!(result.truncated_by, Some(TruncatedBy::Bytes));
8602    }
8603
8604    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
8605    #[test]
8606    fn test_command_with_default_sigpipe_restores_pipe_disposition() {
8607        // Verify the spawned child does NOT inherit the parent's
8608        // SIGPIPE=SIG_IGN. The probe parses the SigIgn: hex mask exposed by
8609        // Linux-format /proc/<pid>/status — available natively on Linux and,
8610        // on FreeBSD, through the linprocfs compat module mounted at
8611        // /compat/linux/proc. Skip with a one-line notice when linprocfs is
8612        // not mounted rather than failing the test.
8613        #[cfg(target_os = "freebsd")]
8614        let status_dir = {
8615            let probe = format!("/compat/linux/proc/{}/status", std::process::id());
8616            if !std::path::Path::new(&probe).exists() {
8617                eprintln!(
8618                    "skipping sigpipe disposition test: linprocfs not mounted \
8619                     at /compat/linux/proc — add `linprocfs /compat/linux/proc \
8620                     linprocfs rw 0 0` to /etc/fstab and `mount /compat/linux/proc` \
8621                     to enable"
8622                );
8623                return;
8624            }
8625            "/compat/linux/proc"
8626        };
8627        #[cfg(not(target_os = "freebsd"))]
8628        let status_dir = "/proc";
8629
8630        let probe_cmd = format!(
8631            "while read name value _; do [ \"$name\" = SigIgn: ] && \
8632             {{ printf '%s' \"$value\"; exit 0; }}; done < {status_dir}/$$/status"
8633        );
8634
8635        let output = command_with_default_sigpipe("sh")
8636            .expect("prepare sigpipe disposition probe")
8637            .args(["-c", &probe_cmd])
8638            .stdout(std::process::Stdio::piped())
8639            .output()
8640            .expect("spawn sigpipe disposition probe");
8641
8642        assert!(output.status.success(), "probe failed: {output:?}");
8643        let sigign = String::from_utf8(output.stdout).expect("SigIgn should be utf8");
8644        let ignored_mask =
8645            u64::from_str_radix(sigign.trim(), 16).expect("SigIgn should be a hex mask");
8646        let sigpipe_bit = 1_u64 << (13 - 1);
8647        assert_eq!(
8648            ignored_mask & sigpipe_bit,
8649            0,
8650            "child should not inherit ignored SIGPIPE: SigIgn={sigign}"
8651        );
8652    }
8653
8654    #[cfg(unix)]
8655    #[test]
8656    fn test_command_with_default_sigpipe_in_dir_resolves_relative_program_after_cwd() {
8657        use std::os::unix::fs::PermissionsExt as _;
8658
8659        let tmp = tempfile::tempdir().expect("create temp dir");
8660        let script = tmp.path().join("relative-probe");
8661        std::fs::write(&script, "#!/bin/sh\nprintf cwd-relative-ok\n").expect("write script");
8662        let mut permissions = std::fs::metadata(&script)
8663            .expect("stat script")
8664            .permissions();
8665        permissions.set_mode(0o755);
8666        std::fs::set_permissions(&script, permissions).expect("make script executable");
8667
8668        let output = command_with_default_sigpipe_in_dir("./relative-probe", tmp.path())
8669            .expect("prepare relative executable")
8670            .current_dir(tmp.path())
8671            .stdout(std::process::Stdio::piped())
8672            .output()
8673            .expect("spawn relative executable");
8674
8675        assert!(output.status.success(), "probe failed: {output:?}");
8676        assert_eq!(
8677            String::from_utf8(output.stdout).expect("probe stdout should be utf8"),
8678            "cwd-relative-ok"
8679        );
8680    }
8681
8682    #[cfg(target_os = "linux")]
8683    #[test]
8684    fn test_read_to_end_capped_and_drain_preserves_writer_exit_status() {
8685        let mut child = std::process::Command::new("dd")
8686            .args(["if=/dev/zero", "bs=1", "count=70000", "status=none"])
8687            .stdout(std::process::Stdio::piped())
8688            .spawn()
8689            .expect("spawn dd");
8690
8691        let stdout = child.stdout.take().expect("dd stdout");
8692        let captured = read_to_end_capped_and_drain(stdout, 1024).expect("capture bounded stdout");
8693        let status = child.wait().expect("wait for dd");
8694
8695        assert!(
8696            status.success(),
8697            "bounded reader should drain to EOF instead of SIGPIPEing the writer: {status:?}"
8698        );
8699        assert_eq!(captured.len(), 1025);
8700    }
8701
8702    #[cfg(unix)]
8703    #[test]
8704    fn test_get_file_lines_async_unreadable_file_returns_empty() {
8705        asupersync::test_utils::run_test(|| async {
8706            use std::os::unix::fs::PermissionsExt;
8707
8708            let tmp = tempfile::tempdir().unwrap();
8709            let path = tmp.path().join("secret.txt");
8710            std::fs::write(&path, "secret\n").unwrap();
8711
8712            let mut perms = std::fs::metadata(&path).unwrap().permissions();
8713            perms.set_mode(0o000);
8714            std::fs::set_permissions(&path, perms).unwrap();
8715
8716            let mut cache = HashMap::new();
8717            let lines = get_file_lines_async(&path, &mut cache).await;
8718            assert!(lines.is_empty());
8719        });
8720    }
8721
8722    #[test]
8723    fn test_resolve_path_absolute() {
8724        let cwd = PathBuf::from("/home/user/project");
8725        let result = resolve_path("/absolute/path", &cwd);
8726        assert_eq!(result, PathBuf::from("/absolute/path"));
8727    }
8728
8729    #[test]
8730    fn test_resolve_path_relative() {
8731        let cwd = PathBuf::from("/home/user/project");
8732        let result = resolve_path("src/main.rs", &cwd);
8733        assert_eq!(result, PathBuf::from("/home/user/project/src/main.rs"));
8734    }
8735
8736    #[test]
8737    fn test_normalize_dot_segments_preserves_root() {
8738        let result = normalize_dot_segments(std::path::Path::new("/../etc/passwd"));
8739        assert_eq!(result, PathBuf::from("/etc/passwd"));
8740    }
8741
8742    #[test]
8743    fn test_normalize_dot_segments_preserves_leading_parent_for_relative() {
8744        let result = normalize_dot_segments(std::path::Path::new("../a/../b"));
8745        assert_eq!(result, PathBuf::from("../b"));
8746    }
8747
8748    #[test]
8749    fn test_detect_supported_image_mime_type_from_bytes() {
8750        assert_eq!(
8751            detect_supported_image_mime_type_from_bytes(b"\x89PNG\r\n\x1A\n"),
8752            Some("image/png")
8753        );
8754        assert_eq!(
8755            detect_supported_image_mime_type_from_bytes(b"\xFF\xD8\xFF"),
8756            Some("image/jpeg")
8757        );
8758        assert_eq!(
8759            detect_supported_image_mime_type_from_bytes(b"GIF89a"),
8760            Some("image/gif")
8761        );
8762        assert_eq!(
8763            detect_supported_image_mime_type_from_bytes(b"RIFF1234WEBP"),
8764            Some("image/webp")
8765        );
8766        assert_eq!(
8767            detect_supported_image_mime_type_from_bytes(b"not an image"),
8768            None
8769        );
8770    }
8771
8772    #[test]
8773    fn test_format_size() {
8774        assert_eq!(format_size(500), "500B");
8775        assert_eq!(format_size(1024), "1.0KB");
8776        assert_eq!(format_size(1536), "1.5KB");
8777        assert_eq!(format_size(1_048_576), "1.0MB");
8778        assert_eq!(format_size(1_073_741_824), "1024.0MB");
8779    }
8780
8781    #[test]
8782    fn test_js_string_length() {
8783        assert_eq!(js_string_length("hello"), 5);
8784        assert_eq!(js_string_length("😀"), 2);
8785    }
8786
8787    #[test]
8788    fn test_truncate_line() {
8789        let short = "short line";
8790        let result = truncate_line(short, 100);
8791        assert_eq!(result.text, "short line");
8792        assert!(!result.was_truncated);
8793
8794        let long = "a".repeat(600);
8795        let result = truncate_line(&long, 500);
8796        assert!(result.was_truncated);
8797        assert!(result.text.ends_with("... [truncated]"));
8798    }
8799
8800    // ========================================================================
8801    // Helper: extract text from ToolOutput content blocks
8802    // ========================================================================
8803
8804    fn get_text(content: &[ContentBlock]) -> String {
8805        content
8806            .iter()
8807            .filter_map(|block| {
8808                if let ContentBlock::Text(text) = block {
8809                    Some(text.text.clone())
8810                } else {
8811                    None
8812                }
8813            })
8814            .collect::<String>()
8815    }
8816
8817    // ========================================================================
8818    // Read Tool Tests
8819    // ========================================================================
8820
8821    #[test]
8822    fn test_read_valid_file() {
8823        asupersync::test_utils::run_test(|| async {
8824            let tmp = tempfile::tempdir().unwrap();
8825            std::fs::write(tmp.path().join("hello.txt"), "alpha\nbeta\ngamma").unwrap();
8826
8827            let tool = ReadTool::new(tmp.path());
8828            let out = tool
8829                .execute(
8830                    "t",
8831                    serde_json::json!({ "path": tmp.path().join("hello.txt").to_string_lossy() }),
8832                    None,
8833                )
8834                .await
8835                .unwrap();
8836            let text = get_text(&out.content);
8837            assert!(text.contains("alpha"));
8838            assert!(text.contains("beta"));
8839            assert!(text.contains("gamma"));
8840            assert!(!out.is_error);
8841        });
8842    }
8843
8844    #[test]
8845    fn test_read_nonexistent_file() {
8846        asupersync::test_utils::run_test(|| async {
8847            let tmp = tempfile::tempdir().unwrap();
8848            let tool = ReadTool::new(tmp.path());
8849            let err = tool
8850                .execute(
8851                    "t",
8852                    serde_json::json!({ "path": tmp.path().join("nope.txt").to_string_lossy() }),
8853                    None,
8854                )
8855                .await;
8856            assert!(err.is_err());
8857        });
8858    }
8859
8860    #[test]
8861    fn test_read_rejects_outside_cwd() {
8862        asupersync::test_utils::run_test(|| async {
8863            let cwd = tempfile::tempdir().unwrap();
8864            let outside = tempfile::tempdir().unwrap();
8865            std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
8866
8867            let tool = ReadTool::new(cwd.path());
8868            let err = tool
8869                .execute(
8870                    "t",
8871                    serde_json::json!({ "path": outside.path().join("secret.txt").to_string_lossy() }),
8872                    None,
8873                )
8874                .await
8875                .unwrap_err();
8876            assert!(err.to_string().contains("outside the working directory"));
8877        });
8878    }
8879
8880    /// Issue #71: skill files, prompt templates, and themes live under the
8881    /// agent dir (`~/.pi/agent/`, default). The agent legitimately needs to
8882    /// read these even when cwd is a user project on a different path.
8883    /// Ensure `enforce_read_scope_with_roots` accepts the agent dir as a
8884    /// second valid root without breaking the cwd-only contract for paths
8885    /// that are under neither.
8886    #[test]
8887    fn test_enforce_read_scope_allows_agent_dir_outside_cwd() {
8888        let cwd = tempfile::tempdir().unwrap();
8889        let agent_dir = tempfile::tempdir().unwrap();
8890        let skill_dir = agent_dir.path().join("skills").join("freebsd-jails");
8891        std::fs::create_dir_all(&skill_dir).unwrap();
8892        let skill_path = skill_dir.join("SKILL.md");
8893        std::fs::write(&skill_path, "---\nname: test\n---\n# body\n").unwrap();
8894
8895        let resolved =
8896            enforce_read_scope_with_roots(&skill_path, cwd.path(), agent_dir.path()).unwrap();
8897        assert!(
8898            resolved.starts_with(
8899                agent_dir
8900                    .path()
8901                    .canonicalize()
8902                    .unwrap_or_else(|_| agent_dir.path().to_path_buf())
8903            ),
8904            "agent-dir path must be allowed and returned canonicalised"
8905        );
8906    }
8907
8908    #[test]
8909    fn test_enforce_read_scope_still_rejects_unrelated_paths() {
8910        // Paths under neither cwd nor agent_dir must keep failing closed.
8911        let cwd = tempfile::tempdir().unwrap();
8912        let agent_dir = tempfile::tempdir().unwrap();
8913        let unrelated = tempfile::tempdir().unwrap();
8914        std::fs::write(unrelated.path().join("secret.txt"), "secret").unwrap();
8915        let secret_path = unrelated.path().join("secret.txt");
8916
8917        let err =
8918            enforce_read_scope_with_roots(&secret_path, cwd.path(), agent_dir.path()).unwrap_err();
8919        let msg = err.to_string();
8920        assert!(
8921            msg.contains("outside the working directory") && msg.contains("agent dir"),
8922            "error must mention both denied roots, got: {msg}"
8923        );
8924    }
8925
8926    #[test]
8927    fn test_enforce_read_scope_prefers_cwd_when_path_is_under_cwd() {
8928        // When a path is under cwd, we must not silently switch to agent-dir
8929        // resolution. This locks in the order of the prefix checks.
8930        let cwd = tempfile::tempdir().unwrap();
8931        let agent_dir = tempfile::tempdir().unwrap();
8932        std::fs::write(cwd.path().join("a.txt"), "in cwd").unwrap();
8933
8934        let resolved =
8935            enforce_read_scope_with_roots(&cwd.path().join("a.txt"), cwd.path(), agent_dir.path())
8936                .unwrap();
8937        assert!(
8938            resolved.starts_with(
8939                cwd.path()
8940                    .canonicalize()
8941                    .unwrap_or_else(|_| cwd.path().to_path_buf())
8942            )
8943        );
8944    }
8945
8946    #[test]
8947    fn test_read_empty_file() {
8948        asupersync::test_utils::run_test(|| async {
8949            let tmp = tempfile::tempdir().unwrap();
8950            std::fs::write(tmp.path().join("empty.txt"), "").unwrap();
8951
8952            let tool = ReadTool::new(tmp.path());
8953            let out = tool
8954                .execute(
8955                    "t",
8956                    serde_json::json!({ "path": tmp.path().join("empty.txt").to_string_lossy() }),
8957                    None,
8958                )
8959                .await
8960                .unwrap();
8961            let text = get_text(&out.content);
8962            assert_eq!(text, "");
8963            assert!(!out.is_error);
8964        });
8965    }
8966
8967    #[test]
8968    fn test_read_empty_file_positive_offset_errors() {
8969        asupersync::test_utils::run_test(|| async {
8970            let tmp = tempfile::tempdir().unwrap();
8971            std::fs::write(tmp.path().join("empty.txt"), "").unwrap();
8972
8973            let tool = ReadTool::new(tmp.path());
8974            let err = tool
8975                .execute(
8976                    "t",
8977                    serde_json::json!({
8978                        "path": tmp.path().join("empty.txt").to_string_lossy(),
8979                        "offset": 1
8980                    }),
8981                    None,
8982                )
8983                .await;
8984            assert!(err.is_err());
8985            let msg = err.unwrap_err().to_string();
8986            assert!(msg.contains("beyond end of file"));
8987        });
8988    }
8989
8990    #[test]
8991    fn test_read_rejects_zero_limit() {
8992        asupersync::test_utils::run_test(|| async {
8993            let tmp = tempfile::tempdir().unwrap();
8994            std::fs::write(tmp.path().join("lines.txt"), "a\nb\nc\n").unwrap();
8995
8996            let tool = ReadTool::new(tmp.path());
8997            let err = tool
8998                .execute(
8999                    "t",
9000                    serde_json::json!({
9001                        "path": tmp.path().join("lines.txt").to_string_lossy(),
9002                        "limit": 0
9003                    }),
9004                    None,
9005                )
9006                .await;
9007            assert!(err.is_err());
9008            assert!(
9009                err.unwrap_err()
9010                    .to_string()
9011                    .contains("`limit` must be greater than 0")
9012            );
9013        });
9014    }
9015
9016    #[test]
9017    fn test_read_offset_and_limit() {
9018        asupersync::test_utils::run_test(|| async {
9019            let tmp = tempfile::tempdir().unwrap();
9020            std::fs::write(
9021                tmp.path().join("lines.txt"),
9022                "L1\nL2\nL3\nL4\nL5\nL6\nL7\nL8\nL9\nL10",
9023            )
9024            .unwrap();
9025
9026            let tool = ReadTool::new(tmp.path());
9027            let out = tool
9028                .execute(
9029                    "t",
9030                    serde_json::json!({
9031                        "path": tmp.path().join("lines.txt").to_string_lossy(),
9032                        "offset": 3,
9033                        "limit": 2
9034                    }),
9035                    None,
9036                )
9037                .await
9038                .unwrap();
9039            let text = get_text(&out.content);
9040            assert!(text.contains("L3"));
9041            assert!(text.contains("L4"));
9042            assert!(!text.contains("L2"));
9043            assert!(!text.contains("L5"));
9044        });
9045    }
9046
9047    #[test]
9048    fn test_read_offset_and_limit_with_cr_only_line_endings() {
9049        asupersync::test_utils::run_test(|| async {
9050            let tmp = tempfile::tempdir().unwrap();
9051            std::fs::write(tmp.path().join("lines.txt"), b"L1\rL2\rL3\r").unwrap();
9052
9053            let tool = ReadTool::new(tmp.path());
9054            let out = tool
9055                .execute(
9056                    "t",
9057                    serde_json::json!({
9058                        "path": tmp.path().join("lines.txt").to_string_lossy(),
9059                        "offset": 2,
9060                        "limit": 1
9061                    }),
9062                    None,
9063                )
9064                .await
9065                .unwrap();
9066            let text = get_text(&out.content);
9067            assert!(text.contains("L2"));
9068            assert!(!text.contains("L1"));
9069            assert!(!text.contains("L3"));
9070            assert!(text.contains("offset=3"));
9071            assert!(!text.contains('\r'));
9072        });
9073    }
9074
9075    #[test]
9076    fn test_read_offset_and_limit_with_split_crlf_chunk_boundary() {
9077        asupersync::test_utils::run_test(|| async {
9078            let tmp = tempfile::tempdir().unwrap();
9079            let mut content = vec![b'x'; (64 * 1024) - 1];
9080            content.extend_from_slice(b"\r\nSECOND\r\nTHIRD");
9081            std::fs::write(tmp.path().join("lines.txt"), content).unwrap();
9082
9083            let tool = ReadTool::new(tmp.path());
9084            let out = tool
9085                .execute(
9086                    "t",
9087                    serde_json::json!({
9088                        "path": tmp.path().join("lines.txt").to_string_lossy(),
9089                        "offset": 2,
9090                        "limit": 1
9091                    }),
9092                    None,
9093                )
9094                .await
9095                .unwrap();
9096            let text = get_text(&out.content);
9097            assert!(text.contains("SECOND"));
9098            assert!(!text.contains("THIRD"));
9099            assert!(!text.contains("xxxx"));
9100            assert!(text.contains("offset=3"));
9101        });
9102    }
9103
9104    #[test]
9105    fn test_read_offset_beyond_eof() {
9106        asupersync::test_utils::run_test(|| async {
9107            let tmp = tempfile::tempdir().unwrap();
9108            std::fs::write(tmp.path().join("short.txt"), "a\nb").unwrap();
9109
9110            let tool = ReadTool::new(tmp.path());
9111            let err = tool
9112                .execute(
9113                    "t",
9114                    serde_json::json!({
9115                        "path": tmp.path().join("short.txt").to_string_lossy(),
9116                        "offset": 100
9117                    }),
9118                    None,
9119                )
9120                .await;
9121            assert!(err.is_err());
9122            let msg = err.unwrap_err().to_string();
9123            assert!(msg.contains("beyond end of file"));
9124        });
9125    }
9126
9127    #[test]
9128    fn test_map_normalized_with_trailing_whitespace() {
9129        // "A   \nB" -> "A\nB" (normalized strips trailing spaces)
9130        let content = "A   \nB";
9131        let normalized = build_normalized_content(content);
9132        assert_eq!(normalized, "A\nB");
9133
9134        // Find "A" (norm idx 0)
9135        let (start, len) = map_normalized_range_to_original(content, 0, 1);
9136        assert_eq!(start, 0);
9137        assert_eq!(len, 1);
9138        assert_eq!(&content[start..start + len], "A");
9139
9140        // Find "\n" (norm idx 1)
9141        let (start, len) = map_normalized_range_to_original(content, 1, 1);
9142        assert_eq!(start, 4);
9143        assert_eq!(len, 1);
9144        assert_eq!(&content[start..start + len], "\n");
9145
9146        // Find "B" (norm idx 2)
9147        let (start, len) = map_normalized_range_to_original(content, 2, 1);
9148        assert_eq!(start, 5);
9149        assert_eq!(len, 1);
9150        assert_eq!(&content[start..start + len], "B");
9151    }
9152
9153    #[test]
9154    fn test_read_binary_file_lossy() {
9155        asupersync::test_utils::run_test(|| async {
9156            let tmp = tempfile::tempdir().unwrap();
9157            let binary_data: Vec<u8> = (0..=255).collect();
9158            std::fs::write(tmp.path().join("binary.bin"), &binary_data).unwrap();
9159
9160            let tool = ReadTool::new(tmp.path());
9161            let out = tool
9162                .execute(
9163                    "t",
9164                    serde_json::json!({ "path": tmp.path().join("binary.bin").to_string_lossy() }),
9165                    None,
9166                )
9167                .await
9168                .unwrap();
9169            // Binary files are read as lossy UTF-8 with replacement characters
9170            let text = get_text(&out.content);
9171            assert!(!text.is_empty());
9172            assert!(!out.is_error);
9173        });
9174    }
9175
9176    #[test]
9177    fn test_read_image_detection() {
9178        asupersync::test_utils::run_test(|| async {
9179            let tmp = tempfile::tempdir().unwrap();
9180            // Minimal valid PNG header
9181            let png_header: Vec<u8> = vec![
9182                0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
9183                0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // IHDR chunk
9184                0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // 1x1 pixel
9185                0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53,
9186                0xDE, // bit depth, color type, etc
9187                0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, 0x54, // IDAT chunk
9188                0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // compressed data
9189                0x00, 0x02, 0x00, 0x01, 0xE2, 0x21, 0xBC, 0x33, // CRC
9190                0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, // IEND chunk
9191                0xAE, 0x42, 0x60, 0x82,
9192            ];
9193            std::fs::write(tmp.path().join("test.png"), &png_header).unwrap();
9194
9195            let tool = ReadTool::new(tmp.path());
9196            let out = tool
9197                .execute(
9198                    "t",
9199                    serde_json::json!({ "path": tmp.path().join("test.png").to_string_lossy() }),
9200                    None,
9201                )
9202                .await
9203                .unwrap();
9204
9205            // Should return an image content block
9206            let has_image = out
9207                .content
9208                .iter()
9209                .any(|b| matches!(b, ContentBlock::Image(_)));
9210            assert!(has_image, "expected image content block for PNG file");
9211        });
9212    }
9213
9214    #[cfg(feature = "image-resize")]
9215    #[test]
9216    fn test_read_resizes_large_source_image_before_api_limit_check() {
9217        asupersync::test_utils::run_test(|| async {
9218            use image::codecs::png::PngEncoder;
9219            use image::{ExtendedColorType, ImageEncoder, Rgb, RgbImage};
9220
9221            let tmp = tempfile::tempdir().unwrap();
9222            let image = RgbImage::from_fn(2600, 2600, |x, y| {
9223                let seed = x.wrapping_mul(1_973)
9224                    ^ y.wrapping_mul(9_277)
9225                    ^ x.rotate_left(7)
9226                    ^ y.rotate_left(13);
9227                Rgb([
9228                    u8::try_from(seed % 256).unwrap_or(0),
9229                    u8::try_from((seed >> 8) % 256).unwrap_or(0),
9230                    u8::try_from((seed >> 16) % 256).unwrap_or(0),
9231                ])
9232            });
9233
9234            let mut png_bytes = Vec::new();
9235            PngEncoder::new(&mut png_bytes)
9236                .write_image(
9237                    image.as_raw(),
9238                    image.width(),
9239                    image.height(),
9240                    ExtendedColorType::Rgb8,
9241                )
9242                .unwrap();
9243
9244            assert!(
9245                png_bytes.len() > IMAGE_MAX_BYTES,
9246                "fixture must exceed API image limit to exercise resize path"
9247            );
9248            assert!(
9249                png_bytes.len() < usize::try_from(READ_TOOL_MAX_BYTES).unwrap_or(usize::MAX),
9250                "fixture must stay within read-tool input bound"
9251            );
9252
9253            let image_path = tmp.path().join("large.png");
9254            std::fs::write(&image_path, &png_bytes).unwrap();
9255
9256            let tool = ReadTool::new(tmp.path());
9257            let out = tool
9258                .execute(
9259                    "t",
9260                    serde_json::json!({ "path": image_path.to_string_lossy() }),
9261                    None,
9262                )
9263                .await
9264                .unwrap();
9265
9266            assert!(!out.is_error, "resizable large images should succeed");
9267            assert!(
9268                out.content
9269                    .iter()
9270                    .any(|block| matches!(block, ContentBlock::Image(_))),
9271                "expected an image attachment after resizing"
9272            );
9273
9274            let text = get_text(&out.content);
9275            assert!(text.contains("Read image file"));
9276            assert!(
9277                text.contains("displayed at"),
9278                "expected resize note in read output, got: {text}"
9279            );
9280        });
9281    }
9282
9283    #[test]
9284    fn test_read_blocked_images() {
9285        asupersync::test_utils::run_test(|| async {
9286            let tmp = tempfile::tempdir().unwrap();
9287            let png_header: Vec<u8> =
9288                vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00];
9289            std::fs::write(tmp.path().join("test.png"), &png_header).unwrap();
9290
9291            let tool = ReadTool::with_settings(tmp.path(), false, true);
9292            let err = tool
9293                .execute(
9294                    "t",
9295                    serde_json::json!({ "path": tmp.path().join("test.png").to_string_lossy() }),
9296                    None,
9297                )
9298                .await;
9299            assert!(err.is_err());
9300            assert!(err.unwrap_err().to_string().contains("blocked"));
9301        });
9302    }
9303
9304    #[test]
9305    fn test_read_truncation_at_max_lines() {
9306        asupersync::test_utils::run_test(|| async {
9307            let tmp = tempfile::tempdir().unwrap();
9308            let content: String = (0..DEFAULT_MAX_LINES + 500)
9309                .map(|i| format!("line {i}"))
9310                .collect::<Vec<_>>()
9311                .join("\n");
9312            std::fs::write(tmp.path().join("big.txt"), &content).unwrap();
9313
9314            let tool = ReadTool::new(tmp.path());
9315            let out = tool
9316                .execute(
9317                    "t",
9318                    serde_json::json!({ "path": tmp.path().join("big.txt").to_string_lossy() }),
9319                    None,
9320                )
9321                .await
9322                .unwrap();
9323            // Should have truncation details
9324            assert!(out.details.is_some(), "expected truncation details");
9325            let text = get_text(&out.content);
9326            assert!(text.contains("offset="));
9327        });
9328    }
9329
9330    #[test]
9331    fn test_read_first_line_exceeds_max_bytes() {
9332        asupersync::test_utils::run_test(|| async {
9333            let tmp = tempfile::tempdir().unwrap();
9334            let long_line = "a".repeat(DEFAULT_MAX_BYTES + 128);
9335            std::fs::write(tmp.path().join("too_long.txt"), long_line).unwrap();
9336
9337            let tool = ReadTool::new(tmp.path());
9338            let out = tool
9339                .execute(
9340                    "t",
9341                    serde_json::json!({ "path": tmp.path().join("too_long.txt").to_string_lossy() }),
9342                    None,
9343                )
9344                .await
9345                .unwrap();
9346
9347            let text = get_text(&out.content);
9348            let expected_limit = format!("exceeds {} limit", format_size(DEFAULT_MAX_BYTES));
9349            assert!(
9350                text.contains(&expected_limit),
9351                "expected limit hint '{expected_limit}', got: {text}"
9352            );
9353            let details = out.details.expect("expected truncation details");
9354            assert_eq!(
9355                details
9356                    .get("truncation")
9357                    .and_then(|v| v.get("firstLineExceedsLimit"))
9358                    .and_then(serde_json::Value::as_bool),
9359                Some(true)
9360            );
9361        });
9362    }
9363
9364    #[test]
9365    fn test_read_unicode_content() {
9366        asupersync::test_utils::run_test(|| async {
9367            let tmp = tempfile::tempdir().unwrap();
9368            std::fs::write(tmp.path().join("uni.txt"), "Hello 你好 🌍\nLine 2 café").unwrap();
9369
9370            let tool = ReadTool::new(tmp.path());
9371            let out = tool
9372                .execute(
9373                    "t",
9374                    serde_json::json!({ "path": tmp.path().join("uni.txt").to_string_lossy() }),
9375                    None,
9376                )
9377                .await
9378                .unwrap();
9379            let text = get_text(&out.content);
9380            assert!(text.contains("你好"));
9381            assert!(text.contains("🌍"));
9382            assert!(text.contains("café"));
9383        });
9384    }
9385
9386    // ========================================================================
9387    // Write Tool Tests
9388    // ========================================================================
9389
9390    #[test]
9391    fn test_write_new_file() {
9392        asupersync::test_utils::run_test(|| async {
9393            let tmp = tempfile::tempdir().unwrap();
9394            let tool = WriteTool::new(tmp.path());
9395            let out = tool
9396                .execute(
9397                    "t",
9398                    serde_json::json!({
9399                        "path": tmp.path().join("new.txt").to_string_lossy(),
9400                        "content": "hello world"
9401                    }),
9402                    None,
9403                )
9404                .await
9405                .unwrap();
9406            assert!(!out.is_error);
9407            let contents = std::fs::read_to_string(tmp.path().join("new.txt")).unwrap();
9408            assert_eq!(contents, "hello world");
9409        });
9410    }
9411
9412    #[test]
9413    fn test_write_overwrite_existing() {
9414        asupersync::test_utils::run_test(|| async {
9415            let tmp = tempfile::tempdir().unwrap();
9416            std::fs::write(tmp.path().join("exist.txt"), "old content").unwrap();
9417
9418            let tool = WriteTool::new(tmp.path());
9419            let out = tool
9420                .execute(
9421                    "t",
9422                    serde_json::json!({
9423                        "path": tmp.path().join("exist.txt").to_string_lossy(),
9424                        "content": "new content"
9425                    }),
9426                    None,
9427                )
9428                .await
9429                .unwrap();
9430            assert!(!out.is_error);
9431            let contents = std::fs::read_to_string(tmp.path().join("exist.txt")).unwrap();
9432            assert_eq!(contents, "new content");
9433        });
9434    }
9435
9436    #[test]
9437    fn test_write_creates_parent_dirs() {
9438        asupersync::test_utils::run_test(|| async {
9439            let tmp = tempfile::tempdir().unwrap();
9440            let tool = WriteTool::new(tmp.path());
9441            let deep_path = tmp.path().join("a/b/c/deep.txt");
9442            let out = tool
9443                .execute(
9444                    "t",
9445                    serde_json::json!({
9446                        "path": deep_path.to_string_lossy(),
9447                        "content": "deep file"
9448                    }),
9449                    None,
9450                )
9451                .await
9452                .unwrap();
9453            assert!(!out.is_error);
9454            assert!(deep_path.exists());
9455            assert_eq!(std::fs::read_to_string(&deep_path).unwrap(), "deep file");
9456        });
9457    }
9458
9459    #[test]
9460    fn test_write_empty_file() {
9461        asupersync::test_utils::run_test(|| async {
9462            let tmp = tempfile::tempdir().unwrap();
9463            let tool = WriteTool::new(tmp.path());
9464            let out = tool
9465                .execute(
9466                    "t",
9467                    serde_json::json!({
9468                        "path": tmp.path().join("empty.txt").to_string_lossy(),
9469                        "content": ""
9470                    }),
9471                    None,
9472                )
9473                .await
9474                .unwrap();
9475            assert!(!out.is_error);
9476            let contents = std::fs::read_to_string(tmp.path().join("empty.txt")).unwrap();
9477            assert_eq!(contents, "");
9478            let text = get_text(&out.content);
9479            assert!(text.contains("Successfully wrote 0 bytes"));
9480        });
9481    }
9482
9483    #[test]
9484    fn test_write_rejects_outside_cwd() {
9485        asupersync::test_utils::run_test(|| async {
9486            let cwd = tempfile::tempdir().unwrap();
9487            let outside = tempfile::tempdir().unwrap();
9488            let tool = WriteTool::new(cwd.path());
9489            let err = tool
9490                .execute(
9491                    "t",
9492                    serde_json::json!({
9493                        "path": outside.path().join("escape.txt").to_string_lossy(),
9494                        "content": "nope"
9495                    }),
9496                    None,
9497                )
9498                .await
9499                .unwrap_err();
9500            assert!(err.to_string().contains("outside the working directory"));
9501
9502            let err = tool
9503                .execute(
9504                    "t",
9505                    serde_json::json!({
9506                        "path": "../escape.txt",
9507                        "content": "nope"
9508                    }),
9509                    None,
9510                )
9511                .await
9512                .unwrap_err();
9513            assert!(err.to_string().contains("outside the working directory"));
9514        });
9515    }
9516
9517    #[test]
9518    fn test_write_unicode_content() {
9519        asupersync::test_utils::run_test(|| async {
9520            let tmp = tempfile::tempdir().unwrap();
9521            let tool = WriteTool::new(tmp.path());
9522            let out = tool
9523                .execute(
9524                    "t",
9525                    serde_json::json!({
9526                        "path": tmp.path().join("unicode.txt").to_string_lossy(),
9527                        "content": "日本語 🎉 Ñoño"
9528                    }),
9529                    None,
9530                )
9531                .await
9532                .unwrap();
9533            assert!(!out.is_error);
9534            let contents = std::fs::read_to_string(tmp.path().join("unicode.txt")).unwrap();
9535            assert_eq!(contents, "日本語 🎉 Ñoño");
9536        });
9537    }
9538
9539    #[test]
9540    #[cfg(unix)]
9541    fn test_write_file_permissions_unix() {
9542        use std::os::unix::fs::PermissionsExt;
9543        asupersync::test_utils::run_test(|| async {
9544            let tmp = tempfile::tempdir().unwrap();
9545            let tool = WriteTool::new(tmp.path());
9546            let path = tmp.path().join("perms.txt");
9547            let out = tool
9548                .execute(
9549                    "t",
9550                    serde_json::json!({
9551                        "path": path.to_string_lossy(),
9552                        "content": "check perms"
9553                    }),
9554                    None,
9555                )
9556                .await
9557                .unwrap();
9558            assert!(!out.is_error);
9559
9560            let meta = std::fs::metadata(&path).unwrap();
9561            let mode = meta.permissions().mode();
9562            assert_eq!(
9563                mode & 0o777,
9564                0o644,
9565                "Expected default 0o644 permissions for new files"
9566            );
9567        });
9568    }
9569
9570    // ========================================================================
9571    // Edit Tool Tests
9572    // ========================================================================
9573
9574    #[test]
9575    fn test_edit_exact_match_replace() {
9576        asupersync::test_utils::run_test(|| async {
9577            let tmp = tempfile::tempdir().unwrap();
9578            std::fs::write(tmp.path().join("code.rs"), "fn foo() { bar() }").unwrap();
9579
9580            let tool = EditTool::new(tmp.path());
9581            let out = tool
9582                .execute(
9583                    "t",
9584                    serde_json::json!({
9585                        "path": tmp.path().join("code.rs").to_string_lossy(),
9586                        "oldText": "bar()",
9587                        "newText": "baz()"
9588                    }),
9589                    None,
9590                )
9591                .await
9592                .unwrap();
9593            assert!(!out.is_error);
9594            let contents = std::fs::read_to_string(tmp.path().join("code.rs")).unwrap();
9595            assert_eq!(contents, "fn foo() { baz() }");
9596        });
9597    }
9598
9599    #[test]
9600    fn test_edit_no_match_error() {
9601        asupersync::test_utils::run_test(|| async {
9602            let tmp = tempfile::tempdir().unwrap();
9603            std::fs::write(tmp.path().join("code.rs"), "fn foo() {}").unwrap();
9604
9605            let tool = EditTool::new(tmp.path());
9606            let err = tool
9607                .execute(
9608                    "t",
9609                    serde_json::json!({
9610                        "path": tmp.path().join("code.rs").to_string_lossy(),
9611                        "oldText": "NONEXISTENT TEXT",
9612                        "newText": "replacement"
9613                    }),
9614                    None,
9615                )
9616                .await;
9617            assert!(err.is_err());
9618        });
9619    }
9620
9621    #[test]
9622    fn test_edit_empty_old_text_error() {
9623        asupersync::test_utils::run_test(|| async {
9624            let tmp = tempfile::tempdir().unwrap();
9625            let path = tmp.path().join("code.rs");
9626            std::fs::write(&path, "fn foo() {}").unwrap();
9627
9628            let tool = EditTool::new(tmp.path());
9629            let err = tool
9630                .execute(
9631                    "t",
9632                    serde_json::json!({
9633                        "path": path.to_string_lossy(),
9634                        "oldText": "",
9635                        "newText": "prefix"
9636                    }),
9637                    None,
9638                )
9639                .await
9640                .expect_err("empty oldText should be rejected");
9641
9642            let msg = err.to_string();
9643            assert!(
9644                msg.contains("old text cannot be empty"),
9645                "unexpected error: {msg}"
9646            );
9647            let after = std::fs::read_to_string(path).unwrap();
9648            assert_eq!(after, "fn foo() {}");
9649        });
9650    }
9651
9652    #[test]
9653    fn test_edit_ambiguous_match_error() {
9654        asupersync::test_utils::run_test(|| async {
9655            let tmp = tempfile::tempdir().unwrap();
9656            std::fs::write(tmp.path().join("dup.txt"), "hello hello hello").unwrap();
9657
9658            let tool = EditTool::new(tmp.path());
9659            let err = tool
9660                .execute(
9661                    "t",
9662                    serde_json::json!({
9663                        "path": tmp.path().join("dup.txt").to_string_lossy(),
9664                        "oldText": "hello",
9665                        "newText": "world"
9666                    }),
9667                    None,
9668                )
9669                .await;
9670            assert!(err.is_err(), "expected error for ambiguous match");
9671        });
9672    }
9673
9674    #[test]
9675    fn test_edit_multi_line_replacement() {
9676        asupersync::test_utils::run_test(|| async {
9677            let tmp = tempfile::tempdir().unwrap();
9678            std::fs::write(
9679                tmp.path().join("multi.txt"),
9680                "line 1\nline 2\nline 3\nline 4",
9681            )
9682            .unwrap();
9683
9684            let tool = EditTool::new(tmp.path());
9685            let out = tool
9686                .execute(
9687                    "t",
9688                    serde_json::json!({
9689                        "path": tmp.path().join("multi.txt").to_string_lossy(),
9690                        "oldText": "line 2\nline 3",
9691                        "newText": "replaced 2\nreplaced 3\nextra line"
9692                    }),
9693                    None,
9694                )
9695                .await
9696                .unwrap();
9697            assert!(!out.is_error);
9698            let contents = std::fs::read_to_string(tmp.path().join("multi.txt")).unwrap();
9699            assert_eq!(
9700                contents,
9701                "line 1\nreplaced 2\nreplaced 3\nextra line\nline 4"
9702            );
9703        });
9704    }
9705
9706    #[test]
9707    fn test_edit_unicode_content() {
9708        asupersync::test_utils::run_test(|| async {
9709            let tmp = tempfile::tempdir().unwrap();
9710            std::fs::write(tmp.path().join("uni.txt"), "Héllo wörld 🌍").unwrap();
9711
9712            let tool = EditTool::new(tmp.path());
9713            let out = tool
9714                .execute(
9715                    "t",
9716                    serde_json::json!({
9717                        "path": tmp.path().join("uni.txt").to_string_lossy(),
9718                        "oldText": "wörld 🌍",
9719                        "newText": "Welt 🌎"
9720                    }),
9721                    None,
9722                )
9723                .await
9724                .unwrap();
9725            assert!(!out.is_error);
9726            let contents = std::fs::read_to_string(tmp.path().join("uni.txt")).unwrap();
9727            assert_eq!(contents, "Héllo Welt 🌎");
9728        });
9729    }
9730
9731    #[test]
9732    fn test_edit_missing_file() {
9733        asupersync::test_utils::run_test(|| async {
9734            let tmp = tempfile::tempdir().unwrap();
9735            let tool = EditTool::new(tmp.path());
9736            let err = tool
9737                .execute(
9738                    "t",
9739                    serde_json::json!({
9740                        "path": tmp.path().join("nope.txt").to_string_lossy(),
9741                        "oldText": "foo",
9742                        "newText": "bar"
9743                    }),
9744                    None,
9745                )
9746                .await;
9747            assert!(err.is_err());
9748        });
9749    }
9750
9751    // ========================================================================
9752    // Bash Tool Tests
9753    // ========================================================================
9754
9755    struct FailingReader {
9756        responses: std::collections::VecDeque<std::io::Result<Vec<u8>>>,
9757    }
9758
9759    impl FailingReader {
9760        fn new(responses: impl IntoIterator<Item = std::io::Result<Vec<u8>>>) -> Self {
9761            Self {
9762                responses: responses.into_iter().collect(),
9763            }
9764        }
9765    }
9766
9767    impl Read for FailingReader {
9768        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
9769            match self.responses.pop_front().unwrap_or_else(|| Ok(Vec::new())) {
9770                Ok(bytes) => {
9771                    assert!(
9772                        bytes.len() <= buf.len(),
9773                        "test reader only supports single-chunk reads"
9774                    );
9775                    buf[..bytes.len()].copy_from_slice(&bytes);
9776                    Ok(bytes.len())
9777                }
9778                Err(err) => Err(err),
9779            }
9780        }
9781    }
9782
9783    #[test]
9784    fn test_bash_simple_command() {
9785        asupersync::test_utils::run_test(|| async {
9786            let tmp = tempfile::tempdir().unwrap();
9787            let tool = BashTool::new(tmp.path());
9788            let out = tool
9789                .execute(
9790                    "t",
9791                    serde_json::json!({ "command": "echo hello_from_bash" }),
9792                    None,
9793                )
9794                .await
9795                .unwrap();
9796            let text = get_text(&out.content);
9797            assert!(text.contains("hello_from_bash"));
9798            assert!(!out.is_error);
9799        });
9800    }
9801
9802    #[test]
9803    fn test_bash_exit_code_nonzero() {
9804        asupersync::test_utils::run_test(|| async {
9805            let tmp = tempfile::tempdir().unwrap();
9806            let tool = BashTool::new(tmp.path());
9807            let out = tool
9808                .execute("t", serde_json::json!({ "command": "exit 42" }), None)
9809                .await
9810                .expect("non-zero exit should return Ok with is_error=true");
9811            assert!(out.is_error, "non-zero exit must set is_error");
9812            let msg = get_text(&out.content);
9813            assert!(
9814                msg.contains("42"),
9815                "expected exit code 42 in output, got: {msg}"
9816            );
9817        });
9818    }
9819
9820    #[cfg(unix)]
9821    #[test]
9822    fn test_bash_signal_termination_is_error() {
9823        asupersync::test_utils::run_test(|| async {
9824            let tmp = tempfile::tempdir().unwrap();
9825            let tool = BashTool::new(tmp.path());
9826            let out = tool
9827                .execute("t", serde_json::json!({ "command": "kill -KILL $$" }), None)
9828                .await
9829                .expect("signal-terminated shell should return Ok with is_error=true");
9830            assert!(
9831                out.is_error,
9832                "signal-terminated shell must be reported as error"
9833            );
9834            let msg = get_text(&out.content);
9835            assert!(
9836                msg.contains("Command exited with code"),
9837                "expected explicit exit-code report, got: {msg}"
9838            );
9839            assert!(
9840                !msg.contains("Command exited with code 0"),
9841                "signal-terminated shell must not appear successful: {msg}"
9842            );
9843        });
9844    }
9845
9846    #[test]
9847    fn test_bash_stderr_capture() {
9848        asupersync::test_utils::run_test(|| async {
9849            let tmp = tempfile::tempdir().unwrap();
9850            let tool = BashTool::new(tmp.path());
9851            let out = tool
9852                .execute(
9853                    "t",
9854                    serde_json::json!({ "command": "echo stderr_msg >&2" }),
9855                    None,
9856                )
9857                .await
9858                .unwrap();
9859            let text = get_text(&out.content);
9860            assert!(
9861                text.contains("stderr_msg"),
9862                "expected stderr output in result, got: {text}"
9863            );
9864        });
9865    }
9866
9867    #[test]
9868    fn test_bash_timeout() {
9869        asupersync::test_utils::run_test(|| async {
9870            let tmp = tempfile::tempdir().unwrap();
9871            let tool = BashTool::new(tmp.path());
9872            let out = tool
9873                .execute(
9874                    "t",
9875                    serde_json::json!({ "command": "sleep 60", "timeout": 2 }),
9876                    None,
9877                )
9878                .await
9879                .expect("timeout should return Ok with is_error=true");
9880            assert!(out.is_error, "timeout must set is_error");
9881            let msg = get_text(&out.content);
9882            assert!(
9883                msg.to_lowercase().contains("timeout") || msg.to_lowercase().contains("timed out"),
9884                "expected timeout indication, got: {msg}"
9885            );
9886            let cancellation = out
9887                .details
9888                .as_ref()
9889                .and_then(|details| details.get("cancellation"))
9890                .expect("timeout should include structured cancellation details");
9891            assert_eq!(cancellation["schema"], BASH_CANCELLATION_SCHEMA_V1);
9892            assert_eq!(cancellation["status"], "cancelled");
9893            assert_eq!(cancellation["reason"], "timeout");
9894            assert_eq!(cancellation["cleanup"], "process_group_tree_terminated");
9895            assert_eq!(cancellation["timeoutMs"], 2000);
9896        });
9897    }
9898
9899    #[cfg(target_os = "linux")]
9900    #[test]
9901    fn test_bash_timeout_kills_process_tree() {
9902        asupersync::test_utils::run_test(|| async {
9903            let tmp = tempfile::tempdir().unwrap();
9904            let marker = tmp.path().join("leaked_child.txt");
9905            let tool = BashTool::new(tmp.path());
9906
9907            let out = tool
9908                .execute(
9909                    "t",
9910                    serde_json::json!({
9911                        "command": "(sleep 3; echo leaked > leaked_child.txt) & sleep 10",
9912                        "timeout": 1
9913                    }),
9914                    None,
9915                )
9916                .await
9917                .expect("timeout should return Ok with is_error=true");
9918
9919            assert!(out.is_error, "timeout must set is_error");
9920            let msg = get_text(&out.content);
9921            assert!(msg.contains("Command timed out"));
9922
9923            // If process tree cleanup fails, this file appears after ~3 seconds.
9924            std::thread::sleep(Duration::from_secs(4));
9925            assert!(
9926                !marker.exists(),
9927                "background child was not terminated on timeout"
9928            );
9929        });
9930    }
9931
9932    #[cfg(target_os = "linux")]
9933    #[test]
9934    fn test_bash_cancelled_context_kills_process_tree() {
9935        asupersync::test_utils::run_test(|| async {
9936            let tmp = tempfile::tempdir().unwrap();
9937            let marker = tmp.path().join("leaked_child.txt");
9938
9939            let ambient_cx = asupersync::Cx::for_testing();
9940            let cancel_cx = ambient_cx.clone();
9941            let _current = asupersync::Cx::set_current(Some(ambient_cx));
9942
9943            let cancel_thread = std::thread::spawn(move || {
9944                std::thread::sleep(Duration::from_millis(100));
9945                cancel_cx.set_cancel_requested(true);
9946            });
9947
9948            let result = run_bash_command(
9949                tmp.path(),
9950                None,
9951                None,
9952                "(sleep 3; echo leaked > leaked_child.txt) & sleep 10",
9953                Some(30),
9954                None,
9955            )
9956            .await
9957            .expect("cancelled bash should return a result");
9958
9959            cancel_thread.join().expect("cancel thread");
9960
9961            assert!(
9962                result.cancelled,
9963                "expected cancelled bash result: {result:?}"
9964            );
9965            assert_eq!(
9966                result.cancellation_reason,
9967                Some(BashCancellationReason::AmbientCancellation)
9968            );
9969
9970            std::thread::sleep(Duration::from_secs(4));
9971            assert!(
9972                !marker.exists(),
9973                "background child was not terminated on cancellation"
9974            );
9975        });
9976    }
9977
9978    #[test]
9979    fn test_bash_pump_stream_emits_io_error_frame_after_partial_output() {
9980        let reader = FailingReader::new([
9981            Ok(b"partial stdout".to_vec()),
9982            Err(std::io::Error::other("simulated stdout failure")),
9983        ]);
9984        let (tx, rx) = mpsc::sync_channel::<BashPipeFrame>(4);
9985
9986        pump_stream(reader, "stdout", &tx);
9987
9988        match rx.recv().expect("partial chunk") {
9989            BashPipeFrame::Chunk(chunk) => assert_eq!(chunk, b"partial stdout"),
9990            BashPipeFrame::Error(message) => {
9991                unreachable!("expected output chunk before error, got error frame: {message}")
9992            }
9993        }
9994
9995        match rx.recv().expect("io error frame") {
9996            BashPipeFrame::Chunk(chunk) => {
9997                unreachable!("expected io error after partial chunk, got chunk: {chunk:?}")
9998            }
9999            BashPipeFrame::Error(message) => {
10000                assert!(message.contains("Failed to read bash stdout"));
10001                assert!(message.contains("simulated stdout failure"));
10002            }
10003        }
10004
10005        assert!(matches!(rx.try_recv(), Err(mpsc::TryRecvError::Empty)));
10006    }
10007
10008    #[test]
10009    fn test_drain_bash_output_ignores_cancellation_after_process_exit() {
10010        asupersync::test_utils::run_test(|| async {
10011            let (tx, mut rx) = mpsc::sync_channel::<BashPipeFrame>(1);
10012            let mut bash_output = BashOutputState::new(DEFAULT_MAX_BYTES);
10013
10014            let ambient_cx = asupersync::Cx::for_testing();
10015            ambient_cx.set_cancel_requested(true);
10016            let _current = asupersync::Cx::set_current(Some(ambient_cx));
10017            let cx = AgentCx::for_current_or_request();
10018            let now = cx
10019                .cx()
10020                .timer_driver()
10021                .map_or_else(wall_now, |timer| timer.now());
10022
10023            let cancelled = drain_bash_output(
10024                &mut rx,
10025                &mut bash_output,
10026                &cx,
10027                now + std::time::Duration::from_millis(10),
10028                std::time::Duration::from_millis(1),
10029                false,
10030            )
10031            .await
10032            .expect("drain should complete without cancellation");
10033
10034            drop(tx);
10035
10036            assert!(
10037                !cancelled,
10038                "post-exit drain should ignore late ambient cancellation"
10039            );
10040            assert_eq!(bash_output.total_bytes, 0);
10041        });
10042    }
10043
10044    #[test]
10045    fn test_drain_bash_output_returns_pipe_read_error() {
10046        asupersync::test_utils::run_test(|| async {
10047            let (tx, mut rx) = mpsc::sync_channel::<BashPipeFrame>(2);
10048            tx.send(BashPipeFrame::Chunk(b"partial stderr".to_vec()))
10049                .expect("queue partial output");
10050            tx.send(BashPipeFrame::Error(
10051                "Failed to read bash stderr: simulated stderr failure".to_string(),
10052            ))
10053            .expect("queue error frame");
10054            drop(tx);
10055
10056            let mut bash_output = BashOutputState::new(DEFAULT_MAX_BYTES);
10057            let cx = AgentCx::for_current_or_request();
10058            let now = cx
10059                .cx()
10060                .timer_driver()
10061                .map_or_else(wall_now, |timer| timer.now());
10062
10063            let err = drain_bash_output(
10064                &mut rx,
10065                &mut bash_output,
10066                &cx,
10067                now + std::time::Duration::from_millis(10),
10068                std::time::Duration::from_millis(1),
10069                false,
10070            )
10071            .await
10072            .expect_err("pipe read failures must surface as errors");
10073
10074            let message = err.to_string();
10075            assert!(message.contains("Failed to read bash stderr"));
10076            assert!(message.contains("simulated stderr failure"));
10077            assert!(message.contains("Partial output before failure"));
10078            assert!(message.contains("partial stderr"));
10079            assert_eq!(bash_output.total_bytes, "partial stderr".len());
10080        });
10081    }
10082
10083    #[test]
10084    fn test_drain_bash_output_honors_cancellation_while_process_still_active() {
10085        asupersync::test_utils::run_test(|| async {
10086            let (_tx, mut rx) = mpsc::sync_channel::<BashPipeFrame>(1);
10087            let mut bash_output = BashOutputState::new(DEFAULT_MAX_BYTES);
10088
10089            let ambient_cx = asupersync::Cx::for_testing();
10090            ambient_cx.set_cancel_requested(true);
10091            let _current = asupersync::Cx::set_current(Some(ambient_cx));
10092            let cx = AgentCx::for_current_or_request();
10093            let now = cx
10094                .cx()
10095                .timer_driver()
10096                .map_or_else(wall_now, |timer| timer.now());
10097
10098            let cancelled = drain_bash_output(
10099                &mut rx,
10100                &mut bash_output,
10101                &cx,
10102                now + std::time::Duration::from_secs(1),
10103                std::time::Duration::from_millis(1),
10104                true,
10105            )
10106            .await
10107            .expect("drain should complete under cancellation");
10108
10109            assert!(
10110                cancelled,
10111                "active drain should still honor ambient cancellation"
10112            );
10113            assert_eq!(bash_output.total_bytes, 0);
10114        });
10115    }
10116
10117    #[test]
10118    fn test_bash_output_state_abandon_spill_file_clears_path_and_unlinks_file() {
10119        let tmp = tempfile::tempdir().unwrap();
10120        let spill_path = tmp.path().join("partial-bash.log");
10121        std::fs::write(&spill_path, b"partial output").unwrap();
10122
10123        let mut bash_output = BashOutputState::new(DEFAULT_MAX_BYTES);
10124        bash_output.temp_file_path = Some(spill_path.clone());
10125
10126        bash_output.abandon_spill_file();
10127
10128        assert!(bash_output.spill_failed);
10129        assert!(bash_output.temp_file.is_none());
10130        assert!(bash_output.temp_file_path.is_none());
10131        assert!(
10132            !spill_path.exists(),
10133            "abandoned spill files should not be advertised or left behind"
10134        );
10135    }
10136
10137    #[test]
10138    fn test_bash_hard_limit_retains_partial_spill_file() {
10139        asupersync::test_utils::run_test(|| async {
10140            let tmp = tempfile::tempdir().unwrap();
10141            let spill_path = tmp.path().join("hard-limit-bash.log");
10142            std::fs::write(&spill_path, b"partial output").unwrap();
10143
10144            let spill_file = asupersync::fs::OpenOptions::new()
10145                .append(true)
10146                .open(&spill_path)
10147                .await
10148                .unwrap();
10149
10150            let mut bash_output = BashOutputState::new(DEFAULT_MAX_BYTES);
10151            bash_output.total_bytes = BASH_FILE_LIMIT_BYTES;
10152            bash_output.temp_file_path = Some(spill_path.clone());
10153            bash_output.temp_file = Some(spill_file);
10154
10155            ingest_bash_chunk(vec![b'x'], &mut bash_output)
10156                .await
10157                .expect("hard-limit ingestion should still succeed");
10158
10159            assert!(!bash_output.spill_failed);
10160            assert!(bash_output.temp_file.is_none());
10161            assert!(bash_output.temp_file_path.is_some());
10162            assert!(
10163                spill_path.exists(),
10164                "partial spill files must be retained once the hard limit is reached for diagnostics"
10165            );
10166        });
10167    }
10168
10169    #[test]
10170    #[cfg(unix)]
10171    fn test_bash_working_directory() {
10172        asupersync::test_utils::run_test(|| async {
10173            let tmp = tempfile::tempdir().unwrap();
10174            let tool = BashTool::new(tmp.path());
10175            let out = tool
10176                .execute("t", serde_json::json!({ "command": "pwd" }), None)
10177                .await
10178                .unwrap();
10179            let text = get_text(&out.content);
10180            let canonical = tmp.path().canonicalize().unwrap();
10181            assert!(
10182                text.contains(&canonical.to_string_lossy().to_string()),
10183                "expected cwd in output, got: {text}"
10184            );
10185        });
10186    }
10187
10188    #[test]
10189    fn test_bash_multiline_output() {
10190        asupersync::test_utils::run_test(|| async {
10191            let tmp = tempfile::tempdir().unwrap();
10192            let tool = BashTool::new(tmp.path());
10193            let out = tool
10194                .execute(
10195                    "t",
10196                    serde_json::json!({ "command": "echo line1; echo line2; echo line3" }),
10197                    None,
10198                )
10199                .await
10200                .unwrap();
10201            let text = get_text(&out.content);
10202            assert!(text.contains("line1"));
10203            assert!(text.contains("line2"));
10204            assert!(text.contains("line3"));
10205        });
10206    }
10207
10208    // ========================================================================
10209    // Grep Tool Tests
10210    // ========================================================================
10211
10212    #[test]
10213    fn test_grep_basic_pattern() {
10214        asupersync::test_utils::run_test(|| async {
10215            let tmp = tempfile::tempdir().unwrap();
10216            std::fs::write(
10217                tmp.path().join("search.txt"),
10218                "apple\nbanana\napricot\ncherry",
10219            )
10220            .unwrap();
10221
10222            let tool = GrepTool::new(tmp.path());
10223            let out = tool
10224                .execute(
10225                    "t",
10226                    serde_json::json!({
10227                        "pattern": "ap",
10228                        "path": tmp.path().join("search.txt").to_string_lossy()
10229                    }),
10230                    None,
10231                )
10232                .await
10233                .unwrap();
10234            let text = get_text(&out.content);
10235            assert!(text.contains("apple"));
10236            assert!(text.contains("apricot"));
10237            assert!(!text.contains("banana"));
10238            assert!(!text.contains("cherry"));
10239        });
10240    }
10241
10242    #[test]
10243    fn test_grep_rejects_outside_cwd() {
10244        asupersync::test_utils::run_test(|| async {
10245            let cwd = tempfile::tempdir().unwrap();
10246            let outside = tempfile::tempdir().unwrap();
10247            std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
10248
10249            let tool = GrepTool::new(cwd.path());
10250            let err = tool
10251                .execute(
10252                    "t",
10253                    serde_json::json!({
10254                        "pattern": "secret",
10255                        "path": outside.path().join("secret.txt").to_string_lossy()
10256                    }),
10257                    None,
10258                )
10259                .await
10260                .unwrap_err();
10261            assert!(err.to_string().contains("outside the working directory"));
10262        });
10263    }
10264
10265    #[test]
10266    fn test_grep_rejects_zero_limit() {
10267        asupersync::test_utils::run_test(|| async {
10268            let tmp = tempfile::tempdir().unwrap();
10269            std::fs::write(tmp.path().join("search.txt"), "alpha\nbeta\n").unwrap();
10270
10271            let tool = GrepTool::new(tmp.path());
10272            let err = tool
10273                .execute(
10274                    "t",
10275                    serde_json::json!({
10276                        "pattern": "alpha",
10277                        "path": tmp.path().join("search.txt").to_string_lossy(),
10278                        "limit": 0
10279                    }),
10280                    None,
10281                )
10282                .await
10283                .unwrap_err();
10284            assert!(err.to_string().contains("`limit` must be greater than 0"));
10285        });
10286    }
10287
10288    #[test]
10289    #[cfg(unix)]
10290    fn test_grep_formats_paths_relative_to_symlinked_cwd() {
10291        asupersync::test_utils::run_test(|| async {
10292            let real = tempfile::tempdir().unwrap();
10293            let link_parent = tempfile::tempdir().unwrap();
10294            let link = link_parent.path().join("linked-cwd");
10295            std::os::unix::fs::symlink(real.path(), &link).unwrap();
10296            std::fs::write(real.path().join("needle.txt"), "needle\n").unwrap();
10297
10298            let tool = GrepTool::new(&link);
10299            let out = tool
10300                .execute("t", serde_json::json!({ "pattern": "needle" }), None)
10301                .await
10302                .unwrap();
10303
10304            let text = get_text(&out.content);
10305            assert!(
10306                text.contains("needle.txt:1: needle"),
10307                "grep output should use cwd-relative paths for symlinked cwd, got: {text}"
10308            );
10309            assert!(
10310                !text.contains(real.path().to_string_lossy().as_ref()),
10311                "grep output should not leak canonical temp root, got: {text}"
10312            );
10313        });
10314    }
10315
10316    #[test]
10317    fn test_grep_regex_pattern() {
10318        asupersync::test_utils::run_test(|| async {
10319            let tmp = tempfile::tempdir().unwrap();
10320            std::fs::write(
10321                tmp.path().join("regex.txt"),
10322                "foo123\nbar456\nbaz789\nfoo000",
10323            )
10324            .unwrap();
10325
10326            let tool = GrepTool::new(tmp.path());
10327            let out = tool
10328                .execute(
10329                    "t",
10330                    serde_json::json!({
10331                        "pattern": "foo\\d+",
10332                        "path": tmp.path().join("regex.txt").to_string_lossy()
10333                    }),
10334                    None,
10335                )
10336                .await
10337                .unwrap();
10338            let text = get_text(&out.content);
10339            assert!(text.contains("foo123"));
10340            assert!(text.contains("foo000"));
10341            assert!(!text.contains("bar456"));
10342        });
10343    }
10344
10345    #[test]
10346    fn test_grep_case_insensitive() {
10347        asupersync::test_utils::run_test(|| async {
10348            let tmp = tempfile::tempdir().unwrap();
10349            std::fs::write(tmp.path().join("case.txt"), "Hello\nhello\nHELLO").unwrap();
10350
10351            let tool = GrepTool::new(tmp.path());
10352            let out = tool
10353                .execute(
10354                    "t",
10355                    serde_json::json!({
10356                        "pattern": "hello",
10357                        "path": tmp.path().join("case.txt").to_string_lossy(),
10358                        "ignoreCase": true
10359                    }),
10360                    None,
10361                )
10362                .await
10363                .unwrap();
10364            let text = get_text(&out.content);
10365            assert!(text.contains("Hello"));
10366            assert!(text.contains("hello"));
10367            assert!(text.contains("HELLO"));
10368        });
10369    }
10370
10371    #[test]
10372    fn test_grep_case_sensitive_by_default() {
10373        asupersync::test_utils::run_test(|| async {
10374            let tmp = tempfile::tempdir().unwrap();
10375            std::fs::write(tmp.path().join("case_sensitive.txt"), "Hello\nHELLO").unwrap();
10376
10377            let tool = GrepTool::new(tmp.path());
10378            let out = tool
10379                .execute(
10380                    "t",
10381                    serde_json::json!({
10382                        "pattern": "hello",
10383                        "path": tmp.path().join("case_sensitive.txt").to_string_lossy()
10384                    }),
10385                    None,
10386                )
10387                .await
10388                .unwrap();
10389            let text = get_text(&out.content);
10390            assert!(
10391                text.contains("No matches found"),
10392                "expected case-sensitive search to find no matches, got: {text}"
10393            );
10394        });
10395    }
10396
10397    #[test]
10398    fn test_grep_append_non_matching_lines_invariant() {
10399        asupersync::test_utils::run_test(|| async {
10400            let tmp = tempfile::tempdir().unwrap();
10401            let file = tmp.path().join("base.txt");
10402            std::fs::write(&file, "needle one\nskip\nneedle two\n").unwrap();
10403
10404            let tool = GrepTool::new(tmp.path());
10405            let base_out = tool
10406                .execute(
10407                    "t",
10408                    serde_json::json!({
10409                        "pattern": "needle",
10410                        "path": file.to_string_lossy(),
10411                        "limit": 100
10412                    }),
10413                    None,
10414                )
10415                .await
10416                .unwrap();
10417            let base_text = get_text(&base_out.content);
10418
10419            std::fs::write(&file, "needle one\nskip\nneedle two\nalpha\nbeta\n").unwrap();
10420            let extended_out = tool
10421                .execute(
10422                    "t",
10423                    serde_json::json!({
10424                        "pattern": "needle",
10425                        "path": file.to_string_lossy(),
10426                        "limit": 100
10427                    }),
10428                    None,
10429                )
10430                .await
10431                .unwrap();
10432            let extended_text = get_text(&extended_out.content);
10433
10434            assert_eq!(
10435                base_text, extended_text,
10436                "adding non-matching lines should not alter grep output"
10437            );
10438        });
10439    }
10440
10441    #[test]
10442    fn test_grep_no_matches() {
10443        asupersync::test_utils::run_test(|| async {
10444            let tmp = tempfile::tempdir().unwrap();
10445            std::fs::write(tmp.path().join("nothing.txt"), "alpha\nbeta\ngamma").unwrap();
10446
10447            let tool = GrepTool::new(tmp.path());
10448            let out = tool
10449                .execute(
10450                    "t",
10451                    serde_json::json!({
10452                        "pattern": "ZZZZZ_NOMATCH",
10453                        "path": tmp.path().join("nothing.txt").to_string_lossy()
10454                    }),
10455                    None,
10456                )
10457                .await
10458                .unwrap();
10459            let text = get_text(&out.content);
10460            assert!(
10461                text.to_lowercase().contains("no match")
10462                    || text.is_empty()
10463                    || text.to_lowercase().contains("no results"),
10464                "expected no-match indication, got: {text}"
10465            );
10466        });
10467    }
10468
10469    #[test]
10470    fn test_grep_context_lines() {
10471        asupersync::test_utils::run_test(|| async {
10472            let tmp = tempfile::tempdir().unwrap();
10473            std::fs::write(
10474                tmp.path().join("ctx.txt"),
10475                "aaa\nbbb\nccc\ntarget\nddd\neee\nfff",
10476            )
10477            .unwrap();
10478
10479            let tool = GrepTool::new(tmp.path());
10480            let out = tool
10481                .execute(
10482                    "t",
10483                    serde_json::json!({
10484                        "pattern": "target",
10485                        "path": tmp.path().join("ctx.txt").to_string_lossy(),
10486                        "context": 1
10487                    }),
10488                    None,
10489                )
10490                .await
10491                .unwrap();
10492            let text = get_text(&out.content);
10493            assert!(text.contains("target"));
10494            assert!(text.contains("ccc"), "expected context line before match");
10495            assert!(text.contains("ddd"), "expected context line after match");
10496        });
10497    }
10498
10499    #[test]
10500    fn test_grep_limit() {
10501        asupersync::test_utils::run_test(|| async {
10502            let tmp = tempfile::tempdir().unwrap();
10503            let content: String = (0..200)
10504                .map(|i| format!("match_line_{i}"))
10505                .collect::<Vec<_>>()
10506                .join("\n");
10507            std::fs::write(tmp.path().join("many.txt"), &content).unwrap();
10508
10509            let tool = GrepTool::new(tmp.path());
10510            let out = tool
10511                .execute(
10512                    "t",
10513                    serde_json::json!({
10514                        "pattern": "match_line",
10515                        "path": tmp.path().join("many.txt").to_string_lossy(),
10516                        "limit": 5
10517                    }),
10518                    None,
10519                )
10520                .await
10521                .unwrap();
10522            let text = get_text(&out.content);
10523            // With limit=5, we should see at most 5 matches
10524            let match_count = text.matches("match_line_").count();
10525            assert!(
10526                match_count <= 5,
10527                "expected at most 5 matches with limit=5, got {match_count}"
10528            );
10529            let details = out.details.expect("expected limit details");
10530            assert_eq!(
10531                details
10532                    .get("matchLimitReached")
10533                    .and_then(serde_json::Value::as_u64),
10534                Some(5)
10535            );
10536        });
10537    }
10538
10539    #[test]
10540    fn test_grep_exact_limit_does_not_report_limit_reached() {
10541        asupersync::test_utils::run_test(|| async {
10542            let tmp = tempfile::tempdir().unwrap();
10543            let content = (0..5)
10544                .map(|i| format!("match_line_{i}"))
10545                .collect::<Vec<_>>()
10546                .join("\n");
10547            std::fs::write(tmp.path().join("exact.txt"), &content).unwrap();
10548
10549            let tool = GrepTool::new(tmp.path());
10550            let out = tool
10551                .execute(
10552                    "t",
10553                    serde_json::json!({
10554                        "pattern": "match_line",
10555                        "path": tmp.path().join("exact.txt").to_string_lossy(),
10556                        "limit": 5
10557                    }),
10558                    None,
10559                )
10560                .await
10561                .unwrap();
10562
10563            let text = get_text(&out.content);
10564            assert_eq!(text.matches("match_line_").count(), 5);
10565            assert!(
10566                !text.contains("matches limit reached"),
10567                "exact-limit grep results should not claim truncation: {text}"
10568            );
10569            assert!(
10570                out.details
10571                    .as_ref()
10572                    .and_then(|details| details.get("matchLimitReached"))
10573                    .is_none(),
10574                "exact-limit grep results should not set matchLimitReached"
10575            );
10576        });
10577    }
10578
10579    #[test]
10580    fn test_grep_large_output_does_not_deadlock_reader_threads() {
10581        asupersync::test_utils::run_test(|| async {
10582            use std::fmt::Write as _;
10583
10584            let tmp = tempfile::tempdir().unwrap();
10585            let mut content = String::with_capacity(80_000);
10586            for i in 0..5000 {
10587                let _ = writeln!(&mut content, "needle_line_{i}");
10588            }
10589            let file = tmp.path().join("large_grep.txt");
10590            std::fs::write(&file, content).unwrap();
10591
10592            let tool = GrepTool::new(tmp.path());
10593            let run = tool.execute(
10594                "t",
10595                serde_json::json!({
10596                    "pattern": "needle_line_",
10597                    "path": file.to_string_lossy(),
10598                    "limit": 6000
10599                }),
10600                None,
10601            );
10602
10603            let out = asupersync::time::timeout(
10604                asupersync::time::wall_now(),
10605                Duration::from_secs(15),
10606                Box::pin(run),
10607            )
10608            .await
10609            .expect("grep timed out; possible stdout/stderr reader deadlock")
10610            .expect("grep should succeed");
10611
10612            let text = get_text(&out.content);
10613            assert!(text.contains("needle_line_0"));
10614        });
10615    }
10616
10617    #[test]
10618    fn test_grep_respects_gitignore() {
10619        asupersync::test_utils::run_test(|| async {
10620            let tmp = tempfile::tempdir().unwrap();
10621            std::fs::write(tmp.path().join(".gitignore"), "ignored.txt\n").unwrap();
10622            std::fs::write(tmp.path().join("ignored.txt"), "needle in ignored file").unwrap();
10623            std::fs::write(tmp.path().join("visible.txt"), "nothing here").unwrap();
10624
10625            let tool = GrepTool::new(tmp.path());
10626            let out = tool
10627                .execute("t", serde_json::json!({ "pattern": "needle" }), None)
10628                .await
10629                .unwrap();
10630
10631            let text = get_text(&out.content);
10632            assert!(
10633                text.contains("No matches found"),
10634                "expected ignored file to be excluded, got: {text}"
10635            );
10636        });
10637    }
10638
10639    #[test]
10640    fn test_grep_literal_mode() {
10641        asupersync::test_utils::run_test(|| async {
10642            let tmp = tempfile::tempdir().unwrap();
10643            std::fs::write(tmp.path().join("literal.txt"), "a+b\na.b\nab\na\\+b").unwrap();
10644
10645            let tool = GrepTool::new(tmp.path());
10646            let out = tool
10647                .execute(
10648                    "t",
10649                    serde_json::json!({
10650                        "pattern": "a+b",
10651                        "path": tmp.path().join("literal.txt").to_string_lossy(),
10652                        "literal": true
10653                    }),
10654                    None,
10655                )
10656                .await
10657                .unwrap();
10658            let text = get_text(&out.content);
10659            assert!(text.contains("a+b"), "literal match should find 'a+b'");
10660        });
10661    }
10662
10663    #[test]
10664    fn test_grep_hashline_output() {
10665        asupersync::test_utils::run_test(|| async {
10666            let tmp = tempfile::tempdir().unwrap();
10667            std::fs::write(
10668                tmp.path().join("hash.txt"),
10669                "apple\nbanana\napricot\ncherry",
10670            )
10671            .unwrap();
10672
10673            let tool = GrepTool::new(tmp.path());
10674            let out = tool
10675                .execute(
10676                    "t",
10677                    serde_json::json!({
10678                        "pattern": "ap",
10679                        "path": tmp.path().join("hash.txt").to_string_lossy(),
10680                        "hashline": true
10681                    }),
10682                    None,
10683                )
10684                .await
10685                .unwrap();
10686            let text = get_text(&out.content);
10687            // Hashline output should contain N#AB tags instead of bare line numbers
10688            // Line 1 (apple) and line 3 (apricot) should match
10689            assert!(text.contains("apple"), "should contain apple");
10690            assert!(text.contains("apricot"), "should contain apricot");
10691            assert!(
10692                !text.contains("banana"),
10693                "should not contain banana context"
10694            );
10695            // Verify hashline tag format: digit(s) followed by # and two uppercase letters
10696            let re = regex::Regex::new(r"\d+#[A-Z]{2}").unwrap();
10697            assert!(
10698                re.is_match(&text),
10699                "hashline output should contain N#AB tags, got: {text}"
10700            );
10701        });
10702    }
10703
10704    #[test]
10705    fn test_grep_hashline_with_context() {
10706        asupersync::test_utils::run_test(|| async {
10707            let tmp = tempfile::tempdir().unwrap();
10708            std::fs::write(
10709                tmp.path().join("ctx.txt"),
10710                "line1\nline2\ntarget\nline4\nline5",
10711            )
10712            .unwrap();
10713
10714            let tool = GrepTool::new(tmp.path());
10715            let out = tool
10716                .execute(
10717                    "t",
10718                    serde_json::json!({
10719                        "pattern": "target",
10720                        "path": tmp.path().join("ctx.txt").to_string_lossy(),
10721                        "hashline": true,
10722                        "context": 1
10723                    }),
10724                    None,
10725                )
10726                .await
10727                .unwrap();
10728            let text = get_text(&out.content);
10729            // With context=1, should include line2, target, line4
10730            assert!(text.contains("line2"), "should contain context line2");
10731            assert!(text.contains("target"), "should contain match");
10732            assert!(text.contains("line4"), "should contain context line4");
10733            // Match lines use `:` separator, context lines use `-`
10734            let re_match = regex::Regex::new(r"\d+#[A-Z]{2}: target").unwrap();
10735            assert!(
10736                re_match.is_match(&text),
10737                "match line should use : separator with hashline tag, got: {text}"
10738            );
10739            let re_ctx = regex::Regex::new(r"\d+#[A-Z]{2}- line").unwrap();
10740            assert!(
10741                re_ctx.is_match(&text),
10742                "context line should use - separator with hashline tag, got: {text}"
10743            );
10744        });
10745    }
10746
10747    // ========================================================================
10748    // Find Tool Tests
10749    // ========================================================================
10750
10751    #[test]
10752    fn test_find_glob_pattern() {
10753        asupersync::test_utils::run_test(|| async {
10754            if find_fd_binary().is_none() {
10755                return;
10756            }
10757            let tmp = tempfile::tempdir().unwrap();
10758            std::fs::write(tmp.path().join("file1.rs"), "").unwrap();
10759            std::fs::write(tmp.path().join("file2.rs"), "").unwrap();
10760            std::fs::write(tmp.path().join("file3.txt"), "").unwrap();
10761
10762            let tool = FindTool::new(tmp.path());
10763            let out = tool
10764                .execute(
10765                    "t",
10766                    serde_json::json!({
10767                        "pattern": "*.rs",
10768                        "path": tmp.path().to_string_lossy()
10769                    }),
10770                    None,
10771                )
10772                .await
10773                .unwrap();
10774            let text = get_text(&out.content);
10775            assert!(text.contains("file1.rs"));
10776            assert!(text.contains("file2.rs"));
10777            assert!(!text.contains("file3.txt"));
10778        });
10779    }
10780
10781    #[test]
10782    fn test_find_append_non_matching_file_invariant() {
10783        asupersync::test_utils::run_test(|| async {
10784            if find_fd_binary().is_none() {
10785                return;
10786            }
10787            let tmp = tempfile::tempdir().unwrap();
10788            std::fs::write(tmp.path().join("match.txt"), "a").unwrap();
10789
10790            let tool = FindTool::new(tmp.path());
10791            let base_out = tool
10792                .execute(
10793                    "t",
10794                    serde_json::json!({
10795                        "pattern": "*.txt",
10796                        "path": tmp.path().to_string_lossy()
10797                    }),
10798                    None,
10799                )
10800                .await
10801                .unwrap();
10802            let base_text = get_text(&base_out.content);
10803
10804            std::fs::write(tmp.path().join("ignore.md"), "b").unwrap();
10805            let extended_out = tool
10806                .execute(
10807                    "t",
10808                    serde_json::json!({
10809                        "pattern": "*.txt",
10810                        "path": tmp.path().to_string_lossy()
10811                    }),
10812                    None,
10813                )
10814                .await
10815                .unwrap();
10816            let extended_text = get_text(&extended_out.content);
10817
10818            assert_eq!(
10819                base_text, extended_text,
10820                "adding non-matching files should not alter find output"
10821            );
10822        });
10823    }
10824
10825    #[test]
10826    fn test_find_rejects_outside_cwd() {
10827        asupersync::test_utils::run_test(|| async {
10828            let cwd = tempfile::tempdir().unwrap();
10829            let outside = tempfile::tempdir().unwrap();
10830            std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
10831
10832            let tool = FindTool::new(cwd.path());
10833            let err = tool
10834                .execute(
10835                    "t",
10836                    serde_json::json!({
10837                        "pattern": "*.txt",
10838                        "path": outside.path().to_string_lossy()
10839                    }),
10840                    None,
10841                )
10842                .await
10843                .unwrap_err();
10844            assert!(err.to_string().contains("outside the working directory"));
10845        });
10846    }
10847
10848    #[test]
10849    fn test_find_limit() {
10850        asupersync::test_utils::run_test(|| async {
10851            if find_fd_binary().is_none() {
10852                return;
10853            }
10854            let tmp = tempfile::tempdir().unwrap();
10855            for i in 0..20 {
10856                std::fs::write(tmp.path().join(format!("f{i}.txt")), "").unwrap();
10857            }
10858
10859            let tool = FindTool::new(tmp.path());
10860            let out = tool
10861                .execute(
10862                    "t",
10863                    serde_json::json!({
10864                        "pattern": "*.txt",
10865                        "path": tmp.path().to_string_lossy(),
10866                        "limit": 5
10867                    }),
10868                    None,
10869                )
10870                .await
10871                .unwrap();
10872            let text = get_text(&out.content);
10873            let file_count = text.lines().filter(|l| l.contains(".txt")).count();
10874            assert!(
10875                file_count <= 5,
10876                "expected at most 5 files with limit=5, got {file_count}"
10877            );
10878            let details = out.details.expect("expected limit details");
10879            assert_eq!(
10880                details
10881                    .get("resultLimitReached")
10882                    .and_then(serde_json::Value::as_u64),
10883                Some(5)
10884            );
10885        });
10886    }
10887
10888    #[test]
10889    fn test_find_exact_limit_does_not_report_limit_reached() {
10890        asupersync::test_utils::run_test(|| async {
10891            if find_fd_binary().is_none() {
10892                return;
10893            }
10894            let tmp = tempfile::tempdir().unwrap();
10895            for i in 0..5 {
10896                std::fs::write(tmp.path().join(format!("f{i}.txt")), "").unwrap();
10897            }
10898
10899            let tool = FindTool::new(tmp.path());
10900            let out = tool
10901                .execute(
10902                    "t",
10903                    serde_json::json!({
10904                        "pattern": "*.txt",
10905                        "path": tmp.path().to_string_lossy(),
10906                        "limit": 5
10907                    }),
10908                    None,
10909                )
10910                .await
10911                .unwrap();
10912
10913            let text = get_text(&out.content);
10914            assert_eq!(text.lines().filter(|line| line.contains(".txt")).count(), 5);
10915            assert!(
10916                !text.contains("results limit reached"),
10917                "exact-limit find results should not claim truncation: {text}"
10918            );
10919            assert!(
10920                out.details
10921                    .as_ref()
10922                    .and_then(|details| details.get("resultLimitReached"))
10923                    .is_none(),
10924                "exact-limit find results should not set resultLimitReached"
10925            );
10926        });
10927    }
10928
10929    #[test]
10930    fn test_find_zero_limit_is_rejected() {
10931        asupersync::test_utils::run_test(|| async {
10932            if find_fd_binary().is_none() {
10933                return;
10934            }
10935            let tmp = tempfile::tempdir().unwrap();
10936            std::fs::write(tmp.path().join("file.txt"), "").unwrap();
10937
10938            let tool = FindTool::new(tmp.path());
10939            let err = tool
10940                .execute(
10941                    "t",
10942                    serde_json::json!({
10943                        "pattern": "*.txt",
10944                        "path": tmp.path().to_string_lossy(),
10945                        "limit": 0
10946                    }),
10947                    None,
10948                )
10949                .await
10950                .expect_err("limit=0 should be rejected");
10951
10952            assert!(
10953                err.to_string().contains("`limit` must be greater than 0"),
10954                "expected validation error, got: {err}"
10955            );
10956        });
10957    }
10958
10959    #[test]
10960    fn test_find_no_matches() {
10961        asupersync::test_utils::run_test(|| async {
10962            if find_fd_binary().is_none() {
10963                return;
10964            }
10965            let tmp = tempfile::tempdir().unwrap();
10966            std::fs::write(tmp.path().join("only.txt"), "").unwrap();
10967
10968            let tool = FindTool::new(tmp.path());
10969            let out = tool
10970                .execute(
10971                    "t",
10972                    serde_json::json!({
10973                        "pattern": "*.rs",
10974                        "path": tmp.path().to_string_lossy()
10975                    }),
10976                    None,
10977                )
10978                .await
10979                .unwrap();
10980            let text = get_text(&out.content);
10981            assert!(
10982                text.to_lowercase().contains("no files found")
10983                    || text.to_lowercase().contains("no matches")
10984                    || text.is_empty(),
10985                "expected no-match indication, got: {text}"
10986            );
10987        });
10988    }
10989
10990    #[test]
10991    fn test_find_nonexistent_path() {
10992        asupersync::test_utils::run_test(|| async {
10993            if find_fd_binary().is_none() {
10994                return;
10995            }
10996            let tmp = tempfile::tempdir().unwrap();
10997            let tool = FindTool::new(tmp.path());
10998            let err = tool
10999                .execute(
11000                    "t",
11001                    serde_json::json!({
11002                        "pattern": "*.rs",
11003                        "path": tmp.path().join("nonexistent").to_string_lossy()
11004                    }),
11005                    None,
11006                )
11007                .await;
11008            assert!(err.is_err());
11009        });
11010    }
11011
11012    #[test]
11013    fn test_find_nested_directories() {
11014        asupersync::test_utils::run_test(|| async {
11015            if find_fd_binary().is_none() {
11016                return;
11017            }
11018            let tmp = tempfile::tempdir().unwrap();
11019            std::fs::create_dir_all(tmp.path().join("a/b/c")).unwrap();
11020            std::fs::write(tmp.path().join("top.rs"), "").unwrap();
11021            std::fs::write(tmp.path().join("a/mid.rs"), "").unwrap();
11022            std::fs::write(tmp.path().join("a/b/c/deep.rs"), "").unwrap();
11023
11024            let tool = FindTool::new(tmp.path());
11025            let out = tool
11026                .execute(
11027                    "t",
11028                    serde_json::json!({
11029                        "pattern": "*.rs",
11030                        "path": tmp.path().to_string_lossy()
11031                    }),
11032                    None,
11033                )
11034                .await
11035                .unwrap();
11036            let text = get_text(&out.content);
11037            assert!(text.contains("top.rs"));
11038            assert!(text.contains("mid.rs"));
11039            assert!(text.contains("deep.rs"));
11040        });
11041    }
11042
11043    #[test]
11044    fn test_find_results_are_sorted() {
11045        // FindTool sorts by modification time (most recent first), then alphabetically
11046        // as a tie-breaker for files with the same mtime.
11047        asupersync::test_utils::run_test(|| async {
11048            if find_fd_binary().is_none() {
11049                return;
11050            }
11051            let tmp = tempfile::tempdir().unwrap();
11052
11053            // Create files with delays to ensure distinct modification times.
11054            // Order: oldest first, so the expected output (most recent first) is reversed.
11055            std::fs::write(tmp.path().join("oldest.txt"), "").unwrap();
11056            std::thread::sleep(std::time::Duration::from_millis(50));
11057            std::fs::write(tmp.path().join("middle.txt"), "").unwrap();
11058            std::thread::sleep(std::time::Duration::from_millis(50));
11059            std::fs::write(tmp.path().join("newest.txt"), "").unwrap();
11060
11061            let tool = FindTool::new(tmp.path());
11062            let out = tool
11063                .execute(
11064                    "t",
11065                    serde_json::json!({
11066                        "pattern": "*.txt",
11067                        "path": tmp.path().to_string_lossy()
11068                    }),
11069                    None,
11070                )
11071                .await
11072                .unwrap();
11073            let lines: Vec<String> = get_text(&out.content)
11074                .lines()
11075                .map(str::trim)
11076                .filter(|line| !line.is_empty())
11077                .map(str::to_string)
11078                .collect();
11079
11080            // Expected order: most recent first
11081            assert_eq!(
11082                lines,
11083                vec!["newest.txt", "middle.txt", "oldest.txt"],
11084                "expected mtime-sorted find output (most recent first)"
11085            );
11086        });
11087    }
11088
11089    #[test]
11090    fn test_find_respects_gitignore() {
11091        asupersync::test_utils::run_test(|| async {
11092            if find_fd_binary().is_none() {
11093                return;
11094            }
11095            let tmp = tempfile::tempdir().unwrap();
11096            std::fs::write(tmp.path().join(".gitignore"), "ignored.txt\n").unwrap();
11097            std::fs::write(tmp.path().join("ignored.txt"), "").unwrap();
11098
11099            let tool = FindTool::new(tmp.path());
11100            let out = tool
11101                .execute(
11102                    "t",
11103                    serde_json::json!({
11104                        "pattern": "*.txt",
11105                        "path": tmp.path().to_string_lossy()
11106                    }),
11107                    None,
11108                )
11109                .await
11110                .unwrap();
11111            let text = get_text(&out.content);
11112            assert!(
11113                text.contains("No files found matching pattern"),
11114                "expected .gitignore'd files to be excluded, got: {text}"
11115            );
11116        });
11117    }
11118
11119    // ========================================================================
11120    // Ls Tool Tests
11121    // ========================================================================
11122
11123    #[test]
11124    fn test_ls_directory_listing() {
11125        asupersync::test_utils::run_test(|| async {
11126            let tmp = tempfile::tempdir().unwrap();
11127            std::fs::write(tmp.path().join("file_a.txt"), "content").unwrap();
11128            std::fs::write(tmp.path().join("file_b.rs"), "fn main() {}").unwrap();
11129            std::fs::create_dir(tmp.path().join("subdir")).unwrap();
11130
11131            let tool = LsTool::new(tmp.path());
11132            let out = tool
11133                .execute(
11134                    "t",
11135                    serde_json::json!({ "path": tmp.path().to_string_lossy() }),
11136                    None,
11137                )
11138                .await
11139                .unwrap();
11140            let text = get_text(&out.content);
11141            assert!(text.contains("file_a.txt"));
11142            assert!(text.contains("file_b.rs"));
11143            assert!(text.contains("subdir"));
11144        });
11145    }
11146
11147    #[test]
11148    fn test_ls_rejects_outside_cwd() {
11149        asupersync::test_utils::run_test(|| async {
11150            let cwd = tempfile::tempdir().unwrap();
11151            let outside = tempfile::tempdir().unwrap();
11152            std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
11153
11154            let tool = LsTool::new(cwd.path());
11155            let err = tool
11156                .execute(
11157                    "t",
11158                    serde_json::json!({ "path": outside.path().to_string_lossy() }),
11159                    None,
11160                )
11161                .await
11162                .unwrap_err();
11163            assert!(err.to_string().contains("outside the working directory"));
11164        });
11165    }
11166
11167    #[test]
11168    fn test_ls_trailing_slash_for_dirs() {
11169        asupersync::test_utils::run_test(|| async {
11170            let tmp = tempfile::tempdir().unwrap();
11171            std::fs::write(tmp.path().join("file.txt"), "").unwrap();
11172            std::fs::create_dir(tmp.path().join("mydir")).unwrap();
11173
11174            let tool = LsTool::new(tmp.path());
11175            let out = tool
11176                .execute(
11177                    "t",
11178                    serde_json::json!({ "path": tmp.path().to_string_lossy() }),
11179                    None,
11180                )
11181                .await
11182                .unwrap();
11183            let text = get_text(&out.content);
11184            assert!(
11185                text.contains("mydir/"),
11186                "expected trailing slash for directory, got: {text}"
11187            );
11188        });
11189    }
11190
11191    #[test]
11192    fn test_ls_limit() {
11193        asupersync::test_utils::run_test(|| async {
11194            let tmp = tempfile::tempdir().unwrap();
11195            for i in 0..20 {
11196                std::fs::write(tmp.path().join(format!("item_{i:02}.txt")), "").unwrap();
11197            }
11198
11199            let tool = LsTool::new(tmp.path());
11200            let out = tool
11201                .execute(
11202                    "t",
11203                    serde_json::json!({
11204                        "path": tmp.path().to_string_lossy(),
11205                        "limit": 5
11206                    }),
11207                    None,
11208                )
11209                .await
11210                .unwrap();
11211            let text = get_text(&out.content);
11212            let entry_count = text.lines().filter(|l| l.contains("item_")).count();
11213            assert!(
11214                entry_count <= 5,
11215                "expected at most 5 entries, got {entry_count}"
11216            );
11217            let details = out.details.expect("expected limit details");
11218            assert_eq!(
11219                details
11220                    .get("entryLimitReached")
11221                    .and_then(serde_json::Value::as_u64),
11222                Some(5)
11223            );
11224        });
11225    }
11226
11227    #[test]
11228    fn test_ls_zero_limit_is_rejected() {
11229        asupersync::test_utils::run_test(|| async {
11230            let tmp = tempfile::tempdir().unwrap();
11231            std::fs::write(tmp.path().join("item.txt"), "").unwrap();
11232
11233            let tool = LsTool::new(tmp.path());
11234            let err = tool
11235                .execute(
11236                    "t",
11237                    serde_json::json!({
11238                        "path": tmp.path().to_string_lossy(),
11239                        "limit": 0
11240                    }),
11241                    None,
11242                )
11243                .await
11244                .expect_err("limit=0 should be rejected");
11245
11246            assert!(
11247                err.to_string().contains("`limit` must be greater than 0"),
11248                "expected validation error, got: {err}"
11249            );
11250        });
11251    }
11252
11253    #[test]
11254    fn test_ls_nonexistent_directory() {
11255        asupersync::test_utils::run_test(|| async {
11256            let tmp = tempfile::tempdir().unwrap();
11257            let tool = LsTool::new(tmp.path());
11258            let err = tool
11259                .execute(
11260                    "t",
11261                    serde_json::json!({ "path": tmp.path().join("nope").to_string_lossy() }),
11262                    None,
11263                )
11264                .await;
11265            assert!(err.is_err());
11266        });
11267    }
11268
11269    #[test]
11270    fn test_ls_empty_directory() {
11271        asupersync::test_utils::run_test(|| async {
11272            let tmp = tempfile::tempdir().unwrap();
11273            let empty_dir = tmp.path().join("empty");
11274            std::fs::create_dir(&empty_dir).unwrap();
11275
11276            let tool = LsTool::new(tmp.path());
11277            let out = tool
11278                .execute(
11279                    "t",
11280                    serde_json::json!({ "path": empty_dir.to_string_lossy() }),
11281                    None,
11282                )
11283                .await
11284                .unwrap();
11285            assert!(!out.is_error);
11286        });
11287    }
11288
11289    #[test]
11290    fn test_ls_default_cwd() {
11291        asupersync::test_utils::run_test(|| async {
11292            let tmp = tempfile::tempdir().unwrap();
11293            std::fs::write(tmp.path().join("in_cwd.txt"), "").unwrap();
11294
11295            let tool = LsTool::new(tmp.path());
11296            let out = tool
11297                .execute("t", serde_json::json!({}), None)
11298                .await
11299                .unwrap();
11300            let text = get_text(&out.content);
11301            assert!(
11302                text.contains("in_cwd.txt"),
11303                "expected cwd listing to include the file, got: {text}"
11304            );
11305        });
11306    }
11307
11308    // ========================================================================
11309    // Additional helper tests
11310    // ========================================================================
11311
11312    #[test]
11313    fn test_truncate_head_no_truncation() {
11314        let content = "short".to_string();
11315        let result = truncate_head(content, 100, 1000);
11316        assert!(!result.truncated);
11317        assert_eq!(result.content, "short");
11318        assert_eq!(result.truncated_by, None);
11319    }
11320
11321    #[test]
11322    fn test_truncate_tail_no_truncation() {
11323        let content = "short".to_string();
11324        let result = truncate_tail(content, 100, 1000);
11325        assert!(!result.truncated);
11326        assert_eq!(result.content, "short");
11327    }
11328
11329    #[test]
11330    fn test_truncate_head_empty_input() {
11331        let result = truncate_head(String::new(), 100, 1000);
11332        assert!(!result.truncated);
11333        assert_eq!(result.content, "");
11334    }
11335
11336    #[test]
11337    fn test_truncate_tail_empty_input() {
11338        let result = truncate_tail(String::new(), 100, 1000);
11339        assert!(!result.truncated);
11340        assert_eq!(result.content, "");
11341    }
11342
11343    #[test]
11344    fn test_detect_line_ending_crlf() {
11345        assert_eq!(detect_line_ending("hello\r\nworld"), "\r\n");
11346    }
11347
11348    #[test]
11349    fn test_detect_line_ending_cr() {
11350        assert_eq!(detect_line_ending("hello\rworld"), "\r");
11351    }
11352
11353    #[test]
11354    fn test_detect_line_ending_lf() {
11355        assert_eq!(detect_line_ending("hello\nworld"), "\n");
11356    }
11357
11358    #[test]
11359    fn test_detect_line_ending_no_newline() {
11360        assert_eq!(detect_line_ending("hello world"), "\n");
11361    }
11362
11363    #[test]
11364    fn test_normalize_to_lf() {
11365        assert_eq!(normalize_to_lf("a\r\nb\rc\nd"), "a\nb\nc\nd");
11366    }
11367
11368    #[test]
11369    fn test_count_overlapping_occurrences() {
11370        assert_eq!(count_overlapping_occurrences("aaaa", "aa"), 3);
11371        assert_eq!(count_overlapping_occurrences("abababa", "aba"), 3);
11372        assert_eq!(count_overlapping_occurrences("abc", "d"), 0);
11373        assert_eq!(count_overlapping_occurrences("abc", ""), 0);
11374    }
11375
11376    proptest! {
11377        #![proptest_config(ProptestConfig { cases: 64, .. ProptestConfig::default() })]
11378
11379        #[test]
11380        fn proptest_line_ending_roundtrip_invariant(
11381            input in arbitrary_text(),
11382            ending in prop_oneof![
11383                Just("\n".to_string()),
11384                Just("\r\n".to_string()),
11385                Just("\r".to_string()),
11386            ],
11387        ) {
11388            let normalized = normalize_to_lf(&input);
11389            let restored = restore_line_endings(&normalized, &ending);
11390            let renormalized = normalize_to_lf(&restored);
11391            prop_assert_eq!(renormalized, normalized);
11392        }
11393    }
11394
11395    #[test]
11396    fn test_strip_bom_present() {
11397        let (result, had_bom) = strip_bom("\u{FEFF}hello");
11398        assert_eq!(result, "hello");
11399        assert!(had_bom);
11400    }
11401
11402    #[test]
11403    fn test_strip_bom_absent() {
11404        let (result, had_bom) = strip_bom("hello");
11405        assert_eq!(result, "hello");
11406        assert!(!had_bom);
11407    }
11408
11409    #[test]
11410    fn test_resolve_path_tilde_expansion() {
11411        let cwd = PathBuf::from("/home/user/project");
11412        let result = resolve_path("~/file.txt", &cwd);
11413        // Tilde expansion depends on environment, but should not be literal ~/
11414        assert!(!result.to_string_lossy().starts_with("~/"));
11415    }
11416
11417    fn arbitrary_text() -> impl Strategy<Value = String> {
11418        prop::collection::vec(any::<u8>(), 0..512)
11419            .prop_map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
11420    }
11421
    /// Strategy producing a single `char` for match-text generation.
    ///
    /// Weighted choice: 8 parts fully arbitrary `char`, plus 1 part each for 19
    /// specific Unicode code points (special spaces, typographic quotes,
    /// hyphens/dashes, a zero-width joiner and a combining accent), so those
    /// characters appear far more often than uniform sampling would give.
    fn match_char_strategy() -> impl Strategy<Value = char> {
        prop_oneof![
            8 => any::<char>(),
            // Non-ASCII spaces.
            1 => Just('\u{00A0}'), // NO-BREAK SPACE
            1 => Just('\u{202F}'), // NARROW NO-BREAK SPACE
            1 => Just('\u{205F}'), // MEDIUM MATHEMATICAL SPACE
            1 => Just('\u{3000}'), // IDEOGRAPHIC SPACE
            // Typographic quotation marks.
            1 => Just('\u{2018}'), // LEFT SINGLE QUOTATION MARK
            1 => Just('\u{2019}'), // RIGHT SINGLE QUOTATION MARK
            1 => Just('\u{201C}'), // LEFT DOUBLE QUOTATION MARK
            1 => Just('\u{201D}'), // RIGHT DOUBLE QUOTATION MARK
            1 => Just('\u{201E}'), // DOUBLE LOW-9 QUOTATION MARK
            1 => Just('\u{201F}'), // DOUBLE HIGH-REVERSED-9 QUOTATION MARK
            // Hyphens, dashes and the minus sign.
            1 => Just('\u{2010}'), // HYPHEN
            1 => Just('\u{2011}'), // NON-BREAKING HYPHEN
            1 => Just('\u{2012}'), // FIGURE DASH
            1 => Just('\u{2013}'), // EN DASH
            1 => Just('\u{2014}'), // EM DASH
            1 => Just('\u{2015}'), // HORIZONTAL BAR
            1 => Just('\u{2212}'), // MINUS SIGN
            // Invisible / combining characters.
            1 => Just('\u{200D}'), // ZERO WIDTH JOINER
            1 => Just('\u{0301}'), // COMBINING ACUTE ACCENT
        ]
    }
11446
11447    fn arbitrary_match_text() -> impl Strategy<Value = String> {
11448        prop_oneof![
11449            9 => prop::collection::vec(match_char_strategy(), 0..2048),
11450            1 => prop::collection::vec(match_char_strategy(), 8192..16384),
11451        ]
11452        .prop_map(|chars| chars.into_iter().collect())
11453    }
11454
11455    fn line_char_strategy() -> impl Strategy<Value = char> {
11456        prop_oneof![
11457            8 => any::<char>().prop_filter("single-line chars only", |c| *c != '\n'),
11458            1 => Just('é'),
11459            1 => Just('你'),
11460            1 => Just('😀'),
11461        ]
11462    }
11463
11464    fn boundary_line_text() -> impl Strategy<Value = String> {
11465        prop_oneof![
11466            Just(0usize),
11467            Just(GREP_MAX_LINE_LENGTH.saturating_sub(1)),
11468            Just(GREP_MAX_LINE_LENGTH),
11469            Just(GREP_MAX_LINE_LENGTH + 1),
11470            0usize..(GREP_MAX_LINE_LENGTH + 128),
11471        ]
11472        .prop_flat_map(|len| {
11473            prop::collection::vec(line_char_strategy(), len)
11474                .prop_map(|chars| chars.into_iter().collect())
11475        })
11476    }
11477
11478    fn safe_relative_segment() -> impl Strategy<Value = String> {
11479        prop_oneof![
11480            proptest::string::string_regex("[A-Za-z0-9._-]{1,12}")
11481                .expect("segment regex should compile"),
11482            Just("emoji😀".to_string()),
11483            Just("accent-é".to_string()),
11484            Just("rtl-עברית".to_string()),
11485            Just("line\nbreak".to_string()),
11486            Just("nul\0byte".to_string()),
11487        ]
11488        .prop_filter("segment cannot be . or ..", |segment| {
11489            segment != "." && segment != ".."
11490        })
11491    }
11492
11493    fn safe_relative_path() -> impl Strategy<Value = String> {
11494        prop::collection::vec(safe_relative_segment(), 1..6).prop_map(|segments| segments.join("/"))
11495    }
11496
11497    fn pathish_input() -> impl Strategy<Value = String> {
11498        prop_oneof![
11499            5 => safe_relative_path(),
11500            2 => safe_relative_path().prop_map(|p| format!("../{p}")),
11501            2 => safe_relative_path().prop_map(|p| format!("../../{p}")),
11502            1 => safe_relative_path().prop_map(|p| format!("/tmp/{p}")),
11503            1 => safe_relative_path().prop_map(|p| format!("~/{p}")),
11504            1 => Just("~".to_string()),
11505            1 => Just(".".to_string()),
11506            1 => Just("..".to_string()),
11507            1 => Just("././nested/../file.txt".to_string()),
11508        ]
11509    }
11510
11511    proptest! {
11512        #![proptest_config(ProptestConfig { cases: 64, .. ProptestConfig::default() })]
11513
11514        #[test]
11515        fn proptest_truncate_head_invariants(
11516            input in arbitrary_text(),
11517            max_lines in 0usize..32,
11518            max_bytes in 0usize..256,
11519        ) {
11520            let result = truncate_head(input.clone(), max_lines, max_bytes);
11521
11522            prop_assert!(result.output_lines <= max_lines);
11523            prop_assert!(result.output_bytes <= max_bytes);
11524            prop_assert_eq!(result.output_bytes, result.content.len());
11525
11526            prop_assert_eq!(result.truncated, result.truncated_by.is_some());
11527            prop_assert!(input.starts_with(&result.content));
11528
11529            let repeat = truncate_head(result.content.clone(), max_lines, max_bytes);
11530            prop_assert_eq!(&repeat.content, &result.content);
11531
11532            if result.truncated {
11533                prop_assert!(result.total_lines > max_lines || result.total_bytes > max_bytes);
11534            } else {
11535                prop_assert_eq!(&result.content, &input);
11536                prop_assert!(result.total_lines <= max_lines);
11537                prop_assert!(result.total_bytes <= max_bytes);
11538            }
11539
11540            if result.first_line_exceeds_limit {
11541                prop_assert!(result.truncated);
11542                prop_assert_eq!(result.truncated_by, Some(TruncatedBy::Bytes));
11543                prop_assert!(result.output_bytes <= max_bytes);
11544                prop_assert!(result.output_lines <= 1);
11545                prop_assert!(input.starts_with(&result.content));
11546            }
11547        }
11548
11549        #[test]
11550        fn proptest_truncate_tail_invariants(
11551            input in arbitrary_text(),
11552            max_lines in 0usize..32,
11553            max_bytes in 0usize..256,
11554        ) {
11555            let result = truncate_tail(input.clone(), max_lines, max_bytes);
11556
11557            prop_assert!(result.output_lines <= max_lines);
11558            prop_assert!(result.output_bytes <= max_bytes);
11559            prop_assert_eq!(result.output_bytes, result.content.len());
11560
11561            prop_assert_eq!(result.truncated, result.truncated_by.is_some());
11562            prop_assert!(input.ends_with(&result.content));
11563
11564            let repeat = truncate_tail(result.content.clone(), max_lines, max_bytes);
11565            prop_assert_eq!(&repeat.content, &result.content);
11566
11567            if result.last_line_partial {
11568                prop_assert!(result.truncated);
11569                prop_assert_eq!(result.truncated_by, Some(TruncatedBy::Bytes));
11570                // Partial output may span 1-2 lines when the input has a
11571                // trailing newline (the empty line after \n is preserved).
11572                prop_assert!(result.output_lines >= 1 && result.output_lines <= 2);
11573                let content_trimmed = result.content.trim_end_matches('\n');
11574                prop_assert!(input
11575                    .split('\n')
11576                    .rev()
11577                    .any(|line| line.ends_with(content_trimmed)));
11578            }
11579        }
11580
11581        #[test]
11582        fn proptest_truncate_head_monotonic_limits(
11583            input in arbitrary_text(),
11584            max_lines_a in 0usize..32,
11585            max_lines_b in 0usize..32,
11586            max_bytes_a in 0usize..256,
11587            max_bytes_b in 0usize..256,
11588        ) {
11589            let low_lines = max_lines_a.min(max_lines_b);
11590            let high_lines = max_lines_a.max(max_lines_b);
11591            let low_bytes = max_bytes_a.min(max_bytes_b);
11592            let high_bytes = max_bytes_a.max(max_bytes_b);
11593
11594            let small = truncate_head(input.clone(), low_lines, low_bytes);
11595            let large = truncate_head(input, high_lines, high_bytes);
11596
11597            prop_assert!(large.content.starts_with(&small.content));
11598            prop_assert!(large.output_bytes >= small.output_bytes);
11599            prop_assert!(large.output_lines >= small.output_lines);
11600        }
11601
11602        #[test]
11603        fn proptest_truncate_tail_monotonic_limits(
11604            input in arbitrary_text(),
11605            max_lines_a in 0usize..32,
11606            max_lines_b in 0usize..32,
11607            max_bytes_a in 0usize..256,
11608            max_bytes_b in 0usize..256,
11609        ) {
11610            let low_lines = max_lines_a.min(max_lines_b);
11611            let high_lines = max_lines_a.max(max_lines_b);
11612            let low_bytes = max_bytes_a.min(max_bytes_b);
11613            let high_bytes = max_bytes_a.max(max_bytes_b);
11614
11615            let small = truncate_tail(input.clone(), low_lines, low_bytes);
11616            let large = truncate_tail(input, high_lines, high_bytes);
11617
11618            prop_assert!(large.content.ends_with(&small.content));
11619            prop_assert!(large.output_bytes >= small.output_bytes);
11620            prop_assert!(large.output_lines >= small.output_lines);
11621        }
11622
11623        #[test]
11624        fn proptest_truncate_head_prefix_invariant_under_append(
11625            base in arbitrary_text(),
11626            suffix in arbitrary_text(),
11627            max_lines in 0usize..32,
11628            max_bytes in 0usize..256,
11629        ) {
11630            let base_result = truncate_head(base.clone(), max_lines, max_bytes);
11631            let extended_result = truncate_head(format!("{base}{suffix}"), max_lines, max_bytes);
11632            prop_assert!(extended_result.content.starts_with(&base_result.content));
11633        }
11634
11635        #[test]
11636        fn proptest_truncate_tail_suffix_invariant_under_prepend(
11637            base in arbitrary_text(),
11638            prefix in arbitrary_text(),
11639            max_lines in 0usize..32,
11640            max_bytes in 0usize..256,
11641        ) {
11642            let base_result = truncate_tail(base.clone(), max_lines, max_bytes);
11643            let extended_result = truncate_tail(format!("{prefix}{base}"), max_lines, max_bytes);
11644            prop_assert!(extended_result.content.ends_with(&base_result.content));
11645        }
11646    }
11647
11648    proptest! {
11649        #![proptest_config(ProptestConfig { cases: 128, .. ProptestConfig::default() })]
11650
11651        #[test]
11652        fn proptest_normalize_for_match_invariants(input in arbitrary_match_text()) {
11653            let normalized = normalize_for_match(&input);
11654            let renormalized = normalize_for_match(&normalized);
11655
11656            prop_assert_eq!(&renormalized, &normalized);
11657            prop_assert!(normalized.len() <= input.len());
11658            prop_assert!(
11659                normalized.chars().all(|c| {
11660                    !is_special_unicode_space(c)
11661                        && !matches!(
11662                            c,
11663                            '\u{2018}'
11664                                | '\u{2019}'
11665                                | '\u{201C}'
11666                                | '\u{201D}'
11667                                | '\u{201E}'
11668                                | '\u{201F}'
11669                                | '\u{2010}'
11670                                | '\u{2011}'
11671                                | '\u{2012}'
11672                                | '\u{2013}'
11673                                | '\u{2014}'
11674                                | '\u{2015}'
11675                                | '\u{2212}'
11676                        )
11677                }),
11678                "normalize_for_match should remove target punctuation/space variants"
11679            );
11680        }
11681
11682        #[test]
11683        fn proptest_truncate_line_boundary_invariants(line in boundary_line_text()) {
11684            const TRUNCATION_SUFFIX: &str = "... [truncated]";
11685
11686            let result = truncate_line(&line, GREP_MAX_LINE_LENGTH);
11687            let line_char_count = line.chars().count();
11688            let suffix_chars = TRUNCATION_SUFFIX.chars().count();
11689
11690            if line_char_count <= GREP_MAX_LINE_LENGTH {
11691                prop_assert!(!result.was_truncated);
11692                prop_assert_eq!(result.text, line);
11693            } else {
11694                prop_assert!(result.was_truncated);
11695                prop_assert!(result.text.ends_with(TRUNCATION_SUFFIX));
11696                let expected_prefix: String = line.chars().take(GREP_MAX_LINE_LENGTH).collect();
11697                let expected = format!("{expected_prefix}{TRUNCATION_SUFFIX}");
11698                prop_assert_eq!(&result.text, &expected);
11699                prop_assert!(result.text.chars().count() <= GREP_MAX_LINE_LENGTH + suffix_chars);
11700            }
11701        }
11702
11703        #[test]
11704        fn proptest_resolve_path_safe_relative_invariants(relative_path in safe_relative_path()) {
11705            let cwd = PathBuf::from("/tmp/pi-agent-rust-tools-proptest");
11706            let resolved = resolve_path(&relative_path, &cwd);
11707            let normalized = normalize_dot_segments(&resolved);
11708
11709            prop_assert_eq!(&resolved, &cwd.join(&relative_path));
11710            prop_assert!(resolved.starts_with(&cwd));
11711            prop_assert!(normalized.starts_with(&cwd));
11712            prop_assert_eq!(normalize_dot_segments(&normalized), normalized);
11713        }
11714
11715        #[test]
11716        fn proptest_normalize_dot_segments_pathish_invariants(path_input in pathish_input()) {
11717            let cwd = PathBuf::from("/tmp/pi-agent-rust-tools-proptest");
11718            let resolved = resolve_path(&path_input, &cwd);
11719            let normalized_once = normalize_dot_segments(&resolved);
11720            let normalized_twice = normalize_dot_segments(&normalized_once);
11721
11722            prop_assert_eq!(&normalized_once, &normalized_twice);
11723            prop_assert!(
11724                normalized_once
11725                    .components()
11726                    .all(|component| !matches!(component, std::path::Component::CurDir))
11727            );
11728
11729            if std::path::Path::new(&path_input).is_absolute() {
11730                prop_assert!(resolved.is_absolute());
11731                prop_assert!(normalized_once.is_absolute());
11732            }
11733        }
11734    }
11735
11736    // ========================================================================
11737    // Fuzzy find / edit-matching strategies
11738    // ========================================================================
11739
11740    /// Strategy generating content text with occasional Unicode normalization
11741    /// targets (curly quotes, special spaces, em-dashes) and trailing
11742    /// whitespace.
11743    fn fuzzy_content_strategy() -> impl Strategy<Value = String> {
11744        prop::collection::vec(
11745            prop_oneof![
11746                8 => any::<char>().prop_filter("no nul", |c| *c != '\0'),
11747                1 => Just('\u{00A0}'),
11748                1 => Just('\u{2019}'),
11749                1 => Just('\u{201C}'),
11750                1 => Just('\u{2014}'),
11751            ],
11752            1..512,
11753        )
11754        .prop_map(|chars| chars.into_iter().collect())
11755    }
11756
11757    /// Strategy for generating a needle substring from content. Picks a
11758    /// random sub-slice of the content (may be empty).
11759    fn needle_from_content(content: String) -> impl Strategy<Value = (String, String)> {
11760        let len = content.len();
11761        if len == 0 {
11762            return Just((content, String::new())).boxed();
11763        }
11764        (0..len)
11765            .prop_flat_map(move |start| {
11766                let c = content.clone();
11767                let remaining = c.len() - start;
11768                let max_needle = remaining.min(256);
11769                (Just(c), start..=start + max_needle.saturating_sub(1))
11770            })
11771            .prop_filter_map("valid char boundary", |(c, end)| {
11772                // Find the nearest valid char boundaries
11773                let start_candidates: Vec<usize> =
11774                    (0..c.len()).filter(|i| c.is_char_boundary(*i)).collect();
11775                if start_candidates.is_empty() {
11776                    return None;
11777                }
11778                let start = *start_candidates
11779                    .iter()
11780                    .min_by_key(|&&i| i.abs_diff(end.saturating_sub(end / 2)))
11781                    .unwrap_or(&0);
11782                let end_clamped = end.min(c.len());
11783                // Find next valid char boundary >= end_clamped
11784                let actual_end = (end_clamped..=c.len())
11785                    .find(|i| c.is_char_boundary(*i))
11786                    .unwrap_or(c.len());
11787                if start >= actual_end {
11788                    return Some((c, String::new()));
11789                }
11790                Some((c.clone(), c[start..actual_end].to_string()))
11791            })
11792            .boxed()
11793    }
11794
11795    proptest! {
11796        #![proptest_config(ProptestConfig { cases: 128, .. ProptestConfig::default() })]
11797
11798        /// Exact substrings of content are always found by `fuzzy_find_text`.
11799        #[test]
11800        fn proptest_fuzzy_find_text_exact_match_invariants(
11801            (content, needle) in fuzzy_content_strategy().prop_flat_map(needle_from_content)
11802        ) {
11803            let result = fuzzy_find_text(&content, &needle);
11804            if needle.is_empty() {
11805                // Empty needle: exact match at index 0 (str::find("") == Some(0))
11806                prop_assert!(result.found, "empty needle should always match");
11807                prop_assert_eq!(result.index, 0);
11808                prop_assert_eq!(result.match_length, 0);
11809            } else {
11810                prop_assert!(
11811                    result.found,
11812                    "exact substring must be found: content len={}, needle len={}",
11813                    content.len(),
11814                    needle.len()
11815                );
11816                // The matched span should be valid UTF-8 byte indices
11817                prop_assert!(content.is_char_boundary(result.index));
11818                prop_assert!(content.is_char_boundary(result.index + result.match_length));
11819                // The matched text should contain the needle (exact match path)
11820                let matched = &content[result.index..result.index + result.match_length];
11821                prop_assert_eq!(matched, needle.as_str());
11822            }
11823        }
11824
11825        /// Normalized text with Unicode variants is found via fuzzy matching.
11826        /// If we take content containing curly quotes / em-dashes, normalize
11827        /// it, then search for the normalized version, `fuzzy_find_text` must
11828        /// locate it.
11829        #[test]
11830        fn proptest_fuzzy_find_text_normalized_match_invariants(
11831            content in arbitrary_match_text()
11832        ) {
11833            // Normalize the whole content to get an ASCII-equivalent version
11834            let normalized = build_normalized_content(&content);
11835            if normalized.is_empty() {
11836                return Ok(());
11837            }
11838            // Take a prefix of normalized as needle (up to 128 chars)
11839            let needle_end = normalized
11840                .char_indices()
11841                .nth(128.min(normalized.chars().count().saturating_sub(1)))
11842                .map_or(normalized.len(), |(i, _)| i);
11843            // Find the nearest char boundary
11844            let needle_end = (needle_end..=normalized.len())
11845                .find(|i| normalized.is_char_boundary(*i))
11846                .unwrap_or(normalized.len());
11847            let needle = &normalized[..needle_end];
11848            if needle.is_empty() {
11849                return Ok(());
11850            }
11851
11852            let result = fuzzy_find_text(&content, needle);
11853            prop_assert!(
11854                result.found,
11855                "normalized needle should be found via fuzzy match: needle={:?}",
11856                needle
11857            );
11858            // Verify the result points to valid UTF-8
11859            prop_assert!(content.is_char_boundary(result.index));
11860            prop_assert!(content.is_char_boundary(result.index + result.match_length));
11861        }
11862
11863        /// `build_normalized_content` should be idempotent and never larger
11864        /// than the input.
11865        #[test]
11866        fn proptest_build_normalized_content_invariants(input in arbitrary_match_text()) {
11867            let normalized = build_normalized_content(&input);
11868            let renormalized = build_normalized_content(&normalized);
11869
11870            // Idempotency
11871            prop_assert_eq!(
11872                &renormalized,
11873                &normalized,
11874                "build_normalized_content should be idempotent"
11875            );
11876
11877            // Size: normalized text strips trailing whitespace per line and
11878            // may replace multi-byte Unicode with single-byte ASCII, so it
11879            // should never be larger than the input.
11880            prop_assert!(
11881                normalized.len() <= input.len(),
11882                "normalized should not be larger: {} vs {}",
11883                normalized.len(),
11884                input.len()
11885            );
11886
11887            // Line count should be preserved (normalization does not add or
11888            // remove newlines).
11889            let input_lines = input.split('\n').count();
11890            let norm_lines = normalized.split('\n').count();
11891            prop_assert_eq!(
11892                norm_lines, input_lines,
11893                "line count must be preserved by normalization"
11894            );
11895
11896            // No target Unicode chars should remain
11897            prop_assert!(
11898                normalized.chars().all(|c| {
11899                    !is_special_unicode_space(c)
11900                        && !matches!(
11901                            c,
11902                            '\u{2018}'
11903                                | '\u{2019}'
11904                                | '\u{201C}'
11905                                | '\u{201D}'
11906                                | '\u{201E}'
11907                                | '\u{201F}'
11908                                | '\u{2010}'
11909                                | '\u{2011}'
11910                                | '\u{2012}'
11911                                | '\u{2013}'
11912                                | '\u{2014}'
11913                                | '\u{2015}'
11914                                | '\u{2212}'
11915                        )
11916                }),
11917                "normalized content should not contain target Unicode chars"
11918            );
11919        }
11920
11921        /// Appending trailing whitespace to each line should not change the
11922        /// normalized content (metamorphic invariant).
11923        #[test]
11924        fn proptest_build_normalized_content_trailing_whitespace_invariant(
11925            input in arbitrary_match_text()
11926        ) {
11927            let normalized = build_normalized_content(&input);
11928            let mut with_trailing = String::new();
11929            let mut lines = input.split('\n').peekable();
11930
11931            while let Some(line) = lines.next() {
11932                with_trailing.push_str(line);
11933                with_trailing.push_str("  \t");
11934                if lines.peek().is_some() {
11935                    with_trailing.push('\n');
11936                }
11937            }
11938
11939            let normalized_trailing = build_normalized_content(&with_trailing);
11940            prop_assert_eq!(normalized_trailing, normalized);
11941        }
11942
11943        /// `map_normalized_range_to_original` should produce valid byte
11944        /// ranges in the original content and the extracted original slice,
11945        /// when re-normalized, should start with the expected normalized
11946        /// prefix. Trailing whitespace at line ends makes an exact match
11947        /// impossible (normalization strips it), so we verify the key
11948        /// structural invariant: the range is valid and the non-whitespace
11949        /// content round-trips correctly.
11950        #[test]
11951        fn proptest_map_normalized_range_roundtrip(input in arbitrary_match_text()) {
11952            let normalized = build_normalized_content(&input);
11953            if normalized.is_empty() {
11954                return Ok(());
11955            }
11956
11957            // Pick a range in the normalized text at char boundaries
11958            let norm_chars: Vec<(usize, char)> = normalized.char_indices().collect();
11959            let norm_len = norm_chars.len();
11960            if norm_len == 0 {
11961                return Ok(());
11962            }
11963
11964            // Use the first quarter as the match range for determinism
11965            let end_char = (norm_len / 4).max(1).min(norm_len);
11966            let norm_start = norm_chars[0].0;
11967            let norm_end = if end_char < norm_chars.len() {
11968                norm_chars[end_char].0
11969            } else {
11970                normalized.len()
11971            };
11972            let norm_match_len = norm_end - norm_start;
11973
11974            let (orig_start, orig_len) =
11975                map_normalized_range_to_original(&input, norm_start, norm_match_len);
11976
11977            // Invariant 1: result is within input bounds
11978            prop_assert!(
11979                orig_start + orig_len <= input.len(),
11980                "mapped range {orig_start}..{} exceeds input len {}",
11981                orig_start + orig_len,
11982                input.len()
11983            );
11984
11985            // Invariant 2: result is at valid char boundaries
11986            prop_assert!(
11987                input.is_char_boundary(orig_start),
11988                "orig_start {} is not a char boundary",
11989                orig_start
11990            );
11991            prop_assert!(
11992                input.is_char_boundary(orig_start + orig_len),
11993                "orig_end {} is not a char boundary",
11994                orig_start + orig_len
11995            );
11996
11997            // Invariant 3: original range is at least as large as
11998            // normalized range (original may include trailing whitespace
11999            // and multi-byte Unicode chars that normalize to fewer bytes)
12000            prop_assert!(
12001                orig_len >= norm_match_len
12002                    || orig_len == 0
12003                    || norm_match_len == 0,
12004                "original range ({orig_len}) should be >= normalized range ({norm_match_len})"
12005            );
12006
12007            // Invariant 4: the normalized expected slice, when searched
12008            // for in the original content via fuzzy_find_text, should be
12009            // found at or before the mapped position.
12010            let expected_norm = &normalized[norm_start..norm_end];
12011            if !expected_norm.is_empty() {
12012                let fuzzy_result = fuzzy_find_text(&input, expected_norm);
12013                prop_assert!(
12014                    fuzzy_result.found,
12015                    "normalized needle should be findable in original content"
12016                );
12017            }
12018        }
12019    }
12020
12021    #[test]
12022    fn test_truncate_head_preserves_newline() {
12023        // "Line1\nLine2" truncated to 1 line should be "Line1\n"
12024        let content = "Line1\nLine2".to_string();
12025        let result = truncate_head(content, 1, 1000);
12026        assert_eq!(result.content, "Line1\n");
12027
12028        // "Line1" truncated to 1 line should be "Line1"
12029        let content = "Line1".to_string();
12030        let result = truncate_head(content, 1, 1000);
12031        assert_eq!(result.content, "Line1");
12032
12033        // "Line1\n" truncated to 1 line should be "Line1\n"
12034        let content = "Line1\n".to_string();
12035        let result = truncate_head(content, 1, 1000);
12036        assert_eq!(result.content, "Line1\n");
12037    }
12038
12039    #[test]
12040    fn test_edit_crlf_content_correctness() {
12041        // Regression test: ensure we don't mix original indices with normalized content slices.
12042        asupersync::test_utils::run_test(|| async {
12043            let tmp = tempfile::tempdir().unwrap();
12044            let path = tmp.path().join("crlf.txt");
12045            // "line1" (5) + "\r\n" (2) + "line2" (5) + "\r\n" (2) + "line3" (5) = 19 bytes
12046            let content = "line1\r\nline2\r\nline3";
12047            std::fs::write(&path, content).unwrap();
12048
12049            let tool = EditTool::new(tmp.path());
12050
12051            // Replacing "line2" should work correctly and preserve CRLF.
12052            // Original "line2" is at index 7. Normalized "line2" is at index 6.
12053            // If we used original index (7) on normalized string ("line1\nline2\nline3"),
12054            // we would start at "ine2..." instead of "line2...", corrupting the file.
12055            let out = tool
12056                .execute(
12057                    "t",
12058                    serde_json::json!({
12059                        "path": path.to_string_lossy(),
12060                        "oldText": "line2",
12061                        "newText": "changed"
12062                    }),
12063                    None,
12064                )
12065                .await
12066                .unwrap();
12067
12068            assert!(!out.is_error);
12069            let new_content = std::fs::read_to_string(&path).unwrap();
12070
12071            // Expect: "line1\r\nchanged\r\nline3"
12072            assert_eq!(new_content, "line1\r\nchanged\r\nline3");
12073        });
12074    }
12075
12076    #[test]
12077    fn test_edit_cr_content_correctness() {
12078        asupersync::test_utils::run_test(|| async {
12079            let tmp = tempfile::tempdir().unwrap();
12080            let path = tmp.path().join("cr.txt");
12081            std::fs::write(&path, "line1\rline2\rline3").unwrap();
12082
12083            let tool = EditTool::new(tmp.path());
12084            let out = tool
12085                .execute(
12086                    "t",
12087                    serde_json::json!({
12088                        "path": path.to_string_lossy(),
12089                        "oldText": "line2",
12090                        "newText": "changed"
12091                    }),
12092                    None,
12093                )
12094                .await
12095                .unwrap();
12096
12097            assert!(!out.is_error);
12098            let new_content = std::fs::read_to_string(&path).unwrap();
12099            assert_eq!(new_content, "line1\rchanged\rline3");
12100        });
12101    }
12102
12103    // ========================================================================
12104    // Hashline tests
12105    // ========================================================================
12106
12107    #[test]
12108    fn test_compute_line_hash_basic() {
12109        // Same content at same index should produce same hash
12110        let h1 = compute_line_hash(0, "fn main() {");
12111        let h2 = compute_line_hash(0, "fn main() {");
12112        assert_eq!(h1, h2);
12113
12114        // Different content should (usually) produce different hash
12115        let h3 = compute_line_hash(0, "fn foo() {");
12116        // Not guaranteed different for all inputs, but these specific ones should differ
12117        assert_ne!(h1, h3);
12118
12119        // Hash is 2 bytes from NIBBLE_STR
12120        for &b in &h1 {
12121            assert!(NIBBLE_STR.contains(&b), "hash byte {b} not in NIBBLE_STR");
12122        }
12123    }
12124
12125    #[test]
12126    fn test_compute_line_hash_punctuation_only() {
12127        // Punctuation-only lines use line_idx as seed, so same content at
12128        // different indices should produce different hashes.
12129        let h1 = compute_line_hash(0, "}");
12130        let h2 = compute_line_hash(1, "}");
12131        assert_ne!(
12132            h1, h2,
12133            "punctuation-only lines at different indices should differ"
12134        );
12135
12136        // Blank lines also use idx as seed
12137        let h3 = compute_line_hash(0, "");
12138        let h4 = compute_line_hash(1, "");
12139        assert_ne!(h3, h4);
12140    }
12141
12142    #[test]
12143    fn test_compute_line_hash_whitespace_invariant() {
12144        // Leading/trailing whitespace should not affect hash (whitespace stripped)
12145        let h1 = compute_line_hash(0, "return 42;");
12146        let h2 = compute_line_hash(0, "    return 42;");
12147        let h3 = compute_line_hash(0, "\treturn 42;");
12148        assert_eq!(h1, h2);
12149        assert_eq!(h1, h3);
12150    }
12151
12152    #[test]
12153    fn test_format_hashline_tag() {
12154        let tag = format_hashline_tag(0, "fn main() {");
12155        // Should be "1#XX" format (1-indexed)
12156        assert!(
12157            tag.starts_with("1#"),
12158            "tag should start with 1#, got: {tag}"
12159        );
12160        assert_eq!(tag.len(), 4, "tag should be 4 chars: N#AB");
12161
12162        let tag10 = format_hashline_tag(9, "line 10");
12163        assert!(tag10.starts_with("10#"));
12164        assert_eq!(tag10.len(), 5); // "10#AB"
12165    }
12166
12167    #[test]
12168    fn test_parse_hashline_tag_valid() {
12169        // Simple valid tag
12170        let (line, hash) = parse_hashline_tag("5#KJ").unwrap();
12171        assert_eq!(line, 5);
12172        assert_eq!(hash, [b'K', b'J']);
12173
12174        // With spaces around #
12175        let (line, hash) = parse_hashline_tag("  10 # QR ").unwrap();
12176        assert_eq!(line, 10);
12177        assert_eq!(hash, [b'Q', b'R']);
12178
12179        // With diff markers
12180        let (line, hash) = parse_hashline_tag("> + 3#ZZ").unwrap();
12181        assert_eq!(line, 3);
12182        assert_eq!(hash, [b'Z', b'Z']);
12183    }
12184
12185    #[test]
12186    fn test_parse_hashline_tag_invalid() {
12187        // Line number 0
12188        assert!(parse_hashline_tag("0#KJ").is_err());
12189        // No hash
12190        assert!(parse_hashline_tag("5#").is_err());
12191        // Invalid chars in hash
12192        assert!(parse_hashline_tag("5#AA").is_err()); // 'A' not in NIBBLE_STR
12193        // No number
12194        assert!(parse_hashline_tag("#KJ").is_err());
12195        // Empty
12196        assert!(parse_hashline_tag("").is_err());
12197    }
12198
12199    #[test]
12200    fn test_strip_hashline_prefix() {
12201        assert_eq!(strip_hashline_prefix("5#KJ:hello world"), "hello world");
12202        assert_eq!(strip_hashline_prefix("100#ZZ:fn main() {"), "fn main() {");
12203        assert_eq!(strip_hashline_prefix(" 5 # KJ:hello world"), "hello world");
12204        assert_eq!(strip_hashline_prefix("> + 5#KJ:hello world"), "hello world");
12205        assert_eq!(strip_hashline_prefix("5#KJ :hello world"), "hello world");
12206        // No prefix → unchanged
12207        assert_eq!(strip_hashline_prefix("hello world"), "hello world");
12208        assert_eq!(strip_hashline_prefix(""), "");
12209    }
12210
12211    #[test]
12212    fn test_hashline_edit_single_replace() {
12213        asupersync::test_utils::run_test(|| async {
12214            let dir = tempfile::tempdir().unwrap();
12215            let file = dir.path().join("test.txt");
12216            std::fs::write(&file, "line1\nline2\nline3\n").unwrap();
12217
12218            let tool = HashlineEditTool::new(dir.path());
12219
12220            // Get the hash for line 2 (idx=1)
12221            let tag2 = format_hashline_tag(1, "line2");
12222
12223            let input = serde_json::json!({
12224                "path": file.to_str().unwrap(),
12225                "edits": [{
12226                    "op": "replace",
12227                    "pos": tag2,
12228                    "lines": ["changed"]
12229                }]
12230            });
12231
12232            let out = tool.execute("test", input, None).await.unwrap();
12233            assert!(!out.is_error);
12234
12235            let content = std::fs::read_to_string(&file).unwrap();
12236            assert_eq!(content, "line1\nchanged\nline3\n");
12237        });
12238    }
12239
12240    #[test]
12241    fn test_hashline_edit_range_replace() {
12242        asupersync::test_utils::run_test(|| async {
12243            let dir = tempfile::tempdir().unwrap();
12244            let file = dir.path().join("test.txt");
12245            std::fs::write(&file, "a\nb\nc\nd\ne\n").unwrap();
12246
12247            let tool = HashlineEditTool::new(dir.path());
12248
12249            let tag_b = format_hashline_tag(1, "b");
12250            let tag_d = format_hashline_tag(3, "d");
12251
12252            let input = serde_json::json!({
12253                "path": file.to_str().unwrap(),
12254                "edits": [{
12255                    "op": "replace",
12256                    "pos": tag_b,
12257                    "end": tag_d,
12258                    "lines": ["X", "Y"]
12259                }]
12260            });
12261
12262            let out = tool.execute("test", input, None).await.unwrap();
12263            assert!(!out.is_error);
12264
12265            let content = std::fs::read_to_string(&file).unwrap();
12266            assert_eq!(content, "a\nX\nY\ne\n");
12267        });
12268    }
12269
12270    #[test]
12271    fn test_hashline_edit_prepend() {
12272        asupersync::test_utils::run_test(|| async {
12273            let dir = tempfile::tempdir().unwrap();
12274            let file = dir.path().join("test.txt");
12275            std::fs::write(&file, "a\nb\nc\n").unwrap();
12276
12277            let tool = HashlineEditTool::new(dir.path());
12278            let tag_b = format_hashline_tag(1, "b");
12279
12280            let input = serde_json::json!({
12281                "path": file.to_str().unwrap(),
12282                "edits": [{
12283                    "op": "prepend",
12284                    "pos": tag_b,
12285                    "lines": ["inserted"]
12286                }]
12287            });
12288
12289            let out = tool.execute("test", input, None).await.unwrap();
12290            assert!(!out.is_error);
12291
12292            let content = std::fs::read_to_string(&file).unwrap();
12293            assert_eq!(content, "a\ninserted\nb\nc\n");
12294        });
12295    }
12296
12297    #[test]
12298    fn test_hashline_edit_append() {
12299        asupersync::test_utils::run_test(|| async {
12300            let dir = tempfile::tempdir().unwrap();
12301            let file = dir.path().join("test.txt");
12302            std::fs::write(&file, "a\nb\nc\n").unwrap();
12303
12304            let tool = HashlineEditTool::new(dir.path());
12305            let tag_b = format_hashline_tag(1, "b");
12306
12307            let input = serde_json::json!({
12308                "path": file.to_str().unwrap(),
12309                "edits": [{
12310                    "op": "append",
12311                    "pos": tag_b,
12312                    "lines": ["inserted"]
12313                }]
12314            });
12315
12316            let out = tool.execute("test", input, None).await.unwrap();
12317            assert!(!out.is_error);
12318
12319            let content = std::fs::read_to_string(&file).unwrap();
12320            assert_eq!(content, "a\nb\ninserted\nc\n");
12321        });
12322    }
12323
12324    #[test]
12325    fn test_hashline_edit_bottom_up_ordering() {
12326        asupersync::test_utils::run_test(|| async {
12327            let dir = tempfile::tempdir().unwrap();
12328            let file = dir.path().join("test.txt");
12329            std::fs::write(&file, "a\nb\nc\nd\n").unwrap();
12330
12331            let tool = HashlineEditTool::new(dir.path());
12332            let tag_b = format_hashline_tag(1, "b");
12333            let tag_d = format_hashline_tag(3, "d");
12334
12335            // Two edits at different positions — both should apply correctly
12336            let input = serde_json::json!({
12337                "path": file.to_str().unwrap(),
12338                "edits": [
12339                    { "op": "replace", "pos": tag_b, "lines": ["B"] },
12340                    { "op": "replace", "pos": tag_d, "lines": ["D"] }
12341                ]
12342            });
12343
12344            let out = tool.execute("test", input, None).await.unwrap();
12345            assert!(!out.is_error);
12346
12347            let content = std::fs::read_to_string(&file).unwrap();
12348            assert_eq!(content, "a\nB\nc\nD\n");
12349        });
12350    }
12351
12352    #[test]
12353    fn test_hashline_edit_hash_mismatch() {
12354        asupersync::test_utils::run_test(|| async {
12355            let dir = tempfile::tempdir().unwrap();
12356            let file = dir.path().join("test.txt");
12357            std::fs::write(&file, "hello\nworld\n").unwrap();
12358
12359            let tool = HashlineEditTool::new(dir.path());
12360
12361            // Use a deliberately wrong hash
12362            let input = serde_json::json!({
12363                "path": file.to_str().unwrap(),
12364                "edits": [{
12365                    "op": "replace",
12366                    "pos": "1#ZZ",
12367                    "lines": ["changed"]
12368                }]
12369            });
12370
12371            let result = tool.execute("test", input, None).await;
12372            assert!(result.is_err());
12373            let err_msg = result.unwrap_err().to_string();
12374            assert!(
12375                err_msg.contains("Hash validation failed"),
12376                "error should mention hash validation: {err_msg}"
12377            );
12378        });
12379    }
12380
12381    #[test]
12382    fn test_hashline_edit_dedup() {
12383        asupersync::test_utils::run_test(|| async {
12384            let dir = tempfile::tempdir().unwrap();
12385            let file = dir.path().join("test.txt");
12386            std::fs::write(&file, "a\nb\nc\n").unwrap();
12387
12388            let tool = HashlineEditTool::new(dir.path());
12389            let tag_b = format_hashline_tag(1, "b");
12390
12391            // Duplicate edits should be deduplicated
12392            let input = serde_json::json!({
12393                "path": file.to_str().unwrap(),
12394                "edits": [
12395                    { "op": "replace", "pos": &tag_b, "lines": ["B"] },
12396                    { "op": "replace", "pos": &tag_b, "lines": ["B"] }
12397                ]
12398            });
12399
12400            let out = tool.execute("test", input, None).await.unwrap();
12401            assert!(!out.is_error);
12402
12403            let content = std::fs::read_to_string(&file).unwrap();
12404            assert_eq!(content, "a\nB\nc\n");
12405        });
12406    }
12407
12408    #[test]
12409    fn test_hashline_edit_noop_detection() {
12410        asupersync::test_utils::run_test(|| async {
12411            let dir = tempfile::tempdir().unwrap();
12412            let file = dir.path().join("test.txt");
12413            std::fs::write(&file, "a\nb\nc\n").unwrap();
12414
12415            let tool = HashlineEditTool::new(dir.path());
12416            let tag_b = format_hashline_tag(1, "b");
12417
12418            // Replacing with identical content is a no-op
12419            let input = serde_json::json!({
12420                "path": file.to_str().unwrap(),
12421                "edits": [{
12422                    "op": "replace",
12423                    "pos": &tag_b,
12424                    "lines": ["b"]
12425                }]
12426            });
12427
12428            let result = tool.execute("test", input, None).await;
12429            assert!(result.is_err());
12430            let err_msg = result.unwrap_err().to_string();
12431            assert!(
12432                err_msg.contains("no-ops"),
12433                "error should mention no-ops: {err_msg}"
12434            );
12435        });
12436    }
12437
12438    #[test]
12439    fn test_hashline_read_output_format() {
12440        asupersync::test_utils::run_test(|| async {
12441            let dir = tempfile::tempdir().unwrap();
12442            let file = dir.path().join("test.txt");
12443            std::fs::write(&file, "fn main() {\n    println!(\"hello\");\n}\n").unwrap();
12444
12445            let tool = ReadTool::new(dir.path());
12446            let input = serde_json::json!({
12447                "path": file.to_str().unwrap(),
12448                "hashline": true
12449            });
12450
12451            let out = tool.execute("test", input, None).await.unwrap();
12452            assert!(!out.is_error);
12453            let text = get_text(&out.content);
12454
12455            // Each line should be in N#AB:content format
12456            for line in text.lines() {
12457                if line.starts_with('[') || line.is_empty() {
12458                    continue; // skip metadata lines
12459                }
12460                assert!(
12461                    hashline_tag_regex().is_match(line),
12462                    "line should match hashline format: {line:?}"
12463                );
12464                assert!(
12465                    line.contains(':'),
12466                    "line should contain ':' separator: {line:?}"
12467                );
12468            }
12469
12470            // First line should start with "1#"
12471            let first_line = text.lines().next().unwrap();
12472            assert!(first_line.starts_with("1#"), "first line: {first_line:?}");
12473        });
12474    }
12475
12476    #[test]
12477    fn test_hashline_edit_prefix_stripping() {
12478        asupersync::test_utils::run_test(|| async {
12479            let dir = tempfile::tempdir().unwrap();
12480            let file = dir.path().join("test.txt");
12481            std::fs::write(&file, "a\nb\nc\n").unwrap();
12482
12483            let tool = HashlineEditTool::new(dir.path());
12484            let tag_b = format_hashline_tag(1, "b");
12485
12486            // Model copies hashline tags into replacement — they should be stripped
12487            let input = serde_json::json!({
12488                "path": file.to_str().unwrap(),
12489                "edits": [{
12490                    "op": "replace",
12491                    "pos": &tag_b,
12492                    "lines": ["2#KJ:changed"]
12493                }]
12494            });
12495
12496            let out = tool.execute("test", input, None).await.unwrap();
12497            assert!(!out.is_error);
12498
12499            let content = std::fs::read_to_string(&file).unwrap();
12500            assert_eq!(content, "a\nchanged\nc\n");
12501        });
12502    }
12503
12504    #[test]
12505    fn test_hashline_edit_delete_lines() {
12506        asupersync::test_utils::run_test(|| async {
12507            let dir = tempfile::tempdir().unwrap();
12508            let file = dir.path().join("test.txt");
12509            std::fs::write(&file, "a\nb\nc\nd\n").unwrap();
12510
12511            let tool = HashlineEditTool::new(dir.path());
12512            let tag_b = format_hashline_tag(1, "b");
12513            let tag_c = format_hashline_tag(2, "c");
12514
12515            // Replace range with null (delete)
12516            let input = serde_json::json!({
12517                "path": file.to_str().unwrap(),
12518                "edits": [{
12519                    "op": "replace",
12520                    "pos": &tag_b,
12521                    "end": &tag_c,
12522                    "lines": null
12523                }]
12524            });
12525
12526            let out = tool.execute("test", input, None).await.unwrap();
12527            assert!(!out.is_error);
12528
12529            let content = std::fs::read_to_string(&file).unwrap();
12530            assert_eq!(content, "a\nd\n");
12531        });
12532    }
12533
12534    #[test]
12535    fn test_hashline_edit_crlf_preservation() {
12536        asupersync::test_utils::run_test(|| async {
12537            let dir = tempfile::tempdir().unwrap();
12538            let file = dir.path().join("test.txt");
12539            std::fs::write(&file, "line1\r\nline2\r\nline3").unwrap();
12540
12541            let tool = HashlineEditTool::new(dir.path());
12542            let tag2 = format_hashline_tag(1, "line2");
12543
12544            let input = serde_json::json!({
12545                "path": file.to_str().unwrap(),
12546                "edits": [{
12547                    "op": "replace",
12548                    "pos": tag2,
12549                    "lines": ["changed"]
12550                }]
12551            });
12552
12553            let out = tool.execute("test", input, None).await.unwrap();
12554            assert!(!out.is_error);
12555
12556            let content = std::fs::read_to_string(&file).unwrap();
12557            assert_eq!(content, "line1\r\nchanged\r\nline3");
12558        });
12559    }
12560
12561    #[test]
12562    fn test_hashline_edit_cr_preservation() {
12563        asupersync::test_utils::run_test(|| async {
12564            let dir = tempfile::tempdir().unwrap();
12565            let file = dir.path().join("test.txt");
12566            std::fs::write(&file, "line1\rline2\rline3").unwrap();
12567
12568            let tool = HashlineEditTool::new(dir.path());
12569            let tag2 = format_hashline_tag(1, "line2");
12570
12571            let input = serde_json::json!({
12572                "path": file.to_str().unwrap(),
12573                "edits": [{
12574                    "op": "replace",
12575                    "pos": tag2,
12576                    "lines": ["changed"]
12577                }]
12578            });
12579
12580            let out = tool.execute("test", input, None).await.unwrap();
12581            assert!(!out.is_error);
12582
12583            let content = std::fs::read_to_string(&file).unwrap();
12584            assert_eq!(content, "line1\rchanged\rline3");
12585        });
12586    }
12587
12588    #[test]
12589    fn test_hashline_edit_empty_file_append() {
12590        asupersync::test_utils::run_test(|| async {
12591            let dir = tempfile::tempdir().unwrap();
12592            let file = dir.path().join("empty.txt");
12593            std::fs::write(&file, "").unwrap();
12594
12595            let tool = HashlineEditTool::new(dir.path());
12596
12597            // EOF append with no pos on empty file
12598            let input = serde_json::json!({
12599                "path": file.to_str().unwrap(),
12600                "edits": [{
12601                    "op": "append",
12602                    "lines": ["new_line"]
12603                }]
12604            });
12605
12606            let out = tool.execute("test", input, None).await.unwrap();
12607            assert!(!out.is_error);
12608
12609            let content = std::fs::read_to_string(&file).unwrap();
12610            assert!(content.contains("new_line"));
12611        });
12612    }
12613
12614    #[test]
12615    fn test_hashline_edit_single_line_no_trailing_newline() {
12616        asupersync::test_utils::run_test(|| async {
12617            let dir = tempfile::tempdir().unwrap();
12618            let file = dir.path().join("single.txt");
12619            std::fs::write(&file, "hello").unwrap();
12620
12621            let tool = HashlineEditTool::new(dir.path());
12622            let tag = format_hashline_tag(0, "hello");
12623
12624            let input = serde_json::json!({
12625                "path": file.to_str().unwrap(),
12626                "edits": [{
12627                    "op": "replace",
12628                    "pos": tag,
12629                    "lines": ["world"]
12630                }]
12631            });
12632
12633            let out = tool.execute("test", input, None).await.unwrap();
12634            assert!(!out.is_error);
12635
12636            let content = std::fs::read_to_string(&file).unwrap();
12637            assert_eq!(content, "world");
12638        });
12639    }
12640
12641    #[test]
12642    fn test_hashline_edit_preserves_bom_hash_validation() {
12643        asupersync::test_utils::run_test(|| async {
12644            let dir = tempfile::tempdir().unwrap();
12645            let file = dir.path().join("bom.txt");
12646            let bom = "\u{FEFF}";
12647            std::fs::write(&file, format!("{bom}alpha\nbeta\n")).unwrap();
12648
12649            let tool = HashlineEditTool::new(dir.path());
12650            let tag1 = format_hashline_tag(0, &format!("{bom}alpha"));
12651
12652            let input = serde_json::json!({
12653                "path": file.to_str().unwrap(),
12654                "edits": [{
12655                    "op": "replace",
12656                    "pos": tag1,
12657                    "lines": ["gamma"]
12658                }]
12659            });
12660
12661            let out = tool.execute("test", input, None).await.unwrap();
12662            assert!(!out.is_error);
12663
12664            let content = std::fs::read_to_string(&file).unwrap();
12665            assert_eq!(content, format!("{bom}gamma\nbeta\n"));
12666        });
12667    }
12668
12669    #[test]
12670    fn test_hashline_edit_bof_prepend_no_pos() {
12671        asupersync::test_utils::run_test(|| async {
12672            let dir = tempfile::tempdir().unwrap();
12673            let file = dir.path().join("test.txt");
12674            std::fs::write(&file, "a\nb\nc\n").unwrap();
12675
12676            let tool = HashlineEditTool::new(dir.path());
12677
12678            // Prepend with no pos should insert at BOF (before line 0)
12679            let input = serde_json::json!({
12680                "path": file.to_str().unwrap(),
12681                "edits": [{
12682                    "op": "prepend",
12683                    "lines": ["header"]
12684                }]
12685            });
12686
12687            let out = tool.execute("test", input, None).await.unwrap();
12688            assert!(!out.is_error);
12689
12690            let content = std::fs::read_to_string(&file).unwrap();
12691            assert_eq!(content, "header\na\nb\nc\n");
12692        });
12693    }
12694
12695    #[test]
12696    fn test_hashline_edit_eof_append_no_pos() {
12697        asupersync::test_utils::run_test(|| async {
12698            let dir = tempfile::tempdir().unwrap();
12699            let file = dir.path().join("test.txt");
12700            std::fs::write(&file, "a\nb\nc\n").unwrap();
12701
12702            let tool = HashlineEditTool::new(dir.path());
12703
12704            // Append with no pos should insert at EOF (after last line)
12705            let input = serde_json::json!({
12706                "path": file.to_str().unwrap(),
12707                "edits": [{
12708                    "op": "append",
12709                    "lines": ["footer"]
12710                }]
12711            });
12712
12713            let out = tool.execute("test", input, None).await.unwrap();
12714            assert!(!out.is_error);
12715
12716            let content = std::fs::read_to_string(&file).unwrap();
12717            assert!(
12718                content.contains("footer"),
12719                "content should contain footer: {content:?}"
12720            );
12721        });
12722    }
12723
12724    #[test]
12725    fn test_hashline_edit_overlapping_replace_ranges_rejected() {
12726        asupersync::test_utils::run_test(|| async {
12727            let dir = tempfile::tempdir().unwrap();
12728            let file = dir.path().join("test.txt");
12729            std::fs::write(&file, "a\nb\nc\nd\ne\n").unwrap();
12730
12731            let tool = HashlineEditTool::new(dir.path());
12732            let tag_b = format_hashline_tag(1, "b");
12733            let tag_d = format_hashline_tag(3, "d");
12734            let tag_c = format_hashline_tag(2, "c");
12735            let tag_e = format_hashline_tag(4, "e");
12736
12737            // Two overlapping replace ranges: lines 2-4 and lines 3-5
12738            let input = serde_json::json!({
12739                "path": file.to_str().unwrap(),
12740                "edits": [
12741                    { "op": "replace", "pos": &tag_b, "end": &tag_d, "lines": ["X"] },
12742                    { "op": "replace", "pos": &tag_c, "end": &tag_e, "lines": ["Y"] }
12743                ]
12744            });
12745
12746            let result = tool.execute("test", input, None).await;
12747            assert!(result.is_err());
12748            let err_msg = result.unwrap_err().to_string();
12749            assert!(
12750                err_msg.contains("Overlapping"),
12751                "error should mention overlapping: {err_msg}"
12752            );
12753        });
12754    }
12755
12756    #[test]
12757    fn test_hashline_edit_reversed_range_rejected() {
12758        asupersync::test_utils::run_test(|| async {
12759            let dir = tempfile::tempdir().unwrap();
12760            let file = dir.path().join("test.txt");
12761            std::fs::write(&file, "a\nb\nc\nd\n").unwrap();
12762
12763            let tool = HashlineEditTool::new(dir.path());
12764            let tag_b = format_hashline_tag(1, "b");
12765            let tag_d = format_hashline_tag(3, "d");
12766
12767            // End anchor before start anchor
12768            let input = serde_json::json!({
12769                "path": file.to_str().unwrap(),
12770                "edits": [{
12771                    "op": "replace",
12772                    "pos": &tag_d,
12773                    "end": &tag_b,
12774                    "lines": ["X"]
12775                }]
12776            });
12777
12778            let result = tool.execute("test", input, None).await;
12779            assert!(result.is_err());
12780            let err_msg = result.unwrap_err().to_string();
12781            assert!(
12782                err_msg.contains("before start"),
12783                "error should mention before start: {err_msg}"
12784            );
12785        });
12786    }
12787
12788    #[test]
12789    fn test_hashline_edit_trailing_newline_semantics() {
12790        asupersync::test_utils::run_test(|| async {
12791            let dir = tempfile::tempdir().unwrap();
12792            let file = dir.path().join("test.txt");
12793            // File with trailing newline: split produces ["line1", "line2", ""]
12794            std::fs::write(&file, "line1\nline2\n").unwrap();
12795
12796            let tool = HashlineEditTool::new(dir.path());
12797            let tag2 = format_hashline_tag(1, "line2");
12798
12799            // Replace line2, trailing newline should be preserved
12800            let input = serde_json::json!({
12801                "path": file.to_str().unwrap(),
12802                "edits": [{
12803                    "op": "replace",
12804                    "pos": tag2,
12805                    "lines": ["changed"]
12806                }]
12807            });
12808
12809            let out = tool.execute("test", input, None).await.unwrap();
12810            assert!(!out.is_error);
12811
12812            let content = std::fs::read_to_string(&file).unwrap();
12813            assert_eq!(content, "line1\nchanged\n");
12814        });
12815    }
12816}
pi/tools.rs

pi/
tools.rs