Skip to main content

sa3p_engine/
lib.rs

1//! Sans-IO execution engine for SA3P operator instructions.
2//!
3//! The engine consumes `sa3p_parser::Instruction` values and executes
4//! command semantics via trait boundaries:
5//! - `VirtualFileSystem`
6//! - `TerminalProvider`
7//!
8//! This keeps business logic transport/host agnostic while still producing
9//! deterministic per-turn outputs and state headers.
10
11use std::collections::HashMap;
12use std::fmt::Write as _;
13use std::path::{Path, PathBuf};
14use std::time::Duration;
15
16use sa3p_parser::{Attributes, Instruction};
17use strsim::levenshtein;
18use thiserror::Error;
19
20#[derive(Debug, Error)]
21pub enum EngineError {
22    #[error("missing required attribute `{0}`")]
23    MissingAttribute(&'static str),
24    #[error("invalid integer for attribute `{name}`: {value}")]
25    InvalidInteger { name: &'static str, value: String },
26    #[error("operation is out of order: {0}")]
27    InvalidState(String),
28    #[error("invalid utf-8 payload")]
29    InvalidUtf8,
30    #[error("virtual filesystem error: {0}")]
31    Vfs(String),
32    #[error("terminal error: {0}")]
33    Terminal(String),
34}
35
36pub type Result<T> = std::result::Result<T, EngineError>;
37
/// A file path paired with the digest string reported for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Location of the file.
    pub path: PathBuf,
    /// Digest text for the file; only its first 8 characters are shown when a
    /// state header is rendered.
    pub sha256: String,
}
43
/// Distinguishes the two kinds of entries a tree listing can contain.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// A regular file entry (rendered with a `[file]` prefix in listings).
    File,
    /// A directory entry (rendered with a `[dir]` prefix in listings).
    Directory,
}
49
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct TreeNode {
52    pub path: PathBuf,
53    pub kind: NodeKind,
54    pub descendant_file_count: usize,
55    pub modified_recently: bool,
56}
57
58pub trait VirtualFileSystem {
59    fn read(&self, path: &Path) -> Result<Vec<u8>>;
60    fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
61    fn hash(&self, path: &Path) -> Result<String>;
62    fn cwd(&self) -> Result<PathBuf>;
63    fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
64    fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
65}
66
/// Signals that can be delivered to a process through a [`TerminalProvider`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
    /// Selected by the attribute value `SIGINT` (also the default).
    SigInt,
    /// Selected by the attribute value `SIGTERM`.
    SigTerm,
    /// Selected by the attribute value `SIGKILL`.
    SigKill,
}
73
/// Outcome of running one command through a [`TerminalProvider`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
    /// Text produced by the command.
    pub output: String,
    /// Exit status, when one is available.
    // NOTE(review): presumably `None` while the process is still running/detached — confirm.
    pub exit_code: Option<i32>,
    /// Working directory reported alongside the execution.
    pub cwd: PathBuf,
    /// Process id reported for a command that was detached, if any.
    pub detached_pid: Option<u32>,
}
81
82pub trait TerminalProvider {
83    fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
84    fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
85    fn active_pids(&self) -> Vec<u32>;
86}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct StateHeader {
90    pub cwd: PathBuf,
91    pub recent_hashes: Vec<FileHash>,
92    pub active_pids: Vec<u32>,
93}
94
95impl StateHeader {
96    pub fn render(&self) -> String {
97        let mut out = String::new();
98        let _ = write!(&mut out, "[CWD: {}", self.cwd.display());
99
100        if self.recent_hashes.is_empty() {
101            out.push_str(" | RECENT_HASHES: none");
102        } else {
103            out.push_str(" | RECENT_HASHES: ");
104            for (idx, file_hash) in self.recent_hashes.iter().enumerate() {
105                if idx > 0 {
106                    out.push_str(", ");
107                }
108                let _ = write!(
109                    &mut out,
110                    "{}#{}",
111                    file_hash.path.display(),
112                    shorten_hash(&file_hash.sha256)
113                );
114            }
115        }
116
117        if self.active_pids.is_empty() {
118            out.push_str(" | ACTIVE_PIDS: none]");
119        } else {
120            out.push_str(" | ACTIVE_PIDS: ");
121            for (idx, pid) in self.active_pids.iter().enumerate() {
122                if idx > 0 {
123                    out.push_str(", ");
124                }
125                let _ = write!(&mut out, "{pid}");
126            }
127            out.push(']');
128        }
129
130        out
131    }
132}
133
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct TurnResult {
136    pub state_header: StateHeader,
137    pub outputs: Vec<EngineOutput>,
138}
139
140#[derive(Debug, Clone, PartialEq, Eq)]
141pub enum EngineOutput {
142    ReadFile(ReadFileOutput),
143    WriteFile(WriteFileOutput),
144    ApplyEdit(ApplyEditOutput),
145    ListFiles(ListFilesOutput),
146    Terminal(TerminalExecution),
147    Signal(SignalOutput),
148    Warning(String),
149}
150
/// Result of reading a window of lines from a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFileOutput {
    /// File that was read.
    pub path: PathBuf,
    /// Size of the whole file in bytes.
    pub size_bytes: usize,
    /// Digest string reported for the file.
    pub sha256: String,
    /// First requested line (1-based).
    pub requested_start: usize,
    /// Last requested line.
    pub requested_end: usize,
    /// First line actually included in `body`.
    pub served_start: usize,
    /// Last line actually included in `body`.
    pub served_end: usize,
    /// Number of lines in the file.
    pub total_lines: usize,
    /// The served lines as text.
    pub body: String,
    /// Optional notice attached to the read, e.g. when the window was adjusted.
    pub warning: Option<String>,
}
164
165impl ReadFileOutput {
166    pub fn fidelity_header(&self) -> String {
167        format!(
168            "[PATH: {} | SIZE: {} | SHA256: {} | LINES: {}-{}/{}]",
169            self.path.display(),
170            human_bytes(self.size_bytes),
171            shorten_hash(&self.sha256),
172            self.served_start,
173            self.served_end,
174            self.total_lines
175        )
176    }
177}
178
/// Result of a completed `write_file` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// File that was written.
    pub path: PathBuf,
    /// Number of bytes that were written.
    pub size_bytes: usize,
    /// Digest string reported for the file after the write.
    pub sha256: String,
}
185
/// Which matching strategy located the target of an edit.
///
/// The search/replace path tries the tiers in the order `Exact`,
/// `WhitespaceAgnostic`, `ContextualAnchor`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
    /// Matched by the first (exact) strategy.
    Exact,
    /// Matched by the second, whitespace-agnostic strategy.
    WhitespaceAgnostic,
    /// Matched by the third, contextual-anchor strategy.
    ContextualAnchor,
    /// The edit was rejected and nothing was written.
    NotApplied,
}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct ApplyEditOutput {
196    pub path: PathBuf,
197    pub applied: bool,
198    pub tier: EditTier,
199    pub sha256: Option<String>,
200    pub format: Option<String>,
201    pub reason_code: Option<String>,
202    pub warning: Option<String>,
203}
204
/// Result of a `list_files` tag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
    /// Root that was listed (`.` when the tag carried no `path`).
    pub path: PathBuf,
    /// One rendered line per listed entry, plus a truncation marker when the
    /// line budget ran out.
    pub lines: Vec<String>,
}
210
211#[derive(Debug, Clone, PartialEq, Eq)]
212pub struct SignalOutput {
213    pub pid: u32,
214    pub signal: ProcessSignal,
215}
216
217#[derive(Debug)]
218pub struct Engine<VFS, Terminal> {
219    vfs: VFS,
220    terminal: Terminal,
221    known_hashes: HashMap<PathBuf, String>,
222    pending: Option<PendingOperation>,
223    max_list_lines: usize,
224    dense_dir_threshold: usize,
225    terminal_timeout: Duration,
226    recent_hash_limit: usize,
227}
228
229impl<VFS, Terminal> Engine<VFS, Terminal>
230where
231    VFS: VirtualFileSystem,
232    Terminal: TerminalProvider,
233{
234    pub fn new(vfs: VFS, terminal: Terminal) -> Self {
235        Self {
236            vfs,
237            terminal,
238            known_hashes: HashMap::new(),
239            pending: None,
240            max_list_lines: 300,
241            dense_dir_threshold: 200,
242            terminal_timeout: Duration::from_secs(5),
243            recent_hash_limit: 5,
244        }
245    }
246
247    pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
248        self.terminal_timeout = timeout;
249        self
250    }
251
252    pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
253        self.max_list_lines = max_lines;
254        self.dense_dir_threshold = dense_threshold;
255        self
256    }
257
258    pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
259        let mut outputs = Vec::new();
260
261        for instruction in instructions {
262            self.apply_instruction(instruction, &mut outputs)?;
263        }
264
265        let state_header = StateHeader {
266            cwd: self.vfs.cwd()?,
267            recent_hashes: self.vfs.recent_file_hashes(self.recent_hash_limit)?,
268            active_pids: {
269                let mut pids = self.terminal.active_pids();
270                pids.sort_unstable();
271                pids
272            },
273        };
274
275        Ok(TurnResult {
276            state_header,
277            outputs,
278        })
279    }
280
281    fn apply_instruction(
282        &mut self,
283        instruction: Instruction,
284        outputs: &mut Vec<EngineOutput>,
285    ) -> Result<()> {
286        match instruction {
287            Instruction::Text(text) => {
288                if let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() {
289                    if edit.capture.is_none() {
290                        edit.raw_body.extend_from_slice(text.as_bytes());
291                    }
292                }
293            }
294            Instruction::StartTag { name, attributes } => {
295                self.handle_start_tag(&name, attributes, outputs)?;
296            }
297            Instruction::EndTag(name) => {
298                self.handle_end_tag(&name, outputs)?;
299            }
300            Instruction::WriteChunk(bytes) => {
301                let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
302                    return Err(EngineError::InvalidState(
303                        "received WriteChunk outside <write_file>".to_string(),
304                    ));
305                };
306                write.buffer.extend_from_slice(&bytes);
307            }
308            Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
309                Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
310                    Some(ApplyCapture::Search) if tag == "search" => {
311                        edit.search.extend_from_slice(&bytes);
312                    }
313                    Some(ApplyCapture::Replace) if tag == "replace" => {
314                        edit.replace.extend_from_slice(&bytes);
315                    }
316                    None => {
317                        edit.raw_body.extend_from_slice(&bytes);
318                    }
319                    _ => {
320                        return Err(EngineError::InvalidState(format!(
321                            "unexpected raw chunk for <{tag}> while applying edit"
322                        )));
323                    }
324                },
325                Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
326                    term.command.extend_from_slice(&bytes);
327                }
328                _ => {
329                    return Err(EngineError::InvalidState(format!(
330                        "received raw chunk for <{tag}> without active matching context"
331                    )));
332                }
333            },
334        }
335
336        Ok(())
337    }
338
339    fn handle_start_tag(
340        &mut self,
341        name: &str,
342        attributes: Attributes,
343        outputs: &mut Vec<EngineOutput>,
344    ) -> Result<()> {
345        match name {
346            "write_file" => {
347                self.ensure_no_pending("write_file")?;
348                let path = required_path(&attributes)?;
349                self.pending = Some(PendingOperation::WriteFile(PendingWrite {
350                    path,
351                    buffer: Vec::new(),
352                }));
353            }
354            "read_file" => {
355                let output = self.execute_read_file(&attributes)?;
356                outputs.push(EngineOutput::ReadFile(output));
357            }
358            "apply_edit" => {
359                self.ensure_no_pending("apply_edit")?;
360                let path = required_path(&attributes)?;
361                self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
362                    path,
363                    search: Vec::new(),
364                    replace: Vec::new(),
365                    raw_body: Vec::new(),
366                    capture: None,
367                }));
368            }
369            "search" => {
370                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
371                    return Err(EngineError::InvalidState(
372                        "<search> must be nested under <apply_edit>".to_string(),
373                    ));
374                };
375                edit.capture = Some(ApplyCapture::Search);
376            }
377            "replace" => {
378                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
379                    return Err(EngineError::InvalidState(
380                        "<replace> must be nested under <apply_edit>".to_string(),
381                    ));
382                };
383                edit.capture = Some(ApplyCapture::Replace);
384            }
385            "list_files" => {
386                let output = self.execute_list_files(&attributes)?;
387                outputs.push(EngineOutput::ListFiles(output));
388            }
389            "terminal" => {
390                self.ensure_no_pending("terminal")?;
391                let command = attributes
392                    .get("cmd")
393                    .or_else(|| attributes.get("command"))
394                    .cloned()
395                    .unwrap_or_default()
396                    .into_bytes();
397                self.pending = Some(PendingOperation::Terminal(PendingTerminal { command }));
398            }
399            "terminal_signal" => {
400                let signal_output = self.execute_terminal_signal(&attributes)?;
401                outputs.push(EngineOutput::Signal(signal_output));
402            }
403            other => outputs.push(EngineOutput::Warning(format!(
404                "unsupported start tag <{other}> ignored"
405            ))),
406        }
407
408        Ok(())
409    }
410
411    fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
412        match name {
413            "write_file" => {
414                let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
415                    return Err(EngineError::InvalidState(
416                        "</write_file> received without matching start".to_string(),
417                    ));
418                };
419                let output = self.finalize_write(write)?;
420                outputs.push(EngineOutput::WriteFile(output));
421            }
422            "search" | "replace" => {
423                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
424                    return Err(EngineError::InvalidState(format!(
425                        "</{name}> received without active <apply_edit>"
426                    )));
427                };
428                edit.capture = None;
429            }
430            "apply_edit" => {
431                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
432                    return Err(EngineError::InvalidState(
433                        "</apply_edit> received without matching start".to_string(),
434                    ));
435                };
436                let output = self.finalize_apply_edit(edit)?;
437                outputs.push(EngineOutput::ApplyEdit(output));
438            }
439            "terminal" => {
440                let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
441                    return Err(EngineError::InvalidState(
442                        "</terminal> received without matching start".to_string(),
443                    ));
444                };
445                let command =
446                    String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
447                let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
448                outputs.push(EngineOutput::Terminal(output));
449            }
450            other => outputs.push(EngineOutput::Warning(format!(
451                "unsupported end tag </{other}> ignored"
452            ))),
453        }
454
455        Ok(())
456    }
457
458    fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
459        self.vfs.write_atomic(&write.path, &write.buffer)?;
460        let sha256 = self.vfs.hash(&write.path)?;
461        self.known_hashes.insert(write.path.clone(), sha256.clone());
462
463        Ok(WriteFileOutput {
464            path: write.path,
465            size_bytes: write.buffer.len(),
466            sha256,
467        })
468    }
469
470    fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
471        let path = required_path(attributes)?;
472        let requested_start = optional_usize(attributes, "start_line")?
473            .unwrap_or(1)
474            .max(1);
475        let requested_end =
476            optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
477
478        let bytes = self.vfs.read(&path)?;
479        let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
480        let sha256 = self.vfs.hash(&path)?;
481        self.known_hashes.insert(path.clone(), sha256.clone());
482
483        let all_lines: Vec<&str> = text.lines().collect();
484        let total_lines = all_lines.len();
485
486        let (served_start, served_end, warning) = if total_lines == 0 {
487            (0, 0, Some("file is empty; returning no lines".to_string()))
488        } else {
489            let served_start = requested_start.min(total_lines);
490            let served_end = requested_end.max(served_start).min(total_lines);
491            let warning = if served_start != requested_start || served_end != requested_end {
492                Some(format!(
493                    "requested lines {}-{} adjusted to {}-{} (file has {} lines)",
494                    requested_start, requested_end, served_start, served_end, total_lines
495                ))
496            } else {
497                None
498            };
499            (served_start, served_end, warning)
500        };
501
502        let body = if total_lines == 0 {
503            String::new()
504        } else {
505            let mut rendered = Vec::new();
506            for line_idx in served_start..=served_end {
507                let content = all_lines[line_idx - 1];
508                rendered.push(format!("[{line_idx}] {content}"));
509            }
510            rendered.join("\n")
511        };
512
513        Ok(ReadFileOutput {
514            path,
515            size_bytes: bytes.len(),
516            sha256,
517            requested_start,
518            requested_end,
519            served_start,
520            served_end,
521            total_lines,
522            body,
523            warning,
524        })
525    }
526
527    fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
528        let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
529        let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
530        let raw_body = String::from_utf8(edit.raw_body).map_err(|_| EngineError::InvalidUtf8)?;
531        let path = edit.path;
532        let edit_input = resolve_apply_edit_input(&search, &replace, &raw_body);
533
534        if let Some(previous_hash) = self.known_hashes.get(&path) {
535            let current_hash = self.vfs.hash(&path)?;
536            if previous_hash != &current_hash {
537                return Ok(ApplyEditOutput {
538                    path,
539                    applied: false,
540                    tier: EditTier::NotApplied,
541                    sha256: None,
542                    format: edit_input.format,
543                    reason_code: Some("stale_hash".to_string()),
544                    warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
545                });
546            }
547        }
548
549        let original_bytes = self.vfs.read(&path)?;
550        let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;
551
552        if let Some(reason_code) = edit_input.reason_code {
553            return Ok(ApplyEditOutput {
554                path,
555                applied: false,
556                tier: EditTier::NotApplied,
557                sha256: None,
558                format: edit_input.format,
559                reason_code: Some(reason_code),
560                warning: edit_input
561                    .warning
562                    .or_else(|| Some("invalid apply_edit payload".to_string())),
563            });
564        }
565
566        let apply_result = if let Some(hunks) = edit_input.diff_hunks {
567            apply_diff_hunks_with_tiers(&original, &hunks)
568        } else if let (Some(search), Some(replace)) =
569            (edit_input.search.as_deref(), edit_input.replace.as_deref())
570        {
571            apply_edit_with_tiers(&original, search, replace)
572        } else {
573            None
574        };
575        let Some((rewritten, tier)) = apply_result else {
576            return Ok(ApplyEditOutput {
577                path,
578                applied: false,
579                tier: EditTier::NotApplied,
580                sha256: None,
581                format: edit_input.format,
582                reason_code: Some("no_match".to_string()),
583                warning: Some("no suitable target block found for apply_edit".to_string()),
584            });
585        };
586
587        self.vfs.write_atomic(&path, rewritten.as_bytes())?;
588        let sha256 = self.vfs.hash(&path)?;
589        self.known_hashes.insert(path.clone(), sha256.clone());
590
591        Ok(ApplyEditOutput {
592            path,
593            applied: true,
594            tier,
595            sha256: Some(sha256),
596            format: edit_input.format,
597            reason_code: None,
598            warning: None,
599        })
600    }
601
602    fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
603        let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
604        let mut nodes = self.vfs.list_tree(&path)?;
605        nodes.sort_by(|a, b| a.path.cmp(&b.path));
606
607        let mut lines = Vec::new();
608
609        for node in nodes {
610            if lines.len() >= self.max_list_lines {
611                lines.push("[... truncated due to token budget ...]".to_string());
612                break;
613            }
614
615            let mut line = match node.kind {
616                NodeKind::Directory => {
617                    if node.descendant_file_count >= self.dense_dir_threshold {
618                        format!(
619                            "[dir] {}/ ({} files, omitted)",
620                            node.path.display(),
621                            node.descendant_file_count
622                        )
623                    } else {
624                        format!("[dir] {}/", node.path.display())
625                    }
626                }
627                NodeKind::File => format!("[file] {}", node.path.display()),
628            };
629
630            if node.modified_recently {
631                line.push_str(" (*)");
632            }
633
634            lines.push(line);
635        }
636
637        Ok(ListFilesOutput { path, lines })
638    }
639
640    fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
641        let pid_value = required_attr(attributes, "pid")?;
642        let pid = pid_value
643            .parse::<u32>()
644            .map_err(|_| EngineError::InvalidInteger {
645                name: "pid",
646                value: pid_value.to_string(),
647            })?;
648        let signal = match attributes
649            .get("signal")
650            .map(|v| v.to_ascii_uppercase())
651            .unwrap_or_else(|| "SIGINT".to_string())
652            .as_str()
653        {
654            "SIGINT" => ProcessSignal::SigInt,
655            "SIGTERM" => ProcessSignal::SigTerm,
656            "SIGKILL" => ProcessSignal::SigKill,
657            other => {
658                return Err(EngineError::InvalidState(format!(
659                    "unsupported signal `{other}`"
660                )));
661            }
662        };
663
664        self.terminal.signal(pid, signal)?;
665        Ok(SignalOutput { pid, signal })
666    }
667
668    fn ensure_no_pending(&self, next: &str) -> Result<()> {
669        if self.pending.is_some() {
670            return Err(EngineError::InvalidState(format!(
671                "cannot start <{next}> while another command block is still open"
672            )));
673        }
674        Ok(())
675    }
676}
677
678#[derive(Debug)]
679enum PendingOperation {
680    WriteFile(PendingWrite),
681    ApplyEdit(PendingApplyEdit),
682    Terminal(PendingTerminal),
683}
684
/// State of an open `write_file` block.
#[derive(Debug)]
struct PendingWrite {
    /// File that will be written when the block closes.
    path: PathBuf,
    /// Bytes received so far.
    buffer: Vec<u8>,
}
690
691#[derive(Debug)]
692struct PendingApplyEdit {
693    path: PathBuf,
694    search: Vec<u8>,
695    replace: Vec<u8>,
696    raw_body: Vec<u8>,
697    capture: Option<ApplyCapture>,
698}
699
/// The nested tag of an `apply_edit` block that is currently receiving chunks.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
    /// Chunks belong to the `<search>` text.
    Search,
    /// Chunks belong to the `<replace>` text.
    Replace,
}
705
/// State of an open `terminal` block.
#[derive(Debug)]
struct PendingTerminal {
    /// Command bytes collected so far (inline attribute plus raw chunks).
    command: Vec<u8>,
}
710
/// Commands that can be advertised in the generated system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    /// Advertises the `write_file` command.
    WriteFile,
    /// Advertises the `apply_edit` command.
    ApplyEdit,
    /// Advertises the `read_file` command.
    ReadFile,
    /// Advertises the `list_files` command.
    ListFiles,
    /// Advertises the `terminal` command.
    Terminal,
}

impl Capability {
    /// Returns the usage line shown for this capability in the system prompt.
    ///
    /// Every placeholder value is written as `...`.
    fn tag(&self) -> &'static str {
        match self {
            Capability::WriteFile => "<write_file path=\"...\"></write_file>",
            Capability::ApplyEdit => {
                "<apply_edit path=\"...\">[search/replace blocks or patch body]</apply_edit>"
            }
            // `start_line` previously used a two-dot placeholder, unlike every
            // other attribute in these usage lines.
            Capability::ReadFile => {
                "<read_file path=\"...\" start_line=\"...\" end_line=\"...\" />"
            }
            Capability::ListFiles => "<list_files path=\"...\" />",
            Capability::Terminal => "<terminal>...</terminal> or <terminal cmd=\"...\" />",
        }
    }
}
733
734pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
735    let enabled: Vec<Capability> = if capabilities.is_empty() {
736        vec![
737            Capability::WriteFile,
738            Capability::ApplyEdit,
739            Capability::ReadFile,
740            Capability::ListFiles,
741            Capability::Terminal,
742        ]
743    } else {
744        capabilities.to_vec()
745    };
746
747    let mut out = String::from(
748        "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
749    );
750    out.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
751    out.push_str("Available Commands:\n");
752    for capability in &enabled {
753        out.push_str(capability.tag());
754        out.push('\n');
755    }
756    out.push_str("\nRules:\n");
757    out.push_str("1. Do not escape strings inside tags.\n");
758    out.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
759    out.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
760    out.push_str(
761        "4. apply_edit accepts XML search/replace blocks, SEARCH/REPLACE markers, or unified diff hunks.\n",
762    );
763    out
764}
765
766fn required_path(attributes: &Attributes) -> Result<PathBuf> {
767    optional_path(attributes, "path")?.ok_or(EngineError::MissingAttribute("path"))
768}
769
770fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
771    Ok(attributes.get(key).map(PathBuf::from))
772}
773
774fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
775    attributes
776        .get(key)
777        .map(|value| value.as_str())
778        .ok_or(EngineError::MissingAttribute(key))
779}
780
781fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
782    let Some(value) = attributes.get(key) else {
783        return Ok(None);
784    };
785    let parsed = value
786        .parse::<usize>()
787        .map_err(|_| EngineError::InvalidInteger {
788            name: key,
789            value: value.clone(),
790        })?;
791    Ok(Some(parsed))
792}
793
/// Returns the first eight characters of `hash` (or all of it when shorter).
///
/// The cut is made on character boundaries, so non-ASCII input is safe.
fn shorten_hash(hash: &str) -> String {
    const PREFIX_CHARS: usize = 8;

    // Byte offset of the ninth character, i.e. the end of the first eight.
    let cut = hash
        .char_indices()
        .nth(PREFIX_CHARS)
        .map_or(hash.len(), |(offset, _)| offset);
    hash[..cut].to_string()
}
797
/// Formats a byte count for display.
///
/// Values below 1024 are printed exactly with a `b` suffix; larger values are
/// printed with one decimal as `kb` (1024 bytes) or `mb` (1024 * 1024 bytes).
fn human_bytes(bytes: usize) -> String {
    const KIB: f64 = 1024.0;
    const MIB: f64 = KIB * 1024.0;

    match bytes as f64 {
        size if size >= MIB => format!("{:.1}mb", size / MIB),
        size if size >= KIB => format!("{:.1}kb", size / KIB),
        _ => format!("{bytes}b"),
    }
}
811
812fn apply_edit_with_tiers(
813    original: &str,
814    search: &str,
815    replace: &str,
816) -> Option<(String, EditTier)> {
817    if search.is_empty() {
818        return None;
819    }
820
821    if let Some(output) = apply_exact(original, search, replace) {
822        return Some((output, EditTier::Exact));
823    }
824
825    if let Some(output) = apply_whitespace_agnostic(original, search, replace) {
826        return Some((output, EditTier::WhitespaceAgnostic));
827    }
828
829    apply_contextual_anchor(original, search, replace)
830        .map(|output| (output, EditTier::ContextualAnchor))
831}
832
/// One diff hunk expressed as a search/replace pair.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DiffHunkReplacement {
    /// Text the hunk expects to find.
    search: String,
    /// Text the hunk puts in its place.
    replace: String,
}
838
839#[derive(Debug, Clone)]
840struct ResolvedApplyEditInput {
841    search: Option<String>,
842    replace: Option<String>,
843    diff_hunks: Option<Vec<DiffHunkReplacement>>,
844    format: Option<String>,
845    reason_code: Option<String>,
846    warning: Option<String>,
847}
848
849fn resolve_apply_edit_input(search: &str, replace: &str, raw_body: &str) -> ResolvedApplyEditInput {
850    if !search.is_empty() {
851        return ResolvedApplyEditInput {
852            search: Some(search.to_string()),
853            replace: Some(replace.to_string()),
854            diff_hunks: None,
855            format: Some("search_replace_tags".to_string()),
856            reason_code: None,
857            warning: None,
858        };
859    }
860
861    let body = decode_basic_xml_entities(raw_body).trim().to_string();
862    if body.is_empty() {
863        return ResolvedApplyEditInput {
864            search: None,
865            replace: None,
866            diff_hunks: None,
867            format: None,
868            reason_code: Some("empty_edit".to_string()),
869            warning: Some(
870                "apply_edit requires <search>/<replace> blocks or a non-empty patch body"
871                    .to_string(),
872            ),
873        };
874    }
875
876    if let Some((parsed_search, parsed_replace)) = parse_search_replace_markers(&body) {
877        return ResolvedApplyEditInput {
878            search: Some(parsed_search),
879            replace: Some(parsed_replace),
880            diff_hunks: None,
881            format: Some("search_replace_markers".to_string()),
882            reason_code: None,
883            warning: None,
884        };
885    }
886
887    if let Some((parsed_search, parsed_replace)) = parse_apply_edit_xml_blocks(&body) {
888        return ResolvedApplyEditInput {
889            search: Some(parsed_search),
890            replace: Some(parsed_replace),
891            diff_hunks: None,
892            format: Some("search_replace_xml_blocks".to_string()),
893            reason_code: None,
894            warning: None,
895        };
896    }
897
898    if let Some(hunks) = parse_unified_diff_hunks(&body) {
899        if hunks.is_empty() {
900            return ResolvedApplyEditInput {
901                search: None,
902                replace: None,
903                diff_hunks: None,
904                format: Some("unified_diff".to_string()),
905                reason_code: Some("no_hunks".to_string()),
906                warning: Some(
907                    "unified diff was detected but no @@ hunk blocks were parsed".to_string(),
908                ),
909            };
910        }
911        return ResolvedApplyEditInput {
912            search: None,
913            replace: None,
914            diff_hunks: Some(hunks),
915            format: Some("unified_diff".to_string()),
916            reason_code: None,
917            warning: None,
918        };
919    }
920
921    ResolvedApplyEditInput {
922        search: None,
923        replace: None,
924        diff_hunks: None,
925        format: Some("raw_text".to_string()),
926        reason_code: Some("parse_error".to_string()),
927        warning: Some("unsupported apply_edit body format".to_string()),
928    }
929}
930
/// Decodes the three entities `&lt;`, `&gt;` and `&amp;` in a single
/// left-to-right pass.
///
/// Decoded output is never re-scanned, so `&amp;lt;` becomes the literal text
/// `&lt;` rather than `<`. Any other `&` sequence is copied through unchanged.
fn decode_basic_xml_entities(input: &str) -> String {
    const ENTITIES: [(&str, char); 3] = [("&lt;", '<'), ("&gt;", '>'), ("&amp;", '&')];

    let mut decoded = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(ampersand) = rest.find('&') {
        decoded.push_str(&rest[..ampersand]);
        let tail = &rest[ampersand..];

        let recognised = ENTITIES
            .iter()
            .find_map(|&(entity, ch)| tail.strip_prefix(entity).map(|after| (ch, after)));

        match recognised {
            Some((ch, after)) => {
                decoded.push(ch);
                rest = after;
            }
            None => {
                // Not one of the known entities: keep the ampersand literally.
                decoded.push('&');
                rest = &tail[1..];
            }
        }
    }

    decoded.push_str(rest);
    decoded
}
937
/// Parses a conflict-marker style edit body:
///
/// ```text
/// <<<<<<< SEARCH
/// old lines
/// =======
/// new lines
/// >>>>>>> REPLACE
/// ```
///
/// Marker lines are recognised after trimming surrounding whitespace; content
/// lines are kept verbatim and joined with `\n`. When the body contains more
/// than one block, the search lines of all blocks are concatenated, and
/// likewise the replace lines.
///
/// Returns `None` when the body has no `<<<<<<< SEARCH` marker, or when the
/// input ends while still inside a SEARCH section (its `=======` separator
/// never appeared). Such a truncated block would otherwise resolve to an empty
/// replacement and silently delete the searched text. A missing closing
/// `>>>>>>> REPLACE` marker is tolerated.
fn parse_search_replace_markers(input: &str) -> Option<(String, String)> {
    #[derive(Clone, Copy, PartialEq)]
    enum Section {
        Outside,
        Search,
        Replace,
    }

    let mut section = Section::Outside;
    let mut search: Vec<&str> = Vec::new();
    let mut replace: Vec<&str> = Vec::new();
    let mut saw_markers = false;

    for line in input.lines() {
        match (line.trim(), section) {
            ("<<<<<<< SEARCH", _) => {
                section = Section::Search;
                saw_markers = true;
            }
            // The separator/closing markers only count in the section they terminate;
            // anywhere else they are ordinary content (or ignored outside a block).
            ("=======", Section::Search) => section = Section::Replace,
            (">>>>>>> REPLACE", Section::Replace) => section = Section::Outside,
            (_, Section::Search) => search.push(line),
            (_, Section::Replace) => replace.push(line),
            (_, Section::Outside) => {}
        }
    }

    // Still inside SEARCH at end of input means the block was cut off before
    // `=======`; refuse it instead of treating it as "replace with nothing".
    if !saw_markers || section == Section::Search {
        return None;
    }
    Some((search.join("\n"), replace.join("\n")))
}
971
972fn parse_apply_edit_xml_blocks(input: &str) -> Option<(String, String)> {
973    let search = extract_tag_body(input, "search")?;
974    let replace = extract_tag_body(input, "replace")?;
975    Some((search, replace))
976}
977
/// Returns the text between the first `<tag>` and the first `</tag>` that
/// follows it.
///
/// Yields `None` when either delimiter is missing. Only the exact, attribute-free
/// opening form `<tag>` is recognised and nesting is not tracked.
fn extract_tag_body(input: &str, tag: &str) -> Option<String> {
    let opening = format!("<{tag}>");
    let closing = format!("</{tag}>");

    let (_, after_opening) = input.split_once(opening.as_str())?;
    let (inner, _) = after_opening.split_once(closing.as_str())?;
    Some(inner.to_owned())
}
985
986fn parse_unified_diff_hunks(input: &str) -> Option<Vec<DiffHunkReplacement>> {
987    let lines = input.lines().collect::<Vec<_>>();
988    let mut idx = 0usize;
989    let mut hunks = Vec::new();
990    let mut saw_hunk_header = false;
991
992    while idx < lines.len() {
993        let line = lines[idx].trim_end_matches('\r');
994        if line.starts_with("@@") {
995            saw_hunk_header = true;
996            idx = idx.saturating_add(1);
997            let mut search_lines = Vec::new();
998            let mut replace_lines = Vec::new();
999
1000            while idx < lines.len() {
1001                let current = lines[idx].trim_end_matches('\r');
1002                if current.starts_with("@@") {
1003                    break;
1004                }
1005                if current.starts_with("diff --git ")
1006                    || current.starts_with("*** End Patch")
1007                    || current.starts_with("*** Update File:")
1008                {
1009                    break;
1010                }
1011                if current.eq("\\ No newline at end of file") {
1012                    idx = idx.saturating_add(1);
1013                    continue;
1014                }
1015                if let Some(rest) = current.strip_prefix('+') {
1016                    if !current.starts_with("+++") {
1017                        replace_lines.push(rest.to_string());
1018                    }
1019                } else if let Some(rest) = current.strip_prefix('-') {
1020                    if !current.starts_with("---") {
1021                        search_lines.push(rest.to_string());
1022                    }
1023                } else if let Some(rest) = current.strip_prefix(' ') {
1024                    search_lines.push(rest.to_string());
1025                    replace_lines.push(rest.to_string());
1026                }
1027                idx = idx.saturating_add(1);
1028            }
1029
1030            if !(search_lines.is_empty() && replace_lines.is_empty()) {
1031                hunks.push(DiffHunkReplacement {
1032                    search: search_lines.join("\n"),
1033                    replace: replace_lines.join("\n"),
1034                });
1035            }
1036            continue;
1037        }
1038        idx = idx.saturating_add(1);
1039    }
1040
1041    saw_hunk_header.then_some(hunks)
1042}
1043
1044fn apply_diff_hunks_with_tiers(
1045    original: &str,
1046    hunks: &[DiffHunkReplacement],
1047) -> Option<(String, EditTier)> {
1048    let mut current = original.to_string();
1049    let mut strongest_tier = EditTier::Exact;
1050    for hunk in hunks {
1051        let (next, tier) = apply_edit_with_tiers(&current, &hunk.search, &hunk.replace)?;
1052        if edit_tier_rank(&tier) > edit_tier_rank(&strongest_tier) {
1053            strongest_tier = tier;
1054        }
1055        current = next;
1056    }
1057    Some((current, strongest_tier))
1058}
1059
1060const fn edit_tier_rank(tier: &EditTier) -> usize {
1061    match tier {
1062        EditTier::Exact => 0,
1063        EditTier::WhitespaceAgnostic => 1,
1064        EditTier::ContextualAnchor => 2,
1065        EditTier::NotApplied => 3,
1066    }
1067}
1068
/// Replaces the first byte-exact occurrence of `search` in `original` with
/// `replace`.
///
/// Returns `None` when `search` does not occur. An empty `search` matches at
/// offset 0, so `replace` is prepended.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    let (before, after) = original.split_once(search)?;
    Some([before, replace, after].concat())
}
1077
1078fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
1079    let original_lines = collect_line_spans(original);
1080    let search_lines: Vec<&str> = search.lines().collect();
1081    if search_lines.is_empty() || original_lines.len() < search_lines.len() {
1082        return None;
1083    }
1084
1085    for start in 0..=original_lines.len() - search_lines.len() {
1086        let window = &original_lines[start..start + search_lines.len()];
1087        if window
1088            .iter()
1089            .zip(search_lines.iter())
1090            .all(|(candidate, target)| candidate.text.trim() == target.trim())
1091        {
1092            let range_start = window.first()?.start;
1093            let range_end = window.last()?.end;
1094            return Some(splice(original, range_start, range_end, replace));
1095        }
1096    }
1097
1098    None
1099}
1100
1101fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
1102    let original_lines = collect_line_spans(original);
1103    let search_lines: Vec<&str> = search.lines().collect();
1104    if search_lines.is_empty() || original_lines.is_empty() {
1105        return None;
1106    }
1107
1108    let window_len = search_lines.len().min(original_lines.len());
1109    let normalized_search = normalize_for_distance(search);
1110    let mut best: Option<(usize, usize, usize)> = None;
1111
1112    for start in 0..=original_lines.len() - window_len {
1113        let window = &original_lines[start..start + window_len];
1114        let joined = window
1115            .iter()
1116            .map(|line| line.text)
1117            .collect::<Vec<_>>()
1118            .join("\n");
1119        let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
1120
1121        match best {
1122            Some((best_score, _, _)) if score >= best_score => {}
1123            _ => best = Some((score, start, start + window_len - 1)),
1124        }
1125    }
1126
1127    let (score, line_start, line_end) = best?;
1128    let threshold = normalized_search.len().max(6) / 3;
1129    if score > threshold {
1130        return None;
1131    }
1132
1133    let range_start = original_lines[line_start].start;
1134    let range_end = original_lines[line_end].end;
1135    Some(splice(original, range_start, range_end, replace))
1136}
1137
/// Canonicalizes text for fuzzy comparison: within every line, runs of
/// whitespace collapse to a single space and leading/trailing whitespace is
/// dropped; lines are re-joined with `\n`; finally the whole result is trimmed,
/// which also removes leading and trailing blank lines.
fn normalize_for_distance(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());

    for (row, line) in input.lines().enumerate() {
        if row > 0 {
            collapsed.push('\n');
        }
        for (column, word) in line.split_whitespace().enumerate() {
            if column > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(word);
        }
    }

    collapsed.trim().to_owned()
}
1147
/// Returns `original` with the byte range `range_start..range_end` replaced by
/// `replace`.
///
/// Both offsets must lie on UTF-8 character boundaries within `original`;
/// otherwise the slicing panics.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    let head = &original[..range_start];
    let tail = &original[range_end..];
    [head, replace, tail].concat()
}
1155
/// One line of a source string together with the byte range it occupies.
#[derive(Debug)]
struct LineSpan<'a> {
    /// Byte offset of the first byte of the line.
    start: usize,
    /// Byte offset one past the line, including its trailing `\n` when present.
    end: usize,
    /// Line content without the trailing `\n` (a preceding `\r` is kept).
    text: &'a str,
}

/// Splits `input` into `\n`-terminated lines and records each line's byte range.
///
/// The spans are contiguous and cover the whole input: the first starts at 0,
/// each `end` is the next `start`, and the last `end` equals `input.len()`.
/// An empty input produces no spans.
fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
    input
        .split_inclusive('\n')
        .scan(0usize, |offset, chunk| {
            let start = *offset;
            *offset += chunk.len();
            Some(LineSpan {
                start,
                end: *offset,
                text: chunk.strip_suffix('\n').unwrap_or(chunk),
            })
        })
        .collect()
}
1190
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::sync::{Arc, Mutex};

    // ---------------------------------------------------------------------
    // Test doubles
    // ---------------------------------------------------------------------

    /// In-memory `VirtualFileSystem`; clones share the same underlying state.
    #[derive(Clone, Default)]
    struct InMemoryVfs {
        files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
        tree: Arc<Mutex<Vec<TreeNode>>>,
    }

    impl InMemoryVfs {
        /// Creates or overwrites `path` with `body`.
        fn set_file(&self, path: &str, body: &str) {
            let mut files = self.files.lock().expect("lock");
            files.insert(PathBuf::from(path), body.as_bytes().to_vec());
        }

        /// Returns the content of `path`, or an empty string when it does not exist.
        fn get_file(&self, path: &str) -> String {
            let files = self.files.lock().expect("lock");
            let bytes = files.get(Path::new(path)).cloned().unwrap_or_default();
            String::from_utf8(bytes).expect("utf8")
        }

        /// Sets the nodes returned by `list_tree`.
        fn set_tree(&self, nodes: Vec<TreeNode>) {
            let mut tree = self.tree.lock().expect("lock");
            *tree = nodes;
        }
    }

    impl VirtualFileSystem for InMemoryVfs {
        fn read(&self, path: &Path) -> Result<Vec<u8>> {
            let files = self.files.lock().expect("lock");
            match files.get(path) {
                Some(bytes) => Ok(bytes.clone()),
                None => Err(EngineError::Vfs(format!("missing file {}", path.display()))),
            }
        }

        fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
            let mut files = self.files.lock().expect("lock");
            files.insert(path.to_path_buf(), bytes.to_vec());
            Ok(())
        }

        fn hash(&self, path: &Path) -> Result<String> {
            self.read(path).map(|bytes| simple_hash(&bytes))
        }

        fn cwd(&self) -> Result<PathBuf> {
            Ok(PathBuf::from("/virtual"))
        }

        fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
            let tree = self.tree.lock().expect("lock");
            Ok(tree.clone())
        }

        fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
            let files = self.files.lock().expect("lock");
            let mut entries = Vec::with_capacity(files.len());
            for (path, body) in files.iter() {
                entries.push(FileHash {
                    path: path.clone(),
                    sha256: simple_hash(body),
                });
            }
            // HashMap iteration order is arbitrary; sort by path for stable output.
            entries.sort_by(|left, right| left.path.cmp(&right.path));
            entries.truncate(limit);
            Ok(entries)
        }
    }

    /// Terminal double that records the last command and reports a fixed pid list.
    #[derive(Default)]
    struct MockTerminal {
        pids: Vec<u32>,
        last_command: Option<String>,
    }

    impl TerminalProvider for MockTerminal {
        fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
            self.last_command = Some(command.to_string());
            let execution = TerminalExecution {
                output: format!("ran: {command}"),
                exit_code: Some(0),
                cwd: PathBuf::from("/virtual"),
                detached_pid: None,
            };
            Ok(execution)
        }

        fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
            self.pids.retain(|existing| *existing != pid);
            Ok(())
        }

        fn active_pids(&self) -> Vec<u32> {
            self.pids.clone()
        }
    }

    /// Cheap deterministic FNV-1a-style digest standing in for a real hash.
    fn simple_hash(input: &[u8]) -> String {
        const SEED: u64 = 1469598103934665603;
        const MULTIPLIER: u64 = 1099511628211;

        let digest = input.iter().fold(SEED, |acc, byte| {
            (acc ^ u64::from(*byte)).wrapping_mul(MULTIPLIER)
        });
        format!("{digest:016x}")
    }

    // ---------------------------------------------------------------------
    // Instruction builders
    // ---------------------------------------------------------------------

    /// Builds an attribute map from string pairs.
    fn attrs(pairs: &[(&str, &str)]) -> BTreeMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    fn start_tag(name: &str, attributes: BTreeMap<String, String>) -> Instruction {
        Instruction::StartTag {
            name: name.to_string(),
            attributes,
        }
    }

    fn end_tag(name: &str) -> Instruction {
        Instruction::EndTag(name.to_string())
    }

    fn raw_chunk(tag: &str, bytes: &[u8]) -> Instruction {
        Instruction::RawChunk {
            tag: tag.to_string(),
            bytes: bytes.to_vec(),
        }
    }

    /// `<tag>bytes</tag>` with no attributes, as three instructions.
    fn tagged_raw(tag: &str, bytes: &[u8]) -> Vec<Instruction> {
        vec![
            start_tag(tag, BTreeMap::new()),
            raw_chunk(tag, bytes),
            end_tag(tag),
        ]
    }

    /// A `<search>` block followed by a `<replace>` block.
    fn search_replace_blocks(search: &[u8], replace: &[u8]) -> Vec<Instruction> {
        let mut blocks = tagged_raw("search", search);
        blocks.extend(tagged_raw("replace", replace));
        blocks
    }

    /// Wraps `body` in `<apply_edit path="src/lib.rs">` … `</apply_edit>`.
    fn apply_edit_on_lib(body: Vec<Instruction>) -> Vec<Instruction> {
        let mut turn = vec![start_tag("apply_edit", attrs(&[("path", "src/lib.rs")]))];
        turn.extend(body);
        turn.push(end_tag("apply_edit"));
        turn
    }

    fn tree_node(
        path: &str,
        kind: NodeKind,
        descendant_file_count: usize,
        modified_recently: bool,
    ) -> TreeNode {
        TreeNode {
            path: PathBuf::from(path),
            kind,
            descendant_file_count,
            modified_recently,
        }
    }

    // ---------------------------------------------------------------------
    // Tests
    // ---------------------------------------------------------------------

    #[test]
    fn write_file_chunks_commit_atomically() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let turn = engine
            .execute_turn(vec![
                start_tag("write_file", attrs(&[("path", "src/main.rs")])),
                Instruction::WriteChunk(b"fn main()".to_vec()),
                Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
                end_tag("write_file"),
            ])
            .expect("turn should run");

        assert_eq!(
            vfs.get_file("src/main.rs"),
            "fn main() { println!(\"ok\"); }"
        );
        assert!(matches!(
            turn.outputs.as_slice(),
            [EngineOutput::WriteFile(WriteFileOutput { .. })]
        ));
    }

    #[test]
    fn read_file_returns_fidelity_header_and_numbered_lines() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "a\nb\nc\nd\n");
        let mut engine = Engine::new(vfs, MockTerminal::default());

        let request = start_tag(
            "read_file",
            attrs(&[
                ("path", "src/lib.rs"),
                ("start_line", "3"),
                ("end_line", "9"),
            ]),
        );
        let turn = engine
            .execute_turn(vec![request])
            .expect("turn should run");

        let EngineOutput::ReadFile(output) = &turn.outputs[0] else {
            panic!("expected read output");
        };

        assert_eq!(output.served_start, 3);
        assert_eq!(output.served_end, 4);
        assert_eq!(output.body, "[3] c\n[4] d");
        assert!(output.warning.is_some());
        assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
    }

    #[test]
    fn apply_edit_uses_whitespace_agnostic_matching() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "fn main() {\n    println!(\"x\");\n}\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let body = search_replace_blocks(b"   println!(\"x\");   ", b"println!(\"y\");");
        let turn = engine
            .execute_turn(apply_edit_on_lib(body))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(edit.applied);
        assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
        assert!(vfs.get_file("src/lib.rs").contains("println!(\"y\");"));
    }

    #[test]
    fn apply_edit_accepts_unified_diff_hunk_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let patch = Instruction::Text(
            "@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n".to_string(),
        );
        let turn = engine
            .execute_turn(apply_edit_on_lib(vec![patch]))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some("unified_diff"));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    #[test]
    fn apply_edit_accepts_begin_patch_wrapper_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let patch = Instruction::Text(
            "*** Begin Patch\n*** Update File: src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n".to_string(),
        );
        let turn = engine
            .execute_turn(apply_edit_on_lib(vec![patch]))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some("unified_diff"));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    #[test]
    fn apply_edit_accepts_search_replace_markers_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let patch = Instruction::Text(
            "<<<<<<< SEARCH\nbeta\n=======\nbeta (edited)\n>>>>>>> REPLACE\n".to_string(),
        );
        let turn = engine
            .execute_turn(apply_edit_on_lib(vec![patch]))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some("search_replace_markers"));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    #[test]
    fn apply_edit_accepts_xml_search_replace_blocks_in_raw_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let body = raw_chunk(
            "apply_edit",
            b"<search>beta</search><replace>beta (edited)</replace>",
        );
        let turn = engine
            .execute_turn(apply_edit_on_lib(vec![body]))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some("search_replace_xml_blocks"));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    #[test]
    fn apply_edit_reports_parse_error_reason_code_for_unsupported_raw_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs, MockTerminal::default());

        let body = Instruction::Text("totally unsupported patch format".to_string());
        let turn = engine
            .execute_turn(apply_edit_on_lib(vec![body]))
            .expect("turn should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(!edit.applied);
        assert_eq!(edit.reason_code.as_deref(), Some("parse_error"));
        assert_eq!(edit.format.as_deref(), Some("raw_text"));
    }

    #[test]
    fn apply_edit_warns_if_file_changed_since_last_read() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

        let _ = engine
            .execute_turn(vec![start_tag(
                "read_file",
                attrs(&[("path", "src/lib.rs")]),
            )])
            .expect("read should work");

        // Change the file behind the engine's back, after it has been read.
        vfs.set_file("src/lib.rs", "external\nchange\n");

        let turn = engine
            .execute_turn(apply_edit_on_lib(search_replace_blocks(b"beta", b"gamma")))
            .expect("apply should run");

        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };

        assert!(!edit.applied);
        assert_eq!(edit.tier, EditTier::NotApplied);
        assert!(
            edit.warning
                .as_deref()
                .unwrap_or_default()
                .contains("File modified externally")
        );
    }

    #[test]
    fn list_files_omits_dense_directories_and_marks_recent() {
        let vfs = InMemoryVfs::default();
        vfs.set_tree(vec![
            tree_node("src", NodeKind::Directory, 3, false),
            tree_node("src/lib.rs", NodeKind::File, 0, true),
            tree_node("node_modules", NodeKind::Directory, 2400, false),
        ]);

        let mut engine = Engine::new(vfs, MockTerminal::default()).with_list_budget(100, 200);
        let turn = engine
            .execute_turn(vec![start_tag("list_files", attrs(&[("path", ".")]))])
            .expect("list should run");

        let EngineOutput::ListFiles(output) = &turn.outputs[0] else {
            panic!("expected list output");
        };

        assert!(
            output
                .lines
                .iter()
                .any(|line| line.contains("node_modules") && line.contains("omitted"))
        );
        assert!(output.lines.iter().any(|line| line.contains("(*)")));
    }

    #[test]
    fn terminal_executes_command_and_reports_state_header() {
        let vfs = InMemoryVfs::default();
        let terminal = MockTerminal {
            pids: vec![42, 7],
            ..Default::default()
        };

        let mut engine = Engine::new(vfs, terminal);
        let turn = engine
            .execute_turn(vec![
                start_tag("terminal", BTreeMap::new()),
                raw_chunk("terminal", b"echo hi"),
                end_tag("terminal"),
            ])
            .expect("terminal turn should run");

        assert!(matches!(
            turn.outputs.as_slice(),
            [EngineOutput::Terminal(TerminalExecution { .. })]
        ));
        assert_eq!(turn.state_header.active_pids, vec![7, 42]);
        assert!(turn.state_header.render().contains("CWD: /virtual"));
    }

    #[test]
    fn terminal_supports_attribute_command_form() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs, MockTerminal::default());

        let turn = engine
            .execute_turn(vec![
                start_tag("terminal", attrs(&[("cmd", "echo attr")])),
                end_tag("terminal"),
            ])
            .expect("terminal command should run");

        let EngineOutput::Terminal(output) = &turn.outputs[0] else {
            panic!("expected terminal output");
        };
        assert!(output.output.contains("ran: echo attr"));
    }

    #[test]
    fn unknown_tags_emit_warnings_instead_of_silent_noops() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs, MockTerminal::default());

        let turn = engine
            .execute_turn(vec![
                start_tag("mystery_tool", BTreeMap::new()),
                end_tag("mystery_tool"),
            ])
            .expect("turn should run");

        assert_eq!(turn.outputs.len(), 2);
        assert!(matches!(
            &turn.outputs[0],
            EngineOutput::Warning(message) if message.contains("unsupported start tag <mystery_tool>")
        ));
        assert!(matches!(
            &turn.outputs[1],
            EngineOutput::Warning(message) if message.contains("unsupported end tag </mystery_tool>")
        ));
    }

    #[test]
    fn system_prompt_includes_enabled_commands() {
        let prompt = generate_system_prompt(&[Capability::ReadFile, Capability::Terminal]);
        assert!(prompt.contains("<read_file"));
        assert!(prompt.contains("<terminal>"));
        assert!(!prompt.contains("<write_file path"));
    }
}