Skip to main content

sa3p_engine/
lib.rs

1//! Sans-IO execution engine for SA3P operator instructions.
2//!
3//! The engine consumes `sa3p_parser::Instruction` values and executes
4//! command semantics via trait boundaries:
5//! - `VirtualFileSystem`
6//! - `TerminalProvider`
7//!
8//! This keeps business logic transport/host agnostic while still producing
9//! deterministic per-turn outputs and state headers.
10
11use std::collections::HashMap;
12use std::fmt::Write as _;
13use std::path::{Path, PathBuf};
14use std::time::Duration;
15
16use sa3p_parser::{Attributes, Instruction};
17use strsim::levenshtein;
18use thiserror::Error;
19
/// Errors returned by the engine while executing operator instructions.
#[derive(Debug, Error)]
pub enum EngineError {
    /// A tag was missing an attribute it requires; carries the attribute name.
    #[error("missing required attribute `{0}`")]
    MissingAttribute(&'static str),
    /// An attribute that must be an integer could not be parsed; carries the
    /// attribute name and the offending raw value.
    #[error("invalid integer for attribute `{name}`: {value}")]
    InvalidInteger { name: &'static str, value: String },
    /// An instruction arrived in a state where it is not allowed (for example
    /// an end tag without its start tag). Also used for unsupported signal names.
    #[error("operation is out of order: {0}")]
    InvalidState(String),
    /// A payload that must be text was not valid UTF-8.
    #[error("invalid utf-8 payload")]
    InvalidUtf8,
    /// Error category for virtual filesystem failures; not constructed in the
    /// visible part of this file.
    #[error("virtual filesystem error: {0}")]
    Vfs(String),
    /// Error category for terminal failures; not constructed in the visible
    /// part of this file.
    #[error("terminal error: {0}")]
    Terminal(String),
}
35
/// Result alias whose error type is fixed to [`EngineError`].
pub type Result<T> = std::result::Result<T, EngineError>;
37
/// A file path paired with a content-hash string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Path of the hashed file.
    pub path: PathBuf,
    /// Hash string as supplied by the filesystem; `StateHeader::render` shows
    /// only its first eight characters.
    pub sha256: String,
}
43
/// Kind of entry reported by `VirtualFileSystem::list_tree`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// Listed with a `[file]` prefix.
    File,
    /// Listed with a `[dir]` prefix and a trailing `/`.
    Directory,
}
49
/// One entry of a directory listing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeNode {
    /// Path of the entry; listings are sorted by this field.
    pub path: PathBuf,
    /// Whether the entry is a file or a directory.
    pub kind: NodeKind,
    /// For directories, compared against the engine's dense-directory
    /// threshold when the listing line is rendered.
    pub descendant_file_count: usize,
    /// When true the listing line gets a ` (*)` suffix.
    pub modified_recently: bool,
}
57
/// Filesystem boundary used by the engine; implemented by the host.
///
/// The behaviours described below are what the engine relies on; the exact
/// semantics are defined by each implementation.
pub trait VirtualFileSystem {
    /// Returns the raw bytes of the file at `path`.
    fn read(&self, path: &Path) -> Result<Vec<u8>>;
    /// Replaces the content of `path` with `bytes`.
    // NOTE(review): atomicity is implied by the name only — confirm per implementation.
    fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
    /// Returns a hash string for the current content of `path`; the engine
    /// compares these strings to detect external modification.
    fn hash(&self, path: &Path) -> Result<String>;
    /// Returns the working directory reported in the state header.
    fn cwd(&self) -> Result<PathBuf>;
    /// Returns the nodes to list for `path`.
    fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
    /// Returns file hashes for the state header; the engine passes its
    /// configured recent-hash limit as `limit`.
    fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
}
66
/// Signals that `<terminal_signal>` can request for a process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
    /// Selected by `SIGINT`; also the default when no `signal` attribute is given.
    SigInt,
    /// Selected by `SIGTERM`.
    SigTerm,
    /// Selected by `SIGKILL`.
    SigKill,
}
73
/// Result of running one terminal command through a [`TerminalProvider`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
    /// Text output reported by the provider.
    pub output: String,
    /// Exit code, when the provider has one to report.
    // NOTE(review): presumably `None` while the command is still running/detached — confirm.
    pub exit_code: Option<i32>,
    /// Working directory reported by the provider for this execution.
    pub cwd: PathBuf,
    /// Process id, when the provider reports the command as detached.
    pub detached_pid: Option<u32>,
}
81
/// Terminal boundary used by the engine; implemented by the host.
pub trait TerminalProvider {
    /// Runs `command`; the engine passes its configured terminal timeout.
    fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
    /// Delivers `signal` to the process identified by `pid`.
    fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
    /// Returns the pids shown in the state header (the engine sorts them).
    fn active_pids(&self) -> Vec<u32>;
}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct StateHeader {
90    pub cwd: PathBuf,
91    pub recent_hashes: Vec<FileHash>,
92    pub active_pids: Vec<u32>,
93}
94
95impl StateHeader {
96    pub fn render(&self) -> String {
97        let mut out = String::new();
98        let _ = write!(&mut out, "[CWD: {}", self.cwd.display());
99
100        if self.recent_hashes.is_empty() {
101            out.push_str(" | RECENT_HASHES: none");
102        } else {
103            out.push_str(" | RECENT_HASHES: ");
104            for (idx, file_hash) in self.recent_hashes.iter().enumerate() {
105                if idx > 0 {
106                    out.push_str(", ");
107                }
108                let _ = write!(
109                    &mut out,
110                    "{}#{}",
111                    file_hash.path.display(),
112                    shorten_hash(&file_hash.sha256)
113                );
114            }
115        }
116
117        if self.active_pids.is_empty() {
118            out.push_str(" | ACTIVE_PIDS: none]");
119        } else {
120            out.push_str(" | ACTIVE_PIDS: ");
121            for (idx, pid) in self.active_pids.iter().enumerate() {
122                if idx > 0 {
123                    out.push_str(", ");
124                }
125                let _ = write!(&mut out, "{pid}");
126            }
127            out.push(']');
128        }
129
130        out
131    }
132}
133
/// Everything produced by one call to `Engine::execute_turn`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TurnResult {
    /// Host state captured after all instructions of the turn were applied.
    pub state_header: StateHeader,
    /// Outputs in the order the instructions produced them.
    pub outputs: Vec<EngineOutput>,
}
139
/// One output item produced while executing a turn.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EngineOutput {
    /// Result of a `<read_file>` tag.
    ReadFile(ReadFileOutput),
    /// Result of a completed `<write_file>` block.
    WriteFile(WriteFileOutput),
    /// Result of a completed `<apply_edit>` block (applied or not).
    ApplyEdit(ApplyEditOutput),
    /// Result of a `<list_files>` tag.
    ListFiles(ListFilesOutput),
    /// Result of a completed `<terminal>` block.
    Terminal(TerminalExecution),
    /// Result of a `<terminal_signal>` tag.
    Signal(SignalOutput),
    /// Non-fatal notice, e.g. an unsupported tag that was ignored.
    Warning(String),
}
150
151#[derive(Debug, Clone, PartialEq, Eq)]
152pub struct ReadFileOutput {
153    pub path: PathBuf,
154    pub size_bytes: usize,
155    pub sha256: String,
156    pub requested_start: usize,
157    pub requested_end: usize,
158    pub served_start: usize,
159    pub served_end: usize,
160    pub total_lines: usize,
161    pub body: String,
162    pub warning: Option<String>,
163}
164
165impl ReadFileOutput {
166    pub fn fidelity_header(&self) -> String {
167        format!(
168            "[PATH: {} | SIZE: {} | SHA256: {} | LINES: {}-{}/{}]",
169            self.path.display(),
170            human_bytes(self.size_bytes),
171            shorten_hash(&self.sha256),
172            self.served_start,
173            self.served_end,
174            self.total_lines
175        )
176    }
177}
178
/// Outcome of a completed `<write_file>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// File that was written.
    pub path: PathBuf,
    /// Number of bytes in the buffer that was written.
    pub size_bytes: usize,
    /// Hash reported by the filesystem after the write.
    pub sha256: String,
}
185
/// Matching strategy that succeeded for an `apply_edit`, if any.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
    /// The exact-match strategy succeeded (tried first).
    Exact,
    /// The whitespace-agnostic strategy succeeded (tried after exact matching).
    WhitespaceAgnostic,
    // NOTE(review): presumably a further fallback strategy; its implementation
    // is outside the visible part of this file.
    ContextualAnchor,
    /// The edit was not applied.
    NotApplied,
}
193
/// Outcome of a completed `<apply_edit>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplyEditOutput {
    /// File targeted by the edit.
    pub path: PathBuf,
    /// Whether the file was rewritten.
    pub applied: bool,
    /// Strategy that matched, or `NotApplied`.
    pub tier: EditTier,
    /// Hash after the rewrite; `None` when the edit was not applied.
    pub sha256: Option<String>,
    /// Detected payload format, when one was recognised.
    pub format: Option<String>,
    /// Machine-readable reason when the edit was not applied
    /// (see the `APPLY_EDIT_REASON_*` constants).
    pub reason_code: Option<String>,
    /// Human-readable explanation when the edit was not applied.
    pub warning: Option<String>,
}
204
/// Outcome of a `<list_files>` request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
    /// Directory that was listed (`.` when no path was given).
    pub path: PathBuf,
    /// Rendered listing lines, possibly ending with a truncation marker.
    pub lines: Vec<String>,
}
210
/// Outcome of a `<terminal_signal>` request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SignalOutput {
    /// Process id the signal was sent to.
    pub pid: u32,
    /// Signal that was sent.
    pub signal: ProcessSignal,
}
216
/// Default number of listing lines emitted before the truncation marker.
pub const DEFAULT_MAX_LIST_LINES: usize = 300;
/// Default descendant-file count at which a directory is annotated as omitted.
pub const DEFAULT_DENSE_DIR_THRESHOLD: usize = 200;
/// Default timeout, in seconds, passed to the terminal provider.
pub const DEFAULT_TERMINAL_TIMEOUT_SECS: u64 = 5;
/// Default number of recent file hashes requested for the state header.
pub const DEFAULT_RECENT_HASH_LIMIT: usize = 5;

// Identifiers for the payload formats an `apply_edit` body can be detected as.
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS: &str = "search_replace_tags";
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS: &str = "search_replace_markers";
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS: &str = "search_replace_xml_blocks";
pub const APPLY_EDIT_FORMAT_UNIFIED_DIFF: &str = "unified_diff";
// NOTE(review): `raw_text` is not part of `APPLY_EDIT_SUPPORTED_FORMATS` below —
// confirm that this is intentional.
pub const APPLY_EDIT_FORMAT_RAW_TEXT: &str = "raw_text";

// Reason codes reported in `ApplyEditOutput::reason_code` when an edit is not applied.
pub const APPLY_EDIT_REASON_STALE_HASH: &str = "stale_hash";
pub const APPLY_EDIT_REASON_EMPTY_EDIT: &str = "empty_edit";
pub const APPLY_EDIT_REASON_PARSE_ERROR: &str = "parse_error";
pub const APPLY_EDIT_REASON_NO_HUNKS: &str = "no_hunks";
pub const APPLY_EDIT_REASON_NO_MATCH: &str = "no_match";

// Names of the state-header fields as advertised in the capability manifest.
pub const STATE_HEADER_FIELD_CWD: &str = "cwd";
pub const STATE_HEADER_FIELD_RECENT_HASHES: &str = "recent_hashes";
pub const STATE_HEADER_FIELD_ACTIVE_PIDS: &str = "active_pids";

/// Payload formats advertised in the capability manifest.
pub const APPLY_EDIT_SUPPORTED_FORMATS: &[&str] = &[
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS,
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS,
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS,
    APPLY_EDIT_FORMAT_UNIFIED_DIFF,
];

/// Reason codes advertised in the capability manifest.
pub const APPLY_EDIT_REASON_CODES: &[&str] = &[
    APPLY_EDIT_REASON_STALE_HASH,
    APPLY_EDIT_REASON_EMPTY_EDIT,
    APPLY_EDIT_REASON_PARSE_ERROR,
    APPLY_EDIT_REASON_NO_HUNKS,
    APPLY_EDIT_REASON_NO_MATCH,
];

/// State-header field names advertised in the capability manifest.
pub const STATE_HEADER_FIELDS: &[&str] = &[
    STATE_HEADER_FIELD_CWD,
    STATE_HEADER_FIELD_RECENT_HASHES,
    STATE_HEADER_FIELD_ACTIVE_PIDS,
];
258
/// Name and usage template of one enabled command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityDescriptor {
    /// Command name, taken from `Capability::name`.
    pub name: &'static str,
    /// Tag usage template, taken from `Capability::tag`.
    pub tag: &'static str,
}
264
/// Description of what an engine instance supports and how it is configured.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityManifest {
    /// Enabled commands.
    pub commands: Vec<CapabilityDescriptor>,
    /// Copy of `APPLY_EDIT_SUPPORTED_FORMATS`.
    pub apply_edit_formats: Vec<&'static str>,
    /// Copy of `APPLY_EDIT_REASON_CODES`.
    pub apply_edit_reason_codes: Vec<&'static str>,
    /// All `EditTier` values an edit result can report.
    pub apply_edit_tiers: Vec<EditTier>,
    /// Copy of `STATE_HEADER_FIELDS`.
    pub state_header_fields: Vec<&'static str>,
    /// Engine's configured listing line budget.
    pub max_list_lines: usize,
    /// Engine's configured dense-directory threshold.
    pub dense_dir_threshold: usize,
    /// Engine's configured terminal timeout.
    pub terminal_timeout: Duration,
    /// Engine's configured recent-hash limit.
    pub recent_hash_limit: usize,
}
277
/// Executes parsed instructions against a filesystem and a terminal provider.
#[derive(Debug)]
pub struct Engine<VFS, Terminal> {
    /// Filesystem boundary.
    vfs: VFS,
    /// Terminal boundary.
    terminal: Terminal,
    /// Last hash the engine observed per path (after read, write, or applied
    /// edit); used to detect external modification before applying an edit.
    known_hashes: HashMap<PathBuf, String>,
    /// Currently open block command (`write_file`, `apply_edit`, `terminal`), if any.
    pending: Option<PendingOperation>,
    /// Listing line budget before the truncation marker is emitted.
    max_list_lines: usize,
    /// Descendant-file count at which a directory is annotated as omitted.
    dense_dir_threshold: usize,
    /// Timeout passed to the terminal provider for each command.
    terminal_timeout: Duration,
    /// Number of recent hashes requested for the state header.
    recent_hash_limit: usize,
}
289
290impl<VFS, Terminal> Engine<VFS, Terminal>
291where
292    VFS: VirtualFileSystem,
293    Terminal: TerminalProvider,
294{
295    pub fn new(vfs: VFS, terminal: Terminal) -> Self {
296        Self {
297            vfs,
298            terminal,
299            known_hashes: HashMap::new(),
300            pending: None,
301            max_list_lines: DEFAULT_MAX_LIST_LINES,
302            dense_dir_threshold: DEFAULT_DENSE_DIR_THRESHOLD,
303            terminal_timeout: Duration::from_secs(DEFAULT_TERMINAL_TIMEOUT_SECS),
304            recent_hash_limit: DEFAULT_RECENT_HASH_LIMIT,
305        }
306    }
307
308    pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
309        self.terminal_timeout = timeout;
310        self
311    }
312
313    pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
314        self.max_list_lines = max_lines;
315        self.dense_dir_threshold = dense_threshold;
316        self
317    }
318
319    pub fn capability_manifest(&self) -> CapabilityManifest {
320        self.capability_manifest_for(DEFAULT_CAPABILITIES)
321    }
322
323    pub fn capability_manifest_for(&self, capabilities: &[Capability]) -> CapabilityManifest {
324        let enabled = if capabilities.is_empty() {
325            DEFAULT_CAPABILITIES.to_vec()
326        } else {
327            capabilities.to_vec()
328        };
329
330        CapabilityManifest {
331            commands: enabled
332                .iter()
333                .map(|capability| CapabilityDescriptor {
334                    name: capability.name(),
335                    tag: capability.tag(),
336                })
337                .collect(),
338            apply_edit_formats: APPLY_EDIT_SUPPORTED_FORMATS.to_vec(),
339            apply_edit_reason_codes: APPLY_EDIT_REASON_CODES.to_vec(),
340            apply_edit_tiers: vec![
341                EditTier::Exact,
342                EditTier::WhitespaceAgnostic,
343                EditTier::ContextualAnchor,
344                EditTier::NotApplied,
345            ],
346            state_header_fields: STATE_HEADER_FIELDS.to_vec(),
347            max_list_lines: self.max_list_lines,
348            dense_dir_threshold: self.dense_dir_threshold,
349            terminal_timeout: self.terminal_timeout,
350            recent_hash_limit: self.recent_hash_limit,
351        }
352    }
353
354    pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
355        let mut outputs = Vec::new();
356
357        for instruction in instructions {
358            self.apply_instruction(instruction, &mut outputs)?;
359        }
360
361        let state_header = StateHeader {
362            cwd: self.vfs.cwd()?,
363            recent_hashes: self.vfs.recent_file_hashes(self.recent_hash_limit)?,
364            active_pids: {
365                let mut pids = self.terminal.active_pids();
366                pids.sort_unstable();
367                pids
368            },
369        };
370
371        Ok(TurnResult {
372            state_header,
373            outputs,
374        })
375    }
376
377    fn apply_instruction(
378        &mut self,
379        instruction: Instruction,
380        outputs: &mut Vec<EngineOutput>,
381    ) -> Result<()> {
382        match instruction {
383            Instruction::Text(text) => {
384                if let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() {
385                    if edit.capture.is_none() {
386                        edit.raw_body.extend_from_slice(text.as_bytes());
387                    }
388                }
389            }
390            Instruction::StartTag { name, attributes } => {
391                self.handle_start_tag(&name, attributes, outputs)?;
392            }
393            Instruction::EndTag(name) => {
394                self.handle_end_tag(&name, outputs)?;
395            }
396            Instruction::WriteChunk(bytes) => {
397                let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
398                    return Err(EngineError::InvalidState(
399                        "received WriteChunk outside <write_file>".to_string(),
400                    ));
401                };
402                write.buffer.extend_from_slice(&bytes);
403            }
404            Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
405                Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
406                    Some(ApplyCapture::Search) if tag == "search" => {
407                        edit.search.extend_from_slice(&bytes);
408                    }
409                    Some(ApplyCapture::Replace) if tag == "replace" => {
410                        edit.replace.extend_from_slice(&bytes);
411                    }
412                    None => {
413                        edit.raw_body.extend_from_slice(&bytes);
414                    }
415                    _ => {
416                        return Err(EngineError::InvalidState(format!(
417                            "unexpected raw chunk for <{tag}> while applying edit"
418                        )));
419                    }
420                },
421                Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
422                    term.command.extend_from_slice(&bytes);
423                }
424                _ => {
425                    return Err(EngineError::InvalidState(format!(
426                        "received raw chunk for <{tag}> without active matching context"
427                    )));
428                }
429            },
430        }
431
432        Ok(())
433    }
434
435    fn handle_start_tag(
436        &mut self,
437        name: &str,
438        attributes: Attributes,
439        outputs: &mut Vec<EngineOutput>,
440    ) -> Result<()> {
441        match name {
442            "write_file" => {
443                self.ensure_no_pending("write_file")?;
444                let path = required_path(&attributes)?;
445                self.pending = Some(PendingOperation::WriteFile(PendingWrite {
446                    path,
447                    buffer: Vec::new(),
448                }));
449            }
450            "read_file" => {
451                let output = self.execute_read_file(&attributes)?;
452                outputs.push(EngineOutput::ReadFile(output));
453            }
454            "apply_edit" => {
455                self.ensure_no_pending("apply_edit")?;
456                let path = required_path(&attributes)?;
457                self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
458                    path,
459                    search: Vec::new(),
460                    replace: Vec::new(),
461                    raw_body: Vec::new(),
462                    capture: None,
463                }));
464            }
465            "search" => {
466                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
467                    return Err(EngineError::InvalidState(
468                        "<search> must be nested under <apply_edit>".to_string(),
469                    ));
470                };
471                edit.capture = Some(ApplyCapture::Search);
472            }
473            "replace" => {
474                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
475                    return Err(EngineError::InvalidState(
476                        "<replace> must be nested under <apply_edit>".to_string(),
477                    ));
478                };
479                edit.capture = Some(ApplyCapture::Replace);
480            }
481            "list_files" => {
482                let output = self.execute_list_files(&attributes)?;
483                outputs.push(EngineOutput::ListFiles(output));
484            }
485            "terminal" => {
486                self.ensure_no_pending("terminal")?;
487                let command = attributes
488                    .get("cmd")
489                    .or_else(|| attributes.get("command"))
490                    .cloned()
491                    .unwrap_or_default()
492                    .into_bytes();
493                self.pending = Some(PendingOperation::Terminal(PendingTerminal { command }));
494            }
495            "terminal_signal" => {
496                let signal_output = self.execute_terminal_signal(&attributes)?;
497                outputs.push(EngineOutput::Signal(signal_output));
498            }
499            other => outputs.push(EngineOutput::Warning(format!(
500                "unsupported start tag <{other}> ignored"
501            ))),
502        }
503
504        Ok(())
505    }
506
507    fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
508        match name {
509            "write_file" => {
510                let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
511                    return Err(EngineError::InvalidState(
512                        "</write_file> received without matching start".to_string(),
513                    ));
514                };
515                let output = self.finalize_write(write)?;
516                outputs.push(EngineOutput::WriteFile(output));
517            }
518            "search" | "replace" => {
519                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
520                    return Err(EngineError::InvalidState(format!(
521                        "</{name}> received without active <apply_edit>"
522                    )));
523                };
524                edit.capture = None;
525            }
526            "apply_edit" => {
527                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
528                    return Err(EngineError::InvalidState(
529                        "</apply_edit> received without matching start".to_string(),
530                    ));
531                };
532                let output = self.finalize_apply_edit(edit)?;
533                outputs.push(EngineOutput::ApplyEdit(output));
534            }
535            "terminal" => {
536                let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
537                    return Err(EngineError::InvalidState(
538                        "</terminal> received without matching start".to_string(),
539                    ));
540                };
541                let command =
542                    String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
543                let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
544                outputs.push(EngineOutput::Terminal(output));
545            }
546            other => outputs.push(EngineOutput::Warning(format!(
547                "unsupported end tag </{other}> ignored"
548            ))),
549        }
550
551        Ok(())
552    }
553
554    fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
555        self.vfs.write_atomic(&write.path, &write.buffer)?;
556        let sha256 = self.vfs.hash(&write.path)?;
557        self.known_hashes.insert(write.path.clone(), sha256.clone());
558
559        Ok(WriteFileOutput {
560            path: write.path,
561            size_bytes: write.buffer.len(),
562            sha256,
563        })
564    }
565
566    fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
567        let path = required_path(attributes)?;
568        let requested_start = optional_usize(attributes, "start_line")?
569            .unwrap_or(1)
570            .max(1);
571        let requested_end =
572            optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
573
574        let bytes = self.vfs.read(&path)?;
575        let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
576        let sha256 = self.vfs.hash(&path)?;
577        self.known_hashes.insert(path.clone(), sha256.clone());
578
579        let all_lines: Vec<&str> = text.lines().collect();
580        let total_lines = all_lines.len();
581
582        let (served_start, served_end, warning) = if total_lines == 0 {
583            (0, 0, Some("file is empty; returning no lines".to_string()))
584        } else {
585            let served_start = requested_start.min(total_lines);
586            let served_end = requested_end.max(served_start).min(total_lines);
587            let warning = if served_start != requested_start || served_end != requested_end {
588                Some(format!(
589                    "requested lines {}-{} adjusted to {}-{} (file has {} lines)",
590                    requested_start, requested_end, served_start, served_end, total_lines
591                ))
592            } else {
593                None
594            };
595            (served_start, served_end, warning)
596        };
597
598        let body = if total_lines == 0 {
599            String::new()
600        } else {
601            let mut rendered = Vec::new();
602            for line_idx in served_start..=served_end {
603                let content = all_lines[line_idx - 1];
604                rendered.push(format!("[{line_idx}] {content}"));
605            }
606            rendered.join("\n")
607        };
608
609        Ok(ReadFileOutput {
610            path,
611            size_bytes: bytes.len(),
612            sha256,
613            requested_start,
614            requested_end,
615            served_start,
616            served_end,
617            total_lines,
618            body,
619            warning,
620        })
621    }
622
    /// Applies the edit collected by a closed `<apply_edit>` block.
    ///
    /// Steps, in order:
    /// 1. Decode the captured search / replace / raw-body buffers as UTF-8 and
    ///    resolve them into an edit description.
    /// 2. If the engine has a recorded hash for the path and the file's
    ///    current hash differs, refuse with reason `stale_hash`.
    /// 3. Read the current file content as UTF-8.
    /// 4. If resolving the payload produced a reason code, refuse with it.
    /// 5. Apply diff hunks if present, otherwise the search/replace pair; if
    ///    nothing matches, refuse with reason `no_match`.
    /// 6. Write the rewritten content, record and return the new hash.
    ///
    /// Refusals are returned as `Ok` with `applied: false`; only decoding and
    /// filesystem failures are returned as `Err`.
    fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
        let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
        let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
        let raw_body = String::from_utf8(edit.raw_body).map_err(|_| EngineError::InvalidUtf8)?;
        let path = edit.path;
        let edit_input = resolve_apply_edit_input(&search, &replace, &raw_body);

        // Staleness is only checked for files the engine has hashed before.
        if let Some(previous_hash) = self.known_hashes.get(&path) {
            let current_hash = self.vfs.hash(&path)?;
            if previous_hash != &current_hash {
                return Ok(ApplyEditOutput {
                    path,
                    applied: false,
                    tier: EditTier::NotApplied,
                    sha256: None,
                    format: edit_input.format,
                    reason_code: Some(APPLY_EDIT_REASON_STALE_HASH.to_string()),
                    warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
                });
            }
        }

        // Note: the file is read before the payload's own reason code is
        // inspected, so a read failure takes precedence over a payload problem.
        let original_bytes = self.vfs.read(&path)?;
        let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;

        if let Some(reason_code) = edit_input.reason_code {
            return Ok(ApplyEditOutput {
                path,
                applied: false,
                tier: EditTier::NotApplied,
                sha256: None,
                format: edit_input.format,
                reason_code: Some(reason_code),
                // Fall back to a generic message when the resolver gave none.
                warning: edit_input
                    .warning
                    .or_else(|| Some("invalid apply_edit payload".to_string())),
            });
        }

        // Diff hunks take precedence over a search/replace pair.
        let apply_result = if let Some(hunks) = edit_input.diff_hunks {
            apply_diff_hunks_with_tiers(&original, &hunks)
        } else if let (Some(search), Some(replace)) =
            (edit_input.search.as_deref(), edit_input.replace.as_deref())
        {
            apply_edit_with_tiers(&original, search, replace)
        } else {
            None
        };
        let Some((rewritten, tier)) = apply_result else {
            return Ok(ApplyEditOutput {
                path,
                applied: false,
                tier: EditTier::NotApplied,
                sha256: None,
                format: edit_input.format,
                reason_code: Some(APPLY_EDIT_REASON_NO_MATCH.to_string()),
                warning: Some("no suitable target block found for apply_edit".to_string()),
            });
        };

        self.vfs.write_atomic(&path, rewritten.as_bytes())?;
        let sha256 = self.vfs.hash(&path)?;
        // Record the new hash so the next edit of this file is not flagged as stale.
        self.known_hashes.insert(path.clone(), sha256.clone());

        Ok(ApplyEditOutput {
            path,
            applied: true,
            tier,
            sha256: Some(sha256),
            format: edit_input.format,
            reason_code: None,
            warning: None,
        })
    }
697
698    fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
699        let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
700        let mut nodes = self.vfs.list_tree(&path)?;
701        nodes.sort_by(|a, b| a.path.cmp(&b.path));
702
703        let mut lines = Vec::new();
704
705        for node in nodes {
706            if lines.len() >= self.max_list_lines {
707                lines.push("[... truncated due to token budget ...]".to_string());
708                break;
709            }
710
711            let mut line = match node.kind {
712                NodeKind::Directory => {
713                    if node.descendant_file_count >= self.dense_dir_threshold {
714                        format!(
715                            "[dir] {}/ ({} files, omitted)",
716                            node.path.display(),
717                            node.descendant_file_count
718                        )
719                    } else {
720                        format!("[dir] {}/", node.path.display())
721                    }
722                }
723                NodeKind::File => format!("[file] {}", node.path.display()),
724            };
725
726            if node.modified_recently {
727                line.push_str(" (*)");
728            }
729
730            lines.push(line);
731        }
732
733        Ok(ListFilesOutput { path, lines })
734    }
735
736    fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
737        let pid_value = required_attr(attributes, "pid")?;
738        let pid = pid_value
739            .parse::<u32>()
740            .map_err(|_| EngineError::InvalidInteger {
741                name: "pid",
742                value: pid_value.to_string(),
743            })?;
744        let signal = match attributes
745            .get("signal")
746            .map(|v| v.to_ascii_uppercase())
747            .unwrap_or_else(|| "SIGINT".to_string())
748            .as_str()
749        {
750            "SIGINT" => ProcessSignal::SigInt,
751            "SIGTERM" => ProcessSignal::SigTerm,
752            "SIGKILL" => ProcessSignal::SigKill,
753            other => {
754                return Err(EngineError::InvalidState(format!(
755                    "unsupported signal `{other}`"
756                )));
757            }
758        };
759
760        self.terminal.signal(pid, signal)?;
761        Ok(SignalOutput { pid, signal })
762    }
763
764    fn ensure_no_pending(&self, next: &str) -> Result<()> {
765        if self.pending.is_some() {
766            return Err(EngineError::InvalidState(format!(
767                "cannot start <{next}> while another command block is still open"
768            )));
769        }
770        Ok(())
771    }
772}
773
/// Block command that has been opened but not yet closed.
#[derive(Debug)]
enum PendingOperation {
    /// Open `<write_file>` block.
    WriteFile(PendingWrite),
    /// Open `<apply_edit>` block.
    ApplyEdit(PendingApplyEdit),
    /// Open `<terminal>` block.
    Terminal(PendingTerminal),
}
780
/// State of an open `<write_file>` block.
#[derive(Debug)]
struct PendingWrite {
    /// Target file.
    path: PathBuf,
    /// Bytes received so far via write chunks.
    buffer: Vec<u8>,
}
786
/// State of an open `<apply_edit>` block.
#[derive(Debug)]
struct PendingApplyEdit {
    /// Target file.
    path: PathBuf,
    /// Bytes captured inside `<search>`.
    search: Vec<u8>,
    /// Bytes captured inside `<replace>`.
    replace: Vec<u8>,
    /// Bytes and text received while no capture was active.
    raw_body: Vec<u8>,
    /// Which nested tag is currently open, if any.
    capture: Option<ApplyCapture>,
}
795
/// Nested tag currently open inside an `<apply_edit>` block.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
    /// Inside `<search>`.
    Search,
    /// Inside `<replace>`.
    Replace,
}
801
/// State of an open `<terminal>` block.
#[derive(Debug)]
struct PendingTerminal {
    /// Command bytes: the inline `cmd`/`command` attribute followed by any raw chunks.
    command: Vec<u8>,
}
806
/// Commands that can be advertised to the operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    WriteFile,
    ApplyEdit,
    ReadFile,
    ListFiles,
    Terminal,
    TerminalSignal,
}

/// Capability set used whenever a caller passes an empty list.
pub const DEFAULT_CAPABILITIES: &[Capability] = &[
    Capability::WriteFile,
    Capability::ApplyEdit,
    Capability::ReadFile,
    Capability::ListFiles,
    Capability::Terminal,
    Capability::TerminalSignal,
];

impl Capability {
    /// Returns the tag name of the command (e.g. `write_file`).
    pub fn name(&self) -> &'static str {
        match *self {
            Self::WriteFile => "write_file",
            Self::ApplyEdit => "apply_edit",
            Self::ReadFile => "read_file",
            Self::ListFiles => "list_files",
            Self::Terminal => "terminal",
            Self::TerminalSignal => "terminal_signal",
        }
    }

    /// Returns the usage template shown to the operator for the command.
    pub fn tag(&self) -> &'static str {
        match *self {
            Self::WriteFile => "<write_file path=\"...\"></write_file>",
            Self::ApplyEdit => {
                "<apply_edit path=\"...\">[search/replace blocks or patch body]</apply_edit>"
            }
            Self::ReadFile => "<read_file path=\"...\" start_line=\"..\" end_line=\"...\" />",
            Self::ListFiles => "<list_files path=\"...\" />",
            Self::Terminal => "<terminal>...</terminal> or <terminal cmd=\"...\" />",
            Self::TerminalSignal => "<terminal_signal pid=\"...\" signal=\"SIGINT\" />",
        }
    }
}
851
852pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
853    let enabled: Vec<Capability> = if capabilities.is_empty() {
854        DEFAULT_CAPABILITIES.to_vec()
855    } else {
856        capabilities.to_vec()
857    };
858
859    let mut out = String::from(
860        "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
861    );
862    out.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
863    out.push_str("Available Commands:\n");
864    for capability in &enabled {
865        out.push_str(capability.tag());
866        out.push('\n');
867    }
868    out.push_str("\nRules:\n");
869    out.push_str("1. Do not escape strings inside tags.\n");
870    out.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
871    out.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
872    out.push_str(
873        "4. apply_edit accepts XML search/replace blocks, SEARCH/REPLACE markers, or unified diff hunks.\n",
874    );
875    out
876}
877
878fn required_path(attributes: &Attributes) -> Result<PathBuf> {
879    optional_path(attributes, "path")?.ok_or(EngineError::MissingAttribute("path"))
880}
881
882fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
883    Ok(attributes.get(key).map(PathBuf::from))
884}
885
886fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
887    attributes
888        .get(key)
889        .map(|value| value.as_str())
890        .ok_or(EngineError::MissingAttribute(key))
891}
892
893fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
894    let Some(value) = attributes.get(key) else {
895        return Ok(None);
896    };
897    let parsed = value
898        .parse::<usize>()
899        .map_err(|_| EngineError::InvalidInteger {
900            name: key,
901            value: value.clone(),
902        })?;
903    Ok(Some(parsed))
904}
905
/// Returns the first eight characters of `hash` (or all of it when shorter).
fn shorten_hash(hash: &str) -> String {
    // Byte offset of the ninth character, if there is one, is where we cut.
    match hash.char_indices().nth(8) {
        Some((cut, _)) => hash[..cut].to_string(),
        None => hash.to_string(),
    }
}
909
/// Formats a byte count as `"<n>b"`, `"<x.y>kb"` or `"<x.y>mb"`.
///
/// Units are binary (1 kb = 1024 b, 1 mb = 1024 kb); kb/mb values are
/// printed with one decimal place.
fn human_bytes(bytes: usize) -> String {
    const KIB: f64 = 1024.0;
    const MIB: f64 = KIB * 1024.0;

    let size = bytes as f64;
    if size < KIB {
        return format!("{bytes}b");
    }
    if size < MIB {
        return format!("{:.1}kb", size / KIB);
    }
    format!("{:.1}mb", size / MIB)
}
923
924fn apply_edit_with_tiers(
925    original: &str,
926    search: &str,
927    replace: &str,
928) -> Option<(String, EditTier)> {
929    if search.is_empty() {
930        return None;
931    }
932
933    if let Some(output) = apply_exact(original, search, replace) {
934        return Some((output, EditTier::Exact));
935    }
936
937    if let Some(output) = apply_whitespace_agnostic(original, search, replace) {
938        return Some((output, EditTier::WhitespaceAgnostic));
939    }
940
941    apply_contextual_anchor(original, search, replace)
942        .map(|output| (output, EditTier::ContextualAnchor))
943}
944
/// One unified-diff hunk expressed as a search/replace pair.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DiffHunkReplacement {
    /// Text the hunk expects to find: its context and removed lines.
    search: String,
    /// Text the hunk produces: its context and added lines.
    replace: String,
}
950
951#[derive(Debug, Clone)]
952struct ResolvedApplyEditInput {
953    search: Option<String>,
954    replace: Option<String>,
955    diff_hunks: Option<Vec<DiffHunkReplacement>>,
956    format: Option<String>,
957    reason_code: Option<String>,
958    warning: Option<String>,
959}
960
961fn resolve_apply_edit_input(search: &str, replace: &str, raw_body: &str) -> ResolvedApplyEditInput {
962    if !search.is_empty() {
963        return ResolvedApplyEditInput {
964            search: Some(search.to_string()),
965            replace: Some(replace.to_string()),
966            diff_hunks: None,
967            format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS.to_string()),
968            reason_code: None,
969            warning: None,
970        };
971    }
972
973    let body = decode_basic_xml_entities(raw_body).trim().to_string();
974    if body.is_empty() {
975        return ResolvedApplyEditInput {
976            search: None,
977            replace: None,
978            diff_hunks: None,
979            format: None,
980            reason_code: Some(APPLY_EDIT_REASON_EMPTY_EDIT.to_string()),
981            warning: Some(
982                "apply_edit requires <search>/<replace> blocks or a non-empty patch body"
983                    .to_string(),
984            ),
985        };
986    }
987
988    if let Some((parsed_search, parsed_replace)) = parse_search_replace_markers(&body) {
989        return ResolvedApplyEditInput {
990            search: Some(parsed_search),
991            replace: Some(parsed_replace),
992            diff_hunks: None,
993            format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS.to_string()),
994            reason_code: None,
995            warning: None,
996        };
997    }
998
999    if let Some((parsed_search, parsed_replace)) = parse_apply_edit_xml_blocks(&body) {
1000        return ResolvedApplyEditInput {
1001            search: Some(parsed_search),
1002            replace: Some(parsed_replace),
1003            diff_hunks: None,
1004            format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS.to_string()),
1005            reason_code: None,
1006            warning: None,
1007        };
1008    }
1009
1010    if let Some(hunks) = parse_unified_diff_hunks(&body) {
1011        if hunks.is_empty() {
1012            return ResolvedApplyEditInput {
1013                search: None,
1014                replace: None,
1015                diff_hunks: None,
1016                format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
1017                reason_code: Some(APPLY_EDIT_REASON_NO_HUNKS.to_string()),
1018                warning: Some(
1019                    "unified diff was detected but no @@ hunk blocks were parsed".to_string(),
1020                ),
1021            };
1022        }
1023        return ResolvedApplyEditInput {
1024            search: None,
1025            replace: None,
1026            diff_hunks: Some(hunks),
1027            format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
1028            reason_code: None,
1029            warning: None,
1030        };
1031    }
1032
1033    ResolvedApplyEditInput {
1034        search: None,
1035        replace: None,
1036        diff_hunks: None,
1037        format: Some(APPLY_EDIT_FORMAT_RAW_TEXT.to_string()),
1038        reason_code: Some(APPLY_EDIT_REASON_PARSE_ERROR.to_string()),
1039        warning: Some("unsupported apply_edit body format".to_string()),
1040    }
1041}
1042
/// Decodes the XML entities `&lt;`, `&gt;` and `&amp;`; everything else is
/// copied through unchanged.
///
/// Each entity is decoded once: `&amp;lt;` becomes `&lt;`, not `<`.
fn decode_basic_xml_entities(input: &str) -> String {
    const ENTITIES: [(&str, char); 3] = [("&lt;", '<'), ("&gt;", '>'), ("&amp;", '&')];

    let mut decoded = String::with_capacity(input.len());
    let mut rest = input;
    while let Some(amp) = rest.find('&') {
        decoded.push_str(&rest[..amp]);
        let tail = &rest[amp..];
        match ENTITIES.iter().find(|(entity, _)| tail.starts_with(*entity)) {
            Some((entity, plain)) => {
                decoded.push(*plain);
                rest = &tail[entity.len()..];
            }
            None => {
                // A bare `&` that does not start a known entity stays literal.
                decoded.push('&');
                rest = &tail[1..];
            }
        }
    }
    decoded.push_str(rest);
    decoded
}
1049
/// Extracts search/replace text delimited by conflict-style markers.
///
/// Recognised marker lines (compared after trimming whitespace):
/// `<<<<<<< SEARCH`, `=======` and `>>>>>>> REPLACE`. Lines between the first
/// two go to the search text, lines between the last two to the replace text;
/// content lines keep their original indentation. When several marker blocks
/// are present their lines are concatenated.
///
/// Returns `None` when no `<<<<<<< SEARCH` line is present.
fn parse_search_replace_markers(input: &str) -> Option<(String, String)> {
    #[derive(Clone, Copy)]
    enum Section {
        Outside,
        Search,
        Replace,
    }

    let mut section = Section::Outside;
    let mut search_lines: Vec<&str> = Vec::new();
    let mut replace_lines: Vec<&str> = Vec::new();
    let mut found_block = false;

    for line in input.lines() {
        match (line.trim(), section) {
            ("<<<<<<< SEARCH", _) => {
                section = Section::Search;
                found_block = true;
            }
            ("=======", Section::Search) => section = Section::Replace,
            (">>>>>>> REPLACE", Section::Replace) => section = Section::Outside,
            // Any other line is content for whichever section is open.
            (_, Section::Search) => search_lines.push(line),
            (_, Section::Replace) => replace_lines.push(line),
            (_, Section::Outside) => {}
        }
    }

    if found_block {
        Some((search_lines.join("\n"), replace_lines.join("\n")))
    } else {
        None
    }
}
1083
1084fn parse_apply_edit_xml_blocks(input: &str) -> Option<(String, String)> {
1085    let search = extract_tag_body(input, "search")?;
1086    let replace = extract_tag_body(input, "replace")?;
1087    Some((search, replace))
1088}
1089
/// Returns the text between the first `<tag>` and the next `</tag>`.
///
/// The opening tag must be attribute-free (exactly `<tag>`). Returns `None`
/// when either the opening tag or a closing tag after it is missing.
fn extract_tag_body(input: &str, tag: &str) -> Option<String> {
    let opening = format!("<{tag}>");
    let closing = format!("</{tag}>");
    let (_, after_open) = input.split_once(opening.as_str())?;
    let (inner, _) = after_open.split_once(closing.as_str())?;
    Some(inner.to_string())
}
1097
1098fn parse_unified_diff_hunks(input: &str) -> Option<Vec<DiffHunkReplacement>> {
1099    let lines = input.lines().collect::<Vec<_>>();
1100    let mut idx = 0usize;
1101    let mut hunks = Vec::new();
1102    let mut saw_hunk_header = false;
1103
1104    while idx < lines.len() {
1105        let line = lines[idx].trim_end_matches('\r');
1106        if line.starts_with("@@") {
1107            saw_hunk_header = true;
1108            idx = idx.saturating_add(1);
1109            let mut search_lines = Vec::new();
1110            let mut replace_lines = Vec::new();
1111
1112            while idx < lines.len() {
1113                let current = lines[idx].trim_end_matches('\r');
1114                if current.starts_with("@@") {
1115                    break;
1116                }
1117                if current.starts_with("diff --git ")
1118                    || current.starts_with("*** End Patch")
1119                    || current.starts_with("*** Update File:")
1120                {
1121                    break;
1122                }
1123                if current.eq("\\ No newline at end of file") {
1124                    idx = idx.saturating_add(1);
1125                    continue;
1126                }
1127                if let Some(rest) = current.strip_prefix('+') {
1128                    if !current.starts_with("+++") {
1129                        replace_lines.push(rest.to_string());
1130                    }
1131                } else if let Some(rest) = current.strip_prefix('-') {
1132                    if !current.starts_with("---") {
1133                        search_lines.push(rest.to_string());
1134                    }
1135                } else if let Some(rest) = current.strip_prefix(' ') {
1136                    search_lines.push(rest.to_string());
1137                    replace_lines.push(rest.to_string());
1138                }
1139                idx = idx.saturating_add(1);
1140            }
1141
1142            if !(search_lines.is_empty() && replace_lines.is_empty()) {
1143                hunks.push(DiffHunkReplacement {
1144                    search: search_lines.join("\n"),
1145                    replace: replace_lines.join("\n"),
1146                });
1147            }
1148            continue;
1149        }
1150        idx = idx.saturating_add(1);
1151    }
1152
1153    saw_hunk_header.then_some(hunks)
1154}
1155
1156fn apply_diff_hunks_with_tiers(
1157    original: &str,
1158    hunks: &[DiffHunkReplacement],
1159) -> Option<(String, EditTier)> {
1160    let mut current = original.to_string();
1161    let mut strongest_tier = EditTier::Exact;
1162    for hunk in hunks {
1163        let (next, tier) = apply_edit_with_tiers(&current, &hunk.search, &hunk.replace)?;
1164        if edit_tier_rank(&tier) > edit_tier_rank(&strongest_tier) {
1165            strongest_tier = tier;
1166        }
1167        current = next;
1168    }
1169    Some((current, strongest_tier))
1170}
1171
1172const fn edit_tier_rank(tier: &EditTier) -> usize {
1173    match tier {
1174        EditTier::Exact => 0,
1175        EditTier::WhitespaceAgnostic => 1,
1176        EditTier::ContextualAnchor => 2,
1177        EditTier::NotApplied => 3,
1178    }
1179}
1180
/// Replaces the first exact occurrence of `search` in `original`.
///
/// Returns `None` when `search` does not occur.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    let (head, tail) = original.split_once(search)?;
    Some([head, replace, tail].concat())
}
1189
1190fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
1191    let original_lines = collect_line_spans(original);
1192    let search_lines: Vec<&str> = search.lines().collect();
1193    if search_lines.is_empty() || original_lines.len() < search_lines.len() {
1194        return None;
1195    }
1196
1197    for start in 0..=original_lines.len() - search_lines.len() {
1198        let window = &original_lines[start..start + search_lines.len()];
1199        if window
1200            .iter()
1201            .zip(search_lines.iter())
1202            .all(|(candidate, target)| candidate.text.trim() == target.trim())
1203        {
1204            let range_start = window.first()?.start;
1205            let range_end = window.last()?.end;
1206            return Some(splice(original, range_start, range_end, replace));
1207        }
1208    }
1209
1210    None
1211}
1212
1213fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
1214    let original_lines = collect_line_spans(original);
1215    let search_lines: Vec<&str> = search.lines().collect();
1216    if search_lines.is_empty() || original_lines.is_empty() {
1217        return None;
1218    }
1219
1220    let window_len = search_lines.len().min(original_lines.len());
1221    let normalized_search = normalize_for_distance(search);
1222    let mut best: Option<(usize, usize, usize)> = None;
1223
1224    for start in 0..=original_lines.len() - window_len {
1225        let window = &original_lines[start..start + window_len];
1226        let joined = window
1227            .iter()
1228            .map(|line| line.text)
1229            .collect::<Vec<_>>()
1230            .join("\n");
1231        let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
1232
1233        match best {
1234            Some((best_score, _, _)) if score >= best_score => {}
1235            _ => best = Some((score, start, start + window_len - 1)),
1236        }
1237    }
1238
1239    let (score, line_start, line_end) = best?;
1240    let threshold = normalized_search.len().max(6) / 3;
1241    if score > threshold {
1242        return None;
1243    }
1244
1245    let range_start = original_lines[line_start].start;
1246    let range_end = original_lines[line_end].end;
1247    Some(splice(original, range_start, range_end, replace))
1248}
1249
/// Normalises text for distance comparison.
///
/// Every line has its whitespace runs collapsed to single spaces (and its
/// leading/trailing whitespace removed); lines are rejoined with `\n` and the
/// result is trimmed as a whole.
fn normalize_for_distance(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for (line_index, line) in input.lines().enumerate() {
        if line_index > 0 {
            collapsed.push('\n');
        }
        for (word_index, word) in line.split_whitespace().enumerate() {
            if word_index > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(word);
        }
    }
    collapsed.trim().to_string()
}
1259
/// Returns `original` with the byte range `range_start..range_end` replaced
/// by `replace`.
///
/// Both offsets must lie on character boundaries within `original`, otherwise
/// the slicing panics.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    let head = &original[..range_start];
    let tail = &original[range_end..];
    [head, replace, tail].concat()
}
1267
/// A single line of a text together with its byte range in that text.
#[derive(Debug)]
struct LineSpan<'a> {
    /// Byte offset of the first character of the line.
    start: usize,
    /// Byte offset just past the line, including its trailing `\n` if any.
    end: usize,
    /// The line's content without the trailing `\n`.
    text: &'a str,
}
1274
1275fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
1276    let mut spans = Vec::new();
1277    let mut offset = 0usize;
1278
1279    for chunk in input.split_inclusive('\n') {
1280        let end = offset + chunk.len();
1281        let text = chunk.strip_suffix('\n').unwrap_or(chunk);
1282        spans.push(LineSpan {
1283            start: offset,
1284            end,
1285            text,
1286        });
1287        offset = end;
1288    }
1289
1290    if input.is_empty() {
1291        return spans;
1292    }
1293
1294    if !input.ends_with('\n') {
1295        if let Some(last) = spans.last_mut() {
1296            last.end = input.len();
1297        }
1298    }
1299
1300    spans
1301}
1302
1303#[cfg(test)]
1304mod tests {
1305    use super::*;
1306    use std::collections::BTreeMap;
1307    use std::sync::{Arc, Mutex};
1308
1309    #[derive(Clone, Default)]
1310    struct InMemoryVfs {
1311        files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
1312        tree: Arc<Mutex<Vec<TreeNode>>>,
1313    }
1314
1315    impl InMemoryVfs {
1316        fn set_file(&self, path: &str, body: &str) {
1317            self.files
1318                .lock()
1319                .expect("lock")
1320                .insert(PathBuf::from(path), body.as_bytes().to_vec());
1321        }
1322
1323        fn get_file(&self, path: &str) -> String {
1324            String::from_utf8(
1325                self.files
1326                    .lock()
1327                    .expect("lock")
1328                    .get(&PathBuf::from(path))
1329                    .cloned()
1330                    .unwrap_or_default(),
1331            )
1332            .expect("utf8")
1333        }
1334
1335        fn set_tree(&self, nodes: Vec<TreeNode>) {
1336            *self.tree.lock().expect("lock") = nodes;
1337        }
1338    }
1339
1340    impl VirtualFileSystem for InMemoryVfs {
1341        fn read(&self, path: &Path) -> Result<Vec<u8>> {
1342            self.files
1343                .lock()
1344                .expect("lock")
1345                .get(path)
1346                .cloned()
1347                .ok_or_else(|| EngineError::Vfs(format!("missing file {}", path.display())))
1348        }
1349
1350        fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
1351            self.files
1352                .lock()
1353                .expect("lock")
1354                .insert(path.to_path_buf(), bytes.to_vec());
1355            Ok(())
1356        }
1357
1358        fn hash(&self, path: &Path) -> Result<String> {
1359            let bytes = self.read(path)?;
1360            Ok(simple_hash(&bytes))
1361        }
1362
1363        fn cwd(&self) -> Result<PathBuf> {
1364            Ok(PathBuf::from("/virtual"))
1365        }
1366
1367        fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
1368            Ok(self.tree.lock().expect("lock").clone())
1369        }
1370
1371        fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
1372            let files = self.files.lock().expect("lock");
1373            let mut entries: Vec<_> = files
1374                .iter()
1375                .map(|(path, body)| FileHash {
1376                    path: path.clone(),
1377                    sha256: simple_hash(body),
1378                })
1379                .collect();
1380            entries.sort_by(|a, b| a.path.cmp(&b.path));
1381            entries.truncate(limit);
1382            Ok(entries)
1383        }
1384    }
1385
1386    #[derive(Default)]
1387    struct MockTerminal {
1388        pids: Vec<u32>,
1389        last_command: Option<String>,
1390    }
1391
1392    impl TerminalProvider for MockTerminal {
1393        fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
1394            self.last_command = Some(command.to_string());
1395            Ok(TerminalExecution {
1396                output: format!("ran: {command}"),
1397                exit_code: Some(0),
1398                cwd: PathBuf::from("/virtual"),
1399                detached_pid: None,
1400            })
1401        }
1402
1403        fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
1404            self.pids.retain(|existing| *existing != pid);
1405            Ok(())
1406        }
1407
1408        fn active_pids(&self) -> Vec<u32> {
1409            self.pids.clone()
1410        }
1411    }
1412
1413    #[test]
1414    fn write_file_chunks_commit_atomically() {
1415        let vfs = InMemoryVfs::default();
1416        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1417
1418        let turn = engine
1419            .execute_turn(vec![
1420                Instruction::StartTag {
1421                    name: "write_file".to_string(),
1422                    attributes: BTreeMap::from([("path".to_string(), "src/main.rs".to_string())]),
1423                },
1424                Instruction::WriteChunk(b"fn main()".to_vec()),
1425                Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
1426                Instruction::EndTag("write_file".to_string()),
1427            ])
1428            .expect("turn should run");
1429
1430        assert_eq!(
1431            vfs.get_file("src/main.rs"),
1432            "fn main() { println!(\"ok\"); }"
1433        );
1434        assert!(matches!(
1435            turn.outputs.as_slice(),
1436            [EngineOutput::WriteFile(WriteFileOutput { .. })]
1437        ));
1438    }
1439
1440    #[test]
1441    fn read_file_returns_fidelity_header_and_numbered_lines() {
1442        let vfs = InMemoryVfs::default();
1443        vfs.set_file("src/lib.rs", "a\nb\nc\nd\n");
1444        let mut engine = Engine::new(vfs, MockTerminal::default());
1445
1446        let turn = engine
1447            .execute_turn(vec![Instruction::StartTag {
1448                name: "read_file".to_string(),
1449                attributes: BTreeMap::from([
1450                    ("path".to_string(), "src/lib.rs".to_string()),
1451                    ("start_line".to_string(), "3".to_string()),
1452                    ("end_line".to_string(), "9".to_string()),
1453                ]),
1454            }])
1455            .expect("turn should run");
1456
1457        let EngineOutput::ReadFile(output) = &turn.outputs[0] else {
1458            panic!("expected read output");
1459        };
1460
1461        assert_eq!(output.served_start, 3);
1462        assert_eq!(output.served_end, 4);
1463        assert_eq!(output.body, "[3] c\n[4] d");
1464        assert!(output.warning.is_some());
1465        assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
1466    }
1467
1468    #[test]
1469    fn apply_edit_uses_whitespace_agnostic_matching() {
1470        let vfs = InMemoryVfs::default();
1471        vfs.set_file("src/lib.rs", "fn main() {\n    println!(\"x\");\n}\n");
1472        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1473
1474        let turn = engine
1475            .execute_turn(vec![
1476                Instruction::StartTag {
1477                    name: "apply_edit".to_string(),
1478                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1479                },
1480                Instruction::StartTag {
1481                    name: "search".to_string(),
1482                    attributes: BTreeMap::new(),
1483                },
1484                Instruction::RawChunk {
1485                    tag: "search".to_string(),
1486                    bytes: b"   println!(\"x\");   ".to_vec(),
1487                },
1488                Instruction::EndTag("search".to_string()),
1489                Instruction::StartTag {
1490                    name: "replace".to_string(),
1491                    attributes: BTreeMap::new(),
1492                },
1493                Instruction::RawChunk {
1494                    tag: "replace".to_string(),
1495                    bytes: b"println!(\"y\");".to_vec(),
1496                },
1497                Instruction::EndTag("replace".to_string()),
1498                Instruction::EndTag("apply_edit".to_string()),
1499            ])
1500            .expect("turn should run");
1501
1502        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1503            panic!("expected apply_edit output");
1504        };
1505
1506        assert!(edit.applied);
1507        assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
1508        assert!(vfs.get_file("src/lib.rs").contains("println!(\"y\");"));
1509    }
1510
1511    #[test]
1512    fn apply_edit_accepts_unified_diff_hunk_body() {
1513        let vfs = InMemoryVfs::default();
1514        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1515        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1516
1517        let turn = engine
1518            .execute_turn(vec![
1519                Instruction::StartTag {
1520                    name: "apply_edit".to_string(),
1521                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1522                },
1523                Instruction::Text(
1524                    "@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n".to_string(),
1525                ),
1526                Instruction::EndTag("apply_edit".to_string()),
1527            ])
1528            .expect("turn should run");
1529
1530        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1531            panic!("expected apply_edit output");
1532        };
1533
1534        assert!(edit.applied);
1535        assert_eq!(edit.format.as_deref(), Some("unified_diff"));
1536        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
1537    }
1538
1539    #[test]
1540    fn apply_edit_accepts_begin_patch_wrapper_body() {
1541        let vfs = InMemoryVfs::default();
1542        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1543        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1544
1545        let turn = engine
1546            .execute_turn(vec![
1547                Instruction::StartTag {
1548                    name: "apply_edit".to_string(),
1549                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1550                },
1551                Instruction::Text(
1552                    "*** Begin Patch\n*** Update File: src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n".to_string(),
1553                ),
1554                Instruction::EndTag("apply_edit".to_string()),
1555            ])
1556            .expect("turn should run");
1557
1558        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1559            panic!("expected apply_edit output");
1560        };
1561
1562        assert!(edit.applied);
1563        assert_eq!(edit.format.as_deref(), Some("unified_diff"));
1564        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
1565    }
1566
1567    #[test]
1568    fn apply_edit_accepts_begin_patch_wrapper_with_absolute_update_file_path() {
1569        let vfs = InMemoryVfs::default();
1570        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1571        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1572
1573        let turn = engine
1574            .execute_turn(vec![
1575                Instruction::StartTag {
1576                    name: "apply_edit".to_string(),
1577                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1578                },
1579                Instruction::Text(
1580                    "*** Begin Patch\n*** Update File: /tmp/workspace/src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n".to_string(),
1581                ),
1582                Instruction::EndTag("apply_edit".to_string()),
1583            ])
1584            .expect("turn should run");
1585
1586        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1587            panic!("expected apply_edit output");
1588        };
1589
1590        assert!(edit.applied);
1591        assert_eq!(edit.format.as_deref(), Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF));
1592        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
1593    }
1594
1595    #[test]
1596    fn apply_edit_accepts_search_replace_markers_body() {
1597        let vfs = InMemoryVfs::default();
1598        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1599        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1600
1601        let turn = engine
1602            .execute_turn(vec![
1603                Instruction::StartTag {
1604                    name: "apply_edit".to_string(),
1605                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1606                },
1607                Instruction::Text(
1608                    "<<<<<<< SEARCH\nbeta\n=======\nbeta (edited)\n>>>>>>> REPLACE\n".to_string(),
1609                ),
1610                Instruction::EndTag("apply_edit".to_string()),
1611            ])
1612            .expect("turn should run");
1613
1614        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1615            panic!("expected apply_edit output");
1616        };
1617
1618        assert!(edit.applied);
1619        assert_eq!(edit.format.as_deref(), Some("search_replace_markers"));
1620        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
1621    }
1622
1623    #[test]
1624    fn apply_edit_accepts_xml_escaped_search_replace_markers_body() {
1625        let vfs = InMemoryVfs::default();
1626        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1627        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1628
1629        let turn = engine
1630            .execute_turn(vec![
1631                Instruction::StartTag {
1632                    name: "apply_edit".to_string(),
1633                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1634                },
1635                Instruction::Text(
1636                    "&lt;&lt;&lt;&lt;&lt;&lt;&lt; SEARCH\nbeta\n=======\nbeta (escaped)\n&gt;&gt;&gt;&gt;&gt;&gt;&gt; REPLACE\n".to_string(),
1637                ),
1638                Instruction::EndTag("apply_edit".to_string()),
1639            ])
1640            .expect("turn should run");
1641
1642        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1643            panic!("expected apply_edit output");
1644        };
1645
1646        assert!(edit.applied);
1647        assert_eq!(
1648            edit.format.as_deref(),
1649            Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
1650        );
1651        assert!(vfs.get_file("src/lib.rs").contains("beta (escaped)"));
1652    }
1653
1654    #[test]
1655    fn apply_edit_accepts_xml_search_replace_blocks_in_raw_body() {
1656        let vfs = InMemoryVfs::default();
1657        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1658        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1659
1660        let turn = engine
1661            .execute_turn(vec![
1662                Instruction::StartTag {
1663                    name: "apply_edit".to_string(),
1664                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1665                },
1666                Instruction::RawChunk {
1667                    tag: "apply_edit".to_string(),
1668                    bytes: b"<search>beta</search><replace>beta (edited)</replace>".to_vec(),
1669                },
1670                Instruction::EndTag("apply_edit".to_string()),
1671            ])
1672            .expect("turn should run");
1673
1674        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1675            panic!("expected apply_edit output");
1676        };
1677
1678        assert!(edit.applied);
1679        assert_eq!(edit.format.as_deref(), Some("search_replace_xml_blocks"));
1680        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
1681    }
1682
1683    #[test]
1684    fn apply_edit_reports_parse_error_reason_code_for_unsupported_raw_body() {
1685        let vfs = InMemoryVfs::default();
1686        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
1687        let mut engine = Engine::new(vfs, MockTerminal::default());
1688
1689        let turn = engine
1690            .execute_turn(vec![
1691                Instruction::StartTag {
1692                    name: "apply_edit".to_string(),
1693                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1694                },
1695                Instruction::Text("totally unsupported patch format".to_string()),
1696                Instruction::EndTag("apply_edit".to_string()),
1697            ])
1698            .expect("turn should run");
1699
1700        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1701            panic!("expected apply_edit output");
1702        };
1703
1704        assert!(!edit.applied);
1705        assert_eq!(edit.reason_code.as_deref(), Some("parse_error"));
1706        assert_eq!(edit.format.as_deref(), Some("raw_text"));
1707    }
1708
1709    #[test]
1710    fn capability_manifest_reports_current_engine_contract() {
1711        let engine = Engine::new(InMemoryVfs::default(), MockTerminal::default())
1712            .with_terminal_timeout(Duration::from_secs(9))
1713            .with_list_budget(77, 33);
1714
1715        let manifest = engine.capability_manifest();
1716
1717        assert_eq!(
1718            manifest
1719                .commands
1720                .iter()
1721                .map(|command| command.name)
1722                .collect::<Vec<_>>(),
1723            vec![
1724                "write_file",
1725                "apply_edit",
1726                "read_file",
1727                "list_files",
1728                "terminal",
1729                "terminal_signal",
1730            ]
1731        );
1732        assert!(
1733            manifest
1734                .apply_edit_formats
1735                .contains(&APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
1736        );
1737        assert!(
1738            manifest
1739                .apply_edit_reason_codes
1740                .contains(&APPLY_EDIT_REASON_PARSE_ERROR)
1741        );
1742        assert_eq!(
1743            manifest.apply_edit_tiers,
1744            vec![
1745                EditTier::Exact,
1746                EditTier::WhitespaceAgnostic,
1747                EditTier::ContextualAnchor,
1748                EditTier::NotApplied,
1749            ]
1750        );
1751        assert_eq!(manifest.state_header_fields, STATE_HEADER_FIELDS);
1752        assert_eq!(manifest.max_list_lines, 77);
1753        assert_eq!(manifest.dense_dir_threshold, 33);
1754        assert_eq!(manifest.terminal_timeout, Duration::from_secs(9));
1755        assert_eq!(manifest.recent_hash_limit, DEFAULT_RECENT_HASH_LIMIT);
1756    }
1757
1758    #[test]
1759    fn apply_edit_warns_if_file_changed_since_last_read() {
1760        let vfs = InMemoryVfs::default();
1761        vfs.set_file("src/lib.rs", "alpha\nbeta\n");
1762        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1763
1764        let _ = engine
1765            .execute_turn(vec![Instruction::StartTag {
1766                name: "read_file".to_string(),
1767                attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1768            }])
1769            .expect("read should work");
1770
1771        vfs.set_file("src/lib.rs", "external\nchange\n");
1772
1773        let turn = engine
1774            .execute_turn(vec![
1775                Instruction::StartTag {
1776                    name: "apply_edit".to_string(),
1777                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1778                },
1779                Instruction::StartTag {
1780                    name: "search".to_string(),
1781                    attributes: BTreeMap::new(),
1782                },
1783                Instruction::RawChunk {
1784                    tag: "search".to_string(),
1785                    bytes: b"beta".to_vec(),
1786                },
1787                Instruction::EndTag("search".to_string()),
1788                Instruction::StartTag {
1789                    name: "replace".to_string(),
1790                    attributes: BTreeMap::new(),
1791                },
1792                Instruction::RawChunk {
1793                    tag: "replace".to_string(),
1794                    bytes: b"gamma".to_vec(),
1795                },
1796                Instruction::EndTag("replace".to_string()),
1797                Instruction::EndTag("apply_edit".to_string()),
1798            ])
1799            .expect("apply should run");
1800
1801        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1802            panic!("expected apply_edit output");
1803        };
1804
1805        assert!(!edit.applied);
1806        assert_eq!(edit.tier, EditTier::NotApplied);
1807        assert!(
1808            edit.warning
1809                .as_deref()
1810                .unwrap_or_default()
1811                .contains("File modified externally")
1812        );
1813    }
1814
1815    #[test]
1816    fn list_files_omits_dense_directories_and_marks_recent() {
1817        let vfs = InMemoryVfs::default();
1818        vfs.set_tree(vec![
1819            TreeNode {
1820                path: PathBuf::from("src"),
1821                kind: NodeKind::Directory,
1822                descendant_file_count: 3,
1823                modified_recently: false,
1824            },
1825            TreeNode {
1826                path: PathBuf::from("src/lib.rs"),
1827                kind: NodeKind::File,
1828                descendant_file_count: 0,
1829                modified_recently: true,
1830            },
1831            TreeNode {
1832                path: PathBuf::from("node_modules"),
1833                kind: NodeKind::Directory,
1834                descendant_file_count: 2400,
1835                modified_recently: false,
1836            },
1837        ]);
1838
1839        let mut engine = Engine::new(vfs, MockTerminal::default()).with_list_budget(100, 200);
1840        let turn = engine
1841            .execute_turn(vec![Instruction::StartTag {
1842                name: "list_files".to_string(),
1843                attributes: BTreeMap::from([("path".to_string(), ".".to_string())]),
1844            }])
1845            .expect("list should run");
1846
1847        let EngineOutput::ListFiles(output) = &turn.outputs[0] else {
1848            panic!("expected list output");
1849        };
1850
1851        assert!(
1852            output
1853                .lines
1854                .iter()
1855                .any(|line| line.contains("node_modules") && line.contains("omitted"))
1856        );
1857        assert!(output.lines.iter().any(|line| line.contains("(*)")));
1858    }
1859
1860    #[test]
1861    fn terminal_executes_command_and_reports_state_header() {
1862        let vfs = InMemoryVfs::default();
1863        let terminal = MockTerminal {
1864            pids: vec![42, 7],
1865            ..Default::default()
1866        };
1867
1868        let mut engine = Engine::new(vfs, terminal);
1869        let turn = engine
1870            .execute_turn(vec![
1871                Instruction::StartTag {
1872                    name: "terminal".to_string(),
1873                    attributes: BTreeMap::new(),
1874                },
1875                Instruction::RawChunk {
1876                    tag: "terminal".to_string(),
1877                    bytes: b"echo hi".to_vec(),
1878                },
1879                Instruction::EndTag("terminal".to_string()),
1880            ])
1881            .expect("terminal turn should run");
1882
1883        assert!(matches!(
1884            turn.outputs.as_slice(),
1885            [EngineOutput::Terminal(TerminalExecution { .. })]
1886        ));
1887        assert_eq!(turn.state_header.active_pids, vec![7, 42]);
1888        assert!(turn.state_header.render().contains("CWD: /virtual"));
1889    }
1890
1891    #[test]
1892    fn terminal_supports_attribute_command_form() {
1893        let vfs = InMemoryVfs::default();
1894        let mut engine = Engine::new(vfs, MockTerminal::default());
1895
1896        let turn = engine
1897            .execute_turn(vec![
1898                Instruction::StartTag {
1899                    name: "terminal".to_string(),
1900                    attributes: BTreeMap::from([("cmd".to_string(), "echo attr".to_string())]),
1901                },
1902                Instruction::EndTag("terminal".to_string()),
1903            ])
1904            .expect("terminal command should run");
1905
1906        let EngineOutput::Terminal(output) = &turn.outputs[0] else {
1907            panic!("expected terminal output");
1908        };
1909        assert!(output.output.contains("ran: echo attr"));
1910    }
1911
1912    #[test]
1913    fn unknown_tags_emit_warnings_instead_of_silent_noops() {
1914        let vfs = InMemoryVfs::default();
1915        let mut engine = Engine::new(vfs, MockTerminal::default());
1916
1917        let turn = engine
1918            .execute_turn(vec![
1919                Instruction::StartTag {
1920                    name: "mystery_tool".to_string(),
1921                    attributes: BTreeMap::new(),
1922                },
1923                Instruction::EndTag("mystery_tool".to_string()),
1924            ])
1925            .expect("turn should run");
1926
1927        assert_eq!(turn.outputs.len(), 2);
1928        assert!(matches!(
1929            &turn.outputs[0],
1930            EngineOutput::Warning(message) if message.contains("unsupported start tag <mystery_tool>")
1931        ));
1932        assert!(matches!(
1933            &turn.outputs[1],
1934            EngineOutput::Warning(message) if message.contains("unsupported end tag </mystery_tool>")
1935        ));
1936    }
1937
1938    #[test]
1939    fn system_prompt_includes_enabled_commands() {
1940        let prompt = generate_system_prompt(&[Capability::ReadFile, Capability::Terminal]);
1941        assert!(prompt.contains("<read_file"));
1942        assert!(prompt.contains("<terminal>"));
1943        assert!(!prompt.contains("<write_file path"));
1944    }
1945
1946    fn simple_hash(input: &[u8]) -> String {
1947        let mut acc: u64 = 1469598103934665603;
1948        for b in input {
1949            acc ^= *b as u64;
1950            acc = acc.wrapping_mul(1099511628211);
1951        }
1952        format!("{acc:016x}")
1953    }
1954}