1use std::collections::HashMap;
12use std::fmt::Write as _;
13use std::path::{Path, PathBuf};
14use std::time::Duration;
15
16use sa3p_parser::{Attributes, Instruction};
17use strsim::levenshtein;
18use thiserror::Error;
19
20#[derive(Debug, Error)]
21pub enum EngineError {
22 #[error("missing required attribute `{0}`")]
23 MissingAttribute(&'static str),
24 #[error("invalid integer for attribute `{name}`: {value}")]
25 InvalidInteger { name: &'static str, value: String },
26 #[error("operation is out of order: {0}")]
27 InvalidState(String),
28 #[error("invalid utf-8 payload")]
29 InvalidUtf8,
30 #[error("virtual filesystem error: {0}")]
31 Vfs(String),
32 #[error("terminal error: {0}")]
33 Terminal(String),
34}
35
36pub type Result<T> = std::result::Result<T, EngineError>;
37
/// A file path paired with a hash string for that file.
///
/// `StateHeader::render` prints each entry as `path#` followed by a shortened hash.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Path of the hashed file.
    pub path: PathBuf,
    /// Hash text for the file (the field name suggests a SHA-256 digest; the engine
    /// treats it as an opaque string).
    pub sha256: String,
}
43
/// Kind of entry returned by a tree listing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// Rendered by the engine as a `[file]` line.
    File,
    /// Rendered by the engine as a `[dir]` line.
    Directory,
}

/// One entry of a tree listing produced by `VirtualFileSystem::list_tree`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeNode {
    /// Path of the entry; listings are sorted by this field before rendering.
    pub path: PathBuf,
    /// Whether the entry is a file or a directory.
    pub kind: NodeKind,
    /// File count compared against the dense-directory threshold for directories.
    pub descendant_file_count: usize,
    /// When set, the rendered line gets a ` (*)` suffix.
    pub modified_recently: bool,
}
57
58pub trait VirtualFileSystem {
59 fn read(&self, path: &Path) -> Result<Vec<u8>>;
60 fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
61 fn hash(&self, path: &Path) -> Result<String>;
62 fn cwd(&self) -> Result<PathBuf>;
63 fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
64 fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
65}
66
/// Signals the engine can forward to a terminal process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
    /// Selected by the attribute value `SIGINT`; also the default when no signal is given.
    SigInt,
    /// Selected by the attribute value `SIGTERM`.
    SigTerm,
    /// Selected by the attribute value `SIGKILL`.
    SigKill,
}
73
/// Result of running one terminal command through a [`TerminalProvider`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
    /// Text produced by the command.
    pub output: String,
    /// Exit status, when one is available.
    pub exit_code: Option<i32>,
    /// Working directory reported with the execution.
    pub cwd: PathBuf,
    /// Process id reported for a detached command, if any.
    /// NOTE(review): presumably set when the command outlives the timeout; confirm
    /// against the provider implementation.
    pub detached_pid: Option<u32>,
}
81
82pub trait TerminalProvider {
83 fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
84 fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
85 fn active_pids(&self) -> Vec<u32>;
86}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct StateHeader {
90 pub cwd: PathBuf,
91 pub recent_hashes: Vec<FileHash>,
92 pub active_pids: Vec<u32>,
93}
94
95impl StateHeader {
96 pub fn render(&self) -> String {
97 let mut out = String::new();
98 let _ = write!(&mut out, "[CWD: {}", self.cwd.display());
99
100 if self.recent_hashes.is_empty() {
101 out.push_str(" | RECENT_HASHES: none");
102 } else {
103 out.push_str(" | RECENT_HASHES: ");
104 for (idx, file_hash) in self.recent_hashes.iter().enumerate() {
105 if idx > 0 {
106 out.push_str(", ");
107 }
108 let _ = write!(
109 &mut out,
110 "{}#{}",
111 file_hash.path.display(),
112 shorten_hash(&file_hash.sha256)
113 );
114 }
115 }
116
117 if self.active_pids.is_empty() {
118 out.push_str(" | ACTIVE_PIDS: none]");
119 } else {
120 out.push_str(" | ACTIVE_PIDS: ");
121 for (idx, pid) in self.active_pids.iter().enumerate() {
122 if idx > 0 {
123 out.push_str(", ");
124 }
125 let _ = write!(&mut out, "{pid}");
126 }
127 out.push(']');
128 }
129
130 out
131 }
132}
133
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct TurnResult {
136 pub state_header: StateHeader,
137 pub outputs: Vec<EngineOutput>,
138}
139
140#[derive(Debug, Clone, PartialEq, Eq)]
141pub enum EngineOutput {
142 ReadFile(ReadFileOutput),
143 WriteFile(WriteFileOutput),
144 ApplyEdit(ApplyEditOutput),
145 ListFiles(ListFilesOutput),
146 Terminal(TerminalExecution),
147 Signal(SignalOutput),
148 Warning(String),
149}
150
/// Result of reading a line range from a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFileOutput {
    /// Path that was read.
    pub path: PathBuf,
    /// Size of the whole file in bytes.
    pub size_bytes: usize,
    /// Hash of the file as reported by the virtual filesystem.
    pub sha256: String,
    /// First requested line (1-based) after defaults were applied.
    pub requested_start: usize,
    /// Last requested line after defaults were applied.
    pub requested_end: usize,
    /// First line actually returned; `0` when the file has no lines.
    pub served_start: usize,
    /// Last line actually returned; `0` when the file has no lines.
    pub served_end: usize,
    /// Number of lines in the file.
    pub total_lines: usize,
    /// Returned lines, each prefixed with `[line-number] ` and joined with newlines.
    pub body: String,
    /// Set when the file is empty or the requested range had to be adjusted.
    pub warning: Option<String>,
}
164
165impl ReadFileOutput {
166 pub fn fidelity_header(&self) -> String {
167 format!(
168 "[PATH: {} | SIZE: {} | SHA256: {} | LINES: {}-{}/{}]",
169 self.path.display(),
170 human_bytes(self.size_bytes),
171 shorten_hash(&self.sha256),
172 self.served_start,
173 self.served_end,
174 self.total_lines
175 )
176 }
177}
178
/// Result of a completed `<write_file>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// Path that was written.
    pub path: PathBuf,
    /// Number of bytes handed to the virtual filesystem.
    pub size_bytes: usize,
    /// Hash of the file as reported by the virtual filesystem after the write.
    pub sha256: String,
}
185
/// Matching strategy that produced an edit, or `NotApplied` when none did.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
    /// First strategy tried by the engine.
    Exact,
    /// Second strategy, tried when the exact one finds nothing.
    WhitespaceAgnostic,
    /// Last strategy, tried when the first two find nothing.
    ContextualAnchor,
    /// The edit was not applied.
    NotApplied,
}

/// Result of a completed `<apply_edit>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplyEditOutput {
    /// Path the edit targeted.
    pub path: PathBuf,
    /// Whether the file was rewritten.
    pub applied: bool,
    /// Strategy that matched, or [`EditTier::NotApplied`].
    pub tier: EditTier,
    /// Hash of the file after a successful edit; `None` otherwise.
    pub sha256: Option<String>,
    /// Label of the detected payload format, when one was detected.
    pub format: Option<String>,
    /// Machine-readable reason for a rejected edit; `None` on success.
    pub reason_code: Option<String>,
    /// Human-readable explanation for a rejected edit; `None` on success.
    pub warning: Option<String>,
}
204
/// Result of a `<list_files>` tag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
    /// Directory that was listed (`.` when the tag gave no path).
    pub path: PathBuf,
    /// Rendered listing, one entry per element, possibly ending with a truncation marker.
    pub lines: Vec<String>,
}
210
211#[derive(Debug, Clone, PartialEq, Eq)]
212pub struct SignalOutput {
213 pub pid: u32,
214 pub signal: ProcessSignal,
215}
216
/// Default number of listing entries rendered before the truncation marker is added.
pub const DEFAULT_MAX_LIST_LINES: usize = 300;
/// Default descendant-file count at which a directory line is annotated as omitted.
pub const DEFAULT_DENSE_DIR_THRESHOLD: usize = 200;
/// Default timeout, in seconds, passed to the terminal provider.
pub const DEFAULT_TERMINAL_TIMEOUT_SECS: u64 = 5;
/// Default limit passed to `VirtualFileSystem::recent_file_hashes`.
pub const DEFAULT_RECENT_HASH_LIMIT: usize = 5;

// Labels reported in `ApplyEditOutput::format`.
/// Payload given through `<search>`/`<replace>` tags.
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS: &str = "search_replace_tags";
/// Payload given through SEARCH/REPLACE marker lines.
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS: &str = "search_replace_markers";
/// Payload given as search/replace XML blocks inside the raw body.
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS: &str = "search_replace_xml_blocks";
/// Payload given as unified diff hunks.
pub const APPLY_EDIT_FORMAT_UNIFIED_DIFF: &str = "unified_diff";
/// Label attached to a body that matched none of the supported formats.
pub const APPLY_EDIT_FORMAT_RAW_TEXT: &str = "raw_text";

// Codes reported in `ApplyEditOutput::reason_code`.
/// The file's hash no longer matches the one the engine recorded.
pub const APPLY_EDIT_REASON_STALE_HASH: &str = "stale_hash";
/// The edit carried neither search text nor a body.
pub const APPLY_EDIT_REASON_EMPTY_EDIT: &str = "empty_edit";
/// The body matched none of the supported formats.
pub const APPLY_EDIT_REASON_PARSE_ERROR: &str = "parse_error";
/// A unified diff was detected but yielded no hunks.
pub const APPLY_EDIT_REASON_NO_HUNKS: &str = "no_hunks";
/// No location in the file matched the edit.
pub const APPLY_EDIT_REASON_NO_MATCH: &str = "no_match";

// Names of the state header fields, as listed in the capability manifest.
pub const STATE_HEADER_FIELD_CWD: &str = "cwd";
pub const STATE_HEADER_FIELD_RECENT_HASHES: &str = "recent_hashes";
pub const STATE_HEADER_FIELD_ACTIVE_PIDS: &str = "active_pids";

/// Formats advertised in the capability manifest (the raw-text label is not one of them).
pub const APPLY_EDIT_SUPPORTED_FORMATS: &[&str] = &[
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS,
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS,
    APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS,
    APPLY_EDIT_FORMAT_UNIFIED_DIFF,
];

/// Every reason code an `apply_edit` rejection can carry.
pub const APPLY_EDIT_REASON_CODES: &[&str] = &[
    APPLY_EDIT_REASON_STALE_HASH,
    APPLY_EDIT_REASON_EMPTY_EDIT,
    APPLY_EDIT_REASON_PARSE_ERROR,
    APPLY_EDIT_REASON_NO_HUNKS,
    APPLY_EDIT_REASON_NO_MATCH,
];

/// Every state header field name.
pub const STATE_HEADER_FIELDS: &[&str] = &[
    STATE_HEADER_FIELD_CWD,
    STATE_HEADER_FIELD_RECENT_HASHES,
    STATE_HEADER_FIELD_ACTIVE_PIDS,
];
258
/// Name and tag template of one command, as listed in the capability manifest.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityDescriptor {
    /// Command name, e.g. `write_file`.
    pub name: &'static str,
    /// Tag template showing how the command is written.
    pub tag: &'static str,
}
264
265#[derive(Debug, Clone, PartialEq, Eq)]
266pub struct CapabilityManifest {
267 pub commands: Vec<CapabilityDescriptor>,
268 pub apply_edit_formats: Vec<&'static str>,
269 pub apply_edit_reason_codes: Vec<&'static str>,
270 pub apply_edit_tiers: Vec<EditTier>,
271 pub state_header_fields: Vec<&'static str>,
272 pub max_list_lines: usize,
273 pub dense_dir_threshold: usize,
274 pub terminal_timeout: Duration,
275 pub recent_hash_limit: usize,
276}
277
278#[derive(Debug)]
279pub struct Engine<VFS, Terminal> {
280 vfs: VFS,
281 terminal: Terminal,
282 known_hashes: HashMap<PathBuf, String>,
283 pending: Option<PendingOperation>,
284 max_list_lines: usize,
285 dense_dir_threshold: usize,
286 terminal_timeout: Duration,
287 recent_hash_limit: usize,
288}
289
290impl<VFS, Terminal> Engine<VFS, Terminal>
291where
292 VFS: VirtualFileSystem,
293 Terminal: TerminalProvider,
294{
295 pub fn new(vfs: VFS, terminal: Terminal) -> Self {
296 Self {
297 vfs,
298 terminal,
299 known_hashes: HashMap::new(),
300 pending: None,
301 max_list_lines: DEFAULT_MAX_LIST_LINES,
302 dense_dir_threshold: DEFAULT_DENSE_DIR_THRESHOLD,
303 terminal_timeout: Duration::from_secs(DEFAULT_TERMINAL_TIMEOUT_SECS),
304 recent_hash_limit: DEFAULT_RECENT_HASH_LIMIT,
305 }
306 }
307
308 pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
309 self.terminal_timeout = timeout;
310 self
311 }
312
313 pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
314 self.max_list_lines = max_lines;
315 self.dense_dir_threshold = dense_threshold;
316 self
317 }
318
319 pub fn capability_manifest(&self) -> CapabilityManifest {
320 self.capability_manifest_for(DEFAULT_CAPABILITIES)
321 }
322
323 pub fn capability_manifest_for(&self, capabilities: &[Capability]) -> CapabilityManifest {
324 let enabled = if capabilities.is_empty() {
325 DEFAULT_CAPABILITIES.to_vec()
326 } else {
327 capabilities.to_vec()
328 };
329
330 CapabilityManifest {
331 commands: enabled
332 .iter()
333 .map(|capability| CapabilityDescriptor {
334 name: capability.name(),
335 tag: capability.tag(),
336 })
337 .collect(),
338 apply_edit_formats: APPLY_EDIT_SUPPORTED_FORMATS.to_vec(),
339 apply_edit_reason_codes: APPLY_EDIT_REASON_CODES.to_vec(),
340 apply_edit_tiers: vec![
341 EditTier::Exact,
342 EditTier::WhitespaceAgnostic,
343 EditTier::ContextualAnchor,
344 EditTier::NotApplied,
345 ],
346 state_header_fields: STATE_HEADER_FIELDS.to_vec(),
347 max_list_lines: self.max_list_lines,
348 dense_dir_threshold: self.dense_dir_threshold,
349 terminal_timeout: self.terminal_timeout,
350 recent_hash_limit: self.recent_hash_limit,
351 }
352 }
353
354 pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
355 let mut outputs = Vec::new();
356
357 for instruction in instructions {
358 self.apply_instruction(instruction, &mut outputs)?;
359 }
360
361 let state_header = StateHeader {
362 cwd: self.vfs.cwd()?,
363 recent_hashes: self.vfs.recent_file_hashes(self.recent_hash_limit)?,
364 active_pids: {
365 let mut pids = self.terminal.active_pids();
366 pids.sort_unstable();
367 pids
368 },
369 };
370
371 Ok(TurnResult {
372 state_header,
373 outputs,
374 })
375 }
376
377 fn apply_instruction(
378 &mut self,
379 instruction: Instruction,
380 outputs: &mut Vec<EngineOutput>,
381 ) -> Result<()> {
382 match instruction {
383 Instruction::Text(text) => {
384 if let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() {
385 if edit.capture.is_none() {
386 edit.raw_body.extend_from_slice(text.as_bytes());
387 }
388 }
389 }
390 Instruction::StartTag { name, attributes } => {
391 self.handle_start_tag(&name, attributes, outputs)?;
392 }
393 Instruction::EndTag(name) => {
394 self.handle_end_tag(&name, outputs)?;
395 }
396 Instruction::WriteChunk(bytes) => {
397 let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
398 return Err(EngineError::InvalidState(
399 "received WriteChunk outside <write_file>".to_string(),
400 ));
401 };
402 write.buffer.extend_from_slice(&bytes);
403 }
404 Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
405 Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
406 Some(ApplyCapture::Search) if tag == "search" => {
407 edit.search.extend_from_slice(&bytes);
408 }
409 Some(ApplyCapture::Replace) if tag == "replace" => {
410 edit.replace.extend_from_slice(&bytes);
411 }
412 None => {
413 edit.raw_body.extend_from_slice(&bytes);
414 }
415 _ => {
416 return Err(EngineError::InvalidState(format!(
417 "unexpected raw chunk for <{tag}> while applying edit"
418 )));
419 }
420 },
421 Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
422 term.command.extend_from_slice(&bytes);
423 }
424 _ => {
425 return Err(EngineError::InvalidState(format!(
426 "received raw chunk for <{tag}> without active matching context"
427 )));
428 }
429 },
430 }
431
432 Ok(())
433 }
434
435 fn handle_start_tag(
436 &mut self,
437 name: &str,
438 attributes: Attributes,
439 outputs: &mut Vec<EngineOutput>,
440 ) -> Result<()> {
441 match name {
442 "write_file" => {
443 self.ensure_no_pending("write_file")?;
444 let path = required_path(&attributes)?;
445 self.pending = Some(PendingOperation::WriteFile(PendingWrite {
446 path,
447 buffer: Vec::new(),
448 }));
449 }
450 "read_file" => {
451 let output = self.execute_read_file(&attributes)?;
452 outputs.push(EngineOutput::ReadFile(output));
453 }
454 "apply_edit" => {
455 self.ensure_no_pending("apply_edit")?;
456 let path = required_path(&attributes)?;
457 self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
458 path,
459 search: Vec::new(),
460 replace: Vec::new(),
461 raw_body: Vec::new(),
462 capture: None,
463 }));
464 }
465 "search" => {
466 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
467 return Err(EngineError::InvalidState(
468 "<search> must be nested under <apply_edit>".to_string(),
469 ));
470 };
471 edit.capture = Some(ApplyCapture::Search);
472 }
473 "replace" => {
474 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
475 return Err(EngineError::InvalidState(
476 "<replace> must be nested under <apply_edit>".to_string(),
477 ));
478 };
479 edit.capture = Some(ApplyCapture::Replace);
480 }
481 "list_files" => {
482 let output = self.execute_list_files(&attributes)?;
483 outputs.push(EngineOutput::ListFiles(output));
484 }
485 "terminal" => {
486 self.ensure_no_pending("terminal")?;
487 let command = attributes
488 .get("cmd")
489 .or_else(|| attributes.get("command"))
490 .cloned()
491 .unwrap_or_default()
492 .into_bytes();
493 self.pending = Some(PendingOperation::Terminal(PendingTerminal { command }));
494 }
495 "terminal_signal" => {
496 let signal_output = self.execute_terminal_signal(&attributes)?;
497 outputs.push(EngineOutput::Signal(signal_output));
498 }
499 other => outputs.push(EngineOutput::Warning(format!(
500 "unsupported start tag <{other}> ignored"
501 ))),
502 }
503
504 Ok(())
505 }
506
507 fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
508 match name {
509 "write_file" => {
510 let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
511 return Err(EngineError::InvalidState(
512 "</write_file> received without matching start".to_string(),
513 ));
514 };
515 let output = self.finalize_write(write)?;
516 outputs.push(EngineOutput::WriteFile(output));
517 }
518 "search" | "replace" => {
519 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
520 return Err(EngineError::InvalidState(format!(
521 "</{name}> received without active <apply_edit>"
522 )));
523 };
524 edit.capture = None;
525 }
526 "apply_edit" => {
527 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
528 return Err(EngineError::InvalidState(
529 "</apply_edit> received without matching start".to_string(),
530 ));
531 };
532 let output = self.finalize_apply_edit(edit)?;
533 outputs.push(EngineOutput::ApplyEdit(output));
534 }
535 "terminal" => {
536 let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
537 return Err(EngineError::InvalidState(
538 "</terminal> received without matching start".to_string(),
539 ));
540 };
541 let command =
542 String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
543 let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
544 outputs.push(EngineOutput::Terminal(output));
545 }
546 other => outputs.push(EngineOutput::Warning(format!(
547 "unsupported end tag </{other}> ignored"
548 ))),
549 }
550
551 Ok(())
552 }
553
554 fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
555 self.vfs.write_atomic(&write.path, &write.buffer)?;
556 let sha256 = self.vfs.hash(&write.path)?;
557 self.known_hashes.insert(write.path.clone(), sha256.clone());
558
559 Ok(WriteFileOutput {
560 path: write.path,
561 size_bytes: write.buffer.len(),
562 sha256,
563 })
564 }
565
566 fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
567 let path = required_path(attributes)?;
568 let requested_start = optional_usize(attributes, "start_line")?
569 .unwrap_or(1)
570 .max(1);
571 let requested_end =
572 optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
573
574 let bytes = self.vfs.read(&path)?;
575 let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
576 let sha256 = self.vfs.hash(&path)?;
577 self.known_hashes.insert(path.clone(), sha256.clone());
578
579 let all_lines: Vec<&str> = text.lines().collect();
580 let total_lines = all_lines.len();
581
582 let (served_start, served_end, warning) = if total_lines == 0 {
583 (0, 0, Some("file is empty; returning no lines".to_string()))
584 } else {
585 let served_start = requested_start.min(total_lines);
586 let served_end = requested_end.max(served_start).min(total_lines);
587 let warning = if served_start != requested_start || served_end != requested_end {
588 Some(format!(
589 "requested lines {}-{} adjusted to {}-{} (file has {} lines)",
590 requested_start, requested_end, served_start, served_end, total_lines
591 ))
592 } else {
593 None
594 };
595 (served_start, served_end, warning)
596 };
597
598 let body = if total_lines == 0 {
599 String::new()
600 } else {
601 let mut rendered = Vec::new();
602 for line_idx in served_start..=served_end {
603 let content = all_lines[line_idx - 1];
604 rendered.push(format!("[{line_idx}] {content}"));
605 }
606 rendered.join("\n")
607 };
608
609 Ok(ReadFileOutput {
610 path,
611 size_bytes: bytes.len(),
612 sha256,
613 requested_start,
614 requested_end,
615 served_start,
616 served_end,
617 total_lines,
618 body,
619 warning,
620 })
621 }
622
623 fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
624 let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
625 let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
626 let raw_body = String::from_utf8(edit.raw_body).map_err(|_| EngineError::InvalidUtf8)?;
627 let path = edit.path;
628 let edit_input = resolve_apply_edit_input(&search, &replace, &raw_body);
629
630 if let Some(previous_hash) = self.known_hashes.get(&path) {
631 let current_hash = self.vfs.hash(&path)?;
632 if previous_hash != ¤t_hash {
633 return Ok(ApplyEditOutput {
634 path,
635 applied: false,
636 tier: EditTier::NotApplied,
637 sha256: None,
638 format: edit_input.format,
639 reason_code: Some(APPLY_EDIT_REASON_STALE_HASH.to_string()),
640 warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
641 });
642 }
643 }
644
645 let original_bytes = self.vfs.read(&path)?;
646 let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;
647
648 if let Some(reason_code) = edit_input.reason_code {
649 return Ok(ApplyEditOutput {
650 path,
651 applied: false,
652 tier: EditTier::NotApplied,
653 sha256: None,
654 format: edit_input.format,
655 reason_code: Some(reason_code),
656 warning: edit_input
657 .warning
658 .or_else(|| Some("invalid apply_edit payload".to_string())),
659 });
660 }
661
662 let apply_result = if let Some(hunks) = edit_input.diff_hunks {
663 apply_diff_hunks_with_tiers(&original, &hunks)
664 } else if let (Some(search), Some(replace)) =
665 (edit_input.search.as_deref(), edit_input.replace.as_deref())
666 {
667 apply_edit_with_tiers(&original, search, replace)
668 } else {
669 None
670 };
671 let Some((rewritten, tier)) = apply_result else {
672 return Ok(ApplyEditOutput {
673 path,
674 applied: false,
675 tier: EditTier::NotApplied,
676 sha256: None,
677 format: edit_input.format,
678 reason_code: Some(APPLY_EDIT_REASON_NO_MATCH.to_string()),
679 warning: Some("no suitable target block found for apply_edit".to_string()),
680 });
681 };
682
683 self.vfs.write_atomic(&path, rewritten.as_bytes())?;
684 let sha256 = self.vfs.hash(&path)?;
685 self.known_hashes.insert(path.clone(), sha256.clone());
686
687 Ok(ApplyEditOutput {
688 path,
689 applied: true,
690 tier,
691 sha256: Some(sha256),
692 format: edit_input.format,
693 reason_code: None,
694 warning: None,
695 })
696 }
697
698 fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
699 let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
700 let mut nodes = self.vfs.list_tree(&path)?;
701 nodes.sort_by(|a, b| a.path.cmp(&b.path));
702
703 let mut lines = Vec::new();
704
705 for node in nodes {
706 if lines.len() >= self.max_list_lines {
707 lines.push("[... truncated due to token budget ...]".to_string());
708 break;
709 }
710
711 let mut line = match node.kind {
712 NodeKind::Directory => {
713 if node.descendant_file_count >= self.dense_dir_threshold {
714 format!(
715 "[dir] {}/ ({} files, omitted)",
716 node.path.display(),
717 node.descendant_file_count
718 )
719 } else {
720 format!("[dir] {}/", node.path.display())
721 }
722 }
723 NodeKind::File => format!("[file] {}", node.path.display()),
724 };
725
726 if node.modified_recently {
727 line.push_str(" (*)");
728 }
729
730 lines.push(line);
731 }
732
733 Ok(ListFilesOutput { path, lines })
734 }
735
736 fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
737 let pid_value = required_attr(attributes, "pid")?;
738 let pid = pid_value
739 .parse::<u32>()
740 .map_err(|_| EngineError::InvalidInteger {
741 name: "pid",
742 value: pid_value.to_string(),
743 })?;
744 let signal = match attributes
745 .get("signal")
746 .map(|v| v.to_ascii_uppercase())
747 .unwrap_or_else(|| "SIGINT".to_string())
748 .as_str()
749 {
750 "SIGINT" => ProcessSignal::SigInt,
751 "SIGTERM" => ProcessSignal::SigTerm,
752 "SIGKILL" => ProcessSignal::SigKill,
753 other => {
754 return Err(EngineError::InvalidState(format!(
755 "unsupported signal `{other}`"
756 )));
757 }
758 };
759
760 self.terminal.signal(pid, signal)?;
761 Ok(SignalOutput { pid, signal })
762 }
763
764 fn ensure_no_pending(&self, next: &str) -> Result<()> {
765 if self.pending.is_some() {
766 return Err(EngineError::InvalidState(format!(
767 "cannot start <{next}> while another command block is still open"
768 )));
769 }
770 Ok(())
771 }
772}
773
/// A block that has been opened by a start tag and not yet closed.
#[derive(Debug)]
enum PendingOperation {
    /// An open `<write_file>` block.
    WriteFile(PendingWrite),
    /// An open `<apply_edit>` block.
    ApplyEdit(PendingApplyEdit),
    /// An open `<terminal>` block.
    Terminal(PendingTerminal),
}

/// Bytes collected for an open `<write_file>` block.
#[derive(Debug)]
struct PendingWrite {
    /// Target path taken from the start tag.
    path: PathBuf,
    /// Content received so far through write chunks.
    buffer: Vec<u8>,
}

/// Data collected for an open `<apply_edit>` block.
#[derive(Debug)]
struct PendingApplyEdit {
    /// Target path taken from the start tag.
    path: PathBuf,
    /// Bytes received while capturing `<search>`.
    search: Vec<u8>,
    /// Bytes received while capturing `<replace>`.
    replace: Vec<u8>,
    /// Bytes and text received while no capture was active.
    raw_body: Vec<u8>,
    /// Which nested tag is currently being captured, if any.
    capture: Option<ApplyCapture>,
}

/// Nested tag whose content an open `<apply_edit>` is currently capturing.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
    /// Inside `<search>`.
    Search,
    /// Inside `<replace>`.
    Replace,
}

/// Command bytes collected for an open `<terminal>` block.
#[derive(Debug)]
struct PendingTerminal {
    /// Inline command from the start tag plus any raw chunks received since.
    command: Vec<u8>,
}
806
/// Commands that can be advertised to the model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    WriteFile,
    ApplyEdit,
    ReadFile,
    ListFiles,
    Terminal,
    TerminalSignal,
}

/// Capability set used whenever a caller passes an empty list.
pub const DEFAULT_CAPABILITIES: &[Capability] = &[
    Capability::WriteFile,
    Capability::ApplyEdit,
    Capability::ReadFile,
    Capability::ListFiles,
    Capability::Terminal,
    Capability::TerminalSignal,
];

impl Capability {
    /// Returns the command name, which is also the tag name the engine dispatches on.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::WriteFile => "write_file",
            Self::ApplyEdit => "apply_edit",
            Self::ReadFile => "read_file",
            Self::ListFiles => "list_files",
            Self::Terminal => "terminal",
            Self::TerminalSignal => "terminal_signal",
        }
    }

    /// Returns the tag template shown in the system prompt and the capability manifest.
    pub fn tag(&self) -> &'static str {
        match *self {
            Self::WriteFile => "<write_file path=\"...\"></write_file>",
            Self::ApplyEdit => {
                "<apply_edit path=\"...\">[search/replace blocks or patch body]</apply_edit>"
            }
            Self::ReadFile => "<read_file path=\"...\" start_line=\"..\" end_line=\"...\" />",
            Self::ListFiles => "<list_files path=\"...\" />",
            Self::Terminal => "<terminal>...</terminal> or <terminal cmd=\"...\" />",
            Self::TerminalSignal => "<terminal_signal pid=\"...\" signal=\"SIGINT\" />",
        }
    }
}
851
852pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
853 let enabled: Vec<Capability> = if capabilities.is_empty() {
854 DEFAULT_CAPABILITIES.to_vec()
855 } else {
856 capabilities.to_vec()
857 };
858
859 let mut out = String::from(
860 "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
861 );
862 out.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
863 out.push_str("Available Commands:\n");
864 for capability in &enabled {
865 out.push_str(capability.tag());
866 out.push('\n');
867 }
868 out.push_str("\nRules:\n");
869 out.push_str("1. Do not escape strings inside tags.\n");
870 out.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
871 out.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
872 out.push_str(
873 "4. apply_edit accepts XML search/replace blocks, SEARCH/REPLACE markers, or unified diff hunks.\n",
874 );
875 out
876}
877
878fn required_path(attributes: &Attributes) -> Result<PathBuf> {
879 optional_path(attributes, "path")?.ok_or(EngineError::MissingAttribute("path"))
880}
881
882fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
883 Ok(attributes.get(key).map(PathBuf::from))
884}
885
886fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
887 attributes
888 .get(key)
889 .map(|value| value.as_str())
890 .ok_or(EngineError::MissingAttribute(key))
891}
892
893fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
894 let Some(value) = attributes.get(key) else {
895 return Ok(None);
896 };
897 let parsed = value
898 .parse::<usize>()
899 .map_err(|_| EngineError::InvalidInteger {
900 name: key,
901 value: value.clone(),
902 })?;
903 Ok(Some(parsed))
904}
905
/// Returns the first eight characters of `hash`, or all of it when it is shorter.
fn shorten_hash(hash: &str) -> String {
    // Byte offset of the ninth character, if there is one, marks where to cut.
    let cut = hash
        .char_indices()
        .nth(8)
        .map_or(hash.len(), |(offset, _)| offset);
    hash[..cut].to_string()
}
909
/// Formats a byte count for display: plain bytes below 1024 (`512b`), otherwise
/// kilobytes or megabytes (1024-based) with one decimal (`1.5kb`, `2.0mb`).
fn human_bytes(bytes: usize) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;

    match bytes as f64 {
        size if size >= MB => format!("{:.1}mb", size / MB),
        size if size >= KB => format!("{:.1}kb", size / KB),
        _ => format!("{bytes}b"),
    }
}
923
924fn apply_edit_with_tiers(
925 original: &str,
926 search: &str,
927 replace: &str,
928) -> Option<(String, EditTier)> {
929 if search.is_empty() {
930 return None;
931 }
932
933 if let Some(output) = apply_exact(original, search, replace) {
934 return Some((output, EditTier::Exact));
935 }
936
937 if let Some(output) = apply_whitespace_agnostic(original, search, replace) {
938 return Some((output, EditTier::WhitespaceAgnostic));
939 }
940
941 apply_contextual_anchor(original, search, replace)
942 .map(|output| (output, EditTier::ContextualAnchor))
943}
944
/// One unified-diff hunk reduced to a search/replace pair.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DiffHunkReplacement {
    /// Context and removed lines of the hunk, joined with newlines.
    search: String,
    /// Context and added lines of the hunk, joined with newlines.
    replace: String,
}

/// Outcome of working out which payload format an `apply_edit` block used.
#[derive(Debug, Clone)]
struct ResolvedApplyEditInput {
    /// Search text, for the search/replace formats.
    search: Option<String>,
    /// Replacement text, for the search/replace formats.
    replace: Option<String>,
    /// Parsed hunks, for the unified diff format.
    diff_hunks: Option<Vec<DiffHunkReplacement>>,
    /// Label of the detected format; `None` when the edit was empty.
    format: Option<String>,
    /// Set when the payload must be rejected.
    reason_code: Option<String>,
    /// Explanation accompanying `reason_code`.
    warning: Option<String>,
}
960
961fn resolve_apply_edit_input(search: &str, replace: &str, raw_body: &str) -> ResolvedApplyEditInput {
962 if !search.is_empty() {
963 return ResolvedApplyEditInput {
964 search: Some(search.to_string()),
965 replace: Some(replace.to_string()),
966 diff_hunks: None,
967 format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS.to_string()),
968 reason_code: None,
969 warning: None,
970 };
971 }
972
973 let body = decode_basic_xml_entities(raw_body).trim().to_string();
974 if body.is_empty() {
975 return ResolvedApplyEditInput {
976 search: None,
977 replace: None,
978 diff_hunks: None,
979 format: None,
980 reason_code: Some(APPLY_EDIT_REASON_EMPTY_EDIT.to_string()),
981 warning: Some(
982 "apply_edit requires <search>/<replace> blocks or a non-empty patch body"
983 .to_string(),
984 ),
985 };
986 }
987
988 if let Some((parsed_search, parsed_replace)) = parse_search_replace_markers(&body) {
989 return ResolvedApplyEditInput {
990 search: Some(parsed_search),
991 replace: Some(parsed_replace),
992 diff_hunks: None,
993 format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS.to_string()),
994 reason_code: None,
995 warning: None,
996 };
997 }
998
999 if let Some((parsed_search, parsed_replace)) = parse_apply_edit_xml_blocks(&body) {
1000 return ResolvedApplyEditInput {
1001 search: Some(parsed_search),
1002 replace: Some(parsed_replace),
1003 diff_hunks: None,
1004 format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS.to_string()),
1005 reason_code: None,
1006 warning: None,
1007 };
1008 }
1009
1010 if let Some(hunks) = parse_unified_diff_hunks(&body) {
1011 if hunks.is_empty() {
1012 return ResolvedApplyEditInput {
1013 search: None,
1014 replace: None,
1015 diff_hunks: None,
1016 format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
1017 reason_code: Some(APPLY_EDIT_REASON_NO_HUNKS.to_string()),
1018 warning: Some(
1019 "unified diff was detected but no @@ hunk blocks were parsed".to_string(),
1020 ),
1021 };
1022 }
1023 return ResolvedApplyEditInput {
1024 search: None,
1025 replace: None,
1026 diff_hunks: Some(hunks),
1027 format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
1028 reason_code: None,
1029 warning: None,
1030 };
1031 }
1032
1033 ResolvedApplyEditInput {
1034 search: None,
1035 replace: None,
1036 diff_hunks: None,
1037 format: Some(APPLY_EDIT_FORMAT_RAW_TEXT.to_string()),
1038 reason_code: Some(APPLY_EDIT_REASON_PARSE_ERROR.to_string()),
1039 warning: Some("unsupported apply_edit body format".to_string()),
1040 }
1041}
1042
/// Decodes the three basic XML entities in `input`: the ones named `lt`, `gt` and
/// `amp` become `<`, `>` and the ampersand character respectively.
///
/// All other text, including any other entity, is returned unchanged.
fn decode_basic_xml_entities(input: &str) -> String {
    // (entity name, decoded text). `amp` is decoded last so that an escaped ampersand
    // followed by "lt;" yields the literal entity text rather than `<`.
    // The names are kept apart from their delimiters so this table stays intact even
    // when the file passes through tooling that itself decodes entities.
    const ENTITIES: [(&str, &str); 3] = [("lt", "<"), ("gt", ">"), ("amp", "&")];

    ENTITIES
        .iter()
        .fold(input.to_string(), |decoded, (name, plain)| {
            decoded.replace(&format!("&{name};"), plain)
        })
}
1049
/// Extracts search and replacement text from a body written with marker lines:
/// `<<<<<<< SEARCH`, `=======` and `>>>>>>> REPLACE`.
///
/// Marker lines are recognised after trimming surrounding whitespace; content lines
/// are kept untrimmed and joined with newlines. Lines from several marker blocks are
/// accumulated into the same pair. Returns `None` when no `<<<<<<< SEARCH` line exists.
fn parse_search_replace_markers(input: &str) -> Option<(String, String)> {
    /// Part of the body the scanner is currently in.
    #[derive(Clone, Copy)]
    enum Section {
        Outside,
        Search,
        Replace,
    }

    let mut section = Section::Outside;
    let mut search_lines: Vec<&str> = Vec::new();
    let mut replace_lines: Vec<&str> = Vec::new();
    let mut saw_markers = false;

    for line in input.lines() {
        match (line.trim(), section) {
            // A SEARCH marker (re)starts the search section from any state.
            ("<<<<<<< SEARCH", _) => {
                section = Section::Search;
                saw_markers = true;
            }
            ("=======", Section::Search) => section = Section::Replace,
            (">>>>>>> REPLACE", Section::Replace) => section = Section::Outside,
            (_, Section::Search) => search_lines.push(line),
            (_, Section::Replace) => replace_lines.push(line),
            (_, Section::Outside) => {}
        }
    }

    saw_markers.then(|| (search_lines.join("\n"), replace_lines.join("\n")))
}
1083
1084fn parse_apply_edit_xml_blocks(input: &str) -> Option<(String, String)> {
1085 let search = extract_tag_body(input, "search")?;
1086 let replace = extract_tag_body(input, "replace")?;
1087 Some((search, replace))
1088}
1089
/// Returns the text between the first `<tag>` and the first `</tag>` that follows it.
///
/// Only the attribute-less opening form is recognised. Returns `None` when either the
/// opening or the closing tag is missing.
fn extract_tag_body(input: &str, tag: &str) -> Option<String> {
    let (_, after_open) = input.split_once(&format!("<{tag}>"))?;
    let (body, _) = after_open.split_once(&format!("</{tag}>"))?;
    Some(body.to_string())
}
1097
1098fn parse_unified_diff_hunks(input: &str) -> Option<Vec<DiffHunkReplacement>> {
1099 let lines = input.lines().collect::<Vec<_>>();
1100 let mut idx = 0usize;
1101 let mut hunks = Vec::new();
1102 let mut saw_hunk_header = false;
1103
1104 while idx < lines.len() {
1105 let line = lines[idx].trim_end_matches('\r');
1106 if line.starts_with("@@") {
1107 saw_hunk_header = true;
1108 idx = idx.saturating_add(1);
1109 let mut search_lines = Vec::new();
1110 let mut replace_lines = Vec::new();
1111
1112 while idx < lines.len() {
1113 let current = lines[idx].trim_end_matches('\r');
1114 if current.starts_with("@@") {
1115 break;
1116 }
1117 if current.starts_with("diff --git ")
1118 || current.starts_with("*** End Patch")
1119 || current.starts_with("*** Update File:")
1120 {
1121 break;
1122 }
1123 if current.eq("\\ No newline at end of file") {
1124 idx = idx.saturating_add(1);
1125 continue;
1126 }
1127 if let Some(rest) = current.strip_prefix('+') {
1128 if !current.starts_with("+++") {
1129 replace_lines.push(rest.to_string());
1130 }
1131 } else if let Some(rest) = current.strip_prefix('-') {
1132 if !current.starts_with("---") {
1133 search_lines.push(rest.to_string());
1134 }
1135 } else if let Some(rest) = current.strip_prefix(' ') {
1136 search_lines.push(rest.to_string());
1137 replace_lines.push(rest.to_string());
1138 }
1139 idx = idx.saturating_add(1);
1140 }
1141
1142 if !(search_lines.is_empty() && replace_lines.is_empty()) {
1143 hunks.push(DiffHunkReplacement {
1144 search: search_lines.join("\n"),
1145 replace: replace_lines.join("\n"),
1146 });
1147 }
1148 continue;
1149 }
1150 idx = idx.saturating_add(1);
1151 }
1152
1153 saw_hunk_header.then_some(hunks)
1154}
1155
1156fn apply_diff_hunks_with_tiers(
1157 original: &str,
1158 hunks: &[DiffHunkReplacement],
1159) -> Option<(String, EditTier)> {
1160 let mut current = original.to_string();
1161 let mut strongest_tier = EditTier::Exact;
1162 for hunk in hunks {
1163 let (next, tier) = apply_edit_with_tiers(¤t, &hunk.search, &hunk.replace)?;
1164 if edit_tier_rank(&tier) > edit_tier_rank(&strongest_tier) {
1165 strongest_tier = tier;
1166 }
1167 current = next;
1168 }
1169 Some((current, strongest_tier))
1170}
1171
1172const fn edit_tier_rank(tier: &EditTier) -> usize {
1173 match tier {
1174 EditTier::Exact => 0,
1175 EditTier::WhitespaceAgnostic => 1,
1176 EditTier::ContextualAnchor => 2,
1177 EditTier::NotApplied => 3,
1178 }
1179}
1180
/// Replaces the first verbatim occurrence of `search` in `original` with `replace`.
///
/// Returns `None` when `search` does not occur in `original`.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    let (prefix, suffix) = original.split_once(search)?;
    Some([prefix, replace, suffix].concat())
}
1189
1190fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
1191 let original_lines = collect_line_spans(original);
1192 let search_lines: Vec<&str> = search.lines().collect();
1193 if search_lines.is_empty() || original_lines.len() < search_lines.len() {
1194 return None;
1195 }
1196
1197 for start in 0..=original_lines.len() - search_lines.len() {
1198 let window = &original_lines[start..start + search_lines.len()];
1199 if window
1200 .iter()
1201 .zip(search_lines.iter())
1202 .all(|(candidate, target)| candidate.text.trim() == target.trim())
1203 {
1204 let range_start = window.first()?.start;
1205 let range_end = window.last()?.end;
1206 return Some(splice(original, range_start, range_end, replace));
1207 }
1208 }
1209
1210 None
1211}
1212
1213fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
1214 let original_lines = collect_line_spans(original);
1215 let search_lines: Vec<&str> = search.lines().collect();
1216 if search_lines.is_empty() || original_lines.is_empty() {
1217 return None;
1218 }
1219
1220 let window_len = search_lines.len().min(original_lines.len());
1221 let normalized_search = normalize_for_distance(search);
1222 let mut best: Option<(usize, usize, usize)> = None;
1223
1224 for start in 0..=original_lines.len() - window_len {
1225 let window = &original_lines[start..start + window_len];
1226 let joined = window
1227 .iter()
1228 .map(|line| line.text)
1229 .collect::<Vec<_>>()
1230 .join("\n");
1231 let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
1232
1233 match best {
1234 Some((best_score, _, _)) if score >= best_score => {}
1235 _ => best = Some((score, start, start + window_len - 1)),
1236 }
1237 }
1238
1239 let (score, line_start, line_end) = best?;
1240 let threshold = normalized_search.len().max(6) / 3;
1241 if score > threshold {
1242 return None;
1243 }
1244
1245 let range_start = original_lines[line_start].start;
1246 let range_end = original_lines[line_end].end;
1247 Some(splice(original, range_start, range_end, replace))
1248}
1249
/// Canonicalizes text before distance scoring.
///
/// Within every line, runs of whitespace collapse to a single space and
/// leading/trailing whitespace disappears. Lines are rejoined with `\n` and
/// the whole result is trimmed.
fn normalize_for_distance(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for (line_index, line) in input.lines().enumerate() {
        if line_index > 0 {
            collapsed.push('\n');
        }
        for (word_index, word) in line.split_whitespace().enumerate() {
            if word_index > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(word);
        }
    }
    collapsed.trim().to_string()
}
1259
/// Returns `original` with the bytes in `range_start..range_end` swapped for `replace`.
///
/// Both offsets are byte indices into `original`; slicing panics if either
/// one is out of bounds or not on a character boundary.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    let head = &original[..range_start];
    let tail = &original[range_end..];
    [head, replace, tail].concat()
}
1267
/// One line of an input string together with its byte range, as built by
/// `collect_line_spans`.
#[derive(Debug)]
struct LineSpan<'a> {
    /// Byte offset at which the line starts in the source string.
    start: usize,
    /// Byte offset just past the line, including its trailing `\n` when it has one.
    end: usize,
    /// The line's content with the trailing `\n` (if any) removed.
    text: &'a str,
}
1274
1275fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
1276 let mut spans = Vec::new();
1277 let mut offset = 0usize;
1278
1279 for chunk in input.split_inclusive('\n') {
1280 let end = offset + chunk.len();
1281 let text = chunk.strip_suffix('\n').unwrap_or(chunk);
1282 spans.push(LineSpan {
1283 start: offset,
1284 end,
1285 text,
1286 });
1287 offset = end;
1288 }
1289
1290 if input.is_empty() {
1291 return spans;
1292 }
1293
1294 if !input.ends_with('\n') {
1295 if let Some(last) = spans.last_mut() {
1296 last.end = input.len();
1297 }
1298 }
1299
1300 spans
1301}
1302
1303#[cfg(test)]
1304mod tests {
1305 use super::*;
1306 use std::collections::BTreeMap;
1307 use std::sync::{Arc, Mutex};
1308
/// In-memory `VirtualFileSystem` test double.
///
/// Both fields sit behind `Arc<Mutex<..>>`, so clones share the same storage.
#[derive(Clone, Default)]
struct InMemoryVfs {
    /// File contents keyed by path.
    files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
    /// Nodes returned by `list_tree`, regardless of the requested path.
    tree: Arc<Mutex<Vec<TreeNode>>>,
}
1314
1315 impl InMemoryVfs {
1316 fn set_file(&self, path: &str, body: &str) {
1317 self.files
1318 .lock()
1319 .expect("lock")
1320 .insert(PathBuf::from(path), body.as_bytes().to_vec());
1321 }
1322
1323 fn get_file(&self, path: &str) -> String {
1324 String::from_utf8(
1325 self.files
1326 .lock()
1327 .expect("lock")
1328 .get(&PathBuf::from(path))
1329 .cloned()
1330 .unwrap_or_default(),
1331 )
1332 .expect("utf8")
1333 }
1334
1335 fn set_tree(&self, nodes: Vec<TreeNode>) {
1336 *self.tree.lock().expect("lock") = nodes;
1337 }
1338 }
1339
1340 impl VirtualFileSystem for InMemoryVfs {
1341 fn read(&self, path: &Path) -> Result<Vec<u8>> {
1342 self.files
1343 .lock()
1344 .expect("lock")
1345 .get(path)
1346 .cloned()
1347 .ok_or_else(|| EngineError::Vfs(format!("missing file {}", path.display())))
1348 }
1349
1350 fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
1351 self.files
1352 .lock()
1353 .expect("lock")
1354 .insert(path.to_path_buf(), bytes.to_vec());
1355 Ok(())
1356 }
1357
1358 fn hash(&self, path: &Path) -> Result<String> {
1359 let bytes = self.read(path)?;
1360 Ok(simple_hash(&bytes))
1361 }
1362
1363 fn cwd(&self) -> Result<PathBuf> {
1364 Ok(PathBuf::from("/virtual"))
1365 }
1366
1367 fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
1368 Ok(self.tree.lock().expect("lock").clone())
1369 }
1370
1371 fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
1372 let files = self.files.lock().expect("lock");
1373 let mut entries: Vec<_> = files
1374 .iter()
1375 .map(|(path, body)| FileHash {
1376 path: path.clone(),
1377 sha256: simple_hash(body),
1378 })
1379 .collect();
1380 entries.sort_by(|a, b| a.path.cmp(&b.path));
1381 entries.truncate(limit);
1382 Ok(entries)
1383 }
1384 }
1385
/// `TerminalProvider` test double that records commands instead of running them.
#[derive(Default)]
struct MockTerminal {
    /// Process ids reported by `active_pids`; `signal` removes the signalled pid.
    pids: Vec<u32>,
    /// The command most recently passed to `run`, if any.
    last_command: Option<String>,
}
1391
1392 impl TerminalProvider for MockTerminal {
1393 fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
1394 self.last_command = Some(command.to_string());
1395 Ok(TerminalExecution {
1396 output: format!("ran: {command}"),
1397 exit_code: Some(0),
1398 cwd: PathBuf::from("/virtual"),
1399 detached_pid: None,
1400 })
1401 }
1402
1403 fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
1404 self.pids.retain(|existing| *existing != pid);
1405 Ok(())
1406 }
1407
1408 fn active_pids(&self) -> Vec<u32> {
1409 self.pids.clone()
1410 }
1411 }
1412
/// Chunks streamed inside one `write_file` tag end up as a single file body.
#[test]
fn write_file_chunks_commit_atomically() {
    let store = InMemoryVfs::default();
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let attributes = BTreeMap::from([(String::from("path"), String::from("src/main.rs"))]);
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("write_file"),
            attributes,
        },
        Instruction::WriteChunk(b"fn main()".to_vec()),
        Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
        Instruction::EndTag(String::from("write_file")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    assert_eq!(
        store.get_file("src/main.rs"),
        "fn main() { println!(\"ok\"); }"
    );
    assert!(matches!(
        outcome.outputs.as_slice(),
        [EngineOutput::WriteFile(WriteFileOutput { .. })]
    ));
}
1439
/// A read past the end of the file is clamped, numbered and flagged with a warning.
#[test]
fn read_file_returns_fidelity_header_and_numbered_lines() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "a\nb\nc\nd\n");
    let mut engine = Engine::new(store, MockTerminal::default());

    let mut attributes = BTreeMap::new();
    for (key, value) in [("path", "src/lib.rs"), ("start_line", "3"), ("end_line", "9")] {
        attributes.insert(key.to_string(), value.to_string());
    }
    let instructions = vec![Instruction::StartTag {
        name: String::from("read_file"),
        attributes,
    }];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let output = match &outcome.outputs[0] {
        EngineOutput::ReadFile(output) => output,
        _ => panic!("expected read output"),
    };

    assert_eq!(output.served_start, 3);
    assert_eq!(output.served_end, 4);
    assert_eq!(output.body, "[3] c\n[4] d");
    assert!(output.warning.is_some());
    assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
}
1467
/// A search block that differs from the file only in surrounding whitespace
/// is applied through the whitespace-agnostic tier.
#[test]
fn apply_edit_uses_whitespace_agnostic_matching() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "fn main() {\n println!(\"x\");\n}\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    // Builds the `<tag>body</tag>` instruction triple for a child element.
    let child = |tag: &str, body: &[u8]| {
        vec![
            Instruction::StartTag {
                name: tag.to_string(),
                attributes: BTreeMap::new(),
            },
            Instruction::RawChunk {
                tag: tag.to_string(),
                bytes: body.to_vec(),
            },
            Instruction::EndTag(tag.to_string()),
        ]
    };

    let mut instructions = vec![Instruction::StartTag {
        name: String::from("apply_edit"),
        attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
    }];
    instructions.extend(child("search", b" println!(\"x\"); "));
    instructions.extend(child("replace", b"println!(\"y\");"));
    instructions.push(Instruction::EndTag(String::from("apply_edit")));

    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
    assert!(store.get_file("src/lib.rs").contains("println!(\"y\");"));
}
1510
/// A bare unified-diff hunk as the tag body is recognised and applied.
#[test]
fn apply_edit_accepts_unified_diff_hunk_body() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let patch = "@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n";
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::Text(patch.to_string()),
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.format.as_deref(), Some("unified_diff"));
    assert!(store.get_file("src/lib.rs").contains("beta (edited)"));
}
1538
/// A hunk wrapped in `*** Begin Patch` / `*** End Patch` is treated as a unified diff.
#[test]
fn apply_edit_accepts_begin_patch_wrapper_body() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let patch = "*** Begin Patch\n*** Update File: src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n";
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::Text(patch.to_string()),
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.format.as_deref(), Some("unified_diff"));
    assert!(store.get_file("src/lib.rs").contains("beta (edited)"));
}
1566
/// The wrapper's `*** Update File:` line may carry an absolute path that
/// differs from the tag's `path` attribute; the edit is still applied.
#[test]
fn apply_edit_accepts_begin_patch_wrapper_with_absolute_update_file_path() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let patch = "*** Begin Patch\n*** Update File: /tmp/workspace/src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n";
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::Text(patch.to_string()),
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.format.as_deref(), Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF));
    assert!(store.get_file("src/lib.rs").contains("beta (edited)"));
}
1594
/// A conflict-marker style body (`SEARCH` / `=======` / `REPLACE`) is applied.
#[test]
fn apply_edit_accepts_search_replace_markers_body() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let body = "<<<<<<< SEARCH\nbeta\n=======\nbeta (edited)\n>>>>>>> REPLACE\n";
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::Text(body.to_string()),
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.format.as_deref(), Some("search_replace_markers"));
    assert!(store.get_file("src/lib.rs").contains("beta (edited)"));
}
1622
/// Marker lines whose `<` / `>` characters arrive as XML entities (`&lt;`,
/// `&gt;`) are still recognised as a search/replace marker body.
///
/// The payload must stay entity-escaped: with literal angle brackets this
/// test would only repeat the plain-marker case.
#[test]
fn apply_edit_accepts_xml_escaped_search_replace_markers_body() {
    let vfs = InMemoryVfs::default();
    vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(vfs.clone(), MockTerminal::default());

    let turn = engine
        .execute_turn(vec![
            Instruction::StartTag {
                name: "apply_edit".to_string(),
                attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
            },
            Instruction::Text(
                "&lt;&lt;&lt;&lt;&lt;&lt;&lt; SEARCH\nbeta\n=======\nbeta (escaped)\n&gt;&gt;&gt;&gt;&gt;&gt;&gt; REPLACE\n".to_string(),
            ),
            Instruction::EndTag("apply_edit".to_string()),
        ])
        .expect("turn should run");

    let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
        panic!("expected apply_edit output");
    };

    assert!(edit.applied);
    assert_eq!(
        edit.format.as_deref(),
        Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
    );
    assert!(vfs.get_file("src/lib.rs").contains("beta (escaped)"));
}
1653
/// `<search>` / `<replace>` elements delivered as one raw chunk are parsed and applied.
#[test]
fn apply_edit_accepts_xml_search_replace_blocks_in_raw_body() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let raw_body: &[u8] = b"<search>beta</search><replace>beta (edited)</replace>";
    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::RawChunk {
            tag: String::from("apply_edit"),
            bytes: raw_body.to_vec(),
        },
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(edit.applied);
    assert_eq!(edit.format.as_deref(), Some("search_replace_xml_blocks"));
    assert!(store.get_file("src/lib.rs").contains("beta (edited)"));
}
1682
/// A body in no recognised edit format is reported as a parse error, not applied.
#[test]
fn apply_edit_reports_parse_error_reason_code_for_unsupported_raw_body() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
    let mut engine = Engine::new(store, MockTerminal::default());

    let instructions = vec![
        Instruction::StartTag {
            name: String::from("apply_edit"),
            attributes: BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]),
        },
        Instruction::Text(String::from("totally unsupported patch format")),
        Instruction::EndTag(String::from("apply_edit")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(!edit.applied);
    assert_eq!(edit.reason_code.as_deref(), Some("parse_error"));
    assert_eq!(edit.format.as_deref(), Some("raw_text"));
}
1708
/// The manifest lists the supported commands, formats, reason codes and tiers,
/// and reflects the limits configured on the engine.
#[test]
fn capability_manifest_reports_current_engine_contract() {
    let engine = Engine::new(InMemoryVfs::default(), MockTerminal::default())
        .with_terminal_timeout(Duration::from_secs(9))
        .with_list_budget(77, 33);

    let manifest = engine.capability_manifest();

    let command_names: Vec<_> = manifest
        .commands
        .iter()
        .map(|command| command.name)
        .collect();
    assert_eq!(
        command_names,
        [
            "write_file",
            "apply_edit",
            "read_file",
            "list_files",
            "terminal",
            "terminal_signal",
        ]
    );

    let formats = &manifest.apply_edit_formats;
    assert!(formats.contains(&APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS));

    let reason_codes = &manifest.apply_edit_reason_codes;
    assert!(reason_codes.contains(&APPLY_EDIT_REASON_PARSE_ERROR));

    assert_eq!(
        manifest.apply_edit_tiers,
        vec![
            EditTier::Exact,
            EditTier::WhitespaceAgnostic,
            EditTier::ContextualAnchor,
            EditTier::NotApplied,
        ]
    );
    assert_eq!(manifest.state_header_fields, STATE_HEADER_FIELDS);
    assert_eq!(manifest.max_list_lines, 77);
    assert_eq!(manifest.dense_dir_threshold, 33);
    assert_eq!(manifest.terminal_timeout, Duration::from_secs(9));
    assert_eq!(manifest.recent_hash_limit, DEFAULT_RECENT_HASH_LIMIT);
}
1757
/// When the file changes between a read and a later edit, the edit is not
/// applied and the output carries an external-modification warning.
#[test]
fn apply_edit_warns_if_file_changed_since_last_read() {
    let store = InMemoryVfs::default();
    store.set_file("src/lib.rs", "alpha\nbeta\n");
    let mut engine = Engine::new(store.clone(), MockTerminal::default());

    let path_attributes =
        || BTreeMap::from([(String::from("path"), String::from("src/lib.rs"))]);
    // Builds the `<tag>body</tag>` instruction triple for a child element.
    let child = |tag: &str, body: &[u8]| {
        vec![
            Instruction::StartTag {
                name: tag.to_string(),
                attributes: BTreeMap::new(),
            },
            Instruction::RawChunk {
                tag: tag.to_string(),
                bytes: body.to_vec(),
            },
            Instruction::EndTag(tag.to_string()),
        ]
    };

    let read_instructions = vec![Instruction::StartTag {
        name: String::from("read_file"),
        attributes: path_attributes(),
    }];
    let _ = engine
        .execute_turn(read_instructions)
        .expect("read should work");

    // Change the file behind the engine's back.
    store.set_file("src/lib.rs", "external\nchange\n");

    let mut edit_instructions = vec![Instruction::StartTag {
        name: String::from("apply_edit"),
        attributes: path_attributes(),
    }];
    edit_instructions.extend(child("search", b"beta"));
    edit_instructions.extend(child("replace", b"gamma"));
    edit_instructions.push(Instruction::EndTag(String::from("apply_edit")));

    let outcome = engine
        .execute_turn(edit_instructions)
        .expect("apply should run");

    let edit = match &outcome.outputs[0] {
        EngineOutput::ApplyEdit(edit) => edit,
        _ => panic!("expected apply_edit output"),
    };

    assert!(!edit.applied);
    assert_eq!(edit.tier, EditTier::NotApplied);
    let warning = edit.warning.as_deref().unwrap_or_default();
    assert!(warning.contains("File modified externally"));
}
1814
/// Directories above the density threshold are summarised as omitted, and
/// recently modified entries are marked with `(*)`.
#[test]
fn list_files_omits_dense_directories_and_marks_recent() {
    let node = |path: &str, kind: NodeKind, descendant_file_count: usize, modified_recently: bool| {
        TreeNode {
            path: PathBuf::from(path),
            kind,
            descendant_file_count,
            modified_recently,
        }
    };

    let store = InMemoryVfs::default();
    store.set_tree(vec![
        node("src", NodeKind::Directory, 3, false),
        node("src/lib.rs", NodeKind::File, 0, true),
        node("node_modules", NodeKind::Directory, 2400, false),
    ]);

    let mut engine = Engine::new(store, MockTerminal::default()).with_list_budget(100, 200);
    let instructions = vec![Instruction::StartTag {
        name: String::from("list_files"),
        attributes: BTreeMap::from([(String::from("path"), String::from("."))]),
    }];
    let outcome = engine.execute_turn(instructions).expect("list should run");

    let output = match &outcome.outputs[0] {
        EngineOutput::ListFiles(output) => output,
        _ => panic!("expected list output"),
    };

    let dense_dir_omitted = output
        .lines
        .iter()
        .any(|line| line.contains("node_modules") && line.contains("omitted"));
    assert!(dense_dir_omitted);

    let recent_marked = output.lines.iter().any(|line| line.contains("(*)"));
    assert!(recent_marked);
}
1859
/// A `terminal` tag runs its body as a command, and the turn's state header
/// reports the active pids in ascending order together with the cwd.
#[test]
fn terminal_executes_command_and_reports_state_header() {
    let terminal = MockTerminal {
        pids: vec![42, 7],
        last_command: None,
    };
    let mut engine = Engine::new(InMemoryVfs::default(), terminal);

    let instructions = vec![
        Instruction::StartTag {
            name: String::from("terminal"),
            attributes: BTreeMap::new(),
        },
        Instruction::RawChunk {
            tag: String::from("terminal"),
            bytes: b"echo hi".to_vec(),
        },
        Instruction::EndTag(String::from("terminal")),
    ];
    let outcome = engine
        .execute_turn(instructions)
        .expect("terminal turn should run");

    assert!(matches!(
        outcome.outputs.as_slice(),
        [EngineOutput::Terminal(TerminalExecution { .. })]
    ));
    assert_eq!(outcome.state_header.active_pids, vec![7, 42]);
    let rendered = outcome.state_header.render();
    assert!(rendered.contains("CWD: /virtual"));
}
1890
/// The command may be supplied through the `cmd` attribute instead of the tag body.
#[test]
fn terminal_supports_attribute_command_form() {
    let mut engine = Engine::new(InMemoryVfs::default(), MockTerminal::default());

    let instructions = vec![
        Instruction::StartTag {
            name: String::from("terminal"),
            attributes: BTreeMap::from([(String::from("cmd"), String::from("echo attr"))]),
        },
        Instruction::EndTag(String::from("terminal")),
    ];
    let outcome = engine
        .execute_turn(instructions)
        .expect("terminal command should run");

    let execution = match &outcome.outputs[0] {
        EngineOutput::Terminal(execution) => execution,
        _ => panic!("expected terminal output"),
    };
    assert!(execution.output.contains("ran: echo attr"));
}
1911
/// Unsupported start and end tags each produce a warning output.
#[test]
fn unknown_tags_emit_warnings_instead_of_silent_noops() {
    let mut engine = Engine::new(InMemoryVfs::default(), MockTerminal::default());

    let instructions = vec![
        Instruction::StartTag {
            name: String::from("mystery_tool"),
            attributes: BTreeMap::new(),
        },
        Instruction::EndTag(String::from("mystery_tool")),
    ];
    let outcome = engine.execute_turn(instructions).expect("turn should run");
    let outputs = &outcome.outputs;

    assert_eq!(outputs.len(), 2);
    assert!(matches!(
        &outputs[0],
        EngineOutput::Warning(message) if message.contains("unsupported start tag <mystery_tool>")
    ));
    assert!(matches!(
        &outputs[1],
        EngineOutput::Warning(message) if message.contains("unsupported end tag </mystery_tool>")
    ));
}
1937
/// The generated prompt mentions the enabled commands and leaves out `write_file`.
#[test]
fn system_prompt_includes_enabled_commands() {
    let enabled = [Capability::ReadFile, Capability::Terminal];
    let prompt = generate_system_prompt(&enabled);

    assert!(prompt.contains("<read_file"));
    assert!(prompt.contains("<terminal>"));
    assert!(!prompt.contains("<write_file path"));
}
1945
/// Deterministic, non-cryptographic digest used as the test VFS's stand-in hash.
///
/// Each byte is XORed into a 64-bit state that is then multiplied (wrapping)
/// by a fixed constant; the final state is rendered as 16 lowercase hex digits.
fn simple_hash(input: &[u8]) -> String {
    let digest = input
        .iter()
        .fold(1469598103934665603_u64, |state, &byte| {
            (state ^ u64::from(byte)).wrapping_mul(1099511628211)
        });
    format!("{digest:016x}")
}
1954}