1use std::collections::HashMap;
12use std::fmt::Write as _;
13use std::path::{Path, PathBuf};
14use std::time::Duration;
15
16use sa3p_parser::{Attributes, Instruction};
17use strsim::levenshtein;
18use thiserror::Error;
19
/// Errors the engine can report while executing a turn.
#[derive(Debug, Error)]
pub enum EngineError {
    /// A tag lacked an attribute the engine needs; the payload is the attribute name.
    #[error("missing required attribute `{0}`")]
    MissingAttribute(&'static str),
    /// An attribute that must be an unsigned integer failed to parse.
    #[error("invalid integer for attribute `{name}`: {value}")]
    InvalidInteger { name: &'static str, value: String },
    /// An instruction arrived where it is not allowed (for example a chunk
    /// outside its block). Also used for unsupported signal names.
    #[error("operation is out of order: {0}")]
    InvalidState(String),
    /// A captured payload or a file body was not valid UTF-8.
    #[error("invalid utf-8 payload")]
    InvalidUtf8,
    /// Failure reported by a [`VirtualFileSystem`] implementation.
    #[error("virtual filesystem error: {0}")]
    Vfs(String),
    /// Failure category for terminal providers.
    // NOTE(review): nothing in this file constructs this variant; presumably
    // `TerminalProvider` implementations do — confirm.
    #[error("terminal error: {0}")]
    Terminal(String),
}
35
/// Result alias used throughout this module, with [`EngineError`] as the error type.
pub type Result<T> = std::result::Result<T, EngineError>;
37
/// A file path paired with the digest string the filesystem reported for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Path of the file the digest belongs to.
    pub path: PathBuf,
    /// Digest string as returned by the filesystem.
    // NOTE(review): the field name implies SHA-256 hex, but the engine treats
    // it as an opaque string — confirm against the real VFS.
    pub sha256: String,
}
43
/// Whether a [`TreeNode`] is a file or a directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// Rendered as a `[file]` line by `list_files`.
    File,
    /// Rendered as a `[dir]` line by `list_files`.
    Directory,
}
49
/// One entry of a directory listing returned by [`VirtualFileSystem::list_tree`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeNode {
    /// Path of the entry; listings are sorted by this field before rendering.
    pub path: PathBuf,
    /// File or directory.
    pub kind: NodeKind,
    /// For directories, compared against the engine's dense-directory
    /// threshold to decide whether the listing is annotated as omitted.
    pub descendant_file_count: usize,
    /// When true the rendered line gets a ` (*)` suffix.
    pub modified_recently: bool,
}
57
/// Filesystem operations the engine relies on.
///
/// The engine never touches the disk directly; every read, write, hash and
/// listing goes through an implementation of this trait.
pub trait VirtualFileSystem {
    /// Returns the full contents of the file at `path`.
    fn read(&self, path: &Path) -> Result<Vec<u8>>;
    /// Replaces the contents of `path` with `bytes`.
    // NOTE(review): the name implies an all-or-nothing write; the guarantee
    // itself is up to the implementation — confirm.
    fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
    /// Returns a digest string for the current contents of `path`. The engine
    /// compares these strings to detect changes made behind its back.
    fn hash(&self, path: &Path) -> Result<String>;
    /// Returns the working directory shown in the state header.
    fn cwd(&self) -> Result<PathBuf>;
    /// Returns the entries under `path`; the engine sorts them itself.
    fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
    /// Returns file hashes for the state header; the engine passes its
    /// configured limit.
    // NOTE(review): presumably at most `limit` entries, most recent first —
    // ordering is not visible in this file.
    fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
}
66
/// Signals that `<terminal_signal>` can deliver to a process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
    /// Selected by `SIGINT`; also the default when no `signal` attribute is given.
    SigInt,
    /// Selected by `SIGTERM`.
    SigTerm,
    /// Selected by `SIGKILL`.
    SigKill,
}
73
/// Result of running one command through a [`TerminalProvider`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
    /// Text produced by the command.
    pub output: String,
    /// Exit status, if one is available.
    // NOTE(review): presumably `None` while the process is still running or
    // was detached — confirm with the provider implementation.
    pub exit_code: Option<i32>,
    /// Working directory reported by the provider for this execution.
    pub cwd: PathBuf,
    /// Process id reported by the provider, if any.
    // NOTE(review): presumably set when the command outlives the timeout and
    // keeps running in the background — confirm.
    pub detached_pid: Option<u32>,
}
81
/// Process-execution backend used by the engine for `<terminal>` and
/// `<terminal_signal>`.
pub trait TerminalProvider {
    /// Runs `command` (already trimmed by the engine) with the engine's
    /// configured `timeout` and returns what happened.
    fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
    /// Delivers `signal` to the process identified by `pid`.
    fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
    /// Returns the pids the provider considers active; the engine sorts them
    /// before putting them in the state header.
    fn active_pids(&self) -> Vec<u32>;
}
87
/// Snapshot of the environment captured at the end of every turn.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StateHeader {
    /// Working directory as reported by the filesystem.
    pub cwd: PathBuf,
    /// Recent file hashes as reported by the filesystem.
    pub recent_hashes: Vec<FileHash>,
    /// Active process ids from the terminal provider, sorted ascending.
    pub active_pids: Vec<u32>,
}
94
95impl StateHeader {
96 pub fn render(&self) -> String {
97 let mut out = String::new();
98 let _ = write!(&mut out, "[CWD: {}", self.cwd.display());
99
100 if self.recent_hashes.is_empty() {
101 out.push_str(" | RECENT_HASHES: none");
102 } else {
103 out.push_str(" | RECENT_HASHES: ");
104 for (idx, file_hash) in self.recent_hashes.iter().enumerate() {
105 if idx > 0 {
106 out.push_str(", ");
107 }
108 let _ = write!(
109 &mut out,
110 "{}#{}",
111 file_hash.path.display(),
112 shorten_hash(&file_hash.sha256)
113 );
114 }
115 }
116
117 if self.active_pids.is_empty() {
118 out.push_str(" | ACTIVE_PIDS: none]");
119 } else {
120 out.push_str(" | ACTIVE_PIDS: ");
121 for (idx, pid) in self.active_pids.iter().enumerate() {
122 if idx > 0 {
123 out.push_str(", ");
124 }
125 let _ = write!(&mut out, "{pid}");
126 }
127 out.push(']');
128 }
129
130 out
131 }
132}
133
/// Everything produced by one call to `Engine::execute_turn`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TurnResult {
    /// Environment snapshot taken after all instructions ran.
    pub state_header: StateHeader,
    /// Outputs in the order the corresponding commands completed.
    pub outputs: Vec<EngineOutput>,
}
139
/// Output of a single completed command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EngineOutput {
    /// Result of `<read_file>`.
    ReadFile(ReadFileOutput),
    /// Result of a closed `<write_file>` block.
    WriteFile(WriteFileOutput),
    /// Result of a closed `<apply_edit>` block.
    ApplyEdit(ApplyEditOutput),
    /// Result of `<list_files>`.
    ListFiles(ListFilesOutput),
    /// Result of a closed `<terminal>` block.
    Terminal(TerminalExecution),
    /// Result of `<terminal_signal>`.
    Signal(SignalOutput),
    /// Free-form warning text.
    // NOTE(review): not produced anywhere in this file — confirm whether
    // other modules emit it.
    Warning(String),
}
150
/// Result of a `<read_file>` command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFileOutput {
    /// File that was read.
    pub path: PathBuf,
    /// Size of the whole file in bytes.
    pub size_bytes: usize,
    /// Digest reported by the filesystem right after the read.
    pub sha256: String,
    /// First line asked for (1-based; values below 1 are raised to 1).
    pub requested_start: usize,
    /// Last line asked for, or the default window end when none was given.
    pub requested_end: usize,
    /// First line actually returned (0 for an empty file).
    pub served_start: usize,
    /// Last line actually returned, inclusive (0 for an empty file).
    pub served_end: usize,
    /// Number of lines in the file.
    pub total_lines: usize,
    /// Returned lines, each prefixed with `[<line number>] ` and joined with `\n`.
    pub body: String,
    /// Set when the file is empty or the served range differs from the request.
    pub warning: Option<String>,
}
164
165impl ReadFileOutput {
166 pub fn fidelity_header(&self) -> String {
167 format!(
168 "[PATH: {} | SIZE: {} | SHA256: {} | LINES: {}-{}/{}]",
169 self.path.display(),
170 human_bytes(self.size_bytes),
171 shorten_hash(&self.sha256),
172 self.served_start,
173 self.served_end,
174 self.total_lines
175 )
176 }
177}
178
/// Result of a completed `<write_file>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// File that was written.
    pub path: PathBuf,
    /// Number of bytes written (length of the accumulated chunks).
    pub size_bytes: usize,
    /// Digest reported by the filesystem after the write.
    pub sha256: String,
}
185
/// Which matching strategy located the block replaced by `<apply_edit>`.
///
/// Strategies are tried in the order listed; the first one that matches wins.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
    /// The search text occurred verbatim in the file.
    Exact,
    /// A run of lines matched once leading/trailing whitespace was ignored.
    WhitespaceAgnostic,
    /// The closest window of lines by edit distance was within the threshold.
    ContextualAnchor,
    /// Nothing was changed.
    NotApplied,
}
193
/// Result of a completed `<apply_edit>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplyEditOutput {
    /// File the edit targeted.
    pub path: PathBuf,
    /// Whether the file was rewritten.
    pub applied: bool,
    /// Strategy that matched, or [`EditTier::NotApplied`].
    pub tier: EditTier,
    /// Digest after the rewrite; `None` when nothing was applied.
    pub sha256: Option<String>,
    /// Reason the edit was skipped; `None` when it was applied.
    pub warning: Option<String>,
}
202
/// Result of a `<list_files>` command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
    /// Directory that was listed (`.` when no `path` attribute was given).
    pub path: PathBuf,
    /// Rendered listing lines, possibly ending with a truncation marker.
    pub lines: Vec<String>,
}
208
/// Result of a `<terminal_signal>` command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SignalOutput {
    /// Process the signal was sent to.
    pub pid: u32,
    /// Signal that was sent.
    pub signal: ProcessSignal,
}
214
/// Executes parsed instructions against a filesystem and a terminal backend.
#[derive(Debug)]
pub struct Engine<VFS, Terminal> {
    /// Filesystem backend.
    vfs: VFS,
    /// Terminal backend.
    terminal: Terminal,
    /// Last digest the engine observed per path (recorded after reads, writes
    /// and applied edits); used to detect external modification before editing.
    known_hashes: HashMap<PathBuf, String>,
    /// The block command currently open, if any.
    pending: Option<PendingOperation>,
    /// Maximum number of entries rendered by `list_files` before truncating.
    max_list_lines: usize,
    /// Directories with at least this many descendant files are annotated as omitted.
    dense_dir_threshold: usize,
    /// Timeout handed to the terminal provider for each command.
    terminal_timeout: Duration,
    /// Limit passed to the filesystem when collecting recent hashes.
    recent_hash_limit: usize,
}
226
227impl<VFS, Terminal> Engine<VFS, Terminal>
228where
229 VFS: VirtualFileSystem,
230 Terminal: TerminalProvider,
231{
232 pub fn new(vfs: VFS, terminal: Terminal) -> Self {
233 Self {
234 vfs,
235 terminal,
236 known_hashes: HashMap::new(),
237 pending: None,
238 max_list_lines: 300,
239 dense_dir_threshold: 200,
240 terminal_timeout: Duration::from_secs(5),
241 recent_hash_limit: 5,
242 }
243 }
244
245 pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
246 self.terminal_timeout = timeout;
247 self
248 }
249
250 pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
251 self.max_list_lines = max_lines;
252 self.dense_dir_threshold = dense_threshold;
253 self
254 }
255
256 pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
257 let mut outputs = Vec::new();
258
259 for instruction in instructions {
260 self.apply_instruction(instruction, &mut outputs)?;
261 }
262
263 let state_header = StateHeader {
264 cwd: self.vfs.cwd()?,
265 recent_hashes: self.vfs.recent_file_hashes(self.recent_hash_limit)?,
266 active_pids: {
267 let mut pids = self.terminal.active_pids();
268 pids.sort_unstable();
269 pids
270 },
271 };
272
273 Ok(TurnResult {
274 state_header,
275 outputs,
276 })
277 }
278
279 fn apply_instruction(
280 &mut self,
281 instruction: Instruction,
282 outputs: &mut Vec<EngineOutput>,
283 ) -> Result<()> {
284 match instruction {
285 Instruction::Text(_) => {}
286 Instruction::StartTag { name, attributes } => {
287 self.handle_start_tag(&name, attributes, outputs)?;
288 }
289 Instruction::EndTag(name) => {
290 self.handle_end_tag(&name, outputs)?;
291 }
292 Instruction::WriteChunk(bytes) => {
293 let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
294 return Err(EngineError::InvalidState(
295 "received WriteChunk outside <write_file>".to_string(),
296 ));
297 };
298 write.buffer.extend_from_slice(&bytes);
299 }
300 Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
301 Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
302 Some(ApplyCapture::Search) if tag == "search" => {
303 edit.search.extend_from_slice(&bytes);
304 }
305 Some(ApplyCapture::Replace) if tag == "replace" => {
306 edit.replace.extend_from_slice(&bytes);
307 }
308 _ => {
309 return Err(EngineError::InvalidState(format!(
310 "unexpected raw chunk for <{tag}> while applying edit"
311 )));
312 }
313 },
314 Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
315 term.command.extend_from_slice(&bytes);
316 }
317 _ => {
318 return Err(EngineError::InvalidState(format!(
319 "received raw chunk for <{tag}> without active matching context"
320 )));
321 }
322 },
323 }
324
325 Ok(())
326 }
327
328 fn handle_start_tag(
329 &mut self,
330 name: &str,
331 attributes: Attributes,
332 outputs: &mut Vec<EngineOutput>,
333 ) -> Result<()> {
334 match name {
335 "write_file" => {
336 self.ensure_no_pending("write_file")?;
337 let path = required_path(&attributes)?;
338 self.pending = Some(PendingOperation::WriteFile(PendingWrite {
339 path,
340 buffer: Vec::new(),
341 }));
342 }
343 "read_file" => {
344 let output = self.execute_read_file(&attributes)?;
345 outputs.push(EngineOutput::ReadFile(output));
346 }
347 "apply_edit" => {
348 self.ensure_no_pending("apply_edit")?;
349 let path = required_path(&attributes)?;
350 self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
351 path,
352 search: Vec::new(),
353 replace: Vec::new(),
354 capture: None,
355 }));
356 }
357 "search" => {
358 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
359 return Err(EngineError::InvalidState(
360 "<search> must be nested under <apply_edit>".to_string(),
361 ));
362 };
363 edit.capture = Some(ApplyCapture::Search);
364 }
365 "replace" => {
366 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
367 return Err(EngineError::InvalidState(
368 "<replace> must be nested under <apply_edit>".to_string(),
369 ));
370 };
371 edit.capture = Some(ApplyCapture::Replace);
372 }
373 "list_files" => {
374 let output = self.execute_list_files(&attributes)?;
375 outputs.push(EngineOutput::ListFiles(output));
376 }
377 "terminal" => {
378 self.ensure_no_pending("terminal")?;
379 self.pending = Some(PendingOperation::Terminal(PendingTerminal {
380 command: Vec::new(),
381 }));
382 }
383 "terminal_signal" => {
384 let signal_output = self.execute_terminal_signal(&attributes)?;
385 outputs.push(EngineOutput::Signal(signal_output));
386 }
387 _ => {}
388 }
389
390 Ok(())
391 }
392
393 fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
394 match name {
395 "write_file" => {
396 let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
397 return Err(EngineError::InvalidState(
398 "</write_file> received without matching start".to_string(),
399 ));
400 };
401 let output = self.finalize_write(write)?;
402 outputs.push(EngineOutput::WriteFile(output));
403 }
404 "search" | "replace" => {
405 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
406 return Err(EngineError::InvalidState(format!(
407 "</{name}> received without active <apply_edit>"
408 )));
409 };
410 edit.capture = None;
411 }
412 "apply_edit" => {
413 let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
414 return Err(EngineError::InvalidState(
415 "</apply_edit> received without matching start".to_string(),
416 ));
417 };
418 let output = self.finalize_apply_edit(edit)?;
419 outputs.push(EngineOutput::ApplyEdit(output));
420 }
421 "terminal" => {
422 let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
423 return Err(EngineError::InvalidState(
424 "</terminal> received without matching start".to_string(),
425 ));
426 };
427 let command =
428 String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
429 let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
430 outputs.push(EngineOutput::Terminal(output));
431 }
432 _ => {}
433 }
434
435 Ok(())
436 }
437
438 fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
439 self.vfs.write_atomic(&write.path, &write.buffer)?;
440 let sha256 = self.vfs.hash(&write.path)?;
441 self.known_hashes.insert(write.path.clone(), sha256.clone());
442
443 Ok(WriteFileOutput {
444 path: write.path,
445 size_bytes: write.buffer.len(),
446 sha256,
447 })
448 }
449
450 fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
451 let path = required_path(attributes)?;
452 let requested_start = optional_usize(attributes, "start_line")?
453 .unwrap_or(1)
454 .max(1);
455 let requested_end =
456 optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
457
458 let bytes = self.vfs.read(&path)?;
459 let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
460 let sha256 = self.vfs.hash(&path)?;
461 self.known_hashes.insert(path.clone(), sha256.clone());
462
463 let all_lines: Vec<&str> = text.lines().collect();
464 let total_lines = all_lines.len();
465
466 let (served_start, served_end, warning) = if total_lines == 0 {
467 (0, 0, Some("file is empty; returning no lines".to_string()))
468 } else {
469 let served_start = requested_start.min(total_lines);
470 let served_end = requested_end.max(served_start).min(total_lines);
471 let warning = if served_start != requested_start || served_end != requested_end {
472 Some(format!(
473 "requested lines {}-{} adjusted to {}-{} (file has {} lines)",
474 requested_start, requested_end, served_start, served_end, total_lines
475 ))
476 } else {
477 None
478 };
479 (served_start, served_end, warning)
480 };
481
482 let body = if total_lines == 0 {
483 String::new()
484 } else {
485 let mut rendered = Vec::new();
486 for line_idx in served_start..=served_end {
487 let content = all_lines[line_idx - 1];
488 rendered.push(format!("[{line_idx}] {content}"));
489 }
490 rendered.join("\n")
491 };
492
493 Ok(ReadFileOutput {
494 path,
495 size_bytes: bytes.len(),
496 sha256,
497 requested_start,
498 requested_end,
499 served_start,
500 served_end,
501 total_lines,
502 body,
503 warning,
504 })
505 }
506
507 fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
508 let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
509 let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
510
511 if let Some(previous_hash) = self.known_hashes.get(&edit.path) {
512 let current_hash = self.vfs.hash(&edit.path)?;
513 if previous_hash != ¤t_hash {
514 return Ok(ApplyEditOutput {
515 path: edit.path,
516 applied: false,
517 tier: EditTier::NotApplied,
518 sha256: None,
519 warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
520 });
521 }
522 }
523
524 let original_bytes = self.vfs.read(&edit.path)?;
525 let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;
526
527 let Some((rewritten, tier)) = apply_edit_with_tiers(&original, &search, &replace) else {
528 return Ok(ApplyEditOutput {
529 path: edit.path,
530 applied: false,
531 tier: EditTier::NotApplied,
532 sha256: None,
533 warning: Some("no suitable target block found for apply_edit".to_string()),
534 });
535 };
536
537 self.vfs.write_atomic(&edit.path, rewritten.as_bytes())?;
538 let sha256 = self.vfs.hash(&edit.path)?;
539 self.known_hashes.insert(edit.path.clone(), sha256.clone());
540
541 Ok(ApplyEditOutput {
542 path: edit.path,
543 applied: true,
544 tier,
545 sha256: Some(sha256),
546 warning: None,
547 })
548 }
549
550 fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
551 let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
552 let mut nodes = self.vfs.list_tree(&path)?;
553 nodes.sort_by(|a, b| a.path.cmp(&b.path));
554
555 let mut lines = Vec::new();
556
557 for node in nodes {
558 if lines.len() >= self.max_list_lines {
559 lines.push("[... truncated due to token budget ...]".to_string());
560 break;
561 }
562
563 let mut line = match node.kind {
564 NodeKind::Directory => {
565 if node.descendant_file_count >= self.dense_dir_threshold {
566 format!(
567 "[dir] {}/ ({} files, omitted)",
568 node.path.display(),
569 node.descendant_file_count
570 )
571 } else {
572 format!("[dir] {}/", node.path.display())
573 }
574 }
575 NodeKind::File => format!("[file] {}", node.path.display()),
576 };
577
578 if node.modified_recently {
579 line.push_str(" (*)");
580 }
581
582 lines.push(line);
583 }
584
585 Ok(ListFilesOutput { path, lines })
586 }
587
588 fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
589 let pid_value = required_attr(attributes, "pid")?;
590 let pid = pid_value
591 .parse::<u32>()
592 .map_err(|_| EngineError::InvalidInteger {
593 name: "pid",
594 value: pid_value.to_string(),
595 })?;
596 let signal = match attributes
597 .get("signal")
598 .map(|v| v.to_ascii_uppercase())
599 .unwrap_or_else(|| "SIGINT".to_string())
600 .as_str()
601 {
602 "SIGINT" => ProcessSignal::SigInt,
603 "SIGTERM" => ProcessSignal::SigTerm,
604 "SIGKILL" => ProcessSignal::SigKill,
605 other => {
606 return Err(EngineError::InvalidState(format!(
607 "unsupported signal `{other}`"
608 )));
609 }
610 };
611
612 self.terminal.signal(pid, signal)?;
613 Ok(SignalOutput { pid, signal })
614 }
615
616 fn ensure_no_pending(&self, next: &str) -> Result<()> {
617 if self.pending.is_some() {
618 return Err(EngineError::InvalidState(format!(
619 "cannot start <{next}> while another command block is still open"
620 )));
621 }
622 Ok(())
623 }
624}
625
/// A block command that has been opened but not yet closed.
#[derive(Debug)]
enum PendingOperation {
    /// An open `<write_file>` accumulating file content.
    WriteFile(PendingWrite),
    /// An open `<apply_edit>` accumulating search/replace payloads.
    ApplyEdit(PendingApplyEdit),
    /// An open `<terminal>` accumulating the command text.
    Terminal(PendingTerminal),
}
632
/// State of an open `<write_file>` block.
#[derive(Debug)]
struct PendingWrite {
    /// Destination taken from the tag's `path` attribute.
    path: PathBuf,
    /// Bytes received so far via `WriteChunk` instructions.
    buffer: Vec<u8>,
}
638
/// State of an open `<apply_edit>` block.
#[derive(Debug)]
struct PendingApplyEdit {
    /// File to edit, taken from the tag's `path` attribute.
    path: PathBuf,
    /// Bytes captured inside `<search>`.
    search: Vec<u8>,
    /// Bytes captured inside `<replace>`.
    replace: Vec<u8>,
    /// Which nested tag is currently open, if any; raw chunks are only
    /// accepted when their tag matches this.
    capture: Option<ApplyCapture>,
}
646
/// Which payload of an `<apply_edit>` block is currently being captured.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
    /// Inside `<search>`.
    Search,
    /// Inside `<replace>`.
    Replace,
}
652
/// State of an open `<terminal>` block.
#[derive(Debug)]
struct PendingTerminal {
    /// Command bytes received so far; decoded and trimmed when the block closes.
    command: Vec<u8>,
}
657
/// A command family that can be advertised in the generated system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    /// The `<write_file>` block.
    WriteFile,
    /// The `<apply_edit>` block.
    ApplyEdit,
    /// The `<read_file>` tag.
    ReadFile,
    /// The `<list_files>` tag.
    ListFiles,
    /// The `<terminal>` block.
    Terminal,
}

impl Capability {
    /// Returns the usage template shown to the model for this capability.
    fn tag(&self) -> &'static str {
        const WRITE_FILE: &str = "<write_file path=\"...\"></write_file>";
        const APPLY_EDIT: &str =
            "<apply_edit path=\"...\"><search>...</search><replace>...</replace></apply_edit>";
        const READ_FILE: &str = "<read_file path=\"...\" start_line=\"..\" end_line=\"...\" />";
        const LIST_FILES: &str = "<list_files path=\"...\" />";
        const TERMINAL: &str = "<terminal>...</terminal>";

        match *self {
            Self::WriteFile => WRITE_FILE,
            Self::ApplyEdit => APPLY_EDIT,
            Self::ReadFile => READ_FILE,
            Self::ListFiles => LIST_FILES,
            Self::Terminal => TERMINAL,
        }
    }
}
680
681pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
682 let enabled: Vec<Capability> = if capabilities.is_empty() {
683 vec![
684 Capability::WriteFile,
685 Capability::ApplyEdit,
686 Capability::ReadFile,
687 Capability::ListFiles,
688 Capability::Terminal,
689 ]
690 } else {
691 capabilities.to_vec()
692 };
693
694 let mut out = String::from(
695 "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
696 );
697 out.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
698 out.push_str("Available Commands:\n");
699 for capability in &enabled {
700 out.push_str(capability.tag());
701 out.push('\n');
702 }
703 out.push_str("\nRules:\n");
704 out.push_str("1. Do not escape strings inside tags.\n");
705 out.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
706 out.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
707 out
708}
709
710fn required_path(attributes: &Attributes) -> Result<PathBuf> {
711 optional_path(attributes, "path")?.ok_or(EngineError::MissingAttribute("path"))
712}
713
714fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
715 Ok(attributes.get(key).map(PathBuf::from))
716}
717
718fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
719 attributes
720 .get(key)
721 .map(|value| value.as_str())
722 .ok_or(EngineError::MissingAttribute(key))
723}
724
725fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
726 let Some(value) = attributes.get(key) else {
727 return Ok(None);
728 };
729 let parsed = value
730 .parse::<usize>()
731 .map_err(|_| EngineError::InvalidInteger {
732 name: key,
733 value: value.clone(),
734 })?;
735 Ok(Some(parsed))
736}
737
/// Returns the first eight characters of `hash` (or all of it when shorter).
fn shorten_hash(hash: &str) -> String {
    // The byte offset of the ninth character, if there is one, is where to cut.
    match hash.char_indices().nth(8) {
        Some((cut, _)) => hash[..cut].to_string(),
        None => hash.to_string(),
    }
}
741
/// Formats a byte count for display: plain bytes below 1 KiB (`"512b"`),
/// otherwise kibibytes or mebibytes with one decimal (`"1.5kb"`, `"2.0mb"`).
fn human_bytes(bytes: usize) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;

    match bytes as f64 {
        size if size >= MB => format!("{:.1}mb", size / MB),
        size if size >= KB => format!("{:.1}kb", size / KB),
        _ => format!("{bytes}b"),
    }
}
755
756fn apply_edit_with_tiers(
757 original: &str,
758 search: &str,
759 replace: &str,
760) -> Option<(String, EditTier)> {
761 if search.is_empty() {
762 return None;
763 }
764
765 if let Some(output) = apply_exact(original, search, replace) {
766 return Some((output, EditTier::Exact));
767 }
768
769 if let Some(output) = apply_whitespace_agnostic(original, search, replace) {
770 return Some((output, EditTier::WhitespaceAgnostic));
771 }
772
773 apply_contextual_anchor(original, search, replace)
774 .map(|output| (output, EditTier::ContextualAnchor))
775}
776
/// Replaces the first verbatim occurrence of `search` in `original` with
/// `replace`; returns `None` when `search` does not occur.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    let (before, after) = original.split_once(search)?;
    Some([before, replace, after].concat())
}
785
786fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
787 let original_lines = collect_line_spans(original);
788 let search_lines: Vec<&str> = search.lines().collect();
789 if search_lines.is_empty() || original_lines.len() < search_lines.len() {
790 return None;
791 }
792
793 for start in 0..=original_lines.len() - search_lines.len() {
794 let window = &original_lines[start..start + search_lines.len()];
795 if window
796 .iter()
797 .zip(search_lines.iter())
798 .all(|(candidate, target)| candidate.text.trim() == target.trim())
799 {
800 let range_start = window.first()?.start;
801 let range_end = window.last()?.end;
802 return Some(splice(original, range_start, range_end, replace));
803 }
804 }
805
806 None
807}
808
809fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
810 let original_lines = collect_line_spans(original);
811 let search_lines: Vec<&str> = search.lines().collect();
812 if search_lines.is_empty() || original_lines.is_empty() {
813 return None;
814 }
815
816 let window_len = search_lines.len().min(original_lines.len());
817 let normalized_search = normalize_for_distance(search);
818 let mut best: Option<(usize, usize, usize)> = None;
819
820 for start in 0..=original_lines.len() - window_len {
821 let window = &original_lines[start..start + window_len];
822 let joined = window
823 .iter()
824 .map(|line| line.text)
825 .collect::<Vec<_>>()
826 .join("\n");
827 let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
828
829 match best {
830 Some((best_score, _, _)) if score >= best_score => {}
831 _ => best = Some((score, start, start + window_len - 1)),
832 }
833 }
834
835 let (score, line_start, line_end) = best?;
836 let threshold = normalized_search.len().max(6) / 3;
837 if score > threshold {
838 return None;
839 }
840
841 let range_start = original_lines[line_start].start;
842 let range_end = original_lines[line_end].end;
843 Some(splice(original, range_start, range_end, replace))
844}
845
/// Canonicalises text before distance scoring: inside every line, runs of
/// whitespace collapse to a single space and edge whitespace is dropped;
/// lines are rejoined with `\n` and the whole result is trimmed.
fn normalize_for_distance(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());

    for (line_no, line) in input.lines().enumerate() {
        if line_no > 0 {
            collapsed.push('\n');
        }
        for (word_no, word) in line.split_whitespace().enumerate() {
            if word_no > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(word);
        }
    }

    // Blank leading/trailing lines leave stray newlines behind; drop them.
    collapsed.trim().to_string()
}
855
/// Returns `original` with the byte range `range_start..range_end` replaced
/// by `replace`.
///
/// # Panics
///
/// Panics if either offset is out of bounds or not on a character boundary.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    let (head, tail) = (&original[..range_start], &original[range_end..]);
    [head, replace, tail].concat()
}
863
/// One line of a text together with its byte range in that text.
#[derive(Debug)]
struct LineSpan<'a> {
    /// Byte offset of the first character of the line.
    start: usize,
    /// Byte offset just past the line, including its `\n` if it has one.
    end: usize,
    /// Line content without the trailing `\n` (a preceding `\r` is kept).
    text: &'a str,
}

/// Splits `input` into lines while remembering where each line sits.
///
/// Spans are contiguous and cover the whole input: each span ends where the
/// next begins and the last one ends at `input.len()`. Empty input yields no
/// spans.
fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
    input
        .split_inclusive('\n')
        .scan(0usize, |offset, chunk| {
            let start = *offset;
            *offset += chunk.len();
            Some(LineSpan {
                start,
                end: *offset,
                text: chunk.strip_suffix('\n').unwrap_or(chunk),
            })
        })
        .collect()
}
898
899#[cfg(test)]
900mod tests {
901 use super::*;
902 use std::collections::BTreeMap;
903 use std::sync::{Arc, Mutex};
904
905 #[derive(Clone, Default)]
906 struct InMemoryVfs {
907 files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
908 tree: Arc<Mutex<Vec<TreeNode>>>,
909 }
910
911 impl InMemoryVfs {
912 fn set_file(&self, path: &str, body: &str) {
913 self.files
914 .lock()
915 .expect("lock")
916 .insert(PathBuf::from(path), body.as_bytes().to_vec());
917 }
918
919 fn get_file(&self, path: &str) -> String {
920 String::from_utf8(
921 self.files
922 .lock()
923 .expect("lock")
924 .get(&PathBuf::from(path))
925 .cloned()
926 .unwrap_or_default(),
927 )
928 .expect("utf8")
929 }
930
931 fn set_tree(&self, nodes: Vec<TreeNode>) {
932 *self.tree.lock().expect("lock") = nodes;
933 }
934 }
935
936 impl VirtualFileSystem for InMemoryVfs {
937 fn read(&self, path: &Path) -> Result<Vec<u8>> {
938 self.files
939 .lock()
940 .expect("lock")
941 .get(path)
942 .cloned()
943 .ok_or_else(|| EngineError::Vfs(format!("missing file {}", path.display())))
944 }
945
946 fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
947 self.files
948 .lock()
949 .expect("lock")
950 .insert(path.to_path_buf(), bytes.to_vec());
951 Ok(())
952 }
953
954 fn hash(&self, path: &Path) -> Result<String> {
955 let bytes = self.read(path)?;
956 Ok(simple_hash(&bytes))
957 }
958
959 fn cwd(&self) -> Result<PathBuf> {
960 Ok(PathBuf::from("/virtual"))
961 }
962
963 fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
964 Ok(self.tree.lock().expect("lock").clone())
965 }
966
967 fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
968 let files = self.files.lock().expect("lock");
969 let mut entries: Vec<_> = files
970 .iter()
971 .map(|(path, body)| FileHash {
972 path: path.clone(),
973 sha256: simple_hash(body),
974 })
975 .collect();
976 entries.sort_by(|a, b| a.path.cmp(&b.path));
977 entries.truncate(limit);
978 Ok(entries)
979 }
980 }
981
982 #[derive(Default)]
983 struct MockTerminal {
984 pids: Vec<u32>,
985 last_command: Option<String>,
986 }
987
988 impl TerminalProvider for MockTerminal {
989 fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
990 self.last_command = Some(command.to_string());
991 Ok(TerminalExecution {
992 output: format!("ran: {command}"),
993 exit_code: Some(0),
994 cwd: PathBuf::from("/virtual"),
995 detached_pid: None,
996 })
997 }
998
999 fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
1000 self.pids.retain(|existing| *existing != pid);
1001 Ok(())
1002 }
1003
1004 fn active_pids(&self) -> Vec<u32> {
1005 self.pids.clone()
1006 }
1007 }
1008
1009 #[test]
1010 fn write_file_chunks_commit_atomically() {
1011 let vfs = InMemoryVfs::default();
1012 let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1013
1014 let turn = engine
1015 .execute_turn(vec![
1016 Instruction::StartTag {
1017 name: "write_file".to_string(),
1018 attributes: BTreeMap::from([("path".to_string(), "src/main.rs".to_string())]),
1019 },
1020 Instruction::WriteChunk(b"fn main()".to_vec()),
1021 Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
1022 Instruction::EndTag("write_file".to_string()),
1023 ])
1024 .expect("turn should run");
1025
1026 assert_eq!(
1027 vfs.get_file("src/main.rs"),
1028 "fn main() { println!(\"ok\"); }"
1029 );
1030 assert!(matches!(
1031 turn.outputs.as_slice(),
1032 [EngineOutput::WriteFile(WriteFileOutput { .. })]
1033 ));
1034 }
1035
1036 #[test]
1037 fn read_file_returns_fidelity_header_and_numbered_lines() {
1038 let vfs = InMemoryVfs::default();
1039 vfs.set_file("src/lib.rs", "a\nb\nc\nd\n");
1040 let mut engine = Engine::new(vfs, MockTerminal::default());
1041
1042 let turn = engine
1043 .execute_turn(vec![Instruction::StartTag {
1044 name: "read_file".to_string(),
1045 attributes: BTreeMap::from([
1046 ("path".to_string(), "src/lib.rs".to_string()),
1047 ("start_line".to_string(), "3".to_string()),
1048 ("end_line".to_string(), "9".to_string()),
1049 ]),
1050 }])
1051 .expect("turn should run");
1052
1053 let EngineOutput::ReadFile(output) = &turn.outputs[0] else {
1054 panic!("expected read output");
1055 };
1056
1057 assert_eq!(output.served_start, 3);
1058 assert_eq!(output.served_end, 4);
1059 assert_eq!(output.body, "[3] c\n[4] d");
1060 assert!(output.warning.is_some());
1061 assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
1062 }
1063
1064 #[test]
1065 fn apply_edit_uses_whitespace_agnostic_matching() {
1066 let vfs = InMemoryVfs::default();
1067 vfs.set_file("src/lib.rs", "fn main() {\n println!(\"x\");\n}\n");
1068 let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1069
1070 let turn = engine
1071 .execute_turn(vec![
1072 Instruction::StartTag {
1073 name: "apply_edit".to_string(),
1074 attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1075 },
1076 Instruction::StartTag {
1077 name: "search".to_string(),
1078 attributes: BTreeMap::new(),
1079 },
1080 Instruction::RawChunk {
1081 tag: "search".to_string(),
1082 bytes: b" println!(\"x\"); ".to_vec(),
1083 },
1084 Instruction::EndTag("search".to_string()),
1085 Instruction::StartTag {
1086 name: "replace".to_string(),
1087 attributes: BTreeMap::new(),
1088 },
1089 Instruction::RawChunk {
1090 tag: "replace".to_string(),
1091 bytes: b"println!(\"y\");".to_vec(),
1092 },
1093 Instruction::EndTag("replace".to_string()),
1094 Instruction::EndTag("apply_edit".to_string()),
1095 ])
1096 .expect("turn should run");
1097
1098 let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1099 panic!("expected apply_edit output");
1100 };
1101
1102 assert!(edit.applied);
1103 assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
1104 assert!(vfs.get_file("src/lib.rs").contains("println!(\"y\");"));
1105 }
1106
1107 #[test]
1108 fn apply_edit_warns_if_file_changed_since_last_read() {
1109 let vfs = InMemoryVfs::default();
1110 vfs.set_file("src/lib.rs", "alpha\nbeta\n");
1111 let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1112
1113 let _ = engine
1114 .execute_turn(vec![Instruction::StartTag {
1115 name: "read_file".to_string(),
1116 attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1117 }])
1118 .expect("read should work");
1119
1120 vfs.set_file("src/lib.rs", "external\nchange\n");
1121
1122 let turn = engine
1123 .execute_turn(vec![
1124 Instruction::StartTag {
1125 name: "apply_edit".to_string(),
1126 attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1127 },
1128 Instruction::StartTag {
1129 name: "search".to_string(),
1130 attributes: BTreeMap::new(),
1131 },
1132 Instruction::RawChunk {
1133 tag: "search".to_string(),
1134 bytes: b"beta".to_vec(),
1135 },
1136 Instruction::EndTag("search".to_string()),
1137 Instruction::StartTag {
1138 name: "replace".to_string(),
1139 attributes: BTreeMap::new(),
1140 },
1141 Instruction::RawChunk {
1142 tag: "replace".to_string(),
1143 bytes: b"gamma".to_vec(),
1144 },
1145 Instruction::EndTag("replace".to_string()),
1146 Instruction::EndTag("apply_edit".to_string()),
1147 ])
1148 .expect("apply should run");
1149
1150 let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1151 panic!("expected apply_edit output");
1152 };
1153
1154 assert!(!edit.applied);
1155 assert_eq!(edit.tier, EditTier::NotApplied);
1156 assert!(
1157 edit.warning
1158 .as_deref()
1159 .unwrap_or_default()
1160 .contains("File modified externally")
1161 );
1162 }
1163
1164 #[test]
1165 fn list_files_omits_dense_directories_and_marks_recent() {
1166 let vfs = InMemoryVfs::default();
1167 vfs.set_tree(vec![
1168 TreeNode {
1169 path: PathBuf::from("src"),
1170 kind: NodeKind::Directory,
1171 descendant_file_count: 3,
1172 modified_recently: false,
1173 },
1174 TreeNode {
1175 path: PathBuf::from("src/lib.rs"),
1176 kind: NodeKind::File,
1177 descendant_file_count: 0,
1178 modified_recently: true,
1179 },
1180 TreeNode {
1181 path: PathBuf::from("node_modules"),
1182 kind: NodeKind::Directory,
1183 descendant_file_count: 2400,
1184 modified_recently: false,
1185 },
1186 ]);
1187
1188 let mut engine = Engine::new(vfs, MockTerminal::default()).with_list_budget(100, 200);
1189 let turn = engine
1190 .execute_turn(vec![Instruction::StartTag {
1191 name: "list_files".to_string(),
1192 attributes: BTreeMap::from([("path".to_string(), ".".to_string())]),
1193 }])
1194 .expect("list should run");
1195
1196 let EngineOutput::ListFiles(output) = &turn.outputs[0] else {
1197 panic!("expected list output");
1198 };
1199
1200 assert!(
1201 output
1202 .lines
1203 .iter()
1204 .any(|line| line.contains("node_modules") && line.contains("omitted"))
1205 );
1206 assert!(output.lines.iter().any(|line| line.contains("(*)")));
1207 }
1208
1209 #[test]
1210 fn terminal_executes_command_and_reports_state_header() {
1211 let vfs = InMemoryVfs::default();
1212 let terminal = MockTerminal {
1213 pids: vec![42, 7],
1214 ..Default::default()
1215 };
1216
1217 let mut engine = Engine::new(vfs, terminal);
1218 let turn = engine
1219 .execute_turn(vec![
1220 Instruction::StartTag {
1221 name: "terminal".to_string(),
1222 attributes: BTreeMap::new(),
1223 },
1224 Instruction::RawChunk {
1225 tag: "terminal".to_string(),
1226 bytes: b"echo hi".to_vec(),
1227 },
1228 Instruction::EndTag("terminal".to_string()),
1229 ])
1230 .expect("terminal turn should run");
1231
1232 assert!(matches!(
1233 turn.outputs.as_slice(),
1234 [EngineOutput::Terminal(TerminalExecution { .. })]
1235 ));
1236 assert_eq!(turn.state_header.active_pids, vec![7, 42]);
1237 assert!(turn.state_header.render().contains("CWD: /virtual"));
1238 }
1239
1240 #[test]
1241 fn system_prompt_includes_enabled_commands() {
1242 let prompt = generate_system_prompt(&[Capability::ReadFile, Capability::Terminal]);
1243 assert!(prompt.contains("<read_file"));
1244 assert!(prompt.contains("<terminal>"));
1245 assert!(!prompt.contains("<write_file path"));
1246 }
1247
1248 fn simple_hash(input: &[u8]) -> String {
1249 let mut acc: u64 = 1469598103934665603;
1250 for b in input {
1251 acc ^= *b as u64;
1252 acc = acc.wrapping_mul(1099511628211);
1253 }
1254 format!("{acc:016x}")
1255 }
1256}