use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use std::time::Duration;
use sa3p_parser::{Attributes, Instruction};
use strsim::levenshtein;
use thiserror::Error;
/// Errors surfaced by the engine while executing a turn.
#[derive(Debug, Error)]
pub enum EngineError {
/// A tag was missing a required attribute (e.g. `path` or `pid`).
#[error("missing required attribute `{0}`")]
MissingAttribute(&'static str),
/// An attribute was present but did not parse as an integer.
#[error("invalid integer for attribute `{name}`: {value}")]
InvalidInteger { name: &'static str, value: String },
/// An instruction arrived in a state that cannot accept it (e.g. a chunk
/// outside its tag, or a command block opened while another is pending).
#[error("operation is out of order: {0}")]
InvalidState(String),
/// A payload (file body, command, search/replace text) was not valid UTF-8.
#[error("invalid utf-8 payload")]
InvalidUtf8,
/// Error reported by the `VirtualFileSystem` backend.
#[error("virtual filesystem error: {0}")]
Vfs(String),
/// Error reported by the `TerminalProvider` backend.
#[error("terminal error: {0}")]
Terminal(String),
}
/// Convenience alias: every engine operation shares [`EngineError`].
pub type Result<T> = std::result::Result<T, EngineError>;
/// A path paired with the SHA-256 digest of its current contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
pub path: PathBuf,
pub sha256: String,
}
/// Whether a tree entry is a file or a directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
File,
Directory,
}
/// One entry returned by [`VirtualFileSystem::list_tree`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeNode {
pub path: PathBuf,
pub kind: NodeKind,
// For directories: number of files beneath, used for dense-dir elision.
pub descendant_file_count: usize,
// Rendered as a trailing " (*)" marker in list_files output.
pub modified_recently: bool,
}
/// Filesystem abstraction the engine runs against (real disk, sandbox, mock).
pub trait VirtualFileSystem {
fn read(&self, path: &Path) -> Result<Vec<u8>>;
fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
fn hash(&self, path: &Path) -> Result<String>;
fn cwd(&self) -> Result<PathBuf>;
fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
}
/// Signals that can be delivered to a detached process via `terminal_signal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
SigInt,
SigTerm,
SigKill,
}
/// Result of running one terminal command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
pub output: String,
// None when the process did not exit (e.g. detached / still running).
pub exit_code: Option<i32>,
pub cwd: PathBuf,
// Set when the command was left running in the background.
pub detached_pid: Option<u32>,
}
/// Terminal abstraction: runs commands with a timeout and signals PIDs.
pub trait TerminalProvider {
fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
fn active_pids(&self) -> Vec<u32>;
}
/// Snapshot of engine state appended to every turn (see [`StateHeader::render`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StateHeader {
pub cwd: PathBuf,
pub recent_hashes: Vec<FileHash>,
pub active_pids: Vec<u32>,
}
impl StateHeader {
    /// Renders the header as one bracketed status line, e.g.
    /// `[CWD: /x | RECENT_HASHES: a.rs#12345678 | ACTIVE_PIDS: none]`.
    /// Empty sections are rendered as the literal word `none`.
    pub fn render(&self) -> String {
        let mut rendered = String::new();
        let _ = write!(&mut rendered, "[CWD: {}", self.cwd.display());
        if self.recent_hashes.is_empty() {
            rendered.push_str(" | RECENT_HASHES: none");
        } else {
            rendered.push_str(" | RECENT_HASHES: ");
            let mut first = true;
            for file_hash in &self.recent_hashes {
                if !first {
                    rendered.push_str(", ");
                }
                first = false;
                let _ = write!(
                    &mut rendered,
                    "{}#{}",
                    file_hash.path.display(),
                    shorten_hash(&file_hash.sha256)
                );
            }
        }
        if self.active_pids.is_empty() {
            // The closing bracket is part of this literal.
            rendered.push_str(" | ACTIVE_PIDS: none]");
        } else {
            rendered.push_str(" | ACTIVE_PIDS: ");
            let pid_list = self
                .active_pids
                .iter()
                .map(|pid| pid.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            rendered.push_str(&pid_list);
            rendered.push(']');
        }
        rendered
    }
}
/// Everything produced by one call to `Engine::execute_turn`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TurnResult {
pub state_header: StateHeader,
pub outputs: Vec<EngineOutput>,
}
/// One per executed command (or warning) within a turn, in order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EngineOutput {
ReadFile(ReadFileOutput),
WriteFile(WriteFileOutput),
ApplyEdit(ApplyEditOutput),
ListFiles(ListFilesOutput),
Terminal(TerminalExecution),
Signal(SignalOutput),
Warning(String),
}
/// Result of a `<read_file>` command, including the clamped line window.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFileOutput {
pub path: PathBuf,
pub size_bytes: usize,
pub sha256: String,
// The window the caller asked for (1-indexed, inclusive).
pub requested_start: usize,
pub requested_end: usize,
// The window actually served after clamping to the file's extent.
pub served_start: usize,
pub served_end: usize,
pub total_lines: usize,
// Rendered lines in "[n] content" form, newline-joined.
pub body: String,
// Set when the window was adjusted or the file was empty.
pub warning: Option<String>,
}
impl ReadFileOutput {
    /// Builds the bracketed fidelity line echoed before the file body,
    /// e.g. `[PATH: src/a.rs | SIZE: 1.2kb | SHA256: abcd1234 | LINES: 1-10/42]`.
    pub fn fidelity_header(&self) -> String {
        format!(
            "[PATH: {path} | SIZE: {size} | SHA256: {hash} | LINES: {start}-{end}/{total}]",
            path = self.path.display(),
            size = human_bytes(self.size_bytes),
            hash = shorten_hash(&self.sha256),
            start = self.served_start,
            end = self.served_end,
            total = self.total_lines
        )
    }
}
/// Result of a completed `<write_file>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
pub path: PathBuf,
pub size_bytes: usize,
pub sha256: String,
}
/// Matching strategy that succeeded for an apply_edit (strongest first).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
Exact,
WhitespaceAgnostic,
ContextualAnchor,
NotApplied,
}
/// Result of a completed `<apply_edit>` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplyEditOutput {
pub path: PathBuf,
pub applied: bool,
pub tier: EditTier,
// New file hash when applied; None on failure.
pub sha256: Option<String>,
// Detected payload format (one of APPLY_EDIT_FORMAT_*), if any.
pub format: Option<String>,
// One of APPLY_EDIT_REASON_* when the edit was not applied.
pub reason_code: Option<String>,
pub warning: Option<String>,
}
/// Result of a `<list_files>` command: pre-rendered lines, budget-truncated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
pub path: PathBuf,
pub lines: Vec<String>,
}
/// Acknowledgement of a delivered `<terminal_signal>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SignalOutput {
pub pid: u32,
pub signal: ProcessSignal,
}
/// Maximum list_files lines before a truncation marker is emitted.
pub const DEFAULT_MAX_LIST_LINES: usize = 300;
/// Directories with at least this many descendant files are elided.
pub const DEFAULT_DENSE_DIR_THRESHOLD: usize = 200;
/// Default timeout handed to `TerminalProvider::run`.
pub const DEFAULT_TERMINAL_TIMEOUT_SECS: u64 = 5;
/// Default number of entries requested from `recent_file_hashes`.
pub const DEFAULT_RECENT_HASH_LIMIT: usize = 5;
// apply_edit payload formats, as reported in `ApplyEditOutput::format`.
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS: &str = "search_replace_tags";
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS: &str = "search_replace_markers";
pub const APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS: &str = "search_replace_xml_blocks";
pub const APPLY_EDIT_FORMAT_UNIFIED_DIFF: &str = "unified_diff";
// Fallback "format" for bodies that matched no parser; always paired with
// a parse_error reason code, so it is not listed in the supported formats.
pub const APPLY_EDIT_FORMAT_RAW_TEXT: &str = "raw_text";
// apply_edit failure reason codes, as reported in `ApplyEditOutput::reason_code`.
pub const APPLY_EDIT_REASON_STALE_HASH: &str = "stale_hash";
pub const APPLY_EDIT_REASON_EMPTY_EDIT: &str = "empty_edit";
pub const APPLY_EDIT_REASON_PARSE_ERROR: &str = "parse_error";
pub const APPLY_EDIT_REASON_NO_HUNKS: &str = "no_hunks";
pub const APPLY_EDIT_REASON_NO_MATCH: &str = "no_match";
// Field names advertised in the capability manifest for the state header.
pub const STATE_HEADER_FIELD_CWD: &str = "cwd";
pub const STATE_HEADER_FIELD_RECENT_HASHES: &str = "recent_hashes";
pub const STATE_HEADER_FIELD_ACTIVE_PIDS: &str = "active_pids";
/// Formats accepted by apply_edit (raw_text is intentionally excluded).
pub const APPLY_EDIT_SUPPORTED_FORMATS: &[&str] = &[
APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS,
APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS,
APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS,
APPLY_EDIT_FORMAT_UNIFIED_DIFF,
];
/// Every reason code apply_edit may report.
pub const APPLY_EDIT_REASON_CODES: &[&str] = &[
APPLY_EDIT_REASON_STALE_HASH,
APPLY_EDIT_REASON_EMPTY_EDIT,
APPLY_EDIT_REASON_PARSE_ERROR,
APPLY_EDIT_REASON_NO_HUNKS,
APPLY_EDIT_REASON_NO_MATCH,
];
/// Every field rendered by `StateHeader::render`.
pub const STATE_HEADER_FIELDS: &[&str] = &[
STATE_HEADER_FIELD_CWD,
STATE_HEADER_FIELD_RECENT_HASHES,
STATE_HEADER_FIELD_ACTIVE_PIDS,
];
/// Name plus usage snippet for one command in the capability manifest.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityDescriptor {
pub name: &'static str,
pub tag: &'static str,
}
/// Machine-readable description of what this engine instance supports,
/// including its configured budgets and limits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityManifest {
pub commands: Vec<CapabilityDescriptor>,
pub apply_edit_formats: Vec<&'static str>,
pub apply_edit_reason_codes: Vec<&'static str>,
pub apply_edit_tiers: Vec<EditTier>,
pub state_header_fields: Vec<&'static str>,
pub max_list_lines: usize,
pub dense_dir_threshold: usize,
pub terminal_timeout: Duration,
pub recent_hash_limit: usize,
}
/// Command interpreter: consumes parsed [`Instruction`]s and drives the
/// filesystem and terminal backends, tracking file hashes across turns.
#[derive(Debug)]
pub struct Engine<VFS, Terminal> {
vfs: VFS,
terminal: Terminal,
// Last hash we served/produced per path; used for stale-edit detection.
known_hashes: HashMap<PathBuf, String>,
// The currently open streaming command block, if any (at most one).
pending: Option<PendingOperation>,
max_list_lines: usize,
dense_dir_threshold: usize,
terminal_timeout: Duration,
recent_hash_limit: usize,
}
impl<VFS, Terminal> Engine<VFS, Terminal>
where
VFS: VirtualFileSystem,
Terminal: TerminalProvider,
{
/// Creates an engine over the given backends with default budgets
/// (list lines, dense-dir threshold, terminal timeout, hash limit).
pub fn new(vfs: VFS, terminal: Terminal) -> Self {
Self {
vfs,
terminal,
known_hashes: HashMap::new(),
pending: None,
max_list_lines: DEFAULT_MAX_LIST_LINES,
dense_dir_threshold: DEFAULT_DENSE_DIR_THRESHOLD,
terminal_timeout: Duration::from_secs(DEFAULT_TERMINAL_TIMEOUT_SECS),
recent_hash_limit: DEFAULT_RECENT_HASH_LIMIT,
}
}
/// Builder-style override for the terminal command timeout.
pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
self.terminal_timeout = timeout;
self
}
/// Builder-style override for the list_files output budgets.
pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
self.max_list_lines = max_lines;
self.dense_dir_threshold = dense_threshold;
self
}
/// Returns the manifest for the default capability set.
pub fn capability_manifest(&self) -> CapabilityManifest {
    self.capability_manifest_for(DEFAULT_CAPABILITIES)
}
/// Builds a manifest describing the commands and limits this engine exposes.
/// An empty `capabilities` slice means "use the defaults".
pub fn capability_manifest_for(&self, capabilities: &[Capability]) -> CapabilityManifest {
    let chosen: &[Capability] = if capabilities.is_empty() {
        DEFAULT_CAPABILITIES
    } else {
        capabilities
    };
    let commands = chosen
        .iter()
        .map(|capability| CapabilityDescriptor {
            name: capability.name(),
            tag: capability.tag(),
        })
        .collect();
    CapabilityManifest {
        commands,
        apply_edit_formats: APPLY_EDIT_SUPPORTED_FORMATS.to_vec(),
        apply_edit_reason_codes: APPLY_EDIT_REASON_CODES.to_vec(),
        apply_edit_tiers: vec![
            EditTier::Exact,
            EditTier::WhitespaceAgnostic,
            EditTier::ContextualAnchor,
            EditTier::NotApplied,
        ],
        state_header_fields: STATE_HEADER_FIELDS.to_vec(),
        max_list_lines: self.max_list_lines,
        dense_dir_threshold: self.dense_dir_threshold,
        terminal_timeout: self.terminal_timeout,
        recent_hash_limit: self.recent_hash_limit,
    }
}
/// Executes one turn: applies every instruction in order, then snapshots
/// the post-turn state (cwd, recent hashes, sorted active PIDs).
pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
    let mut outputs = Vec::new();
    for instruction in instructions {
        self.apply_instruction(instruction, &mut outputs)?;
    }
    let cwd = self.vfs.cwd()?;
    let recent_hashes = self.vfs.recent_file_hashes(self.recent_hash_limit)?;
    let mut active_pids = self.terminal.active_pids();
    active_pids.sort_unstable();
    let state_header = StateHeader {
        cwd,
        recent_hashes,
        active_pids,
    };
    Ok(TurnResult {
        state_header,
        outputs,
    })
}
/// Routes a single parser [`Instruction`] into the engine's state machine.
///
/// Streamed payloads (text, write chunks, raw chunks) are buffered into the
/// currently `pending` operation; start/end tags are delegated to
/// `handle_start_tag` / `handle_end_tag`. Chunks arriving without a matching
/// open block are `InvalidState` errors.
fn apply_instruction(
&mut self,
instruction: Instruction,
outputs: &mut Vec<EngineOutput>,
) -> Result<()> {
match instruction {
Instruction::Text(text) => {
// Free text only matters inside <apply_edit> while NOT inside a
// <search>/<replace> capture: it accumulates as the raw patch body.
// Everywhere else it is internal monologue and is dropped.
if let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() {
if edit.capture.is_none() {
edit.raw_body.extend_from_slice(text.as_bytes());
}
}
}
Instruction::StartTag { name, attributes } => {
self.handle_start_tag(&name, attributes, outputs)?;
}
Instruction::EndTag(name) => {
self.handle_end_tag(&name, outputs)?;
}
Instruction::WriteChunk(bytes) => {
// Write chunks are only legal inside an open <write_file> block.
let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
return Err(EngineError::InvalidState(
"received WriteChunk outside <write_file>".to_string(),
));
};
write.buffer.extend_from_slice(&bytes);
}
// Raw chunks are routed by the tag they were captured under.
Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
Some(ApplyCapture::Search) if tag == "search" => {
edit.search.extend_from_slice(&bytes);
}
Some(ApplyCapture::Replace) if tag == "replace" => {
edit.replace.extend_from_slice(&bytes);
}
// No capture active: bytes belong to the raw patch body.
None => {
edit.raw_body.extend_from_slice(&bytes);
}
// Capture/tag mismatch (e.g. replace bytes during search capture).
_ => {
return Err(EngineError::InvalidState(format!(
"unexpected raw chunk for <{tag}> while applying edit"
)));
}
},
Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
term.command.extend_from_slice(&bytes);
}
_ => {
return Err(EngineError::InvalidState(format!(
"received raw chunk for <{tag}> without active matching context"
)));
}
},
}
Ok(())
}
/// Dispatches an opening tag.
///
/// `read_file`, `list_files` and `terminal_signal` execute immediately;
/// `write_file`, `apply_edit` and `terminal` open a streaming block (only
/// one may be open at a time). Unknown tags produce a warning output rather
/// than an error.
fn handle_start_tag(
&mut self,
name: &str,
attributes: Attributes,
outputs: &mut Vec<EngineOutput>,
) -> Result<()> {
match name {
"write_file" => {
self.ensure_no_pending("write_file")?;
let path = required_path(&attributes)?;
self.pending = Some(PendingOperation::WriteFile(PendingWrite {
path,
buffer: Vec::new(),
}));
}
"read_file" => {
let output = self.execute_read_file(&attributes)?;
outputs.push(EngineOutput::ReadFile(output));
}
"apply_edit" => {
self.ensure_no_pending("apply_edit")?;
let path = required_path(&attributes)?;
self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
path,
search: Vec::new(),
replace: Vec::new(),
raw_body: Vec::new(),
capture: None,
}));
}
// <search>/<replace> switch the capture target of the open apply_edit.
"search" => {
let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
return Err(EngineError::InvalidState(
"<search> must be nested under <apply_edit>".to_string(),
));
};
edit.capture = Some(ApplyCapture::Search);
}
"replace" => {
let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
return Err(EngineError::InvalidState(
"<replace> must be nested under <apply_edit>".to_string(),
));
};
edit.capture = Some(ApplyCapture::Replace);
}
"list_files" => {
let output = self.execute_list_files(&attributes)?;
outputs.push(EngineOutput::ListFiles(output));
}
"terminal" => {
self.ensure_no_pending("terminal")?;
// Command may be given inline via cmd= (or the command= alias);
// body chunks captured later are appended to the same buffer.
let command = attributes
.get("cmd")
.or_else(|| attributes.get("command"))
.cloned()
.unwrap_or_default()
.into_bytes();
self.pending = Some(PendingOperation::Terminal(PendingTerminal { command }));
}
"terminal_signal" => {
let signal_output = self.execute_terminal_signal(&attributes)?;
outputs.push(EngineOutput::Signal(signal_output));
}
other => outputs.push(EngineOutput::Warning(format!(
"unsupported start tag <{other}> ignored"
))),
}
Ok(())
}
/// Dispatches a closing tag: finalizes the matching pending block
/// (write/apply_edit/terminal), or ends a <search>/<replace> capture.
/// A closing tag without its matching open block is an `InvalidState` error;
/// unknown closing tags only produce a warning output.
fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
match name {
"write_file" => {
let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
return Err(EngineError::InvalidState(
"</write_file> received without matching start".to_string(),
));
};
let output = self.finalize_write(write)?;
outputs.push(EngineOutput::WriteFile(output));
}
// Ending either capture tag returns the apply_edit to raw-body mode.
"search" | "replace" => {
let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
return Err(EngineError::InvalidState(format!(
"</{name}> received without active <apply_edit>"
)));
};
edit.capture = None;
}
"apply_edit" => {
let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
return Err(EngineError::InvalidState(
"</apply_edit> received without matching start".to_string(),
));
};
let output = self.finalize_apply_edit(edit)?;
outputs.push(EngineOutput::ApplyEdit(output));
}
"terminal" => {
let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
return Err(EngineError::InvalidState(
"</terminal> received without matching start".to_string(),
));
};
// The buffered command must be UTF-8; it is trimmed before running.
let command =
String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
outputs.push(EngineOutput::Terminal(output));
}
other => outputs.push(EngineOutput::Warning(format!(
"unsupported end tag </{other}> ignored"
))),
}
Ok(())
}
fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
self.vfs.write_atomic(&write.path, &write.buffer)?;
let sha256 = self.vfs.hash(&write.path)?;
self.known_hashes.insert(write.path.clone(), sha256.clone());
Ok(WriteFileOutput {
path: write.path,
size_bytes: write.buffer.len(),
sha256,
})
}
/// Handles `<read_file>`: reads the file, clamps the requested 1-indexed
/// line window to the file's real extent, and renders numbered lines.
///
/// Defaults: `start_line` = 1 (floored at 1); `end_line` = `start_line + 200`.
/// The file's hash is recorded in `known_hashes` so later apply_edits can
/// detect external modification.
fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
let path = required_path(attributes)?;
let requested_start = optional_usize(attributes, "start_line")?
.unwrap_or(1)
.max(1);
let requested_end =
optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
let bytes = self.vfs.read(&path)?;
let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
let sha256 = self.vfs.hash(&path)?;
self.known_hashes.insert(path.clone(), sha256.clone());
let all_lines: Vec<&str> = text.lines().collect();
let total_lines = all_lines.len();
// Clamp the window; emit a warning whenever it had to be adjusted.
let (served_start, served_end, warning) = if total_lines == 0 {
(0, 0, Some("file is empty; returning no lines".to_string()))
} else {
let served_start = requested_start.min(total_lines);
let served_end = requested_end.max(served_start).min(total_lines);
let warning = if served_start != requested_start || served_end != requested_end {
Some(format!(
"requested lines {}-{} adjusted to {}-{} (file has {} lines)",
requested_start, requested_end, served_start, served_end, total_lines
))
} else {
None
};
(served_start, served_end, warning)
};
let body = if total_lines == 0 {
String::new()
} else {
// served_start >= 1 here (non-empty file), so line_idx - 1 is safe.
let mut rendered = Vec::new();
for line_idx in served_start..=served_end {
let content = all_lines[line_idx - 1];
rendered.push(format!("[{line_idx}] {content}"));
}
rendered.join("\n")
};
Ok(ReadFileOutput {
path,
size_bytes: bytes.len(),
sha256,
requested_start,
requested_end,
served_start,
served_end,
total_lines,
body,
warning,
})
}
/// Finalizes a completed `<apply_edit>` block.
///
/// Decodes the captured payloads, refuses to touch a file whose on-disk hash
/// no longer matches the hash recorded when it was last read or written
/// (`stale_hash`), resolves the payload format, applies the edit with tiered
/// matching, and persists + re-hashes the file on success.
///
/// Fix: the stale-hash comparison previously read `¤t_hash` — a
/// mojibake-corrupted `&current_hash` (the `&curren` in `&current_hash` was
/// collapsed into the `¤` HTML entity) — which did not compile.
fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
    let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
    let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
    let raw_body = String::from_utf8(edit.raw_body).map_err(|_| EngineError::InvalidUtf8)?;
    let path = edit.path;
    let edit_input = resolve_apply_edit_input(&search, &replace, &raw_body);
    // Stale-hash guard: only applies when we have previously hashed this path.
    if let Some(previous_hash) = self.known_hashes.get(&path) {
        let current_hash = self.vfs.hash(&path)?;
        if previous_hash != &current_hash {
            return Ok(ApplyEditOutput {
                path,
                applied: false,
                tier: EditTier::NotApplied,
                sha256: None,
                format: edit_input.format,
                reason_code: Some(APPLY_EDIT_REASON_STALE_HASH.to_string()),
                warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
            });
        }
    }
    let original_bytes = self.vfs.read(&path)?;
    let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;
    // Payload-level failures: empty body, unparseable format, diff with no hunks.
    if let Some(reason_code) = edit_input.reason_code {
        return Ok(ApplyEditOutput {
            path,
            applied: false,
            tier: EditTier::NotApplied,
            sha256: None,
            format: edit_input.format,
            reason_code: Some(reason_code),
            warning: edit_input
                .warning
                .or_else(|| Some("invalid apply_edit payload".to_string())),
        });
    }
    let apply_result = if let Some(hunks) = edit_input.diff_hunks {
        apply_diff_hunks_with_tiers(&original, &hunks)
    } else if let (Some(search), Some(replace)) =
        (edit_input.search.as_deref(), edit_input.replace.as_deref())
    {
        apply_edit_with_tiers(&original, search, replace)
    } else {
        None
    };
    let Some((rewritten, tier)) = apply_result else {
        return Ok(ApplyEditOutput {
            path,
            applied: false,
            tier: EditTier::NotApplied,
            sha256: None,
            format: edit_input.format,
            reason_code: Some(APPLY_EDIT_REASON_NO_MATCH.to_string()),
            warning: Some("no suitable target block found for apply_edit".to_string()),
        });
    };
    self.vfs.write_atomic(&path, rewritten.as_bytes())?;
    let sha256 = self.vfs.hash(&path)?;
    self.known_hashes.insert(path.clone(), sha256.clone());
    Ok(ApplyEditOutput {
        path,
        applied: true,
        tier,
        sha256: Some(sha256),
        format: edit_input.format,
        reason_code: None,
        warning: None,
    })
}
/// Handles `<list_files>`: renders a sorted tree listing, eliding dense
/// directories and truncating once the line budget is exhausted. Recently
/// modified entries carry a trailing " (*)" marker.
fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
    let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
    let mut nodes = self.vfs.list_tree(&path)?;
    nodes.sort_by(|a, b| a.path.cmp(&b.path));
    let mut lines = Vec::new();
    for node in nodes {
        if lines.len() >= self.max_list_lines {
            lines.push("[... truncated due to token budget ...]".to_string());
            break;
        }
        let marker = if node.modified_recently { " (*)" } else { "" };
        let line = match node.kind {
            NodeKind::File => format!("[file] {}{marker}", node.path.display()),
            // Dense directories are summarized instead of expanded.
            NodeKind::Directory if node.descendant_file_count >= self.dense_dir_threshold => {
                format!(
                    "[dir] {}/ ({} files, omitted){marker}",
                    node.path.display(),
                    node.descendant_file_count
                )
            }
            NodeKind::Directory => format!("[dir] {}/{marker}", node.path.display()),
        };
        lines.push(line);
    }
    Ok(ListFilesOutput { path, lines })
}
/// Handles `<terminal_signal>`: parses the required `pid` and the optional
/// `signal` attribute (case-insensitive, defaults to SIGINT) and delivers it.
fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
    let pid_value = required_attr(attributes, "pid")?;
    let pid: u32 = pid_value.parse().map_err(|_| EngineError::InvalidInteger {
        name: "pid",
        value: pid_value.to_string(),
    })?;
    let requested = attributes
        .get("signal")
        .map(|v| v.to_ascii_uppercase())
        .unwrap_or_else(|| "SIGINT".to_string());
    let signal = match requested.as_str() {
        "SIGINT" => ProcessSignal::SigInt,
        "SIGTERM" => ProcessSignal::SigTerm,
        "SIGKILL" => ProcessSignal::SigKill,
        other => {
            return Err(EngineError::InvalidState(format!(
                "unsupported signal `{other}`"
            )));
        }
    };
    self.terminal.signal(pid, signal)?;
    Ok(SignalOutput { pid, signal })
}
/// Rejects opening a new streaming block while another one is still open.
fn ensure_no_pending(&self, next: &str) -> Result<()> {
    match &self.pending {
        None => Ok(()),
        Some(_) => Err(EngineError::InvalidState(format!(
            "cannot start <{next}> while another command block is still open"
        ))),
    }
}
}
/// The streaming command block currently being accumulated, if any.
#[derive(Debug)]
enum PendingOperation {
WriteFile(PendingWrite),
ApplyEdit(PendingApplyEdit),
Terminal(PendingTerminal),
}
/// Buffered state for an open `<write_file>` block.
#[derive(Debug)]
struct PendingWrite {
path: PathBuf,
buffer: Vec<u8>,
}
/// Buffered state for an open `<apply_edit>` block.
#[derive(Debug)]
struct PendingApplyEdit {
path: PathBuf,
// Bytes captured inside <search>…</search>.
search: Vec<u8>,
// Bytes captured inside <replace>…</replace>.
replace: Vec<u8>,
// Everything else (markers / xml blocks / unified diff body).
raw_body: Vec<u8>,
// Which nested capture tag is currently open, if any.
capture: Option<ApplyCapture>,
}
/// Which nested apply_edit capture is active.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
Search,
Replace,
}
/// Buffered state for an open `<terminal>` block (command bytes).
#[derive(Debug)]
struct PendingTerminal {
command: Vec<u8>,
}
/// A tool command the engine can expose to the model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    WriteFile,
    ApplyEdit,
    ReadFile,
    ListFiles,
    Terminal,
    TerminalSignal,
}
/// Capabilities granted when the caller does not request a specific subset.
pub const DEFAULT_CAPABILITIES: &[Capability] = &[
    Capability::WriteFile,
    Capability::ApplyEdit,
    Capability::ReadFile,
    Capability::ListFiles,
    Capability::Terminal,
    Capability::TerminalSignal,
];
impl Capability {
    /// Stable machine name of the command (also its XML tag name).
    pub fn name(&self) -> &'static str {
        use Capability::*;
        match self {
            WriteFile => "write_file",
            ApplyEdit => "apply_edit",
            ReadFile => "read_file",
            ListFiles => "list_files",
            Terminal => "terminal",
            TerminalSignal => "terminal_signal",
        }
    }
    /// Usage snippet shown to the model in the system prompt.
    pub fn tag(&self) -> &'static str {
        use Capability::*;
        match self {
            WriteFile => "<write_file path=\"...\"></write_file>",
            ApplyEdit => {
                "<apply_edit path=\"...\">[search/replace blocks or patch body]</apply_edit>"
            }
            ReadFile => "<read_file path=\"...\" start_line=\"..\" end_line=\"...\" />",
            ListFiles => "<list_files path=\"...\" />",
            Terminal => "<terminal>...</terminal> or <terminal cmd=\"...\" />",
            TerminalSignal => "<terminal_signal pid=\"...\" signal=\"SIGINT\" />",
        }
    }
}
/// Renders the system prompt advertising the enabled commands and the rules
/// of engagement. An empty `capabilities` slice means "use the defaults".
pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
    let enabled: &[Capability] = if capabilities.is_empty() {
        DEFAULT_CAPABILITIES
    } else {
        capabilities
    };
    let mut prompt = String::from(
        "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
    );
    prompt.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
    prompt.push_str("Available Commands:\n");
    for capability in enabled {
        prompt.push_str(capability.tag());
        prompt.push('\n');
    }
    prompt.push_str("\nRules:\n");
    prompt.push_str("1. Do not escape strings inside tags.\n");
    prompt.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
    prompt.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
    prompt.push_str(
        "4. apply_edit accepts XML search/replace blocks, SEARCH/REPLACE markers, or unified diff hunks.\n",
    );
    prompt
}
/// Extracts the mandatory `path` attribute as a `PathBuf`.
fn required_path(attributes: &Attributes) -> Result<PathBuf> {
    match optional_path(attributes, "path")? {
        Some(path) => Ok(path),
        None => Err(EngineError::MissingAttribute("path")),
    }
}
/// Extracts an optional path-valued attribute.
fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
    Ok(attributes.get(key).map(PathBuf::from))
}
/// Extracts a mandatory string attribute, borrowed from the attribute map.
fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
    match attributes.get(key) {
        Some(value) => Ok(value.as_str()),
        None => Err(EngineError::MissingAttribute(key)),
    }
}
/// Extracts an optional attribute parsed as `usize`; a present-but-invalid
/// value is an `InvalidInteger` error rather than `None`.
fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
    attributes
        .get(key)
        .map(|value| {
            value
                .parse::<usize>()
                .map_err(|_| EngineError::InvalidInteger {
                    name: key,
                    value: value.clone(),
                })
        })
        .transpose()
}
/// Abbreviates a hash to its first eight characters for display.
fn shorten_hash(hash: &str) -> String {
    match hash.char_indices().nth(8) {
        Some((idx, _)) => hash[..idx].to_string(),
        None => hash.to_string(),
    }
}
/// Formats a byte count for display: raw bytes below 1 KiB, otherwise one
/// decimal place of KiB/MiB (lowercase `kb`/`mb` suffixes).
fn human_bytes(bytes: usize) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;
    let value = bytes as f64;
    if value >= MB {
        format!("{:.1}mb", value / MB)
    } else if value >= KB {
        format!("{:.1}kb", value / KB)
    } else {
        format!("{bytes}b")
    }
}
/// Applies a search/replace edit with a three-tier fallback chain:
/// exact substring match, then whitespace-agnostic line match, then a fuzzy
/// contextual anchor. Returns the rewritten text tagged with the tier that
/// succeeded, or `None` when nothing matched (or `search` is empty).
fn apply_edit_with_tiers(
    original: &str,
    search: &str,
    replace: &str,
) -> Option<(String, EditTier)> {
    if search.is_empty() {
        return None;
    }
    apply_exact(original, search, replace)
        .map(|text| (text, EditTier::Exact))
        .or_else(|| {
            apply_whitespace_agnostic(original, search, replace)
                .map(|text| (text, EditTier::WhitespaceAgnostic))
        })
        .or_else(|| {
            apply_contextual_anchor(original, search, replace)
                .map(|text| (text, EditTier::ContextualAnchor))
        })
}
/// One parsed unified-diff hunk reduced to a search/replace text pair.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DiffHunkReplacement {
search: String,
replace: String,
}
/// Outcome of classifying an apply_edit payload (see `resolve_apply_edit_input`).
/// Exactly one of: search+replace set, diff_hunks set, or reason_code set.
#[derive(Debug, Clone)]
struct ResolvedApplyEditInput {
search: Option<String>,
replace: Option<String>,
diff_hunks: Option<Vec<DiffHunkReplacement>>,
// Detected format constant (APPLY_EDIT_FORMAT_*), when identifiable.
format: Option<String>,
// Failure reason (APPLY_EDIT_REASON_*) when the payload is unusable.
reason_code: Option<String>,
warning: Option<String>,
}
/// Classifies an apply_edit payload, trying formats in priority order:
/// 1. explicit <search>/<replace> tag captures (wins even if replace is empty);
/// 2. `<<<<<<< SEARCH` / `=======` / `>>>>>>> REPLACE` marker blocks;
/// 3. inline `<search>…</search><replace>…</replace>` blocks in the body;
/// 4. unified diff hunks (`@@` headers).
/// Otherwise the body is reported as raw_text with a parse_error reason.
/// The raw body is XML-entity-decoded and trimmed before parsing; an empty
/// result is an empty_edit failure.
fn resolve_apply_edit_input(search: &str, replace: &str, raw_body: &str) -> ResolvedApplyEditInput {
if !search.is_empty() {
return ResolvedApplyEditInput {
search: Some(search.to_string()),
replace: Some(replace.to_string()),
diff_hunks: None,
format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_TAGS.to_string()),
reason_code: None,
warning: None,
};
}
let body = decode_basic_xml_entities(raw_body).trim().to_string();
if body.is_empty() {
return ResolvedApplyEditInput {
search: None,
replace: None,
diff_hunks: None,
format: None,
reason_code: Some(APPLY_EDIT_REASON_EMPTY_EDIT.to_string()),
warning: Some(
"apply_edit requires <search>/<replace> blocks or a non-empty patch body"
.to_string(),
),
};
}
if let Some((parsed_search, parsed_replace)) = parse_search_replace_markers(&body) {
return ResolvedApplyEditInput {
search: Some(parsed_search),
replace: Some(parsed_replace),
diff_hunks: None,
format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS.to_string()),
reason_code: None,
warning: None,
};
}
if let Some((parsed_search, parsed_replace)) = parse_apply_edit_xml_blocks(&body) {
return ResolvedApplyEditInput {
search: Some(parsed_search),
replace: Some(parsed_replace),
diff_hunks: None,
format: Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_XML_BLOCKS.to_string()),
reason_code: None,
warning: None,
};
}
if let Some(hunks) = parse_unified_diff_hunks(&body) {
// A diff header was seen but every hunk was empty: report no_hunks.
if hunks.is_empty() {
return ResolvedApplyEditInput {
search: None,
replace: None,
diff_hunks: None,
format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
reason_code: Some(APPLY_EDIT_REASON_NO_HUNKS.to_string()),
warning: Some(
"unified diff was detected but no @@ hunk blocks were parsed".to_string(),
),
};
}
return ResolvedApplyEditInput {
search: None,
replace: None,
diff_hunks: Some(hunks),
format: Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF.to_string()),
reason_code: None,
warning: None,
};
}
ResolvedApplyEditInput {
search: None,
replace: None,
diff_hunks: None,
format: Some(APPLY_EDIT_FORMAT_RAW_TEXT.to_string()),
reason_code: Some(APPLY_EDIT_REASON_PARSE_ERROR.to_string()),
warning: Some("unsupported apply_edit body format".to_string()),
}
}
/// Decodes the minimal set of XML entities (`&lt;`, `&gt;`, `&amp;`) that a
/// raw apply_edit body may contain.
///
/// Fix: the entity strings in the original had themselves been decoded
/// (e.g. `.replace("<", "<")`), turning every replace into a no-op identity.
/// `&amp;` is decoded last so that a sequence like `&amp;lt;` resolves to
/// the literal text `&lt;` rather than being double-decoded to `<`.
fn decode_basic_xml_entities(input: &str) -> String {
    input
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&amp;", "&")
}
/// Parses `<<<<<<< SEARCH` / `=======` / `>>>>>>> REPLACE` marker blocks.
///
/// Marker lines are matched after trimming; content lines are kept verbatim.
/// Returns `None` when no SEARCH marker was ever seen. Multiple blocks
/// concatenate into a single search/replace pair.
fn parse_search_replace_markers(input: &str) -> Option<(String, String)> {
    enum Section {
        Outside,
        Search,
        Replace,
    }
    let mut section = Section::Outside;
    let mut search_lines: Vec<&str> = Vec::new();
    let mut replace_lines: Vec<&str> = Vec::new();
    let mut saw_markers = false;
    for line in input.lines() {
        match (line.trim(), &section) {
            // A SEARCH marker (re)starts a block from any state.
            ("<<<<<<< SEARCH", _) => {
                section = Section::Search;
                saw_markers = true;
            }
            ("=======", Section::Search) => section = Section::Replace,
            (">>>>>>> REPLACE", Section::Replace) => section = Section::Outside,
            (_, Section::Search) => search_lines.push(line),
            (_, Section::Replace) => replace_lines.push(line),
            _ => {}
        }
    }
    saw_markers.then(|| (search_lines.join("\n"), replace_lines.join("\n")))
}
/// Parses inline `<search>…</search>` + `<replace>…</replace>` blocks from a
/// raw body; both must be present.
fn parse_apply_edit_xml_blocks(input: &str) -> Option<(String, String)> {
    extract_tag_body(input, "search").zip(extract_tag_body(input, "replace"))
}
/// Returns the text between the first `<tag>` and the first following
/// `</tag>`, or `None` when either delimiter is missing.
fn extract_tag_body(input: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");
    let body_start = input.find(&open)? + open.len();
    let body_len = input[body_start..].find(&close)?;
    Some(input[body_start..body_start + body_len].to_string())
}
/// Parses unified-diff hunks into search/replace pairs.
///
/// Returns `None` when no `@@` hunk header appears at all (body is not a
/// diff); returns `Some(vec)` otherwise, where each hunk's `-`/context lines
/// form the search text and `+`/context lines form the replacement. File
/// headers (`+++`/`---`), `diff --git` lines, patch terminators and
/// `\ No newline at end of file` are skipped. Hunks that collect no lines
/// at all are dropped.
fn parse_unified_diff_hunks(input: &str) -> Option<Vec<DiffHunkReplacement>> {
let lines = input.lines().collect::<Vec<_>>();
let mut idx = 0usize;
let mut hunks = Vec::new();
let mut saw_hunk_header = false;
while idx < lines.len() {
// Tolerate CRLF input by stripping any trailing carriage return.
let line = lines[idx].trim_end_matches('\r');
if line.starts_with("@@") {
saw_hunk_header = true;
idx = idx.saturating_add(1);
let mut search_lines = Vec::new();
let mut replace_lines = Vec::new();
// Consume hunk body until the next hunk header or a file boundary.
while idx < lines.len() {
let current = lines[idx].trim_end_matches('\r');
if current.starts_with("@@") {
break;
}
if current.starts_with("diff --git ")
|| current.starts_with("*** End Patch")
|| current.starts_with("*** Update File:")
{
break;
}
if current.eq("\\ No newline at end of file") {
idx = idx.saturating_add(1);
continue;
}
// '+' lines go to replace, '-' lines to search, ' ' context to both;
// the +++/--- file headers are excluded.
if let Some(rest) = current.strip_prefix('+') {
if !current.starts_with("+++") {
replace_lines.push(rest.to_string());
}
} else if let Some(rest) = current.strip_prefix('-') {
if !current.starts_with("---") {
search_lines.push(rest.to_string());
}
} else if let Some(rest) = current.strip_prefix(' ') {
search_lines.push(rest.to_string());
replace_lines.push(rest.to_string());
}
idx = idx.saturating_add(1);
}
if !(search_lines.is_empty() && replace_lines.is_empty()) {
hunks.push(DiffHunkReplacement {
search: search_lines.join("\n"),
replace: replace_lines.join("\n"),
});
}
// Outer loop re-examines the line we stopped on (next @@ or boundary).
continue;
}
idx = idx.saturating_add(1);
}
saw_hunk_header.then_some(hunks)
}
/// Applies a sequence of unified-diff hunks in order, threading the rewritten
/// text through each application.
///
/// The reported tier is the weakest (highest-rank) tier any hunk needed, so a
/// single fuzzy match downgrades the whole edit. Fails (`None`) as soon as
/// any hunk cannot be placed.
///
/// Fix: the call previously read `apply_edit_with_tiers(¤t, …)` — a
/// mojibake-corrupted `&current` (the `&curren` prefix was collapsed into the
/// `¤` HTML entity) — which did not compile.
fn apply_diff_hunks_with_tiers(
    original: &str,
    hunks: &[DiffHunkReplacement],
) -> Option<(String, EditTier)> {
    let mut current = original.to_string();
    let mut strongest_tier = EditTier::Exact;
    for hunk in hunks {
        let (next, tier) = apply_edit_with_tiers(&current, &hunk.search, &hunk.replace)?;
        if edit_tier_rank(&tier) > edit_tier_rank(&strongest_tier) {
            strongest_tier = tier;
        }
        current = next;
    }
    Some((current, strongest_tier))
}
/// Orders tiers from strongest (Exact = 0) to weakest/failed (NotApplied = 3)
/// so `apply_diff_hunks_with_tiers` can report the weakest tier it needed.
const fn edit_tier_rank(tier: &EditTier) -> usize {
match tier {
EditTier::Exact => 0,
EditTier::WhitespaceAgnostic => 1,
EditTier::ContextualAnchor => 2,
EditTier::NotApplied => 3,
}
}
/// Tier 1: replaces the first exact occurrence of `search` in `original`,
/// or returns `None` when it does not occur.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    original.find(search).map(|idx| {
        let (head, tail) = original.split_at(idx);
        format!("{head}{replace}{}", &tail[search.len()..])
    })
}
/// Tier 2: matches `search` against the file line-by-line, comparing each
/// line with leading/trailing whitespace trimmed, and splices `replace` over
/// the first matching run of lines. The spliced range includes the trailing
/// newline of the last matched line (spans are newline-inclusive).
fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
    let spans = collect_line_spans(original);
    let targets: Vec<&str> = search.lines().collect();
    if targets.is_empty() || spans.len() < targets.len() {
        return None;
    }
    let hit = spans.windows(targets.len()).position(|window| {
        window
            .iter()
            .zip(&targets)
            .all(|(span, target)| span.text.trim() == target.trim())
    })?;
    let matched = &spans[hit..hit + targets.len()];
    Some(splice(
        original,
        matched[0].start,
        matched[matched.len() - 1].end,
        replace,
    ))
}
/// Tier 3 fuzzy fallback: slides a `search`-sized window of lines over the
/// file and splices `replace` over the window whose whitespace-normalized
/// text has the smallest Levenshtein distance to the normalized search text.
///
/// Ties keep the earliest window (`score >= best_score` skips later equals).
/// The best match is rejected unless its distance is within roughly a third
/// of the normalized search length (minimum threshold 2, via `max(6) / 3`).
fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
let original_lines = collect_line_spans(original);
let search_lines: Vec<&str> = search.lines().collect();
if search_lines.is_empty() || original_lines.is_empty() {
return None;
}
// Window never exceeds the file length, so the slide below is well-formed.
let window_len = search_lines.len().min(original_lines.len());
let normalized_search = normalize_for_distance(search);
// best = (score, first line index, last line index)
let mut best: Option<(usize, usize, usize)> = None;
for start in 0..=original_lines.len() - window_len {
let window = &original_lines[start..start + window_len];
let joined = window
.iter()
.map(|line| line.text)
.collect::<Vec<_>>()
.join("\n");
let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
match best {
Some((best_score, _, _)) if score >= best_score => {}
_ => best = Some((score, start, start + window_len - 1)),
}
}
let (score, line_start, line_end) = best?;
let threshold = normalized_search.len().max(6) / 3;
if score > threshold {
return None;
}
let range_start = original_lines[line_start].start;
let range_end = original_lines[line_end].end;
Some(splice(original, range_start, range_end, replace))
}
/// Normalizes text for distance comparison: collapses each line's internal
/// whitespace runs to single spaces, rejoins with newlines, and trims the
/// whole result.
fn normalize_for_distance(input: &str) -> String {
    let collapsed: Vec<String> = input
        .lines()
        .map(|line| line.split_whitespace().collect::<Vec<_>>().join(" "))
        .collect();
    collapsed.join("\n").trim().to_string()
}
/// Replaces the byte range `[range_start, range_end)` of `original` with
/// `replace`. Both offsets must lie on `char` boundaries.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    [&original[..range_start], replace, &original[range_end..]].concat()
}
/// One line of a string with its byte range; `end` includes the trailing
/// newline when the line has one, so consecutive spans tile the input.
#[derive(Debug)]
struct LineSpan<'a> {
    start: usize,
    end: usize,
    // Line content without the trailing newline.
    text: &'a str,
}
/// Splits `input` into newline-inclusive [`LineSpan`]s covering every byte.
/// An empty input yields no spans; a final line without a trailing newline
/// still ends at `input.len()`.
fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
    let mut offset = 0usize;
    input
        .split_inclusive('\n')
        .map(|chunk| {
            let start = offset;
            offset += chunk.len();
            LineSpan {
                start,
                end: offset,
                text: chunk.strip_suffix('\n').unwrap_or(chunk),
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::sync::{Arc, Mutex};

    /// Map-backed [`VirtualFileSystem`] test double. The `Arc<Mutex<..>>`
    /// interior lets a test keep a handle to the store after handing a
    /// `clone()` of the VFS to the engine.
    #[derive(Clone, Default)]
    struct InMemoryVfs {
        files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
        tree: Arc<Mutex<Vec<TreeNode>>>,
    }

    impl InMemoryVfs {
        /// Seeds `path` with `body`, overwriting any previous contents.
        fn set_file(&self, path: &str, body: &str) {
            self.files
                .lock()
                .expect("lock")
                .insert(PathBuf::from(path), body.as_bytes().to_vec());
        }

        /// Returns the UTF-8 contents of `path`, or "" when the file is absent.
        fn get_file(&self, path: &str) -> String {
            String::from_utf8(
                self.files
                    .lock()
                    .expect("lock")
                    .get(&PathBuf::from(path))
                    .cloned()
                    .unwrap_or_default(),
            )
            .expect("utf8")
        }

        /// Replaces the node list served by `list_tree`.
        fn set_tree(&self, nodes: Vec<TreeNode>) {
            *self.tree.lock().expect("lock") = nodes;
        }
    }

    impl VirtualFileSystem for InMemoryVfs {
        fn read(&self, path: &Path) -> Result<Vec<u8>> {
            self.files
                .lock()
                .expect("lock")
                .get(path)
                .cloned()
                .ok_or_else(|| EngineError::Vfs(format!("missing file {}", path.display())))
        }

        fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
            self.files
                .lock()
                .expect("lock")
                .insert(path.to_path_buf(), bytes.to_vec());
            Ok(())
        }

        fn hash(&self, path: &Path) -> Result<String> {
            let bytes = self.read(path)?;
            Ok(simple_hash(&bytes))
        }

        fn cwd(&self) -> Result<PathBuf> {
            Ok(PathBuf::from("/virtual"))
        }

        fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
            Ok(self.tree.lock().expect("lock").clone())
        }

        fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
            let files = self.files.lock().expect("lock");
            let mut entries: Vec<_> = files
                .iter()
                .map(|(path, body)| FileHash {
                    path: path.clone(),
                    sha256: simple_hash(body),
                })
                .collect();
            // Sort for determinism: HashMap iteration order is arbitrary.
            entries.sort_by(|a, b| a.path.cmp(&b.path));
            entries.truncate(limit);
            Ok(entries)
        }
    }

    /// Terminal double that records the last command and echoes it back.
    #[derive(Default)]
    struct MockTerminal {
        pids: Vec<u32>,
        last_command: Option<String>,
    }

    impl TerminalProvider for MockTerminal {
        fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
            self.last_command = Some(command.to_string());
            Ok(TerminalExecution {
                output: format!("ran: {command}"),
                exit_code: Some(0),
                cwd: PathBuf::from("/virtual"),
                detached_pid: None,
            })
        }

        fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
            self.pids.retain(|existing| *existing != pid);
            Ok(())
        }

        fn active_pids(&self) -> Vec<u32> {
            self.pids.clone()
        }
    }

    /// Multiple WriteChunk payloads must be concatenated and committed as one
    /// atomic write.
    #[test]
    fn write_file_chunks_commit_atomically() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "write_file".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/main.rs".to_string())]),
                },
                Instruction::WriteChunk(b"fn main()".to_vec()),
                Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
                Instruction::EndTag("write_file".to_string()),
            ])
            .expect("turn should run");
        assert_eq!(
            vfs.get_file("src/main.rs"),
            "fn main() { println!(\"ok\"); }"
        );
        assert!(matches!(
            turn.outputs.as_slice(),
            [EngineOutput::WriteFile(WriteFileOutput { .. })]
        ));
    }

    /// An out-of-range `end_line` is clamped to the file length (with a
    /// warning) and the served body carries `[n]` line-number prefixes.
    #[test]
    fn read_file_returns_fidelity_header_and_numbered_lines() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "a\nb\nc\nd\n");
        let mut engine = Engine::new(vfs, MockTerminal::default());
        let turn = engine
            .execute_turn(vec![Instruction::StartTag {
                name: "read_file".to_string(),
                attributes: BTreeMap::from([
                    ("path".to_string(), "src/lib.rs".to_string()),
                    ("start_line".to_string(), "3".to_string()),
                    ("end_line".to_string(), "9".to_string()),
                ]),
            }])
            .expect("turn should run");
        let EngineOutput::ReadFile(output) = &turn.outputs[0] else {
            panic!("expected read output");
        };
        assert_eq!(output.served_start, 3);
        assert_eq!(output.served_end, 4);
        assert_eq!(output.body, "[3] c\n[4] d");
        assert!(output.warning.is_some());
        assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
    }

    /// A search block whose indentation differs from the file should still
    /// match via the whitespace-agnostic tier.
    #[test]
    fn apply_edit_uses_whitespace_agnostic_matching() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "fn main() {\n    println!(\"x\");\n}\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::StartTag {
                    name: "search".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::RawChunk {
                    tag: "search".to_string(),
                    bytes: b"  println!(\"x\");  ".to_vec(),
                },
                Instruction::EndTag("search".to_string()),
                Instruction::StartTag {
                    name: "replace".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::RawChunk {
                    tag: "replace".to_string(),
                    bytes: b"println!(\"y\");".to_vec(),
                },
                Instruction::EndTag("replace".to_string()),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
        assert!(vfs.get_file("src/lib.rs").contains("println!(\"y\");"));
    }

    /// A bare unified-diff hunk in the tag body is detected and applied.
    #[test]
    fn apply_edit_accepts_unified_diff_hunk_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text(
                    "@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n".to_string(),
                ),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    /// A `*** Begin Patch` wrapper around the hunk is unwrapped and applied.
    #[test]
    fn apply_edit_accepts_begin_patch_wrapper_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text(
                    "*** Begin Patch\n*** Update File: src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n".to_string(),
                ),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    /// An absolute `*** Update File:` path must still resolve against the
    /// tag's relative `path` attribute.
    #[test]
    fn apply_edit_accepts_begin_patch_wrapper_with_absolute_update_file_path() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text(
                    "*** Begin Patch\n*** Update File: /tmp/workspace/src/lib.rs\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+beta (edited)\n gamma\n*** End Patch\n".to_string(),
                ),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some(APPLY_EDIT_FORMAT_UNIFIED_DIFF));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    /// Conflict-marker style (`<<<<<<< SEARCH` / `>>>>>>> REPLACE`) bodies
    /// are parsed and applied.
    #[test]
    fn apply_edit_accepts_search_replace_markers_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text(
                    "<<<<<<< SEARCH\nbeta\n=======\nbeta (edited)\n>>>>>>> REPLACE\n".to_string(),
                ),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(
            edit.format.as_deref(),
            Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
        );
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    /// NOTE(review): despite the name, this body is byte-identical in shape to
    /// the unescaped marker test above — presumably the upstream parser has
    /// already unescaped XML entities before the payload reaches the engine as
    /// `Instruction::Text`; confirm intent, or feed actually-escaped markers.
    #[test]
    fn apply_edit_accepts_xml_escaped_search_replace_markers_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text(
                    "<<<<<<< SEARCH\nbeta\n=======\nbeta (escaped)\n>>>>>>> REPLACE\n".to_string(),
                ),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(
            edit.format.as_deref(),
            Some(APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
        );
        assert!(vfs.get_file("src/lib.rs").contains("beta (escaped)"));
    }

    /// `<search>`/`<replace>` XML blocks inside a raw body are recognized.
    #[test]
    fn apply_edit_accepts_xml_search_replace_blocks_in_raw_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::RawChunk {
                    tag: "apply_edit".to_string(),
                    bytes: b"<search>beta</search><replace>beta (edited)</replace>".to_vec(),
                },
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(edit.applied);
        assert_eq!(edit.format.as_deref(), Some("search_replace_xml_blocks"));
        assert!(vfs.get_file("src/lib.rs").contains("beta (edited)"));
    }

    /// An unrecognized body is rejected with the parse_error reason code
    /// rather than being silently ignored.
    #[test]
    fn apply_edit_reports_parse_error_reason_code_for_unsupported_raw_body() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\ngamma\n");
        let mut engine = Engine::new(vfs, MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::Text("totally unsupported patch format".to_string()),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("turn should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(!edit.applied);
        assert_eq!(edit.reason_code.as_deref(), Some(APPLY_EDIT_REASON_PARSE_ERROR));
        assert_eq!(edit.format.as_deref(), Some("raw_text"));
    }

    /// The manifest must reflect both the static contract (commands, formats,
    /// tiers) and the configured budgets/timeouts.
    #[test]
    fn capability_manifest_reports_current_engine_contract() {
        let engine = Engine::new(InMemoryVfs::default(), MockTerminal::default())
            .with_terminal_timeout(Duration::from_secs(9))
            .with_list_budget(77, 33);
        let manifest = engine.capability_manifest();
        assert_eq!(
            manifest
                .commands
                .iter()
                .map(|command| command.name)
                .collect::<Vec<_>>(),
            vec![
                "write_file",
                "apply_edit",
                "read_file",
                "list_files",
                "terminal",
                "terminal_signal",
            ]
        );
        assert!(
            manifest
                .apply_edit_formats
                .contains(&APPLY_EDIT_FORMAT_SEARCH_REPLACE_MARKERS)
        );
        assert!(
            manifest
                .apply_edit_reason_codes
                .contains(&APPLY_EDIT_REASON_PARSE_ERROR)
        );
        assert_eq!(
            manifest.apply_edit_tiers,
            vec![
                EditTier::Exact,
                EditTier::WhitespaceAgnostic,
                EditTier::ContextualAnchor,
                EditTier::NotApplied,
            ]
        );
        assert_eq!(manifest.state_header_fields, STATE_HEADER_FIELDS);
        assert_eq!(manifest.max_list_lines, 77);
        assert_eq!(manifest.dense_dir_threshold, 33);
        assert_eq!(manifest.terminal_timeout, Duration::from_secs(9));
        assert_eq!(manifest.recent_hash_limit, DEFAULT_RECENT_HASH_LIMIT);
    }

    /// Stale-read protection: an external modification between read and edit
    /// must block the edit and surface a warning.
    #[test]
    fn apply_edit_warns_if_file_changed_since_last_read() {
        let vfs = InMemoryVfs::default();
        vfs.set_file("src/lib.rs", "alpha\nbeta\n");
        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
        let _ = engine
            .execute_turn(vec![Instruction::StartTag {
                name: "read_file".to_string(),
                attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
            }])
            .expect("read should work");
        // Simulate an out-of-band change after the engine recorded the hash.
        vfs.set_file("src/lib.rs", "external\nchange\n");
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "apply_edit".to_string(),
                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
                },
                Instruction::StartTag {
                    name: "search".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::RawChunk {
                    tag: "search".to_string(),
                    bytes: b"beta".to_vec(),
                },
                Instruction::EndTag("search".to_string()),
                Instruction::StartTag {
                    name: "replace".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::RawChunk {
                    tag: "replace".to_string(),
                    bytes: b"gamma".to_vec(),
                },
                Instruction::EndTag("replace".to_string()),
                Instruction::EndTag("apply_edit".to_string()),
            ])
            .expect("apply should run");
        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
            panic!("expected apply_edit output");
        };
        assert!(!edit.applied);
        assert_eq!(edit.tier, EditTier::NotApplied);
        assert!(
            edit.warning
                .as_deref()
                .unwrap_or_default()
                .contains("File modified externally")
        );
    }

    /// Dense directories (above the descendant threshold) are summarized as
    /// "omitted"; recently-modified entries carry a `(*)` marker.
    #[test]
    fn list_files_omits_dense_directories_and_marks_recent() {
        let vfs = InMemoryVfs::default();
        vfs.set_tree(vec![
            TreeNode {
                path: PathBuf::from("src"),
                kind: NodeKind::Directory,
                descendant_file_count: 3,
                modified_recently: false,
            },
            TreeNode {
                path: PathBuf::from("src/lib.rs"),
                kind: NodeKind::File,
                descendant_file_count: 0,
                modified_recently: true,
            },
            TreeNode {
                path: PathBuf::from("node_modules"),
                kind: NodeKind::Directory,
                descendant_file_count: 2400,
                modified_recently: false,
            },
        ]);
        let mut engine = Engine::new(vfs, MockTerminal::default()).with_list_budget(100, 200);
        let turn = engine
            .execute_turn(vec![Instruction::StartTag {
                name: "list_files".to_string(),
                attributes: BTreeMap::from([("path".to_string(), ".".to_string())]),
            }])
            .expect("list should run");
        let EngineOutput::ListFiles(output) = &turn.outputs[0] else {
            panic!("expected list output");
        };
        assert!(
            output
                .lines
                .iter()
                .any(|line| line.contains("node_modules") && line.contains("omitted"))
        );
        assert!(output.lines.iter().any(|line| line.contains("(*)")));
    }

    /// A raw-body terminal command runs and the turn's state header reports
    /// active PIDs (sorted) plus the cwd.
    #[test]
    fn terminal_executes_command_and_reports_state_header() {
        let vfs = InMemoryVfs::default();
        let terminal = MockTerminal {
            pids: vec![42, 7],
            ..Default::default()
        };
        let mut engine = Engine::new(vfs, terminal);
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "terminal".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::RawChunk {
                    tag: "terminal".to_string(),
                    bytes: b"echo hi".to_vec(),
                },
                Instruction::EndTag("terminal".to_string()),
            ])
            .expect("terminal turn should run");
        assert!(matches!(
            turn.outputs.as_slice(),
            [EngineOutput::Terminal(TerminalExecution { .. })]
        ));
        assert_eq!(turn.state_header.active_pids, vec![7, 42]);
        assert!(turn.state_header.render().contains("CWD: /virtual"));
    }

    /// The command may also arrive as a `cmd` attribute with an empty body.
    #[test]
    fn terminal_supports_attribute_command_form() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs, MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "terminal".to_string(),
                    attributes: BTreeMap::from([("cmd".to_string(), "echo attr".to_string())]),
                },
                Instruction::EndTag("terminal".to_string()),
            ])
            .expect("terminal command should run");
        let EngineOutput::Terminal(output) = &turn.outputs[0] else {
            panic!("expected terminal output");
        };
        assert!(output.output.contains("ran: echo attr"));
    }

    /// Unknown tags must produce visible warnings (one per start/end tag)
    /// rather than being dropped silently.
    #[test]
    fn unknown_tags_emit_warnings_instead_of_silent_noops() {
        let vfs = InMemoryVfs::default();
        let mut engine = Engine::new(vfs, MockTerminal::default());
        let turn = engine
            .execute_turn(vec![
                Instruction::StartTag {
                    name: "mystery_tool".to_string(),
                    attributes: BTreeMap::new(),
                },
                Instruction::EndTag("mystery_tool".to_string()),
            ])
            .expect("turn should run");
        assert_eq!(turn.outputs.len(), 2);
        assert!(matches!(
            &turn.outputs[0],
            EngineOutput::Warning(message) if message.contains("unsupported start tag <mystery_tool>")
        ));
        assert!(matches!(
            &turn.outputs[1],
            EngineOutput::Warning(message) if message.contains("unsupported end tag </mystery_tool>")
        ));
    }

    /// The generated prompt documents exactly the enabled capabilities.
    #[test]
    fn system_prompt_includes_enabled_commands() {
        let prompt = generate_system_prompt(&[Capability::ReadFile, Capability::Terminal]);
        assert!(prompt.contains("<read_file"));
        assert!(prompt.contains("<terminal>"));
        assert!(!prompt.contains("<write_file path"));
    }

    /// FNV-1a, 64-bit: cheap deterministic stand-in for sha256 in tests.
    fn simple_hash(input: &[u8]) -> String {
        let mut acc: u64 = 1469598103934665603;
        for b in input {
            acc ^= *b as u64;
            acc = acc.wrapping_mul(1099511628211);
        }
        format!("{acc:016x}")
    }
}