Skip to main content

sa3p_engine/
lib.rs

//! Sans-IO execution engine for SA3P operator instructions.
//!
//! The engine consumes `sa3p_parser::Instruction` values and executes
//! command semantics via trait boundaries:
//! - `VirtualFileSystem`
//! - `TerminalProvider`
//!
//! This keeps business logic transport/host agnostic while still producing
//! deterministic per-turn outputs and state headers.
10
11use std::collections::HashMap;
12use std::fmt::Write as _;
13use std::path::{Path, PathBuf};
14use std::time::Duration;
15
16use sa3p_parser::{Attributes, Instruction};
17use strsim::levenshtein;
18use thiserror::Error;
19
20#[derive(Debug, Error)]
21pub enum EngineError {
22    #[error("missing required attribute `{0}`")]
23    MissingAttribute(&'static str),
24    #[error("invalid integer for attribute `{name}`: {value}")]
25    InvalidInteger { name: &'static str, value: String },
26    #[error("operation is out of order: {0}")]
27    InvalidState(String),
28    #[error("invalid utf-8 payload")]
29    InvalidUtf8,
30    #[error("virtual filesystem error: {0}")]
31    Vfs(String),
32    #[error("terminal error: {0}")]
33    Terminal(String),
34}
35
36pub type Result<T> = std::result::Result<T, EngineError>;
37
/// A file path paired with the hash string reported for its contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Location of the hashed file.
    pub path: PathBuf,
    /// Hash of the file contents as produced by the filesystem backend
    /// (named for SHA-256; the engine never computes it itself).
    pub sha256: String,
}
43
/// Distinguishes the two kinds of entries a tree listing can contain.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// A regular file entry.
    File,
    /// A directory entry.
    Directory,
}
49
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct TreeNode {
52    pub path: PathBuf,
53    pub kind: NodeKind,
54    pub descendant_file_count: usize,
55    pub modified_recently: bool,
56}
57
58pub trait VirtualFileSystem {
59    fn read(&self, path: &Path) -> Result<Vec<u8>>;
60    fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()>;
61    fn hash(&self, path: &Path) -> Result<String>;
62    fn cwd(&self) -> Result<PathBuf>;
63    fn list_tree(&self, path: &Path) -> Result<Vec<TreeNode>>;
64    fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>>;
65}
66
/// Signals that the `terminal_signal` command can deliver to a process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessSignal {
    /// Selected by the attribute value `SIGINT` (also the default).
    SigInt,
    /// Selected by the attribute value `SIGTERM`.
    SigTerm,
    /// Selected by the attribute value `SIGKILL`.
    SigKill,
}
73
/// Result of running one terminal command through a `TerminalProvider`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TerminalExecution {
    /// Captured command output.
    pub output: String,
    /// Exit status, when one is available.
    pub exit_code: Option<i32>,
    /// Working directory reported by the provider for this execution.
    pub cwd: PathBuf,
    /// PID of a detached process, when the provider reports one.
    pub detached_pid: Option<u32>,
}
81
82pub trait TerminalProvider {
83    fn run(&mut self, command: &str, timeout: Duration) -> Result<TerminalExecution>;
84    fn signal(&mut self, pid: u32, signal: ProcessSignal) -> Result<()>;
85    fn active_pids(&self) -> Vec<u32>;
86}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct StateHeader {
90    pub cwd: PathBuf,
91    pub recent_hashes: Vec<FileHash>,
92    pub active_pids: Vec<u32>,
93}
94
95impl StateHeader {
96    pub fn render(&self) -> String {
97        let mut out = String::new();
98        let _ = write!(&mut out, "[CWD: {}", self.cwd.display());
99
100        if self.recent_hashes.is_empty() {
101            out.push_str(" | RECENT_HASHES: none");
102        } else {
103            out.push_str(" | RECENT_HASHES: ");
104            for (idx, file_hash) in self.recent_hashes.iter().enumerate() {
105                if idx > 0 {
106                    out.push_str(", ");
107                }
108                let _ = write!(
109                    &mut out,
110                    "{}#{}",
111                    file_hash.path.display(),
112                    shorten_hash(&file_hash.sha256)
113                );
114            }
115        }
116
117        if self.active_pids.is_empty() {
118            out.push_str(" | ACTIVE_PIDS: none]");
119        } else {
120            out.push_str(" | ACTIVE_PIDS: ");
121            for (idx, pid) in self.active_pids.iter().enumerate() {
122                if idx > 0 {
123                    out.push_str(", ");
124                }
125                let _ = write!(&mut out, "{pid}");
126            }
127            out.push(']');
128        }
129
130        out
131    }
132}
133
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct TurnResult {
136    pub state_header: StateHeader,
137    pub outputs: Vec<EngineOutput>,
138}
139
140#[derive(Debug, Clone, PartialEq, Eq)]
141pub enum EngineOutput {
142    ReadFile(ReadFileOutput),
143    WriteFile(WriteFileOutput),
144    ApplyEdit(ApplyEditOutput),
145    ListFiles(ListFilesOutput),
146    Terminal(TerminalExecution),
147    Signal(SignalOutput),
148    Warning(String),
149}
150
/// Result of a `read_file` command, including the window actually served.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFileOutput {
    /// File that was read.
    pub path: PathBuf,
    /// Size of the whole file in bytes.
    pub size_bytes: usize,
    /// Hash of the file as reported by the filesystem backend.
    pub sha256: String,
    /// First line requested (1-based, after defaulting).
    pub requested_start: usize,
    /// Last line requested (1-based, after defaulting).
    pub requested_end: usize,
    /// First line actually returned; `0` for an empty file.
    pub served_start: usize,
    /// Last line actually returned; `0` for an empty file.
    pub served_end: usize,
    /// Number of lines in the file.
    pub total_lines: usize,
    /// Served lines, each prefixed with `[<line number>] `, joined by `\n`.
    pub body: String,
    /// Set when the file is empty or the requested range had to be adjusted.
    pub warning: Option<String>,
}
164
165impl ReadFileOutput {
166    pub fn fidelity_header(&self) -> String {
167        format!(
168            "[PATH: {} | SIZE: {} | SHA256: {} | LINES: {}-{}/{}]",
169            self.path.display(),
170            human_bytes(self.size_bytes),
171            shorten_hash(&self.sha256),
172            self.served_start,
173            self.served_end,
174            self.total_lines
175        )
176    }
177}
178
/// Result of a completed `write_file` block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// File that was written.
    pub path: PathBuf,
    /// Number of bytes handed to the filesystem backend.
    pub size_bytes: usize,
    /// Hash reported by the filesystem backend after the write.
    pub sha256: String,
}
185
/// Which matching strategy located the target of an `apply_edit`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EditTier {
    /// The search text was found verbatim.
    Exact,
    /// Lines matched after trimming leading/trailing whitespace.
    WhitespaceAgnostic,
    /// The closest block by edit distance was within the allowed threshold.
    ContextualAnchor,
    /// No edit was applied.
    NotApplied,
}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct ApplyEditOutput {
196    pub path: PathBuf,
197    pub applied: bool,
198    pub tier: EditTier,
199    pub sha256: Option<String>,
200    pub warning: Option<String>,
201}
202
/// Result of a `list_files` command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListFilesOutput {
    /// Directory that was listed.
    pub path: PathBuf,
    /// Rendered listing lines, possibly ending in a truncation marker.
    pub lines: Vec<String>,
}
208
209#[derive(Debug, Clone, PartialEq, Eq)]
210pub struct SignalOutput {
211    pub pid: u32,
212    pub signal: ProcessSignal,
213}
214
215#[derive(Debug)]
216pub struct Engine<VFS, Terminal> {
217    vfs: VFS,
218    terminal: Terminal,
219    known_hashes: HashMap<PathBuf, String>,
220    pending: Option<PendingOperation>,
221    max_list_lines: usize,
222    dense_dir_threshold: usize,
223    terminal_timeout: Duration,
224    recent_hash_limit: usize,
225}
226
227impl<VFS, Terminal> Engine<VFS, Terminal>
228where
229    VFS: VirtualFileSystem,
230    Terminal: TerminalProvider,
231{
232    pub fn new(vfs: VFS, terminal: Terminal) -> Self {
233        Self {
234            vfs,
235            terminal,
236            known_hashes: HashMap::new(),
237            pending: None,
238            max_list_lines: 300,
239            dense_dir_threshold: 200,
240            terminal_timeout: Duration::from_secs(5),
241            recent_hash_limit: 5,
242        }
243    }
244
245    pub fn with_terminal_timeout(mut self, timeout: Duration) -> Self {
246        self.terminal_timeout = timeout;
247        self
248    }
249
250    pub fn with_list_budget(mut self, max_lines: usize, dense_threshold: usize) -> Self {
251        self.max_list_lines = max_lines;
252        self.dense_dir_threshold = dense_threshold;
253        self
254    }
255
256    pub fn execute_turn(&mut self, instructions: Vec<Instruction>) -> Result<TurnResult> {
257        let mut outputs = Vec::new();
258
259        for instruction in instructions {
260            self.apply_instruction(instruction, &mut outputs)?;
261        }
262
263        let state_header = StateHeader {
264            cwd: self.vfs.cwd()?,
265            recent_hashes: self.vfs.recent_file_hashes(self.recent_hash_limit)?,
266            active_pids: {
267                let mut pids = self.terminal.active_pids();
268                pids.sort_unstable();
269                pids
270            },
271        };
272
273        Ok(TurnResult {
274            state_header,
275            outputs,
276        })
277    }
278
279    fn apply_instruction(
280        &mut self,
281        instruction: Instruction,
282        outputs: &mut Vec<EngineOutput>,
283    ) -> Result<()> {
284        match instruction {
285            Instruction::Text(_) => {}
286            Instruction::StartTag { name, attributes } => {
287                self.handle_start_tag(&name, attributes, outputs)?;
288            }
289            Instruction::EndTag(name) => {
290                self.handle_end_tag(&name, outputs)?;
291            }
292            Instruction::WriteChunk(bytes) => {
293                let Some(PendingOperation::WriteFile(write)) = self.pending.as_mut() else {
294                    return Err(EngineError::InvalidState(
295                        "received WriteChunk outside <write_file>".to_string(),
296                    ));
297                };
298                write.buffer.extend_from_slice(&bytes);
299            }
300            Instruction::RawChunk { tag, bytes } => match self.pending.as_mut() {
301                Some(PendingOperation::ApplyEdit(edit)) => match edit.capture {
302                    Some(ApplyCapture::Search) if tag == "search" => {
303                        edit.search.extend_from_slice(&bytes);
304                    }
305                    Some(ApplyCapture::Replace) if tag == "replace" => {
306                        edit.replace.extend_from_slice(&bytes);
307                    }
308                    _ => {
309                        return Err(EngineError::InvalidState(format!(
310                            "unexpected raw chunk for <{tag}> while applying edit"
311                        )));
312                    }
313                },
314                Some(PendingOperation::Terminal(term)) if tag == "terminal" => {
315                    term.command.extend_from_slice(&bytes);
316                }
317                _ => {
318                    return Err(EngineError::InvalidState(format!(
319                        "received raw chunk for <{tag}> without active matching context"
320                    )));
321                }
322            },
323        }
324
325        Ok(())
326    }
327
328    fn handle_start_tag(
329        &mut self,
330        name: &str,
331        attributes: Attributes,
332        outputs: &mut Vec<EngineOutput>,
333    ) -> Result<()> {
334        match name {
335            "write_file" => {
336                self.ensure_no_pending("write_file")?;
337                let path = required_path(&attributes)?;
338                self.pending = Some(PendingOperation::WriteFile(PendingWrite {
339                    path,
340                    buffer: Vec::new(),
341                }));
342            }
343            "read_file" => {
344                let output = self.execute_read_file(&attributes)?;
345                outputs.push(EngineOutput::ReadFile(output));
346            }
347            "apply_edit" => {
348                self.ensure_no_pending("apply_edit")?;
349                let path = required_path(&attributes)?;
350                self.pending = Some(PendingOperation::ApplyEdit(PendingApplyEdit {
351                    path,
352                    search: Vec::new(),
353                    replace: Vec::new(),
354                    capture: None,
355                }));
356            }
357            "search" => {
358                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
359                    return Err(EngineError::InvalidState(
360                        "<search> must be nested under <apply_edit>".to_string(),
361                    ));
362                };
363                edit.capture = Some(ApplyCapture::Search);
364            }
365            "replace" => {
366                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
367                    return Err(EngineError::InvalidState(
368                        "<replace> must be nested under <apply_edit>".to_string(),
369                    ));
370                };
371                edit.capture = Some(ApplyCapture::Replace);
372            }
373            "list_files" => {
374                let output = self.execute_list_files(&attributes)?;
375                outputs.push(EngineOutput::ListFiles(output));
376            }
377            "terminal" => {
378                self.ensure_no_pending("terminal")?;
379                self.pending = Some(PendingOperation::Terminal(PendingTerminal {
380                    command: Vec::new(),
381                }));
382            }
383            "terminal_signal" => {
384                let signal_output = self.execute_terminal_signal(&attributes)?;
385                outputs.push(EngineOutput::Signal(signal_output));
386            }
387            _ => {}
388        }
389
390        Ok(())
391    }
392
393    fn handle_end_tag(&mut self, name: &str, outputs: &mut Vec<EngineOutput>) -> Result<()> {
394        match name {
395            "write_file" => {
396                let Some(PendingOperation::WriteFile(write)) = self.pending.take() else {
397                    return Err(EngineError::InvalidState(
398                        "</write_file> received without matching start".to_string(),
399                    ));
400                };
401                let output = self.finalize_write(write)?;
402                outputs.push(EngineOutput::WriteFile(output));
403            }
404            "search" | "replace" => {
405                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.as_mut() else {
406                    return Err(EngineError::InvalidState(format!(
407                        "</{name}> received without active <apply_edit>"
408                    )));
409                };
410                edit.capture = None;
411            }
412            "apply_edit" => {
413                let Some(PendingOperation::ApplyEdit(edit)) = self.pending.take() else {
414                    return Err(EngineError::InvalidState(
415                        "</apply_edit> received without matching start".to_string(),
416                    ));
417                };
418                let output = self.finalize_apply_edit(edit)?;
419                outputs.push(EngineOutput::ApplyEdit(output));
420            }
421            "terminal" => {
422                let Some(PendingOperation::Terminal(term)) = self.pending.take() else {
423                    return Err(EngineError::InvalidState(
424                        "</terminal> received without matching start".to_string(),
425                    ));
426                };
427                let command =
428                    String::from_utf8(term.command).map_err(|_| EngineError::InvalidUtf8)?;
429                let output = self.terminal.run(command.trim(), self.terminal_timeout)?;
430                outputs.push(EngineOutput::Terminal(output));
431            }
432            _ => {}
433        }
434
435        Ok(())
436    }
437
438    fn finalize_write(&mut self, write: PendingWrite) -> Result<WriteFileOutput> {
439        self.vfs.write_atomic(&write.path, &write.buffer)?;
440        let sha256 = self.vfs.hash(&write.path)?;
441        self.known_hashes.insert(write.path.clone(), sha256.clone());
442
443        Ok(WriteFileOutput {
444            path: write.path,
445            size_bytes: write.buffer.len(),
446            sha256,
447        })
448    }
449
450    fn execute_read_file(&mut self, attributes: &Attributes) -> Result<ReadFileOutput> {
451        let path = required_path(attributes)?;
452        let requested_start = optional_usize(attributes, "start_line")?
453            .unwrap_or(1)
454            .max(1);
455        let requested_end =
456            optional_usize(attributes, "end_line")?.unwrap_or(requested_start + 200);
457
458        let bytes = self.vfs.read(&path)?;
459        let text = String::from_utf8(bytes.clone()).map_err(|_| EngineError::InvalidUtf8)?;
460        let sha256 = self.vfs.hash(&path)?;
461        self.known_hashes.insert(path.clone(), sha256.clone());
462
463        let all_lines: Vec<&str> = text.lines().collect();
464        let total_lines = all_lines.len();
465
466        let (served_start, served_end, warning) = if total_lines == 0 {
467            (0, 0, Some("file is empty; returning no lines".to_string()))
468        } else {
469            let served_start = requested_start.min(total_lines);
470            let served_end = requested_end.max(served_start).min(total_lines);
471            let warning = if served_start != requested_start || served_end != requested_end {
472                Some(format!(
473                    "requested lines {}-{} adjusted to {}-{} (file has {} lines)",
474                    requested_start, requested_end, served_start, served_end, total_lines
475                ))
476            } else {
477                None
478            };
479            (served_start, served_end, warning)
480        };
481
482        let body = if total_lines == 0 {
483            String::new()
484        } else {
485            let mut rendered = Vec::new();
486            for line_idx in served_start..=served_end {
487                let content = all_lines[line_idx - 1];
488                rendered.push(format!("[{line_idx}] {content}"));
489            }
490            rendered.join("\n")
491        };
492
493        Ok(ReadFileOutput {
494            path,
495            size_bytes: bytes.len(),
496            sha256,
497            requested_start,
498            requested_end,
499            served_start,
500            served_end,
501            total_lines,
502            body,
503            warning,
504        })
505    }
506
507    fn finalize_apply_edit(&mut self, edit: PendingApplyEdit) -> Result<ApplyEditOutput> {
508        let search = String::from_utf8(edit.search).map_err(|_| EngineError::InvalidUtf8)?;
509        let replace = String::from_utf8(edit.replace).map_err(|_| EngineError::InvalidUtf8)?;
510
511        if let Some(previous_hash) = self.known_hashes.get(&edit.path) {
512            let current_hash = self.vfs.hash(&edit.path)?;
513            if previous_hash != &current_hash {
514                return Ok(ApplyEditOutput {
515                    path: edit.path,
516                    applied: false,
517                    tier: EditTier::NotApplied,
518                    sha256: None,
519                    warning: Some("[WARN: File modified externally. Please re-read.]".to_string()),
520                });
521            }
522        }
523
524        let original_bytes = self.vfs.read(&edit.path)?;
525        let original = String::from_utf8(original_bytes).map_err(|_| EngineError::InvalidUtf8)?;
526
527        let Some((rewritten, tier)) = apply_edit_with_tiers(&original, &search, &replace) else {
528            return Ok(ApplyEditOutput {
529                path: edit.path,
530                applied: false,
531                tier: EditTier::NotApplied,
532                sha256: None,
533                warning: Some("no suitable target block found for apply_edit".to_string()),
534            });
535        };
536
537        self.vfs.write_atomic(&edit.path, rewritten.as_bytes())?;
538        let sha256 = self.vfs.hash(&edit.path)?;
539        self.known_hashes.insert(edit.path.clone(), sha256.clone());
540
541        Ok(ApplyEditOutput {
542            path: edit.path,
543            applied: true,
544            tier,
545            sha256: Some(sha256),
546            warning: None,
547        })
548    }
549
550    fn execute_list_files(&self, attributes: &Attributes) -> Result<ListFilesOutput> {
551        let path = optional_path(attributes, "path")?.unwrap_or_else(|| PathBuf::from("."));
552        let mut nodes = self.vfs.list_tree(&path)?;
553        nodes.sort_by(|a, b| a.path.cmp(&b.path));
554
555        let mut lines = Vec::new();
556
557        for node in nodes {
558            if lines.len() >= self.max_list_lines {
559                lines.push("[... truncated due to token budget ...]".to_string());
560                break;
561            }
562
563            let mut line = match node.kind {
564                NodeKind::Directory => {
565                    if node.descendant_file_count >= self.dense_dir_threshold {
566                        format!(
567                            "[dir] {}/ ({} files, omitted)",
568                            node.path.display(),
569                            node.descendant_file_count
570                        )
571                    } else {
572                        format!("[dir] {}/", node.path.display())
573                    }
574                }
575                NodeKind::File => format!("[file] {}", node.path.display()),
576            };
577
578            if node.modified_recently {
579                line.push_str(" (*)");
580            }
581
582            lines.push(line);
583        }
584
585        Ok(ListFilesOutput { path, lines })
586    }
587
588    fn execute_terminal_signal(&mut self, attributes: &Attributes) -> Result<SignalOutput> {
589        let pid_value = required_attr(attributes, "pid")?;
590        let pid = pid_value
591            .parse::<u32>()
592            .map_err(|_| EngineError::InvalidInteger {
593                name: "pid",
594                value: pid_value.to_string(),
595            })?;
596        let signal = match attributes
597            .get("signal")
598            .map(|v| v.to_ascii_uppercase())
599            .unwrap_or_else(|| "SIGINT".to_string())
600            .as_str()
601        {
602            "SIGINT" => ProcessSignal::SigInt,
603            "SIGTERM" => ProcessSignal::SigTerm,
604            "SIGKILL" => ProcessSignal::SigKill,
605            other => {
606                return Err(EngineError::InvalidState(format!(
607                    "unsupported signal `{other}`"
608                )));
609            }
610        };
611
612        self.terminal.signal(pid, signal)?;
613        Ok(SignalOutput { pid, signal })
614    }
615
616    fn ensure_no_pending(&self, next: &str) -> Result<()> {
617        if self.pending.is_some() {
618            return Err(EngineError::InvalidState(format!(
619                "cannot start <{next}> while another command block is still open"
620            )));
621        }
622        Ok(())
623    }
624}
625
626#[derive(Debug)]
627enum PendingOperation {
628    WriteFile(PendingWrite),
629    ApplyEdit(PendingApplyEdit),
630    Terminal(PendingTerminal),
631}
632
/// State accumulated for an open `<write_file>` block.
#[derive(Debug)]
struct PendingWrite {
    /// Destination file.
    path: PathBuf,
    /// Content bytes received so far.
    buffer: Vec<u8>,
}
638
639#[derive(Debug)]
640struct PendingApplyEdit {
641    path: PathBuf,
642    search: Vec<u8>,
643    replace: Vec<u8>,
644    capture: Option<ApplyCapture>,
645}
646
/// Selects which buffer of an open `apply_edit` receives raw chunks.
#[derive(Debug, Clone, Copy)]
enum ApplyCapture {
    /// Chunks belong to the search text.
    Search,
    /// Chunks belong to the replacement text.
    Replace,
}
652
/// State accumulated for an open `<terminal>` block.
#[derive(Debug)]
struct PendingTerminal {
    /// Command bytes received so far.
    command: Vec<u8>,
}
657
/// A command family that can be advertised in the generated system prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Capability {
    /// The `<write_file>` block command.
    WriteFile,
    /// The `<apply_edit>` block command.
    ApplyEdit,
    /// The `<read_file>` command.
    ReadFile,
    /// The `<list_files>` command.
    ListFiles,
    /// The `<terminal>` block command.
    Terminal,
}

impl Capability {
    /// Returns the usage template shown to the model for this capability.
    ///
    /// Every placeholder is written as `...`; the `read_file` template used
    /// to show `start_line` with only two dots, which did not match the
    /// other placeholders.
    fn tag(&self) -> &'static str {
        match self {
            Capability::WriteFile => "<write_file path=\"...\"></write_file>",
            Capability::ApplyEdit => {
                "<apply_edit path=\"...\"><search>...</search><replace>...</replace></apply_edit>"
            }
            Capability::ReadFile => {
                "<read_file path=\"...\" start_line=\"...\" end_line=\"...\" />"
            }
            Capability::ListFiles => "<list_files path=\"...\" />",
            Capability::Terminal => "<terminal>...</terminal>",
        }
    }
}
680
681pub fn generate_system_prompt(capabilities: &[Capability]) -> String {
682    let enabled: Vec<Capability> = if capabilities.is_empty() {
683        vec![
684            Capability::WriteFile,
685            Capability::ApplyEdit,
686            Capability::ReadFile,
687            Capability::ListFiles,
688            Capability::Terminal,
689        ]
690    } else {
691        capabilities.to_vec()
692    };
693
694    let mut out = String::from(
695        "You are a Headless Operator. You do not use JSON for tools. You interact directly with the system using XML-style tags.\n",
696    );
697    out.push_str("Everything outside of a tag is considered internal monologue and will not be executed.\n\n");
698    out.push_str("Available Commands:\n");
699    for capability in &enabled {
700        out.push_str(capability.tag());
701        out.push('\n');
702    }
703    out.push_str("\nRules:\n");
704    out.push_str("1. Do not escape strings inside tags.\n");
705    out.push_str("2. Wait for [EXIT_CODE] or detached PID before assuming terminal completion.\n");
706    out.push_str("3. Use apply_edit for small changes and write_file for complete rewrites.\n");
707    out
708}
709
710fn required_path(attributes: &Attributes) -> Result<PathBuf> {
711    optional_path(attributes, "path")?.ok_or(EngineError::MissingAttribute("path"))
712}
713
714fn optional_path(attributes: &Attributes, key: &'static str) -> Result<Option<PathBuf>> {
715    Ok(attributes.get(key).map(PathBuf::from))
716}
717
718fn required_attr<'a>(attributes: &'a Attributes, key: &'static str) -> Result<&'a str> {
719    attributes
720        .get(key)
721        .map(|value| value.as_str())
722        .ok_or(EngineError::MissingAttribute(key))
723}
724
725fn optional_usize(attributes: &Attributes, key: &'static str) -> Result<Option<usize>> {
726    let Some(value) = attributes.get(key) else {
727        return Ok(None);
728    };
729    let parsed = value
730        .parse::<usize>()
731        .map_err(|_| EngineError::InvalidInteger {
732            name: key,
733            value: value.clone(),
734        })?;
735    Ok(Some(parsed))
736}
737
/// Returns the first eight characters of `hash`, or all of it when shorter.
fn shorten_hash(hash: &str) -> String {
    const PREFIX_CHARS: usize = 8;

    // Cut on a character boundary rather than a byte offset.
    match hash.char_indices().nth(PREFIX_CHARS) {
        Some((cut, _)) => hash[..cut].to_string(),
        None => hash.to_string(),
    }
}
741
/// Formats a byte count for display: exact bytes below 1 KiB (`"512b"`),
/// otherwise one decimal place in `kb` or `mb` (1024-based).
///
/// Values that would round up to `1024.0kb` are promoted to the `mb` form,
/// so a size just below 1 MiB prints as `1.0mb`.
fn human_bytes(bytes: usize) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;
    // Smallest size whose kb value rounds to 1024.0 at one decimal place
    // (1023.95 kb). From here on the mb form is the honest one.
    const MB_DISPLAY_FLOOR: f64 = MB - 0.05 * KB;

    let bytes_f = bytes as f64;
    if bytes_f >= MB_DISPLAY_FLOOR {
        format!("{:.1}mb", bytes_f / MB)
    } else if bytes_f >= KB {
        format!("{:.1}kb", bytes_f / KB)
    } else {
        format!("{bytes}b")
    }
}
755
756fn apply_edit_with_tiers(
757    original: &str,
758    search: &str,
759    replace: &str,
760) -> Option<(String, EditTier)> {
761    if search.is_empty() {
762        return None;
763    }
764
765    if let Some(output) = apply_exact(original, search, replace) {
766        return Some((output, EditTier::Exact));
767    }
768
769    if let Some(output) = apply_whitespace_agnostic(original, search, replace) {
770        return Some((output, EditTier::WhitespaceAgnostic));
771    }
772
773    apply_contextual_anchor(original, search, replace)
774        .map(|output| (output, EditTier::ContextualAnchor))
775}
776
/// Replaces the first verbatim occurrence of `search` in `original` with
/// `replace`. Returns `None` when `search` does not occur.
fn apply_exact(original: &str, search: &str, replace: &str) -> Option<String> {
    let (before, after) = original.split_once(search)?;

    let mut rewritten = String::with_capacity(before.len() + replace.len() + after.len());
    rewritten.push_str(before);
    rewritten.push_str(replace);
    rewritten.push_str(after);
    Some(rewritten)
}
785
786fn apply_whitespace_agnostic(original: &str, search: &str, replace: &str) -> Option<String> {
787    let original_lines = collect_line_spans(original);
788    let search_lines: Vec<&str> = search.lines().collect();
789    if search_lines.is_empty() || original_lines.len() < search_lines.len() {
790        return None;
791    }
792
793    for start in 0..=original_lines.len() - search_lines.len() {
794        let window = &original_lines[start..start + search_lines.len()];
795        if window
796            .iter()
797            .zip(search_lines.iter())
798            .all(|(candidate, target)| candidate.text.trim() == target.trim())
799        {
800            let range_start = window.first()?.start;
801            let range_end = window.last()?.end;
802            return Some(splice(original, range_start, range_end, replace));
803        }
804    }
805
806    None
807}
808
809fn apply_contextual_anchor(original: &str, search: &str, replace: &str) -> Option<String> {
810    let original_lines = collect_line_spans(original);
811    let search_lines: Vec<&str> = search.lines().collect();
812    if search_lines.is_empty() || original_lines.is_empty() {
813        return None;
814    }
815
816    let window_len = search_lines.len().min(original_lines.len());
817    let normalized_search = normalize_for_distance(search);
818    let mut best: Option<(usize, usize, usize)> = None;
819
820    for start in 0..=original_lines.len() - window_len {
821        let window = &original_lines[start..start + window_len];
822        let joined = window
823            .iter()
824            .map(|line| line.text)
825            .collect::<Vec<_>>()
826            .join("\n");
827        let score = levenshtein(&normalize_for_distance(&joined), &normalized_search);
828
829        match best {
830            Some((best_score, _, _)) if score >= best_score => {}
831            _ => best = Some((score, start, start + window_len - 1)),
832        }
833    }
834
835    let (score, line_start, line_end) = best?;
836    let threshold = normalized_search.len().max(6) / 3;
837    if score > threshold {
838        return None;
839    }
840
841    let range_start = original_lines[line_start].start;
842    let range_end = original_lines[line_end].end;
843    Some(splice(original, range_start, range_end, replace))
844}
845
/// Normalizes text for distance comparison: within each line, runs of
/// whitespace collapse to one space and the ends are trimmed; lines are
/// re-joined with `\n`; the whole result is trimmed.
fn normalize_for_distance(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());

    for (line_idx, line) in input.lines().enumerate() {
        if line_idx > 0 {
            normalized.push('\n');
        }
        for (word_idx, word) in line.split_whitespace().enumerate() {
            if word_idx > 0 {
                normalized.push(' ');
            }
            normalized.push_str(word);
        }
    }

    normalized.trim().to_string()
}
855
/// Returns `original` with the byte range `range_start..range_end` replaced
/// by `replace`.
///
/// # Panics
/// Panics if an offset is out of bounds or not on a character boundary.
fn splice(original: &str, range_start: usize, range_end: usize, replace: &str) -> String {
    let head = &original[..range_start];
    let tail = &original[range_end..];
    [head, replace, tail].concat()
}
863
/// One line of a source string together with its byte range.
#[derive(Debug)]
struct LineSpan<'a> {
    /// Byte offset of the first character of the line.
    start: usize,
    /// Byte offset just past the line, including its `\n` when present.
    end: usize,
    /// Line content without the trailing `\n`.
    text: &'a str,
}
870
871fn collect_line_spans(input: &str) -> Vec<LineSpan<'_>> {
872    let mut spans = Vec::new();
873    let mut offset = 0usize;
874
875    for chunk in input.split_inclusive('\n') {
876        let end = offset + chunk.len();
877        let text = chunk.strip_suffix('\n').unwrap_or(chunk);
878        spans.push(LineSpan {
879            start: offset,
880            end,
881            text,
882        });
883        offset = end;
884    }
885
886    if input.is_empty() {
887        return spans;
888    }
889
890    if !input.ends_with('\n') {
891        if let Some(last) = spans.last_mut() {
892            last.end = input.len();
893        }
894    }
895
896    spans
897}
898
899#[cfg(test)]
900mod tests {
901    use super::*;
902    use std::collections::BTreeMap;
903    use std::sync::{Arc, Mutex};
904
905    #[derive(Clone, Default)]
906    struct InMemoryVfs {
907        files: Arc<Mutex<HashMap<PathBuf, Vec<u8>>>>,
908        tree: Arc<Mutex<Vec<TreeNode>>>,
909    }
910
911    impl InMemoryVfs {
912        fn set_file(&self, path: &str, body: &str) {
913            self.files
914                .lock()
915                .expect("lock")
916                .insert(PathBuf::from(path), body.as_bytes().to_vec());
917        }
918
919        fn get_file(&self, path: &str) -> String {
920            String::from_utf8(
921                self.files
922                    .lock()
923                    .expect("lock")
924                    .get(&PathBuf::from(path))
925                    .cloned()
926                    .unwrap_or_default(),
927            )
928            .expect("utf8")
929        }
930
931        fn set_tree(&self, nodes: Vec<TreeNode>) {
932            *self.tree.lock().expect("lock") = nodes;
933        }
934    }
935
936    impl VirtualFileSystem for InMemoryVfs {
937        fn read(&self, path: &Path) -> Result<Vec<u8>> {
938            self.files
939                .lock()
940                .expect("lock")
941                .get(path)
942                .cloned()
943                .ok_or_else(|| EngineError::Vfs(format!("missing file {}", path.display())))
944        }
945
946        fn write_atomic(&self, path: &Path, bytes: &[u8]) -> Result<()> {
947            self.files
948                .lock()
949                .expect("lock")
950                .insert(path.to_path_buf(), bytes.to_vec());
951            Ok(())
952        }
953
954        fn hash(&self, path: &Path) -> Result<String> {
955            let bytes = self.read(path)?;
956            Ok(simple_hash(&bytes))
957        }
958
959        fn cwd(&self) -> Result<PathBuf> {
960            Ok(PathBuf::from("/virtual"))
961        }
962
963        fn list_tree(&self, _path: &Path) -> Result<Vec<TreeNode>> {
964            Ok(self.tree.lock().expect("lock").clone())
965        }
966
967        fn recent_file_hashes(&self, limit: usize) -> Result<Vec<FileHash>> {
968            let files = self.files.lock().expect("lock");
969            let mut entries: Vec<_> = files
970                .iter()
971                .map(|(path, body)| FileHash {
972                    path: path.clone(),
973                    sha256: simple_hash(body),
974                })
975                .collect();
976            entries.sort_by(|a, b| a.path.cmp(&b.path));
977            entries.truncate(limit);
978            Ok(entries)
979        }
980    }
981
982    #[derive(Default)]
983    struct MockTerminal {
984        pids: Vec<u32>,
985        last_command: Option<String>,
986    }
987
988    impl TerminalProvider for MockTerminal {
989        fn run(&mut self, command: &str, _timeout: Duration) -> Result<TerminalExecution> {
990            self.last_command = Some(command.to_string());
991            Ok(TerminalExecution {
992                output: format!("ran: {command}"),
993                exit_code: Some(0),
994                cwd: PathBuf::from("/virtual"),
995                detached_pid: None,
996            })
997        }
998
999        fn signal(&mut self, pid: u32, _signal: ProcessSignal) -> Result<()> {
1000            self.pids.retain(|existing| *existing != pid);
1001            Ok(())
1002        }
1003
1004        fn active_pids(&self) -> Vec<u32> {
1005            self.pids.clone()
1006        }
1007    }
1008
1009    #[test]
1010    fn write_file_chunks_commit_atomically() {
1011        let vfs = InMemoryVfs::default();
1012        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1013
1014        let turn = engine
1015            .execute_turn(vec![
1016                Instruction::StartTag {
1017                    name: "write_file".to_string(),
1018                    attributes: BTreeMap::from([("path".to_string(), "src/main.rs".to_string())]),
1019                },
1020                Instruction::WriteChunk(b"fn main()".to_vec()),
1021                Instruction::WriteChunk(b" { println!(\"ok\"); }".to_vec()),
1022                Instruction::EndTag("write_file".to_string()),
1023            ])
1024            .expect("turn should run");
1025
1026        assert_eq!(
1027            vfs.get_file("src/main.rs"),
1028            "fn main() { println!(\"ok\"); }"
1029        );
1030        assert!(matches!(
1031            turn.outputs.as_slice(),
1032            [EngineOutput::WriteFile(WriteFileOutput { .. })]
1033        ));
1034    }
1035
1036    #[test]
1037    fn read_file_returns_fidelity_header_and_numbered_lines() {
1038        let vfs = InMemoryVfs::default();
1039        vfs.set_file("src/lib.rs", "a\nb\nc\nd\n");
1040        let mut engine = Engine::new(vfs, MockTerminal::default());
1041
1042        let turn = engine
1043            .execute_turn(vec![Instruction::StartTag {
1044                name: "read_file".to_string(),
1045                attributes: BTreeMap::from([
1046                    ("path".to_string(), "src/lib.rs".to_string()),
1047                    ("start_line".to_string(), "3".to_string()),
1048                    ("end_line".to_string(), "9".to_string()),
1049                ]),
1050            }])
1051            .expect("turn should run");
1052
1053        let EngineOutput::ReadFile(output) = &turn.outputs[0] else {
1054            panic!("expected read output");
1055        };
1056
1057        assert_eq!(output.served_start, 3);
1058        assert_eq!(output.served_end, 4);
1059        assert_eq!(output.body, "[3] c\n[4] d");
1060        assert!(output.warning.is_some());
1061        assert!(output.fidelity_header().contains("PATH: src/lib.rs"));
1062    }
1063
1064    #[test]
1065    fn apply_edit_uses_whitespace_agnostic_matching() {
1066        let vfs = InMemoryVfs::default();
1067        vfs.set_file("src/lib.rs", "fn main() {\n    println!(\"x\");\n}\n");
1068        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1069
1070        let turn = engine
1071            .execute_turn(vec![
1072                Instruction::StartTag {
1073                    name: "apply_edit".to_string(),
1074                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1075                },
1076                Instruction::StartTag {
1077                    name: "search".to_string(),
1078                    attributes: BTreeMap::new(),
1079                },
1080                Instruction::RawChunk {
1081                    tag: "search".to_string(),
1082                    bytes: b"   println!(\"x\");   ".to_vec(),
1083                },
1084                Instruction::EndTag("search".to_string()),
1085                Instruction::StartTag {
1086                    name: "replace".to_string(),
1087                    attributes: BTreeMap::new(),
1088                },
1089                Instruction::RawChunk {
1090                    tag: "replace".to_string(),
1091                    bytes: b"println!(\"y\");".to_vec(),
1092                },
1093                Instruction::EndTag("replace".to_string()),
1094                Instruction::EndTag("apply_edit".to_string()),
1095            ])
1096            .expect("turn should run");
1097
1098        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1099            panic!("expected apply_edit output");
1100        };
1101
1102        assert!(edit.applied);
1103        assert_eq!(edit.tier, EditTier::WhitespaceAgnostic);
1104        assert!(vfs.get_file("src/lib.rs").contains("println!(\"y\");"));
1105    }
1106
1107    #[test]
1108    fn apply_edit_warns_if_file_changed_since_last_read() {
1109        let vfs = InMemoryVfs::default();
1110        vfs.set_file("src/lib.rs", "alpha\nbeta\n");
1111        let mut engine = Engine::new(vfs.clone(), MockTerminal::default());
1112
1113        let _ = engine
1114            .execute_turn(vec![Instruction::StartTag {
1115                name: "read_file".to_string(),
1116                attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1117            }])
1118            .expect("read should work");
1119
1120        vfs.set_file("src/lib.rs", "external\nchange\n");
1121
1122        let turn = engine
1123            .execute_turn(vec![
1124                Instruction::StartTag {
1125                    name: "apply_edit".to_string(),
1126                    attributes: BTreeMap::from([("path".to_string(), "src/lib.rs".to_string())]),
1127                },
1128                Instruction::StartTag {
1129                    name: "search".to_string(),
1130                    attributes: BTreeMap::new(),
1131                },
1132                Instruction::RawChunk {
1133                    tag: "search".to_string(),
1134                    bytes: b"beta".to_vec(),
1135                },
1136                Instruction::EndTag("search".to_string()),
1137                Instruction::StartTag {
1138                    name: "replace".to_string(),
1139                    attributes: BTreeMap::new(),
1140                },
1141                Instruction::RawChunk {
1142                    tag: "replace".to_string(),
1143                    bytes: b"gamma".to_vec(),
1144                },
1145                Instruction::EndTag("replace".to_string()),
1146                Instruction::EndTag("apply_edit".to_string()),
1147            ])
1148            .expect("apply should run");
1149
1150        let EngineOutput::ApplyEdit(edit) = &turn.outputs[0] else {
1151            panic!("expected apply_edit output");
1152        };
1153
1154        assert!(!edit.applied);
1155        assert_eq!(edit.tier, EditTier::NotApplied);
1156        assert!(
1157            edit.warning
1158                .as_deref()
1159                .unwrap_or_default()
1160                .contains("File modified externally")
1161        );
1162    }
1163
1164    #[test]
1165    fn list_files_omits_dense_directories_and_marks_recent() {
1166        let vfs = InMemoryVfs::default();
1167        vfs.set_tree(vec![
1168            TreeNode {
1169                path: PathBuf::from("src"),
1170                kind: NodeKind::Directory,
1171                descendant_file_count: 3,
1172                modified_recently: false,
1173            },
1174            TreeNode {
1175                path: PathBuf::from("src/lib.rs"),
1176                kind: NodeKind::File,
1177                descendant_file_count: 0,
1178                modified_recently: true,
1179            },
1180            TreeNode {
1181                path: PathBuf::from("node_modules"),
1182                kind: NodeKind::Directory,
1183                descendant_file_count: 2400,
1184                modified_recently: false,
1185            },
1186        ]);
1187
1188        let mut engine = Engine::new(vfs, MockTerminal::default()).with_list_budget(100, 200);
1189        let turn = engine
1190            .execute_turn(vec![Instruction::StartTag {
1191                name: "list_files".to_string(),
1192                attributes: BTreeMap::from([("path".to_string(), ".".to_string())]),
1193            }])
1194            .expect("list should run");
1195
1196        let EngineOutput::ListFiles(output) = &turn.outputs[0] else {
1197            panic!("expected list output");
1198        };
1199
1200        assert!(
1201            output
1202                .lines
1203                .iter()
1204                .any(|line| line.contains("node_modules") && line.contains("omitted"))
1205        );
1206        assert!(output.lines.iter().any(|line| line.contains("(*)")));
1207    }
1208
1209    #[test]
1210    fn terminal_executes_command_and_reports_state_header() {
1211        let vfs = InMemoryVfs::default();
1212        let terminal = MockTerminal {
1213            pids: vec![42, 7],
1214            ..Default::default()
1215        };
1216
1217        let mut engine = Engine::new(vfs, terminal);
1218        let turn = engine
1219            .execute_turn(vec![
1220                Instruction::StartTag {
1221                    name: "terminal".to_string(),
1222                    attributes: BTreeMap::new(),
1223                },
1224                Instruction::RawChunk {
1225                    tag: "terminal".to_string(),
1226                    bytes: b"echo hi".to_vec(),
1227                },
1228                Instruction::EndTag("terminal".to_string()),
1229            ])
1230            .expect("terminal turn should run");
1231
1232        assert!(matches!(
1233            turn.outputs.as_slice(),
1234            [EngineOutput::Terminal(TerminalExecution { .. })]
1235        ));
1236        assert_eq!(turn.state_header.active_pids, vec![7, 42]);
1237        assert!(turn.state_header.render().contains("CWD: /virtual"));
1238    }
1239
1240    #[test]
1241    fn system_prompt_includes_enabled_commands() {
1242        let prompt = generate_system_prompt(&[Capability::ReadFile, Capability::Terminal]);
1243        assert!(prompt.contains("<read_file"));
1244        assert!(prompt.contains("<terminal>"));
1245        assert!(!prompt.contains("<write_file path"));
1246    }
1247
1248    fn simple_hash(input: &[u8]) -> String {
1249        let mut acc: u64 = 1469598103934665603;
1250        for b in input {
1251            acc ^= *b as u64;
1252            acc = acc.wrapping_mul(1099511628211);
1253        }
1254        format!("{acc:016x}")
1255    }
1256}