newt_coder/
coder.rs

//! The Coder orchestrator — prompt -> infer -> normalize -> apply.
//!
//! One method, [`Coder::run`], wires the four pieces together:
//!
//! 1. [`build_prompt`](crate::prompt::build_prompt) scans the workspace
//!    for relevant files and composes a `(system, user)` pair around
//!    the S5 whole-file directive.
//! 2. The injected [`InferenceBackend`] runs one `complete` turn.
//! 3. [`normalize_emission`](crate::emission::normalize_emission)
//!    classifies the raw reply as `WholeFiles` / `UnifiedDiff` /
//!    `Prose`.
//! 4. The classified emission is applied to the workspace:
//!    `apply_whole_files` for the directive's happy path,
//!    `apply_patch` for the diff fallback, no-op + warn for prose.
//!
//! The caller (newt-acp-worker) then runs `git diff` to capture the
//! real workspace diff — the foreman's empty-diff signal is computed
//! from `git diff`, not from anything in this struct.
19
20use std::path::Path;
21use std::sync::Arc;
22
23use newt_core::Caveats;
24use newt_inference::{ChatRequest, InferenceBackend};
25
26use crate::emission::{normalize_emission, Emission};
27use crate::error::{CoderError, Result};
28use crate::prompt::{build_prompt, build_reprompt, CoderPrompt};
29
30/// The coder. Holds the inference backend the orchestrator uses for
31/// each `run` call; the backend is `Arc<dyn …>` so callers can share
32/// one backend across coder + non-coder paths.
33pub struct Coder {
34    backend: Arc<dyn InferenceBackend>,
35}
36
/// Result of a single `Coder::run` turn.
///
/// The ACP worker surfaces `emission_shape` through
/// `TaskReply.emission_shape` so the foreman's scorecard can tell
/// T0a / T0b / T0c apart instead of lumping them together as "empty
/// diff".
///
/// All fields are plain owned strings, so the type derives `PartialEq`
/// and `Eq`: two runs can be compared directly (for example with
/// `assert_eq!` in tests) rather than field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CoderRun {
    /// Wire-stable shape label: "whole_files", "unified_diff", "prose".
    pub emission_shape: String,
    /// Model id reported by the inference backend for the reply that
    /// produced this run.
    pub model_id: String,
    /// Relative paths of the files this run wrote. Empty for prose and
    /// for unified diffs — the diff path cannot name the files it
    /// touched without re-parsing the diff.
    pub files_written: Vec<String>,
    /// The raw model reply, kept for audit logs and post-mortems.
    /// NOTE: when the whole-file re-prompt fallback fires this is a
    /// composite first+retry transcript — read `first_emission` when
    /// only the model's initial output is wanted.
    pub raw_reply: String,
    /// The model's *first* raw emission, before any re-prompt fallback.
    /// Always the initial reply (never a composite), so the eval
    /// scorecard can judge it with `git apply --check` (#30B) to tell a
    /// clean diff from a sloppy one the fuzzy worker merely rescued.
    pub first_emission: String,
}
62
63impl Coder {
64    /// Build a coder bound to `backend`.
65    pub fn new(backend: Arc<dyn InferenceBackend>) -> Self {
66        Self { backend }
67    }
68
69    /// Run one turn against `workspace` under the authority carried by
70    /// `caveats`.
71    ///
72    /// `caveats` is the peer's signed, verified attenuated authority — see
73    /// `docs/decisions/agentic_object_capability_security.md` and the
74    /// 35a [`caveats_for_peer`] extractor in `newt-mesh`. Every tool
75    /// dispatch this method makes (`fs_read` for the prompt scan,
76    /// `net` for the inference call, `fs_write` for the apply, plus the
77    /// `max_calls` budget for inference turns) goes through the
78    /// enforcement helpers below — no path bypasses the check, even when
79    /// `caveats == Caveats::top()`. That symmetry is load-bearing: 35c
80    /// will tighten authority per peer, and a "skip checks if top"
81    /// shortcut would silently break that tightening.
82    ///
83    /// On any caveat refusal we return [`CoderError::CapabilityDenied`]
84    /// carrying the axis name and the concrete target the dispatch tried
85    /// to touch — enough context for the arbiter scorecard to count this
86    /// as a scrubbed sortie rather than a model failure.
87    ///
88    /// Happy path: build prompt -> infer -> normalize -> apply.
89    ///
90    /// Weak-model fallback: when the model emits a [`Emission::UnifiedDiff`]
91    /// (even under the whole-file directive) and that diff fails to apply
92    /// — its line numbers / context are too far off even for the fuzzy
93    /// matcher in `newt-tools::apply_patch` — we issue exactly ONE
94    /// re-prompt asking for the COMPLETE file(s) in `FILE:`/`END-FILE`
95    /// form, then apply via the hardened `apply_whole_files` path. The
96    /// retry counts as a *second* inference call against the
97    /// `max_calls` budget; if that budget would be exhausted we return
98    /// the original apply error rather than escalating to a denial.
99    ///
100    /// [`caveats_for_peer`]: https://docs.rs/newt-mesh/latest/newt_mesh/caveats/fn.caveats_for_peer.html
101    pub async fn run(&self, workspace: &Path, task: &str, caveats: &Caveats) -> Result<CoderRun> {
102        // 1. Build the prompt. `build_prompt` is what *reads* the
103        //    workspace, so the fs_read check is gated on the files the
104        //    prompt actually injected, not on the candidate set the
105        //    scanner considered.
106        let prompt = build_prompt(workspace, task)?;
107        check_fs_read(caveats, &prompt)?;
108        tracing::info!(
109            files_included = prompt.included_files.len(),
110            user_chars = prompt.user.len(),
111            "newt-coder prompt built"
112        );
113
114        // 2. First inference call — guarded by the net + max_calls axes.
115        let mut calls_used: u64 = 0;
116        check_call_budget(caveats, calls_used)?;
117        check_net(caveats, self.backend.as_ref())?;
118        let req = ChatRequest::new().system(prompt.system).user(prompt.user);
119        let reply = self
120            .backend
121            .complete(req)
122            .await
123            .map_err(|e| CoderError::Inference(e.to_string()))?;
124        calls_used += 1;
125        let raw = reply.content.clone();
126        let model_id = reply.model_id.clone();
127
128        let emission = normalize_emission(&raw)?;
129        let shape_label = emission.shape_label().to_string();
130
131        // 3. Try to apply the first emission — `apply` consults the
132        //    fs_write axis before each write.
133        match self.apply(&emission, workspace, caveats) {
134            Ok(files_written) => {
135                tracing::info!(
136                    emission_shape = %shape_label,
137                    files_written = files_written.len(),
138                    "newt-coder run complete"
139                );
140                Ok(CoderRun {
141                    emission_shape: shape_label,
142                    model_id,
143                    files_written,
144                    first_emission: raw.clone(),
145                    raw_reply: raw,
146                })
147            }
148            // The first emission was diff-shaped and did not apply: either a
149            // unified diff whose context was too far off even for the fuzzy
150            // matcher, or diff content the model wrapped in FILE:/END-FILE
151            // markers (classified as whole-files but rejected by the
152            // diff-shape guard). Both are recoverable with a single re-prompt
153            // for proper whole-file output.
154            Err(first_err)
155                if matches!(emission, Emission::UnifiedDiff(_))
156                    || matches!(first_err, CoderError::LooksLikeDiff { .. }) =>
157            {
158                tracing::warn!(
159                    error = %first_err,
160                    "newt-coder: diff-shaped emission did not apply, re-prompting for whole files"
161                );
162                self.reprompt_whole_files(workspace, task, raw, first_err, calls_used, caveats)
163                    .await
164            }
165            Err(other) => Err(other),
166        }
167    }
168
169    /// Single-retry fallback: re-prompt the model for the complete
170    /// file(s) and apply via `apply_whole_files`.
171    ///
172    /// Bounded to ONE additional inference call — there is no loop. The
173    /// retry counts as a *second* tool call against
174    /// [`Caveats::max_calls`], and if the budget would be exhausted by
175    /// that second call we fall through to `original_err` (the apply
176    /// failure from the first attempt). On any failure of the retry
177    /// (inference error, the model returning yet another diff / prose,
178    /// or the whole-file apply failing the shape guards or fs_write
179    /// caveat) we return `original_err`, so the caller sees the root
180    /// cause rather than a confusing second-order failure.
181    async fn reprompt_whole_files(
182        &self,
183        workspace: &Path,
184        task: &str,
185        first_raw: String,
186        original_err: CoderError,
187        calls_used: u64,
188        caveats: &Caveats,
189    ) -> Result<CoderRun> {
190        // The retry would be the (calls_used + 1)-th call; if the
191        // budget can't cover it, don't degrade the diagnostic by
192        // surfacing a fresh capability denial — keep the original
193        // apply failure, which is more actionable.
194        if !caveats.max_calls.permits_one_more(calls_used) {
195            tracing::warn!(
196                calls_used,
197                "newt-coder: re-prompt skipped, max_calls budget exhausted"
198            );
199            return Err(original_err);
200        }
201
202        let prompt = match build_reprompt(workspace, task) {
203            Ok(p) => p,
204            Err(e) => {
205                tracing::warn!(error = %e, "newt-coder: re-prompt build failed");
206                return Err(original_err);
207            }
208        };
209        // The re-prompt re-reads the same workspace; fs_read scope must
210        // still permit every file the second pass would inject.
211        if let Err(e) = check_fs_read(caveats, &prompt) {
212            tracing::warn!(error = %e, "newt-coder: re-prompt fs_read denied");
213            return Err(original_err);
214        }
215
216        let req = ChatRequest::new().system(prompt.system).user(prompt.user);
217        let reply = match self.backend.complete(req).await {
218            Ok(r) => r,
219            Err(e) => {
220                tracing::warn!(error = %e, "newt-coder: re-prompt inference failed");
221                return Err(original_err);
222            }
223        };
224        let retry_raw = reply.content.clone();
225        let model_id = reply.model_id.clone();
226
227        // The retry must yield whole files; anything else (another diff,
228        // prose) is not usable for this fallback.
229        let emission = match normalize_emission(&retry_raw) {
230            Ok(em @ Emission::WholeFiles(_)) => em,
231            Ok(other) => {
232                tracing::warn!(
233                    emission_shape = %other.shape_label(),
234                    "newt-coder: re-prompt did not return whole files"
235                );
236                return Err(original_err);
237            }
238            Err(e) => {
239                tracing::warn!(error = %e, "newt-coder: re-prompt emission malformed");
240                return Err(original_err);
241            }
242        };
243
244        let shape_label = emission.shape_label().to_string();
245        match self.apply(&emission, workspace, caveats) {
246            Ok(files_written) => {
247                tracing::info!(
248                    emission_shape = %shape_label,
249                    files_written = files_written.len(),
250                    "newt-coder: re-prompt whole-file fallback applied"
251                );
252                Ok(CoderRun {
253                    // Reflect what *actually* applied: the whole-file retry,
254                    // not the original diff.
255                    emission_shape: shape_label,
256                    model_id,
257                    files_written,
258                    // The first emission is the diff the model actually
259                    // produced for the task; the scorecard judges *that*,
260                    // not the rescued retry.
261                    first_emission: first_raw.clone(),
262                    // Keep an audit trail of both turns: the first
263                    // (rejected) diff and the retry that landed.
264                    raw_reply: format!(
265                        "[diff-apply failed, re-prompted for whole files]\n\
266                         --- first reply ---\n{first_raw}\n\
267                         --- retry reply ---\n{retry_raw}"
268                    ),
269                })
270            }
271            Err(e) => {
272                tracing::warn!(
273                    error = %e,
274                    "newt-coder: re-prompt whole-file apply failed"
275                );
276                Err(original_err)
277            }
278        }
279    }
280
281    /// Apply one classified emission to `workspace`, under `caveats`.
282    /// Returns the list of relative paths written, where known.
283    ///
284    /// Every filesystem write goes through the `fs_write` axis first.
285    /// For a [`Emission::WholeFiles`] emission we know every target
286    /// path up front, so the check happens before any write touches
287    /// disk — partial-apply is never possible under a denied caveat.
288    /// For a [`Emission::UnifiedDiff`] we cannot enumerate paths
289    /// without re-parsing, so we require `fs_write` to be
290    /// [`Scope::All`](newt_core::Scope::All) — bounded fs_write +
291    /// diff emission is a denial. This is conservative on purpose:
292    /// 35c will swap diff dispatch for a parser that knows the paths,
293    /// and the conservative rule is easier to weaken later than to
294    /// retrofit a "we already wrote half the diff" rollback.
295    fn apply(
296        &self,
297        emission: &Emission,
298        workspace: &Path,
299        caveats: &Caveats,
300    ) -> Result<Vec<String>> {
301        match emission {
302            Emission::WholeFiles(files) => {
303                // Shape guards before writing. A whole-file emission
304                // legitimately rewrites every line (renames, signature
305                // changes, new doc comments), so we do NOT compare the
306                // body against what's on disk. We reject only bodies
307                // whose *shape* is wrong; the real correctness gate is
308                // the downstream `git diff` capture plus the eval
309                // compile/test evaluators.
310                for (path, contents) in files {
311                    reject_bad_shape(path, contents)?;
312                }
313                // Caveat check: every target path must be permitted on
314                // the fs_write axis. We loop *all* paths before
315                // committing any write so a denial on the second file
316                // can't leave the first file half-written.
317                for path in files.keys() {
318                    if !caveats.permits_fs_write(path) {
319                        return Err(CoderError::CapabilityDenied {
320                            kind: "fs_write",
321                            target: path.clone(),
322                        });
323                    }
324                }
325                // `apply_whole_files` wants `(String, String)` tuples;
326                // collect to give it owned values without leaking the
327                // BTreeMap iterator's lifetime into the call.
328                let pairs: Vec<(String, String)> =
329                    files.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
330                let written = newt_tools::apply_whole_files(workspace, pairs)
331                    .map_err(|e| CoderError::FileWrite(e.to_string()))?;
332                Ok(written)
333            }
334            Emission::UnifiedDiff(diff) => {
335                // We can't enumerate the touched paths without
336                // re-parsing the diff. Be conservative: require
337                // `fs_write = All`. Anything narrower denies the
338                // dispatch up front. Target the diff blob itself so
339                // the error message points the reader at the
340                // can't-enumerate-paths reason.
341                if !matches!(caveats.fs_write, newt_core::Scope::All) {
342                    return Err(CoderError::CapabilityDenied {
343                        kind: "fs_write",
344                        target: "<unified_diff: paths not enumerable>".to_string(),
345                    });
346                }
347                // Legacy path: model emitted a real diff. We don't
348                // know which files it touched without re-parsing, so
349                // return an empty `files_written` — the caller's
350                // `git diff` capture is the source of truth.
351                newt_tools::apply_patch(workspace, diff)
352                    .map_err(|e| CoderError::FileWrite(e.to_string()))?;
353                Ok(Vec::new())
354            }
355            Emission::Prose(prose) => {
356                tracing::warn!(
357                    prose_len = prose.len(),
358                    "newt-coder: prose-only emission, no edits"
359                );
360                Ok(Vec::new())
361            }
362        }
363    }
364}
365
366// ── Enforcement helpers ────────────────────────────────────────────────────
367//
368// One helper per axis the dispatch sites consult. Every helper goes through
369// `Caveats::permits_*` even when the caveat is `top` — there is no fast-path
370// bypass, by design. See the module/`Coder::run` doc comments.
371
372/// Check whether `caveats.max_calls` permits one more inference call
373/// given `used_so_far` calls already counted against this run.
374fn check_call_budget(caveats: &Caveats, used_so_far: u64) -> Result<()> {
375    if caveats.max_calls.permits_one_more(used_so_far) {
376        Ok(())
377    } else {
378        Err(CoderError::CapabilityDenied {
379            kind: "max_calls",
380            target: format!("turn #{}", used_so_far + 1),
381        })
382    }
383}
384
385/// Check whether `caveats.net` permits the network call the backend
386/// would make on `complete()`. Backends with no endpoint (mocks,
387/// in-process plugins) skip the check vacuously — there is no host to
388/// consult.
389fn check_net(caveats: &Caveats, backend: &dyn InferenceBackend) -> Result<()> {
390    let endpoint = match backend.endpoint() {
391        Some(e) => e,
392        None => return Ok(()),
393    };
394    let host = host_from_endpoint(endpoint);
395    if caveats.permits_net(host) {
396        Ok(())
397    } else {
398        Err(CoderError::CapabilityDenied {
399            kind: "net",
400            target: host.to_string(),
401        })
402    }
403}
404
405/// Check whether `caveats.fs_read` permits every file the prompt
406/// actually injected. We gate on `included_files` (what was read), not
407/// on the wider candidate set the scanner considered, so the denial
408/// fires only when the model would have *seen* a forbidden path.
409fn check_fs_read(caveats: &Caveats, prompt: &CoderPrompt) -> Result<()> {
410    for path in &prompt.included_files {
411        let s = path.to_string_lossy();
412        if !caveats.permits_fs_read(&s) {
413            return Err(CoderError::CapabilityDenied {
414                kind: "fs_read",
415                target: s.into_owned(),
416            });
417        }
418    }
419    Ok(())
420}
421
/// Extract the host portion of an HTTP(S) URL — enough for the
/// `caveats.net` exact-match check, without dragging in a `url` crate
/// dependency.
///
/// Steps, in order:
///
/// 1. Strip a leading `scheme://`. The `://` only counts when
///    everything before it consists of scheme characters (ASCII
///    alphanumerics, `+`, `-`, `.`), so a `://` buried in the query of a
///    scheme-less endpoint is not mistaken for a scheme.
/// 2. Keep the authority: everything up to the first `/`, `?`, or `#`.
/// 3. Drop any `userinfo@` prefix (up to the last `@`). Without this,
///    `https://allowed.example:x@evil.example/` would be checked as
///    `allowed.example` while the request targets `evil.example`.
/// 4. For a bracketed IPv6 literal return `[...]` including the
///    brackets; otherwise cut at the port `:`.
///
/// An input with no scheme is treated as already being `host[:port]...`.
/// A malformed IPv6 literal (no closing `]`) is returned as-is.
///
/// NOTE(review): IPv6 hosts come back in bracketed form (`[::1]`) —
/// confirm that matches how the `net` allow-list spells them.
fn host_from_endpoint(endpoint: &str) -> &str {
    let is_scheme_byte = |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.');
    let after_scheme = match endpoint.find("://") {
        Some(i) if endpoint[..i].bytes().all(is_scheme_byte) => &endpoint[i + 3..],
        _ => endpoint,
    };

    // The authority ends at the first path, query, or fragment delimiter.
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];

    // Credentials are not part of the host; the real host follows the
    // last `@` in the authority.
    let host_port = authority
        .rfind('@')
        .map_or(authority, |at| &authority[at + 1..]);

    // IPv6 literals contain `:` themselves, so they are delimited by
    // brackets rather than by the first colon.
    if host_port.starts_with('[') {
        return match host_port.find(']') {
            Some(close) => &host_port[..=close],
            None => host_port,
        };
    }

    host_port
        .find(':')
        .map_or(host_port, |colon| &host_port[..colon])
}
437
438/// Reject a whole-file emission whose body has the wrong *shape*.
439///
440/// This replaces the old "first non-blank line must equal the file's
441/// existing anchor line" check, which wrongly rejected correct output
442/// whenever a rename or signature change altered line 1. Instead we
443/// only refuse bodies that are:
444///
445/// - empty / whitespace-only ([`CoderError::EmptyEmission`]),
446/// - diff-shaped — first non-blank line starts with `--- `, `+++ `, or
447///   `@@` ([`CoderError::LooksLikeDiff`]), or
448/// - still prefixed with a leaked `FILE:` marker as their first
449///   non-blank line ([`CoderError::LeakedMarker`]) — defense in depth
450///   in case [`crate::emission`] did not strip it.
451fn reject_bad_shape(path: &str, contents: &str) -> Result<()> {
452    let first_non_blank = contents.lines().find(|l| !l.trim().is_empty());
453    match first_non_blank {
454        None => Err(CoderError::EmptyEmission {
455            path: path.to_string(),
456        }),
457        Some(first) => {
458            let trimmed = first.trim_start();
459            if trimmed.starts_with("--- ")
460                || trimmed.starts_with("+++ ")
461                || trimmed.starts_with("@@")
462            {
463                return Err(CoderError::LooksLikeDiff {
464                    path: path.to_string(),
465                });
466            }
467            if trimmed.starts_with("FILE:") {
468                return Err(CoderError::LeakedMarker {
469                    path: path.to_string(),
470                });
471            }
472            Ok(())
473        }
474    }
475}
476
477#[cfg(test)]
478mod tests {
479    use super::*;
480    use std::collections::BTreeMap;
481    use std::fs;
482    use tempfile::TempDir;
483
484    // Apply-only tests; the end-to-end smoke (build_prompt -> backend
485    // -> normalize -> apply) lives in tests/coder_smoke.rs.
486
487    fn coder_with_no_backend_used() -> Coder {
488        // The `apply` method does not call the backend, so we can use
489        // any backend here. We construct one only so the type checks.
490        // Tests in tests/ use a real MockBackend for the run() path.
491        struct Stub;
492        #[async_trait::async_trait]
493        impl InferenceBackend for Stub {
494            fn name(&self) -> &str {
495                "stub"
496            }
497            fn model_id(&self) -> &str {
498                "stub-model"
499            }
500            fn supports_tier(&self, _t: newt_core::router::Tier) -> bool {
501                false
502            }
503            async fn complete(
504                &self,
505                _req: ChatRequest,
506            ) -> anyhow::Result<newt_inference::ChatReply> {
507                unreachable!("apply tests do not call the backend")
508            }
509        }
510        Coder::new(Arc::new(Stub))
511    }
512
513    #[test]
514    fn apply_whole_files_writes_to_workspace() {
515        let tmp = TempDir::new().unwrap();
516        let coder = coder_with_no_backend_used();
517
518        let mut files = BTreeMap::new();
519        files.insert("src/lib.rs".to_string(), "pub fn hello() {}\n".to_string());
520
521        let written = coder
522            .apply(&Emission::WholeFiles(files), tmp.path(), &Caveats::top())
523            .unwrap();
524        assert_eq!(written, vec!["src/lib.rs".to_string()]);
525        let content = fs::read_to_string(tmp.path().join("src/lib.rs")).unwrap();
526        assert_eq!(content, "pub fn hello() {}\n");
527    }
528
529    #[test]
530    fn apply_prose_writes_nothing() {
531        let tmp = TempDir::new().unwrap();
532        let coder = coder_with_no_backend_used();
533        let written = coder
534            .apply(
535                &Emission::Prose("I've updated it.".to_string()),
536                tmp.path(),
537                &Caveats::top(),
538            )
539            .unwrap();
540        assert!(written.is_empty());
541    }
542
543    #[test]
544    fn apply_unified_diff_returns_empty_files_written() {
545        let tmp = TempDir::new().unwrap();
546        // Seed a file so the diff actually applies.
547        fs::write(tmp.path().join("a.txt"), "old\n").unwrap();
548        let diff = "\
549--- a/a.txt
550+++ b/a.txt
551@@ -1 +1 @@
552-old
553+new
554";
555        let coder = coder_with_no_backend_used();
556        let written = coder
557            .apply(
558                &Emission::UnifiedDiff(diff.to_string()),
559                tmp.path(),
560                &Caveats::top(),
561            )
562            .unwrap();
563        assert!(written.is_empty(), "diff path returns empty files_written");
564        let content = fs::read_to_string(tmp.path().join("a.txt")).unwrap();
565        assert_eq!(content, "new\n");
566    }
567
568    #[test]
569    fn apply_bad_diff_surfaces_filewrite_error() {
570        let tmp = TempDir::new().unwrap();
571        let coder = coder_with_no_backend_used();
572        let bad = Emission::UnifiedDiff("not a real diff".to_string());
573        let err = coder.apply(&bad, tmp.path(), &Caveats::top()).unwrap_err();
574        assert!(matches!(err, CoderError::FileWrite(_)));
575    }
576
577    fn whole_files(path: &str, contents: &str) -> Emission {
578        let mut m = BTreeMap::new();
579        m.insert(path.to_string(), contents.to_string());
580        Emission::WholeFiles(m)
581    }
582
583    #[test]
584    fn apply_whole_files_accepts_line_one_change() {
585        // Regression for failures 1 & 2 (rename / signature change):
586        // the emitted first line differs from the existing first line,
587        // which the old anchor check wrongly rejected. It must now apply.
588        let tmp = TempDir::new().unwrap();
589        let coder = coder_with_no_backend_used();
590        fs::create_dir_all(tmp.path().join("src")).unwrap();
591        fs::write(
592            tmp.path().join("src/lib.rs"),
593            "pub fn hello(name: &str) -> String {\n    format!(\"hi {name}\")\n}\n",
594        )
595        .unwrap();
596
597        let new_body = "pub fn greet(name: &str) -> String {\n    format!(\"hi {name}\")\n}\n";
598        let written = coder
599            .apply(
600                &whole_files("src/lib.rs", new_body),
601                tmp.path(),
602                &Caveats::top(),
603            )
604            .unwrap();
605        assert_eq!(written, vec!["src/lib.rs".to_string()]);
606        assert_eq!(
607            fs::read_to_string(tmp.path().join("src/lib.rs")).unwrap(),
608            new_body
609        );
610    }
611
612    #[test]
613    fn apply_whole_files_rejects_diff_shaped_contents() {
614        let tmp = TempDir::new().unwrap();
615        let coder = coder_with_no_backend_used();
616        fs::write(tmp.path().join("a.txt"), "old\n").unwrap();
617        let diff = "--- a/a.txt\n+++ b/a.txt\n@@ -1 +1 @@\n-old\n+new\n";
618        let err = coder
619            .apply(&whole_files("a.txt", diff), tmp.path(), &Caveats::top())
620            .unwrap_err();
621        assert!(matches!(err, CoderError::LooksLikeDiff { ref path } if path == "a.txt"));
622        // The file must not have been overwritten.
623        assert_eq!(
624            fs::read_to_string(tmp.path().join("a.txt")).unwrap(),
625            "old\n"
626        );
627    }
628
629    #[test]
630    fn apply_whole_files_rejects_hunk_only_contents() {
631        let tmp = TempDir::new().unwrap();
632        let coder = coder_with_no_backend_used();
633        let hunk = "@@ -1,2 +1,2 @@\n-old\n+new\n";
634        let err = coder
635            .apply(&whole_files("a.txt", hunk), tmp.path(), &Caveats::top())
636            .unwrap_err();
637        assert!(matches!(err, CoderError::LooksLikeDiff { .. }));
638    }
639
640    #[test]
641    fn apply_whole_files_rejects_empty_contents() {
642        let tmp = TempDir::new().unwrap();
643        let coder = coder_with_no_backend_used();
644        let err = coder
645            .apply(&whole_files("a.txt", ""), tmp.path(), &Caveats::top())
646            .unwrap_err();
647        assert!(matches!(err, CoderError::EmptyEmission { ref path } if path == "a.txt"));
648    }
649
650    #[test]
651    fn apply_whole_files_rejects_whitespace_only_contents() {
652        let tmp = TempDir::new().unwrap();
653        let coder = coder_with_no_backend_used();
654        let err = coder
655            .apply(
656                &whole_files("a.txt", "   \n\t\n"),
657                tmp.path(),
658                &Caveats::top(),
659            )
660            .unwrap_err();
661        assert!(matches!(err, CoderError::EmptyEmission { .. }));
662    }
663
664    #[test]
665    fn apply_whole_files_rejects_leaked_file_marker() {
666        // Defense in depth (failures 3 & 4): even if a leaked FILE:
667        // marker slipped past the parser, the writer must refuse it.
668        let tmp = TempDir::new().unwrap();
669        let coder = coder_with_no_backend_used();
670        let body = "FILE: src/lib.rs\npub fn add(a: i32, b: i32) -> i32 { a + b }\n";
671        let err = coder
672            .apply(
673                &whole_files("src/lib.rs", body),
674                tmp.path(),
675                &Caveats::top(),
676            )
677            .unwrap_err();
678        assert!(matches!(err, CoderError::LeakedMarker { ref path } if path == "src/lib.rs"));
679    }
680
681    #[test]
682    fn reject_bad_shape_messages_start_with_file_write_failed() {
683        for err in [
684            super::reject_bad_shape("p", "").unwrap_err(),
685            super::reject_bad_shape("p", "--- a/p\n").unwrap_err(),
686            super::reject_bad_shape("p", "FILE: p\n").unwrap_err(),
687        ] {
688            assert!(
689                err.to_string().starts_with("file write failed:"),
690                "message did not start with prefix: {err}"
691            );
692        }
693    }
694
695    // ── Caveat enforcement at the apply boundary ─────────────────────────
696
697    #[test]
698    fn apply_whole_files_denies_path_outside_fs_write_scope() {
699        let tmp = TempDir::new().unwrap();
700        let coder = coder_with_no_backend_used();
701        let caveats = Caveats {
702            fs_write: newt_core::Scope::only(["allowed.rs".to_string()]),
703            ..Caveats::top()
704        };
705
706        // Allowed write succeeds.
707        let allowed = coder
708            .apply(
709                &whole_files("allowed.rs", "fn ok() {}\n"),
710                tmp.path(),
711                &caveats,
712            )
713            .expect("permitted write must succeed");
714        assert_eq!(allowed, vec!["allowed.rs".to_string()]);
715
716        // Forbidden write returns CapabilityDenied.
717        let err = coder
718            .apply(
719                &whole_files("forbidden.rs", "fn evil() {}\n"),
720                tmp.path(),
721                &caveats,
722            )
723            .unwrap_err();
724        match err {
725            CoderError::CapabilityDenied { kind, target } => {
726                assert_eq!(kind, "fs_write");
727                assert_eq!(target, "forbidden.rs");
728            }
729            other => panic!("expected CapabilityDenied, got {other:?}"),
730        }
731        // And the file was never created.
732        assert!(!tmp.path().join("forbidden.rs").exists());
733    }
734
735    #[test]
736    fn apply_whole_files_denies_atomically_on_partial_scope() {
737        // A multi-file emission where one path is denied must write
738        // NOTHING — the check loops every path before committing any
739        // write. Regression for the "wrote half the emission then
740        // refused" failure mode.
741        let tmp = TempDir::new().unwrap();
742        let coder = coder_with_no_backend_used();
743        let caveats = Caveats {
744            fs_write: newt_core::Scope::only(["a.rs".to_string()]),
745            ..Caveats::top()
746        };
747        let mut files = BTreeMap::new();
748        files.insert("a.rs".to_string(), "fn a() {}\n".to_string());
749        files.insert("b.rs".to_string(), "fn b() {}\n".to_string());
750
751        let err = coder
752            .apply(&Emission::WholeFiles(files), tmp.path(), &caveats)
753            .unwrap_err();
754        assert!(matches!(
755            err,
756            CoderError::CapabilityDenied {
757                kind: "fs_write",
758                ..
759            }
760        ));
761        // Neither file landed.
762        assert!(!tmp.path().join("a.rs").exists());
763        assert!(!tmp.path().join("b.rs").exists());
764    }
765
766    #[test]
767    fn apply_unified_diff_denied_under_bounded_fs_write() {
768        // We can't enumerate diff paths up front, so any non-`All`
769        // fs_write scope conservatively denies the dispatch.
770        let tmp = TempDir::new().unwrap();
771        let coder = coder_with_no_backend_used();
772        let caveats = Caveats {
773            fs_write: newt_core::Scope::only(["whatever.rs".to_string()]),
774            ..Caveats::top()
775        };
776        let diff = Emission::UnifiedDiff(
777            "--- a/whatever.rs\n+++ b/whatever.rs\n@@ -1 +1 @@\n-x\n+y\n".to_string(),
778        );
779        let err = coder.apply(&diff, tmp.path(), &caveats).unwrap_err();
780        assert!(matches!(
781            err,
782            CoderError::CapabilityDenied {
783                kind: "fs_write",
784                ..
785            }
786        ));
787    }
788
789    // ── host_from_endpoint ───────────────────────────────────────────────
790
791    #[test]
792    fn host_from_endpoint_strips_scheme_and_path() {
793        assert_eq!(
794            super::host_from_endpoint("http://localhost:11434/api/chat"),
795            "localhost"
796        );
797        assert_eq!(
798            super::host_from_endpoint("https://allowed.example.com/v1/chat"),
799            "allowed.example.com"
800        );
801        // No scheme — treated as a bare host.
802        assert_eq!(
803            super::host_from_endpoint("bare.host.local"),
804            "bare.host.local"
805        );
806        // No path, just host:port.
807        assert_eq!(super::host_from_endpoint("http://h:8080"), "h");
808        // Empty path component.
809        assert_eq!(super::host_from_endpoint("https://only.host/"), "only.host");
810    }
811
812    // ── check_call_budget ────────────────────────────────────────────────
813
814    #[test]
815    fn check_call_budget_passes_under_unlimited() {
816        super::check_call_budget(&Caveats::top(), 0).unwrap();
817        super::check_call_budget(&Caveats::top(), 999_999).unwrap();
818    }
819
820    #[test]
821    fn check_call_budget_passes_within_bound() {
822        let caveats = Caveats {
823            max_calls: newt_core::CountBound::AtMost(3),
824            ..Caveats::top()
825        };
826        super::check_call_budget(&caveats, 0).unwrap();
827        super::check_call_budget(&caveats, 2).unwrap();
828    }
829
830    #[test]
831    fn check_call_budget_denies_at_bound() {
832        let caveats = Caveats {
833            max_calls: newt_core::CountBound::AtMost(2),
834            ..Caveats::top()
835        };
836        let err = super::check_call_budget(&caveats, 2).unwrap_err();
837        match err {
838            CoderError::CapabilityDenied { kind, target } => {
839                assert_eq!(kind, "max_calls");
840                assert!(target.contains("#3"));
841            }
842            other => panic!("expected CapabilityDenied, got {other:?}"),
843        }
844    }
845}
newt_coder/coder.rs

newt_coder/
coder.rs