newt_coder/coder.rs
1//! The Coder orchestrator — prompt -> infer -> normalize -> apply.
2//!
3//! One method, [`Coder::run`], wires the four pieces together:
4//!
5//! 1. [`build_prompt`](crate::prompt::build_prompt) scans the workspace
6//! for relevant files and composes a `(system, user)` pair around
7//! the S5 whole-file directive.
8//! 2. The injected [`InferenceBackend`] runs one `complete` turn.
9//! 3. [`normalize_emission`](crate::emission::normalize_emission)
10//! classifies the raw reply as `WholeFiles` / `UnifiedDiff` /
11//! `Prose`.
12//! 4. The classified emission is applied to the workspace:
13//! `apply_whole_files` for the directive's happy path,
14//! `apply_patch` for the diff fallback, no-op + warn for prose.
15//!
16//! The caller (newt-acp-worker) then runs `git diff` to capture the
17//! real workspace diff — the foreman's empty-diff signal is computed
18//! from `git diff`, not from anything in this struct.
19
20use std::path::Path;
21use std::sync::Arc;
22
23use newt_core::Caveats;
24use newt_inference::{ChatRequest, InferenceBackend};
25
26use crate::emission::{normalize_emission, Emission};
27use crate::error::{CoderError, Result};
28use crate::prompt::{build_prompt, build_reprompt, CoderPrompt};
29
/// The coder: the orchestrator behind [`Coder::run`], which performs one
/// prompt -> infer -> normalize -> apply turn per call.
///
/// Holds the inference backend the orchestrator uses for each `run`
/// call; the backend is `Arc<dyn …>` so callers can share one backend
/// across coder + non-coder paths.
pub struct Coder {
    /// Shared inference backend; every `complete` call issued by this
    /// coder (first turn and re-prompt retry) goes to it.
    backend: Arc<dyn InferenceBackend>,
}
36
/// Outcome of one `Coder::run` turn. Surfaced via the ACP worker's
/// `TaskReply.emission_shape` so the foreman's scorecard can
/// distinguish T0a / T0b / T0c instead of lumping them as "empty
/// diff".
#[derive(Debug, Clone)]
pub struct CoderRun {
    /// Wire-stable shape label: "whole_files", "unified_diff", "prose".
    /// When the whole-file re-prompt fallback lands, this is the label
    /// of the retry emission that actually applied, not of the first
    /// reply.
    pub emission_shape: String,
    /// Model id the inference backend returned (taken from the retry
    /// reply when the re-prompt fallback fires).
    pub model_id: String,
    /// Relative paths of files the run wrote (empty for prose / diff
    /// — the diff path doesn't tell us which files it touched without
    /// re-parsing).
    pub files_written: Vec<String>,
    /// The raw model reply. Useful for audit logs and post-mortem.
    /// NOTE: when the whole-file re-prompt fallback fires this becomes a
    /// composite first+retry transcript — use [`Self::first_emission`]
    /// when you need just the model's initial output.
    pub raw_reply: String,
    /// The model's *first* raw emission, before any re-prompt fallback.
    /// Always the initial reply (never a composite), so the eval
    /// scorecard can judge it with `git apply --check` (#30B) to tell a
    /// clean diff from a sloppy one the fuzzy worker merely rescued.
    pub first_emission: String,
}
62
63impl Coder {
64 /// Build a coder bound to `backend`.
65 pub fn new(backend: Arc<dyn InferenceBackend>) -> Self {
66 Self { backend }
67 }
68
69 /// Run one turn against `workspace` under the authority carried by
70 /// `caveats`.
71 ///
72 /// `caveats` is the peer's signed, verified attenuated authority — see
73 /// `docs/decisions/agentic_object_capability_security.md` and the
74 /// 35a [`caveats_for_peer`] extractor in `newt-mesh`. Every tool
75 /// dispatch this method makes (`fs_read` for the prompt scan,
76 /// `net` for the inference call, `fs_write` for the apply, plus the
77 /// `max_calls` budget for inference turns) goes through the
78 /// enforcement helpers below — no path bypasses the check, even when
79 /// `caveats == Caveats::top()`. That symmetry is load-bearing: 35c
80 /// will tighten authority per peer, and a "skip checks if top"
81 /// shortcut would silently break that tightening.
82 ///
83 /// On any caveat refusal we return [`CoderError::CapabilityDenied`]
84 /// carrying the axis name and the concrete target the dispatch tried
85 /// to touch — enough context for the arbiter scorecard to count this
86 /// as a scrubbed sortie rather than a model failure.
87 ///
88 /// Happy path: build prompt -> infer -> normalize -> apply.
89 ///
90 /// Weak-model fallback: when the model emits a [`Emission::UnifiedDiff`]
91 /// (even under the whole-file directive) and that diff fails to apply
92 /// — its line numbers / context are too far off even for the fuzzy
93 /// matcher in `newt-tools::apply_patch` — we issue exactly ONE
94 /// re-prompt asking for the COMPLETE file(s) in `FILE:`/`END-FILE`
95 /// form, then apply via the hardened `apply_whole_files` path. The
96 /// retry counts as a *second* inference call against the
97 /// `max_calls` budget; if that budget would be exhausted we return
98 /// the original apply error rather than escalating to a denial.
99 ///
100 /// [`caveats_for_peer`]: https://docs.rs/newt-mesh/latest/newt_mesh/caveats/fn.caveats_for_peer.html
101 pub async fn run(&self, workspace: &Path, task: &str, caveats: &Caveats) -> Result<CoderRun> {
102 // 1. Build the prompt. `build_prompt` is what *reads* the
103 // workspace, so the fs_read check is gated on the files the
104 // prompt actually injected, not on the candidate set the
105 // scanner considered.
106 let prompt = build_prompt(workspace, task)?;
107 check_fs_read(caveats, &prompt)?;
108 tracing::info!(
109 files_included = prompt.included_files.len(),
110 user_chars = prompt.user.len(),
111 "newt-coder prompt built"
112 );
113
114 // 2. First inference call — guarded by the net + max_calls axes.
115 let mut calls_used: u64 = 0;
116 check_call_budget(caveats, calls_used)?;
117 check_net(caveats, self.backend.as_ref())?;
118 let req = ChatRequest::new().system(prompt.system).user(prompt.user);
119 let reply = self
120 .backend
121 .complete(req)
122 .await
123 .map_err(|e| CoderError::Inference(e.to_string()))?;
124 calls_used += 1;
125 let raw = reply.content.clone();
126 let model_id = reply.model_id.clone();
127
128 let emission = normalize_emission(&raw)?;
129 let shape_label = emission.shape_label().to_string();
130
131 // 3. Try to apply the first emission — `apply` consults the
132 // fs_write axis before each write.
133 match self.apply(&emission, workspace, caveats) {
134 Ok(files_written) => {
135 tracing::info!(
136 emission_shape = %shape_label,
137 files_written = files_written.len(),
138 "newt-coder run complete"
139 );
140 Ok(CoderRun {
141 emission_shape: shape_label,
142 model_id,
143 files_written,
144 first_emission: raw.clone(),
145 raw_reply: raw,
146 })
147 }
148 // The first emission was diff-shaped and did not apply: either a
149 // unified diff whose context was too far off even for the fuzzy
150 // matcher, or diff content the model wrapped in FILE:/END-FILE
151 // markers (classified as whole-files but rejected by the
152 // diff-shape guard). Both are recoverable with a single re-prompt
153 // for proper whole-file output.
154 Err(first_err)
155 if matches!(emission, Emission::UnifiedDiff(_))
156 || matches!(first_err, CoderError::LooksLikeDiff { .. }) =>
157 {
158 tracing::warn!(
159 error = %first_err,
160 "newt-coder: diff-shaped emission did not apply, re-prompting for whole files"
161 );
162 self.reprompt_whole_files(workspace, task, raw, first_err, calls_used, caveats)
163 .await
164 }
165 Err(other) => Err(other),
166 }
167 }
168
169 /// Single-retry fallback: re-prompt the model for the complete
170 /// file(s) and apply via `apply_whole_files`.
171 ///
172 /// Bounded to ONE additional inference call — there is no loop. The
173 /// retry counts as a *second* tool call against
174 /// [`Caveats::max_calls`], and if the budget would be exhausted by
175 /// that second call we fall through to `original_err` (the apply
176 /// failure from the first attempt). On any failure of the retry
177 /// (inference error, the model returning yet another diff / prose,
178 /// or the whole-file apply failing the shape guards or fs_write
179 /// caveat) we return `original_err`, so the caller sees the root
180 /// cause rather than a confusing second-order failure.
181 async fn reprompt_whole_files(
182 &self,
183 workspace: &Path,
184 task: &str,
185 first_raw: String,
186 original_err: CoderError,
187 calls_used: u64,
188 caveats: &Caveats,
189 ) -> Result<CoderRun> {
190 // The retry would be the (calls_used + 1)-th call; if the
191 // budget can't cover it, don't degrade the diagnostic by
192 // surfacing a fresh capability denial — keep the original
193 // apply failure, which is more actionable.
194 if !caveats.max_calls.permits_one_more(calls_used) {
195 tracing::warn!(
196 calls_used,
197 "newt-coder: re-prompt skipped, max_calls budget exhausted"
198 );
199 return Err(original_err);
200 }
201
202 let prompt = match build_reprompt(workspace, task) {
203 Ok(p) => p,
204 Err(e) => {
205 tracing::warn!(error = %e, "newt-coder: re-prompt build failed");
206 return Err(original_err);
207 }
208 };
209 // The re-prompt re-reads the same workspace; fs_read scope must
210 // still permit every file the second pass would inject.
211 if let Err(e) = check_fs_read(caveats, &prompt) {
212 tracing::warn!(error = %e, "newt-coder: re-prompt fs_read denied");
213 return Err(original_err);
214 }
215
216 let req = ChatRequest::new().system(prompt.system).user(prompt.user);
217 let reply = match self.backend.complete(req).await {
218 Ok(r) => r,
219 Err(e) => {
220 tracing::warn!(error = %e, "newt-coder: re-prompt inference failed");
221 return Err(original_err);
222 }
223 };
224 let retry_raw = reply.content.clone();
225 let model_id = reply.model_id.clone();
226
227 // The retry must yield whole files; anything else (another diff,
228 // prose) is not usable for this fallback.
229 let emission = match normalize_emission(&retry_raw) {
230 Ok(em @ Emission::WholeFiles(_)) => em,
231 Ok(other) => {
232 tracing::warn!(
233 emission_shape = %other.shape_label(),
234 "newt-coder: re-prompt did not return whole files"
235 );
236 return Err(original_err);
237 }
238 Err(e) => {
239 tracing::warn!(error = %e, "newt-coder: re-prompt emission malformed");
240 return Err(original_err);
241 }
242 };
243
244 let shape_label = emission.shape_label().to_string();
245 match self.apply(&emission, workspace, caveats) {
246 Ok(files_written) => {
247 tracing::info!(
248 emission_shape = %shape_label,
249 files_written = files_written.len(),
250 "newt-coder: re-prompt whole-file fallback applied"
251 );
252 Ok(CoderRun {
253 // Reflect what *actually* applied: the whole-file retry,
254 // not the original diff.
255 emission_shape: shape_label,
256 model_id,
257 files_written,
258 // The first emission is the diff the model actually
259 // produced for the task; the scorecard judges *that*,
260 // not the rescued retry.
261 first_emission: first_raw.clone(),
262 // Keep an audit trail of both turns: the first
263 // (rejected) diff and the retry that landed.
264 raw_reply: format!(
265 "[diff-apply failed, re-prompted for whole files]\n\
266 --- first reply ---\n{first_raw}\n\
267 --- retry reply ---\n{retry_raw}"
268 ),
269 })
270 }
271 Err(e) => {
272 tracing::warn!(
273 error = %e,
274 "newt-coder: re-prompt whole-file apply failed"
275 );
276 Err(original_err)
277 }
278 }
279 }
280
281 /// Apply one classified emission to `workspace`, under `caveats`.
282 /// Returns the list of relative paths written, where known.
283 ///
284 /// Every filesystem write goes through the `fs_write` axis first.
285 /// For a [`Emission::WholeFiles`] emission we know every target
286 /// path up front, so the check happens before any write touches
287 /// disk — partial-apply is never possible under a denied caveat.
288 /// For a [`Emission::UnifiedDiff`] we cannot enumerate paths
289 /// without re-parsing, so we require `fs_write` to be
290 /// [`Scope::All`](newt_core::Scope::All) — bounded fs_write +
291 /// diff emission is a denial. This is conservative on purpose:
292 /// 35c will swap diff dispatch for a parser that knows the paths,
293 /// and the conservative rule is easier to weaken later than to
294 /// retrofit a "we already wrote half the diff" rollback.
295 fn apply(
296 &self,
297 emission: &Emission,
298 workspace: &Path,
299 caveats: &Caveats,
300 ) -> Result<Vec<String>> {
301 match emission {
302 Emission::WholeFiles(files) => {
303 // Shape guards before writing. A whole-file emission
304 // legitimately rewrites every line (renames, signature
305 // changes, new doc comments), so we do NOT compare the
306 // body against what's on disk. We reject only bodies
307 // whose *shape* is wrong; the real correctness gate is
308 // the downstream `git diff` capture plus the eval
309 // compile/test evaluators.
310 for (path, contents) in files {
311 reject_bad_shape(path, contents)?;
312 }
313 // Caveat check: every target path must be permitted on
314 // the fs_write axis. We loop *all* paths before
315 // committing any write so a denial on the second file
316 // can't leave the first file half-written.
317 for path in files.keys() {
318 if !caveats.permits_fs_write(path) {
319 return Err(CoderError::CapabilityDenied {
320 kind: "fs_write",
321 target: path.clone(),
322 });
323 }
324 }
325 // `apply_whole_files` wants `(String, String)` tuples;
326 // collect to give it owned values without leaking the
327 // BTreeMap iterator's lifetime into the call.
328 let pairs: Vec<(String, String)> =
329 files.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
330 let written = newt_tools::apply_whole_files(workspace, pairs)
331 .map_err(|e| CoderError::FileWrite(e.to_string()))?;
332 Ok(written)
333 }
334 Emission::UnifiedDiff(diff) => {
335 // We can't enumerate the touched paths without
336 // re-parsing the diff. Be conservative: require
337 // `fs_write = All`. Anything narrower denies the
338 // dispatch up front. Target the diff blob itself so
339 // the error message points the reader at the
340 // can't-enumerate-paths reason.
341 if !matches!(caveats.fs_write, newt_core::Scope::All) {
342 return Err(CoderError::CapabilityDenied {
343 kind: "fs_write",
344 target: "<unified_diff: paths not enumerable>".to_string(),
345 });
346 }
347 // Legacy path: model emitted a real diff. We don't
348 // know which files it touched without re-parsing, so
349 // return an empty `files_written` — the caller's
350 // `git diff` capture is the source of truth.
351 newt_tools::apply_patch(workspace, diff)
352 .map_err(|e| CoderError::FileWrite(e.to_string()))?;
353 Ok(Vec::new())
354 }
355 Emission::Prose(prose) => {
356 tracing::warn!(
357 prose_len = prose.len(),
358 "newt-coder: prose-only emission, no edits"
359 );
360 Ok(Vec::new())
361 }
362 }
363 }
364}
365
366// ── Enforcement helpers ────────────────────────────────────────────────────
367//
368// One helper per axis the dispatch sites consult. Every helper goes through
369// `Caveats::permits_*` even when the caveat is `top` — there is no fast-path
370// bypass, by design. See the module/`Coder::run` doc comments.
371
372/// Check whether `caveats.max_calls` permits one more inference call
373/// given `used_so_far` calls already counted against this run.
374fn check_call_budget(caveats: &Caveats, used_so_far: u64) -> Result<()> {
375 if caveats.max_calls.permits_one_more(used_so_far) {
376 Ok(())
377 } else {
378 Err(CoderError::CapabilityDenied {
379 kind: "max_calls",
380 target: format!("turn #{}", used_so_far + 1),
381 })
382 }
383}
384
385/// Check whether `caveats.net` permits the network call the backend
386/// would make on `complete()`. Backends with no endpoint (mocks,
387/// in-process plugins) skip the check vacuously — there is no host to
388/// consult.
389fn check_net(caveats: &Caveats, backend: &dyn InferenceBackend) -> Result<()> {
390 let endpoint = match backend.endpoint() {
391 Some(e) => e,
392 None => return Ok(()),
393 };
394 let host = host_from_endpoint(endpoint);
395 if caveats.permits_net(host) {
396 Ok(())
397 } else {
398 Err(CoderError::CapabilityDenied {
399 kind: "net",
400 target: host.to_string(),
401 })
402 }
403}
404
405/// Check whether `caveats.fs_read` permits every file the prompt
406/// actually injected. We gate on `included_files` (what was read), not
407/// on the wider candidate set the scanner considered, so the denial
408/// fires only when the model would have *seen* a forbidden path.
409fn check_fs_read(caveats: &Caveats, prompt: &CoderPrompt) -> Result<()> {
410 for path in &prompt.included_files {
411 let s = path.to_string_lossy();
412 if !caveats.permits_fs_read(&s) {
413 return Err(CoderError::CapabilityDenied {
414 kind: "fs_read",
415 target: s.into_owned(),
416 });
417 }
418 }
419 Ok(())
420}
421
/// Extract the host portion of an HTTP(S) endpoint — enough for the
/// `caveats.net` exact-match check, without dragging in a `url` crate
/// dependency.
///
/// Parsing steps:
///
/// 1. Drop everything through the first `://`, if present. Input with
///    no scheme is parsed as if it already started at the authority.
/// 2. Cut the authority at the first `/`, `?`, or `#`.
/// 3. Drop any `userinfo@` prefix (everything through the LAST `@` in
///    the authority). Without this, `http://allowed.host:pw@evil.host/`
///    would be judged as `allowed.host` while the request actually goes
///    to `evil.host`.
/// 4. If the remainder is a bracketed IPv6 literal (`[::1]:8080`),
///    return it with its brackets (`[::1]`); otherwise cut at the first
///    `:` to drop the port.
///
/// The returned slice always borrows from `endpoint`. No validation or
/// normalization (case folding, percent-decoding) is performed.
fn host_from_endpoint(endpoint: &str) -> &str {
    let after_scheme = endpoint
        .find("://")
        .map_or(endpoint, |i| &endpoint[i + 3..]);

    // The authority ends at the start of the path, query, or fragment.
    let authority_end = after_scheme
        .find(['/', '?', '#'])
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];

    // `user:password@host` — the real host is whatever follows the last
    // `@`; the userinfo part must never be mistaken for it.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);

    // IPv6 literals contain colons, so they are delimited by brackets
    // and must be taken whole rather than cut at the first `:`.
    if host_port.starts_with('[') {
        if let Some(close) = host_port.find(']') {
            return &host_port[..=close];
        }
    }

    host_port
        .find(':')
        .map_or(host_port, |i| &host_port[..i])
}
437
438/// Reject a whole-file emission whose body has the wrong *shape*.
439///
440/// This replaces the old "first non-blank line must equal the file's
441/// existing anchor line" check, which wrongly rejected correct output
442/// whenever a rename or signature change altered line 1. Instead we
443/// only refuse bodies that are:
444///
445/// - empty / whitespace-only ([`CoderError::EmptyEmission`]),
446/// - diff-shaped — first non-blank line starts with `--- `, `+++ `, or
447/// `@@` ([`CoderError::LooksLikeDiff`]), or
448/// - still prefixed with a leaked `FILE:` marker as their first
449/// non-blank line ([`CoderError::LeakedMarker`]) — defense in depth
450/// in case [`crate::emission`] did not strip it.
451fn reject_bad_shape(path: &str, contents: &str) -> Result<()> {
452 let first_non_blank = contents.lines().find(|l| !l.trim().is_empty());
453 match first_non_blank {
454 None => Err(CoderError::EmptyEmission {
455 path: path.to_string(),
456 }),
457 Some(first) => {
458 let trimmed = first.trim_start();
459 if trimmed.starts_with("--- ")
460 || trimmed.starts_with("+++ ")
461 || trimmed.starts_with("@@")
462 {
463 return Err(CoderError::LooksLikeDiff {
464 path: path.to_string(),
465 });
466 }
467 if trimmed.starts_with("FILE:") {
468 return Err(CoderError::LeakedMarker {
469 path: path.to_string(),
470 });
471 }
472 Ok(())
473 }
474 }
475}
476
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::fs;
    use tempfile::TempDir;

    // Apply-only tests; the end-to-end smoke (build_prompt -> backend
    // -> normalize -> apply) lives in tests/coder_smoke.rs.

    /// Build a `Coder` whose backend panics if it is ever asked to
    /// complete a request; suitable only for tests that call `apply`.
    fn coder_with_no_backend_used() -> Coder {
        // The `apply` method does not call the backend, so we can use
        // any backend here. We construct one only so the type checks.
        // Tests in tests/ use a real MockBackend for the run() path.
        struct Stub;
        #[async_trait::async_trait]
        impl InferenceBackend for Stub {
            fn name(&self) -> &str {
                "stub"
            }
            fn model_id(&self) -> &str {
                "stub-model"
            }
            fn supports_tier(&self, _t: newt_core::router::Tier) -> bool {
                false
            }
            async fn complete(
                &self,
                _req: ChatRequest,
            ) -> anyhow::Result<newt_inference::ChatReply> {
                unreachable!("apply tests do not call the backend")
            }
        }
        Coder::new(Arc::new(Stub))
    }

    // A whole-file emission creates the file (including its parent
    // directory) and reports the written path.
    #[test]
    fn apply_whole_files_writes_to_workspace() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();

        let mut files = BTreeMap::new();
        files.insert("src/lib.rs".to_string(), "pub fn hello() {}\n".to_string());

        let written = coder
            .apply(&Emission::WholeFiles(files), tmp.path(), &Caveats::top())
            .unwrap();
        assert_eq!(written, vec!["src/lib.rs".to_string()]);
        let content = fs::read_to_string(tmp.path().join("src/lib.rs")).unwrap();
        assert_eq!(content, "pub fn hello() {}\n");
    }

    // A prose emission succeeds but reports no written files.
    #[test]
    fn apply_prose_writes_nothing() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let written = coder
            .apply(
                &Emission::Prose("I've updated it.".to_string()),
                tmp.path(),
                &Caveats::top(),
            )
            .unwrap();
        assert!(written.is_empty());
    }

    // A diff that applies changes the file on disk, yet the returned
    // path list is empty because the diff path does not enumerate files.
    #[test]
    fn apply_unified_diff_returns_empty_files_written() {
        let tmp = TempDir::new().unwrap();
        // Seed a file so the diff actually applies.
        fs::write(tmp.path().join("a.txt"), "old\n").unwrap();
        let diff = "\
--- a/a.txt
+++ b/a.txt
@@ -1 +1 @@
-old
+new
";
        let coder = coder_with_no_backend_used();
        let written = coder
            .apply(
                &Emission::UnifiedDiff(diff.to_string()),
                tmp.path(),
                &Caveats::top(),
            )
            .unwrap();
        assert!(written.is_empty(), "diff path returns empty files_written");
        let content = fs::read_to_string(tmp.path().join("a.txt")).unwrap();
        assert_eq!(content, "new\n");
    }

    // A diff that cannot be applied is reported as `FileWrite`.
    #[test]
    fn apply_bad_diff_surfaces_filewrite_error() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let bad = Emission::UnifiedDiff("not a real diff".to_string());
        let err = coder.apply(&bad, tmp.path(), &Caveats::top()).unwrap_err();
        assert!(matches!(err, CoderError::FileWrite(_)));
    }

    /// Build a single-file `Emission::WholeFiles` for `path` / `contents`.
    fn whole_files(path: &str, contents: &str) -> Emission {
        let mut m = BTreeMap::new();
        m.insert(path.to_string(), contents.to_string());
        Emission::WholeFiles(m)
    }

    #[test]
    fn apply_whole_files_accepts_line_one_change() {
        // Regression for failures 1 & 2 (rename / signature change):
        // the emitted first line differs from the existing first line,
        // which the old anchor check wrongly rejected. It must now apply.
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        fs::create_dir_all(tmp.path().join("src")).unwrap();
        fs::write(
            tmp.path().join("src/lib.rs"),
            "pub fn hello(name: &str) -> String {\n format!(\"hi {name}\")\n}\n",
        )
        .unwrap();

        let new_body = "pub fn greet(name: &str) -> String {\n format!(\"hi {name}\")\n}\n";
        let written = coder
            .apply(
                &whole_files("src/lib.rs", new_body),
                tmp.path(),
                &Caveats::top(),
            )
            .unwrap();
        assert_eq!(written, vec!["src/lib.rs".to_string()]);
        assert_eq!(
            fs::read_to_string(tmp.path().join("src/lib.rs")).unwrap(),
            new_body
        );
    }

    // Diff text wrapped as a whole-file body is refused with
    // `LooksLikeDiff`, and the existing file is left untouched.
    #[test]
    fn apply_whole_files_rejects_diff_shaped_contents() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        fs::write(tmp.path().join("a.txt"), "old\n").unwrap();
        let diff = "--- a/a.txt\n+++ b/a.txt\n@@ -1 +1 @@\n-old\n+new\n";
        let err = coder
            .apply(&whole_files("a.txt", diff), tmp.path(), &Caveats::top())
            .unwrap_err();
        assert!(matches!(err, CoderError::LooksLikeDiff { ref path } if path == "a.txt"));
        // The file must not have been overwritten.
        assert_eq!(
            fs::read_to_string(tmp.path().join("a.txt")).unwrap(),
            "old\n"
        );
    }

    // A body that starts directly with a hunk header (`@@`) is also
    // treated as diff-shaped.
    #[test]
    fn apply_whole_files_rejects_hunk_only_contents() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let hunk = "@@ -1,2 +1,2 @@\n-old\n+new\n";
        let err = coder
            .apply(&whole_files("a.txt", hunk), tmp.path(), &Caveats::top())
            .unwrap_err();
        assert!(matches!(err, CoderError::LooksLikeDiff { .. }));
    }

    // An empty body is refused with `EmptyEmission` naming the path.
    #[test]
    fn apply_whole_files_rejects_empty_contents() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let err = coder
            .apply(&whole_files("a.txt", ""), tmp.path(), &Caveats::top())
            .unwrap_err();
        assert!(matches!(err, CoderError::EmptyEmission { ref path } if path == "a.txt"));
    }

    // A body with only whitespace / blank lines counts as empty too.
    #[test]
    fn apply_whole_files_rejects_whitespace_only_contents() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let err = coder
            .apply(
                &whole_files("a.txt", " \n\t\n"),
                tmp.path(),
                &Caveats::top(),
            )
            .unwrap_err();
        assert!(matches!(err, CoderError::EmptyEmission { .. }));
    }

    #[test]
    fn apply_whole_files_rejects_leaked_file_marker() {
        // Defense in depth (failures 3 & 4): even if a leaked FILE:
        // marker slipped past the parser, the writer must refuse it.
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let body = "FILE: src/lib.rs\npub fn add(a: i32, b: i32) -> i32 { a + b }\n";
        let err = coder
            .apply(
                &whole_files("src/lib.rs", body),
                tmp.path(),
                &Caveats::top(),
            )
            .unwrap_err();
        assert!(matches!(err, CoderError::LeakedMarker { ref path } if path == "src/lib.rs"));
    }

    // All three shape-guard errors render with the same
    // "file write failed:" message prefix.
    #[test]
    fn reject_bad_shape_messages_start_with_file_write_failed() {
        for err in [
            super::reject_bad_shape("p", "").unwrap_err(),
            super::reject_bad_shape("p", "--- a/p\n").unwrap_err(),
            super::reject_bad_shape("p", "FILE: p\n").unwrap_err(),
        ] {
            assert!(
                err.to_string().starts_with("file write failed:"),
                "message did not start with prefix: {err}"
            );
        }
    }

    // ── Caveat enforcement at the apply boundary ─────────────────────────

    // With `fs_write` bounded to one path, that path can be written and
    // any other path is denied without creating the file.
    #[test]
    fn apply_whole_files_denies_path_outside_fs_write_scope() {
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let caveats = Caveats {
            fs_write: newt_core::Scope::only(["allowed.rs".to_string()]),
            ..Caveats::top()
        };

        // Allowed write succeeds.
        let allowed = coder
            .apply(
                &whole_files("allowed.rs", "fn ok() {}\n"),
                tmp.path(),
                &caveats,
            )
            .expect("permitted write must succeed");
        assert_eq!(allowed, vec!["allowed.rs".to_string()]);

        // Forbidden write returns CapabilityDenied.
        let err = coder
            .apply(
                &whole_files("forbidden.rs", "fn evil() {}\n"),
                tmp.path(),
                &caveats,
            )
            .unwrap_err();
        match err {
            CoderError::CapabilityDenied { kind, target } => {
                assert_eq!(kind, "fs_write");
                assert_eq!(target, "forbidden.rs");
            }
            other => panic!("expected CapabilityDenied, got {other:?}"),
        }
        // And the file was never created.
        assert!(!tmp.path().join("forbidden.rs").exists());
    }

    #[test]
    fn apply_whole_files_denies_atomically_on_partial_scope() {
        // A multi-file emission where one path is denied must write
        // NOTHING — the check loops every path before committing any
        // write. Regression for the "wrote half the emission then
        // refused" failure mode.
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let caveats = Caveats {
            fs_write: newt_core::Scope::only(["a.rs".to_string()]),
            ..Caveats::top()
        };
        let mut files = BTreeMap::new();
        files.insert("a.rs".to_string(), "fn a() {}\n".to_string());
        files.insert("b.rs".to_string(), "fn b() {}\n".to_string());

        let err = coder
            .apply(&Emission::WholeFiles(files), tmp.path(), &caveats)
            .unwrap_err();
        assert!(matches!(
            err,
            CoderError::CapabilityDenied {
                kind: "fs_write",
                ..
            }
        ));
        // Neither file landed.
        assert!(!tmp.path().join("a.rs").exists());
        assert!(!tmp.path().join("b.rs").exists());
    }

    #[test]
    fn apply_unified_diff_denied_under_bounded_fs_write() {
        // We can't enumerate diff paths up front, so any non-`All`
        // fs_write scope conservatively denies the dispatch.
        let tmp = TempDir::new().unwrap();
        let coder = coder_with_no_backend_used();
        let caveats = Caveats {
            fs_write: newt_core::Scope::only(["whatever.rs".to_string()]),
            ..Caveats::top()
        };
        let diff = Emission::UnifiedDiff(
            "--- a/whatever.rs\n+++ b/whatever.rs\n@@ -1 +1 @@\n-x\n+y\n".to_string(),
        );
        let err = coder.apply(&diff, tmp.path(), &caveats).unwrap_err();
        assert!(matches!(
            err,
            CoderError::CapabilityDenied {
                kind: "fs_write",
                ..
            }
        ));
    }

    // ── host_from_endpoint ───────────────────────────────────────────────

    // Scheme, port and path are all removed; scheme-less input is
    // accepted as a bare host.
    #[test]
    fn host_from_endpoint_strips_scheme_and_path() {
        assert_eq!(
            super::host_from_endpoint("http://localhost:11434/api/chat"),
            "localhost"
        );
        assert_eq!(
            super::host_from_endpoint("https://allowed.example.com/v1/chat"),
            "allowed.example.com"
        );
        // No scheme — treated as a bare host.
        assert_eq!(
            super::host_from_endpoint("bare.host.local"),
            "bare.host.local"
        );
        // No path, just host:port.
        assert_eq!(super::host_from_endpoint("http://h:8080"), "h");
        // Empty path component.
        assert_eq!(super::host_from_endpoint("https://only.host/"), "only.host");
    }

    // ── check_call_budget ────────────────────────────────────────────────

    // `Caveats::top()` never refuses a call, however many were used.
    #[test]
    fn check_call_budget_passes_under_unlimited() {
        super::check_call_budget(&Caveats::top(), 0).unwrap();
        super::check_call_budget(&Caveats::top(), 999_999).unwrap();
    }

    // Under `AtMost(3)`, calls are permitted while fewer than 3 are used.
    #[test]
    fn check_call_budget_passes_within_bound() {
        let caveats = Caveats {
            max_calls: newt_core::CountBound::AtMost(3),
            ..Caveats::top()
        };
        super::check_call_budget(&caveats, 0).unwrap();
        super::check_call_budget(&caveats, 2).unwrap();
    }

    // Under `AtMost(2)` with 2 calls used, the third turn is denied and
    // the error target names it ("#3").
    #[test]
    fn check_call_budget_denies_at_bound() {
        let caveats = Caveats {
            max_calls: newt_core::CountBound::AtMost(2),
            ..Caveats::top()
        };
        let err = super::check_call_budget(&caveats, 2).unwrap_err();
        match err {
            CoderError::CapabilityDenied { kind, target } => {
                assert_eq!(kind, "max_calls");
                assert!(target.contains("#3"));
            }
            other => panic!("expected CapabilityDenied, got {other:?}"),
        }
    }
}
845}