koda-core 0.3.2

Core engine for the Koda AI coding agent (macOS and Linux only)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
//! E2E tests for the `/undo` system through the full inference loop.
//!
//! Closes priority 3 of #1264 ("Undo System — High: No E2E test for the
//! undo stack"). Unit tests in `src/undo.rs` cover the [`UndoStack`]
//! data structure in isolation; this file exercises the production
//! pipeline end-to-end:
//!
//! ```text
//! MockProvider → inference_loop → tool dispatch → undo.snapshot()
//!                                       ↓
//!                                       session.run_turn()
//!                                       ↓
//!                                       undo.commit_turn()  ← #1264 fix
//! ```
//!
//! ## What these tests would have caught (and did)
//!
//! Before the #1264 fix, `commit_turn()` was *never called in production
//! code* — only from `undo.rs`'s own unit tests. That meant `pending`
//! accumulated forever, `entries` stayed empty, and the `/undo` slash
//! command always reported "Nothing to undo" no matter how many files
//! had been written. The very first test in this file
//! (`undo_restores_overwritten_file_through_inference_loop`) reproduces
//! that bug deterministically.
//!
//! ## Why E2E and not just more unit tests
//!
//! The unit tests in `src/undo.rs` exercise `UndoStack` correctly — and
//! the bug above proves that's not enough. The contract this file
//! protects is *"file mutations made through the inference loop are
//! recoverable via `/undo`"*, which spans three modules
//! (`tools/mod.rs`, `session.rs`, `undo.rs`) and is exactly the kind of
//! seam that unit tests can't see.

use koda_core::providers::{ToolCall, mock::MockResponse};
use koda_test_utils::{Env, MockProvider};

// ── Helpers ──────────────────────────────────────────────────

/// Construct a `Write` [`ToolCall`] with a caller-chosen `id`.
///
/// Several calls packed into a single `MockResponse::ToolCalls` must
/// not share an id, hence the explicit parameter. The payload always
/// carries `overwrite: true`: the Write tool's default refusal to
/// clobber an existing file is a separate safety guard and is not the
/// behaviour the undo tests in this file are probing.
///
/// * `id` — identifier stored on the tool call.
/// * `file_path` — target path, forwarded verbatim as `file_path`.
/// * `content` — text forwarded verbatim as `content`.
///
/// The arguments are serialized to a JSON string before being stored.
fn write_call(id: &str, file_path: &str, content: &str) -> ToolCall {
    let payload = serde_json::json!({
        "file_path": file_path,
        "content": content,
        "overwrite": true,
    });

    ToolCall {
        id: id.into(),
        function_name: "Write".into(),
        arguments: payload.to_string(),
        thought_signature: None,
    }
}

/// Report how many entries the undo stack currently holds.
///
/// The lock result is unwrapped with `expect` on purpose: a poisoned
/// mutex (a panicking test left the lock held) then surfaces as one
/// clear panic message instead of a `Result` chain that would bury
/// the real failure.
///
/// # Panics
///
/// Panics if the undo mutex is poisoned.
fn undo_depth(env: &Env) -> usize {
    let stack = env
        .tools
        .undo
        .lock()
        .expect("undo mutex poisoned — a previous test panicked while holding it");
    stack.depth()
}

/// Undo the most recent entry and hand back its human-readable summary.
///
/// # Panics
///
/// Panics if the undo mutex is poisoned, or if the undo call yields no
/// summary (i.e. the stack was empty). Check `undo_depth` first when an
/// empty stack is a legitimate outcome you want to assert on.
fn undo_one(env: &Env) -> String {
    // The lock guard is a temporary that is released at the end of this
    // statement; only the undo outcome survives.
    let outcome = env
        .tools
        .undo
        .lock()
        .expect("undo mutex poisoned")
        .undo();

    outcome.expect("undo stack should have at least one entry")
}

// ── Tests ────────────────────────────────────────────────────

/// Baseline scenario: one overwriting `Write` driven through the
/// inference loop must be both snapshotted and committed, so that a
/// single undo brings the original bytes back.
///
/// Prior to the #1264 fix this failed: `commit_turn()` was never
/// invoked, leaving `depth()` at 0 even though the Write succeeded.
#[tokio::test]
async fn undo_restores_overwritten_file_through_inference_loop() {
    let env = Env::new().await;
    let target = env.root.join("greeting.txt");
    std::fs::write(&target, "hello, world").unwrap();

    env.insert_user_message("rewrite greeting").await;
    let overwrite_args = serde_json::json!({
        "file_path": "greeting.txt",
        "content": "GOODBYE, WORLD",
        "overwrite": true,
    });
    let script = vec![
        MockResponse::tool_call("Write", overwrite_args),
        MockResponse::Text("Done.".into()),
    ];
    let provider = MockProvider::new(script);
    env.run_inference(&provider).await;

    // Sanity check — the overwrite really reached the disk.
    let after_write = std::fs::read_to_string(&target).unwrap();
    assert_eq!(after_write, "GOODBYE, WORLD");

    // The fix under test: ending the turn turned the snapshot into an
    // undoable entry.
    let depth = undo_depth(&env);
    assert_eq!(
        depth, 1,
        "exactly one turn happened; expected one undo entry"
    );

    // The actual contract: undo puts the original content back.
    let summary = undo_one(&env);
    assert!(
        summary.contains("restored"),
        "expected 'restored' in summary, got: {summary}"
    );
    let after_undo = std::fs::read_to_string(&target).unwrap();
    assert_eq!(after_undo, "hello, world");
}

/// When a `Write` brings a brand-new file into existence, undoing it
/// must *delete* that file rather than leave an empty one behind.
#[tokio::test]
async fn undo_removes_newly_created_file_through_inference_loop() {
    let env = Env::new().await;
    let created = env.root.join("new_file.txt");
    assert!(!created.exists(), "precondition: file must not exist");

    env.insert_user_message("create new_file.txt").await;
    let create_args = serde_json::json!({
        "file_path": "new_file.txt",
        "content": "fresh content",
    });
    let script = vec![
        MockResponse::tool_call("Write", create_args),
        MockResponse::Text("Created.".into()),
    ];
    let provider = MockProvider::new(script);
    env.run_inference(&provider).await;

    assert!(created.exists(), "Write should have created the file");

    let summary = undo_one(&env);
    // Either wording is accepted for the "file was removed" report.
    let mentions_removal = summary.contains("removed") || summary.contains("newly created");
    assert!(
        mentions_removal,
        "summary should mention removal of newly-created file, got: {summary}"
    );
    assert!(
        !created.exists(),
        "undo should have removed the newly-created file"
    );
}

/// `Edit` against an existing file: the pre-edit content is what gets
/// snapshotted, and undo must reproduce it byte-for-byte.
#[tokio::test]
async fn undo_after_edit_restores_original_through_inference_loop() {
    let env = Env::new().await;
    let config = env.root.join("config.toml");
    let original = "name = \"alpha\"\nversion = \"1.0\"\n";
    std::fs::write(&config, original).unwrap();

    env.insert_user_message("rename to beta").await;
    let edit_args = serde_json::json!({
        "file_path": "config.toml",
        "replacements": [
            {"old_str": "alpha", "new_str": "beta"}
        ],
    });
    let script = vec![
        MockResponse::tool_call("Edit", edit_args),
        MockResponse::Text("Renamed.".into()),
    ];
    let provider = MockProvider::new(script);
    env.run_inference(&provider).await;

    // Sanity check — the replacement was applied on disk.
    let edited = std::fs::read_to_string(&config).unwrap();
    assert!(edited.contains("beta"));

    // The summary text is irrelevant here; only the file content matters.
    undo_one(&env);
    let reverted = std::fs::read_to_string(&config).unwrap();
    assert_eq!(
        reverted, original,
        "undo must restore the exact pre-edit content (whitespace + all)"
    );
}

/// Delete: snapshot captures the file contents, undo recreates the file
/// with those contents.
///
/// Note: Delete on a *non-Koda-owned* file emits `ApprovalRequest`
/// (destructive op) which `Env::run_inference` has no responder wired
/// up for, so the Delete would silently never execute. We sidestep
/// that by creating the file *through Koda* in a setup turn (Write
/// tool) so Koda owns it; per `trust.rs` and #465, deletion of
/// Koda-owned files auto-approves. This is the same pattern the
/// existing `test_delete_tool_standalone_e2e` uses.
///
/// The setup `Write` passes its target under `file_path`, the same
/// argument key every other `Write` call in this file uses, so the
/// setup turn does not depend on an alternate key being accepted.
#[tokio::test]
async fn undo_after_delete_restores_file_through_inference_loop() {
    let env = Env::new().await;
    let path = env.root.join("doomed.txt");
    let content = "important data\nline 2\n";

    // ── Setup turn: create via Write so Koda owns the file ──
    env.insert_user_message("create doomed.txt").await;
    let setup = MockProvider::new(vec![
        MockResponse::tool_call(
            "Write",
            serde_json::json!({
                "file_path": path.to_string_lossy(),
                "content": content,
            }),
        ),
        MockResponse::Text("Created.".into()),
    ]);
    env.run_inference(&setup).await;
    assert!(path.exists(), "setup: Write must have created the file");
    assert_eq!(undo_depth(&env), 1, "setup turn = 1 entry");

    // ── Test turn: delete it ──
    env.insert_user_message("delete it").await;
    let provider = MockProvider::new(vec![
        MockResponse::tool_call(
            "Delete",
            serde_json::json!({"path": path.to_string_lossy()}),
        ),
        MockResponse::Text("Gone.".into()),
    ]);
    env.run_inference(&provider).await;

    assert!(!path.exists(), "Delete should have removed the file");
    assert_eq!(undo_depth(&env), 2, "setup + delete = 2 entries");

    // Undo the delete: file comes back with original content, and only
    // the setup turn's entry is left on the stack.
    undo_one(&env);
    assert!(path.exists(), "undo should have recreated the file");
    assert_eq!(std::fs::read_to_string(&path).unwrap(), content);
    assert_eq!(undo_depth(&env), 1);
}
/// Multiple file mutations within a single turn must collapse into ONE
/// undo entry (not N entries). This is the contract that lets users
/// undo "what the agent did this turn" as an atomic unit, regardless
/// of how many files it touched.
///
/// Scripts a turn with three sequential Write tool-calls (one Write per
/// roundtrip inside the same `run_inference` call) so we exercise the
/// `pending`-deduplication path explicitly.
#[tokio::test]
async fn multiple_mutations_in_one_turn_share_one_undo_entry() {
    let env = Env::new().await;
    for name in ["a.txt", "b.txt", "c.txt"] {
        std::fs::write(env.root.join(name), format!("orig-{name}")).unwrap();
    }

    env.insert_user_message("rewrite all three").await;
    let provider = MockProvider::new(vec![
        MockResponse::tool_call(
            "Write",
            serde_json::json!({"file_path": "a.txt", "content": "NEW-A", "overwrite": true}),
        ),
        MockResponse::tool_call(
            "Write",
            serde_json::json!({"file_path": "b.txt", "content": "NEW-B", "overwrite": true}),
        ),
        MockResponse::tool_call(
            "Write",
            serde_json::json!({"file_path": "c.txt", "content": "NEW-C", "overwrite": true}),
        ),
        MockResponse::Text("All three rewritten.".into()),
    ]);
    env.run_inference(&provider).await;

    assert_eq!(
        undo_depth(&env),
        1,
        "three writes in one turn must produce exactly one undo entry"
    );

    let summary = undo_one(&env);
    assert!(
        summary.contains("3 file"),
        "summary should report 3 files restored, got: {summary}"
    );

    // All three files restored to original.
    for name in ["a.txt", "b.txt", "c.txt"] {
        assert_eq!(
            std::fs::read_to_string(env.root.join(name)).unwrap(),
            format!("orig-{name}"),
            "{name} should be restored to original"
        );
    }

    // Stack now empty.
    assert_eq!(undo_depth(&env), 0);
}

/// Each turn contributes its own undo entry. Entries come off in LIFO
/// order, so the first undo lands on the intermediate state and the
/// second one on the initial state.
#[tokio::test]
async fn two_turns_create_two_independent_undo_entries() {
    let env = Env::new().await;
    let file = env.root.join("evolving.txt");
    std::fs::write(&file, "v1").unwrap();

    // ── Turn 1: v1 → v2 ──
    env.insert_user_message("upgrade to v2").await;
    let to_v2 = serde_json::json!({"file_path": "evolving.txt", "content": "v2", "overwrite": true});
    let first_script = vec![
        MockResponse::tool_call("Write", to_v2),
        MockResponse::Text("Upgraded to v2.".into()),
    ];
    let provider1 = MockProvider::new(first_script);
    env.run_inference(&provider1).await;
    assert_eq!(undo_depth(&env), 1);

    // ── Turn 2: v2 → v3 ──
    env.insert_user_message("upgrade to v3").await;
    let to_v3 = serde_json::json!({"file_path": "evolving.txt", "content": "v3", "overwrite": true});
    let second_script = vec![
        MockResponse::tool_call("Write", to_v3),
        MockResponse::Text("Upgraded to v3.".into()),
    ];
    let provider2 = MockProvider::new(second_script);
    env.run_inference(&provider2).await;
    assert_eq!(undo_depth(&env), 2, "second turn must add a second entry");

    // First undo reverts turn 2 only: content is v2, not v1.
    undo_one(&env);
    let after_first_undo = std::fs::read_to_string(&file).unwrap();
    assert_eq!(after_first_undo, "v2");
    assert_eq!(undo_depth(&env), 1);

    // Second undo reverts turn 1: content is v1 again.
    undo_one(&env);
    let after_second_undo = std::fs::read_to_string(&file).unwrap();
    assert_eq!(after_second_undo, "v1");
    assert_eq!(undo_depth(&env), 0);
}

/// Glob, Grep and Read are read-only and must leave the undo stack
/// alone; otherwise the user's `/undo` history would fill up with
/// no-op entries and memory would be spent snapshotting files nobody
/// changed.
///
/// The check leans on `commit_turn`'s "no-op when pending is empty"
/// contract: a turn made up solely of read-only tools leaves `depth()`
/// at 0.
#[tokio::test]
async fn read_only_tools_dont_affect_undo_stack() {
    let env = Env::new().await;

    // Small source tree for the read-only tools to look at.
    let src_dir = env.root.join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    std::fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
    std::fs::write(src_dir.join("lib.rs"), "pub mod foo;").unwrap();

    env.insert_user_message("explore the codebase").await;
    let glob_args = serde_json::json!({"pattern": "src/*.rs"});
    let grep_args = serde_json::json!({"pattern": "fn", "path": "."});
    let read_args = serde_json::json!({"file_path": "src/main.rs"});
    let script = vec![
        MockResponse::tool_call("Glob", glob_args),
        MockResponse::tool_call("Grep", grep_args),
        MockResponse::tool_call("Read", read_args),
        MockResponse::Text("Explored.".into()),
    ];
    let provider = MockProvider::new(script);
    env.run_inference(&provider).await;

    let depth = undo_depth(&env);
    assert_eq!(
        depth, 0,
        "read-only tools must not produce any undo entries"
    );
}

/// Several tool calls delivered in a single LLM response ("parallel
/// tool use") still belong to one turn and must therefore produce ONE
/// undo entry. Guards against a regression in which the dispatch loop
/// is "fixed" to commit per tool call instead of per turn.
#[tokio::test]
async fn parallel_writes_in_one_response_share_one_undo_entry() {
    const FILES: [&str; 2] = ["x.txt", "y.txt"];

    let env = Env::new().await;
    for name in FILES {
        let seed = format!("original-{name}");
        std::fs::write(env.root.join(name), seed).unwrap();
    }

    env.insert_user_message("rewrite x and y in parallel").await;

    // Both writes travel in ONE response, each with its own call id.
    let parallel_calls = vec![
        write_call("call_x", "x.txt", "AFTER-X"),
        write_call("call_y", "y.txt", "AFTER-Y"),
    ];
    let script = vec![
        MockResponse::ToolCalls(parallel_calls),
        MockResponse::Text("Both rewritten.".into()),
    ];
    let provider = MockProvider::new(script);
    env.run_inference(&provider).await;

    let depth = undo_depth(&env);
    assert_eq!(
        depth, 1,
        "two parallel writes in one response must produce one entry, not two"
    );

    // A single undo has to roll back both files.
    undo_one(&env);
    let x_after = std::fs::read_to_string(env.root.join("x.txt")).unwrap();
    assert_eq!(x_after, "original-x.txt");
    let y_after = std::fs::read_to_string(env.root.join("y.txt")).unwrap();
    assert_eq!(y_after, "original-y.txt");
}