koda-core 0.2.23

Core engine for the Koda AI coding agent (macOS and Linux only)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
//! End-to-end tests for the always-on built-in egress proxy
//! (Phase 3b of #934, slice 6).
//!
//! These tests exercise the full vertical slice from
//! [`KodaSession::new`] all the way down to a real Bash-spawned child
//! process observing the canonical proxy env-var bouquet attached by
//! [`koda_core::sandbox::build`]. They are the integration-level proof
//! that slices 1–5 wired together correctly:
//!
//! ```text
//!   KodaSession::new()
//!     └─> BuiltInProxy::spawn()      ── slice 4
//!     └─> agent.tools.set_proxy_port ── slice 5
//!
//!   tools.execute("Bash", …)         ── slice 5 (dispatch)
//!     └─> shell::run_shell_command   ── slice 5
//!     └─> sandbox::build(.., port)   ── slice 5
//!     └─> Command + env { HTTPS_PROXY=http://127.0.0.1:port, … }
//!
//!   real `sh -c` child sees HTTPS_PROXY
//! ```
//!
//! Run with: `cargo test -p koda-core --test builtin_proxy_e2e_test`
//! Add `--ignored` to also run the live curl-through-proxy test.

use koda_core::{
    agent::KodaAgent, config::ProviderType, providers, session::KodaSession, tools::ToolRegistry,
    trust::TrustMode,
};
use koda_test_utils::Env;
use std::sync::Arc;

/// Assemble a minimal shared [`KodaAgent`] rooted at `root` for these tests.
///
/// The agent is populated by hand rather than through the production
/// `KodaConfig` → `KodaAgent` path, so MCP/skills discovery is skipped
/// (not needed here). One `ToolRegistry` is stored on the agent; a second,
/// throwaway registry built from the same arguments is used only to
/// compute the tool definitions.
fn build_agent(root: std::path::PathBuf, max_context_tokens: usize) -> Arc<KodaAgent> {
    // Registry that the agent will own and dispatch through.
    let registry = ToolRegistry::new(root.clone(), max_context_tokens);
    // Definitions come from a temporary registry with identical inputs.
    let definitions =
        ToolRegistry::new(root.clone(), max_context_tokens).get_definitions(&[], &[]);

    let agent = KodaAgent {
        project_root: root,
        tools: registry,
        tool_defs: definitions,
        system_prompt: String::from("You are a test assistant."),
    };
    Arc::new(agent)
}

/// Build a `KodaSession` through the regular `KodaSession::new`
/// constructor so that the proxy auto-spawn path under test actually runs.
///
/// The agent comes from [`build_agent`], using the root and context-token
/// limit of `env`. Only the session is returned; the agent is handed to
/// the constructor (the tests below reach it via `session.agent`).
async fn make_real_session(env: &Env) -> KodaSession {
    let shared_agent = build_agent(env.root.clone(), env.config.max_context_tokens);

    // Smoke-check that a provider can be built from env.config (Mock by
    // default in our env); the result itself is intentionally discarded.
    // No network is touched at session-construction time.
    drop(providers::create_provider(&env.config));

    // Use the regular constructor — this is what we're testing.
    let session = KodaSession::new(
        env.session_id.clone(),
        shared_agent,
        env.db.clone(),
        &env.config,
        TrustMode::Auto,
    )
    .await;
    session
}

// ── Hermetic E2E (always runs) ────────────────────────────────────────

/// A freshly constructed session always carries a built-in proxy.
///
/// This pins the post-pivot "always-on, no opt-in" contract: koda is
/// config-free, so the user never has to enable anything. The test also
/// checks that the port recorded on the agent's tool registry is the
/// very port the session's proxy reports.
#[tokio::test]
async fn session_new_auto_spawns_proxy() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;

    // A missing proxy is a contract violation, reported with the same
    // message the assertion-based form would produce.
    let spawned_port = match session.proxy.as_ref() {
        Some(handle) => handle.port,
        None => panic!("KodaSession::new must auto-spawn the built-in proxy"),
    };
    assert!(spawned_port > 0, "proxy must report a real ephemeral port");

    let registry_port = session.agent.tools.proxy_port();
    assert_eq!(
        registry_port,
        Some(spawned_port),
        "agent's ToolRegistry must hold the same port the session spawned"
    );
}

/// The real proof: a Bash invocation routed through the full
/// dispatch pipeline must observe the proxy env-var bouquet, with
/// `HTTPS_PROXY` set to a loopback URL of the form
/// `http://127.0.0.1:<port>`.
///
/// This is the slice 5+6 integration assertion in one test — if the
/// port gets lost anywhere along the chain
/// (session→tools→shell→sandbox::build), `HTTPS_PROXY` will not have
/// the expected shape and one of the checks below fails.
///
/// The observed port is deliberately *not* compared for equality with
/// the session proxy's host port; see the Phase 3c.1 note in the body.
#[tokio::test]
async fn bash_sees_proxy_env_pointing_at_session_proxy() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    // Host-side proxy port; used only for diagnostics in the final assert.
    let port = session.proxy.as_ref().expect("proxy auto-spawned").port;

    // Run the full Bash dispatch path. `tools.execute` is what the
    // engine loop calls in production for every model-emitted tool call.
    let result = session
        .agent
        .tools
        .execute("Bash", r#"{"command":"echo \"$HTTPS_PROXY\""}"#, None, None)
        .await;

    assert!(
        result.success,
        "Bash dispatch must succeed; got output: {}",
        result.output
    );
    let combined = format!(
        "{}\n{}",
        result.output,
        result.full_output.as_deref().unwrap_or("")
    );
    // Phase 3c.1 caveat: on Linux with kernel-enforced egress, stage 2
    // rewrites HTTPS_PROXY to point at the *in-netns* port, not the
    // host port — so an exact port match would fail there for the
    // right reason. We just verify the env var has the expected shape
    // (`http://127.0.0.1:<some-port>`) on both platforms; the exact
    // port plumbing is covered by `linux_kernel_allows_tcp_to_proxy_port`.
    let prefix = "http://127.0.0.1:";
    // Find the first line carrying the URL and split off the port text in
    // one step; `strip_prefix` avoids a manual byte-index slice.
    let (url_line, port_str) = combined
        .lines()
        .find_map(|line| line.strip_prefix(prefix).map(|rest| (line, rest)))
        .unwrap_or_else(|| panic!("no HTTPS_PROXY url in {combined:?}"));
    let observed_port: u16 = port_str
        .trim()
        .parse()
        .unwrap_or_else(|_| panic!("HTTPS_PROXY port not parseable: {url_line:?}"));
    assert!(
        observed_port > 0,
        "HTTPS_PROXY must contain a non-zero port (host proxy is {port}); got {url_line:?}"
    );
}

/// The listener behind the session's proxy must be live: a plain TCP
/// connect to `127.0.0.1:<port>` has to succeed.
///
/// Bash sees the proxy URL either way, but if nothing is really
/// listening (e.g. the spawn task crashed silently), every outbound HTTP
/// call would hang or fail with ECONNREFUSED.
///
/// This catches: "we wired the env var but the proxy task died on
/// startup" — a class of bug that the dispatch test above can't see.
#[tokio::test]
async fn session_proxy_accepts_tcp_connections() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    let port = match session.proxy.as_ref() {
        Some(handle) => handle.port,
        None => panic!("proxy spawned"),
    };

    // Dial the proxy directly from the test process.
    let loopback = ("127.0.0.1", port);
    let conn = tokio::net::TcpStream::connect(loopback).await;
    if conn.is_err() {
        panic!("proxy must accept connections on 127.0.0.1:{port}; got: {conn:?}");
    }
}

/// Each session owns its own proxy: two sessions created in one process
/// must report different proxy ports.
///
/// Regression guard: if a single proxy were ever cached or shared
/// across sessions, sub-agent isolation would break (Phase 5 territory
/// but cheap to guard now).
#[tokio::test]
async fn distinct_sessions_get_distinct_proxies() {
    // Two fully independent environments, built one after the other.
    let first_env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let second_env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;

    // Both sessions stay alive until the end of the test so that their
    // proxies coexist while the ports are compared.
    let first = make_real_session(&first_env).await;
    let second = make_real_session(&second_env).await;

    let port_a = first.proxy.as_ref().expect("a spawned").port;
    let port_b = second.proxy.as_ref().expect("b spawned").port;
    assert_ne!(
        port_a, port_b,
        "each session must get its own ephemeral proxy port"
    );
}

/// Dropping a session must abort its proxy. Verified by re-binding the
/// same port — if the listener is gone, the rebind succeeds; if the
/// proxy task leaked, the rebind keeps failing (address in use) until
/// the retry budget is exhausted.
#[tokio::test]
async fn dropping_session_releases_proxy_port() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;

    // Remember the port, then let the session go.
    let session = make_real_session(&env).await;
    let port = session.proxy.as_ref().expect("spawned").port;
    // Explicit drop — ProxyHandle::Drop should abort the task.
    drop(session);

    // The abort and listener close need a runtime tick to take effect, so
    // poll (up to 50 attempts, 20 ms apart) instead of sleeping a fixed
    // amount; this keeps the test fast and non-flaky.
    let retry_delay = std::time::Duration::from_millis(20);
    let mut rebound: Option<std::net::TcpListener> = None;
    for _attempt in 0..50 {
        if let Ok(listener) = std::net::TcpListener::bind(("127.0.0.1", port)) {
            rebound = Some(listener);
            break;
        }
        tokio::time::sleep(retry_delay).await;
    }

    assert!(
        rebound.is_some(),
        "after session drop, port {port} must be re-bindable (proxy task did not abort)"
    );
}

// ── Phase 3c: kernel-enforced egress (macOS only) ─────────────────────────

/// **The Phase 3c headline contract.** Even an ill-behaved binary that
/// completely ignores `HTTPS_PROXY` and tries to open a raw TCP
/// connection to a non-proxy port must be denied by the seatbelt
/// kernel sandbox. This is the security upgrade over Phase 3b: 3b
/// trusts clients to honor env vars; 3c forces them.
///
/// We exercise this with bash's `/dev/tcp/host/port` magic — a
/// pure-bash TCP open that doesn't honor any proxy env var. The probe
/// prints `connected` when the open succeeds and `blocked` on *any*
/// failure, so on its own it cannot tell a sandbox denial apart from
/// "nothing is listening on that port". To make `blocked` meaningful,
/// the target is a live decoy listener bound by the test process
/// itself: if the sandbox did not enforce anything, the connect would
/// succeed and the probe would print `connected`, failing the test.
///
/// macOS only: the bwrap backend on Linux can't kernel-enforce port
/// filtering yet (see Phase 3c.1).
#[cfg(target_os = "macos")]
#[tokio::test]
async fn macos_kernel_blocks_direct_tcp_to_non_proxy_port() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    let proxy_port = session.proxy.as_ref().expect("proxy spawned").port;

    // Bind a decoy listener on an OS-chosen loopback port. The kernel
    // completes incoming handshakes from the listen backlog, so no
    // accept() call is needed for an unsandboxed connect to succeed.
    let decoy = std::net::TcpListener::bind(("127.0.0.1", 0))
        .expect("bind decoy listener on an ephemeral loopback port");
    let target_port = decoy
        .local_addr()
        .expect("decoy listener must report its local address")
        .port();
    // The proxy is already bound to its port, so the OS cannot hand the
    // same one to the decoy; assert anyway so a violation is loud.
    assert_ne!(
        target_port, proxy_port,
        "decoy listener must not share the proxy port"
    );

    // bash's `/dev/tcp/host/port` performs a connect(2) at the libc
    // level with no proxy honoring. The redirect uses fd 3 to keep
    // stdout/stderr clean for our parsing.
    let cmd = format!(
        r#"{{"command":"if exec 3<>/dev/tcp/127.0.0.1/{target_port} 2>/dev/null; then echo connected; exec 3<&-; else echo blocked; fi"}}"#,
    );
    let result = session.agent.tools.execute("Bash", &cmd, None, None).await;
    let combined = format!(
        "{}\n{}",
        result.output,
        result.full_output.as_deref().unwrap_or("")
    );
    assert!(
        combined.contains("blocked"),
        "kernel-enforced sandbox must refuse direct TCP to non-proxy port \
         {target_port} (proxy is on {proxy_port}); got:\n{combined}"
    );

    // Keep the decoy alive until the probe has finished.
    drop(decoy);
}

/// Positive counterpart of
/// `macos_kernel_blocks_direct_tcp_to_non_proxy_port`: the kernel
/// sandbox MUST still let a sandboxed process connect to the real proxy
/// port, otherwise even well-behaved clients can't reach the filter.
/// Verifies the SBPL allow-rule actually lets the loopback proxy port
/// through.
#[cfg(target_os = "macos")]
#[tokio::test]
async fn macos_kernel_allows_tcp_to_proxy_port() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    let proxy_port = match session.proxy.as_ref() {
        Some(handle) => handle.port,
        None => panic!("proxy spawned"),
    };

    // Probe the proxy port from inside the sandbox via /dev/tcp. The
    // proxy is up, so connect(2) should succeed; the sandbox must not
    // get in the way.
    let probe = format!(
        r#"{{"command":"if exec 3<>/dev/tcp/127.0.0.1/{proxy_port} 2>/dev/null; then echo connected; exec 3<&-; else echo blocked; fi"}}"#,
    );
    let tools = &session.agent.tools;
    let result = tools.execute("Bash", &probe, None, None).await;

    // Search both the short and the full output for the verdict.
    let full_tail = result.full_output.as_deref().unwrap_or("");
    let combined = format!("{}\n{}", result.output, full_tail);
    assert!(
        combined.contains("connected"),
        "kernel-enforced sandbox must permit TCP to the proxy port {proxy_port}; got:\n{combined}"
    );
}

// ── Phase 3c.1: kernel-enforced egress on Linux ──────────────────────────
//
// Mirrors the macOS pair above. The kernel mechanism is different
// (bwrap netns + UDS bridge instead of seatbelt SBPL) but the
// observable contract from inside the sandbox is identical: direct
// TCP to a non-proxy port must fail; TCP to the proxy port must
// succeed.

/// Linux equivalent of `macos_kernel_blocks_direct_tcp_to_non_proxy_port`.
///
/// Intended to catch regressions in the bwrap `--unshare-net` + stage 2
/// stack (UDS bridge, --unshare-net flag, stage 2 fork, env rewriting):
/// a direct TCP open to a non-proxy port from inside the sandbox must
/// report `blocked`.
///
/// NOTE(review): the probe prints `blocked` for every connect failure,
/// and the target is port 1 or 2, where presumably nothing listens — so
/// a plain "connection refused" looks the same as a sandbox denial.
/// Confirm whether the UDS sanity check below is a sufficient guard.
///
/// Returns early (without failing) when `bwrap` isn't installed; CI may
/// run on a runner without bubblewrap.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn linux_kernel_blocks_direct_tcp_to_non_proxy_port() {
    let bwrap_present = koda_sandbox::bwrap::is_available();
    if !bwrap_present {
        eprintln!("bwrap not available; skipping Linux kernel-enforcement test");
        return;
    }

    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    let proxy_port = match session.proxy.as_ref() {
        Some(handle) => handle.port,
        None => panic!("proxy spawned"),
    };

    // Sanity-check that the kernel-enforced path is actually wired up
    // for this session. If the UDS bridge didn't spawn (e.g. /tmp not
    // writable in the test sandbox), the final assertion would still
    // pass for the wrong reason — the env-var path also doesn't enable
    // connections to non-allowlisted hosts. So the kernel path must be
    // shown to be live before it is tested.
    let this_pid = std::process::id();
    let uds = koda_sandbox::bwrap_proxy::proxy_uds_path(this_pid, proxy_port);
    assert!(
        uds.exists(),
        "Phase 3c.1.b regression: UDS bridge {} should exist for the kernel-enforced \
         path to activate. Without it, this test would pass spuriously via the \
         env-var fallback.",
        uds.display()
    );

    // Any port other than the proxy's will do as the forbidden target.
    let target_port = match proxy_port {
        1 => 2,
        _ => 1,
    };

    // bash's /dev/tcp/host/port performs a connect(2) at libc level
    // with no proxy honoring — same probe used in the macOS test.
    // bash is invoked explicitly because Ubuntu's /bin/sh is dash,
    // which doesn't implement the /dev/tcp magic.
    let probe = format!(
        r#"{{"command":"bash -c 'if exec 3<>/dev/tcp/127.0.0.1/{target_port} 2>/dev/null; then echo connected; exec 3<&-; else echo blocked; fi'"}}"#,
    );
    let tools = &session.agent.tools;
    let result = tools.execute("Bash", &probe, None, None).await;

    let full_tail = result.full_output.as_deref().unwrap_or("");
    let combined = format!("{}\n{}", result.output, full_tail);
    assert!(
        combined.contains("blocked"),
        "kernel-enforced sandbox must refuse direct TCP to non-proxy port \
         {target_port} (proxy is on {proxy_port}); got:\n{combined}"
    );
}

/// Linux equivalent of `macos_kernel_allows_tcp_to_proxy_port`.
///
/// In the bwrap+stage2 design, "the proxy port" inside the sandbox is
/// the *in-netns ephemeral port* that stage 2 binds (and then bridges
/// through the UDS to the real host proxy). The user command sees
/// `HTTPS_PROXY=http://127.0.0.1:NEW_PORT` because stage 2 rewrote it.
/// The probe therefore reads the port out of the rewritten env var
/// instead of using the host port — more honest, and a broken rewriter
/// would surface here.
///
/// Returns early (without failing) when `bwrap` is unavailable or the
/// UDS bridge file is absent.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn linux_kernel_allows_tcp_to_proxy_port() {
    let bwrap_present = koda_sandbox::bwrap::is_available();
    if !bwrap_present {
        eprintln!("bwrap not available; skipping Linux kernel-allow test");
        return;
    }

    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;
    let proxy_port = match session.proxy.as_ref() {
        Some(handle) => handle.port,
        None => panic!("proxy spawned"),
    };

    // Without the bridge socket the kernel-enforced path is not in play,
    // so there is nothing to verify here.
    let this_pid = std::process::id();
    let uds = koda_sandbox::bwrap_proxy::proxy_uds_path(this_pid, proxy_port);
    let bridge_present = uds.exists();
    if !bridge_present {
        eprintln!(
            "UDS bridge {} not present — kernel-enforced path inactive; skipping",
            uds.display()
        );
        return;
    }

    // Inside the sandbox: take the (stage-2-rewritten) HTTPS_PROXY,
    // extract the port via bash parameter expansion (`${var##*:}` =
    // strip everything up to the last `:`), then check that /dev/tcp
    // can reach it. bash is named explicitly because Ubuntu's /bin/sh
    // is dash (no /dev/tcp).
    let probe = r#"{"command":"bash -c 'port=\"${HTTPS_PROXY##*:}\"; if exec 3<>/dev/tcp/127.0.0.1/$port 2>/dev/null; then echo connected; exec 3<&-; else echo blocked; fi'"}"#;
    let tools = &session.agent.tools;
    let result = tools.execute("Bash", probe, None, None).await;

    let full_tail = result.full_output.as_deref().unwrap_or("");
    let combined = format!("{}\n{}", result.output, full_tail);
    assert!(
        combined.contains("connected"),
        "kernel-enforced sandbox must permit TCP to the in-netns proxy port \
         (rewritten by stage 2 from host port {proxy_port}); got:\n{combined}"
    );
}

// ── Live curl smoke (opt-in via --ignored) ───────────────────────────────

/// End-to-end denial proof: a Bash invocation that asks curl to fetch
/// a non-allowlisted host must fail with a proxy 403 — *before* any
/// real DNS lookup or TCP connect to the upstream happens, because the
/// proxy intercepts CONNECT requests and consults the filter first.
///
/// Hermetic in spirit: the upstream `blocked.test` is intentionally
/// non-routable, but with a working proxy the request should never get
/// that far. If this test ever starts failing with "DNS failure" or
/// "connection refused" instead of a 4xx from the proxy, it means
/// curl bypassed the proxy entirely.
///
/// Marked `#[ignore]` only because it needs `curl` on PATH — nearly
/// universal but not quite guaranteed on stripped-down CI containers.
/// Run with: `cargo test -p koda-core --test builtin_proxy_e2e_test -- --ignored`
#[tokio::test]
#[ignore = "needs curl; run with --ignored"]
async fn curl_to_blocked_host_returns_403_via_proxy() {
    let env = Env::builder()
        .provider_type(ProviderType::Mock)
        .build()
        .await;
    let session = make_real_session(&env).await;

    // `blocked.test` is in the IETF .test TLD (RFC 6761) so it MUST
    // not resolve. If curl honors HTTPS_PROXY the proxy will deny at
    // CONNECT before any DNS lookup happens.
    let curl_call = r#"{"command":"curl --max-time 5 -sS -o /dev/null -w '%{http_code}\\n' https://blocked.test/ 2>&1; echo exit=$?"}"#;
    let tools = &session.agent.tools;
    let result = tools.execute("Bash", curl_call, None, None).await;

    let full_tail = result.full_output.as_deref().unwrap_or("");
    let combined = format!("{}\n{}", result.output, full_tail);

    // curl with proxy denial typically prints "Received HTTP code 403
    // from proxy after CONNECT" and exits 56. Any one of these markers
    // is taken as a sign that the integration is working.
    let denial_markers = ["403", "CONNECT", "proxy"];
    let saw_denial = denial_markers
        .iter()
        .any(|marker| combined.contains(*marker));
    assert!(
        saw_denial,
        "expected proxy denial signal in output; got:\n{combined}"
    );

    // The trailing `echo exit=$?` exposes curl's exit status.
    let curl_succeeded = combined.contains("exit=0");
    assert!(
        !curl_succeeded,
        "curl must NOT succeed (host is blocked); output:\n{combined}"
    );
}