astrid-capsule 0.8.0

Core runtime management for User-Space Capsules in Astrid OS
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
//! Shared state for wasmtime Component Model host functions.
//!
//! [`HostState`] is used as the `T` in `wasmtime::Store<T>` and is directly
//! accessible to all host trait implementations generated by `bindgen!`.

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;

use tokio::sync::{Semaphore, mpsc, watch};
use tokio_util::sync::CancellationToken;

use crate::capsule::CapsuleId;
use astrid_core::uplink::{InboundMessage, MAX_UPLINKS_PER_CAPSULE, UplinkDescriptor};
use astrid_storage::ScopedKvStore;
use astrid_storage::secret::SecretStore;

/// An active network stream owned by a capsule.
///
/// Holds either an inbound Unix-socket connection (accepted from the
/// capsule's pre-provisioned listener) or an outbound TCP connection
/// (opened via `net.connect-tcp`). The read/write/close host fns
/// dispatch on the variant. The sockets wrapped by the two variants
/// ([`tokio::net::UnixStream`] and [`tokio::net::TcpStream`]) both implement
/// [`tokio::io::AsyncRead`] / [`tokio::io::AsyncWrite`], so the inner
/// framing logic is shared once the socket has been locked.
///
/// # Clone semantics
///
/// `Clone` is shallow with respect to the socket: both variants keep the
/// socket behind an `Arc<tokio::sync::Mutex<…>>`, so every clone refers to
/// the same underlying connection and must lock it before use.
#[derive(Debug, Clone)]
pub enum NetStream {
    /// Inbound Unix-domain socket accepted from the kernel's listener.
    Unix(Arc<tokio::sync::Mutex<tokio::net::UnixStream>>),
    /// Outbound TCP connection opened via `net.connect-tcp`, together with
    /// its per-stream timeout settings (see [`TcpStreamSlot`]).
    Tcp(TcpStreamSlot),
}

/// Per-stream state for an outbound TCP connection.
///
/// Holds the raw [`tokio::net::TcpStream`] (behind an `Arc<Mutex<…>>` so
/// host fns that all take `&mut HostState` can each lock it cooperatively)
/// plus the std-style configurable socket options that the WIT surface
/// exposes — read/write timeouts. `TCP_NODELAY` and `TTL` are not cached
/// here; they live on the underlying socket and are read back via
/// `TcpStream::nodelay()` / `ttl()` on demand.
///
/// # Clone semantics
///
/// Cloning a slot shares the socket (only the `Arc` is cloned) but copies
/// `read_timeout` / `write_timeout` by value. A timeout changed on one clone
/// is therefore not visible through any other clone; update the stored slot
/// itself when a timeout must persist.
#[derive(Debug, Clone)]
pub struct TcpStreamSlot {
    /// The connected TCP socket. Shared via `Arc<Mutex<…>>` so the
    /// existing `net-read` / `net-write` dispatchers can clone the
    /// outer `NetStream` and then lock the inner stream.
    pub stream: Arc<tokio::sync::Mutex<tokio::net::TcpStream>>,
    /// Read timeout applied to each `net-read-bytes` / `net-peek` call.
    /// `None` → use the host default (~50 ms poll for `Pending` parity
    /// with `net-read`).
    pub read_timeout: Option<std::time::Duration>,
    /// Write timeout applied to each `net-write-bytes` call.
    /// `None` → no extra timeout beyond the host cancellation token.
    pub write_timeout: Option<std::time::Duration>,
}

/// The lifecycle phase a capsule is currently executing in.
///
/// Stored as `Option<LifecyclePhase>` in [`HostState::lifecycle_phase`]:
/// it is set during `#[install]` or `#[upgrade]` dispatch and is `None`
/// during normal runtime. The `astrid_elicit` host function checks that
/// field and rejects calls made outside of a lifecycle phase.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LifecyclePhase {
    /// First-time installation.
    Install,
    /// Upgrading from a previous version.
    Upgrade,
}

/// Metadata for an interceptor binding declared in `Capsule.toml`.
///
/// Under the new per-domain WIT, interceptors are NOT capsule-side
/// `Resource<Subscription>` handles — the kernel matches incoming IPC
/// messages against interceptor patterns and dispatches them directly
/// to `astrid-hook-trigger`. This struct exists purely to:
///
/// 1. Let the guest enumerate what it's subscribed to via
///    `astrid:ipc/host.get-interceptor-bindings` (debugging /
///    tooling), and
/// 2. Anchor the registry-side routing table used by the run-loop
///    dispatcher.
///
/// `handle_id` is informational only — the guest cannot turn it
/// back into a `Resource<Subscription>` and the kernel does not key
/// any storage on it.
///
/// Derives `serde::Serialize`; the serialized field names are the Rust
/// field names below (presumably this is the shape handed back to the
/// guest by the accessor above — confirm against the host fn).
#[derive(Debug, Clone, serde::Serialize)]
pub struct InterceptorHandle {
    /// Enumeration index; informational only under the new ABI.
    pub handle_id: u64,
    /// The interceptor action name from the manifest.
    pub action: String,
    /// The event topic this interceptor subscribes to.
    pub topic: String,
}

use crate::engine::wasm::host::process::ProcessTracker;
use crate::security::CapsuleSecurityGate;

/// A principal-scoped filesystem mount: physical root, VFS, and capability handle.
///
/// The three are bound together — the [`DirHandle`](astrid_capabilities::DirHandle)
/// is confined to the specific [`Vfs`](astrid_vfs::Vfs) instance, and both are rooted
/// at `root`. They must always be installed and cleared as a unit, so callers
/// cannot accidentally pair an invocation-scoped VFS with a load-time handle
/// (which would break capability confinement).
///
/// `Clone` so the Store pool can hand the same principal mount to each of a
/// capsule's N pooled instances — all fields (`PathBuf`, `Arc<dyn Vfs>`,
/// `DirHandle`) share or copy cheaply (issue #816). Clones share the same
/// VFS instance because only the `Arc` is cloned.
///
/// `Debug` is implemented by hand rather than derived: the output shows
/// `root` and `handle` and omits `vfs`.
#[derive(Clone)]
pub struct PrincipalMount {
    /// Canonical physical directory this mount is rooted at.
    pub root: PathBuf,
    /// VFS wrapping `root`. Direct [`HostVfs`](astrid_vfs::HostVfs) —
    /// writes are permanent (no OverlayVfs CoW layer).
    pub vfs: Arc<dyn astrid_vfs::Vfs>,
    /// Capability handle that confines access to `root`.
    pub handle: astrid_capabilities::DirHandle,
}

/// Hand-written `Debug` that prints `root` and `handle` only.
///
/// The `vfs` field is left out (presumably because `dyn Vfs` is not
/// required to be `Debug` — confirm against the trait definition); the
/// non-exhaustive finisher renders a trailing `..` so readers can tell a
/// field was omitted.
impl std::fmt::Debug for PrincipalMount {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("PrincipalMount");
        out.field("root", &self.root);
        out.field("handle", &self.handle);
        out.finish_non_exhaustive()
    }
}

/// Shared state accessible to all host functions via `Store<HostState>`.
///
/// The fields fall into a few rough groups:
///
/// - WASI plumbing (`wasi_ctx`, `resource_table`, `store_meter`),
/// - load-time identity, mounts and stores (`principal`, `capsule_id`,
///   `vfs`, `home`, `tmp`, `kv`, `secret_store`, `config`, …),
/// - per-invocation overrides (`invocation_*`, `caller_context`,
///   `interceptor_active`) that are installed for one invocation and
///   cleared afterwards,
/// - IPC / capability gating (`event_bus`, `ipc_*`, `security`,
///   `capability_names`, `audit_firehose`, …),
/// - concurrency control (`blocking_semaphore`, `io_semaphore`,
///   `cancel_token`), and
/// - resource accounting (`*_count*`, `active_http_streams`,
///   `recv_yielded`, `no_yield_windows`).
///
/// Note that Rust drops struct fields in declaration order, so reordering
/// fields here changes the order in which the held resources are released.
pub struct HostState {
    /// WASI context for Component Model WASI imports (clocks, random, etc.).
    pub wasi_ctx: wasmtime_wasi::WasiCtx,
    /// Resource table for WASI resource types (streams, descriptors, etc.).
    /// Host-defined resources such as `NetStream` entries live here too
    /// (see [`net_stream_count`](Self::net_stream_count)).
    pub resource_table: wasmtime::component::ResourceTable,
    /// Per-Store memory limiter: enforces the per-invocation linear-memory
    /// ceiling **and** records the invoking principal's peak into the shared
    /// memory ledger. Wired via `Store::limiter`; re-targeted per invocation
    /// (cap + attributee principal) since a pooled Store crosses principals.
    /// Replaces a plain `wasmtime::StoreLimits`.
    pub store_meter: crate::memory_ledger::StoreMemoryMeter,
    /// The principal this capsule is running on behalf of.
    pub principal: astrid_core::principal::PrincipalId,
    /// The capsule this state belongs to.
    pub capsule_id: CapsuleId,
    /// Per-capsule log file at `home/{principal}/.local/log/{capsule}.log`.
    /// Capsule `astrid_log` output writes here instead of the system tracing
    /// subscriber. `None` if the log file couldn't be opened.
    ///
    /// NOTE(review): the path documented here (`…/log/{capsule}.log`) does
    /// not match the one documented on
    /// [`invocation_capsule_log`](Self::invocation_capsule_log)
    /// (`…/log/{capsule}/{date}.log`) — confirm which layout the opener
    /// actually uses and align the two comments.
    pub capsule_log: Option<Arc<std::sync::Mutex<std::fs::File>>>,
    /// Context of the current caller (set per-invocation by the dispatcher).
    pub caller_context: Option<astrid_events::ipc::IpcMessage>,
    /// `true` while a kernel-dispatched interceptor is executing.
    ///
    /// Set by `WasmEngine::invoke_interceptor` around the typed-func
    /// call and read by `install_recv_invocation_context` to detect
    /// that `caller_context` is owned by the *outer* interceptor
    /// invocation, not by recv. Without this flag a nested
    /// `ipc::recv` that drained a message from a different publisher
    /// would rewrite the interceptor's caller — making subsequent
    /// `publish_json` calls stamp the wrong principal. The outer
    /// invocation's principal owns every outbound publish for the
    /// duration of the interceptor.
    pub interceptor_active: bool,
    /// The unique session UUID for this capsule's execution state.
    pub capsule_uuid: uuid::Uuid,
    /// Workspace root directory (file operations are confined here).
    pub workspace_root: PathBuf,
    /// The Virtual File System (VFS) instance for this capsule.
    pub vfs: Arc<dyn astrid_vfs::Vfs>,
    /// The root capability handle for the VFS.
    pub vfs_root_handle: astrid_capabilities::DirHandle,
    /// Load-time principal home mount (`home://` paths). `None` if the
    /// principal's home directory does not exist on disk.
    pub home: Option<PrincipalMount>,
    /// Load-time principal tmp mount (`/tmp/` paths, backed by
    /// `~/.astrid/home/{principal}/.local/tmp/`). `None` if unavailable.
    pub tmp: Option<PrincipalMount>,
    /// Concrete reference to the [`OverlayVfs`](astrid_vfs::OverlayVfs) for
    /// commit/rollback operations. `None` for non-overlay VFS configurations
    /// (e.g., tests with a plain `HostVfs`).
    pub overlay_vfs: Option<Arc<astrid_vfs::OverlayVfs>>,
    /// Reference to the ephemeral upper directory to keep it alive for the session.
    pub upper_dir: Option<Arc<tempfile::TempDir>>,
    /// Capsule-scoped KV store (`plugin:{capsule_id}` namespace).
    pub kv: ScopedKvStore,
    /// Per-invocation KV store scoped to a different principal.
    ///
    /// Set by `WasmEngine::invoke_interceptor` when the IPC message's
    /// principal differs from `self.principal`. Host functions use
    /// `invocation_kv.as_ref().unwrap_or(&kv)` to transparently scope
    /// reads/writes to the calling principal.
    pub invocation_kv: Option<ScopedKvStore>,
    /// Per-invocation home mount for the calling principal.
    ///
    /// Populated by `WasmEngine::invoke_interceptor` when the IPC message's
    /// principal differs from `self.principal`. When set, overrides `home`
    /// for scheme resolution and security gate checks. Cleared on exit.
    pub invocation_home: Option<PrincipalMount>,
    /// Per-invocation tmp mount for the calling principal. Same lifecycle
    /// as `invocation_home`.
    pub invocation_tmp: Option<PrincipalMount>,
    /// Per-invocation secret store scoped to the calling principal.
    ///
    /// Set by `WasmEngine::invoke_interceptor` when the IPC message's
    /// principal differs from `self.principal`. When set, overrides
    /// `secret_store` so `has_secret` and future secret host fns scope
    /// reads/writes to the invoking principal. Cleared on exit.
    pub invocation_secret_store: Option<Arc<dyn SecretStore>>,
    /// Per-invocation capsule log file scoped to the calling principal.
    ///
    /// Same lifecycle as [`invocation_secret_store`](Self::invocation_secret_store).
    /// When set, `astrid_log` writes to the invoking principal's
    /// `~/.astrid/home/{principal}/.local/log/{capsule}/{date}.log` instead
    /// of the capsule owner's.
    pub invocation_capsule_log: Option<Arc<std::sync::Mutex<std::fs::File>>>,
    /// Per-invocation quota profile for the invoking principal.
    ///
    /// Set by [`WasmEngine::invoke_interceptor`](super::WasmEngine::invoke_interceptor)
    /// after resolving the invoking principal's `PrincipalProfile` through
    /// the shared [`PrincipalProfileCache`](crate::profile_cache::PrincipalProfileCache).
    /// Host functions that gate on per-principal sub-budgets (IPC
    /// throughput, background processes, HTTP streams) read this through
    /// [`effective_profile`](Self::effective_profile). Cleared on exit.
    ///
    /// Resolved on BOTH per-invocation paths: the dispatcher-driven
    /// interceptor path (`invoke_interceptor`) and the guest-pulled
    /// `ipc::recv` path ([`install_recv_invocation_context`](Self::install_recv_invocation_context),
    /// via [`profile_cache`](Self::profile_cache)). Either way the *default*
    /// quotas apply when the principal is unconfigured, by one of two
    /// mechanisms: with no cache (tests / single-tenant) this stays `None`
    /// and [`effective_profile`](Self::effective_profile) substitutes the
    /// process-global default; with a cache, a principal that has no profile
    /// file resolves to `Some(PrincipalProfile::default())` (a missing file is
    /// not an error — see [`PrincipalProfile::load`](astrid_core::profile::PrincipalProfile::load)).
    pub invocation_profile: Option<Arc<astrid_core::profile::PrincipalProfile>>,
    /// Shared profile-cache handle, used by the `ipc::recv` path to resolve
    /// the invoking principal's [`PrincipalProfile`](astrid_core::profile::PrincipalProfile)
    /// into [`invocation_profile`](Self::invocation_profile).
    ///
    /// The interceptor path resolves the profile in
    /// [`WasmEngine::invoke_interceptor`](super::WasmEngine::invoke_interceptor)
    /// and installs it directly; the guest-pulled `ipc::recv` path has no
    /// dispatcher frame to do that, so
    /// [`install_recv_invocation_context`](Self::install_recv_invocation_context)
    /// resolves through this handle. `None` in tests / single-tenant — the
    /// per-principal quota fields then fall back to the default profile.
    pub profile_cache: Option<Arc<crate::profile_cache::PrincipalProfileCache>>,
    /// Per-invocation env overlay for the invoking principal.
    ///
    /// Loaded from
    /// `$ASTRID_HOME/home/{principal}/.config/env/{capsule_id}.env.json`
    /// when the dispatcher establishes a per-invocation context whose
    /// principal differs from the capsule's load-time principal.
    /// `get_config` checks this overlay before falling back to
    /// [`config`](Self::config) (the manifest defaults loaded at
    /// capsule boot).
    ///
    /// Why it exists: without this overlay, the gateway's
    /// `POST /api/capsules/{id}/env/{field}` route — which writes to
    /// the per-principal path above — was effectively write-only for
    /// every principal other than `default`: the capsule's
    /// `env::var(...)` reads still returned the manifest's
    /// load-time default. Most visibly, an operator setting
    /// `base_url = http://localhost:1234` on the openai-compat
    /// capsule for a gateway-minted bearer would see their LLM
    /// request still hit `api.openai.com` (the manifest default).
    pub invocation_env_overlay: Option<HashMap<String, String>>,
    /// System Event Bus for IPC publish/subscribe.
    pub event_bus: astrid_events::EventBus,
    /// Rate limiter for IPC message publishing.
    ///
    /// `Arc` so a capsule's pool of `Store`s shares one limiter — otherwise
    /// each pooled `Store` would get its own budget and the per-capsule
    /// throughput cap would be `pool_size`× too loose (issue #816). The
    /// limiter is internally concurrent (`DashMap`), so shared `&self` access
    /// is contention-free.
    pub ipc_limiter: Arc<astrid_events::ipc::IpcRateLimiter>,
    /// Capsule configuration from the manifest.
    ///
    /// Holds only **non-secret** env values (the `[env]` declarations
    /// in `Capsule.toml` whose `type` is not `"secret"` — base URLs,
    /// model names, log levels, etc.). Secret-typed keys are
    /// resolved live in [`crate::engine::wasm::host::sys`] through
    /// `resolve_secret` (file-per-secret store, see
    /// [`astrid_storage::FileSecretStore`]) instead of being preloaded
    /// here, so plaintext secret material never sits in
    /// `wasm_config`'s hot memory.
    pub config: HashMap<String, serde_json::Value>,
    /// Manifest-declared secret-typed env keys. Populated at load
    /// time from `manifest.env` entries with `type = "secret"`. Used
    /// by [`crate::engine::wasm::host::sys::Host::get_config`] to
    /// route reads through the file-per-secret store instead of
    /// `config`. The scope (per-agent vs host-wide) is an
    /// operator-side decision at `astrid secret set` time, not a
    /// manifest declaration, so the kernel always tries per-agent
    /// first and falls through to host-wide regardless of where the
    /// operator stored the value.
    pub secret_env: std::collections::HashSet<String>,
    /// IPC topic patterns this capsule is allowed to publish to.
    /// Empty means DENY ALL (fail-closed).
    pub ipc_publish_patterns: Vec<String>,
    /// IPC topic patterns this capsule is allowed to subscribe to.
    /// Empty means DENY ALL (fail-closed).
    pub ipc_subscribe_patterns: Vec<String>,
    /// Optional security gate for gated operations (HTTP, file I/O).
    pub security: Option<Arc<dyn CapsuleSecurityGate>>,
    /// Hook manager for executing user scripts synchronously via airlock.
    ///
    /// Stored type-erased as `dyn Any`, so this struct does not name the
    /// concrete manager type; consumers presumably downcast it before use
    /// (confirm at the call sites).
    pub hook_manager: Option<Arc<dyn std::any::Any + Send + Sync>>,
    /// Shared capsule registry for `hooks::trigger` fan-out dispatch.
    ///
    /// When set, the `astrid_trigger_hook` host function can iterate the
    /// registry to find capsules with matching interceptors, invoke them,
    /// and collect responses. This is the kernel mechanism that WASM
    /// capsules use to dispatch hooks to other capsules.
    pub capsule_registry: Option<Arc<tokio::sync::RwLock<crate::registry::CapsuleRegistry>>>,
    /// Tokio runtime handle for bridging async operations in sync host functions.
    pub runtime_handle: tokio::runtime::Handle,
    /// Whether the capsule manifest declares `CapsuleCapability::Uplink`.
    ///
    /// Used to gate `astrid_register_uplink` — only uplink capsules
    /// are allowed to register uplinks.
    pub has_uplink_capability: bool,
    /// The calling capsule's OWN held capability NAMES, precomputed at load
    /// from `manifest.capabilities` via
    /// [`CapabilitiesDef::held_names`](crate::manifest::CapabilitiesDef::held_names).
    ///
    /// Backs `astrid:sys/host.enumerate-capabilities`, which is **infallible**
    /// — so it reads this owned, lock-free snapshot rather than the
    /// `capsule_registry` (whose lookup can fail with `registry-unavailable`,
    /// the failure mode `check-capsule-capability` carries but `enumerate`
    /// must not). Capsule capabilities are fixed at load (the grant/revoke
    /// model is principal-scoped, a separate axis), so a snapshot taken once
    /// is correct for the capsule's whole lifetime and across the pool.
    pub capability_names: Vec<String>,
    /// Whether this capsule's OWNER principal holds `audit:read_all`,
    /// resolved at LOAD the PRIVILEGED way (against the profile cache +
    /// live group config — **not** the manifest, unlike
    /// [`has_uplink_capability`](Self::has_uplink_capability) which is read
    /// straight off `manifest.capabilities.uplink`).
    ///
    /// Governs the audit-topic subscription seam: `true` ⇒ a subscription
    /// covering `astrid.v1.audit.entry` is the unscoped firehose (every
    /// principal's entries, as before); `false` (the default) ⇒ the
    /// subscription is route-scoped to the owner principal so it observes
    /// only its own audit entries. Fail-secure: `false`. A capsule cannot
    /// flip this via its `Capsule.toml` — it chooses neither its load
    /// principal nor that principal's operator-owned capabilities.
    pub audit_firehose: bool,
    /// Sender for inbound messages from uplink capsules.
    ///
    /// Set during capsule loading when the manifest declares
    /// [`CapsuleCapability::Uplink`](crate::CapsuleCapability). Feeds into
    /// the gateway's inbound router.
    pub inbound_tx: Option<mpsc::Sender<InboundMessage>>,
    /// Uplinks registered by the WASM guest via `astrid_register_uplink`.
    /// Appended to by `register_uplink`, which enforces the
    /// [`MAX_UPLINKS_PER_CAPSULE`] cap and rejects duplicates.
    pub registered_uplinks: Vec<UplinkDescriptor>,
    /// Optional natively bound unix listener.
    pub cli_socket_listener: Option<Arc<tokio::sync::Mutex<tokio::net::UnixListener>>>,
    /// Active lifecycle phase, if any. `None` during normal runtime.
    /// Set to `Some(Install)` or `Some(Upgrade)` during lifecycle dispatch.
    /// Gates the `astrid_elicit` host function.
    pub lifecycle_phase: Option<LifecyclePhase>,
    /// Secret store for capsule credentials (keychain with KV fallback).
    pub secret_store: Arc<dyn SecretStore>,
    /// Readiness signal sender for run-loop capsules.
    ///
    /// When the WASM guest calls `astrid_signal_ready`, the host sends `true`
    /// on this channel. The kernel waits on the corresponding receiver before
    /// loading dependent capsules.
    pub ready_tx: Option<watch::Sender<bool>>,
    /// Bounded-concurrency semaphore for **blocking** host calls — the ones
    /// that `block_in_place` + `block_on` and pin a tokio worker for the whole
    /// permit-held wait (KV, identity, sys, fs, the net/process security gates,
    /// DNS, sockets). Sized to roughly `cores - 2` so blocking host work cannot
    /// starve the tokio scheduler. Created via
    /// [`default_blocking_semaphore`](HostState::default_blocking_semaphore);
    /// override via `[capsule].host_blocking_concurrency`.
    pub blocking_semaphore: Arc<Semaphore>,
    /// Bounded-concurrency semaphore for **async-I/O** host calls — the ones
    /// that `.await` real I/O directly and free the tokio worker while pending
    /// (HTTP request/stream, `ipc::recv`). Sized much larger than
    /// [`blocking_semaphore`](Self::blocking_semaphore) and clamped by the
    /// process file-descriptor limit, since each in-flight call may hold a
    /// socket — this is the outbound-throughput gate the LLM path rides on
    /// (`astrid#816`). Created via
    /// [`default_io_semaphore`](HostState::default_io_semaphore); override via
    /// `[capsule].host_io_concurrency`.
    pub io_semaphore: Arc<Semaphore>,
    /// Cooperative cancellation token for long-running host function calls.
    ///
    /// Triggered during capsule unload to unblock every blocking host
    /// fn that races it: `Subscription.recv`, `elicit`, the various
    /// listener `accept` paths, `connect-tcp`, `read`/`read-bytes`/
    /// `write-bytes` on `tcp-stream`, `read-chunk` on `http-stream`,
    /// `pollable.block`, `poll.poll`, `sleep-ns`, and the `wait` /
    /// `spawn` paths on `process`. Streams (`astrid:io/streams`)
    /// short-circuit with `Closed` when this fires; pollables
    /// short-circuit with `Cancelled`.
    pub cancel_token: CancellationToken,
    /// Session token for authenticating CLI socket connections. Only set for
    /// the CLI proxy capsule (which has `net_bind` capability).
    pub session_token: Option<std::sync::Arc<astrid_core::session_token::SessionToken>>,
    /// Interceptor binding metadata, populated during `WasmEngine::load()`
    /// from `[[interceptor]]` entries in `Capsule.toml`. The kernel
    /// matches incoming IPC messages against these patterns and calls
    /// `astrid-hook-trigger` directly — these handles are not
    /// `Resource<Subscription>` references and the guest cannot
    /// consume from them. See [`InterceptorHandle`] for the metadata
    /// shape and `astrid:ipc/host.get-interceptor-bindings` for the
    /// guest-facing accessor.
    pub interceptor_handles: Vec<InterceptorHandle>,
    /// Shared allowance store for capsule-level approval requests.
    ///
    /// When set, the `astrid_request_approval` host function can check
    /// existing allowances before prompting the user. Approvals with
    /// session/always scope create new allowances here.
    pub allowance_store: Option<std::sync::Arc<astrid_approval::AllowanceStore>>,
    /// Shared identity store for resolving platform users to `AstridUserId`.
    ///
    /// When `None`, identity host functions return an error.
    pub identity_store: Option<std::sync::Arc<dyn astrid_storage::IdentityStore>>,
    /// Active HTTP streaming responses, keyed by handle ID.
    ///
    /// Each entry holds a `reqwest::Response` whose body is being consumed
    /// incrementally by `astrid_http_stream_read` plus the principal that
    /// started the stream, so per-principal HTTP-stream caps can be
    /// enforced by creator without blocking other principals on the same
    /// capsule. Cleaned up by `astrid_http_stream_close` or when the
    /// capsule unloads.
    pub active_http_streams: HashMap<u64, crate::engine::wasm::host::http::ActiveHttpStream>,
    /// Monotonic counter for HTTP stream handle IDs.
    /// Starts at 1 (0 reserved as sentinel).
    pub next_http_stream_id: u64,
    /// Tracks active child process PIDs for cancellation.
    ///
    /// Shared with the cancel listener background task. The spawn host function
    /// registers/unregisters PIDs; the listener calls `cancel_all()` when a
    /// `tool.v1.request.cancel` event arrives.
    pub process_tracker: Arc<ProcessTracker>,
    /// Host-owned registry for the PERSISTENT tier of `astrid:process`.
    ///
    /// Cloned (`Arc`) into every pooled `HostState` of a capsule exactly like
    /// `process_tracker`, so a `process-id` minted on one instance is
    /// reattachable from another after a pool reset. Persistent children live
    /// here — off the wasmtime resource table — which is what lets them
    /// survive instance churn.
    pub persistent_processes: Arc<crate::engine::wasm::host::process::PersistentProcessRegistry>,
    /// Live count of `NetStream` entries currently in the resource table.
    /// Maintained alongside `ResourceTable` insertions / drops so the
    /// `MAX_ACTIVE_STREAMS` gate is O(1) instead of iterating every
    /// resource (the table may hold hundreds of pollables / errors /
    /// http handles unrelated to net). Single-threaded: wasmtime
    /// stores are owned by exactly one OS thread.
    pub net_stream_count: usize,
    /// Live count of `SubscriptionEntry` entries. Same rationale as
    /// `net_stream_count`.
    pub subscription_count: usize,
    /// Live count of `ManagedProcess` entries — overall.
    pub process_count_total: usize,
    /// Per-creator-principal count of `ManagedProcess` entries. The
    /// process-spawn gate needs to enforce per-principal sub-budgets
    /// without iterating the whole resource table.
    pub process_count_by_principal:
        std::collections::HashMap<astrid_core::principal::PrincipalId, usize>,
    /// Bound run-loop CPU-bound signal: set `true` by the ipc `recv` host fn
    /// each time the guest blocks on recv, read + cleared by the run-loop's
    /// epoch-deadline callback once per window.
    ///
    /// This is the cooperative-yield signal that distinguishes a legitimate
    /// recv/accept loop (sets it every iteration → never trapped) from a
    /// no-recv spinner (never sets it → interrupt-trapped after
    /// `MAX_NO_YIELD_WINDOWS`). Only the dedicated, mutex-guarded run-loop
    /// Store reads it; pooled/lifecycle Stores leave it inert. Single Store =
    /// single thread, so the callback and the host fn never race.
    pub recv_yielded: bool,
    /// Bound run-loop CPU-bound counter: consecutive epoch windows in which
    /// the guest burned CPU without a single `recv` (`recv_yielded` stayed
    /// false). The run-loop epoch callback increments it each no-recv window
    /// and traps the guest once it reaches `MAX_NO_YIELD_WINDOWS`; a recv
    /// resets it to 0. Inert for pooled/lifecycle Stores.
    pub no_yield_windows: u32,
}

/// Exposes the WASI context and resource table stored on [`HostState`] to
/// the `wasmtime_wasi` host implementations.
impl wasmtime_wasi::WasiView for HostState {
    fn ctx(&mut self) -> wasmtime_wasi::WasiCtxView<'_> {
        // Split the `&mut self` borrow into the two disjoint fields the view
        // needs; every other field is left untouched.
        let Self {
            wasi_ctx,
            resource_table,
            ..
        } = self;
        wasmtime_wasi::WasiCtxView {
            ctx: wasi_ctx,
            table: resource_table,
        }
    }
}

impl HostState {
    /// Register an uplink descriptor (called from the host function).
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The per-capsule uplink limit ([`MAX_UPLINKS_PER_CAPSULE`]) has been reached.
    /// - An uplink with the same name and platform already exists.
    pub fn register_uplink(&mut self, descriptor: UplinkDescriptor) -> Result<(), &'static str> {
        let known = &self.registered_uplinks;
        // The capacity check runs first, so a full table reports the limit
        // error even when the descriptor would also be a duplicate.
        if known.len() >= MAX_UPLINKS_PER_CAPSULE {
            return Err("uplink registration limit reached");
        }
        // Name and platform together form the uniqueness key.
        for existing in known.iter() {
            if existing.name == descriptor.name && existing.platform == descriptor.platform {
                return Err("duplicate uplink name and platform");
            }
        }
        self.registered_uplinks.push(descriptor);
        Ok(())
    }

    /// Borrow every uplink descriptor registered so far, in registration order.
    #[must_use]
    pub fn uplinks(&self) -> &[UplinkDescriptor] {
        &self.registered_uplinks[..]
    }

    /// Build the default semaphore for **blocking** host calls (host-derived).
    ///
    /// The permit count is roughly `cores - 2`, which leaves headroom for the
    /// tokio scheduler and event dispatch so blocking host work cannot starve
    /// them. See
    /// [`limits::host_blocking_concurrency_default`](super::limits::host_blocking_concurrency_default).
    /// The lifecycle-hook and test paths use this; the pooled interceptor path
    /// sizes its semaphore from the resolved
    /// [`CapsuleRuntimeLimits`](super::limits::CapsuleRuntimeLimits) instead.
    #[must_use]
    pub fn default_blocking_semaphore() -> Arc<Semaphore> {
        let permits = super::limits::host_blocking_concurrency_default();
        Arc::new(Semaphore::new(permits))
    }

    /// Build the default semaphore for **async-I/O** host calls (host-derived,
    /// fd-clamped).
    ///
    /// The permit count is much larger than the blocking semaphore's and is
    /// clamped by the process file-descriptor limit. See
    /// [`limits::host_io_concurrency_default`](super::limits::host_io_concurrency_default).
    #[must_use]
    pub fn default_io_semaphore() -> Arc<Semaphore> {
        let permits = super::limits::host_io_concurrency_default();
        Arc::new(Semaphore::new(permits))
    }

    /// Install the inbound message sender, replacing any sender set earlier.
    pub fn set_inbound_tx(&mut self, tx: mpsc::Sender<InboundMessage>) {
        let sender = Some(tx);
        self.inbound_tx = sender;
    }

    /// Build the KV namespace for this capsule, scoped to its owning principal.
    ///
    /// The result has the shape `{principal}:capsule:{capsule_id}`. It is the
    /// same namespace the `ScopedKvStore` was created with, exposed here for
    /// host functions that need to construct the namespace dynamically.
    #[must_use]
    pub fn principal_kv_namespace(&self) -> String {
        let owner = &self.principal;
        let capsule = &self.capsule_id;
        format!("{owner}:capsule:{capsule}")
    }

    /// Pick the KV store that the current invocation should read and write.
    ///
    /// `invocation_kv` wins when it is set (serving a different principal);
    /// otherwise the capsule's default `kv` store is returned.
    #[must_use]
    pub fn effective_kv(&self) -> &ScopedKvStore {
        let scoped = self.invocation_kv.as_ref();
        #[cfg(debug_assertions)]
        self.debug_assert_invocation_field_set(scoped.is_some(), "invocation_kv");
        match scoped {
            Some(per_invocation) => per_invocation,
            None => &self.kv,
        }
    }

    /// Debug-only invariant check for the `effective_*` accessors.
    ///
    /// Whenever the caller's principal differs from the capsule owner's, the
    /// matching `invocation_*` field **must** be populated. If it is not, the
    /// accessor silently hands back the owner's resource and the invoking
    /// principal's reads/writes leak into the owner's scope.
    ///
    /// In practice the setup in [`WasmEngine::invoke_interceptor`] guarantees
    /// `invocation_kv` and `invocation_secret_store` are populated whenever
    /// the principal mismatches (the only failure path is
    /// `ScopedKvStore::with_namespace` rejecting an empty/null-byte namespace,
    /// which our format string never produces). This assertion catches any
    /// regression that breaks that invariant in debug builds.
    ///
    /// Not applied to `invocation_home` / `invocation_tmp` / `invocation_capsule_log`:
    /// those legitimately stay `None` for unregistered principals (the VFS
    /// bundle registration gate and log-open registration gate).
    ///
    /// `is_set` says whether the checked field is populated; `field_name` is
    /// only used in the panic message.
    #[cfg(debug_assertions)]
    fn debug_assert_invocation_field_set(&self, is_set: bool, field_name: &str) {
        // A populated override can never violate the invariant.
        if is_set {
            return;
        }
        // A missing or unparseable caller principal counts as "no mismatch".
        let caller = self
            .caller_context
            .as_ref()
            .and_then(|m| m.principal.as_deref())
            .and_then(|p| astrid_core::PrincipalId::new(p).ok());
        let principal_mismatches = caller.is_some_and(|p| p != self.principal);
        debug_assert!(
            !principal_mismatches,
            "invocation principal differs from capsule owner ({owner}) but {field_name} is None — \
             effective_* accessor would fall back to the owner's resource, leaking reads/writes",
            owner = self.principal,
        );
    }

    /// Pick the home mount that applies to the current invocation.
    ///
    /// `invocation_home` (set when serving a different principal) takes
    /// precedence over `home` (set at capsule load for the owning principal).
    /// Returns `None` when neither is present.
    #[must_use]
    pub fn effective_home(&self) -> Option<&PrincipalMount> {
        if let Some(per_invocation) = self.invocation_home.as_ref() {
            return Some(per_invocation);
        }
        self.home.as_ref()
    }

    /// Pick the tmp mount that applies to the current invocation:
    /// `invocation_tmp` first, then the load-time `tmp`. Same precedence as
    /// [`effective_home`](Self::effective_home).
    #[must_use]
    pub fn effective_tmp(&self) -> Option<&PrincipalMount> {
        let per_invocation = self.invocation_tmp.as_ref();
        let load_time = self.tmp.as_ref();
        per_invocation.or(load_time)
    }

    /// Clone the root path of the effective home mount into an owned buffer.
    ///
    /// Convenience for host fs functions that must move the principal home
    /// into a security-gate check running inside an `async move` block.
    /// Returns `None` when there is no effective home mount.
    #[must_use]
    pub fn effective_home_root_buf(&self) -> Option<PathBuf> {
        let mount = self.effective_home()?;
        Some(mount.root.clone())
    }

    /// Pick the secret store that applies to the current invocation.
    ///
    /// `invocation_secret_store` (set when serving a different principal)
    /// takes precedence over the load-time `secret_store`.
    #[must_use]
    pub fn effective_secret_store(&self) -> &Arc<dyn SecretStore> {
        let scoped = self.invocation_secret_store.as_ref();
        #[cfg(debug_assertions)]
        self.debug_assert_invocation_field_set(scoped.is_some(), "invocation_secret_store");
        match scoped {
            Some(per_invocation) => per_invocation,
            None => &self.secret_store,
        }
    }

    /// Pick the capsule log file that applies to the current invocation.
    ///
    /// `invocation_capsule_log` takes precedence over the load-time
    /// `capsule_log`, the same precedence as
    /// [`effective_secret_store`](Self::effective_secret_store).
    /// Returns `None` if neither the invocation nor load-time log is open.
    #[must_use]
    pub fn effective_capsule_log(&self) -> Option<&Arc<std::sync::Mutex<std::fs::File>>> {
        let per_invocation = self.invocation_capsule_log.as_ref();
        let load_time = self.capsule_log.as_ref();
        per_invocation.or(load_time)
    }

    /// Resolve the principal whose budget is charged for host-fn side-effects
    /// during the current invocation.
    ///
    /// The invoking principal from [`caller_context`](Self::caller_context)
    /// (set per-invocation by [`WasmEngine::invoke_interceptor`](super::WasmEngine::invoke_interceptor))
    /// is used when it is present and parses as a principal id. Otherwise the
    /// capsule owner's [`principal`](Self::principal) is returned — load-time
    /// host calls, tests, and daemons' self-triggered paths run on the owner's
    /// budget, matching the VFS/KV `effective_*` accessors.
    #[must_use]
    pub fn effective_principal(&self) -> astrid_core::principal::PrincipalId {
        let invoking = self
            .caller_context
            .as_ref()
            .and_then(|m| m.principal.as_deref())
            .and_then(|p| astrid_core::principal::PrincipalId::new(p).ok());
        // No caller in scope, or its principal string does not parse: charge
        // the owner.
        let Some(caller) = invoking else {
            return self.principal.clone();
        };
        caller
    }

    /// Pick the quota profile that applies to the current invocation.
    ///
    /// `invocation_profile` (set by
    /// [`WasmEngine::invoke_interceptor`](super::WasmEngine::invoke_interceptor)
    /// for the calling principal) is used when present. Otherwise the
    /// process-global
    /// [`PrincipalProfile::default_ref`](astrid_core::profile::PrincipalProfile::default_ref)
    /// is returned — load-time host calls, tests, and single-tenant
    /// deployments all legitimately run without an invocation profile.
    ///
    /// The fallback deliberately does **not** substitute the capsule owner's
    /// profile: that would leak the owner's quotas to every unauthenticated
    /// call path. Using `Default` preserves single-tenant parity while keeping
    /// the security invariant honest.
    #[must_use]
    pub fn effective_profile(&self) -> &astrid_core::profile::PrincipalProfile {
        if let Some(profile) = self.invocation_profile.as_deref() {
            return profile;
        }
        astrid_core::profile::PrincipalProfile::default_ref()
    }

    /// Install per-invocation context from an inbound IPC message picked
    /// up via [`ipc::Host::ipc_recv`](crate::engine::wasm::host::ipc) /
    /// [`ipc::Host::ipc_poll`].
    ///
    /// Mirrors the principal-isolation setup done in
    /// [`WasmEngine::invoke_interceptor`](super::WasmEngine::invoke_interceptor)
    /// for the dispatcher path, but driven by `recv`/`poll` so that
    /// `run + ipc::recv` capsules (prompt-builder, registry,
    /// context-engine) also stamp publishes with the publisher's
    /// principal and route reads/writes to the invoking principal's
    /// namespaces. Without this hook these capsules silently fall back
    /// to the owner principal (`default` for the standard distro),
    /// breaking chat for any non-default agent: the publish goes out
    /// stamped `default`, downstream interceptors load the wrong KV
    /// namespace, the turn-state phase doesn't match, and the chain
    /// stalls.
    ///
    /// Does nothing while an interceptor is active: the dispatch path owns
    /// the caller context for the duration of the interceptor.
    ///
    /// Sets up the subset relevant to publish stamping and per-principal
    /// KV / log routing:
    /// - [`caller_context`](Self::caller_context) — drives both
    ///   [`effective_principal`](Self::effective_principal) and the
    ///   `principal_str` chosen by `publish_inner`.
    /// - [`invocation_kv`](Self::invocation_kv) — per-principal KV
    ///   namespace; falls back to load-time `kv` on failure.
    /// - [`invocation_capsule_log`](Self::invocation_capsule_log) —
    ///   per-principal log file; falls back to load-time `capsule_log`
    ///   when the principal has no home directory yet.
    /// - [`invocation_profile`](Self::invocation_profile) — the publishing
    ///   principal's quota profile (owner included), resolved through
    ///   [`profile_cache`](Self::profile_cache) so per-principal ceilings
    ///   (background-process count, IPC throughput, HTTP streams) apply on
    ///   this path too; falls back to the process-global default on a missing
    ///   cache or failed load.
    ///
    /// Skipped vs the interceptor path (each is independently
    /// recoverable; documenting the gaps so the omissions are
    /// auditable):
    /// - `invocation_home` / `invocation_tmp` / `invocation_secret_store` —
    ///   none of the current run+recv capsules touch home/tmp paths
    ///   or secrets from the recv loop. Add when one starts to.
    /// - `store_meter` — the per-invocation linear-memory ceiling stays the
    ///   capsule owner's; the recv path does not re-target it per publisher
    ///   the way `invoke_interceptor` does. Acceptable because the run+recv
    ///   capsules are shared singletons whose per-call allocation is bounded
    ///   by the bus message-size limits. Re-target when a recv-driven capsule
    ///   needs per-principal memory enforcement.
    pub(crate) fn install_recv_invocation_context(&mut self, msg: &astrid_events::ipc::IpcMessage) {
        // An interceptor's caller is owned by the dispatch path
        // (`WasmEngine::invoke_interceptor`), not by recv. Nested
        // `ipc::recv` calls inside an interceptor must NOT overwrite
        // it — otherwise a recv'd message from a different publisher
        // (or the empty-batch clear path below) would silently flip
        // every subsequent `publish_json` away from the principal the
        // interceptor was dispatched under.
        if self.interceptor_active {
            return;
        }

        // Fast path: if the new message's principal matches whatever
        // we already have installed, keep the existing
        // `invocation_kv` / `invocation_capsule_log` rather than
        // re-opening the namespace and log file. The chat-stack run
        // loop calls this on every recv tick — re-init each time
        // burns I/O and allocations for no behavioural change.
        //
        // The fast path requires an *installed* context. "No context yet"
        // and "installed message carried no principal" must not be conflated:
        // otherwise the first principal-less message would skip the owner
        // profile resolution and the override reset further down. Comparing
        // in place also avoids cloning both principals on every tick.
        let principal_unchanged = self
            .caller_context
            .as_ref()
            .is_some_and(|installed| installed.principal == msg.principal);

        // Always refresh the caller context so e.g. topic name / payload
        // tracking stays current, even when the expensive resets are skipped.
        self.caller_context = Some(msg.clone());
        if principal_unchanged {
            return;
        }

        // The publishing principal, parsed once. Used two different ways
        // below, matching the split in the interceptor path
        // (`invoke_interceptor`):
        //
        //   • QUOTA profile — resolved for EVERY publisher, the owner
        //     included. `effective_profile()`'s fallback is the process-global
        //     *default*, never the owner's profile, so an owner-published
        //     message must still resolve the owner's profile or its on-disk
        //     quotas are silently ignored. (For an owner with no profile file
        //     the cache returns the default, so this is a no-op in the common
        //     single-tenant case and only bites once an operator configures
        //     the owner principal.)
        //
        //   • KV / log / env overrides — installed only when the publisher
        //     DIFFERS from the load-time owner. The load-time `kv` /
        //     `capsule_log` / `config` are already the owner's, so an
        //     owner-published message has nothing to override and these are
        //     cleared back to the load-time values.
        let publisher: Option<astrid_core::PrincipalId> = msg
            .principal
            .as_deref()
            .and_then(|p| astrid_core::PrincipalId::new(p).ok());

        // Resolve the publisher's quota profile (owner included) so
        // per-principal ceilings (background-process count, IPC throughput,
        // HTTP streams) apply on the guest-pulled `recv` path too — not only
        // the dispatcher-driven interceptor path. When `msg.principal` is
        // absent/unparseable the owner's own profile is resolved, mirroring
        // `invoke_interceptor`'s `owner_principal` fallback. Best-effort: a
        // failed load logs and leaves `invocation_profile = None` (the same
        // process-global default fall-back as a missing cache), never denying
        // the message — the recv path has no error channel.
        let profile_principal = publisher.clone().unwrap_or_else(|| self.principal.clone());
        self.invocation_profile = self.profile_cache.as_ref().and_then(|cache| {
            match cache.resolve(&profile_principal) {
                Ok(profile) => Some(profile),
                Err(e) => {
                    tracing::warn!(
                        principal = %profile_principal,
                        error = %e,
                        "recv-path profile resolve failed; per-principal quotas fall back to the default profile"
                    );
                    None
                },
            }
        });

        // KV / log / env scoping overrides only kick in for a non-owner
        // publisher; an owner-published message clears them back to the
        // load-time (owner) values.
        let Some(p) = publisher.filter(|p| *p != self.principal) else {
            self.invocation_kv = None;
            self.invocation_capsule_log = None;
            self.invocation_env_overlay = None;
            return;
        };

        // Same `{principal}:capsule:{capsule_id}` shape as the load-time
        // namespace, but keyed on the publisher instead of the owner.
        let ns = format!("{}:capsule:{}", p, self.capsule_id);
        self.invocation_kv = match self.kv.with_namespace(&ns) {
            Ok(kv) => Some(kv),
            Err(e) => {
                tracing::warn!(
                    principal = %p,
                    error = %e,
                    "Failed to create invocation KV scope on ipc::recv path"
                );
                None
            },
        };

        self.invocation_capsule_log = super::open_capsule_log(&p, self.capsule_id.as_str(), false);
        self.invocation_env_overlay =
            super::load_invocation_env_overlay(&p, self.capsule_id.as_str());
    }
}

impl std::fmt::Debug for HostState {
    /// Hand-written, non-exhaustive debug view: most fields are reduced to a
    /// presence flag, a length, or a permit count rather than printed in full.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("HostState");

        out.field("capsule_id", &self.capsule_id);
        out.field("workspace_root", &self.workspace_root);
        out.field("vfs_root_handle", &self.vfs_root_handle);

        // Optional members are summarised as presence flags.
        out.field("has_home", &self.home.is_some());
        out.field("has_tmp", &self.tmp.is_some());
        out.field("has_security", &self.security.is_some());
        out.field("has_uplink_capability", &self.has_uplink_capability);
        out.field("audit_firehose", &self.audit_firehose);
        out.field("has_inbound_tx", &self.inbound_tx.is_some());

        // Collections and semaphores are summarised as counts.
        out.field("registered_uplinks", &self.registered_uplinks.len());
        out.field(
            "blocking_semaphore_permits",
            &self.blocking_semaphore.available_permits(),
        );
        out.field(
            "io_semaphore_permits",
            &self.io_semaphore.available_permits(),
        );
        out.field("cancel_token_cancelled", &self.cancel_token.is_cancelled());
        out.field("has_identity_store", &self.identity_store.is_some());
        out.field("active_http_streams", &self.active_http_streams.len());
        out.field("process_tracker", &self.process_tracker);
        out.field("persistent_processes", &self.persistent_processes);

        out.finish_non_exhaustive()
    }
}

#[cfg(test)]
#[path = "host_state_tests.rs"]
mod tests;