alien_core/debug_session.rs
1//! Wire shapes for `alien debug` sessions.
2//!
3//! These types are the contract between the CLI, the manager (push mode),
4//! and the agent (pull mode). Pure data + a small AWS-host parser; no
5//! cloud-client dependencies, so any layer in the dep graph can speak them.
6//!
7//! The credential *translator* — projecting a resolved [`crate::ClientConfig`]
8//! onto a `DebugSessionResponse::Push` payload — lives in `alien-platform-core`,
9//! one tier above the cloud-client crates.
10
11use std::collections::BTreeMap;
12
13use serde::{Deserialize, Serialize};
14
/// Errors raised by the debug-session producer (manager or agent). Callers
/// wrap into their own error type when surfacing across crate boundaries.
///
/// Every variant's `Display` output starts with `alien debug`; variants that
/// carry a `platform` render it in parentheses right after that prefix,
/// followed by the free-form `message`.
#[derive(Debug, thiserror::Error)]
pub enum DebugSessionError {
    /// A credential variant we don't know how to project onto a user shell.
    /// The resolver path normally produces exportable variants — hitting this
    /// means an upstream change.
    #[error("alien debug ({platform}): {message}")]
    UnsupportedCredential { platform: String, message: String },

    /// The deployment's platform doesn't have a push-mode shell session
    /// (Kubernetes, Local, Test). Carries no `platform` field, so only the
    /// message is rendered.
    #[error("alien debug: {message}")]
    UnsupportedPlatform { message: String },

    /// I/O error reading a credential off the manager's / agent's filesystem
    /// (e.g. the Azure federated-token file). The underlying error is carried
    /// as text in `message`, not as a source error.
    #[error("alien debug ({platform}): {message}")]
    IoError { platform: String, message: String },

    /// Failed to mint a token from the resolved credential (e.g. GCP
    /// `IAMCredentials.generateAccessToken`).
    #[error("alien debug ({platform}): {message}")]
    TokenMintFailed { platform: String, message: String },
}
40
/// Wire response. Discriminated by `kind`. Identical shape on both ends so
/// the CLI can deserialize a session regardless of who produced it (manager
/// or agent).
///
/// The enum is internally tagged: the `kind` key sits next to the payload's
/// own fields, and its value is the camelCased variant name (`push`, `pull`,
/// `pending`, `pushTunnel`, `runtimeTunnel`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "camelCase")]
pub enum DebugSessionResponse {
    /// Cloud credentials projected as env vars (+ optional files / setup
    /// script). The CLI execs the user's command with the merged env.
    Push(PushDebugSession),
    /// Pure-Kubernetes session: a self-contained kubeconfig the CLI binds to
    /// `KUBECONFIG`.
    Pull(PullDebugSession),
    /// Session creation is async. Returned for pull-mode deployments where
    /// the agent must first open an outbound tunnel back to the manager
    /// before the kubeconfig resolves. The CLI long-polls `poll_url` until
    /// the session resolves to `Pull` (kubeconfig ready) or errors out.
    Pending(PendingDebugSession),
    /// Push-mode cloud session via a manager-hosted WebSocket tunnel.
    /// Credentials stay on the manager; the CLI dials `tunnel_url`, spawns
    /// a local HTTP proxy, and every cloud-CLI request the child process
    /// emits is forwarded over the tunnel for the manager to re-sign with
    /// the impersonated identity.
    PushTunnel(PushTunnelDebugSession),
    /// Runtime shell/exec session via an agent-hosted process tunnel.
    RuntimeTunnel(RuntimeTunnelDebugSession),
}
67
68impl DebugSessionResponse {
69 /// RFC3339 expiry of the underlying credentials, if the producer
70 /// surfaced one. Used by the CLI's on-disk cache to skip near-expired
71 /// sessions.
72 pub fn expires_at(&self) -> Option<&str> {
73 match self {
74 Self::Push(p) => p.expires_at.as_deref(),
75 Self::Pull(p) => p.expires_at.as_deref(),
76 Self::Pending(_) => None,
77 Self::PushTunnel(p) => p.expires_at.as_deref(),
78 Self::RuntimeTunnel(p) => p.expires_at.as_deref(),
79 }
80 }
81}
82
83/// What kind of debug session the caller is requesting.
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
85#[serde(rename_all = "camelCase")]
86pub enum DebugSessionKind {
87 /// Existing behavior: run local commands with deployment context.
88 Context,
89 /// Open an interactive shell in the deployment runtime.
90 RuntimeShell,
91 /// Run one non-interactive command in the deployment runtime.
92 RuntimeExec,
93}
94
95impl Default for DebugSessionKind {
96 fn default() -> Self {
97 Self::Context
98 }
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize)]
102#[serde(rename_all = "camelCase")]
103pub struct RuntimeTunnelDebugSession {
104 /// Server-assigned session id.
105 pub session_id: String,
106 /// `local` for v1.
107 pub platform: String,
108 /// Absolute `wss://…/v1/debug/sessions/<sid>/runtime-client` URL.
109 pub tunnel_url: String,
110 /// Bearer the CLI presents on the WebSocket upgrade.
111 pub client_token: String,
112 /// Runtime operation this tunnel accepts.
113 pub kind: DebugSessionKind,
114 /// Runtime frame protocol version.
115 pub protocol_version: u32,
116 /// RFC3339 expiry.
117 #[serde(default, skip_serializing_if = "Option::is_none")]
118 pub expires_at: Option<String>,
119}
120
/// Client-to-agent runtime debug frames, relayed by the manager.
///
/// Internally tagged: the `type` key carries the camelCased variant name
/// (`startShell`, `startExec`, `stdin`, `resize`, `closeStdin`, `cancel`).
/// `rename_all` on an enum renames the variants only, so the fields inside
/// struct variants keep their snake_case names on the wire (`timeout_ms`,
/// `data_b64`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum RuntimeClientFrame {
    /// Start an interactive shell with an initial terminal size. Both
    /// dimensions are required fields.
    StartShell {
        /// Terminal columns.
        cols: u16,
        /// Terminal rows.
        rows: u16,
    },
    /// Start a non-interactive command.
    StartExec {
        /// Executable and argv to run on the remote host.
        command: Vec<String>,
        /// Optional timeout in milliseconds.
        timeout_ms: Option<u64>,
    },
    /// Standard input bytes, base64-encoded.
    Stdin {
        /// Base64-encoded bytes.
        data_b64: String,
    },
    /// Resize the interactive terminal.
    Resize {
        /// Terminal columns.
        cols: u16,
        /// Terminal rows.
        rows: u16,
    },
    /// Close stdin for the remote process.
    CloseStdin,
    /// Cancel the remote process.
    Cancel,
}
156
/// Agent-to-client runtime debug frames, relayed by the manager.
///
/// Internally tagged: the `type` key carries the camelCased variant name
/// (`started`, `stdout`, `stderr`, `exit`, `error`). `rename_all` on an enum
/// renames the variants only, so the fields inside struct variants keep their
/// snake_case names on the wire (`data_b64`, `timed_out`,
/// `output_truncated`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum RuntimeAgentFrame {
    /// The process has started.
    Started {
        /// Optional process id where available.
        pid: Option<u32>,
        /// Human-readable account or shell description.
        detail: Option<String>,
    },
    /// Standard output bytes, base64-encoded.
    Stdout {
        /// Base64-encoded bytes.
        data_b64: String,
    },
    /// Standard error bytes, base64-encoded.
    Stderr {
        /// Base64-encoded bytes.
        data_b64: String,
    },
    /// The process exited.
    Exit {
        /// Process exit code. `None` means signal/unknown.
        code: Option<i32>,
        /// Whether the process was terminated by the runtime timeout.
        timed_out: bool,
        /// Whether output was truncated.
        output_truncated: bool,
    },
    /// The remote runtime failed before producing an exit code.
    Error {
        /// Safe, user-facing error message.
        message: String,
    },
}
193
/// Payload of `DebugSessionResponse::PushTunnel`: the coordinates of a
/// WebSocket tunnel plus the bearer token the CLI authenticates with. Field
/// names are camelCased on the wire (`sessionId`, `tunnelUrl`, …).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PushTunnelDebugSession {
    /// Server-assigned session id (`ds_…`).
    pub session_id: String,
    /// `aws`, `gcp`, `azure`. Drives which `_ENDPOINT_URL` env var the CLI
    /// sets and which signing flow the manager runs.
    pub provider: String,
    /// Absolute `wss://…/v1/debug/sessions/<sid>/push-tunnel` URL.
    pub tunnel_url: String,
    /// Bearer the CLI presents on the WebSocket upgrade and on subsequent
    /// HTTP proxy requests.
    pub client_token: String,
    /// RFC3339 expiry mirroring the underlying impersonated credential's TTL.
    /// Omitted from the serialized payload when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
}
211
/// Payload of `DebugSessionResponse::Push`: credentials expressed as
/// environment variables, optional files, and an optional setup script.
///
/// `provider` and `env` are required on the wire (`env` has no serde default,
/// so it must be present even when empty); the remaining fields may be
/// omitted and are skipped on serialization when empty / `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PushDebugSession {
    /// Short identifier surfaced to the user (e.g. `"aws"`, `"gcp"`, `"azure"`).
    pub provider: String,
    /// Environment variables the CLI sets on the spawned process.
    pub env: BTreeMap<String, String>,
    /// Files the CLI materializes under the per-session tempdir before exec.
    /// When `env_var` is set, the CLI binds that env var to the resulting
    /// absolute file path.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub files: Vec<DebugCredFile>,
    /// Optional shell snippet the CLI runs (`sh -c`) after env/files are set
    /// up but before the user's command. Must be idempotent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub setup_script: Option<String>,
    /// RFC3339 expiry. None when the credential type doesn't expose one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
}
232
/// One file shipped inside a session payload (the `files` list of push and
/// pull sessions). Field names are camelCased on the wire (`fileName`,
/// `envVar`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DebugCredFile {
    /// Filename — no path components. Written under the per-session tempdir.
    /// NOTE(review): nothing in this type enforces the "no path components"
    /// rule; presumably the consumer validates it — confirm.
    pub file_name: String,
    /// File contents.
    pub content: String,
    /// If set, the CLI binds this env var to the file's absolute path.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub env_var: Option<String>,
}
244
/// Async-session handle. The CLI polls `poll_url` until the manager has
/// received the agent's tunnel-ready signal and can return a `Pull` payload
/// whose kubeconfig points at the per-session HTTPS proxy on the manager.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PendingDebugSession {
    /// Server-assigned session id. Embedded in URLs and command channel
    /// messages so all parties (CLI, manager, agent) reference the same row.
    pub session_id: String,
    /// Absolute URL the CLI should GET to poll for readiness. Same auth as
    /// `POST /v1/debug/sessions`.
    pub poll_url: String,
    /// Suggested initial poll interval in milliseconds. The CLI should back
    /// off on repeated `pending` responses but never poll faster than this.
    /// Deserializes to `0` when the key is absent, so consumers should apply
    /// their own floor rather than treat the value as a guaranteed minimum.
    #[serde(default)]
    pub poll_interval_ms: u32,
    /// RFC3339 absolute deadline. The CLI should give up after this and
    /// surface the most recent status. Bounded server-side; defaults to
    /// the session TTL.
    pub deadline: String,
}
266
/// Payload of `DebugSessionResponse::Pull`: a kubeconfig plus optional extra
/// env vars, files, and cloud endpoint proxies.
///
/// Only `kubeconfig` is required on the wire; every other field falls back to
/// its empty / `None` default when absent and is skipped on serialization in
/// that state.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PullDebugSession {
    /// Server-assigned session id. The CLI sends a DELETE to the manager on
    /// exit so the agent's `serve_session` ends.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    /// Kubeconfig YAML the CLI writes to a temp file and binds to `KUBECONFIG`.
    pub kubeconfig: String,
    /// Additional env vars the CLI sets alongside `KUBECONFIG`.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub env: BTreeMap<String, String>,
    /// Extra files to materialize alongside the kubeconfig.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub files: Vec<DebugCredFile>,
    /// When set, the CLI also spawns a local AWS loopback proxy and points
    /// `AWS_ENDPOINT_URL` at it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub aws_endpoint_url: Option<String>,
    /// GCP equivalent — signed agent-side with the pod's GKE Workload Identity
    /// token.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gcp_endpoint_url: Option<String>,
    /// Azure equivalent — signed agent-side with the pod's Workload Identity
    /// federated token exchanged for an AAD bearer.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub azure_endpoint_url: Option<String>,
    /// Bearer the CLI's cloud loopbacks must present on requests to the
    /// `*_endpoint_url`s. Same `client_token` as the kubeconfig auth.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cloud_proxy_token: Option<String>,
    /// RFC3339 expiry. None when the SA token doesn't expose one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
}
302
/// Derive `(service, signing_region)` from an AWS API URL host.
///
/// Handles the common shapes:
///
/// - `<service>.<region>.amazonaws.com` → (service, region)
/// - `<service>.amazonaws.com` → (service, fallback_region)
/// - `<bucket>.s3.<region>.amazonaws.com` → ("s3", region)
/// - `<bucket>.s3.amazonaws.com` → ("s3", fallback_region)
/// - `<id>.execute-api.<region>.amazonaws.com` → ("execute-api", region)
///
/// The label directly left of `amazonaws` is treated as the region only when
/// it is shaped like one (`us-east-1`, `us-gov-west-1`, …); the label before
/// it is then the service. Otherwise that label is the service and
/// `fallback_region` is used. Partition suffixes after `amazonaws`
/// (`.com`, `.com.cn`) are ignored.
///
/// The returned service is the SigV4 signing name, which can differ from the
/// host label (CloudWatch is served from `monitoring.<region>` and signs as
/// `monitoring`). Hosts that are not under `amazonaws`, and service labels
/// this table does not know, yield `"execute-api"`.
///
/// Takes `&str` so this stays HTTP-client-agnostic. Matching is
/// case-sensitive; callers are expected to pass a lower-cased host.
pub fn extract_aws_service_and_region(host: &str, fallback_region: &str) -> (&'static str, String) {
    let labels: Vec<&str> = host.split('.').collect();

    // Anchor on the right-most `amazonaws` label so bucket names that happen
    // to contain the word cannot shift the parse.
    let Some(amz_idx) = labels.iter().rposition(|l| *l == "amazonaws") else {
        return ("execute-api", fallback_region.to_string());
    };

    let (service, region) = match &labels[..amz_idx] {
        // Decide on the region's shape rather than the service's length:
        // a length cap wrongly rejected `secretsmanager`, `cloudformation`,
        // `apigateway` and `execute-api` regional hosts.
        [.., service, region] if looks_like_aws_region(region) => {
            (*service, region.to_string())
        }
        [.., service] => (*service, fallback_region.to_string()),
        [] => ("execute-api", fallback_region.to_string()),
    };

    let static_service: &'static str = match service {
        "sts" => "sts",
        "iam" => "iam",
        "ec2" => "ec2",
        "lambda" => "lambda",
        "s3" => "s3",
        "dynamodb" => "dynamodb",
        "sqs" => "sqs",
        "sns" => "sns",
        "ecr" => "ecr",
        "eks" => "eks",
        "ecs" => "ecs",
        "cloudformation" => "cloudformation",
        // CloudWatch's endpoint label is `monitoring`; `cloudwatch` is kept
        // for callers that relied on the previous mapping.
        "monitoring" | "cloudwatch" => "monitoring",
        "logs" => "logs",
        "ssm" => "ssm",
        "secretsmanager" => "secretsmanager",
        "kms" => "kms",
        "events" | "eventbridge" => "events",
        "apigateway" => "apigateway",
        "execute-api" => "execute-api",
        _ => "execute-api",
    };

    (static_service, region)
}

/// Whether `label` is shaped like an AWS region code: two or more lowercase
/// ASCII words followed by a numeric suffix, joined by `-` (`eu-west-2`,
/// `us-gov-west-1`, `us-isob-east-1`).
fn looks_like_aws_region(label: &str) -> bool {
    let mut parts = label.rsplit('-');

    // `rsplit` always yields at least one item; it is the trailing ordinal.
    let Some(ordinal) = parts.next() else {
        return false;
    };
    if ordinal.is_empty() || !ordinal.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }

    let mut words = 0usize;
    for word in parts {
        if word.is_empty() || !word.bytes().all(|b| b.is_ascii_lowercase()) {
            return false;
        }
        words += 1;
    }
    words >= 2
}
357
#[cfg(test)]
mod aws_endpoint_parsing_tests {
    use super::extract_aws_service_and_region;

    /// `<service>.<region>.amazonaws.com` takes the region from the host.
    #[test]
    fn regional_service() {
        let parsed = extract_aws_service_and_region("ec2.us-east-1.amazonaws.com", "us-west-2");
        let expected = ("ec2", String::from("us-east-1"));
        assert_eq!(parsed, expected);
    }

    /// `<service>.amazonaws.com` has no region label, so the fallback is used.
    #[test]
    fn global_service() {
        let parsed = extract_aws_service_and_region("iam.amazonaws.com", "us-east-1");
        let expected = ("iam", String::from("us-east-1"));
        assert_eq!(parsed, expected);
    }

    /// A leading bucket label does not disturb service/region detection.
    #[test]
    fn s3_bucket_regional() {
        let parsed =
            extract_aws_service_and_region("mybucket.s3.us-east-1.amazonaws.com", "us-west-2");
        let expected = ("s3", String::from("us-east-1"));
        assert_eq!(parsed, expected);
    }

    /// Hosts outside `amazonaws` resolve to `execute-api` in the fallback region.
    #[test]
    fn unknown_host() {
        let parsed = extract_aws_service_and_region("internal.example.com", "us-east-1");
        let expected = ("execute-api", String::from("us-east-1"));
        assert_eq!(parsed, expected);
    }
}
393}