Skip to main content

alien_core/
debug_session.rs

1//! Wire shapes for `alien debug` sessions.
2//!
3//! These types are the contract between the CLI, the manager (push mode),
4//! and the agent (pull mode). Pure data + a small AWS-host parser; no
5//! cloud-client dependencies, so any layer in the dep graph can speak them.
6//!
7//! The credential *translator* — projecting a resolved [`crate::ClientConfig`]
8//! onto a `DebugSessionResponse::Push` payload — lives in `alien-platform-core`,
9//! one tier above the cloud-client crates.
10
11use std::collections::BTreeMap;
12
13use serde::{Deserialize, Serialize};
14
15/// Errors raised by the debug-session producer (manager or agent). Callers
16/// wrap into their own error type when surfacing across crate boundaries.
17#[derive(Debug, thiserror::Error)]
18pub enum DebugSessionError {
19    /// A credential variant we don't know how to project onto a user shell.
20    /// The resolver path normally produces exportable variants — hitting this
21    /// means an upstream change.
22    #[error("alien debug ({platform}): {message}")]
23    UnsupportedCredential { platform: String, message: String },
24
25    /// The deployment's platform doesn't have a push-mode shell session
26    /// (Kubernetes, Local, Test).
27    #[error("alien debug: {message}")]
28    UnsupportedPlatform { message: String },
29
30    /// I/O error reading a credential off the manager's / agent's filesystem
31    /// (e.g. the Azure federated-token file).
32    #[error("alien debug ({platform}): {message}")]
33    IoError { platform: String, message: String },
34
35    /// Failed to mint a token from the resolved credential (e.g. GCP
36    /// `IAMCredentials.generateAccessToken`).
37    #[error("alien debug ({platform}): {message}")]
38    TokenMintFailed { platform: String, message: String },
39}
40
41/// Wire response. Discriminated by `kind`. Identical shape on both ends so
42/// the CLI can deserialize a session regardless of who produced it (manager
43/// or agent).
44#[derive(Debug, Clone, Serialize, Deserialize)]
45#[serde(tag = "kind", rename_all = "camelCase")]
46pub enum DebugSessionResponse {
47    /// Cloud credentials projected as env vars (+ optional files / setup
48    /// script). The CLI execs the user's command with the merged env.
49    Push(PushDebugSession),
50    /// Pure-Kubernetes session: a self-contained kubeconfig the CLI binds to
51    /// `KUBECONFIG`.
52    Pull(PullDebugSession),
53    /// Session creation is async. Returned for pull-mode deployments where
54    /// the agent must first open an outbound tunnel back to the manager
55    /// before the kubeconfig resolves. The CLI long-polls `poll_url` until
56    /// the session resolves to `Pull` (kubeconfig ready) or errors out.
57    Pending(PendingDebugSession),
58    /// Push-mode cloud session via a manager-hosted WebSocket tunnel.
59    /// Credentials stay on the manager; the CLI dials `tunnel_url`, spawns
60    /// a local HTTP proxy, and every cloud-CLI request the child process
61    /// emits is forwarded over the tunnel for the manager to re-sign with
62    /// the impersonated identity.
63    PushTunnel(PushTunnelDebugSession),
64    /// Runtime shell/exec session via an agent-hosted process tunnel.
65    RuntimeTunnel(RuntimeTunnelDebugSession),
66}
67
68impl DebugSessionResponse {
69    /// RFC3339 expiry of the underlying credentials, if the producer
70    /// surfaced one. Used by the CLI's on-disk cache to skip near-expired
71    /// sessions.
72    pub fn expires_at(&self) -> Option<&str> {
73        match self {
74            Self::Push(p) => p.expires_at.as_deref(),
75            Self::Pull(p) => p.expires_at.as_deref(),
76            Self::Pending(_) => None,
77            Self::PushTunnel(p) => p.expires_at.as_deref(),
78            Self::RuntimeTunnel(p) => p.expires_at.as_deref(),
79        }
80    }
81}
82
83/// What kind of debug session the caller is requesting.
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
85#[serde(rename_all = "camelCase")]
86pub enum DebugSessionKind {
87    /// Existing behavior: run local commands with deployment context.
88    Context,
89    /// Open an interactive shell in the deployment runtime.
90    RuntimeShell,
91    /// Run one non-interactive command in the deployment runtime.
92    RuntimeExec,
93}
94
95impl Default for DebugSessionKind {
96    fn default() -> Self {
97        Self::Context
98    }
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize)]
102#[serde(rename_all = "camelCase")]
103pub struct RuntimeTunnelDebugSession {
104    /// Server-assigned session id.
105    pub session_id: String,
106    /// `local` for v1.
107    pub platform: String,
108    /// Absolute `wss://…/v1/debug/sessions/<sid>/runtime-client` URL.
109    pub tunnel_url: String,
110    /// Bearer the CLI presents on the WebSocket upgrade.
111    pub client_token: String,
112    /// Runtime operation this tunnel accepts.
113    pub kind: DebugSessionKind,
114    /// Runtime frame protocol version.
115    pub protocol_version: u32,
116    /// RFC3339 expiry.
117    #[serde(default, skip_serializing_if = "Option::is_none")]
118    pub expires_at: Option<String>,
119}
120
121/// Client-to-agent runtime debug frames, relayed by the manager.
122#[derive(Debug, Clone, Serialize, Deserialize)]
123#[serde(tag = "type", rename_all = "camelCase")]
124pub enum RuntimeClientFrame {
125    /// Start an interactive shell with an optional initial terminal size.
126    StartShell {
127        /// Terminal columns.
128        cols: u16,
129        /// Terminal rows.
130        rows: u16,
131    },
132    /// Start a non-interactive command.
133    StartExec {
134        /// Executable and argv to run on the remote host.
135        command: Vec<String>,
136        /// Optional timeout in milliseconds.
137        timeout_ms: Option<u64>,
138    },
139    /// Standard input bytes, base64-encoded.
140    Stdin {
141        /// Base64-encoded bytes.
142        data_b64: String,
143    },
144    /// Resize the interactive terminal.
145    Resize {
146        /// Terminal columns.
147        cols: u16,
148        /// Terminal rows.
149        rows: u16,
150    },
151    /// Close stdin for the remote process.
152    CloseStdin,
153    /// Cancel the remote process.
154    Cancel,
155}
156
157/// Agent-to-client runtime debug frames, relayed by the manager.
158#[derive(Debug, Clone, Serialize, Deserialize)]
159#[serde(tag = "type", rename_all = "camelCase")]
160pub enum RuntimeAgentFrame {
161    /// The process has started.
162    Started {
163        /// Optional process id where available.
164        pid: Option<u32>,
165        /// Human-readable account or shell description.
166        detail: Option<String>,
167    },
168    /// Standard output bytes, base64-encoded.
169    Stdout {
170        /// Base64-encoded bytes.
171        data_b64: String,
172    },
173    /// Standard error bytes, base64-encoded.
174    Stderr {
175        /// Base64-encoded bytes.
176        data_b64: String,
177    },
178    /// The process exited.
179    Exit {
180        /// Process exit code. `None` means signal/unknown.
181        code: Option<i32>,
182        /// Whether the process was terminated by the runtime timeout.
183        timed_out: bool,
184        /// Whether output was truncated.
185        output_truncated: bool,
186    },
187    /// The remote runtime failed before producing an exit code.
188    Error {
189        /// Safe, user-facing error message.
190        message: String,
191    },
192}
193
194#[derive(Debug, Clone, Serialize, Deserialize)]
195#[serde(rename_all = "camelCase")]
196pub struct PushTunnelDebugSession {
197    /// Server-assigned session id (`ds_…`).
198    pub session_id: String,
199    /// `aws`, `gcp`, `azure`. Drives which `_ENDPOINT_URL` env var the CLI
200    /// sets and which signing flow the manager runs.
201    pub provider: String,
202    /// Absolute `wss://…/v1/debug/sessions/<sid>/push-tunnel` URL.
203    pub tunnel_url: String,
204    /// Bearer the CLI presents on the WebSocket upgrade and on subsequent
205    /// HTTP proxy requests.
206    pub client_token: String,
207    /// RFC3339 expiry mirroring the underlying impersonated credential's TTL.
208    #[serde(default, skip_serializing_if = "Option::is_none")]
209    pub expires_at: Option<String>,
210}
211
212#[derive(Debug, Clone, Serialize, Deserialize)]
213#[serde(rename_all = "camelCase")]
214pub struct PushDebugSession {
215    /// Short identifier surfaced to the user (e.g. `"aws"`, `"gcp"`, `"azure"`).
216    pub provider: String,
217    /// Environment variables the CLI sets on the spawned process.
218    pub env: BTreeMap<String, String>,
219    /// Files the CLI materializes under the per-session tempdir before exec.
220    /// When `env_var` is set, the CLI binds that env var to the resulting
221    /// absolute file path.
222    #[serde(default, skip_serializing_if = "Vec::is_empty")]
223    pub files: Vec<DebugCredFile>,
224    /// Optional shell snippet the CLI runs (`sh -c`) after env/files are set
225    /// up but before the user's command. Must be idempotent.
226    #[serde(default, skip_serializing_if = "Option::is_none")]
227    pub setup_script: Option<String>,
228    /// RFC3339 expiry. None when the credential type doesn't expose one.
229    #[serde(default, skip_serializing_if = "Option::is_none")]
230    pub expires_at: Option<String>,
231}
232
233#[derive(Debug, Clone, Serialize, Deserialize)]
234#[serde(rename_all = "camelCase")]
235pub struct DebugCredFile {
236    /// Filename — no path components. Written under the per-session tempdir.
237    pub file_name: String,
238    /// File contents.
239    pub content: String,
240    /// If set, the CLI binds this env var to the file's absolute path.
241    #[serde(default, skip_serializing_if = "Option::is_none")]
242    pub env_var: Option<String>,
243}
244
245/// Async-session handle. The CLI polls `poll_url` until the manager has
246/// received the agent's tunnel-ready signal and can return a `Pull` payload
247/// whose kubeconfig points at the per-session HTTPS proxy on the manager.
248#[derive(Debug, Clone, Serialize, Deserialize)]
249#[serde(rename_all = "camelCase")]
250pub struct PendingDebugSession {
251    /// Server-assigned session id. Embedded in URLs and command channel
252    /// messages so all parties (CLI, manager, agent) reference the same row.
253    pub session_id: String,
254    /// Absolute URL the CLI should GET to poll for readiness. Same auth as
255    /// `POST /v1/debug/sessions`.
256    pub poll_url: String,
257    /// Suggested initial poll interval in milliseconds. The CLI should back
258    /// off on repeated `pending` responses but never poll faster than this.
259    #[serde(default)]
260    pub poll_interval_ms: u32,
261    /// RFC3339 absolute deadline. The CLI should give up after this and
262    /// surface the most recent status. Bounded server-side; defaults to
263    /// the session TTL.
264    pub deadline: String,
265}
266
267#[derive(Debug, Clone, Serialize, Deserialize)]
268#[serde(rename_all = "camelCase")]
269pub struct PullDebugSession {
270    /// Server-assigned session id. The CLI sends a DELETE to the manager on
271    /// exit so the agent's `serve_session` ends.
272    #[serde(default, skip_serializing_if = "Option::is_none")]
273    pub session_id: Option<String>,
274    /// Kubeconfig YAML the CLI writes to a temp file and binds to `KUBECONFIG`.
275    pub kubeconfig: String,
276    /// Additional env vars the CLI sets alongside `KUBECONFIG`.
277    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
278    pub env: BTreeMap<String, String>,
279    /// Extra files to materialize alongside the kubeconfig.
280    #[serde(default, skip_serializing_if = "Vec::is_empty")]
281    pub files: Vec<DebugCredFile>,
282    /// When set, the CLI also spawns a local AWS loopback proxy and points
283    /// `AWS_ENDPOINT_URL` at it.
284    #[serde(default, skip_serializing_if = "Option::is_none")]
285    pub aws_endpoint_url: Option<String>,
286    /// GCP equivalent — signed agent-side with the pod's GKE Workload Identity
287    /// token.
288    #[serde(default, skip_serializing_if = "Option::is_none")]
289    pub gcp_endpoint_url: Option<String>,
290    /// Azure equivalent — signed agent-side with the pod's Workload Identity
291    /// federated token exchanged for an AAD bearer.
292    #[serde(default, skip_serializing_if = "Option::is_none")]
293    pub azure_endpoint_url: Option<String>,
294    /// Bearer the CLI's cloud loopbacks must present on requests to the
295    /// `*_endpoint_url`s. Same `client_token` as the kubeconfig auth.
296    #[serde(default, skip_serializing_if = "Option::is_none")]
297    pub cloud_proxy_token: Option<String>,
298    /// RFC3339 expiry. None when the SA token doesn't expose one.
299    #[serde(default, skip_serializing_if = "Option::is_none")]
300    pub expires_at: Option<String>,
301}
302
/// Derive `(service, signing_region)` from an AWS API URL host.
///
/// Only the labels to the left of the last `amazonaws` label are inspected.
/// Handles the common shapes:
///
/// - `<service>.<region>.amazonaws.com`     → (service, region)
/// - `<service>.amazonaws.com`              → (service, fallback_region)
/// - `<bucket>.s3.<region>.amazonaws.com`   → ("s3", region)
/// - `<bucket>.s3.amazonaws.com`            → ("s3", fallback_region)
///
/// A label is treated as the region only when it is shaped like an AWS
/// region code (`us-east-1`, `us-gov-west-1`); the service name's length is
/// irrelevant, so long names such as `secretsmanager` or `execute-api`
/// resolve correctly on regional hosts.
///
/// The returned service is the signing name from a fixed table (e.g. both
/// `cloudwatch` and `monitoring` map to `"monitoring"`). Unrecognized
/// service labels, and hosts without an `amazonaws` label, map to
/// `"execute-api"`.
///
/// Falls back to `fallback_region` when the host doesn't carry a region.
/// Takes `&str` so this stays HTTP-client-agnostic.
pub fn extract_aws_service_and_region(host: &str, fallback_region: &str) -> (&'static str, String) {
    let labels: Vec<&str> = host.split('.').collect();

    // Use the last `amazonaws` label so suffixes like `.com.cn` still work.
    let Some(amz_idx) = labels.iter().rposition(|l| *l == "amazonaws") else {
        return ("execute-api", fallback_region.to_string());
    };

    let (service, region) = match &labels[..amz_idx] {
        // `…<service>.<region>.amazonaws…`; anything further left is a
        // bucket name or other subdomain and is ignored.
        [.., service, region] if looks_like_aws_region(region) => (*service, region.to_string()),
        // No region label: the label next to `amazonaws` is the service.
        [.., service] => (*service, fallback_region.to_string()),
        // Bare `amazonaws.com`.
        [] => ("execute-api", fallback_region.to_string()),
    };

    // Map the host label onto a `'static` signing name.
    let static_service: &'static str = match service {
        "sts" => "sts",
        "iam" => "iam",
        "ec2" => "ec2",
        "lambda" => "lambda",
        "s3" => "s3",
        "dynamodb" => "dynamodb",
        "sqs" => "sqs",
        "sns" => "sns",
        "ecr" => "ecr",
        "eks" => "eks",
        "ecs" => "ecs",
        "cloudformation" => "cloudformation",
        // CloudWatch's endpoint host label is `monitoring`.
        "cloudwatch" | "monitoring" => "monitoring",
        "logs" => "logs",
        "ssm" => "ssm",
        "secretsmanager" => "secretsmanager",
        "kms" => "kms",
        "events" | "eventbridge" => "events",
        "apigateway" => "apigateway",
        _ => "execute-api",
    };

    (static_service, region)
}

/// Returns `true` when `label` is shaped like an AWS region code: two or more
/// ASCII-alphabetic segments followed by one all-digit segment, joined by `-`
/// (`us-east-1`, `us-gov-west-1`). Labels such as `s3-website-us-east-1` or
/// `my-bucket` are rejected.
fn looks_like_aws_region(label: &str) -> bool {
    let Some((prefix, number)) = label.rsplit_once('-') else {
        return false;
    };
    let is_word = |seg: &str| !seg.is_empty() && seg.bytes().all(|b| b.is_ascii_alphabetic());

    !number.is_empty()
        && number.bytes().all(|b| b.is_ascii_digit())
        && prefix.contains('-')
        && prefix.split('-').all(is_word)
}
357
#[cfg(test)]
mod aws_endpoint_parsing_tests {
    use super::extract_aws_service_and_region;

    /// Asserts that `host`, parsed with `fallback` as the fallback region,
    /// yields exactly `(service, region)`.
    #[track_caller]
    fn assert_parses(host: &str, fallback: &str, service: &str, region: &str) {
        let parsed = extract_aws_service_and_region(host, fallback);
        assert_eq!(parsed, (service, region.to_string()));
    }

    /// `<service>.<region>` hosts use the region from the host.
    #[test]
    fn regional_service() {
        assert_parses("ec2.us-east-1.amazonaws.com", "us-west-2", "ec2", "us-east-1");
    }

    /// Hosts without a region label use the fallback region.
    #[test]
    fn global_service() {
        assert_parses("iam.amazonaws.com", "us-east-1", "iam", "us-east-1");
    }

    /// A leading bucket label is ignored for regional S3 hosts.
    #[test]
    fn s3_bucket_regional() {
        assert_parses(
            "mybucket.s3.us-east-1.amazonaws.com",
            "us-west-2",
            "s3",
            "us-east-1",
        );
    }

    /// Non-AWS hosts map to `execute-api` with the fallback region.
    #[test]
    fn unknown_host() {
        assert_parses("internal.example.com", "us-east-1", "execute-api", "us-east-1");
    }
}
393}