// fakecloud_lambda/runtime/k8s/mod.rs

//! Kubernetes [`LambdaBackend`] implementation.
//!
//! Spawns Lambda function runtimes as native Pods in a Kubernetes
//! cluster instead of as Docker containers. Gated by
//! `FAKECLOUD_LAMBDA_BACKEND=k8s` (or the global
//! `FAKECLOUD_CONTAINER_BACKEND=k8s`) on the fakecloud server.
//!
//! The shared client bootstrap, Pod lifecycle (create/wait/delete),
//! reaping, and naming live in the `fakecloud-k8s` crate; this module
//! only builds the Lambda-specific Pod spec and wires the lifecycle into
//! the [`LambdaBackend`] trait.
//!
//! See `website/content/docs/guides/kubernetes-backend.md` for the
//! operator-facing setup (ServiceAccount, RBAC, Deployment YAML).
15
16pub mod spec;
17
18use std::time::Duration;
19
20use async_trait::async_trait;
21use fakecloud_k8s::{K8sClient, K8sEnv, K8sEnvError};
22
23use super::backend::{BackendHandle, LambdaBackend, RuntimeError, WarmInstance};
24use crate::state::LambdaFunction;
25use spec::{build_pod_spec, unique_pod_name, PodSpecContext};
26
/// Value of the `fakecloud-service` label that Lambda Pods carry, so
/// reaping only touches Lambda Pods.
const SERVICE: &str = "lambda";
30
31/// Errors that can prevent the K8s backend from initializing. Surfaced
32/// to the operator at fakecloud startup; never silently swallowed.
33#[derive(Debug, thiserror::Error)]
34pub enum K8sBackendError {
35    #[error(transparent)]
36    Env(#[from] K8sEnvError),
37    #[error("failed to connect to the Kubernetes cluster: {0}")]
38    Connect(String),
39}
40
41/// Native Kubernetes Lambda execution backend.
42pub struct K8sBackend {
43    client: K8sClient,
44    /// In-cluster URL of the fakecloud server (e.g.
45    /// `http://fakecloud.fakecloud.svc.cluster.local:4566`). Init
46    /// containers fetch code + layers from this host.
47    self_url: String,
48    /// Just the host part of `self_url` — used to rewrite localhost env
49    /// values so user code can talk to fakecloud from inside the Pod.
50    self_host: String,
51    /// Host:port for the fakecloud ECR endpoint (defaults to the host
52    /// of `self_url` when `FAKECLOUD_K8S_ECR_URL` is unset).
53    ecr_host: String,
54    ecr_port: u16,
55    /// Bearer token the init container presents when fetching code +
56    /// layers. Generated at server startup, kept in process memory only.
57    internal_token: String,
58    /// Optional `imagePullSecrets` reference for image-package functions
59    /// that pull from a registry needing credentials.
60    pull_secret: Option<String>,
61}
62
63impl K8sBackend {
64    /// Read configuration from env vars and connect to the cluster.
65    /// Fails fast on missing required config — never silently degrades.
66    /// `default_ecr_port` is fakecloud's bound port; used as the ECR
67    /// port when `FAKECLOUD_K8S_ECR_URL` is unset.
68    pub async fn from_env(
69        default_ecr_port: u16,
70        internal_token: String,
71    ) -> Result<Self, K8sBackendError> {
72        let env = K8sEnv::from_env(default_ecr_port)?;
73        let client = K8sClient::connect(env.namespace.clone())
74            .await
75            .map_err(|e| K8sBackendError::Connect(e.to_string()))?;
76
77        tracing::info!(
78            namespace = %env.namespace,
79            self_url = %env.self_url,
80            ecr = %format!("{}:{}", env.ecr_host, env.ecr_port),
81            "K8s Lambda backend initialized"
82        );
83
84        Ok(Self {
85            client,
86            self_url: env.self_url,
87            self_host: env.self_host,
88            ecr_host: env.ecr_host,
89            ecr_port: env.ecr_port,
90            internal_token,
91            pull_secret: env.pull_secret,
92        })
93    }
94}
95
/// Extract the account ID from a function ARN
/// (`arn:aws:lambda:<region>:<account>:function:<name>[:<qual>]`).
///
/// The account ID is the fifth `:`-separated field. When that field is
/// missing (fewer than five fields) or empty (e.g.
/// `arn:aws:lambda:us-east-1::function:f`), the default account
/// `000000000000` is returned instead, so callers never receive an
/// empty account ID.
fn account_id_from_arn(arn: &str) -> &str {
    const DEFAULT_ACCOUNT_ID: &str = "000000000000";

    arn.split(':')
        .nth(4)
        // An empty segment is as unusable as a missing one.
        .filter(|account| !account.is_empty())
        .unwrap_or(DEFAULT_ACCOUNT_ID)
}
101
102#[async_trait]
103impl LambdaBackend for K8sBackend {
104    fn name(&self) -> &str {
105        "kubernetes"
106    }
107
108    async fn launch(
109        &self,
110        func: &LambdaFunction,
111        _code_zip: Option<&[u8]>,
112        _layers: &[Vec<u8>],
113        deploy_id: &str,
114    ) -> Result<WarmInstance, RuntimeError> {
115        let account_id = account_id_from_arn(&func.function_arn);
116        let ctx = PodSpecContext {
117            instance_id: self.client.instance_id(),
118            namespace: self.client.namespace(),
119            self_url: &self.self_url,
120            self_host: &self.self_host,
121            ecr_host: &self.ecr_host,
122            ecr_port: self.ecr_port,
123            internal_token: &self.internal_token,
124            account_id,
125            pull_secret: self.pull_secret.as_deref(),
126        };
127        let mut pod =
128            build_pod_spec(func, deploy_id, &ctx).map_err(RuntimeError::ContainerStartFailed)?;
129        // Override the deterministic function+deploy name with a per-launch
130        // unique one so concurrent instances of the same function don't collide
131        // and a terminating Pod never blocks its replacement (see
132        // `unique_pod_name`).
133        let pod_name = unique_pod_name(&func.function_name, deploy_id);
134        pod.metadata.name = Some(pod_name.clone());
135
136        self.client
137            .create_pod(&pod)
138            .await
139            .map_err(|e| RuntimeError::ContainerStartFailed(format!("k8s create pod: {e}")))?;
140
141        // Tear the Pod down again if it never becomes ready, so a failed
142        // launch doesn't leak a Pod.
143        let pod_ip = match self
144            .client
145            .wait_for_pod_ip(&pod_name, Duration::from_secs(60))
146            .await
147        {
148            Ok(ip) => ip,
149            Err(e) => {
150                self.client.delete_pod(&pod_name).await;
151                return Err(RuntimeError::ContainerStartFailed(e.to_string()));
152            }
153        };
154        // Pod-Running doesn't guarantee the RIE inside the main container
155        // is listening yet — TCP-handshake the invoke port like Docker.
156        if let Err(e) = K8sClient::wait_for_tcp(&pod_ip, 8080, Duration::from_secs(10)).await {
157            self.client.delete_pod(&pod_name).await;
158            return Err(RuntimeError::ContainerStartFailed(format!(
159                "RIE on {pod_ip}:8080 not ready: {e}"
160            )));
161        }
162
163        tracing::info!(
164            function = %func.function_name,
165            pod = %pod_name,
166            namespace = %self.client.namespace(),
167            pod_ip = %pod_ip,
168            "Lambda Pod started"
169        );
170
171        Ok(WarmInstance {
172            endpoint: format!("{pod_ip}:8080"),
173            handle: BackendHandle::Pod {
174                namespace: self.client.namespace().to_string(),
175                name: pod_name,
176            },
177        })
178    }
179
180    async fn terminate(&self, handle: &BackendHandle) {
181        match handle {
182            BackendHandle::Pod { name, .. } => self.client.delete_pod(name).await,
183            // Docker handles aren't ours to manage — defensive no-op.
184            BackendHandle::Container { .. } => {}
185        }
186    }
187
188    /// Sweep Lambda Pods that belong to a previous fakecloud process.
189    /// Without this, a fakecloud restart leaks the previous run's Pods
190    /// and `Create` collides on function names. Mirrors the docker
191    /// `reaper` semantics.
192    async fn reap_stale(&self) {
193        self.client.reap_stale(SERVICE).await;
194    }
195}
196
/// Unit tests for [`account_id_from_arn`].
#[cfg(test)]
mod tests {
    use super::account_id_from_arn;

    /// An unqualified function ARN yields its account field.
    #[test]
    fn account_id_from_simple_arn() {
        assert_eq!(
            account_id_from_arn("arn:aws:lambda:us-east-1:123456789012:function:my-fn"),
            "123456789012"
        );
    }

    /// A trailing qualifier (`:PROD`) does not shift the account field.
    #[test]
    fn account_id_from_qualified_arn() {
        assert_eq!(
            account_id_from_arn("arn:aws:lambda:us-east-1:000000000000:function:my-fn:PROD"),
            "000000000000"
        );
    }

    /// Input with fewer than five `:`-separated fields gets the default.
    #[test]
    fn account_id_falls_back_for_malformed_arn() {
        assert_eq!(account_id_from_arn("not-an-arn"), "000000000000");
    }
}