1use crate::container::{ContainerState, ContainerStateManager};
2use crate::error::{NucleusError, Result};
3use crate::resources::{IoDeviceLimit, ResourceLimits};
4use serde::{Deserialize, Serialize};
5use std::fs;
6use std::fs::OpenOptions;
7use std::io::Write;
8use std::os::unix::fs::OpenOptionsExt;
9use std::path::Path;
10use std::time::SystemTime;
11
/// Resource limits captured from a container's cgroup directory at checkpoint time.
///
/// Each field mirrors one control file read by `from_cgroup_dir`. For the
/// optional fields, `None` means the corresponding file contained the literal
/// `max`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CheckpointResourceLimits {
    /// Value of `memory.max`.
    pub memory_bytes: Option<u64>,
    /// Value of `memory.high`.
    pub memory_high: Option<u64>,
    /// Value of `memory.swap.max`.
    pub memory_swap_max: Option<u64>,
    /// First token of `cpu.max` (the quota); `None` when that token is `max`.
    /// Presumably microseconds, per the `_us` suffix.
    pub cpu_quota_us: Option<u64>,
    /// Second token of `cpu.max` (the period). A zero period is rejected when
    /// the value is read from a cgroup directory.
    pub cpu_period_us: u64,
    /// Value of `cpu.weight`.
    pub cpu_weight: Option<u64>,
    /// Value of `pids.max`.
    pub pids_max: Option<u64>,
    /// One parsed entry per non-empty line of `io.max`; empty when that file
    /// does not exist.
    pub io_limits: Vec<IoDeviceLimit>,
}
24
25impl CheckpointResourceLimits {
26 fn from_cgroup_dir(cgroup_path: &Path) -> Result<Self> {
27 let (cpu_quota_us, cpu_period_us) = Self::read_cpu_quota(cgroup_path.join("cpu.max"))?;
28 Ok(Self {
29 memory_bytes: Self::read_optional_u64(cgroup_path.join("memory.max"))?,
30 memory_high: Self::read_optional_u64(cgroup_path.join("memory.high"))?,
31 memory_swap_max: Self::read_optional_u64(cgroup_path.join("memory.swap.max"))?,
32 cpu_quota_us,
33 cpu_period_us,
34 cpu_weight: Self::read_optional_u64(cgroup_path.join("cpu.weight"))?,
35 pids_max: Self::read_optional_u64(cgroup_path.join("pids.max"))?,
36 io_limits: Self::read_io_limits(cgroup_path.join("io.max"))?,
37 })
38 }
39
40 pub fn to_resource_limits(&self) -> ResourceLimits {
41 ResourceLimits {
42 memory_bytes: self.memory_bytes,
43 memory_high: self.memory_high,
44 memory_swap_max: self.memory_swap_max,
45 cpu_quota_us: self.cpu_quota_us,
46 cpu_period_us: self.cpu_period_us,
47 cpu_weight: self.cpu_weight,
48 pids_max: self.pids_max,
49 io_limits: self.io_limits.clone(),
50 memlock_bytes: None,
51 }
52 }
53
54 pub fn validate(&self) -> Result<()> {
55 self.to_resource_limits()
56 .validate_runtime_sanity()
57 .map_err(|e| {
58 NucleusError::CheckpointError(format!("Invalid checkpoint resource limits: {}", e))
59 })
60 }
61
62 pub fn cpu_limit_millicores(&self) -> Option<u64> {
63 if self.cpu_period_us == 0 {
64 return None;
65 }
66 self.cpu_quota_us
67 .map(|quota| quota.saturating_mul(1000) / self.cpu_period_us)
68 }
69
70 fn read_optional_u64(path: impl AsRef<Path>) -> Result<Option<u64>> {
71 let path = path.as_ref();
72 let content = fs::read_to_string(path).map_err(|e| {
73 NucleusError::CheckpointError(format!(
74 "Failed to read cgroup limit file {:?}: {}",
75 path, e
76 ))
77 })?;
78 let value = content.trim();
79 if value == "max" {
80 return Ok(None);
81 }
82 value.parse::<u64>().map(Some).map_err(|e| {
83 NucleusError::CheckpointError(format!(
84 "Failed to parse cgroup limit file {:?}: {}",
85 path, e
86 ))
87 })
88 }
89
90 fn read_cpu_quota(path: impl AsRef<Path>) -> Result<(Option<u64>, u64)> {
91 let path = path.as_ref();
92 let content = fs::read_to_string(path).map_err(|e| {
93 NucleusError::CheckpointError(format!("Failed to read {:?}: {}", path, e))
94 })?;
95 let mut parts = content.split_whitespace();
96 let quota = parts.next().ok_or_else(|| {
97 NucleusError::CheckpointError(format!("Invalid cpu.max format in {:?}", path))
98 })?;
99 let period = parts.next().ok_or_else(|| {
100 NucleusError::CheckpointError(format!("Missing cpu.max period in {:?}", path))
101 })?;
102 if parts.next().is_some() {
103 return Err(NucleusError::CheckpointError(format!(
104 "Invalid cpu.max format in {:?}",
105 path
106 )));
107 }
108
109 let cpu_quota_us = if quota == "max" {
110 None
111 } else {
112 Some(quota.parse::<u64>().map_err(|e| {
113 NucleusError::CheckpointError(format!("Failed to parse cpu.max quota: {}", e))
114 })?)
115 };
116 let cpu_period_us = period.parse::<u64>().map_err(|e| {
117 NucleusError::CheckpointError(format!("Failed to parse cpu.max period: {}", e))
118 })?;
119 if cpu_period_us == 0 {
120 return Err(NucleusError::CheckpointError(format!(
121 "Invalid cpu.max period in {:?}: period must be greater than 0",
122 path
123 )));
124 }
125
126 Ok((cpu_quota_us, cpu_period_us))
127 }
128
129 fn read_io_limits(path: impl AsRef<Path>) -> Result<Vec<IoDeviceLimit>> {
130 let path = path.as_ref();
131 let content = match fs::read_to_string(path) {
132 Ok(content) => content,
133 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
134 Err(e) => {
135 return Err(NucleusError::CheckpointError(format!(
136 "Failed to read {:?}: {}",
137 path, e
138 )))
139 }
140 };
141
142 content
143 .lines()
144 .filter(|line| !line.trim().is_empty())
145 .map(IoDeviceLimit::parse)
146 .collect()
147 }
148}
149
/// Metadata describing a checkpoint, persisted as `metadata.json` inside the
/// checkpoint directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckpointMetadata {
    /// Identifier of the checkpointed container (copied from the container state).
    pub container_id: String,

    /// Name of the checkpointed container (copied from the container state).
    pub container_name: String,

    /// PID recorded in the container state when the checkpoint was taken.
    pub original_pid: u32,

    /// Command line recorded in the container state.
    pub command: Vec<String>,

    /// Seconds since the Unix epoch at which the metadata was created.
    pub checkpoint_at: u64,

    /// `CARGO_PKG_VERSION` of the crate build that produced the checkpoint.
    pub version: String,

    /// Whether the container state reported gVisor in use.
    pub using_gvisor: bool,

    /// Whether the container state reported rootless mode.
    pub rootless: bool,

    /// Cgroup directory of the container, if the state had one. Defaults to
    /// `None` when the key is absent from the serialized metadata.
    #[serde(default)]
    pub cgroup_path: Option<String>,

    /// Limits read from `cgroup_path` at checkpoint time. Defaults to `None`
    /// when the key is absent from the serialized metadata.
    #[serde(default)]
    pub resource_limits: Option<CheckpointResourceLimits>,
}
185
186impl CheckpointMetadata {
187 pub fn from_state(state: &ContainerState) -> Result<Self> {
189 let checkpoint_at = SystemTime::now()
190 .duration_since(SystemTime::UNIX_EPOCH)
191 .unwrap_or_default()
192 .as_secs();
193 let resource_limits = state
194 .cgroup_path
195 .as_deref()
196 .map(|path| CheckpointResourceLimits::from_cgroup_dir(Path::new(path)))
197 .transpose()?;
198
199 Ok(Self {
200 container_id: state.id.clone(),
201 container_name: state.name.clone(),
202 original_pid: state.pid,
203 command: state.command.clone(),
204 checkpoint_at,
205 version: env!("CARGO_PKG_VERSION").to_string(),
206 using_gvisor: state.using_gvisor,
207 rootless: state.rootless,
208 cgroup_path: state.cgroup_path.clone(),
209 resource_limits,
210 })
211 }
212
213 pub fn save(&self, dir: &Path) -> Result<()> {
215 let path = dir.join("metadata.json");
216 let tmp_path = dir.join("metadata.json.tmp");
217 let json = serde_json::to_string_pretty(self).map_err(|e| {
218 NucleusError::CheckpointError(format!("Failed to serialize metadata: {}", e))
219 })?;
220
221 if tmp_path.exists() {
222 let meta = fs::symlink_metadata(&tmp_path).map_err(|e| {
223 NucleusError::CheckpointError(format!(
224 "Failed to inspect temp metadata file {:?}: {}",
225 tmp_path, e
226 ))
227 })?;
228 if meta.file_type().is_symlink() {
229 return Err(NucleusError::CheckpointError(format!(
230 "Refusing symlink temp metadata file {:?}",
231 tmp_path
232 )));
233 }
234 fs::remove_file(&tmp_path).map_err(|e| {
235 NucleusError::CheckpointError(format!(
236 "Failed to remove stale temp metadata file {:?}: {}",
237 tmp_path, e
238 ))
239 })?;
240 }
241
242 let mut file = OpenOptions::new()
243 .create_new(true)
244 .write(true)
245 .mode(0o600)
246 .custom_flags(libc::O_NOFOLLOW)
247 .open(&tmp_path)
248 .map_err(|e| {
249 NucleusError::CheckpointError(format!(
250 "Failed to open temp metadata file {:?}: {}",
251 tmp_path, e
252 ))
253 })?;
254
255 file.write_all(json.as_bytes()).map_err(|e| {
256 NucleusError::CheckpointError(format!(
257 "Failed to write metadata file {:?}: {}",
258 tmp_path, e
259 ))
260 })?;
261 file.sync_all().map_err(|e| {
262 NucleusError::CheckpointError(format!(
263 "Failed to sync metadata file {:?}: {}",
264 tmp_path, e
265 ))
266 })?;
267
268 fs::rename(&tmp_path, &path).map_err(|e| {
269 NucleusError::CheckpointError(format!(
270 "Failed to atomically replace metadata file {:?}: {}",
271 path, e
272 ))
273 })?;
274 Ok(())
275 }
276
277 pub fn load(dir: &Path) -> Result<Self> {
279 let path = dir.join("metadata.json");
280 let json = ContainerStateManager::read_file_nofollow(&path).map_err(|e| {
281 NucleusError::CheckpointError(format!("Failed to read metadata {:?}: {}", path, e))
282 })?;
283 let metadata: Self = serde_json::from_str(&json).map_err(|e| {
284 NucleusError::CheckpointError(format!("Failed to parse metadata: {}", e))
285 })?;
286 if let Some(resource_limits) = metadata.resource_limits.as_ref() {
287 resource_limits.validate()?;
288 }
289 Ok(metadata)
290 }
291}
292
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::fs as unix_fs;

    /// Minimal metadata value with no cgroup information attached.
    fn sample_metadata() -> CheckpointMetadata {
        CheckpointMetadata {
            container_id: "test-id".to_string(),
            container_name: "test".to_string(),
            original_pid: 1,
            command: vec!["/bin/sh".to_string()],
            checkpoint_at: 0,
            version: "0.0.0".to_string(),
            using_gvisor: false,
            rootless: false,
            cgroup_path: None,
            resource_limits: None,
        }
    }

    /// A symlink planted at the temp-file path must make `save` fail rather
    /// than write through the link.
    #[test]
    fn test_save_rejects_symlink_target() {
        let workdir = tempfile::tempdir().unwrap();

        // File the symlink points at, standing in for an attacker-chosen target.
        let link_target = workdir.path().join("attacker-owned-file");
        std::fs::write(&link_target, "").unwrap();

        // Plant the symlink exactly where `save` creates its temp file.
        let planted_link = workdir.path().join("metadata.json.tmp");
        unix_fs::symlink(&link_target, &planted_link).unwrap();

        let outcome = sample_metadata().save(workdir.path());
        assert!(
            outcome.is_err(),
            "save() must reject symlink at temp file path (O_NOFOLLOW / symlink check)"
        );
    }

    /// Every control file present with a concrete value is parsed into the
    /// matching field.
    #[test]
    fn test_checkpoint_resource_limits_from_cgroup_dir() {
        let cgroup_dir = tempfile::tempdir().unwrap();
        let control_files = [
            ("memory.max", "536870912\n"),
            ("memory.high", "483183820\n"),
            ("memory.swap.max", "0\n"),
            ("cpu.max", "50000 100000\n"),
            ("cpu.weight", "100\n"),
            ("pids.max", "256\n"),
            ("io.max", "8:0 rbps=1048576 wbps=2097152\n"),
        ];
        for &(file_name, contents) in control_files.iter() {
            std::fs::write(cgroup_dir.path().join(file_name), contents).unwrap();
        }

        let parsed = CheckpointResourceLimits::from_cgroup_dir(cgroup_dir.path()).unwrap();
        assert_eq!(parsed.memory_bytes, Some(536_870_912));
        assert_eq!(parsed.memory_high, Some(483_183_820));
        assert_eq!(parsed.memory_swap_max, Some(0));
        assert_eq!(parsed.cpu_quota_us, Some(50_000));
        assert_eq!(parsed.cpu_period_us, 100_000);
        assert_eq!(parsed.cpu_weight, Some(100));
        assert_eq!(parsed.pids_max, Some(256));
        assert_eq!(parsed.io_limits.len(), 1);
        assert_eq!(parsed.cpu_limit_millicores(), Some(500));
    }

    /// Metadata on disk carrying a zero CPU period must be rejected by `load`.
    #[test]
    fn test_load_rejects_zero_cpu_period_in_metadata() {
        let checkpoint_dir = tempfile::tempdir().unwrap();
        let metadata_file = checkpoint_dir.path().join("metadata.json");
        std::fs::write(
            metadata_file,
            r#"{
  "container_id": "test-id",
  "container_name": "test",
  "original_pid": 1,
  "command": ["/bin/sh"],
  "checkpoint_at": 0,
  "version": "0.0.0",
  "using_gvisor": false,
  "rootless": false,
  "cgroup_path": "/sys/fs/cgroup/nucleus-test",
  "resource_limits": {
    "memory_bytes": null,
    "memory_high": null,
    "memory_swap_max": null,
    "cpu_quota_us": 50000,
    "cpu_period_us": 0,
    "cpu_weight": null,
    "pids_max": 256,
    "io_limits": []
  }
}"#,
        )
        .unwrap();

        let load_error = CheckpointMetadata::load(checkpoint_dir.path()).unwrap_err();
        let message = load_error.to_string();
        assert!(message.contains("Invalid checkpoint resource limits"));
    }
}