// sandlock_core/checkpoint.rs

use crate::error::{SandboxError, SandlockError};
use crate::policy::Policy;
use serde::{Deserialize, Serialize};
use std::ffi::OsString;
use std::io;
use std::path::{Path, PathBuf};
6
7/// A frozen snapshot of sandbox state.
8#[derive(Debug, Serialize, Deserialize)]
9pub struct Checkpoint {
10    pub name: String,
11    pub policy: Policy,
12    pub process_state: ProcessState,
13    pub fd_table: Vec<FdInfo>,
14    pub cow_snapshot: Option<PathBuf>,
15    pub app_state: Option<Vec<u8>>,
16}
17
18/// Captured process state via ptrace (registers) + process_vm_readv (memory) + /proc (metadata).
19#[derive(Debug, Serialize, Deserialize)]
20pub struct ProcessState {
21    pub pid: i32,
22    pub cwd: String,
23    pub exe: String,
24    pub regs: Vec<u64>,
25    pub memory_maps: Vec<MemoryMap>,
26    pub memory_data: Vec<MemorySegment>,
27}
28
29#[derive(Debug, Serialize, Deserialize)]
30pub struct MemorySegment {
31    pub start: u64,
32    pub data: Vec<u8>,
33}
34
35#[derive(Debug, Serialize, Deserialize)]
36pub struct MemoryMap {
37    pub start: u64,
38    pub end: u64,
39    pub perms: String,
40    pub offset: u64,
41    pub path: Option<String>,
42}
43
44impl MemoryMap {
45    pub fn writable(&self) -> bool {
46        self.perms.starts_with("rw")
47    }
48
49    pub fn private(&self) -> bool {
50        self.perms.contains('p')
51    }
52
53    pub fn is_special(&self) -> bool {
54        self.path.as_ref().map_or(false, |p| {
55            p.starts_with("[vdso]") || p.starts_with("[vvar]") || p.starts_with("[vsyscall]")
56        })
57    }
58}
59
60#[derive(Debug, Serialize, Deserialize)]
61pub struct FdInfo {
62    pub fd: i32,
63    pub path: String,
64    pub flags: i32,
65    pub offset: u64,
66}
67
// ---------------------------------------------------------------------------
// ptrace helpers — PTRACE_SEIZE (doesn't auto-SIGSTOP like ATTACH)
// ---------------------------------------------------------------------------
71
72fn ptrace_seize(pid: i32) -> io::Result<()> {
73    let ret = unsafe {
74        libc::ptrace(libc::PTRACE_SEIZE as libc::c_uint, pid, 0, 0)
75    };
76    if ret < 0 {
77        return Err(io::Error::last_os_error());
78    }
79    // PTRACE_INTERRUPT stops the tracee without SIGSTOP side effects
80    let ret = unsafe {
81        libc::ptrace(libc::PTRACE_INTERRUPT as libc::c_uint, pid, 0, 0)
82    };
83    if ret < 0 {
84        return Err(io::Error::last_os_error());
85    }
86    // Wait for the ptrace-stop
87    let mut status: i32 = 0;
88    unsafe {
89        libc::waitpid(pid, &mut status, 0);
90    }
91    Ok(())
92}
93
94fn ptrace_detach(pid: i32) -> io::Result<()> {
95    let ret = unsafe { libc::ptrace(libc::PTRACE_DETACH, pid, 0, 0) };
96    if ret < 0 {
97        return Err(io::Error::last_os_error());
98    }
99    Ok(())
100}
101
102fn ptrace_getregs(pid: i32) -> io::Result<Vec<u64>> {
103    // user_regs_struct is 27 u64 fields on x86_64 (216 bytes)
104    let mut regs = vec![0u64; 27];
105    let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) };
106    if ret < 0 {
107        return Err(io::Error::last_os_error());
108    }
109    Ok(regs)
110}
111
// ---------------------------------------------------------------------------
// /proc parsing
// ---------------------------------------------------------------------------
115
116fn parse_proc_maps(pid: i32) -> io::Result<Vec<MemoryMap>> {
117    let content = std::fs::read_to_string(format!("/proc/{}/maps", pid))?;
118    let mut maps = Vec::new();
119    for line in content.lines() {
120        // Format: start-end perms offset dev inode [pathname]
121        let parts: Vec<&str> = line.splitn(6, ' ').collect();
122        if parts.len() < 5 {
123            continue;
124        }
125        let addrs: Vec<&str> = parts[0].split('-').collect();
126        if addrs.len() != 2 {
127            continue;
128        }
129        let start = u64::from_str_radix(addrs[0], 16).unwrap_or(0);
130        let end = u64::from_str_radix(addrs[1], 16).unwrap_or(0);
131        let perms = parts[1].to_string();
132        let offset = u64::from_str_radix(parts[2], 16).unwrap_or(0);
133        let path = if parts.len() >= 6 {
134            let p = parts[5].trim();
135            if p.is_empty() {
136                None
137            } else {
138                Some(p.to_string())
139            }
140        } else {
141            None
142        };
143        maps.push(MemoryMap {
144            start,
145            end,
146            perms,
147            offset,
148            path,
149        });
150    }
151    Ok(maps)
152}
153
// ---------------------------------------------------------------------------
// Memory capture — process_vm_readv (scatter-gather, no file I/O)
// ---------------------------------------------------------------------------
157
158fn capture_memory(pid: i32, maps: &[MemoryMap]) -> io::Result<Vec<MemorySegment>> {
159    let mut segments = Vec::new();
160
161    for map in maps {
162        if !map.writable() || !map.private() || map.is_special() {
163            continue;
164        }
165        let size = (map.end - map.start) as usize;
166        if size > 256 * 1024 * 1024 {
167            continue; // skip segments > 256MB
168        }
169
170        let mut data = vec![0u8; size];
171
172        let local_iov = libc::iovec {
173            iov_base: data.as_mut_ptr() as *mut libc::c_void,
174            iov_len: size,
175        };
176        let remote_iov = libc::iovec {
177            iov_base: map.start as *mut libc::c_void,
178            iov_len: size,
179        };
180
181        let ret = unsafe {
182            libc::process_vm_readv(
183                pid as libc::pid_t,
184                &local_iov as *const libc::iovec,
185                1,
186                &remote_iov as *const libc::iovec,
187                1,
188                0,
189            )
190        };
191
192        if ret == size as isize {
193            segments.push(MemorySegment {
194                start: map.start,
195                data,
196            });
197        }
198        // Skip unreadable segments silently (same as old behavior)
199    }
200    Ok(segments)
201}
202
// ---------------------------------------------------------------------------
// FD table capture
// ---------------------------------------------------------------------------
206
207fn capture_fd_table(pid: i32) -> io::Result<Vec<FdInfo>> {
208    let fd_dir = format!("/proc/{}/fd", pid);
209    let mut fds = Vec::new();
210
211    for entry in std::fs::read_dir(&fd_dir)? {
212        let entry = entry?;
213        let fd_str = entry.file_name().into_string().unwrap_or_default();
214        let fd: i32 = match fd_str.parse() {
215            Ok(f) => f,
216            Err(_) => continue,
217        };
218
219        let path = std::fs::read_link(entry.path())
220            .map(|p| p.display().to_string())
221            .unwrap_or_default();
222
223        // Parse fdinfo for flags and offset
224        let (flags, offset) = parse_fdinfo(pid, fd).unwrap_or((0, 0));
225
226        fds.push(FdInfo {
227            fd,
228            path,
229            flags,
230            offset,
231        });
232    }
233
234    fds.sort_by_key(|f| f.fd);
235    Ok(fds)
236}
237
/// Read the open flags and file position of descriptor `fd` of process `pid`
/// from `/proc/<pid>/fdinfo/<fd>`.
///
/// Returns `(flags, pos)`. `flags` is parsed as octal from the `flags:` line
/// and `pos` as decimal from the `pos:` line; a missing or unparsable value is
/// reported as 0.
///
/// # Errors
///
/// Returns the I/O error when the fdinfo file cannot be read.
fn parse_fdinfo(pid: i32, fd: i32) -> io::Result<(i32, u64)> {
    let content = std::fs::read_to_string(format!("/proc/{}/fdinfo/{}", pid, fd))?;
    let mut flags = 0i32;
    let mut pos = 0u64;
    for line in content.lines() {
        if let Some(val) = line.strip_prefix("flags:\t") {
            flags = i32::from_str_radix(val.trim(), 8).unwrap_or(0);
        }
        if let Some(val) = line.strip_prefix("pos:\t") {
            pos = val.trim().parse().unwrap_or(0);
        }
    }
    Ok((flags, pos))
}
252
// ---------------------------------------------------------------------------
// Main capture function
// ---------------------------------------------------------------------------
256
257/// Capture a checkpoint from a running, stopped sandbox.
258/// The sandbox must already be frozen (SIGSTOP'd and fork-held).
259pub(crate) fn capture(pid: i32, policy: &Policy) -> Result<Checkpoint, SandlockError> {
260    // Seize via ptrace (PTRACE_SEIZE + PTRACE_INTERRUPT — doesn't auto-SIGSTOP)
261    ptrace_seize(pid).map_err(|e| {
262        SandlockError::Sandbox(SandboxError::Child(format!("ptrace seize: {}", e)))
263    })?;
264
265    // Capture registers
266    let regs = ptrace_getregs(pid).map_err(|e| {
267        SandlockError::Sandbox(SandboxError::Child(format!("ptrace getregs: {}", e)))
268    })?;
269
270    // Capture memory maps
271    let maps =
272        parse_proc_maps(pid).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
273
274    // Capture memory data
275    let memory_data =
276        capture_memory(pid, &maps).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
277
278    // Capture fd table
279    let fd_table =
280        capture_fd_table(pid).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
281
282    // Detach
283    ptrace_detach(pid).map_err(|e| {
284        SandlockError::Sandbox(SandboxError::Child(format!("ptrace detach: {}", e)))
285    })?;
286
287    // Capture cwd and exe from /proc
288    let cwd = std::fs::read_link(format!("/proc/{}/cwd", pid))
289        .map(|p| p.display().to_string())
290        .unwrap_or_default();
291    let exe = std::fs::read_link(format!("/proc/{}/exe", pid))
292        .map(|p| p.display().to_string())
293        .unwrap_or_default();
294
295    Ok(Checkpoint {
296        name: String::new(),
297        policy: policy.clone(),
298        process_state: ProcessState {
299            pid,
300            cwd,
301            exe,
302            regs,
303            memory_maps: maps,
304            memory_data,
305        },
306        fd_table,
307        cow_snapshot: None,
308        app_state: None,
309    })
310}
311
// ---------------------------------------------------------------------------
// Save / Load — directory-based format
// ---------------------------------------------------------------------------
//
// Layout:
//   <dir>/
//   ├── meta.json            # name, cow_snapshot
//   ├── policy.dat           # bincode-serialized Policy
//   ├── app_state.bin        # optional raw app state
//   └── process/
//       ├── info.json        # pid, cwd, exe
//       ├── fds.json         # file descriptor table
//       ├── memory_map.json  # region metadata
//       ├── threads/
//       │   └── 0.bin        # raw register bytes (main thread)
//       └── memory/
//           └── <index>.bin  # raw memory contents per segment
329
330fn io_err(e: impl std::fmt::Display) -> SandlockError {
331    SandlockError::Sandbox(SandboxError::Child(e.to_string()))
332}
333
334fn write_json<T: Serialize>(path: &Path, val: &T) -> Result<(), SandlockError> {
335    let json = serde_json::to_string_pretty(val).map_err(io_err)?;
336    std::fs::write(path, json).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))
337}
338
339fn read_json<T: for<'de> Deserialize<'de>>(path: &Path) -> Result<T, SandlockError> {
340    let data = std::fs::read_to_string(path)
341        .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
342    serde_json::from_str(&data).map_err(io_err)
343}
344
345/// JSON schema for meta.json.
346#[derive(Serialize, Deserialize)]
347struct MetaJson {
348    name: String,
349    cow_snapshot: Option<String>,
350}
351
352/// JSON schema for process/info.json.
353#[derive(Serialize, Deserialize)]
354struct InfoJson {
355    pid: i32,
356    cwd: String,
357    exe: String,
358}
359
360/// JSON schema for each entry in process/fds.json.
361#[derive(Serialize, Deserialize)]
362struct FdJson {
363    fd: i32,
364    path: String,
365    flags: i32,
366    offset: u64,
367}
368
369/// JSON schema for each entry in process/memory_map.json.
370#[derive(Serialize, Deserialize)]
371struct MemoryMapJson {
372    start: u64,
373    end: u64,
374    perms: String,
375    offset: u64,
376    path: Option<String>,
377}
378
379impl Checkpoint {
380    /// Persist this checkpoint to a directory.
381    ///
382    /// Writes atomically: creates `<dir>.tmp`, populates it, then renames.
383    pub fn save(&self, dir: &Path) -> Result<(), SandlockError> {
384        let tmp = dir.with_extension("tmp");
385        if tmp.exists() {
386            std::fs::remove_dir_all(&tmp)
387                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
388        }
389        std::fs::create_dir_all(&tmp)
390            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
391
392        let res = self.save_inner(&tmp);
393        if res.is_err() {
394            let _ = std::fs::remove_dir_all(&tmp);
395            return res;
396        }
397
398        // Atomic rename into place
399        if dir.exists() {
400            std::fs::remove_dir_all(dir)
401                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
402        }
403        std::fs::rename(&tmp, dir)
404            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
405
406        Ok(())
407    }
408
409    fn save_inner(&self, dir: &Path) -> Result<(), SandlockError> {
410        // meta.json
411        write_json(&dir.join("meta.json"), &MetaJson {
412            name: self.name.clone(),
413            cow_snapshot: self.cow_snapshot.as_ref().map(|p| p.display().to_string()),
414        })?;
415
416        // policy.dat (bincode — complex struct, not human-readable anyway)
417        let policy_bytes = bincode::serialize(&self.policy).map_err(io_err)?;
418        std::fs::write(dir.join("policy.dat"), &policy_bytes)
419            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
420
421        // app_state.bin
422        if let Some(ref state) = self.app_state {
423            std::fs::write(dir.join("app_state.bin"), state)
424                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
425        }
426
427        // process/
428        let proc_dir = dir.join("process");
429        std::fs::create_dir(&proc_dir)
430            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
431
432        // process/info.json
433        write_json(&proc_dir.join("info.json"), &InfoJson {
434            pid: self.process_state.pid,
435            cwd: self.process_state.cwd.clone(),
436            exe: self.process_state.exe.clone(),
437        })?;
438
439        // process/fds.json
440        let fds: Vec<FdJson> = self.fd_table.iter().map(|f| FdJson {
441            fd: f.fd,
442            path: f.path.clone(),
443            flags: f.flags,
444            offset: f.offset,
445        }).collect();
446        write_json(&proc_dir.join("fds.json"), &fds)?;
447
448        // process/memory_map.json — only captured segments (1:1 with memory/*.bin)
449        // Build map entries for each captured segment by matching start address
450        let maps: Vec<MemoryMapJson> = self.process_state.memory_data.iter().map(|seg| {
451            // Find the corresponding full map entry
452            let map = self.process_state.memory_maps.iter()
453                .find(|m| m.start == seg.start);
454            match map {
455                Some(m) => MemoryMapJson {
456                    start: m.start,
457                    end: m.end,
458                    perms: m.perms.clone(),
459                    offset: m.offset,
460                    path: m.path.clone(),
461                },
462                None => MemoryMapJson {
463                    start: seg.start,
464                    end: seg.start + seg.data.len() as u64,
465                    perms: "rw-p".to_string(),
466                    offset: 0,
467                    path: None,
468                },
469            }
470        }).collect();
471        write_json(&proc_dir.join("memory_map.json"), &maps)?;
472
473        // process/threads/0.bin — main thread register state
474        let threads_dir = proc_dir.join("threads");
475        std::fs::create_dir(&threads_dir)
476            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
477        let reg_bytes: Vec<u8> = self.process_state.regs.iter()
478            .flat_map(|r| r.to_le_bytes())
479            .collect();
480        std::fs::write(threads_dir.join("0.bin"), &reg_bytes)
481            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
482
483        // process/memory/<index>.bin — 1:1 with memory_map.json entries
484        let mem_dir = proc_dir.join("memory");
485        std::fs::create_dir(&mem_dir)
486            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
487        for (i, seg) in self.process_state.memory_data.iter().enumerate() {
488            std::fs::write(mem_dir.join(format!("{}.bin", i)), &seg.data)
489                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
490        }
491
492        Ok(())
493    }
494
495    /// Load a checkpoint from a directory.
496    pub fn load(dir: &Path) -> Result<Self, SandlockError> {
497        if !dir.is_dir() {
498            return Err(SandlockError::Sandbox(SandboxError::Child(
499                format!("Checkpoint not found: {}", dir.display()),
500            )));
501        }
502
503        // meta.json
504        let meta: MetaJson = read_json(&dir.join("meta.json"))?;
505
506        // policy.dat
507        let policy_bytes = std::fs::read(dir.join("policy.dat"))
508            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
509        let policy: Policy = bincode::deserialize(&policy_bytes).map_err(io_err)?;
510
511        // app_state.bin
512        let app_state_path = dir.join("app_state.bin");
513        let app_state = if app_state_path.exists() {
514            Some(std::fs::read(&app_state_path)
515                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?)
516        } else {
517            None
518        };
519
520        // process/
521        let proc_dir = dir.join("process");
522
523        // process/info.json
524        let info: InfoJson = read_json(&proc_dir.join("info.json"))?;
525
526        // process/fds.json
527        let fds_json: Vec<FdJson> = read_json(&proc_dir.join("fds.json"))?;
528        let fd_table: Vec<FdInfo> = fds_json.into_iter().map(|f| FdInfo {
529            fd: f.fd,
530            path: f.path,
531            flags: f.flags,
532            offset: f.offset,
533        }).collect();
534
535        // process/memory_map.json — 1:1 with memory/<i>.bin
536        let maps_json: Vec<MemoryMapJson> = read_json(&proc_dir.join("memory_map.json"))?;
537        let memory_maps: Vec<MemoryMap> = maps_json.iter().map(|m| MemoryMap {
538            start: m.start,
539            end: m.end,
540            perms: m.perms.clone(),
541            offset: m.offset,
542            path: m.path.clone(),
543        }).collect();
544
545        // process/threads/0.bin
546        let reg_bytes = std::fs::read(proc_dir.join("threads").join("0.bin"))
547            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
548        let regs: Vec<u64> = reg_bytes.chunks_exact(8)
549            .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
550            .collect();
551
552        // process/memory/<i>.bin — 1:1 with memory_map.json
553        let mem_dir = proc_dir.join("memory");
554        let mut memory_data = Vec::new();
555        for (i, map) in maps_json.iter().enumerate() {
556            let seg_path = mem_dir.join(format!("{}.bin", i));
557            let data = std::fs::read(&seg_path)
558                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
559            memory_data.push(MemorySegment {
560                start: map.start,
561                data,
562            });
563        }
564
565        Ok(Checkpoint {
566            name: meta.name,
567            policy,
568            process_state: ProcessState {
569                pid: info.pid,
570                cwd: info.cwd,
571                exe: info.exe,
572                regs,
573                memory_maps,
574                memory_data,
575            },
576            fd_table,
577            cow_snapshot: meta.cow_snapshot.map(PathBuf::from),
578            app_state,
579        })
580    }
581}