Skip to main content

sandlock_core/
checkpoint.rs

1use serde::{Serialize, Deserialize};
2use crate::policy::Policy;
3use crate::error::{SandlockError, SandboxError};
4use std::io;
5use std::path::{Path, PathBuf};
6
7/// A frozen snapshot of sandbox state.
8#[derive(Debug, Serialize, Deserialize)]
9pub struct Checkpoint {
10    pub name: String,
11    pub policy: Policy,
12    pub process_state: ProcessState,
13    pub fd_table: Vec<FdInfo>,
14    pub cow_snapshot: Option<PathBuf>,
15    pub app_state: Option<Vec<u8>>,
16}
17
18/// Captured process state via ptrace (registers) + process_vm_readv (memory) + /proc (metadata).
19#[derive(Debug, Serialize, Deserialize)]
20pub struct ProcessState {
21    pub pid: i32,
22    pub cwd: String,
23    pub exe: String,
24    pub regs: Vec<u64>,
25    pub memory_maps: Vec<MemoryMap>,
26    pub memory_data: Vec<MemorySegment>,
27}
28
29#[derive(Debug, Serialize, Deserialize)]
30pub struct MemorySegment {
31    pub start: u64,
32    pub data: Vec<u8>,
33}
34
35#[derive(Debug, Serialize, Deserialize)]
36pub struct MemoryMap {
37    pub start: u64,
38    pub end: u64,
39    pub perms: String,
40    pub offset: u64,
41    pub path: Option<String>,
42}
43
44impl MemoryMap {
45    pub fn writable(&self) -> bool {
46        self.perms.starts_with("rw")
47    }
48
49    pub fn private(&self) -> bool {
50        self.perms.contains('p')
51    }
52
53    pub fn is_special(&self) -> bool {
54        self.path.as_ref().map_or(false, |p| {
55            p.starts_with("[vdso]") || p.starts_with("[vvar]") || p.starts_with("[vsyscall]")
56        })
57    }
58}
59
60#[derive(Debug, Serialize, Deserialize)]
61pub struct FdInfo {
62    pub fd: i32,
63    pub path: String,
64    pub flags: i32,
65    pub offset: u64,
66}
67
68// ---------------------------------------------------------------------------
69// ptrace helpers — PTRACE_SEIZE (doesn't auto-SIGSTOP like ATTACH)
70// ---------------------------------------------------------------------------
71
72fn ptrace_seize(pid: i32) -> io::Result<()> {
73    let ret = unsafe {
74        libc::ptrace(libc::PTRACE_SEIZE as libc::c_uint, pid, 0, 0)
75    };
76    if ret < 0 {
77        return Err(io::Error::last_os_error());
78    }
79    // PTRACE_INTERRUPT stops the tracee without SIGSTOP side effects
80    let ret = unsafe {
81        libc::ptrace(libc::PTRACE_INTERRUPT as libc::c_uint, pid, 0, 0)
82    };
83    if ret < 0 {
84        return Err(io::Error::last_os_error());
85    }
86    // Wait for the ptrace-stop
87    let mut status: i32 = 0;
88    unsafe {
89        libc::waitpid(pid, &mut status, 0);
90    }
91    Ok(())
92}
93
94fn ptrace_detach(pid: i32) -> io::Result<()> {
95    let ret = unsafe { libc::ptrace(libc::PTRACE_DETACH, pid, 0, 0) };
96    if ret < 0 {
97        return Err(io::Error::last_os_error());
98    }
99    Ok(())
100}
101
102fn ptrace_getregs(pid: i32) -> io::Result<Vec<u64>> {
103    #[cfg(target_arch = "x86_64")]
104    {
105        // user_regs_struct is 27 u64 fields on x86_64 (216 bytes)
106        let mut regs = vec![0u64; 27];
107        let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) };
108        if ret < 0 {
109            return Err(io::Error::last_os_error());
110        }
111        Ok(regs)
112    }
113
114    #[cfg(target_arch = "aarch64")]
115    {
116        // Linux arm64 exposes general-purpose registers through
117        // PTRACE_GETREGSET/NT_PRSTATUS. user_pt_regs is:
118        // x0-x30, sp, pc, pstate (34 u64 values).
119        const NT_PRSTATUS: libc::c_int = 1;
120        let mut regs = vec![0u64; 34];
121        let mut iov = libc::iovec {
122            iov_base: regs.as_mut_ptr() as *mut libc::c_void,
123            iov_len: regs.len() * std::mem::size_of::<u64>(),
124        };
125        let ret = unsafe {
126            libc::ptrace(
127                libc::PTRACE_GETREGSET,
128                pid,
129                NT_PRSTATUS as usize as *mut libc::c_void,
130                &mut iov as *mut libc::iovec as *mut libc::c_void,
131            )
132        };
133        if ret < 0 {
134            return Err(io::Error::last_os_error());
135        }
136        regs.truncate(iov.iov_len / std::mem::size_of::<u64>());
137        Ok(regs)
138    }
139
140    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
141    {
142        let _ = pid;
143        Err(io::Error::new(
144            io::ErrorKind::Unsupported,
145            "checkpoint register capture is not implemented on this architecture",
146        ))
147    }
148}
149
150// ---------------------------------------------------------------------------
151// /proc parsing
152// ---------------------------------------------------------------------------
153
154fn parse_proc_maps(pid: i32) -> io::Result<Vec<MemoryMap>> {
155    let content = std::fs::read_to_string(format!("/proc/{}/maps", pid))?;
156    let mut maps = Vec::new();
157    for line in content.lines() {
158        // Format: start-end perms offset dev inode [pathname]
159        let parts: Vec<&str> = line.splitn(6, ' ').collect();
160        if parts.len() < 5 {
161            continue;
162        }
163        let addrs: Vec<&str> = parts[0].split('-').collect();
164        if addrs.len() != 2 {
165            continue;
166        }
167        let start = u64::from_str_radix(addrs[0], 16).unwrap_or(0);
168        let end = u64::from_str_radix(addrs[1], 16).unwrap_or(0);
169        let perms = parts[1].to_string();
170        let offset = u64::from_str_radix(parts[2], 16).unwrap_or(0);
171        let path = if parts.len() >= 6 {
172            let p = parts[5].trim();
173            if p.is_empty() {
174                None
175            } else {
176                Some(p.to_string())
177            }
178        } else {
179            None
180        };
181        maps.push(MemoryMap {
182            start,
183            end,
184            perms,
185            offset,
186            path,
187        });
188    }
189    Ok(maps)
190}
191
192// ---------------------------------------------------------------------------
193// Memory capture — process_vm_readv (scatter-gather, no file I/O)
194// ---------------------------------------------------------------------------
195
196fn capture_memory(pid: i32, maps: &[MemoryMap]) -> io::Result<Vec<MemorySegment>> {
197    let mut segments = Vec::new();
198
199    for map in maps {
200        if !map.writable() || !map.private() || map.is_special() {
201            continue;
202        }
203        let size = (map.end - map.start) as usize;
204        if size > 256 * 1024 * 1024 {
205            continue; // skip segments > 256MB
206        }
207
208        let mut data = vec![0u8; size];
209
210        let local_iov = libc::iovec {
211            iov_base: data.as_mut_ptr() as *mut libc::c_void,
212            iov_len: size,
213        };
214        let remote_iov = libc::iovec {
215            iov_base: map.start as *mut libc::c_void,
216            iov_len: size,
217        };
218
219        let ret = unsafe {
220            libc::process_vm_readv(
221                pid as libc::pid_t,
222                &local_iov as *const libc::iovec,
223                1,
224                &remote_iov as *const libc::iovec,
225                1,
226                0,
227            )
228        };
229
230        if ret == size as isize {
231            segments.push(MemorySegment {
232                start: map.start,
233                data,
234            });
235        }
236        // Skip unreadable segments silently (same as old behavior)
237    }
238    Ok(segments)
239}
240
241// ---------------------------------------------------------------------------
242// FD table capture
243// ---------------------------------------------------------------------------
244
245fn capture_fd_table(pid: i32) -> io::Result<Vec<FdInfo>> {
246    let fd_dir = format!("/proc/{}/fd", pid);
247    let mut fds = Vec::new();
248
249    for entry in std::fs::read_dir(&fd_dir)? {
250        let entry = entry?;
251        let fd_str = entry.file_name().into_string().unwrap_or_default();
252        let fd: i32 = match fd_str.parse() {
253            Ok(f) => f,
254            Err(_) => continue,
255        };
256
257        let path = std::fs::read_link(entry.path())
258            .map(|p| p.display().to_string())
259            .unwrap_or_default();
260
261        // Parse fdinfo for flags and offset
262        let (flags, offset) = parse_fdinfo(pid, fd).unwrap_or((0, 0));
263
264        fds.push(FdInfo {
265            fd,
266            path,
267            flags,
268            offset,
269        });
270    }
271
272    fds.sort_by_key(|f| f.fd);
273    Ok(fds)
274}
275
/// Read `/proc/<pid>/fdinfo/<fd>` and return `(flags, pos)`.
///
/// `flags` comes from the `flags:` line and is parsed as octal; `pos` comes
/// from the `pos:` line and is parsed as decimal. A value that fails to parse
/// becomes 0, a missing line leaves the value at 0, and when a line appears
/// more than once the last one wins.
///
/// # Errors
///
/// Returns the I/O error when the fdinfo file cannot be read.
fn parse_fdinfo(pid: i32, fd: i32) -> io::Result<(i32, u64)> {
    let text = std::fs::read_to_string(format!("/proc/{}/fdinfo/{}", pid, fd))?;
    let parsed = text.lines().fold((0i32, 0u64), |(flags, pos), line| {
        if let Some(octal) = line.strip_prefix("flags:\t") {
            (i32::from_str_radix(octal.trim(), 8).unwrap_or(0), pos)
        } else if let Some(decimal) = line.strip_prefix("pos:\t") {
            (flags, decimal.trim().parse().unwrap_or(0))
        } else {
            (flags, pos)
        }
    });
    Ok(parsed)
}
290
291// ---------------------------------------------------------------------------
292// Main capture function
293// ---------------------------------------------------------------------------
294
295/// Capture a checkpoint from a running, stopped sandbox.
296/// The sandbox must already be frozen (SIGSTOP'd and fork-held).
297pub(crate) fn capture(pid: i32, policy: &Policy) -> Result<Checkpoint, SandlockError> {
298    // Seize via ptrace (PTRACE_SEIZE + PTRACE_INTERRUPT — doesn't auto-SIGSTOP)
299    ptrace_seize(pid).map_err(|e| {
300        SandlockError::Sandbox(SandboxError::Child(format!("ptrace seize: {}", e)))
301    })?;
302
303    // Capture registers
304    let regs = ptrace_getregs(pid).map_err(|e| {
305        SandlockError::Sandbox(SandboxError::Child(format!("ptrace getregs: {}", e)))
306    })?;
307
308    // Capture memory maps
309    let maps =
310        parse_proc_maps(pid).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
311
312    // Capture memory data
313    let memory_data =
314        capture_memory(pid, &maps).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
315
316    // Capture fd table
317    let fd_table =
318        capture_fd_table(pid).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
319
320    // Detach
321    ptrace_detach(pid).map_err(|e| {
322        SandlockError::Sandbox(SandboxError::Child(format!("ptrace detach: {}", e)))
323    })?;
324
325    // Capture cwd and exe from /proc
326    let cwd = std::fs::read_link(format!("/proc/{}/cwd", pid))
327        .map(|p| p.display().to_string())
328        .unwrap_or_default();
329    let exe = std::fs::read_link(format!("/proc/{}/exe", pid))
330        .map(|p| p.display().to_string())
331        .unwrap_or_default();
332
333    Ok(Checkpoint {
334        name: String::new(),
335        policy: policy.clone(),
336        process_state: ProcessState {
337            pid,
338            cwd,
339            exe,
340            regs,
341            memory_maps: maps,
342            memory_data,
343        },
344        fd_table,
345        cow_snapshot: None,
346        app_state: None,
347    })
348}
349
350// ---------------------------------------------------------------------------
351// Save / Load — directory-based format
352// ---------------------------------------------------------------------------
353//
354// Layout:
355//   <dir>/
356//   ├── meta.json            # name, cow_snapshot
357//   ├── policy.dat           # bincode-serialized Policy
358//   ├── app_state.bin        # optional raw app state
359//   └── process/
360//       ├── info.json        # pid, cwd, exe
361//       ├── fds.json         # file descriptor table
362//       ├── memory_map.json  # region metadata
363//       ├── threads/
364//       │   └── 0.bin        # raw register bytes (main thread)
365//       └── memory/
366//           └── <index>.bin  # raw memory contents per segment
367
368fn io_err(e: impl std::fmt::Display) -> SandlockError {
369    SandlockError::Sandbox(SandboxError::Child(e.to_string()))
370}
371
372fn write_json<T: Serialize>(path: &Path, val: &T) -> Result<(), SandlockError> {
373    let json = serde_json::to_string_pretty(val).map_err(io_err)?;
374    std::fs::write(path, json).map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))
375}
376
377fn read_json<T: for<'de> Deserialize<'de>>(path: &Path) -> Result<T, SandlockError> {
378    let data = std::fs::read_to_string(path)
379        .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
380    serde_json::from_str(&data).map_err(io_err)
381}
382
383/// JSON schema for meta.json.
384#[derive(Serialize, Deserialize)]
385struct MetaJson {
386    name: String,
387    cow_snapshot: Option<String>,
388}
389
390/// JSON schema for process/info.json.
391#[derive(Serialize, Deserialize)]
392struct InfoJson {
393    pid: i32,
394    cwd: String,
395    exe: String,
396}
397
398/// JSON schema for each entry in process/fds.json.
399#[derive(Serialize, Deserialize)]
400struct FdJson {
401    fd: i32,
402    path: String,
403    flags: i32,
404    offset: u64,
405}
406
407/// JSON schema for each entry in process/memory_map.json.
408#[derive(Serialize, Deserialize)]
409struct MemoryMapJson {
410    start: u64,
411    end: u64,
412    perms: String,
413    offset: u64,
414    path: Option<String>,
415}
416
417impl Checkpoint {
418    /// Persist this checkpoint to a directory.
419    ///
420    /// Writes atomically: creates `<dir>.tmp`, populates it, then renames.
421    pub fn save(&self, dir: &Path) -> Result<(), SandlockError> {
422        let tmp = dir.with_extension("tmp");
423        if tmp.exists() {
424            std::fs::remove_dir_all(&tmp)
425                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
426        }
427        std::fs::create_dir_all(&tmp)
428            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
429
430        let res = self.save_inner(&tmp);
431        if res.is_err() {
432            let _ = std::fs::remove_dir_all(&tmp);
433            return res;
434        }
435
436        // Atomic rename into place
437        if dir.exists() {
438            std::fs::remove_dir_all(dir)
439                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
440        }
441        std::fs::rename(&tmp, dir)
442            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
443
444        Ok(())
445    }
446
447    fn save_inner(&self, dir: &Path) -> Result<(), SandlockError> {
448        // meta.json
449        write_json(&dir.join("meta.json"), &MetaJson {
450            name: self.name.clone(),
451            cow_snapshot: self.cow_snapshot.as_ref().map(|p| p.display().to_string()),
452        })?;
453
454        // policy.dat (bincode — complex struct, not human-readable anyway)
455        let policy_bytes = bincode::serialize(&self.policy).map_err(io_err)?;
456        std::fs::write(dir.join("policy.dat"), &policy_bytes)
457            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
458
459        // app_state.bin
460        if let Some(ref state) = self.app_state {
461            std::fs::write(dir.join("app_state.bin"), state)
462                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
463        }
464
465        // process/
466        let proc_dir = dir.join("process");
467        std::fs::create_dir(&proc_dir)
468            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
469
470        // process/info.json
471        write_json(&proc_dir.join("info.json"), &InfoJson {
472            pid: self.process_state.pid,
473            cwd: self.process_state.cwd.clone(),
474            exe: self.process_state.exe.clone(),
475        })?;
476
477        // process/fds.json
478        let fds: Vec<FdJson> = self.fd_table.iter().map(|f| FdJson {
479            fd: f.fd,
480            path: f.path.clone(),
481            flags: f.flags,
482            offset: f.offset,
483        }).collect();
484        write_json(&proc_dir.join("fds.json"), &fds)?;
485
486        // process/memory_map.json — only captured segments (1:1 with memory/*.bin)
487        // Build map entries for each captured segment by matching start address
488        let maps: Vec<MemoryMapJson> = self.process_state.memory_data.iter().map(|seg| {
489            // Find the corresponding full map entry
490            let map = self.process_state.memory_maps.iter()
491                .find(|m| m.start == seg.start);
492            match map {
493                Some(m) => MemoryMapJson {
494                    start: m.start,
495                    end: m.end,
496                    perms: m.perms.clone(),
497                    offset: m.offset,
498                    path: m.path.clone(),
499                },
500                None => MemoryMapJson {
501                    start: seg.start,
502                    end: seg.start + seg.data.len() as u64,
503                    perms: "rw-p".to_string(),
504                    offset: 0,
505                    path: None,
506                },
507            }
508        }).collect();
509        write_json(&proc_dir.join("memory_map.json"), &maps)?;
510
511        // process/threads/0.bin — main thread register state
512        let threads_dir = proc_dir.join("threads");
513        std::fs::create_dir(&threads_dir)
514            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
515        let reg_bytes: Vec<u8> = self.process_state.regs.iter()
516            .flat_map(|r| r.to_le_bytes())
517            .collect();
518        std::fs::write(threads_dir.join("0.bin"), &reg_bytes)
519            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
520
521        // process/memory/<index>.bin — 1:1 with memory_map.json entries
522        let mem_dir = proc_dir.join("memory");
523        std::fs::create_dir(&mem_dir)
524            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
525        for (i, seg) in self.process_state.memory_data.iter().enumerate() {
526            std::fs::write(mem_dir.join(format!("{}.bin", i)), &seg.data)
527                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
528        }
529
530        Ok(())
531    }
532
533    /// Load a checkpoint from a directory.
534    pub fn load(dir: &Path) -> Result<Self, SandlockError> {
535        if !dir.is_dir() {
536            return Err(SandlockError::Sandbox(SandboxError::Child(
537                format!("Checkpoint not found: {}", dir.display()),
538            )));
539        }
540
541        // meta.json
542        let meta: MetaJson = read_json(&dir.join("meta.json"))?;
543
544        // policy.dat
545        let policy_bytes = std::fs::read(dir.join("policy.dat"))
546            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
547        let policy: Policy = bincode::deserialize(&policy_bytes).map_err(io_err)?;
548
549        // app_state.bin
550        let app_state_path = dir.join("app_state.bin");
551        let app_state = if app_state_path.exists() {
552            Some(std::fs::read(&app_state_path)
553                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?)
554        } else {
555            None
556        };
557
558        // process/
559        let proc_dir = dir.join("process");
560
561        // process/info.json
562        let info: InfoJson = read_json(&proc_dir.join("info.json"))?;
563
564        // process/fds.json
565        let fds_json: Vec<FdJson> = read_json(&proc_dir.join("fds.json"))?;
566        let fd_table: Vec<FdInfo> = fds_json.into_iter().map(|f| FdInfo {
567            fd: f.fd,
568            path: f.path,
569            flags: f.flags,
570            offset: f.offset,
571        }).collect();
572
573        // process/memory_map.json — 1:1 with memory/<i>.bin
574        let maps_json: Vec<MemoryMapJson> = read_json(&proc_dir.join("memory_map.json"))?;
575        let memory_maps: Vec<MemoryMap> = maps_json.iter().map(|m| MemoryMap {
576            start: m.start,
577            end: m.end,
578            perms: m.perms.clone(),
579            offset: m.offset,
580            path: m.path.clone(),
581        }).collect();
582
583        // process/threads/0.bin
584        let reg_bytes = std::fs::read(proc_dir.join("threads").join("0.bin"))
585            .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
586        let regs: Vec<u64> = reg_bytes.chunks_exact(8)
587            .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
588            .collect();
589
590        // process/memory/<i>.bin — 1:1 with memory_map.json
591        let mem_dir = proc_dir.join("memory");
592        let mut memory_data = Vec::new();
593        for (i, map) in maps_json.iter().enumerate() {
594            let seg_path = mem_dir.join(format!("{}.bin", i));
595            let data = std::fs::read(&seg_path)
596                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
597            memory_data.push(MemorySegment {
598                start: map.start,
599                data,
600            });
601        }
602
603        Ok(Checkpoint {
604            name: meta.name,
605            policy,
606            process_state: ProcessState {
607                pid: info.pid,
608                cwd: info.cwd,
609                exe: info.exe,
610                regs,
611                memory_maps,
612                memory_data,
613            },
614            fd_table,
615            cow_snapshot: meta.cow_snapshot.map(PathBuf::from),
616            app_state,
617        })
618    }
619}