interceptor_rs/
lib.rs

1//! Intercept is a lib based on `ptrace` that intercepts and modifies Linux system calls.
2//! It currently only supports `x86_64` architecture.
3//!
4//! # Usage
//! Write a function whose signature is the same as the syscall's, mark it with `#[syscall]`,
//! and you are done.
7//!
8//! ```rust
9//! #[syscall]
10//! fn openat(dfd: i32, mut filename: *const c_char, flags: i32, mode: i32) -> i32 {
11//!     // do something before syscall, logging, changing arguments, etc.
12//!     
13//!     let ret = real!(dfd, filename, flags, mode);
14//!
//!     // do something after syscall, modifying return value..
16//! }
17//! ```
18//! See more detail in examples
19//!
20//! # Extra Info
21//!
22//! ## Memory in target
//! We use the "LD_PRELOAD" trick to insert a shared object into the target process. It is
//! used to malloc the extra memory needed when a pointer argument is modified to hold
//! content with a larger length.
25//!
26//! ## Remove dependency libgcc_s.so.1
//! Some glibc releases ship without `libgcc_s.so.1`, so we removed this dependency using
//! the link script "linker_without_libgcc.wrap".
29//!
30//! ## special handling of parameter "*const *const c_char"
31//!
32//! > This parameter is used in syscall like `execve`'s `const char *const argv[]`.
33//!
//! We treat `*const *const c_char` as `*const c_char` for convenience. That is to say, the
//! original pointer to pointers is converted into a single pointer whose target holds the
//! pointed-to contents laid out one after another. For example:
36//!
//! ### original
//! This pointer's address is inside the target process, so it cannot be used directly.
39//!
40//! | ptr (*const *const c_char)    | ptr to ptr (*const c_char)    | content   |
41//! | ---                           | ---                           | ---       |
42//! | 0x12345678                    | 0x11111111                    | "aaaa\0"  |
43//! |                               | 0x22222222                    | "bbbb\0"  |
44//! |                               | 0x33333333                    | "cccc\0"  |
45//! |                               | 0x44444444                    | "\0"      |
//! ### after conversion
//! Memory is reallocated in the interceptor instance, so the address changes.
48//!
49//! | ptr (*const c_char)   | content                   |
50//! | ---                   | ---                       |
51//! | 0x87654321            | "aaaa\0bbbb\0cccc\0\0"    |
52//! ### usage
//! You can use the helper function [`read_ptr_to_ptr`] to read the content from a converted
//! pointer, and [`write_ptr_to_ptr`] to write it back.
55//!
56use anyhow::Result;
57use once_cell::sync::Lazy;
58use paste::paste;
59use pete::{Ptracer, Restart, Stop, Tracee};
60pub use ptr::{read_ptr_to_ptr, write_ptr_to_ptr};
61use ptr::{MayBePtr, Number, Ptr, Read, RemoteMem, Write};
62use rand::Rng;
63use std::{cell::RefCell, collections::HashMap, env::current_exe, process::Command, rc::Rc};
64use syscall::{ReturnVariant, ReturnVariantWrapper, SysCall, SysCallWrapper};
65/// A proc-macro that turns a rust fn into a syscall.
66///
67/// See more details in examples.
68pub use syscall_attr::syscall;
69use tracing::debug;
70
71mod ptr;
72#[doc(hidden)]
73pub mod syscall;
74
/// Provide the main functionality for intercepting.
///
/// An instance is created with [`Interceptor::new`], hooks are registered with
/// [`Interceptor::on`], and the traced child is driven by [`Interceptor::run`].
pub struct Interceptor {
    /// Tracer used to spawn the child, wait for its stops and restart it.
    ptracer: Ptracer,
    /// Hooks registered through `on`; looked up by syscall name on every syscall stop.
    syscalls: Vec<SysCallWrapper>,
    /// Pending blocked calls: substituted syscall number -> return value to report at
    /// syscall exit.
    block_calls: HashMap<u64, u64>,
    /// Argument wrappers stored per syscall name by the `pre` hooks; shared with those
    /// closures through the `Rc`.
    contexts: Rc<RefCell<HashMap<String, PackedContext>>>,
    /// Shared handle passed to every argument write-back; starts out as `None`.
    /// NOTE(review): presumably filled in by the `ptr` module on first use — confirm.
    remote_mem: Rc<RefCell<Option<RemoteMem>>>,
}
83
/// Type-erased holder for the six argument wrappers of one intercepted call, one slot per
/// syscall argument (in argument order).
struct PackedContext(
    Box<dyn Context>,
    Box<dyn Context>,
    Box<dyn Context>,
    Box<dyn Context>,
    Box<dyn Context>,
    Box<dyn Context>,
);
92
/// Marker trait with no methods; it only exists so that argument wrappers of different
/// types can be boxed as `dyn Context` inside a [`PackedContext`].
trait Context {}

// Every `MayBePtr<T>` qualifies, whatever its inner type is.
impl<T> Context for MayBePtr<T> {}
96
97impl Interceptor {
98    /// create child process by specific a [`std::process::Command`]
99    pub fn new(mut cmd: Command) -> Result<Self> {
100        let mut ptracer = Ptracer::new();
101        cmd.env(
102            "LD_PRELOAD",
103            current_exe()?.with_file_name("libinter_mem.so"),
104        );
105        let _child = ptracer.spawn(cmd)?;
106
107        Ok(Self {
108            ptracer,
109            syscalls: Vec::new(),
110            block_calls: HashMap::new(),
111            contexts: Rc::new(RefCell::new(HashMap::new())),
112            remote_mem: Rc::new(RefCell::new(None)),
113        })
114    }
115
116    /// register syscall to interceptor
117    pub fn on<R, A1, A2, A3, A4, A5, A6>(
118        &mut self,
119        syscall: &'static SysCall<R, A1, A2, A3, A4, A5, A6>,
120    ) -> &mut Self
121    where
122        R: Number,
123        A1: Read,
124        A2: Read,
125        A3: Read,
126        A4: Read,
127        A5: Read,
128        A6: Read,
129        MayBePtr<<A1 as Read>::InnerType>: Write<A1> + Ptr<A1>,
130        MayBePtr<<A2 as Read>::InnerType>: Write<A2> + Ptr<A2>,
131        MayBePtr<<A3 as Read>::InnerType>: Write<A3> + Ptr<A3>,
132        MayBePtr<<A4 as Read>::InnerType>: Write<A4> + Ptr<A4>,
133        MayBePtr<<A5 as Read>::InnerType>: Write<A5> + Ptr<A5>,
134        MayBePtr<<A6 as Read>::InnerType>: Write<A6> + Ptr<A6>,
135    {
136        let contexts = self.contexts.clone();
137        let remote_mem = self.remote_mem.clone();
138        self.syscalls.push(SysCallWrapper {
139            name: syscall.name,
140            pre: Box::new(move |tracee, a1, a2, a3, a4, a5, a6| {
141                let mut a1 = A1::read(tracee, a1);
142                let mut a2 = A2::read(tracee, a2);
143                let mut a3 = A3::read(tracee, a3);
144                let mut a4 = A4::read(tracee, a4);
145                let mut a5 = A5::read(tracee, a5);
146                let mut a6 = A6::read(tracee, a6);
147                match syscall.call_pre(a1.get(), a2.get(), a3.get(), a4.get(), a5.get(), a6.get()) {
148                    ReturnVariant::PackedArgs((r1, r2, r3, r4, r5, r6)) => {
149                        let pa = (
150                            a1.write(tracee, remote_mem.clone(), r1),
151                            a2.write(tracee, remote_mem.clone(), r2),
152                            a3.write(tracee, remote_mem.clone(), r3),
153                            a4.write(tracee, remote_mem.clone(), r4),
154                            a5.write(tracee, remote_mem.clone(), r5),
155                            a6.write(tracee, remote_mem.clone(), r6),
156                        );
157                        contexts.borrow_mut().insert(
158                            syscall.name.to_string(),
159                            PackedContext(
160                                Box::new(a1),
161                                Box::new(a2),
162                                Box::new(a3),
163                                Box::new(a4),
164                                Box::new(a5),
165                                Box::new(a6),
166                            ),
167                        );
168                        ReturnVariantWrapper::PackedArgs(pa)
169                    }
170                    ReturnVariant::Normal(r) => ReturnVariantWrapper::Normal(r.to_u64()),
171                }
172            }),
173            post: Box::new(|u| syscall.call_post(R::from_u64(u)).to_u64()),
174        });
175        self
176    }
177
178    /// run the child process and begin intercepting
179    pub fn run(&mut self) -> Result<()> {
180        while let Some(mut tracee) = self.ptracer.wait()? {
181            self.on_stop(&mut tracee)?;
182            self.ptracer.restart(tracee, Restart::Syscall)?;
183        }
184
185        Ok(())
186    }
187
188    fn on_stop(&mut self, tracee: &mut Tracee) -> Result<()> {
189        let mut regs = tracee.registers()?;
190        let pc = regs.rip;
191        let Tracee { pid, stop, .. } = tracee;
192
193        match stop {
194            Stop::SyscallEnter => {
195                let syscall = SYSCALL_TABLE
196                    .get(&regs.orig_rax)
197                    .cloned()
198                    .unwrap_or_else(|| format!("unknown (syscall no = 0x{:x})", regs.orig_rax));
199                debug!(
200                    "pid = {}, pc = {:x}: [{}] {:?}\nregs: {:x?}",
201                    pid, pc, syscall, stop, regs
202                );
203
204                if let Some(sc) = self.syscalls.iter_mut().find(|sc| sc.name == syscall) {
205                    match (sc.pre)(
206                        tracee, regs.rdi, regs.rsi, regs.rdx, regs.r10, regs.r8, regs.r9,
207                    ) {
208                        ReturnVariantWrapper::PackedArgs((r1, r2, r3, r4, r5, r6)) => {
209                            macro_rules! set_reg {
210                                ($r:path ,$n: tt) => {
211                                    paste! {
212                                        if let Some([<r $n>]) = [<r $n>] {
213                                            regs.$r = [<r $n>];
214                                        }
215                                    }
216                                };
217                            }
218
219                            set_reg!(rdi, 1);
220                            set_reg!(rsi, 2);
221                            set_reg!(rdx, 3);
222                            set_reg!(r10, 4);
223                            set_reg!(r8, 5);
224                            set_reg!(r9, 6);
225                            tracee.set_registers(regs)?;
226                            self.contexts.borrow_mut().remove(&syscall);
227                        }
228                        ReturnVariantWrapper::Normal(r) => {
229                            // syscall will be blocked, call a non-exists & random sysno,
230                            let sysno = 512 + rand::thread_rng().gen::<u16>() as u64;
231                            self.block_calls.insert(sysno, r);
232                            debug!(
233                                "block call change sysno {} -> {}. ret: {}",
234                                regs.orig_rax, sysno, r
235                            );
236                            regs.orig_rax = sysno;
237                            tracee.set_registers(regs)?;
238                        }
239                    }
240                }
241            }
242            Stop::SyscallExit => {
243                if let Some(block_call_ret) = self.block_calls.remove(&regs.orig_rax) {
244                    debug!(
245                        "block call sysno: {}, ret: {}",
246                        regs.orig_rax, block_call_ret
247                    );
248                    regs.rax = block_call_ret;
249                    tracee.set_registers(regs)?;
250                } else {
251                    let syscall = SYSCALL_TABLE
252                        .get(&regs.orig_rax)
253                        .cloned()
254                        .unwrap_or_else(|| format!("unknown (syscall no = 0x{:x})", regs.orig_rax));
255                    debug!(
256                        "pid = {}, pc = {:x}: [{}] {:?}\nregs: {:x?}",
257                        pid, pc, syscall, stop, regs
258                    );
259
260                    if let Some(sc) = self.syscalls.iter_mut().find(|sc| sc.name == syscall) {
261                        let ret = (sc.post)(regs.rax);
262                        regs.rax = ret;
263                        tracee.set_registers(regs)?;
264                    }
265                }
266            }
267            _ => {}
268        }
269
270        Ok(())
271    }
272}
273
/// A fake macro that actually does nothing.
/// It will be detected in `proc_macro_attribute` and changes intercept logic.
///
/// On its own, the expansion only evaluates the comma-separated arguments as a tuple and
/// discards the result. It is meant to be written inside a function marked `#[syscall]`,
/// as shown in the crate-level example.
#[macro_export]
macro_rules! real {
    // Accepts zero or more single-token arguments separated by commas.
    ($($args: tt),*) => {
        let _ = ($($args),*);
    };
}
282
/// Lookup table from syscall number to syscall name.
type SyscallTable = HashMap<u64, String>;
/// Process-wide syscall table, built by `load_syscall_table` on first access.
static SYSCALL_TABLE: Lazy<SyscallTable> = Lazy::new(load_syscall_table);
/// Raw contents of the syscall list, embedded at compile time; parsed as one
/// tab-separated `number<TAB>name` record per line.
const SYSCALLS: &str = include_str!("data/syscalls_x64.tsv");
286
287fn load_syscall_table() -> SyscallTable {
288    let mut syscalls = HashMap::new();
289
290    for line in SYSCALLS.split_terminator('\n') {
291        let (call_no, name) = line
292            .split_once('\t')
293            .map(|(x, y)| (x.trim().parse::<u64>().unwrap(), y.trim().to_owned()))
294            .unwrap();
295        syscalls.insert(call_no, name);
296    }
297
298    syscalls
299}