bugstalker 0.4.4

BugStalker is a modern and lightweight debugger for Rust applications.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
use crate::debugger::address::RelocatedAddress;
use crate::debugger::breakpoint::{Breakpoint, BrkptType};
use crate::debugger::debugee::tracee::{StopType, TraceeCtl, TraceeStatus};
use crate::debugger::error::Error;
use crate::debugger::error::Error::{MultipleErrors, ProcessExit, Ptrace, Waitpid};
use crate::debugger::register::debug::DebugRegisterNumber;
use crate::debugger::watchpoint::WatchpointRegistry;
use crate::debugger::{code, register};
use crate::weak_error;
use log::{debug, warn};
use nix::errno::Errno;
use nix::libc::pid_t;
use nix::sys::signal::{SIGSTOP, Signal};
use nix::sys::wait::{WaitStatus, waitpid};
use nix::unistd::Pid;
use nix::{libc, sys};
use std::collections::VecDeque;

/// Signals that do not interrupt the debugging session.
///
/// When one of these signals stops a tracee, the tracer does not report the stop to the
/// caller: the signal is queued for injection and the debugee is resumed right away.
static QUIET_SIGNALS: &[Signal] = &[
    Signal::SIGALRM,
    Signal::SIGURG,
    Signal::SIGCHLD,
    Signal::SIGIO,
    Signal::SIGVTALRM,
    Signal::SIGPROF,
    //Signal::SIGWINCH,
];

/// Signals that may interrupt the debugging session but are never injected into the debugee
/// by the debugger (they are not placed into the signal injection queue).
static TRANSPARENT_SIGNALS: &[Signal] = &[Signal::SIGINT];

/// Describes how a watchpoint stop was detected.
#[derive(Debug, Clone)]
pub enum WatchpointHitType {
    /// The underlying hardware breakpoint was hit; carries the debug register that fired.
    DebugRegister(DebugRegisterNumber),
    /// The companion breakpoint at the end of the watchpoint scope was hit.
    /// Carries the payload of `BrkptType::WatchpointCompanion`
    /// (presumably the numbers of the affected watchpoints - TODO confirm).
    EndOfScope(Vec<u32>),
}

/// Reason why the tracer handed control back to the debugger.
#[derive(Debug)]
pub enum StopReason {
    /// The whole debugee process exited with the given exit code.
    DebugeeExit(i32),
    /// The debugee has just started (reported on `PTRACE_EVENT_EXEC`).
    DebugeeStart,
    /// The debugee stopped at a breakpoint: thread id and the breakpoint address.
    Breakpoint(Pid, RelocatedAddress),
    /// The debugee stopped at a watchpoint: thread id, current program counter and hit type.
    Watchpoint(Pid, RelocatedAddress, WatchpointHitType),
    /// The debugee stopped because of an OS signal: thread id and the signal.
    SignalStop(Pid, Signal),
    /// The tracee is gone (`ESRCH` from ptrace or `ECHILD` from `waitpid`).
    NoSuchProcess(Pid),
}

/// Trace context (or tcx): a cheap, copyable, borrowed view of the debugger state
/// that the tracer consults while it handles tracee events.
#[derive(Clone, Copy)]
pub struct TraceContext<'a> {
    /// Breakpoints the tracer looks up by address when a trap is caught.
    pub breakpoints: &'a [&'a Breakpoint],
    /// Watchpoint registry; its watchpoints are distributed to newly registered tracees.
    pub watchpoints: &'a WatchpointRegistry,
}

impl<'a> TraceContext<'a> {
    pub fn new(
        breakpoints: &'a [&'a Breakpoint],
        watchpoint_registry: &'a WatchpointRegistry,
    ) -> Self {
        Self {
            breakpoints,
            watchpoints: watchpoint_registry,
        }
    }
}

/// Ptrace tracer: resumes, stops and single-steps the threads (tracees) of a debugee process.
pub struct Tracer {
    /// Bookkeeping of all known tracees and their current state.
    pub(super) tracee_ctl: TraceeCtl,

    /// Signals (paired with the thread that received them) waiting to be injected
    /// into the debugee on the next resume.
    inject_signal_queue: VecDeque<(Pid, Signal)>,
    /// Set while a group stop is in progress; a nested group-stop request is a no-op.
    group_stop_guard: bool,
}

impl Tracer {
    /// Create new [`Tracer`] for internally created debugee process.
    ///
    /// # Arguments
    ///
    /// * `proc_pid`: process id
    pub fn new(proc_pid: Pid) -> Self {
        Self {
            tracee_ctl: TraceeCtl::new(proc_pid),
            inject_signal_queue: VecDeque::new(),
            group_stop_guard: false,
        }
    }

    /// Create [`Tracer`] for external process attached by pid.
    ///
    /// # Arguments
    ///
    /// * `proc_pid`: process id
    /// * `threads`: id's of process threads
    pub fn new_external(proc_pid: Pid, threads: &[Pid]) -> Self {
        Self {
            tracee_ctl: TraceeCtl::new_external(proc_pid, threads),
            inject_signal_queue: VecDeque::new(),
            group_stop_guard: false,
        }
    }

    /// Continue debugee execution until stop happened.
    pub fn resume(&mut self, tcx: TraceContext) -> Result<StopReason, Error> {
        loop {
            if let Some(req) = self.inject_signal_queue.pop_front() {
                self.tracee_ctl.cont_stopped_ex(
                    Some(req),
                    self.inject_signal_queue
                        .iter()
                        .map(|(pid, _)| *pid)
                        .collect(),
                )?;

                if let Some((pid, sign)) = self.inject_signal_queue.front().copied() {
                    // if there are more signals - stop debugee again
                    self.group_stop_interrupt(tcx, Pid::from_raw(-1))?;
                    return Ok(StopReason::SignalStop(pid, sign));
                }
            } else {
                self.tracee_ctl.cont_stopped().map_err(MultipleErrors)?;
            }

            debug!(target: "tracer", "resume debugee execution, wait for updates");
            let status = match waitpid(Pid::from_raw(-1), None) {
                Ok(status) => status,
                Err(Errno::ECHILD) => {
                    return Ok(StopReason::NoSuchProcess(self.tracee_ctl.proc_pid()));
                }
                Err(e) => return Err(Waitpid(e)),
            };

            debug!(target: "tracer", "received new thread status: {status:?}");
            if let Some(stop) = self.apply_new_status(tcx, status)? {
                // if stop fired by quiet signal - go to next iteration, this will inject signal at
                // a tracee process and resume it
                if let StopReason::SignalStop(_, signal) = stop
                    && QUIET_SIGNALS.contains(&signal)
                {
                    continue;
                }

                debug!(target: "tracer", "debugee stopped, reason: {stop:?}");
                return Ok(stop);
            }
        }
    }

    /// Interrupt (pause) execution of the whole debugee process.
    ///
    /// This is a best-effort group stop implemented via `PTRACE_INTERRUPT` for all running
    /// tracees. No `StopReason` is returned because the stop is artificial: it is requested
    /// by the debugger, not caused by the debugee.
    ///
    /// # Arguments
    ///
    /// * `tcx`: trace context
    pub fn pause(&mut self, tcx: TraceContext) -> Result<(), Error> {
        // pid `-1` means that there is no already-stopped initiator thread
        let no_initiator = Pid::from_raw(-1);
        self.group_stop_interrupt(tcx, no_initiator)
    }

    fn group_stop_in_progress(&self) -> bool {
        self.group_stop_guard
    }

    /// Mark a group stop as started.
    fn lock_group_stop(&mut self) {
        self.group_stop_guard = true;
    }

    /// Mark the group stop as finished.
    fn unlock_group_stop(&mut self) {
        self.group_stop_guard = false;
    }

    /// For stop whole debugee process this function stops tracees (threads) one by one
    /// using PTRACE_INTERRUPT request.
    ///
    /// Stops only already running tracees.
    ///
    /// If tracee receives signals before interrupt - then tracee in signal-stop and no need to interrupt it.
    ///
    /// # Arguments
    ///
    /// * `initiator_pid`: tracee with this thread id already stopped, there is no need to interrupt it.
    fn group_stop_interrupt(&mut self, tcx: TraceContext, initiator_pid: Pid) -> Result<(), Error> {
        if self.group_stop_in_progress() {
            return Ok(());
        }
        self.lock_group_stop();

        debug!(
            target: "tracer",
            "initiate group stop, initiator: {initiator_pid}, debugee state: {:?}",
            self.tracee_ctl.snapshot()
        );

        let non_stopped_exist = self
            .tracee_ctl
            .tracee_iter()
            .any(|t| t.pid != initiator_pid);
        if !non_stopped_exist {
            // no need to group-stop
            debug!(
                target: "tracer",
                "group stop complete, debugee state: {:?}",
                self.tracee_ctl.snapshot()
            );
            self.unlock_group_stop();
            return Ok(());
        }

        // two rounds, cause may be new tracees at first round, they stopped at round 2
        for _ in 0..2 {
            let tracees = self.tracee_ctl.snapshot();

            for tid in tracees.into_iter().map(|t| t.pid) {
                // load current tracee snapshot
                let mut tracee = match self.tracee_ctl.tracee(tid) {
                    None => continue,
                    Some(tracee) => {
                        if tracee.is_stopped() {
                            continue;
                        } else {
                            tracee.clone()
                        }
                    }
                };

                if let Err(e) = sys::ptrace::interrupt(tracee.pid) {
                    // if no such process - continue, it will be removed later, on PTRACE_EVENT_EXIT event.
                    if Errno::ESRCH == e {
                        warn!("thread {} not found, ESRCH", tracee.pid);
                        if let Some(t) = self.tracee_ctl.tracee_mut(tracee.pid) {
                            t.set_stop(StopType::Interrupt);
                        }
                        continue;
                    }
                    return Err(Ptrace(e));
                }

                let mut wait = tracee.wait_one()?;

                while !matches!(wait, WaitStatus::PtraceEvent(_, _, libc::PTRACE_EVENT_STOP)) {
                    let stop = self.apply_new_status(tcx, wait)?;
                    match stop {
                        None => {}
                        Some(StopReason::Breakpoint(pid, _))
                        | Some(StopReason::Watchpoint(pid, _, _)) => {
                            // tracee already stopped cause breakpoint or watchpoint are reached
                            if pid == tracee.pid {
                                break;
                            }
                        }
                        Some(StopReason::DebugeeExit(code)) => return Err(ProcessExit(code)),
                        Some(StopReason::DebugeeStart) => {
                            unreachable!("stop at debugee entry point twice")
                        }
                        Some(StopReason::SignalStop(_, _)) => {
                            // tracee in signal-stop
                            break;
                        }
                        Some(StopReason::NoSuchProcess(_)) => {
                            // expect that tracee will be removed later
                            break;
                        }
                    }

                    // reload tracee, it states must be changed after handle signal
                    tracee = match self.tracee_ctl.tracee(tracee.pid).cloned() {
                        None => break,
                        Some(t) => t,
                    };
                    if tracee.is_stopped()
                        && matches!(tracee.status, TraceeStatus::Stopped(StopType::Interrupt))
                    {
                        break;
                    }

                    wait = tracee.wait_one()?;
                }

                if let Some(t) = self.tracee_ctl.tracee_mut(tracee.pid)
                    && !t.is_stopped()
                {
                    t.set_stop(StopType::Interrupt);
                }
            }
        }

        self.unlock_group_stop();

        debug!(
            target: "tracer",
            "group stop complete, debugee state: {:?}",
            self.tracee_ctl.snapshot()
        );

        Ok(())
    }

    /// Handle tracee event fired by `wait` syscall.
    /// After this function ends tracee_ctl must be in consistent state.
    /// If debugee process stop detected - returns a stop reason.
    ///
    /// # Arguments
    ///
    /// * `status`: new status returned by `waitpid`.
    fn apply_new_status(
        &mut self,
        tcx: TraceContext,
        status: WaitStatus,
    ) -> Result<Option<StopReason>, Error> {
        match status {
            WaitStatus::Exited(pid, code) => {
                // Thread exited with tread id
                self.tracee_ctl.remove(pid);
                if pid == self.tracee_ctl.proc_pid() {
                    return Ok(Some(StopReason::DebugeeExit(code)));
                }
                Ok(None)
            }
            WaitStatus::PtraceEvent(pid, _signal, code) => {
                match code {
                    libc::PTRACE_EVENT_EXEC => {
                        // fire just before debugee start
                        // cause currently `fork()`
                        // in debugee is unsupported we expect this code to call once
                        self.tracee_ctl.add(pid);
                        return Ok(Some(StopReason::DebugeeStart));
                    }
                    libc::PTRACE_EVENT_CLONE => {
                        // fire just before new thread created
                        self.tracee_ctl
                            .tracee_ensure_mut(pid)
                            .set_stop(StopType::Interrupt);
                        let new_thread_id =
                            Pid::from_raw(sys::ptrace::getevent(pid).map_err(Ptrace)? as pid_t);

                        // PTRACE_EVENT_STOP may be received first, and new tracee may be already registered at this point
                        if self.tracee_ctl.tracee_mut(new_thread_id).is_none() {
                            let new_tracee = self.tracee_ctl.add(new_thread_id);
                            let new_trace_status = new_tracee.wait_one()?;
                            if matches!(new_trace_status, WaitStatus::Exited(_, _)) {
                                // this situation can occur if the process has already completed
                                self.tracee_ctl.remove(new_thread_id);
                            } else {
                                // all watchpoints must be distributed to a new tracee
                                weak_error!(tcx.watchpoints.distribute_to_tracee(new_tracee));

                                debug_assert!(
                                    matches!(
                                        new_trace_status,
                                        WaitStatus::PtraceEvent(tid, _, libc::PTRACE_EVENT_STOP) if tid == new_thread_id
                                    ),
                                    "the newly cloned thread must start with PTRACE_EVENT_STOP (cause PTRACE_SEIZE was used), got {new_trace_status:?}"
                                )
                            }
                        }
                    }
                    libc::PTRACE_EVENT_STOP => {
                        // fire right after new thread started or PTRACE_INTERRUPT called.
                        match self.tracee_ctl.tracee_mut(pid) {
                            Some(tracee) => tracee.set_stop(StopType::Interrupt),
                            None => {
                                let tracee = self.tracee_ctl.add(pid);
                                weak_error!(tcx.watchpoints.distribute_to_tracee(tracee));
                            }
                        }
                    }
                    libc::PTRACE_EVENT_EXIT => {
                        // Stop the tracee at exit
                        let tracee = self.tracee_ctl.remove(pid);
                        if let Some(mut tracee) = tracee {
                            // TODO
                            // There is one interesting situation, when tracee may not exist
                            // at this point (according to ptrace documentation, it must exist).
                            // Tracee not exist when thread created inside `std::thread::scoped`.
                            // This can be verified by running watchpoints functional tests.
                            // It is a flaky behavior, but sometimes an error
                            // will be returned at this point.
                            // Currently error here muted, but this behaviour NFR.
                            _ = tracee.r#continue(None);
                        }
                    }
                    _ => {
                        warn!("unsupported (ignored) ptrace event, code: {code}");
                    }
                }
                Ok(None)
            }
            WaitStatus::Stopped(pid, signal) => {
                let info = match sys::ptrace::getsiginfo(pid) {
                    Ok(info) => info,
                    Err(Errno::ESRCH) => return Ok(Some(StopReason::NoSuchProcess(pid))),
                    Err(e) => return Err(Ptrace(e)),
                };

                match signal {
                    Signal::SIGTRAP => match info.si_code {
                        code::TRAP_TRACE => {
                            todo!()
                        }
                        code::TRAP_BRKPT | code::SI_KERNEL => {
                            let current_pc = {
                                let tracee = self.tracee_ctl.tracee_ensure(pid);
                                tracee.set_pc(tracee.pc()?.as_u64() - 1)?;
                                tracee.pc()?
                            };

                            let mb_hit_brkpt = tcx
                                .breakpoints
                                .iter()
                                .find(|brkpt| brkpt.addr == current_pc);
                            debug_assert!(
                                mb_hit_brkpt.is_some(),
                                "the interrupt caught but the breakpoint was not found"
                            );
                            let Some(&brkpt) = mb_hit_brkpt else {
                                return Ok(None);
                            };

                            let has_tmp_breakpoints = tcx
                                .breakpoints
                                .iter()
                                .any(|b| b.is_temporary() | b.is_temporary_async());
                            if has_tmp_breakpoints {
                                let temporary_hit = brkpt.is_temporary() && pid == brkpt.pid;
                                let temporary_async_hit = brkpt.is_temporary_async();
                                let watchpoint_hit = brkpt.is_wp_companion();
                                if !temporary_hit && !watchpoint_hit && !temporary_async_hit {
                                    let mut unusual_brkpt = brkpt.clone();
                                    unusual_brkpt.pid = pid;
                                    if unusual_brkpt.is_enabled() {
                                        unusual_brkpt.disable()?;
                                        while self.single_step(tcx, pid)?.is_some() {}
                                        unusual_brkpt.enable()?;
                                    }
                                    self.tracee_ctl
                                        .tracee_ensure_mut(pid)
                                        .set_stop(StopType::Interrupt);

                                    return Ok(None);
                                }
                            }

                            self.tracee_ctl
                                .tracee_ensure_mut(pid)
                                .set_stop(StopType::Interrupt);
                            self.group_stop_interrupt(tcx, pid)?;

                            if let BrkptType::WatchpointCompanion(wps) = brkpt.r#type() {
                                return Ok(Some(StopReason::Watchpoint(
                                    pid,
                                    current_pc,
                                    WatchpointHitType::EndOfScope(wps.clone()),
                                )));
                            }

                            Ok(Some(StopReason::Breakpoint(pid, current_pc)))
                        }
                        code::TRAP_HWBKPT => {
                            let current_pc = {
                                let tracee = self.tracee_ctl.tracee_ensure(pid);
                                tracee.pc()?
                            };

                            self.tracee_ctl
                                .tracee_ensure_mut(pid)
                                .set_stop(StopType::Interrupt);
                            self.group_stop_interrupt(tcx, pid)?;

                            let mut state = register::debug::HardwareDebugState::current(pid)?;
                            let reg = state.dr6.detect_and_flush().expect("should exists");
                            state.sync(pid)?;
                            let hit_type = WatchpointHitType::DebugRegister(reg);
                            Ok(Some(StopReason::Watchpoint(pid, current_pc, hit_type)))
                        }
                        code => {
                            debug!(
                                target: "tracer",
                                "unexpected SIGTRAP code {code}",
                            );
                            Ok(None)
                        }
                    },
                    _ => {
                        if !TRANSPARENT_SIGNALS.contains(&signal) {
                            self.inject_signal_queue.push_back((pid, signal));
                        }

                        self.tracee_ctl
                            .tracee_ensure_mut(pid)
                            .set_stop(StopType::SignalStop(signal));

                        if !QUIET_SIGNALS.contains(&signal) {
                            self.group_stop_interrupt(tcx, pid)?;
                        }

                        Ok(Some(StopReason::SignalStop(pid, signal)))
                    }
                }
            }
            WaitStatus::Signaled(_, _, _) => Ok(None),
            _ => {
                warn!("unexpected wait status: {status:?}");
                Ok(None)
            }
        }
    }

    /// Execute next instruction, then stop with `TRAP_TRACE`.
    ///
    /// # Arguments
    ///
    /// * `tcx`: trace context
    /// * `pid`: tracee pid
    ///
    /// returns: [`None`] if an instruction step is done successfully.
    /// A [`StopReason::SignalStop`] returned if step interrupt causes tracee in a signal-stop.
    /// A [`StopReason::Watchpoint`] returned if step interrupt causes hardware breakpoint is hit.
    /// Error returned otherwise.
    pub fn single_step(
        &mut self,
        tcx: TraceContext,
        pid: Pid,
    ) -> Result<Option<StopReason>, Error> {
        let tracee = self.tracee_ctl.tracee_ensure(pid);
        let initial_pc = tracee.pc()?;
        tracee.step(None)?;

        let reason = loop {
            let tracee = self.tracee_ctl.tracee_ensure_mut(pid);
            let status = tracee.wait_one()?;
            let info = sys::ptrace::getsiginfo(pid).map_err(Ptrace)?;

            // check that debugee step into an expected trap
            // (breakpoints ignored and are also considered as a trap)
            let in_trap = matches!(status, WaitStatus::Stopped(_, Signal::SIGTRAP))
                && (info.si_code == code::TRAP_TRACE
                    || info.si_code == code::TRAP_BRKPT
                    || info.si_code == code::SI_KERNEL
                    || info.si_code == code::TRAP_HWBKPT);
            if in_trap {
                let pc = tracee.pc()?;
                // check that we aren't on original pc value
                if pc == initial_pc {
                    tracee.step(None)?;
                    continue;
                }

                let mut state = register::debug::HardwareDebugState::current(pid)?;
                let maybe_dr = state.dr6.detect_and_flush();
                state.sync(pid)?;
                if let Some(dr) = maybe_dr {
                    let hit_type = WatchpointHitType::DebugRegister(dr);
                    break Some(StopReason::Watchpoint(pid, pc, hit_type));
                }

                let mb_brkpt = tcx.breakpoints.iter().find(|brkpt| brkpt.addr == pc);
                if let Some(BrkptType::WatchpointCompanion(wps)) = mb_brkpt.map(|b| b.r#type()) {
                    let hit_type = WatchpointHitType::EndOfScope(wps.clone());
                    break Some(StopReason::Watchpoint(pid, pc, hit_type));
                }

                break None;
            }

            let in_trap =
                matches!(status, WaitStatus::Stopped(_, Signal::SIGTRAP)) && (info.si_code == 5);
            if in_trap {
                // if in syscall step to syscall end
                sys::ptrace::syscall(tracee.pid, None).map_err(Ptrace)?;
                let syscall_status = tracee.wait_one()?;
                debug_assert!(matches!(
                    syscall_status,
                    WaitStatus::Stopped(_, Signal::SIGTRAP)
                ));

                // then do step again
                tracee.step(None)?;

                continue;
            }

            let is_interrupt = matches!(
                status,
                WaitStatus::PtraceEvent(p, SIGSTOP, libc::PTRACE_EVENT_STOP) if pid == p,
            );
            if is_interrupt {
                break None;
            }

            let stop = self.apply_new_status(tcx, status)?;
            match stop {
                None => {}
                Some(StopReason::Breakpoint(_, _)) => {
                    unreachable!("breakpoints must be ignore");
                }
                Some(StopReason::Watchpoint(_, _, _)) => {
                    unreachable!("watchpoints must be ignore");
                }
                Some(StopReason::DebugeeExit(code)) => return Err(ProcessExit(code)),
                Some(StopReason::DebugeeStart) => {
                    unreachable!("stop at debugee entry point twice")
                }
                Some(StopReason::SignalStop(_, signal)) => {
                    if QUIET_SIGNALS.contains(&signal) {
                        self.tracee_ctl.tracee_ensure(pid).step(Some(signal))?;
                        continue;
                    }

                    // tracee in signal-stop
                    break stop;
                }
                Some(StopReason::NoSuchProcess(_)) => {
                    // expect that tracee will be removed later
                    break None;
                }
            }
        };
        Ok(reason)
    }
}