//! dscale — a fast & deterministic simulation framework for benchmarking
//! and testing distributed systems.
use std::{cmp::Reverse, collections::VecDeque};

use crossbeam_channel::RecvError;

use crate::{
    events::{Event, PidHandlerEvent},
    global_unique_id,
    jiffy::Jiffies,
    runners::{
        RunStatus, SimulationRunner,
        common::RunnerCore,
        task::{TaskId, TaskIndex, TaskResult},
        workers::Workers,
    },
};

/// Coordinator that fans simulation tasks out to a pool of worker threads
/// while preserving determinism: only events whose invocation time lies
/// within a sliding window (`window_delta`) of the global clock are
/// dispatched, and each process runs at most one task at a time.
pub(crate) struct ScalableRunner {
    // Shared single-threaded runner state: clock, event queue, time budget.
    core: RunnerCore,
    // Worker pool that executes dispatched `PidHandlerEvent`s off-thread.
    workers: Workers,
    // Width of the safe concurrency window; events no further than this
    // ahead of the clock may be spawned together (see `spawn_within_window`).
    window_delta: Jiffies,
    // Ids of tasks dispatched but not yet retired. Entries are pushed as
    // `Reverse(task_id)` — presumably a min-ordered index keyed on
    // (invocation time, unique id); confirm against `task::TaskIndex`.
    on_execution: TaskIndex,
    // Ids of tasks whose results have been ingested; matched against
    // `on_execution` in `adjust_task_index` to retire the completed prefix.
    done: TaskIndex,
    // Whether a process currently has a task executing in the thread pool.
    busy: Vec<bool>,
    // Per-process queue of tasks within the window but deferred because the process is busy.
    // Keeps sequential order per process within window
    waiting: Vec<VecDeque<(TaskId, PidHandlerEvent)>>,
}

impl ScalableRunner {
    /// Creates a runner over `core`, dispatching work to `workers` and
    /// allowing concurrent execution of tasks within `safe_window` jiffies
    /// of the global clock.
    pub(crate) fn new(core: RunnerCore, workers: Workers, safe_window: Jiffies) -> Self {
        let procs = workers.num_procs();
        Self {
            core,
            workers,
            window_delta: safe_window,
            on_execution: TaskIndex::new(),
            done: TaskIndex::new(),
            busy: vec![false; procs],
            waiting: std::iter::repeat_with(VecDeque::new).take(procs).collect(),
        }
    }

    /// Emits the initial `Start` event for every process the first time the
    /// runner is driven; subsequent calls are no-ops.
    fn ensure_started(&mut self) {
        if !self.core.mark_started() {
            return;
        }
        for pid in 0..self.workers.num_procs() {
            let id = (Jiffies(0), global_unique_id());
            self.schedule(id, PidHandlerEvent::Start { pid });
        }
    }

    /// Registers `task_id` as in-flight, then either hands `event` to the
    /// worker pool right away or — if its target process already has a task
    /// running — defers it behind that task to keep per-process order.
    fn schedule(&mut self, task_id: TaskId, event: PidHandlerEvent) {
        self.on_execution.push(Reverse(task_id));
        let pid = event.target_pid();
        if !self.busy[pid] {
            self.busy[pid] = true;
            self.workers.spawn_event(task_id, event);
        } else {
            self.waiting[pid].push_back((task_id, event));
        }
    }
}

impl SimulationRunner for ScalableRunner {
    /// Drives the simulation until the configured time budget is exhausted
    /// (or it quiesces), then joins the worker pool.
    fn run_full_budget(&mut self) -> RunStatus {
        self.ensure_started();
        let deadline = self.core.time_budget;
        let outcome = self.coordinate(None, deadline);
        self.join_workers();
        outcome
    }

    /// Drives the simulation for at most `k` completed tasks (steps), then
    /// joins the worker pool.
    fn run_steps(&mut self, k: usize) -> RunStatus {
        self.ensure_started();
        let deadline = self.core.time_budget;
        let outcome = self.coordinate(Some(k), deadline);
        self.join_workers();
        outcome
    }

    /// Drives the simulation for up to `sub_budget` jiffies past the current
    /// clock, clamped to the overall time budget, then joins the worker pool.
    fn run_sub_budget(&mut self, sub_budget: Jiffies) -> RunStatus {
        self.ensure_started();
        let hard_cap = self.core.time_budget;
        let deadline = (self.core.clock.now() + sub_budget).min(hard_cap);
        let outcome = self.coordinate(None, deadline);
        self.join_workers();
        outcome
    }
}

impl ScalableRunner {
    /// Main coordination loop. Repeatedly blocks for one worker result,
    /// ingests it (plus any other results already available), retires the
    /// completed prefix of the task index, and then tries to advance the
    /// clock / spawn more tasks within the window.
    ///
    /// Returns when `max_steps` results have been ingested or the clock
    /// reaches `deadline`; `BudgetExhausted` is reported only when the full
    /// `core.time_budget` (not just a sub-deadline) has been reached.
    fn coordinate(&mut self, max_steps: Option<usize>, deadline: Jiffies) -> RunStatus {
        let mut steps: usize = 0;
        loop {
            // Pre-check so run_steps(0) (or a limit already met) returns
            // without blocking on a worker result.
            if let Some(k) = max_steps {
                if steps >= k {
                    return RunStatus::Completed { steps };
                }
            }

            match self.workers.next_result() {
                Ok(first) => {
                    self.ingest(first);
                    steps += 1;
                    let global_now = self.core.clock.now();
                    if global_now >= deadline {
                        if global_now >= self.core.time_budget {
                            return RunStatus::BudgetExhausted { steps };
                        }
                        return RunStatus::Completed { steps };
                    }

                    // Re-check the step limit after the blocking receive so
                    // we never ingest more than `k` results per call.
                    if let Some(k) = max_steps {
                        if steps >= k {
                            return RunStatus::Completed { steps };
                        }
                    }

                    // Opportunistically drain whatever results are already
                    // queued without blocking, still honoring the limit.
                    while let Some(result) = self.workers.try_next_result() {
                        self.ingest(result);
                        steps += 1;
                        if let Some(k) = max_steps {
                            if steps >= k {
                                return RunStatus::Completed { steps };
                            }
                        }
                    }

                    self.adjust_task_index();
                    self.try_advance();
                }
                Err(RecvError) => {
                    // Workers hold the sender for the runner's lifetime, so
                    // a disconnect here means a worker thread died.
                    unreachable!("unexpected worker disconnection")
                }
            }
        }
    }

    /// Folds one finished task back into the runner: resolves the events it
    /// produced into the core event queue, records its id as done, and keeps
    /// its process saturated by dispatching the next deferred task (if any).
    fn ingest(&mut self, task_result: TaskResult) {
        let pid = task_result.pid;
        self.core.resolve_events(task_result.events);
        self.done.push(Reverse(task_result.id));

        if let Some((waiting_id, waiting_event)) = self.waiting[pid].pop_front() {
            // `busy[pid]` stays true: the process moves straight on to its
            // next queued task.
            self.workers.spawn_event(waiting_id, waiting_event);
        } else {
            self.busy[pid] = false;
        }
    }

    /// Retires the matched prefix of `done` against `on_execution`, so that
    /// `on_execution.peek()` afterwards is the earliest task that is still
    /// outstanding (dispatched or deferred but not yet ingested).
    fn adjust_task_index(&mut self) {
        while let (Some(d), Some(e)) = (self.done.peek(), self.on_execution.peek()) {
            if d == e {
                self.done.pop();
                self.on_execution.pop();
            } else {
                break;
            }
        }
    }

    /// Advances the clock if safe, and if it moved, spawns every event that
    /// now falls within the window.
    fn try_advance(&mut self) {
        if self.try_move_window() {
            self.spawn_within_window();
        }
    }

    /// Tries to move the global clock forward. Returns `true` iff the clock
    /// advanced (to the earliest outstanding task, or — when none are
    /// outstanding — to the next queued event).
    fn try_move_window(&mut self) -> bool {
        if let Some(top) = self.on_execution.peek() {
            // `top.0.0` is the task's invocation time (first element of the
            // `TaskId` tuple inside the `Reverse` wrapper).
            if self.core.clock.now() == top.0.0 {
                return false;
            }
            // There is still some top task executing in window — move to this task
            self.core.advance_time(top.0.0);

            // No tasks in window — try to find new next task outside window
        } else if let Some(next_event) = self.core.event_queue.peek() {
            self.core.advance_time(next_event.0.invocation_time);
        } else {
            // No more events — quiesced. Coordinate will exit on the next
            // iteration when it blocks on next_result and no workers are busy.
            return false;
        }
        true
    }

    /// Pops every queued event whose invocation time is within
    /// `window_delta` of the current clock. Handler events are scheduled on
    /// the worker pool; fault events are applied inline on this thread.
    fn spawn_within_window(&mut self) {
        while let Some(next_event) = self.core.event_queue.peek() {
            let t = next_event.0.invocation_time;
            if t - self.core.clock.now() > self.window_delta {
                break;
            }
            let next_event = self.core.event_queue.pop().unwrap().0;

            match next_event.event {
                Event::Fault(event) => {
                    // Do not spawn faults.
                    // This also leads to the fact faults will not be counted as steps.
                    self.core.handle_fault_event(event);
                }
                Event::Handler(event) => {
                    let task_id = (t, global_unique_id());
                    // `handle_pid_handler_event` may consume the event
                    // locally (returning None), in which case nothing is
                    // dispatched for this task id.
                    if let Some(event) = self.core.handle_pid_handler_event(t, event) {
                        self.schedule(task_id, event);
                    }
                }
            }
        }
    }

    /// Quiesces the worker pool: drops all deferred (not yet dispatched)
    /// tasks and drains every in-flight result, discarding its payload.
    ///
    /// NOTE(review): drained results are NOT passed through `resolve_events`
    /// and `on_execution`/`done` are not reconciled here, so events produced
    /// by in-flight tasks are lost. That looks intentional for a final join,
    /// but seems hazardous if the runner is driven again afterwards (e.g.
    /// repeated `run_steps` calls) — confirm the intended reuse contract.
    fn join_workers(&mut self) {
        for queue in &mut self.waiting {
            queue.clear();
        }
        while self.busy.iter().any(|&b| b) {
            match self.workers.next_result() {
                Ok(result) => self.busy[result.pid] = false,
                Err(RecvError) => unreachable!("unexpected worker disconnection"),
            }
        }
    }
}