Skip to main content

solverforge_solver/manager/solver_manager/
runtime.rs

1use std::fmt::{self, Debug};
2use std::sync::atomic::{AtomicBool, Ordering};
3
4use solverforge_core::domain::PlanningSolution;
5
6use super::slot::{
7    JobSlot, SLOT_CANCELLED, SLOT_COMPLETED, SLOT_FAILED, SLOT_PAUSED, SLOT_SOLVING,
8};
9use super::types::{SolverEvent, SolverEventMetadata, SolverLifecycleState, SolverTerminalReason};
10use crate::scope::{ProgressCallback, SolverScope};
11use crate::stats::SolverTelemetry;
12
13/// Runtime context for a retained solve job.
14///
15/// This is passed into `Solvable::solve()` so the runtime path can publish
16/// lifecycle events, settle exact pauses, and observe cancellation.
17pub struct SolverRuntime<S: PlanningSolution> {
18    job_id: usize,
19    pub(super) slot: &'static JobSlot<S>,
20}
21
22impl<S: PlanningSolution> Clone for SolverRuntime<S> {
23    fn clone(&self) -> Self {
24        *self
25    }
26}
27
28impl<S: PlanningSolution> Copy for SolverRuntime<S> {}
29
30impl<S: PlanningSolution> Debug for SolverRuntime<S> {
31    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32        f.debug_struct("SolverRuntime")
33            .field("job_id", &self.job_id)
34            .finish()
35    }
36}
37
/// Selects which `SolverEvent` variant a snapshot-less emit helper builds.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum EventKind {
    /// Build `SolverEvent::Progress`.
    Progress,
    /// Build `SolverEvent::Resumed`.
    Resumed,
    /// Build `SolverEvent::Cancelled`.
    Cancelled,
}
44
45impl<S: PlanningSolution> SolverRuntime<S> {
46    pub(super) fn new(job_id: usize, slot: &'static JobSlot<S>) -> Self {
47        Self { job_id, slot }
48    }
49
50    pub fn job_id(&self) -> usize {
51        self.job_id
52    }
53
54    pub fn is_cancel_requested(&self) -> bool {
55        self.slot.terminate.load(Ordering::Acquire)
56    }
57
58    pub(crate) fn is_pause_requested(&self) -> bool {
59        self.slot.pause_requested.load(Ordering::Acquire)
60    }
61
62    pub(crate) fn cancel_flag(&self) -> &'static AtomicBool {
63        &self.slot.terminate
64    }
65
66    pub fn emit_progress(
67        &self,
68        current_score: Option<S::Score>,
69        best_score: Option<S::Score>,
70        telemetry: SolverTelemetry,
71    ) {
72        let lifecycle_state = self.current_state();
73        self.emit_non_snapshot_event(
74            lifecycle_state,
75            current_score,
76            best_score,
77            telemetry,
78            EventKind::Progress,
79        );
80    }
81
82    pub fn emit_best_solution(
83        &self,
84        solution: S,
85        current_score: Option<S::Score>,
86        best_score: S::Score,
87        telemetry: SolverTelemetry,
88    ) {
89        let state = self.current_state();
90        self.slot.with_publication(|sender, record| {
91            let terminal_reason = record.terminal_reason;
92            record.current_score = current_score;
93            record.best_score = Some(best_score);
94            record.telemetry = telemetry;
95
96            let snapshot_revision = record.push_snapshot(super::types::SolverSnapshot {
97                job_id: self.job_id,
98                snapshot_revision: 0,
99                lifecycle_state: state,
100                terminal_reason,
101                current_score,
102                best_score: Some(best_score),
103                telemetry,
104                solution: solution.clone(),
105            });
106
107            let metadata = record.next_metadata(self.job_id, state, Some(snapshot_revision));
108            if let Some(sender) = sender {
109                let _ = sender.send(SolverEvent::BestSolution { metadata, solution });
110            }
111        });
112    }
113
114    pub fn emit_completed(
115        &self,
116        solution: S,
117        current_score: Option<S::Score>,
118        best_score: S::Score,
119        telemetry: SolverTelemetry,
120        terminal_reason: SolverTerminalReason,
121    ) {
122        self.slot.with_publication(|sender, record| {
123            self.slot.state.store(SLOT_COMPLETED, Ordering::SeqCst);
124            record.terminal_reason = Some(terminal_reason);
125            record.checkpoint_available = false;
126            record.current_score = current_score;
127            record.best_score = Some(best_score);
128            record.telemetry = telemetry;
129
130            let snapshot_revision = record.push_snapshot(super::types::SolverSnapshot {
131                job_id: self.job_id,
132                snapshot_revision: 0,
133                lifecycle_state: SolverLifecycleState::Completed,
134                terminal_reason: Some(terminal_reason),
135                current_score,
136                best_score: Some(best_score),
137                telemetry,
138                solution: solution.clone(),
139            });
140
141            let metadata = record.next_metadata(
142                self.job_id,
143                SolverLifecycleState::Completed,
144                Some(snapshot_revision),
145            );
146            if let Some(sender) = sender {
147                let _ = sender.send(SolverEvent::Completed { metadata, solution });
148            }
149        });
150    }
151
152    pub fn emit_cancelled(
153        &self,
154        current_score: Option<S::Score>,
155        best_score: Option<S::Score>,
156        telemetry: SolverTelemetry,
157    ) {
158        self.emit_non_snapshot_terminal_event(
159            SolverLifecycleState::Cancelled,
160            SolverTerminalReason::Cancelled,
161            current_score,
162            best_score,
163            telemetry,
164            EventKind::Cancelled,
165        );
166    }
167
168    pub fn emit_failed(&self, error: String) {
169        if matches!(
170            self.current_state(),
171            SolverLifecycleState::Completed
172                | SolverLifecycleState::Cancelled
173                | SolverLifecycleState::Failed
174        ) {
175            return;
176        }
177
178        self.slot.with_publication(|sender, record| {
179            self.slot.state.store(SLOT_FAILED, Ordering::SeqCst);
180            record.terminal_reason = Some(SolverTerminalReason::Failed);
181            record.checkpoint_available = false;
182            record.failure_message = Some(error.clone());
183            let telemetry = record.telemetry;
184            let metadata = record.next_metadata(self.job_id, SolverLifecycleState::Failed, None);
185            if let Some(sender) = sender {
186                let _ = sender.send(SolverEvent::Failed {
187                    metadata: SolverEventMetadata {
188                        telemetry,
189                        ..metadata
190                    },
191                    error,
192                });
193            }
194        });
195    }
196
197    pub(crate) fn pause_if_requested<D, ProgressCb>(
198        &self,
199        solver_scope: &mut SolverScope<'_, S, D, ProgressCb>,
200    ) where
201        D: solverforge_scoring::Director<S>,
202        ProgressCb: ProgressCallback<S>,
203    {
204        if !self.slot.pause_requested.load(Ordering::Acquire) || self.is_cancel_requested() {
205            return;
206        }
207
208        solver_scope.pause_timers();
209
210        let solution = solver_scope.score_director().clone_working_solution();
211        let current_score = solver_scope.current_score().copied();
212        let best_score = solver_scope.best_score().copied();
213        let telemetry = solver_scope.stats().snapshot();
214        let _ = self.pause_with_snapshot(solution, current_score, best_score, telemetry);
215        solver_scope.resume_timers();
216    }
217
218    pub fn pause_with_snapshot(
219        &self,
220        solution: S,
221        current_score: Option<S::Score>,
222        best_score: Option<S::Score>,
223        telemetry: SolverTelemetry,
224    ) -> bool {
225        if !self.slot.pause_requested.load(Ordering::Acquire) || self.is_cancel_requested() {
226            return false;
227        }
228
229        self.slot.with_publication(|sender, record| {
230            self.slot.state.store(SLOT_PAUSED, Ordering::SeqCst);
231            let terminal_reason = record.terminal_reason;
232            record.checkpoint_available = true;
233            record.current_score = current_score;
234            record.best_score = best_score;
235            record.telemetry = telemetry;
236
237            let snapshot_revision = record.push_snapshot(super::types::SolverSnapshot {
238                job_id: self.job_id,
239                snapshot_revision: 0,
240                lifecycle_state: SolverLifecycleState::Paused,
241                terminal_reason,
242                current_score,
243                best_score,
244                telemetry,
245                solution,
246            });
247
248            let metadata = record.next_metadata(
249                self.job_id,
250                SolverLifecycleState::Paused,
251                Some(snapshot_revision),
252            );
253            if let Some(sender) = sender {
254                let _ = sender.send(SolverEvent::Paused { metadata });
255            }
256        });
257
258        let mut guard = self.slot.pause_gate.lock().unwrap();
259        while self.slot.pause_requested.load(Ordering::Acquire) && !self.is_cancel_requested() {
260            guard = self.slot.pause_condvar.wait(guard).unwrap();
261        }
262        drop(guard);
263
264        if self.is_cancel_requested() {
265            return false;
266        }
267
268        self.slot.state.store(SLOT_SOLVING, Ordering::SeqCst);
269        self.emit_non_snapshot_event(
270            SolverLifecycleState::Solving,
271            current_score,
272            best_score,
273            telemetry,
274            EventKind::Resumed,
275        );
276        true
277    }
278
279    pub(crate) fn is_terminal(&self) -> bool {
280        self.current_state().is_terminal()
281    }
282
283    fn current_state(&self) -> SolverLifecycleState {
284        self.slot
285            .raw_state()
286            .expect("runtime accessed a freed job slot")
287    }
288
289    fn emit_non_snapshot_event(
290        &self,
291        lifecycle_state: SolverLifecycleState,
292        current_score: Option<S::Score>,
293        best_score: Option<S::Score>,
294        telemetry: SolverTelemetry,
295        kind: EventKind,
296    ) {
297        self.slot.with_publication(|sender, record| {
298            record.current_score = current_score;
299            record.best_score = best_score;
300            record.telemetry = telemetry;
301            if lifecycle_state != SolverLifecycleState::Paused {
302                record.checkpoint_available = false;
303            }
304            let metadata = record.next_metadata(self.job_id, lifecycle_state, None);
305            let event = match kind {
306                EventKind::Progress => SolverEvent::Progress { metadata },
307                EventKind::Resumed => SolverEvent::Resumed { metadata },
308                EventKind::Cancelled => unreachable!(),
309            };
310            if let Some(sender) = sender {
311                let _ = sender.send(event);
312            }
313        });
314    }
315
316    fn emit_non_snapshot_terminal_event(
317        &self,
318        lifecycle_state: SolverLifecycleState,
319        terminal_reason: SolverTerminalReason,
320        current_score: Option<S::Score>,
321        best_score: Option<S::Score>,
322        telemetry: SolverTelemetry,
323        kind: EventKind,
324    ) {
325        self.slot.with_publication(|sender, record| {
326            match lifecycle_state {
327                SolverLifecycleState::Cancelled => {
328                    self.slot.state.store(SLOT_CANCELLED, Ordering::SeqCst);
329                }
330                SolverLifecycleState::Failed => {
331                    self.slot.state.store(SLOT_FAILED, Ordering::SeqCst);
332                }
333                _ => {}
334            }
335            record.terminal_reason = Some(terminal_reason);
336            record.checkpoint_available = false;
337            record.current_score = current_score;
338            record.best_score = best_score;
339            record.telemetry = telemetry;
340            let metadata = record.next_metadata(self.job_id, lifecycle_state, None);
341            let event = match kind {
342                EventKind::Cancelled => SolverEvent::Cancelled { metadata },
343                EventKind::Progress | EventKind::Resumed => unreachable!(),
344            };
345            if let Some(sender) = sender {
346                let _ = sender.send(event);
347            }
348        });
349    }
350}
351
352pub(super) fn panic_payload_to_string(payload: Box<dyn std::any::Any + Send>) -> String {
353    if let Some(message) = payload.downcast_ref::<&'static str>() {
354        (*message).to_string()
355    } else if let Some(message) = payload.downcast_ref::<String>() {
356        message.clone()
357    } else {
358        "solver panicked with a non-string payload".to_string()
359    }
360}