// obeli_sk_executor/expired_timers_watcher.rs
use crate::executor::Append;
use chrono::{DateTime, Utc};
use concepts::storage::DbConnection;
use concepts::storage::DbError;
use concepts::storage::DbPool;
use concepts::storage::ExecutionLog;
use concepts::storage::JoinSetResponseEvent;
use concepts::{
    storage::{ExecutionEventInner, ExpiredTimer, JoinSetResponse},
    FinishedExecutionError,
};
use std::{
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc,
    },
    time::Duration,
};
use tokio::task::AbortHandle;
use tracing::Level;
use tracing::{debug, error, info, instrument, trace, warn};
use utils::time::ClockFn;

/// Configuration of the expired timers watcher started by [`spawn_new`].
#[derive(Debug, Clone)]
pub struct TimersWatcherConfig<C: ClockFn> {
    /// Pause inserted after every tick of the watcher loop.
    pub tick_sleep: Duration,
    /// Clock queried for the current time at the start of every tick.
    pub clock_fn: C,
    /// Amount subtracted from the current time to obtain the cutoff handed to
    /// `tick`; presumably this delays processing of a timer until it is
    /// overdue by at least this much (depends on `get_expired_timers`).
    pub leeway: Duration,
}

/// Summary of the work done by a single call to `tick`.
// NOTE(review): the fields are only written in this file; `dead_code` is
// expected here, presumably because the values are consumed only via `Debug`
// or by tests - confirm before removing the attribute.
#[expect(dead_code)]
#[derive(Debug)]
pub(crate) struct TickProgress {
    /// Number of expired lock timers whose follow-up event was appended successfully.
    pub(crate) expired_locks: usize,
    /// Number of expired async delay timers whose response was appended successfully.
    pub(crate) expired_async_timers: usize,
}

/// Handle to the background watcher task created by [`spawn_new`].
///
/// Use [`TaskHandle::close`] for a cooperative shutdown. Dropping the handle
/// while the task is still running aborts the task.
pub struct TaskHandle {
    /// Shutdown flag: set by `close` and checked by the watcher loop before each tick.
    is_closing: Arc<AtomicBool>,
    /// Abort handle of the spawned tokio task; used to detect completion and to abort on drop.
    abort_handle: AbortHandle,
}

impl TaskHandle {
    /// Requests a cooperative shutdown of the watcher and waits for it to stop.
    ///
    /// The closing flag is raised first, then the task's completion status is
    /// polled once per millisecond until the task reports that it has finished.
    /// The task only observes the flag between ticks, so this may take up to
    /// one tick plus the configured sleep.
    #[instrument(level = Level::DEBUG, skip_all, name = "expired_timers_watcher.close")]
    pub async fn close(&self) {
        trace!("Gracefully closing");
        self.is_closing.store(true, Ordering::Relaxed);
        let poll_interval = Duration::from_millis(1);
        loop {
            if self.abort_handle.is_finished() {
                break;
            }
            tokio::time::sleep(poll_interval).await;
        }
        debug!("Gracefully closed expired_timers_watcher");
    }
}

impl Drop for TaskHandle {
    /// Aborts the watcher task if it is still running when the handle goes away.
    ///
    /// Nothing happens (and nothing is logged) when the task has already finished,
    /// e.g. after a completed `close`.
    fn drop(&mut self) {
        let still_running = !self.abort_handle.is_finished();
        if still_running {
            warn!("Aborting the expired_timers_watcher");
            self.abort_handle.abort();
        }
    }
}

/// Spawns the expired timers watcher as a tokio task and returns its handle.
///
/// Until the closing flag of the returned [`TaskHandle`] is set, the task repeatedly:
/// 1. computes the cutoff as `clock_fn.now() - leeway`,
/// 2. runs `tick` on a connection obtained from `db_pool`,
/// 3. passes the outcome to `log_err_if_new`,
/// 4. sleeps for `tick_sleep`.
///
/// The flag is checked only at the top of each iteration, so a tick that is
/// already in progress (and the sleep that follows it) completes before the
/// task exits.
pub fn spawn_new<C: ClockFn + 'static, DB: DbConnection + 'static, P: DbPool<DB> + 'static>(
    db_pool: P,
    config: TimersWatcherConfig<C>,
) -> TaskHandle {
    let is_closing = Arc::new(AtomicBool::new(false));
    // The task gets its own reference to the flag; the original stays in the handle.
    let closing_flag = Arc::clone(&is_closing);
    let tick_sleep = config.tick_sleep;
    let watcher_loop = async move {
        debug!("Spawned expired_timers_watcher");
        // Remembers the most recent failure so identical errors are logged once.
        let mut last_error = None;
        loop {
            if closing_flag.load(Ordering::Relaxed) {
                break;
            }
            let executed_at = config.clock_fn.now() - config.leeway;
            let tick_result = tick(db_pool.connection(), executed_at).await;
            log_err_if_new(tick_result, &mut last_error);
            tokio::time::sleep(tick_sleep).await;
        }
    };
    // Only the abort handle is kept; the join handle is dropped right away.
    let abort_handle = tokio::spawn(watcher_loop).abort_handle();
    TaskHandle {
        is_closing,
        abort_handle,
    }
}

/// Logs a failed tick unless it repeats the previously remembered error.
///
/// * On success the remembered error is cleared, so the next failure is logged again.
/// * On a failure equal to the remembered one nothing is logged and nothing changes.
/// * On any other failure the error is logged and becomes the remembered one.
fn log_err_if_new(res: Result<TickProgress, DbError>, old_err: &mut Option<DbError>) {
    let Err(err) = res else {
        *old_err = None;
        return;
    };
    let is_repeat = old_err.as_ref().is_some_and(|old| err == *old);
    if !is_repeat {
        error!("Tick failed: {err:?}");
        *old_err = Some(err);
    }
}

/// Handles every timer reported as expired for the cutoff `executed_at`.
///
/// The timers are fetched with `get_expired_timers(executed_at)` and processed one by one:
///
/// * `ExpiredTimer::Lock` - if `ExecutionLog::can_be_retried_after` yields a delay,
///   an `IntermittentTimeout` event expiring at `executed_at + delay` is appended;
///   otherwise a `Finished` event carrying `FinishedExecutionError::PermanentTimeout`
///   is appended, together with the parent notification when a parent is present.
/// * `ExpiredTimer::AsyncDelay` - a `DelayFinished` response is appended to the
///   execution's join set.
///
/// # Errors
///
/// Only a failure to fetch the expired timers is returned. A failure to append
/// for an individual timer is logged at debug level and skipped; such timers are
/// not counted in the returned [`TickProgress`].
#[instrument(level = Level::TRACE, skip_all)]
pub(crate) async fn tick<DB: DbConnection + 'static>(
    db_connection: DB,
    executed_at: DateTime<Utc>,
) -> Result<TickProgress, DbError> {
    let mut expired_locks = 0;
    let mut expired_async_timers = 0;
    let expired_timers = db_connection.get_expired_timers(executed_at).await?;
    for expired_timer in expired_timers {
        match expired_timer {
            ExpiredTimer::AsyncDelay {
                execution_id,
                join_set_id,
                delay_id,
            } => {
                let response = JoinSetResponseEvent {
                    join_set_id,
                    event: JoinSetResponse::DelayFinished { delay_id },
                };
                debug!(%execution_id, %join_set_id, %delay_id, "Appending DelayFinishedAsyncResponse");
                let outcome = db_connection
                    .append_response(executed_at, execution_id.clone(), response)
                    .await;
                match outcome {
                    Ok(_) => {
                        expired_async_timers += 1;
                    }
                    Err(err) => {
                        debug!(%execution_id, %join_set_id, %delay_id, "Failed to update expired async timer - {err:?}");
                    }
                }
            }
            ExpiredTimer::Lock {
                execution_id,
                version,
                intermittent_event_count,
                max_retries,
                retry_exp_backoff,
                parent,
            } => {
                // The timeout being recorded now counts as one more intermittent event.
                let retry_delay = ExecutionLog::can_be_retried_after(
                    intermittent_event_count + 1,
                    max_retries,
                    retry_exp_backoff,
                );
                let append = match retry_delay {
                    Some(duration) => {
                        let expires_at = executed_at + duration;
                        debug!(%execution_id, "Retrying execution with expired lock after {duration:?} at {expires_at}");
                        Append {
                            created_at: executed_at,
                            primary_event: ExecutionEventInner::IntermittentTimeout { expires_at },
                            execution_id: execution_id.clone(),
                            version,
                            // A retry does not notify the parent.
                            parent: None,
                        }
                    }
                    None => {
                        info!(%execution_id, "Marking execution with expired lock as permanently timed out");
                        let finished_exec_result = Err(FinishedExecutionError::PermanentTimeout);
                        // The parent, if any, receives a copy of the final result.
                        let notify_parent =
                            parent.map(|(p, j)| (p, j, finished_exec_result.clone()));
                        Append {
                            created_at: executed_at,
                            primary_event: ExecutionEventInner::Finished {
                                result: finished_exec_result,
                            },
                            execution_id: execution_id.clone(),
                            version,
                            parent: notify_parent,
                        }
                    }
                };
                let outcome = append.append(&db_connection).await;
                match outcome {
                    Ok(_) => {
                        expired_locks += 1;
                    }
                    Err(err) => {
                        debug!(%execution_id, "Failed to update expired lock - {err:?}");
                    }
                }
            }
        }
    }
    Ok(TickProgress {
        expired_locks,
        expired_async_timers,
    })
}