//! Watcher task that periodically looks up expired timers in the database and appends the
//! follow-up events: retries or permanent timeouts for expired locks, and `DelayFinished`
//! responses for elapsed async delays.

use crate::executor::Append;
use chrono::{DateTime, Utc};
use concepts::prefixed_ulid::ExecutorId;
use concepts::storage::DbConnection;
use concepts::storage::DbError;
use concepts::storage::JoinSetResponseEvent;
use concepts::FinishedExecutionResult;
use concepts::SupportedFunctionResult;
use concepts::{
    storage::{ExecutionEventInner, ExpiredTimer, JoinSetResponse},
    FinishedExecutionError,
};
use std::{
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc,
    },
    time::Duration,
};
use tokio::task::AbortHandle;
use tracing::Level;
use tracing::Span;
use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument};
use utils::time::ClockFn;
use val_json::type_wrapper::TypeWrapper;
use val_json::wast_val::WastVal;
use val_json::wast_val::WastValWithType;

/// Configuration of the expired-timers watcher task.
#[derive(Debug, Clone)]
pub struct TimersWatcherConfig<C: ClockFn> {
    /// How long the watcher sleeps between two consecutive ticks.
    pub tick_sleep: Duration,
    /// Clock invoked at the start of every tick to obtain the tick's timestamp.
    pub clock_fn: C,
}

/// Watcher that processes expired timers found through its database connection.
pub struct TimersWatcherTask<DB: DbConnection> {
    /// Connection used to fetch expired timers and to append the resulting events.
    pub(crate) db_connection: DB,
}

/// Summary of a single tick: how many expired timers were handled successfully.
// The fields are never read in this file; presumably they are inspected by tests or only
// through `Debug` output, hence the lint suppression — TODO confirm.
#[allow(dead_code)]
#[derive(Debug)]
pub(crate) struct TickProgress {
    /// Number of expired locks for which an event was appended without error.
    pub(crate) expired_locks: usize,
    /// Number of expired async delays for which a response was appended without error.
    pub(crate) expired_async_timers: usize,
}

/// Handle to the spawned watcher task.
///
/// `close` requests a graceful shutdown; dropping the handle aborts a task that is still running.
pub struct TaskHandle {
    /// Flag shared with the task; set by `close` to ask the loop to exit.
    is_closing: Arc<AtomicBool>,
    /// Used to check whether the task has finished and to abort it on drop.
    abort_handle: AbortHandle,
    /// Parent span for the instrumented `close` and `drop` calls.
    span: Span,
}

impl TaskHandle {
    /// Requests a graceful shutdown and waits until the task has finished.
    ///
    /// Raises the shared closing flag and then polls the task once per millisecond
    /// until it reports completion.
    #[instrument(skip_all, parent = &self.span)]
    pub async fn close(&self) {
        trace!("Gracefully closing");
        self.is_closing.store(true, Ordering::Relaxed);
        let poll_interval = Duration::from_millis(1);
        loop {
            if self.abort_handle.is_finished() {
                break;
            }
            tokio::time::sleep(poll_interval).await;
        }
        info!("Gracefully closed");
    }
}

impl Drop for TaskHandle {
    /// Aborts the task if it is still running when the handle goes away.
    ///
    /// A task that already finished (for example after `close`) is left untouched.
    #[instrument(skip_all, parent = &self.span)]
    fn drop(&mut self) {
        let still_running = !self.abort_handle.is_finished();
        if still_running {
            warn!("Aborting the task");
            self.abort_handle.abort();
        }
    }
}

impl<DB: DbConnection + 'static> TimersWatcherTask<DB> {
    pub fn spawn_new<C: ClockFn + 'static>(
        db_connection: DB,
        config: TimersWatcherConfig<C>,
    ) -> TaskHandle {
        let executor_id = ExecutorId::generate();
        let span = info_span!("expired_timers_watcher",
            executor = %executor_id,
        );
        let is_closing = Arc::new(AtomicBool::default());
        let is_closing_inner = is_closing.clone();
        let tick_sleep = config.tick_sleep;
        let abort_handle = tokio::spawn(
            async move {
                info!("Spawned");
                let task = Self { db_connection };
                let mut old_err = None;
                loop {
                    let executed_at = (config.clock_fn)();
                    let res = task.tick(executed_at).await;
                    Self::log_err_if_new(res, &mut old_err);
                    if is_closing_inner.load(Ordering::Relaxed) {
                        return;
                    }
                    tokio::time::sleep(tick_sleep).await;
                }
            }
            .instrument(span.clone()),
        )
        .abort_handle();
        TaskHandle {
            is_closing,
            abort_handle,
            span,
        }
    }

    fn log_err_if_new(res: Result<TickProgress, DbError>, old_err: &mut Option<DbError>) {
        match (res, &old_err) {
            (Ok(_), _) => {
                *old_err = None;
            }
            (Err(err), Some(old)) if err == *old => {}
            (Err(err), _) => {
                error!("Tick failed: {err:?}");
                *old_err = Some(err);
            }
        }
    }

    #[instrument(level = Level::DEBUG, skip_all)]
    pub(crate) async fn tick(&self, executed_at: DateTime<Utc>) -> Result<TickProgress, DbError> {
        let mut expired_locks = 0;
        let mut expired_async_timers = 0;
        for expired_timer in self.db_connection.get_expired_timers(executed_at).await? {
            match expired_timer {
                ExpiredTimer::Lock {
                    execution_id,
                    version,
                    intermittent_event_count,
                    max_retries,
                    retry_exp_backoff,
                    parent,
                    return_type,
                } => {
                    let append = if intermittent_event_count < max_retries {
                        let duration =
                            retry_exp_backoff * 2_u32.saturating_pow(intermittent_event_count);
                        let expires_at = executed_at + duration;
                        debug!(%execution_id, "Retrying execution with expired lock after {duration:?} at {expires_at}");
                        Append {
                            created_at: executed_at,
                            primary_event: ExecutionEventInner::IntermittentTimeout { expires_at }, // not converting for clarity
                            execution_id,
                            version,
                            parent: None,
                        }
                    } else {
                        info!(%execution_id, "Marking execution with expired lock as permanently timed out");
                        // Try to convert to SupportedFunctionResult::Fallible
                        let finished_exec_result = convert_permanent_timeout(return_type);
                        let parent = parent.map(|(p, j)| (p, j, finished_exec_result.clone()));
                        Append {
                            created_at: executed_at,
                            primary_event: ExecutionEventInner::Finished {
                                result: finished_exec_result,
                            },
                            execution_id,
                            version,
                            parent,
                        }
                    };
                    let res = append.append(&self.db_connection).await;
                    if let Err(err) = res {
                        debug!(%execution_id, "Failed to update expired lock - {err:?}");
                    } else {
                        expired_locks += 1;
                    }
                }
                ExpiredTimer::AsyncDelay {
                    execution_id,
                    join_set_id,
                    delay_id,
                } => {
                    let event = JoinSetResponse::DelayFinished { delay_id };
                    debug!(%execution_id, %join_set_id, %delay_id, "Appending DelayFinishedAsyncResponse");
                    let res = self
                        .db_connection
                        .append_response(
                            executed_at,
                            execution_id,
                            JoinSetResponseEvent { join_set_id, event },
                        )
                        .await;
                    if let Err(err) = res {
                        debug!(%execution_id, %join_set_id, %delay_id, "Failed to update expired async timer - {err:?}");
                    } else {
                        expired_async_timers += 1;
                    }
                }
            }
        }
        Ok(TickProgress {
            expired_locks,
            expired_async_timers,
        })
    }
}

/// Maps a permanent timeout onto the function's declared return type where possible.
///
/// * `result<_, _>` with no error type: returns a fallible value holding `Err(None)`.
/// * `result<_, string>`: returns a fallible value holding `Err(Some("timeout"))`.
/// * Any other return type, or no return type at all:
///   `Err(FinishedExecutionError::PermanentTimeout)`.
fn convert_permanent_timeout(return_type: Option<TypeWrapper>) -> FinishedExecutionResult {
    // Only `Result` return types can carry the timeout as a value.
    let result_type = match return_type {
        Some(result_type @ TypeWrapper::Result { .. }) => result_type,
        _ => return Err(FinishedExecutionError::PermanentTimeout),
    };
    // Pick the payload of the `Err` variant according to the declared error type.
    let err_payload = match &result_type {
        TypeWrapper::Result { err: None, .. } => None,
        TypeWrapper::Result {
            err: Some(err_type),
            ..
        } if matches!(err_type.as_ref(), TypeWrapper::String) => {
            Some(WastVal::String("timeout".to_string()).into())
        }
        _ => return Err(FinishedExecutionError::PermanentTimeout),
    };
    Ok(SupportedFunctionResult::Fallible(WastValWithType {
        r#type: result_type,
        value: WastVal::Result(Err(err_payload)),
    }))
}