//! [`fusillade::RequestProcessor`] dispatcher: routes `/v1/responses`
//! claims into the multi-step orchestration loop and defers everything
//! else to the existing [`fusillade::DefaultRequestProcessor`].
//!
//! This is the single point where dwctl decides "is this a multi-step
//! request?" — and from here the loop handles the rest: transition
//! decisions, parallel fan-out, sub-agent recursion, retries, and
//! assembly.
//!
//! ## Lifecycle for a multi-step request
//!
//! 1. Daemon claims a fusillade row whose `path` is `/v1/responses`.
//! 2. This processor's `process()` is invoked.
//! 3. We construct a per-request [`onwards::run_response_loop`]
//!    invocation against:
//!    - storage = the shared `FusilladeResponseStore` (transition +
//!      assembly + step CRUD);
//!    - tools = the shared `HttpToolExecutor` + a `RequestContext`
//!      carrying this request's `ResolvedTools`;
//!    - upstream = the model URL/auth resolved from the row's
//!      `endpoint`/`api_key`;
//!    - http_client = the shared onwards `HyperClient`.
//! 4. The loop runs to terminal state. The final assembled response is
//!    persisted as the parent fusillade row's `response_body`, at
//!    which point the row reaches `Completed` and downstream consumers
//!    (the `GET /v1/responses/{id}` handler, streaming subscribers,
//!    etc.) see it.
//!
//! ## Single-step requests are unchanged
//!
//! Anything whose `path` is not `/v1/responses` flows straight
//! through `DefaultRequestProcessor::process(...)`. The batch path,
//! `/v1/chat/completions`, and `/v1/embeddings` get exactly the same
//! pipeline as before.

use std::sync::Arc;

use async_trait::async_trait;
use fusillade::request::{Canceled, Claimed, Completed, Failed, Request, RequestCompletionResult};
use fusillade::{
    CancellationFuture, DefaultRequestProcessor, FailureReason, PoolProvider as FusilladePool, RequestProcessor, ShouldRetry, Storage,
};
use onwards::client::HttpClient;
use onwards::traits::{RequestContext, ToolExecutor};
use onwards::{LoopConfig, LoopError, MultiStepStore, UpstreamTarget};

use crate::responses::store::{FusilladeResponseStore, PendingResponseInput};
use crate::tool_executor::ResolvedTools;

/// Dispatches per-claim work to the multi-step loop for `/v1/responses`
/// requests, falling through to [`DefaultRequestProcessor`] for
/// everything else.
///
/// Generic over the tool executor type so test fixtures can wrap the
/// production `HttpToolExecutor` with context-injecting shims (the
/// daemon path has no request-scoped middleware to populate the
/// `ResolvedTools` extension on `RequestContext`, so the test wraps
/// the executor with an injector). Production wiring uses
/// `HttpToolExecutor` directly, together with the
/// [`tool_resolver`](Self::tool_resolver) field below.
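///
/// A construction sketch for the production path (illustrative only;
/// `store`, `executor`, `client`, `config`, and `pool` are assumed to
/// come from the daemon bootstrap, not defined here):
///
/// ```rust,ignore
/// let processor = DwctlRequestProcessor::new(store, executor, client, config)
///     .with_tool_resolver(Arc::new(DbToolResolver { pool }));
/// ```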
pub struct DwctlRequestProcessor<P, T>
where
    P: FusilladePool + Clone + Send + Sync + 'static,
    T: ToolExecutor + 'static,
{
    pub response_store: Arc<FusilladeResponseStore<P>>,
    pub tool_executor: Arc<T>,
    pub http_client: Arc<dyn HttpClient + Send + Sync>,
    pub loop_config: LoopConfig,
    /// Tool resolver for the daemon path. Tools today are scoped per
    /// (api_key, model alias) — same as the realtime middleware path —
    /// so the daemon resolves the same tool set the original request
    /// would have seen. `None` means no DB-backed resolution; the
    /// processor will run the loop with whatever tools the
    /// `tool_executor` discovers from an empty context (used by the
    /// daemon-test fixture that injects ResolvedTools through its own
    /// shim).
    pub tool_resolver: Option<Arc<dyn DaemonToolResolver>>,
    /// Default processor used for non-`/v1/responses` endpoints. Owns
    /// no state — declared as a field so the trait dispatch below has
    /// a stable receiver.
    pub default: DefaultRequestProcessor,
}

/// Resolve the tool set for a daemon-claimed request. Called once per
/// `/v1/responses` claim before the loop runs. The default production
/// implementation (see [`DbToolResolver`]) runs the same DB join the
/// realtime middleware does, scoped to the row's API key and model
/// alias.
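///
/// A fixed-answer implementation is enough for a test double. A
/// minimal sketch (not the real fixture; it assumes
/// `ResolvedToolSet: Clone`):
///
/// ```rust,ignore
/// struct StaticResolver(Option<crate::tool_executor::ResolvedToolSet>);
///
/// #[async_trait]
/// impl DaemonToolResolver for StaticResolver {
///     async fn resolve(
///         &self,
///         _api_key: &str,
///         _model_alias: &str,
///     ) -> Result<Option<crate::tool_executor::ResolvedToolSet>, anyhow::Error> {
///         // Hand back the canned tool set regardless of scoping.
///         Ok(self.0.clone())
///     }
/// }
/// ```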
#[async_trait]
pub trait DaemonToolResolver: Send + Sync {
    async fn resolve(&self, api_key: &str, model_alias: &str) -> Result<Option<crate::tool_executor::ResolvedToolSet>, anyhow::Error>;
}

/// Production [`DaemonToolResolver`] backed by the same query the
/// realtime tool injection middleware uses.
pub struct DbToolResolver {
    pub pool: sqlx::PgPool,
}

#[async_trait]
impl DaemonToolResolver for DbToolResolver {
    async fn resolve(&self, api_key: &str, model_alias: &str) -> Result<Option<crate::tool_executor::ResolvedToolSet>, anyhow::Error> {
        crate::tool_injection::resolve_tools_for_request(&self.pool, api_key, Some(model_alias)).await
    }
}

impl<P, T> DwctlRequestProcessor<P, T>
where
    P: FusilladePool + Clone + Send + Sync + 'static,
    T: ToolExecutor + 'static,
{
    pub fn new(
        response_store: Arc<FusilladeResponseStore<P>>,
        tool_executor: Arc<T>,
        http_client: Arc<dyn HttpClient + Send + Sync>,
        loop_config: LoopConfig,
    ) -> Self {
        Self {
            response_store,
            tool_executor,
            http_client,
            loop_config,
            tool_resolver: None,
            default: DefaultRequestProcessor,
        }
    }

    /// Wire in the production tool resolver. Without this, the daemon
    /// path runs the loop with no resolved tools — fine for tests, but
    /// in production this should always be set so multi-step requests
    /// see the same tools their original API key + model alias would.
    pub fn with_tool_resolver(mut self, resolver: Arc<dyn DaemonToolResolver>) -> Self {
        self.tool_resolver = Some(resolver);
        self
    }
}

#[async_trait]
impl<S, H, P, T> RequestProcessor<S, H> for DwctlRequestProcessor<P, T>
where
    S: Storage + Sync,
    H: fusillade::HttpClient + 'static,
    P: FusilladePool + Clone + Send + Sync + 'static,
    T: ToolExecutor + 'static,
{
    async fn process(
        &self,
        request: Request<Claimed>,
        http: H,
        storage: &S,
        should_retry: ShouldRetry,
        cancellation: CancellationFuture,
    ) -> fusillade::Result<RequestCompletionResult> {
        // Multi-step path is gated on the request's API path. fusillade's
        // RequestData splits URL into `endpoint` (base URL like
        // https://api.openai.com) and `path` (e.g. /v1/responses), so we
        // match on `path`. Subpaths (e.g. `/v1/responses/...`) and other
        // routes flow through the default processor unchanged.
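        //
        // e.g. a claimed row for POST https://api.openai.com/v1/responses
        // (host shown for illustration) splits into:
        //   endpoint = "https://api.openai.com"
        //   path     = "/v1/responses"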
        if request.data.path != "/v1/responses" {
            return self.default.process(request, http, storage, should_retry, cancellation).await;
        }

        // The cancellation future is owned by the daemon. The
        // multi-step loop does not currently observe it directly;
        // cancellation reaches the awaits below when the daemon drops
        // this task's future, and sub-step HTTP calls are torn down
        // with it through Tokio's task-cancellation tree. The bindings
        // below just document that the multi-step path makes no direct
        // use of these arguments. (`storage` is still used to persist
        // the terminal state, so it is not parked here.)
        let _cancellation_holder = cancellation;
        let _should_retry_unused = should_retry;
        let _http_unused = http;

        let request_id = request.data.id.0.to_string();
        let upstream = UpstreamTarget {
            // For the multi-step path, dwctl redirects to upstream's
            // `/v1/chat/completions` even though the user-visible path
            // is `/v1/responses` — the chat-completions shape is what
            // most upstream models speak (transition.rs encodes the
            // mapping). The fusillade row's `endpoint` carries the
            // upstream base URL.
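            // e.g. endpoint "https://api.openai.com" (illustrative
            // host) yields "https://api.openai.com/v1/chat/completions".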
            url: {
                let base = request.data.endpoint.trim_end_matches('/');
                format!("{base}/v1/chat/completions")
            },
            api_key: if request.data.api_key.is_empty() {
                None
            } else {
                Some(request.data.api_key.clone())
            },
        };

        // Build the per-request RequestContext the same way the
        // realtime middleware does. Tools today are scoped per
        // (api_key, model_alias); the resolver runs the same DB join
        // and produces a ResolvedToolSet, which is injected into the
        // context via the same `ResolvedTools` extension that
        // HttpToolExecutor reads at execute() time. This means
        // daemon-driven multi-step requests see exactly the same
        // tool set their original /v1/responses POST would have seen
        // — no daemon-vs-realtime tool-availability divergence.
        let mut tool_ctx = RequestContext::new().with_model(request.data.model.clone());
        let mut resolved_tool_names = std::collections::HashSet::new();
        if let Some(resolver) = &self.tool_resolver {
            match resolver.resolve(&request.data.api_key, &request.data.model).await {
                Ok(Some(resolved)) => {
                    resolved_tool_names = resolved.tools.keys().cloned().collect();
                    tool_ctx = tool_ctx.with_extension(ResolvedTools(Arc::new(resolved)));
                }
                Ok(None) => {
                    tracing::debug!(
                        request_id = %request.data.id,
                        model = %request.data.model,
                        "no tools resolved for daemon-driven /v1/responses request"
                    );
                }
                Err(e) => {
                    tracing::warn!(
                        error = %e,
                        request_id = %request.data.id,
                        "tool resolution failed for daemon path; running loop with no tools"
                    );
                }
            }
        }

        // The transition function (`next_action_for`) re-parses the
        // user's `/v1/responses` body on every iteration to build the
        // chat-completions messages array; without a `pending_input`
        // registered under this request_id it errors at the first
        // iteration. The warm path stashes this synchronously before
        // kicking off the loop; the daemon path has to do the same
        // here, sourcing the body and per-request fields from the
        // claimed fusillade row. Without this, daemon-driven
        // multi-step requests fail with "no pending input registered"
        // immediately — which is why `service_tier:"flex"` couldn't
        // safely fall through to `handle_flex` until now.
        //
        // The fusillade row's `id` is the same UUID the loop uses as
        // its `request_id` (and that `record_step` reuses as the head
        // step's id), so we register under exactly that key.
        let pending = PendingResponseInput {
            body: request.data.body.clone(),
            api_key: if request.data.api_key.is_empty() {
                None
            } else {
                Some(request.data.api_key.clone())
            },
            created_by: if request.data.created_by.is_empty() {
                None
            } else {
                Some(request.data.created_by.clone())
            },
            base_url: request.data.endpoint.clone(),
            resolved_tool_names,
        };
        if let Err(e) = self.response_store.register_pending_with_id(request.data.id.0, pending) {
            // Fail the request immediately with the real cause rather
            // than letting the loop surface a confusing
            // "no pending input registered" error that doesn't
            // mention the lock at all. Lock poisoning is rare in
            // practice (would require a panic while holding the
            // mutex), but when it does happen we want the failure
            // mode to be diagnosable from the audit log alone.
            return Err(fusillade::FusilladeError::Other(anyhow::anyhow!(
                "register pending input for daemon-driven /v1/responses: {e}"
            )));
        }

        // RAII cleanup: even if the loop panics or the daemon task
        // is cancelled mid-await, the side-channel entry must be
        // dropped — otherwise `pending_inputs` grows unbounded
        // across daemon-claimed requests. An explicit
        // `unregister_pending` call after the loop wouldn't run on
        // task cancellation (the future is dropped at the await
        // point); the guard's `Drop` runs in either path.
        let cleanup_store = self.response_store.clone();
        let cleanup_id = request_id.clone();
        let _pending_guard = scopeguard::guard((), move |_| {
            cleanup_store.unregister_pending(&cleanup_id);
        });

        // Daemon path: no event sink — the user's HTTP connection is
        // long gone by the time we claim. Streaming requests use the
        // warm path (responses::streaming module) which runs the loop
        // inline with a sink wired to the SSE response.
        let result = onwards::run_response_loop(
            &*self.response_store,
            &*self.tool_executor,
            &tool_ctx,
            &upstream,
            self.http_client.clone(),
            None,
            &request_id,
            None,
            self.loop_config,
            0,
        )
        .await;

        match result {
            Ok(_final_payload) => {
                // The loop already persisted every step including the
                // assembled response (via the transition function
                // returning Complete with the assembled body). The
                // parent fusillade row needs to land in Completed
                // state with the assembled JSON in response_body so
                // GET /v1/responses/{id} sees it.
                let assembled = self
                    .response_store
                    .assemble_response(&request_id)
                    .await
                    .map_err(|e| fusillade::FusilladeError::Other(anyhow::anyhow!("assemble_response after loop: {e}")))?;
                // Finalize the head step's sub-request row before the
                // parent. Sub-request rows are created in `processing`
                // state by `create_sub_request_row` and rely on the
                // loop's caller to UPDATE them on terminal — see the
                // comment block in `responses/store.rs` next to the
                // `initial_state: "processing"` literal. Without this,
                // the row that `GET /v1/responses/{id}` reads from
                // (head_step.request_id → fusillade.requests row) stays
                // stuck in `processing` forever, and a polling client
                // sees `"status":"in_progress"` indefinitely even after
                // the parent below transitions to Completed. The warm
                // path does the same call via
                // `streaming::persist_terminal_completed`; the daemon
                // path is the only multi-step caller that was missing
                // it.
                self.response_store
                    .finalize_head_request(&request_id, 200, assembled.clone())
                    .await
                    .map_err(|e| fusillade::FusilladeError::Other(anyhow::anyhow!("finalize head sub-request: {e}")))?;
                let body = serde_json::to_string(&assembled)
                    .map_err(|e| fusillade::FusilladeError::Other(anyhow::anyhow!("serialize assembled response: {e}")))?;
                let completed = Request {
                    data: request.data.clone(),
                    state: Completed {
                        response_status: 200,
                        response_body: body,
                        claimed_at: request.state.claimed_at,
                        started_at: chrono::Utc::now(),
                        completed_at: chrono::Utc::now(),
                        routed_model: request.data.model.clone(),
                    },
                };
                storage.persist(&completed).await?;
                Ok(RequestCompletionResult::Completed(completed))
            }
            Err(LoopError::Failed(payload)) => {
                // Mirror the success path: finalize the head sub-request
                // row before transitioning the parent. Without this,
                // GET /v1/responses/{id} would keep returning
                // `"status":"in_progress"` after the loop has already
                // failed.
                if let Err(e) = self.response_store.finalize_head_request(&request_id, 500, payload.clone()).await {
                    tracing::warn!(error = %e, request_id = %request_id, "Failed to finalize head sub-request after loop failure");
                }
                let body = serde_json::to_string(&payload).unwrap_or_default();
                let failed = Request {
                    data: request.data.clone(),
                    state: Failed {
                        reason: FailureReason::NonRetriableHttpStatus { status: 500, body },
                        failed_at: chrono::Utc::now(),
                        retry_attempt: request.state.retry_attempt,
                        batch_expires_at: request.state.batch_expires_at,
                        routed_model: request.data.model.clone(),
                    },
                };
                storage.persist(&failed).await?;
                Ok(RequestCompletionResult::Failed(failed))
            }
            Err(other) => {
                // Storage-level / cap-level / unexpected errors all
                // become non-retriable failures on the parent row. The
                // multi-step loop already persisted whatever step rows
                // got partway through; surfacing the parent's terminal
                // state lets the caller see what happened.
                //
                // Same head-sub-request finalization concern as the
                // success / `LoopError::Failed` arms above — without
                // this, polling clients see stale `in_progress`. We
                // do best-effort here: the head step / sub-request
                // row may not exist at all if the loop crashed before
                // the first record_step, which is exactly the no-op
                // path inside `finalize_head_request`.
                let payload = serde_json::json!({
                    "type": "loop_error",
                    "message": other.to_string(),
                });
                if let Err(e) = self.response_store.finalize_head_request(&request_id, 500, payload).await {
                    tracing::warn!(error = %e, request_id = %request_id, "Failed to finalize head sub-request after unexpected loop error");
                }
                let failed = Request {
                    data: request.data.clone(),
                    state: Failed {
                        reason: FailureReason::NonRetriableHttpStatus {
                            status: 500,
                            body: format!("multi-step loop error: {other}"),
                        },
                        failed_at: chrono::Utc::now(),
                        retry_attempt: request.state.retry_attempt,
                        batch_expires_at: request.state.batch_expires_at,
                        routed_model: request.data.model.clone(),
                    },
                };
                storage.persist(&failed).await?;
                Ok(RequestCompletionResult::Failed(failed))
            }
        }
    }
}

// Keeps the `Canceled` import anchored so automated unused-import
// cleanup (e.g. rust-analyzer's) doesn't silently strip it.
#[allow(dead_code)]
fn _smoke(_c: Canceled) {}