dirge-agent 0.13.9

Minimalistic coding agent written in Rust, optimized for memory footprint and performance
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
//! Runner-spawning and stream-fn construction for [`AnyAgent`]. Split out of
//! `provider/mod.rs` (dirge-4y4l stage 8): the methods that turn a built
//! `AnyAgent` into a live `AgentRunner` (main session, background review,
//! curator forks) plus the `build_stream_fn*` helpers those paths rely on.
//!
//! Child module of `provider`, so it reaches `AnyAgent`'s private fields and
//! the `AnyAgentInner` variants directly (privacy = defining module +
//! descendants) — no `pub(crate)` field bumps or accessors needed.

#[allow(unused_imports)]
use crate::sync_util::LockExt;
use rig::completion::Message;

use super::{AnyAgent, AnyAgentInner};
use crate::agent::runner::AgentRunner;
use crate::agent::tools::ToolCache;

/// Return the subset of `tools` whose name appears in `allowed`, keeping the
/// original registration order.
///
/// This is the one shared implementation of the hard tool restriction used by
/// every forked-agent path (the background review/curator forks and the
/// phased-plan explore/plan/reviewer agents). A tool whose name is missing
/// from `allowed` is simply not present in the returned set, so a slip in a
/// prompt-level guard cannot reach it.
///
/// The returned entries are `Arc` clones of the input entries, not copies of
/// the tools themselves.
pub(crate) fn filter_loop_tools(
    tools: &[std::sync::Arc<dyn crate::agent::agent_loop::LoopTool>],
    allowed: &[&str],
) -> Vec<std::sync::Arc<dyn crate::agent::agent_loop::LoopTool>> {
    let mut kept = Vec::new();
    for tool in tools {
        if allowed.contains(&tool.name()) {
            kept.push(std::sync::Arc::clone(tool));
        }
    }
    kept
}

/// dirge-ygm3: overwrite every slot named `memory` in an already-filtered tool
/// set with `review_tool`, the review-enabled instance (`mark`/`supersede`).
///
/// Intended only for the background review fork, so that those actions are
/// reachable there and nowhere else. Slots with any other name are left
/// untouched; if no slot is named `memory` the call does nothing.
pub(crate) fn swap_in_review_memory(
    tools: &mut [std::sync::Arc<dyn crate::agent::agent_loop::LoopTool>],
    review_tool: &std::sync::Arc<dyn crate::agent::agent_loop::LoopTool>,
) {
    tools
        .iter_mut()
        .filter(|slot| slot.name() == "memory")
        .for_each(|slot| *slot = std::sync::Arc::clone(review_tool));
}

impl AnyAgent {
    /// Convert each loop tool in `tools` into the rig `ToolDefinition` the
    /// per-turn request builder expects, preserving order.
    ///
    /// Both the main builder (`spawn_runner`) and the fork builder
    /// (`spawn_filtered_runner_with_cache`) need exactly this mapping, so it
    /// lives here once.
    fn tool_defs_for(
        tools: &[std::sync::Arc<dyn crate::agent::agent_loop::LoopTool>],
    ) -> Vec<rig::completion::ToolDefinition> {
        let mut defs = Vec::with_capacity(tools.len());
        for tool in tools {
            defs.push(crate::agent::agent_loop::loop_tool_to_rig_definition(
                tool.as_ref(),
            ));
        }
        defs
    }

    /// Normalize a model name for `LoopSpawnConfig.model_name`: an empty
    /// string means "unset" and becomes `None`; anything else is returned as
    /// an owned `Some(String)`.
    ///
    /// This is an associated function taking the field by reference (rather
    /// than `&self`) so it stays callable after `self` has been partially
    /// moved (`cfg.tools = self.loop_tools`).
    fn model_name_opt(model_name: &str) -> Option<String> {
        (!model_name.is_empty()).then(|| model_name.to_string())
    }

    /// Consume this agent and start the main-session [`AgentRunner`].
    ///
    /// In order, this:
    /// 1. clears the agent's tool cache;
    /// 2. builds the provider stream fn from the agent's tool registry and
    ///    wraps it in the retry layer — when
    ///    `openai_api_key_fallback_model` is set, the primary stream is
    ///    additionally paired with an OpenAI API-billing fallback stream;
    /// 3. assembles the system prompt from the agent preamble plus any
    ///    system-message content found in `history`, and (with the `plugin`
    ///    feature) lets the `before-agent-start` hook append to it;
    /// 4. converts `history` into loop messages;
    /// 5. fills a `LoopSpawnConfig` from the agent's fields and hands it to
    ///    `spawn_loop_runner`.
    ///
    /// # Parameters
    /// - `prompt`: the user prompt for this run; also used to seed the
    ///   `FileTouchTracker` when `context_depth_reminder_threshold` is set.
    /// - `history`: prior rig messages for the session.
    /// - `steering_queue`: optional shared queue forwarded unchanged to
    ///   `LoopSpawnConfig.steering_queue`.
    pub fn spawn_runner(
        self,
        prompt: String,
        history: Vec<Message>,
        steering_queue: Option<
            std::sync::Arc<std::sync::Mutex<std::collections::VecDeque<String>>>,
        >,
    ) -> AgentRunner {
        use crate::agent::agent_loop::{
            LoopSpawnConfig, retrying_stream_fn, retrying_stream_fn_with_non_retryable,
            rig_history_system_prompt, rig_history_to_loop_messages, spawn_loop_runner,
        };
        use crate::agent::recovery::RecoveryPolicy;

        // Start the run with an empty tool cache.
        // NOTE(review): presumably so results cached by a previous run are
        // not reused — confirm against `ToolCache`.
        self.cache.clear();

        let provider_name = self.provider_name().to_string();

        // Convert tool registry → rig ToolDefinitions for the
        // request builder, and keep the registry itself for the
        // loop's dispatch.
        let tool_defs = Self::tool_defs_for(&self.loop_tools);

        // Phase-3: per-session loaded-tool set was allocated at
        // `build_agent` time (when `dynamic_tool_search` is on)
        // and the SAME Arc was passed both to the
        // `ToolSearchTool` registered in `self.loop_tools` and
        // stored on `self.tool_def_filter`. The factory reads it
        // per-request; the tool inserts into it on execute.
        // `None` keeps the legacy path.
        let tool_def_filter = self.tool_def_filter.clone();

        // Build the StreamFn (4.5h-2 + 4.5h-3 chunk timeout).
        let inner_stream_fn =
            self.build_stream_fn_with_filter(tool_defs.clone(), tool_def_filter.clone());
        // Wrap with retry (4.5g) so transient Network / RateLimit
        // errors auto-retry with exponential backoff + Retry-After.
        let policy = RecoveryPolicy::default();
        let stream_fn = if let Some(fallback_model) = self.openai_api_key_fallback_model.clone() {
            // Primary stream: retrying, but the subscription-exhausted
            // predicate is passed as the "non-retryable" classifier so that
            // error is handed to the billing-fallback wrapper below.
            let primary_stream_fn = retrying_stream_fn_with_non_retryable(
                inner_stream_fn,
                policy.clone(),
                std::sync::Arc::new(
                    crate::provider::billing_fallback::is_openai_subscription_exhausted_error,
                ),
            );
            // Fallback stream: same tool definitions, chunk timeout and
            // tool-def filter, built from the fallback model.
            let fallback_inner = fallback_model.build_stream_fn_with_filter(
                tool_defs,
                self.chunk_timeout,
                Some("openai".to_string()),
                tool_def_filter.clone(),
            );
            let fallback_stream_fn = retrying_stream_fn(fallback_inner, policy);
            crate::provider::billing_fallback::with_openai_api_billing_fallback(
                primary_stream_fn,
                fallback_stream_fn,
                crate::provider::billing_fallback::prompt_from_ask_sender(
                    self.api_billing_ask_tx.clone(),
                ),
            )
        } else {
            retrying_stream_fn(inner_stream_fn, policy)
        };

        // Merge any system-message content from the history
        // (e.g. compaction summary) into the loop's
        // Context.system_prompt. The Agent's preamble (model
        // identity + tool docs) is the base; session-side
        // system messages append.
        let history_preamble = rig_history_system_prompt(&history);
        // `mut` is consumed only by the plugin-gated append below.
        #[cfg_attr(not(feature = "plugin"), allow(unused_mut))]
        let mut system_prompt = if history_preamble.is_empty() {
            self.preamble.clone()
        } else {
            format!("{}\n\n{}", self.preamble, history_preamble)
        };

        // dirge-wqxj: fire the `before-agent-start` plugin hook with
        // the assembled system prompt. A plugin may call
        // `harness/append-system-prompt` to add project/team context
        // to the preamble before the agent starts. Append-only — the
        // model-identity + tool-docs preamble is preserved.
        #[cfg(feature = "plugin")]
        if let Some(pm) = crate::plugin::hook::global() {
            let mut mgr = pm.lock_ignore_poison();
            // The hook context is a Janet table literal carrying the escaped
            // system prompt under `:system-prompt`.
            let ctx = format!(
                "@{{:system-prompt \"{}\"}}",
                crate::plugin::escape_janet_string(&system_prompt)
            );
            match mgr.dispatch("before-agent-start", &ctx) {
                Ok(_) => {
                    if let Some(append) = mgr.take_system_prompt_append() {
                        let append = append.trim();
                        // Whitespace-only appends are ignored.
                        if !append.is_empty() {
                            system_prompt = format!("{system_prompt}\n\n{append}");
                        }
                    }
                }
                Err(e) => {
                    tracing::warn!(
                        target: "dirge::plugin",
                        error = %e,
                        "before-agent-start hook error — system prompt left unchanged",
                    );
                }
            }
        }

        // Convert rig history → loop messages (Session-side
        // user/assistant/toolResult shapes).
        let loop_history = rig_history_to_loop_messages(history);

        // `prompt` is cloned here because the original is moved into the
        // `FileTouchTracker` closure further down.
        let mut cfg = LoopSpawnConfig::minimal(stream_fn, prompt.clone());
        cfg.system_prompt = system_prompt;
        cfg.history = loop_history;
        // Partial move of `self`: from here on only individual fields of
        // `self` may be read, not `self` as a whole.
        cfg.tools = self.loop_tools;
        cfg.provider_name = Some(provider_name);
        cfg.model_name = Self::model_name_opt(&self.model_name);
        cfg.steering_queue = steering_queue;
        cfg.tool_def_filter = tool_def_filter;
        cfg.dynamic_tool_search = self.dynamic_tool_search;
        // Phase 4 part 1: thread the escalation route — when set,
        // the loop's `stream_assistant_response` swaps to this
        // StreamFn for the call immediately following a repair or
        // tree-sitter failure. `escalation_stream_fn=None` keeps
        // the legacy single-provider path byte-for-byte identical.
        cfg.escalation_stream_fn = self.escalation_stream_fn.clone();
        cfg.escalation_provider_name = self.escalation_provider_name.clone();
        // Phase 4 part 2: build a fresh `FileTouchTracker` per
        // session seeded with the current prompt as the active
        // task. `None` keeps the feature off — byte-identical to
        // today.
        cfg.file_touch_tracker = self
            .context_depth_reminder_threshold
            .map(|t| crate::agent::agent_loop::context_depth::FileTouchTracker::new(t, prompt));
        // F6: pre-finalization verifier gate, always on (baked-in). Nudges
        // to verify before finishing when code was edited but not run.
        cfg.verifier = Some(crate::agent::agent_loop::verifier::VerifierGate::new());
        // F6 tier 3: thread the bounded critic (only Some when
        // critic_provider is configured). `None` → no critic.
        cfg.critic_fn = self.critic_fn.clone();
        cfg.goal_fn = self.goal_fn.clone();
        // Goal gate stop condition (`--goal`). Engages only when
        // `goal_fn` above is also present (it's the judge).
        cfg.goal = self.goal.clone();
        // dirge-008x: thread the in-loop compaction summarizer so the
        // proactive folds run LLM summarization (built in `build_agent`).
        cfg.summarize_fn = self.summarize_fn.clone();
        // dirge-nqr: forward the per-run turn cap. `None` keeps the
        // legacy unlimited behavior.
        cfg.max_turns = self.max_turns;
        // dirge-9tfq: forward the BackgroundStore so the spawn pipeline
        // installs a `get_followup_messages` hook that drains pending
        // subagent completions at the outer-loop boundary. `None`
        // (no-tools / test paths) leaves the hook unset and the loop
        // behaves byte-identically to pre-9tfq.
        cfg.bg_store = self.bg_store.clone();
        // dirge-h5tv: thread the memory provider into the loop so
        // auto-compaction can fire on_pre_compress. `None` paths
        // (no provider attached) keep legacy no-op behavior.
        cfg.memory_provider = self.memory_provider.clone();
        #[cfg(feature = "plugin")]
        {
            cfg.plugin_mgr = crate::plugin::hook::global();
        }

        let loop_runner = spawn_loop_runner(cfg);
        loop_runner.into_agent_runner()
    }

    /// Start the background-review runner (Phase 4): a restricted agent
    /// whose tool set is limited to `memory` + `skill`, so it can only write
    /// to project memory and skills.
    ///
    /// dirge-7ls: the review runner is handed a brand-new `ToolCache` instead
    /// of the main agent's. Today's memory/skill tools don't touch the cache
    /// directly, but any tool later added to the review allow-list — or any
    /// future invalidation hook such as `cache.clear()` on memory writes —
    /// must not pollute the main agent's cache mid-session. `subagents/task`
    /// is deliberately NOT changed: subagents share with their parent by
    /// design.
    pub fn spawn_review_runner(
        &self,
        prompt: String,
        transcript: String,
    ) -> crate::agent::runner::AgentRunner {
        let isolated_cache = ToolCache::new();
        // Only the runner is of interest here; the returned cache handle is
        // discarded.
        self.spawn_review_runner_with_cache(prompt, transcript, isolated_cache)
            .0
    }

    /// dirge-yai1 — fork used by the curator's umbrella-consolidation pass,
    /// restricted to the `skill` tool only.
    ///
    /// The curator prompt already tells the model to use nothing but
    /// `skill`; filtering at the tool level is a stronger guarantee than
    /// that prompt-level guard. Cache isolation, retry policy and stream-fn
    /// selection are the same as for `spawn_review_runner`. The
    /// review-enabled memory tool is not requested.
    pub fn spawn_curator_runner(
        &self,
        prompt: String,
        transcript: String,
    ) -> crate::agent::runner::AgentRunner {
        let skill_only: &[&str] = &["skill"];
        let review_memory = false;
        self.spawn_filtered_runner_with_cache(
            prompt,
            transcript,
            ToolCache::new(),
            skill_only,
            review_memory,
        )
        .0
    }

    /// P3a (dirge-crrh): fork a phase agent for the phased-plan workflow.
    ///
    /// The fork is a separate runner that receives a frozen `transcript`,
    /// the phase-specific `prompt`, and ONLY the tools named in `allowed` —
    /// a hard whitelist (e.g. read-only for explore/plan, read+bash for the
    /// reviewer). It gets its own isolated cache and the same
    /// retry/stream-fn machinery as the review fork, and never the
    /// review-enabled memory tool. This is the building block for the
    /// explore→plan→review→execute orchestration (P3c) and the reviewer loop
    /// (P3d).
    #[allow(dead_code)] // wired by the phased-plan orchestrator (P3c/P3d)
    pub fn spawn_phase_runner(
        &self,
        prompt: String,
        transcript: String,
        allowed: &[&str],
    ) -> crate::agent::runner::AgentRunner {
        let phase_cache = ToolCache::new();
        let review_memory = false;
        let (phase_runner, _phase_cache) = self.spawn_filtered_runner_with_cache(
            prompt,
            transcript,
            phase_cache,
            allowed,
            review_memory,
        );
        phase_runner
    }

    /// dirge-mo0w PR-2: forked runner for the memory curator's LLM
    /// consolidation pass, restricted to the `memory` tool only.
    ///
    /// This mirrors `spawn_curator_runner` with the allow-list inverted:
    /// with `&["memory"]` the consolidation pass can ONLY add/replace/remove
    /// memory entries and cannot write skills — the skill-write tools are
    /// literally unreachable even if the prompt-level guard slips.
    pub fn spawn_memory_curator_runner(
        &self,
        prompt: String,
        transcript: String,
    ) -> crate::agent::runner::AgentRunner {
        let memory_only: &[&str] = &["memory"];
        // The consolidation curator only add/replace/remove/promotes. It has
        // no transcript to infer outcomes or contradictions from, so it is
        // NOT given the review-enabled tool (`mark`/`supersede`).
        let review_memory = false;
        self.spawn_filtered_runner_with_cache(
            prompt,
            transcript,
            ToolCache::new(),
            memory_only,
            review_memory,
        )
        .0
    }

    /// Review-runner constructor that takes the cache from the caller.
    ///
    /// The supplied cache is returned next to the runner so that tests can
    /// check isolation with `ToolCache::shares_storage_with` against
    /// `self.cache()` (dirge-7ls regression test). Production code should go
    /// through `spawn_review_runner`, which passes a fresh
    /// `ToolCache::new()`.
    ///
    /// dirge-yai1: the body is a thin delegation to
    /// `spawn_filtered_runner_with_cache`, so the curator can reuse the same
    /// machinery with a different allow-list instead of duplicating it.
    pub(crate) fn spawn_review_runner_with_cache(
        &self,
        prompt: String,
        transcript: String,
        review_cache: ToolCache,
    ) -> (crate::agent::runner::AgentRunner, ToolCache) {
        const REVIEW_ALLOW_LIST: &[&str] = &["memory", "skill"];
        // The review pass is the one that records outcomes (`mark`) and
        // supersedes contradicted facts, so it asks for the review-enabled
        // memory tool.
        let review_memory = true;
        self.spawn_filtered_runner_with_cache(
            prompt,
            transcript,
            review_cache,
            REVIEW_ALLOW_LIST,
            review_memory,
        )
    }

    /// dirge-yai1: forked-runner factory parameterized by a tool allow-list.
    ///
    /// `spawn_review_runner_with_cache` passes `&["memory", "skill"]`; the
    /// curator pass passes `&["skill"]`, so the model literally cannot write
    /// memory entries even if the prompt-level guard slips. Every caller gets
    /// the same cache-isolation check, retry policy and stream-fn selection.
    ///
    /// # Parameters
    /// - `prompt` / `transcript`: combined into one prompt, with the
    ///   transcript wrapped in `<session_transcript>` tags.
    /// - `review_cache`: must not share storage with the main agent's cache
    ///   (checked with `debug_assert!`); handed back unchanged in the result.
    /// - `allowed_tools`: names of the tools the fork may keep.
    /// - `review_memory` (dirge-ygm3): when `true`, the review-enabled memory
    ///   tool (`mark`/`supersede`) replaces the regular `memory` tool. Only
    ///   the background REVIEW pass sets this — it is the one that infers
    ///   outcomes and contradictions from the transcript. The consolidation
    ///   curator and the phase runners pass `false`.
    ///
    /// # Returns
    /// The spawned runner together with `review_cache`.
    pub(crate) fn spawn_filtered_runner_with_cache(
        &self,
        prompt: String,
        transcript: String,
        review_cache: ToolCache,
        allowed_tools: &[&str],
        review_memory: bool,
    ) -> (crate::agent::runner::AgentRunner, ToolCache) {
        use crate::agent::agent_loop::{LoopSpawnConfig, retrying_stream_fn, spawn_loop_runner};
        use crate::agent::recovery::RecoveryPolicy;

        // Defense-in-depth on top of the regression test: a caller that
        // accidentally passes the parent's cache trips this in debug builds.
        debug_assert!(
            !review_cache.shares_storage_with(&self.cache),
            "spawn_filtered_runner_with_cache: review cache must not share storage with the main agent's cache (dirge-7ls)"
        );

        // Restrict the registry to the allow-list via the shared helper.
        let mut fork_tools = filter_loop_tools(&self.loop_tools, allowed_tools);

        // dirge-ygm3: for the review pass only, substitute the review-enabled
        // memory tool so `mark`/`supersede` are reachable here and nowhere
        // else. Nothing happens when the review tool is absent or "memory"
        // was not in the allow-list.
        if let (true, Some(review_tool)) = (review_memory, &self.review_memory_tool) {
            swap_in_review_memory(&mut fork_tools, review_tool);
        }

        // NOTE(review): these definitions are only consumed on the fallback
        // arm below; a pre-built `review_stream_fn` presumably carries its own
        // tool definitions — confirm they agree with `allowed_tools`.
        let tool_defs = Self::tool_defs_for(&fork_tools);

        // dirge-z73i: an explicitly configured `review_stream_fn` (the user
        // pointed `review_provider` at a different alias than `provider`)
        // wins. Otherwise fall back to a stream fn built from the main agent,
        // so unconfigured sessions keep the legacy behavior byte-for-byte.
        let (inner_stream_fn, fork_provider_name, fork_model_name) =
            match self.review_stream_fn.clone() {
                Some(rfn) => {
                    let provider = self
                        .review_provider_name
                        .clone()
                        .unwrap_or_else(|| self.provider_name().to_string());
                    (rfn, provider, self.review_model_name.clone())
                }
                None => (
                    self.build_stream_fn(tool_defs),
                    self.provider_name().to_string(),
                    Self::model_name_opt(&self.model_name),
                ),
            };
        let retry_policy = RecoveryPolicy::default();
        let stream_fn = retrying_stream_fn(inner_stream_fn, retry_policy);

        let full_prompt = format!(
            "{}\n\n<session_transcript>\n{}\n</session_transcript>",
            prompt, transcript
        );

        let mut cfg = LoopSpawnConfig::minimal(stream_fn, full_prompt);
        cfg.provider_name = Some(fork_provider_name);
        cfg.model_name = fork_model_name;
        cfg.system_prompt = self.preamble.clone();
        cfg.tools = fork_tools;

        let runner = spawn_loop_runner(cfg).into_agent_runner();
        (runner, review_cache)
    }

    /// Phase 4.5h-2: build a `StreamFn` backed by this agent's underlying
    /// `CompletionModel`, advertising the given tool definitions. The new
    /// loop path (`spawn_loop_runner`) uses it to drive a real LLM through
    /// the ported agent_loop.
    ///
    /// This is the unfiltered convenience form: it forwards to
    /// `build_stream_fn_with_filter` with no tool-definition filter, which is
    /// where the per-provider dispatch over `AnyAgentInner` happens.
    ///
    /// `tools` is supplied by the caller rather than read from `agent.tools`
    /// because the new path treats the LoopTool registry as the source of
    /// truth (phase 4.5h-4 builds that registry alongside the rig Agent).
    /// Callers turn each `Arc<dyn LoopTool>` into a rig `ToolDefinition` with
    /// `agent_loop::loop_tool_to_rig_definition` before calling this.
    pub fn build_stream_fn(
        &self,
        tools: Vec<rig::completion::ToolDefinition>,
    ) -> crate::agent::agent_loop::StreamFn {
        let no_filter = None;
        self.build_stream_fn_with_filter(tools, no_filter)
    }

    /// Phase-3 dynamic-tool-search variant. When
    /// `tool_def_filter` is `Some`, the per-request tool list is
    /// filtered to the always-on set + names present in the
    /// shared loaded set (plus `tool_search`). When `None`, the
    /// behavior is byte-for-byte identical to the legacy
    /// `build_stream_fn`.
    ///
    /// # Parameters
    /// - `tools`: rig tool definitions to thread into the stream fn.
    /// - `tool_def_filter`: optional shared set of loaded tool names; passed
    ///   through to the dispatch macro as `filter`.
    ///
    /// The agent's `chunk_timeout` and provider name are read from `self`
    /// and passed along as `timeout` / `provider`.
    pub fn build_stream_fn_with_filter(
        &self,
        tools: Vec<rig::completion::ToolDefinition>,
        tool_def_filter: Option<
            std::sync::Arc<std::sync::Mutex<std::collections::HashSet<String>>>,
        >,
    ) -> crate::agent::agent_loop::StreamFn {
        let chunk_timeout = self.chunk_timeout;
        let provider = Some(self.provider_name().to_string());
        // dirge-iy20: single provider list in `stream_dispatch`. Each
        // arm clones `tools`/passes `tool_def_filter` by move — only
        // one arm runs, so the moves are exclusive.
        // NOTE(review): the expansion of `dispatch_stream_fn!` is not visible
        // here; the argument roles below are read off the key names only.
        crate::provider::stream_dispatch::dispatch_stream_fn! {
            match &self.inner;
            AnyAgentInner(a) => (*a.model).clone(),
            tools = tools.clone(),
            timeout = Some(chunk_timeout),
            provider = provider,
            filter = tool_def_filter,
        }
    }
}