clark_agent/plugins/graceful_turn_limit.rs
1//! Graceful turn limit: warn the model before the hard `max_iterations`
2//! cap fires.
3//!
4//! ## Why
5//!
6//! The hard cap (`LoopConfig::max_iterations`) is a circuit breaker. When
7//! it trips today the loop just stops — whatever the model was doing is
8//! abandoned. For long subagent runs that means a half-finished
9//! investigation, a dangling tool call, or a missing summary. The parent
10//! agent gets no signal that the answer is partial.
11//!
12//! This plugin adds a single steering message a few iterations before the
13//! cap: a host-provided wrap-up instruction, defaulting to a generic *"You
14//! have used your turn budget. Stop calling work tools and deliver your
15//! final result now."* If the model complies, the run ends naturally
16//! with a clean close-out and the loop reports
17//! [`LoopOutcome::WrappedUp`](crate::run::LoopOutcome::WrappedUp). If the
18//! model ignores the warning, the existing hard cap still fires.
19//!
20//! ## Capabilities
21//!
22//! - [`EventObserver`] — increments a completed-turn counter on every
23//! `AgentEvent::TurnEnd`. The plugin owns its own counter rather
24//! than reading the loop's because the trait surface intentionally
25//! doesn't expose iteration state to plugins.
26//! - [`SteeringSource`] — drains a one-shot wrap-up message once the
27//! counter crosses the soft threshold. Subsequent polls return empty.
28//!
29//! ## Lifecycle
30//!
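//! Auto-installed by [`AgentBuilder::build`](crate::config::AgentBuilder::build)
//! when `max_iterations` is set and `grace_iterations > 0`. Callers never
//! need to register it manually. Note that
//! [`GracefulTurnLimit::from_hard_cap`] also returns `None` when the grace
//! window is not smaller than the cap, because no warning turn would remain.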
34
35use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
36use std::sync::Arc;
37
38use async_trait::async_trait;
39
40use crate::event::AgentEvent;
41use crate::plugin::{EventObserver, Plugin, PluginCapabilities, SteeringSource};
42use crate::types::AgentMessage;
43
/// Host-supplied callback that produces the wrap-up steering text. It is
/// called only at the moment the one-shot warning fires, so the wording
/// can reflect whatever the host knows at that point.
type MessageProvider = Arc<dyn Fn() -> String + Sync + Send>;

/// Host-supplied callback reporting how many iterations of grace the
/// wrap-up should get, evaluated at fire-check time. This lets a host
/// size the wrap-up window to the work in flight (e.g. "leave at least
/// 2 × open-work-item-count + 2 iterations for close-out") without
/// leaking host types into `clark-agent`. It runs on every steering poll,
/// so it must stay cheap: a single `Mutex::lock` + count, no I/O.
type GraceProvider = Arc<dyn Fn() -> usize + Sync + Send>;
52
/// Default steering text injected when the soft limit fires.
///
/// Worded as an instruction rather than a hint: hedged phrasing ("you
/// might want to wrap up") is reliably ignored, whereas an explicit
/// stop-and-summarize directive lands. The bullet list asks the model to
/// report what it finished and what is still open so a parent agent can
/// act on partial results.
///
/// Deliberately generic: no specific tool is named. Hosts whose protocol
/// requires delivery through a named tool (and whose plain assistant text
/// is invisible) should override this via
/// [`crate::AgentBuilder::graceful_turn_limit_message_provider`] to name
/// their delivery tool.
const DEFAULT_WRAP_UP_MESSAGE: &str = concat!(
    "You have used your turn budget. Stop calling work tools and deliver your ",
    "final result now. Summarize:\n",
    "- What you accomplished.\n",
    "- What remains unfinished, if anything.\n",
    "- Any partial findings the caller should know about.\n",
    "\n",
    "Then stop. Do not call any more work tools. Ask for input only if a user ",
    "answer is genuinely required before you can continue.",
);
75
76/// Soft pre-cap warning. See module docs.
77pub struct GracefulTurnLimit {
78 /// Hard cap from `LoopConfig::max_iterations`. The fire-time soft
79 /// limit is `max_iterations - effective_grace()` (saturating), where
80 /// `effective_grace()` is either a host-supplied dynamic value
81 /// (`grace_provider`) or the static `default_grace` set at
82 /// construction.
83 max_iterations: usize,
84
85 /// Static fallback grace used when no `grace_provider` is set, and
86 /// also as the floor when validating the constructor inputs (a
87 /// zero or out-of-range static grace yields `None` so the plugin
88 /// is never installed).
89 default_grace: usize,
90
91 /// Optional host-supplied dynamic grace. When `Some`, it is called
92 /// on every steering poll and the returned value is used as the
93 /// effective grace window. Clamped to `[1, max_iterations - 1]` so
94 /// the soft trigger is always at least one turn before the hard cap.
95 grace_provider: Option<GraceProvider>,
96
97 /// Number of completed assistant turns observed. Used to decide
98 /// when the soft limit has been reached. Atomic because plugins are
99 /// accessed from `&self` across await points.
100 turns_completed: AtomicUsize,
101
102 /// One-shot guard so the wrap-up message is emitted at most once
103 /// even if `next_steering_messages` is polled multiple times after
104 /// the threshold.
105 fired: Arc<AtomicBool>,
106
107 /// Host-specific wrap-up wording. A host uses this to make the
108 /// warning aware of its own work-tracking state without teaching the
109 /// agent core about host-specific types.
110 message_provider: MessageProvider,
111}
112
113impl GracefulTurnLimit {
114 /// Build a plugin from a hard cap and a grace window. Returns
115 /// `None` when no useful soft threshold can be derived — caller
116 /// should skip installation in that case.
117 ///
118 /// Soft limit is `max - grace`, clamped so a soft trigger remains
119 /// observable on at least one turn before the cap. Combinations
120 /// where the soft and hard limits would coincide (`grace == 0` or
121 /// `grace >= max`) yield `None`: at that point the warning would
122 /// either fire after the cap or fire at the same moment, neither
123 /// of which gives the model a chance to recover.
124 pub fn from_hard_cap(max_iterations: usize, grace_iterations: usize) -> Option<Self> {
125 Self::from_hard_cap_with_message_provider(
126 max_iterations,
127 grace_iterations,
128 Arc::new(|| DEFAULT_WRAP_UP_MESSAGE.to_string()),
129 )
130 }
131
132 /// Build with host-provided wrap-up wording. The provider is called
133 /// only when the one-shot warning fires.
134 pub fn from_hard_cap_with_message_provider(
135 max_iterations: usize,
136 grace_iterations: usize,
137 message_provider: MessageProvider,
138 ) -> Option<Self> {
139 Self::from_hard_cap_with_providers(max_iterations, grace_iterations, message_provider, None)
140 }
141
142 /// Build with host-provided wrap-up wording AND a dynamic grace
143 /// provider. The grace provider is consulted on every steering
144 /// poll; its return value is clamped to `[1, max_iterations - 1]`
145 /// before being used as the effective wrap-up window. When the
146 /// provider returns the default value (or is unset), behavior
147 /// matches [`Self::from_hard_cap`].
148 pub fn from_hard_cap_with_providers(
149 max_iterations: usize,
150 default_grace: usize,
151 message_provider: MessageProvider,
152 grace_provider: Option<GraceProvider>,
153 ) -> Option<Self> {
154 if default_grace == 0 || default_grace >= max_iterations {
155 return None;
156 }
157 Some(Self {
158 max_iterations,
159 default_grace,
160 grace_provider,
161 turns_completed: AtomicUsize::new(0),
162 fired: Arc::new(AtomicBool::new(false)),
163 message_provider,
164 })
165 }
166
167 pub fn default_wrap_up_message() -> &'static str {
168 DEFAULT_WRAP_UP_MESSAGE
169 }
170
171 /// Shared one-shot flag the loop reads to distinguish a clean
172 /// natural close from one prompted by the wrap-up steer. Set to
173 /// `true` exactly when (and if) the plugin emits its message.
174 pub fn signal(&self) -> Arc<AtomicBool> {
175 self.fired.clone()
176 }
177
178 /// Effective grace window for this poll. Reads the dynamic
179 /// provider when set, else the static `default_grace`. Always
180 /// clamped to `[1, max_iterations - 1]` so the soft trigger
181 /// fires at least one turn before the hard cap.
182 fn effective_grace(&self) -> usize {
183 let raw = self
184 .grace_provider
185 .as_ref()
186 .map(|p| p())
187 .unwrap_or(self.default_grace);
188 raw.clamp(1, self.max_iterations.saturating_sub(1).max(1))
189 }
190
191 /// Inspection helper for tests and diagnostics. Returns the
192 /// soft threshold (turns count at which the wrap-up fires) computed
193 /// against the current dynamic grace, if any.
194 pub fn soft_limit(&self) -> usize {
195 self.max_iterations.saturating_sub(self.effective_grace())
196 }
197}
198
199impl Plugin for GracefulTurnLimit {
200 fn name(&self) -> &'static str {
201 "graceful_turn_limit"
202 }
203
204 fn capabilities(&self) -> PluginCapabilities {
205 PluginCapabilities {
206 event_observer: true,
207 steering: true,
208 ..PluginCapabilities::default()
209 }
210 }
211}
212
213#[async_trait]
214impl EventObserver for GracefulTurnLimit {
215 async fn on_event(&self, event: &AgentEvent) {
216 if matches!(event, AgentEvent::TurnEnd { .. }) {
217 self.turns_completed.fetch_add(1, Ordering::Relaxed);
218 }
219 }
220}
221
222#[async_trait]
223impl SteeringSource for GracefulTurnLimit {
224 async fn next_steering_messages(&self) -> Vec<AgentMessage> {
225 // Read counter first so a completed turn that happens between
226 // this check and the swap can't sneak past — at worst we fire
227 // one turn late, never twice. `soft_limit()` recomputes from
228 // the dynamic grace provider on each poll, so a host can grow
229 // the wrap-up window as the work in flight grows (e.g. more
230 // open work items mean more iterations needed to deliver a
231 // partial answer cleanly).
232 if self.turns_completed.load(Ordering::Relaxed) < self.soft_limit() {
233 return Vec::new();
234 }
235 // One-shot: swap is the cheapest way to guarantee a single
236 // emission even if the loop polls steering more than once.
237 if self.fired.swap(true, Ordering::Relaxed) {
238 return Vec::new();
239 }
240 let content = (self.message_provider)();
241 vec![AgentMessage::System {
242 content,
243 timestamp: None,
244 }]
245 }
246}
247
/// Unit tests for the soft turn limit: constructor validation, one-shot
/// firing at the soft threshold, custom wording, dynamic grace, and the
/// shared `signal()` flag.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn from_hard_cap_rejects_zero_grace() {
        assert!(GracefulTurnLimit::from_hard_cap(50, 0).is_none());
    }

    #[test]
    fn from_hard_cap_rejects_grace_at_or_above_cap() {
        assert!(GracefulTurnLimit::from_hard_cap(10, 10).is_none());
        assert!(GracefulTurnLimit::from_hard_cap(10, 99).is_none());
    }

    #[test]
    fn from_hard_cap_computes_soft_limit() {
        let plugin = GracefulTurnLimit::from_hard_cap(50, 5).unwrap();
        assert_eq!(plugin.soft_limit(), 45);
    }

    #[tokio::test]
    async fn does_not_fire_before_soft_limit() {
        let plugin = GracefulTurnLimit::from_hard_cap(10, 3).unwrap();
        // soft_limit = 7. Complete 6 turns and assert no message.
        for _ in 0..6 {
            plugin.on_event(&turn_end()).await;
        }
        let msgs = plugin.next_steering_messages().await;
        assert!(msgs.is_empty());
        assert!(!plugin.fired.load(Ordering::Relaxed));
    }

    #[tokio::test]
    async fn fires_once_at_soft_limit() {
        let plugin = GracefulTurnLimit::from_hard_cap(10, 3).unwrap();
        // soft_limit = 7. Complete 7 turns; expect one message.
        for _ in 0..7 {
            plugin.on_event(&turn_end()).await;
        }
        let first = plugin.next_steering_messages().await;
        assert_eq!(first.len(), 1, "should emit one wrap-up message");
        match &first[0] {
            AgentMessage::System { content, .. } => {
                assert!(content.starts_with("You have used your turn budget."))
            }
            other => panic!("expected system wrap-up message, got {other:?}"),
        }
        assert!(plugin.fired.load(Ordering::Relaxed));

        // Second poll: empty. One-shot.
        let second = plugin.next_steering_messages().await;
        assert!(second.is_empty(), "wrap-up must be one-shot");
    }

    // The counter only advances on `TurnEnd`; every other event must be
    // ignored.
    #[tokio::test]
    async fn ignores_non_turn_end_events() {
        let plugin = GracefulTurnLimit::from_hard_cap(10, 3).unwrap();
        // Pump a flood of unrelated events. Counter stays at 0.
        for _ in 0..20 {
            plugin.on_event(&AgentEvent::AgentStart).await;
        }
        let msgs = plugin.next_steering_messages().await;
        assert!(msgs.is_empty());
    }

    #[tokio::test]
    async fn default_wrap_up_is_generic_and_directs_delivery() {
        let plugin = GracefulTurnLimit::from_hard_cap(10, 3).unwrap();
        for _ in 0..7 {
            plugin.on_event(&turn_end()).await;
        }
        let msgs = plugin.next_steering_messages().await;
        let AgentMessage::System { content: text, .. } = &msgs[0] else {
            panic!("expected system wrap-up message");
        };

        // Directs the model to stop and deliver...
        assert!(text.contains("deliver your final result"), "{text}");
        assert!(text.contains("Stop calling work tools"), "{text}");
        // ...without leaking any product-specific tool vocabulary. Hosts
        // override via `graceful_turn_limit_message_provider` to name a
        // delivery tool.
        assert!(!text.contains("message_result"), "{text}");
        assert!(!text.contains("message_ask"), "{text}");
    }

    #[tokio::test]
    async fn wrap_up_uses_custom_message_provider_when_supplied() {
        let plugin = GracefulTurnLimit::from_hard_cap_with_message_provider(
            10,
            3,
            Arc::new(|| "custom wrap-up".to_string()),
        )
        .unwrap();
        for _ in 0..7 {
            plugin.on_event(&turn_end()).await;
        }

        let msgs = plugin.next_steering_messages().await;
        let AgentMessage::System { content, .. } = &msgs[0] else {
            panic!("expected system wrap-up message");
        };
        assert_eq!(content, "custom wrap-up");
    }

    #[tokio::test]
    async fn does_not_fire_before_first_completed_turn() {
        // soft_limit = 1, so the plugin would fire after a single
        // completed turn; a `TurnStart` alone must not count as one.
        let plugin = GracefulTurnLimit::from_hard_cap(6, 5).unwrap();

        plugin.on_event(&AgentEvent::TurnStart).await;
        let msgs = plugin.next_steering_messages().await;

        assert!(msgs.is_empty());
        assert!(!plugin.fired.load(Ordering::Relaxed));
    }

    #[tokio::test]
    async fn signal_reflects_wrap_up_emission() {
        // The handle returned by `signal()` shares the one-shot flag, so
        // it flips to `true` when the wrap-up message is emitted.
        let plugin = GracefulTurnLimit::from_hard_cap(4, 1).unwrap();
        let signal = plugin.signal();
        assert!(!signal.load(Ordering::Relaxed));

        // soft_limit = 3.
        for _ in 0..3 {
            plugin.on_event(&turn_end()).await;
        }
        let msgs = plugin.next_steering_messages().await;
        assert_eq!(msgs.len(), 1);
        assert!(signal.load(Ordering::Relaxed));
    }

    #[tokio::test]
    async fn dynamic_grace_provider_widens_wrap_up_window_for_bigger_jobs() {
        // The host-supplied callback returns the grace window the
        // plugin should use at fire-check time. This lets a host scale
        // the wrap-up budget with the size of the work in flight.
        let grace = Arc::new(std::sync::Mutex::new(3usize));
        let grace_for_provider = grace.clone();
        let plugin = GracefulTurnLimit::from_hard_cap_with_providers(
            20,
            3,
            Arc::new(|| "wrap".to_string()),
            Some(Arc::new(move || *grace_for_provider.lock().unwrap())),
        )
        .unwrap();

        // grace=3 → soft_limit=17. 16 turns is below the threshold.
        for _ in 0..16 {
            plugin.on_event(&turn_end()).await;
        }
        let early = plugin.next_steering_messages().await;
        assert!(early.is_empty(), "should not fire at 16 turns with grace=3");
        assert_eq!(plugin.soft_limit(), 17);

        // Host widens grace BEFORE the plugin would have fired. Now
        // grace=8 → soft_limit=12. The 16 turns already completed meet
        // the new threshold (16 >= 12), so the next poll fires.
        *grace.lock().unwrap() = 8;
        assert_eq!(plugin.soft_limit(), 12);
        let fired = plugin.next_steering_messages().await;
        assert_eq!(
            fired.len(),
            1,
            "widened grace must let the plugin fire on the next poll"
        );
        assert!(plugin.fired.load(Ordering::Relaxed));
    }

    #[tokio::test]
    async fn dynamic_grace_provider_clamps_out_of_range_returns() {
        // A buggy host that returns 0 or `>= max_iterations` must not
        // be allowed to disable the soft trigger; the plugin clamps
        // into `[1, max-1]` so the wrap-up always lands at least one
        // turn before the hard cap.
        let plugin = GracefulTurnLimit::from_hard_cap_with_providers(
            10,
            3,
            Arc::new(|| "wrap".to_string()),
            Some(Arc::new(|| 0)),
        )
        .unwrap();
        // grace clamps to 1 → soft_limit = 9.
        assert_eq!(plugin.soft_limit(), 9);

        let plugin = GracefulTurnLimit::from_hard_cap_with_providers(
            10,
            3,
            Arc::new(|| "wrap".to_string()),
            Some(Arc::new(|| 999)),
        )
        .unwrap();
        // grace clamps to max-1 = 9 → soft_limit = 1.
        assert_eq!(plugin.soft_limit(), 1);
    }

    /// Builds a minimal `TurnEnd` event (empty assistant text, no tool
    /// results) for driving the plugin's turn counter.
    fn turn_end() -> AgentEvent {
        AgentEvent::TurnEnd {
            message: AgentMessage::Assistant {
                content: crate::types::AssistantContent::text(""),
                stop_reason: crate::types::StopReason::ToolUse,
                error_message: None,
                timestamp: None,
                usage: None,
            },
            tool_results: Vec::new(),
        }
    }
}