llm-stack 0.7.0

Core traits, types, and tools for the llm-stack SDK
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
//! Tool loop configuration and event types.

use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;

use futures::Stream;
use serde_json::Value;

use crate::chat::{ChatResponse, ToolCall, ToolResult};
use crate::error::LlmError;
use crate::usage::Usage;

use super::cacher::ToolResultCacher;
use super::extractor::ToolResultExtractor;
use super::processor::ToolResultProcessor;

/// Callback type for tool call approval.
///
/// The callback borrows a [`ToolCall`] and returns a [`ToolApproval`]
/// verdict for it. It is stored behind an [`Arc`] and bounded by
/// `Send + Sync`, so a single callback can be cloned cheaply and shared
/// across threads.
pub type ToolApprovalFn = Arc<dyn Fn(&ToolCall) -> ToolApproval + Send + Sync>;

/// Callback type for stop conditions.
///
/// The callback borrows a [`StopContext`] describing the current loop
/// state and returns a [`StopDecision`]. It is stored behind an [`Arc`]
/// and bounded by `Send + Sync`, so a single callback can be cloned
/// cheaply and shared across threads.
pub type StopConditionFn = Arc<dyn Fn(&StopContext) -> StopDecision + Send + Sync>;

/// A pinned, boxed, `Send` stream of [`LoopEvent`] results.
///
/// The unified event stream from [`tool_loop_stream`](super::tool_loop_stream).
/// Emits both LLM streaming events (text deltas, tool call fragments) and
/// loop-level events (iteration boundaries, tool execution progress).
/// Terminates with [`LoopEvent::Done`] carrying the final [`ToolLoopResult`].
///
/// Every item is a `Result<LoopEvent, LlmError>`, so consumers must handle
/// the `Err` case for each polled item as well as the events themselves.
pub type LoopStream = Pin<Box<dyn Stream<Item = Result<LoopEvent, LlmError>> + Send>>;

/// Context provided to stop condition callbacks.
///
/// Contains information about the current state of the tool loop
/// to help decide whether to stop early.
///
/// This is the argument type of [`StopConditionFn`]. The borrowed fields
/// all share the lifetime `'a`, so a `StopContext` cannot outlive the
/// response, usage, and tool results it points at.
#[derive(Debug)]
pub struct StopContext<'a> {
    /// Current iteration number (1-indexed).
    pub iteration: u32,
    /// The response from this iteration.
    pub response: &'a ChatResponse,
    /// Accumulated usage across all iterations so far.
    pub total_usage: &'a Usage,
    /// Total number of tool calls executed so far (across all iterations).
    pub tool_calls_executed: usize,
    /// Tool results from the most recent execution (empty on first response).
    pub last_tool_results: &'a [ToolResult],
}

/// Decision returned by a stop condition callback.
///
/// This is the return type of [`StopConditionFn`]. Both stopping variants
/// are reported through [`TerminationReason::StopCondition`], whose
/// `reason` field carries the string from [`StopWithReason`](Self::StopWithReason),
/// if any.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StopDecision {
    /// Continue the tool loop normally.
    Continue,
    /// Stop the loop immediately, using the current response as final.
    Stop,
    /// Stop the loop with a reason (for observability/debugging).
    StopWithReason(String),
}

/// Configuration for detecting repeated tool calls (stuck agents).
///
/// When an agent repeatedly makes the same tool call with identical arguments,
/// it's usually stuck in a loop. This configuration detects that pattern and
/// takes action to break the cycle.
///
/// The [`Default`] implementation uses a threshold of 3 and
/// [`LoopAction::Warn`].
///
/// # Example
///
/// ```rust
/// use llm_stack::tool::{LoopDetectionConfig, LoopAction};
///
/// let config = LoopDetectionConfig {
///     threshold: 3,  // Trigger after 3 consecutive identical calls
///     action: LoopAction::InjectWarning,  // Tell the agent it's looping
/// };
/// ```
#[derive(Debug, Clone, Copy)]
pub struct LoopDetectionConfig {
    /// Number of consecutive identical tool calls before triggering.
    ///
    /// A tool call is "identical" if it has the same name and arguments
    /// (compared via JSON equality). Default: 3.
    pub threshold: u32,

    /// Action to take when a loop is detected.
    ///
    /// Default: [`LoopAction::Warn`].
    pub action: LoopAction,
}

impl Default for LoopDetectionConfig {
    /// Builds the stock configuration: trigger after three consecutive
    /// identical tool calls and respond with [`LoopAction::Warn`].
    fn default() -> Self {
        let threshold = 3;
        let action = LoopAction::Warn;

        Self {
            threshold,
            action,
        }
    }
}

/// Action to take when a tool call loop is detected.
///
/// Selected through [`LoopDetectionConfig::action`] and echoed back in
/// [`LoopEvent::LoopDetected`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoopAction {
    /// Emit [`LoopEvent::LoopDetected`] and continue execution.
    ///
    /// Use this for monitoring/alerting without interrupting the agent.
    Warn,

    /// Stop the loop immediately with an error.
    ///
    /// Returns `LlmError::ToolExecution` describing the loop.
    // NOTE(review): `TerminationReason::LoopDetected` is documented as the
    // outcome of `LoopAction::Stop`, which reads like a normal result rather
    // than an error. Confirm which of the two descriptions is current.
    Stop,

    /// Inject a warning message into the conversation and continue.
    ///
    /// Adds a system message like "You have called {tool} with identical
    /// arguments {n} times. Try a different approach." This often helps
    /// the agent break out of the loop.
    ///
    /// The warning fires at every multiple of `threshold` (3, 6, 9, … for
    /// a threshold of 3) until the agent changes its approach. This
    /// prevents infinite loops where the agent ignores the first warning.
    InjectWarning,
}

/// Unified event emitted during tool loop execution.
///
/// `LoopEvent` merges LLM streaming events (text deltas, tool call fragments)
/// with loop-level lifecycle events (iteration boundaries, tool execution
/// progress) into a single stream. This gives consumers a complete, ordered
/// view of everything happening inside the loop.
///
/// The stream terminates with [`Done`](Self::Done) carrying the final
/// [`ToolLoopResult`].
///
/// The enum is `#[non_exhaustive]`: a `match` written outside this crate
/// needs a wildcard arm (as in the example below), because further
/// variants may be added.
///
/// # Example
///
/// ```rust,no_run
/// use llm_stack::tool::{tool_loop_stream, ToolLoopConfig, LoopEvent};
/// use futures::StreamExt;
/// use std::sync::Arc;
///
/// # async fn example(
/// #     provider: Arc<dyn llm_stack::DynProvider>,
/// #     registry: Arc<llm_stack::ToolRegistry<()>>,
/// #     params: llm_stack::ChatParams,
/// # ) {
/// let mut stream = tool_loop_stream(provider, registry, params, ToolLoopConfig::default(), Arc::new(()));
/// while let Some(event) = stream.next().await {
///     match event.unwrap() {
///         LoopEvent::TextDelta(text) => print!("{text}"),
///         LoopEvent::IterationStart { iteration, .. } => {
///             println!("\n--- Iteration {iteration} ---");
///         }
///         LoopEvent::ToolExecutionStart { tool_name, .. } => {
///             println!("[calling {tool_name}...]");
///         }
///         LoopEvent::ToolExecutionEnd { tool_name, duration, .. } => {
///             println!("[{tool_name} completed in {duration:?}]");
///         }
///         LoopEvent::Done(result) => {
///             println!("\nDone: {:?}", result.termination_reason);
///             break;
///         }
///         _ => {}
///     }
/// }
/// # }
/// ```
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum LoopEvent {
    // ── LLM streaming (translated from provider StreamEvent) ────
    /// A fragment of the model's text output.
    TextDelta(String),

    /// A fragment of the model's reasoning (chain-of-thought) output.
    ReasoningDelta(String),

    /// Announces that a new tool call has started.
    ToolCallStart {
        /// Zero-based index identifying this call when multiple tools
        /// are invoked in parallel.
        index: u32,
        /// Provider-assigned identifier linking start → deltas → complete.
        id: String,
        /// The name of the tool being called.
        name: String,
    },

    /// A JSON fragment of the tool call's arguments.
    ToolCallDelta {
        /// The tool-call index this delta belongs to.
        index: u32,
        /// A chunk of the JSON arguments string.
        json_chunk: String,
    },

    /// The fully assembled tool call, ready to execute.
    ToolCallComplete {
        /// The tool-call index this completion corresponds to.
        index: u32,
        /// The complete, parsed tool call.
        call: ToolCall,
    },

    /// Token usage information for this LLM call.
    Usage(Usage),

    // ── Loop lifecycle ──────────────────────────────────────────
    /// A new iteration of the tool loop is starting.
    IterationStart {
        /// The iteration number (1-indexed).
        iteration: u32,
        /// Number of messages in the conversation so far.
        message_count: usize,
    },

    /// About to execute a tool.
    ///
    /// When `parallel_tool_execution` is true, events arrive in **completion
    /// order** (whichever tool finishes first), not the order the LLM listed
    /// the calls. Use `call_id` to correlate start/end pairs.
    ToolExecutionStart {
        /// The tool call ID from the LLM.
        call_id: String,
        /// Name of the tool being called.
        tool_name: String,
        /// Arguments passed to the tool.
        arguments: Value,
    },

    /// Tool execution completed.
    ///
    /// When `parallel_tool_execution` is true, events arrive in **completion
    /// order**. Use `call_id` to correlate with the corresponding
    /// [`ToolExecutionStart`](Self::ToolExecutionStart).
    ToolExecutionEnd {
        /// The tool call ID from the LLM.
        call_id: String,
        /// Name of the tool that was called.
        tool_name: String,
        /// The result from the tool.
        result: ToolResult,
        /// How long the tool took to execute.
        duration: Duration,
    },

    /// A tool result was post-processed (compressed, truncated, etc.).
    ///
    /// Emitted when a [`ToolResultProcessor`](super::ToolResultProcessor)
    /// modifies a tool's output before it enters the conversation context.
    /// Use this for monitoring compression ratios and token savings.
    ToolResultProcessed {
        /// Name of the tool whose result was processed.
        tool_name: String,
        /// Estimated token count of the original output.
        original_tokens: u32,
        /// Estimated token count after processing.
        processed_tokens: u32,
    },

    /// A tool result was semantically extracted (condensed by an LLM).
    ///
    /// Emitted when a [`ToolResultExtractor`](super::ToolResultExtractor)
    /// condenses a large tool result into task-relevant content using an
    /// async extraction call (typically a fast/cheap LLM like Haiku).
    ToolResultExtracted {
        /// Name of the tool whose result was extracted.
        tool_name: String,
        /// Estimated token count before extraction.
        original_tokens: u32,
        /// Estimated token count after extraction.
        extracted_tokens: u32,
    },

    /// A tool result was cached out-of-context.
    ///
    /// Emitted when a [`ToolResultCacher`](super::ToolResultCacher) stores
    /// an oversized result externally and replaces it with a compact summary.
    ToolResultCached {
        /// Name of the tool whose result was cached.
        tool_name: String,
        /// Estimated token count of the content that was cached.
        original_tokens: u32,
        /// Estimated token count of the summary that replaced it.
        summary_tokens: u32,
    },

    /// Old tool results were masked before an LLM call.
    ///
    /// Emitted when observation masking replaces old tool results with
    /// compact placeholders to reduce context size. The full results
    /// may still be available in the result cache.
    ObservationsMasked {
        /// Number of tool results masked in this pass.
        masked_count: usize,
        /// Estimated total tokens saved by masking.
        tokens_saved: u32,
    },

    /// A tool call loop was detected.
    ///
    /// Emitted when the same tool is called with identical arguments
    /// for `threshold` consecutive times. Only emitted when
    /// [`LoopDetectionConfig`] is configured.
    LoopDetected {
        /// Name of the tool being called repeatedly.
        tool_name: String,
        /// Number of consecutive identical calls detected.
        consecutive_count: u32,
        /// The action being taken in response.
        action: LoopAction,
    },

    // ── Terminal ────────────────────────────────────────────────
    /// The loop has finished. Carries the final [`ToolLoopResult`]
    /// with the accumulated response, usage, iteration count, and
    /// termination reason.
    Done(ToolLoopResult),
}

/// Configuration for [`tool_loop`](super::tool_loop) and [`tool_loop_stream`](super::tool_loop_stream).
///
/// `Clone`, `Default`, and `Debug` are implemented by hand further down in
/// this file. The `Debug` output reports the callback and hook fields only
/// as `has_*` booleans rather than printing their contents.
pub struct ToolLoopConfig {
    /// Maximum number of generate-execute iterations. Default: 10.
    pub max_iterations: u32,
    /// Whether to execute multiple tool calls in parallel. Default: true.
    pub parallel_tool_execution: bool,
    /// Optional callback to approve, deny, or modify each tool call
    /// before execution.
    ///
    /// Called once per tool call in the LLM response, **after** the response
    /// is assembled but **before** any tool is executed. Receives the
    /// [`ToolCall`](crate::chat::ToolCall) as parsed from the LLM output.
    /// Modified arguments are re-validated against the tool's schema.
    ///
    /// Panics in the callback propagate and terminate the loop.
    ///
    /// Default: `None`.
    pub on_tool_call: Option<ToolApprovalFn>,
    /// Optional stop condition checked after each LLM response.
    ///
    /// Called **after** the LLM response is received but **before** tools
    /// are executed. If the callback returns [`StopDecision::Stop`] or
    /// [`StopDecision::StopWithReason`], the loop terminates immediately
    /// without executing the requested tool calls.
    ///
    /// Receives a [`StopContext`] with information about the current
    /// iteration and returns a [`StopDecision`]. Use this to implement:
    ///
    /// - `final_answer` tool patterns (stop when a specific tool is called)
    /// - Token budget enforcement
    /// - Total tool call limits
    /// - Content pattern matching
    ///
    /// Default: `None`.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use llm_stack::tool::{ToolLoopConfig, StopDecision};
    /// use std::sync::Arc;
    ///
    /// let config = ToolLoopConfig {
    ///     stop_when: Some(Arc::new(|ctx| {
    ///         // Stop if we've executed 5 or more tool calls
    ///         if ctx.tool_calls_executed >= 5 {
    ///             StopDecision::StopWithReason("Tool call limit reached".into())
    ///         } else {
    ///             StopDecision::Continue
    ///         }
    ///     })),
    ///     ..Default::default()
    /// };
    /// ```
    pub stop_when: Option<StopConditionFn>,

    /// Optional loop detection to catch stuck agents.
    ///
    /// When enabled, tracks consecutive identical tool calls (same name
    /// and arguments) and takes action when the threshold is reached.
    ///
    /// Default: `None` (loop detection disabled).
    ///
    /// # Example
    ///
    /// ```rust
    /// use llm_stack::tool::{ToolLoopConfig, LoopDetectionConfig, LoopAction};
    ///
    /// let config = ToolLoopConfig {
    ///     loop_detection: Some(LoopDetectionConfig {
    ///         threshold: 3,
    ///         action: LoopAction::InjectWarning,
    ///     }),
    ///     ..Default::default()
    /// };
    /// ```
    pub loop_detection: Option<LoopDetectionConfig>,

    /// Maximum wall-clock time for the entire tool loop.
    ///
    /// If exceeded, returns with [`TerminationReason::Timeout`].
    /// This is useful for enforcing time budgets in production systems.
    ///
    /// Default: `None` (no time limit).
    ///
    /// # Example
    ///
    /// ```rust
    /// use llm_stack::tool::ToolLoopConfig;
    /// use std::time::Duration;
    ///
    /// let config = ToolLoopConfig {
    ///     timeout: Some(Duration::from_secs(30)),
    ///     ..Default::default()
    /// };
    /// ```
    pub timeout: Option<Duration>,

    /// Optional processor that runs on tool results before they enter the
    /// conversation context.
    ///
    /// When set, the processor's [`process`](ToolResultProcessor::process)
    /// method is called on each tool result after execution. If it modifies
    /// the content, a [`LoopEvent::ToolResultProcessed`] event is emitted
    /// for observability.
    ///
    /// Default: `None` (no processing — results pass through unmodified).
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use llm_stack::tool::{ToolLoopConfig, ToolResultProcessor, ProcessedResult};
    /// use std::sync::Arc;
    ///
    /// struct TruncateProcessor;
    /// impl ToolResultProcessor for TruncateProcessor {
    ///     fn process(&self, _tool_name: &str, output: &str) -> ProcessedResult {
    ///         if output.len() > 10_000 {
    ///             // Back up to a char boundary first: slicing a `&str` in
    ///             // the middle of a multi-byte character panics.
    ///             let mut end = 10_000;
    ///             while !output.is_char_boundary(end) {
    ///                 end -= 1;
    ///             }
    ///             ProcessedResult {
    ///                 content: output[..end].to_string(),
    ///                 was_processed: true,
    ///                 original_tokens_est: (output.len() as u32) / 4,
    ///                 processed_tokens_est: 2500,
    ///             }
    ///         } else {
    ///             ProcessedResult::unchanged()
    ///         }
    ///     }
    /// }
    ///
    /// let config = ToolLoopConfig {
    ///     result_processor: Some(Arc::new(TruncateProcessor)),
    ///     ..Default::default()
    /// };
    /// ```
    pub result_processor: Option<Arc<dyn ToolResultProcessor>>,

    /// Async semantic extractor for large tool results.
    ///
    /// After the [`result_processor`](Self::result_processor) runs, if the
    /// result still exceeds the extractor's [`extraction_threshold`](ToolResultExtractor::extraction_threshold),
    /// the extractor condenses it using async work (e.g., a fast LLM call).
    ///
    /// The extractor receives the last user message for relevance-guided
    /// extraction. Results below the threshold skip this stage entirely.
    ///
    /// Default: `None` (no semantic extraction).
    pub result_extractor: Option<Arc<dyn ToolResultExtractor>>,

    /// Out-of-context cacher for oversized tool results.
    ///
    /// After the [`result_processor`](Self::result_processor) and optional
    /// [`result_extractor`](Self::result_extractor) run, if the result still
    /// exceeds the cacher's [`inline_threshold`](ToolResultCacher::inline_threshold),
    /// the cacher stores the full content externally and returns a compact
    /// summary for the conversation.
    ///
    /// The caller decides how to store (disk, memory, KV, …). llm-stack
    /// only provides the hook and the threshold check.
    ///
    /// Default: `None` (no caching — oversized results stay inline).
    pub result_cacher: Option<Arc<dyn ToolResultCacher>>,

    /// Observation masking: replace old tool results with compact
    /// placeholders to reduce context size between iterations.
    ///
    /// When enabled, `LoopCore` scans the message history before each
    /// LLM call and masks tool results from old iterations. Masking
    /// preserves the tool call / result structure (so the LLM knows a
    /// tool was called) but replaces the content with a short placeholder.
    ///
    /// Default: `None` (no masking — all tool results stay in context).
    pub masking: Option<ObservationMaskingConfig>,

    /// Agent-directed force-mask set for observation masking.
    ///
    /// When set, tool results from iterations listed in this set are
    /// masked regardless of age. This enables tools like `context_release`
    /// to mark specific iterations as stale during execution.
    ///
    /// The set is shared between the tool loop config and the tool that
    /// writes to it (e.g., via `Arc::clone`). Thread-safe via `Mutex`.
    ///
    /// Default: `None` (only age-based masking applies).
    pub force_mask_iterations: Option<Arc<std::sync::Mutex<std::collections::HashSet<u32>>>>,

    /// Maximum allowed nesting depth for recursive tool loops.
    ///
    /// When a tool calls `tool_loop` internally (e.g., spawning a sub-agent),
    /// the depth is tracked via the context's [`LoopDepth`](super::LoopDepth)
    /// implementation. If `ctx.loop_depth() >= max_depth` at entry,
    /// returns `Err(LlmError::MaxDepthExceeded)`.
    ///
    /// - `Some(n)`: Error if depth >= n
    /// - `None`: No limit (dangerous, use with caution)
    ///
    /// Default: `Some(3)` (allows master → worker → one more level)
    ///
    /// # Example
    ///
    /// ```rust
    /// use llm_stack::tool::ToolLoopConfig;
    ///
    /// // Master/Worker pattern: master=0, worker=1, no grandchildren
    /// let config = ToolLoopConfig {
    ///     max_depth: Some(2),
    ///     ..Default::default()
    /// };
    /// ```
    pub max_depth: Option<u32>,
}

/// Settings that control observation masking inside the tool loop.
///
/// Long tool loop runs (10+ tool calls in one request) can accumulate
/// enough tool output to fill the context window. Observation masking
/// keeps the context bounded by swapping old tool results for compact
/// placeholders.
///
/// # How it works
///
/// Each tool result is tagged with the iteration that produced it.
/// Before every LLM call, results older than `max_iterations_to_keep`
/// iterations are replaced with a placeholder like:
///
/// ```text
/// [Masked — {tool_name} result from iteration {N}, {tokens} tokens.
///  Use result_cache tool if available, or re-invoke tool.]
/// ```
///
/// Results at or below `min_tokens_to_mask` are left alone, so small
/// outputs (e.g., error messages, simple values) stay in-context.
#[derive(Debug, Clone, Copy)]
pub struct ObservationMaskingConfig {
    /// Age cutoff, in iterations, beyond which tool results are masked.
    ///
    /// For example, with `max_iterations_to_keep = 2` on iteration 5,
    /// results from iterations 1-2 may be masked.
    ///
    /// Default: 2 (keep results from the last 2 iterations).
    pub max_iterations_to_keep: u32,

    /// Size cutoff: only results whose estimated token count is above
    /// this value are masked. Small results (error messages, simple
    /// values) are kept inline.
    ///
    /// Default: 500 tokens (~2000 chars).
    pub min_tokens_to_mask: u32,
}

impl Default for ObservationMaskingConfig {
    /// Keeps the last 2 iterations unmasked and only masks results
    /// estimated above 500 tokens.
    fn default() -> Self {
        let max_iterations_to_keep = 2;
        let min_tokens_to_mask = 500;

        Self {
            max_iterations_to_keep,
            min_tokens_to_mask,
        }
    }
}

impl Clone for ToolLoopConfig {
    /// Produces a field-by-field copy of the configuration.
    ///
    /// Plain-value fields are copied. The `Option<Arc<_>>` fields are
    /// cloned as handles, so the copy refers to the same callbacks, hooks,
    /// and force-mask set as `self`.
    fn clone(&self) -> Self {
        // Destructure without `..` so that adding a field to the struct
        // fails to compile here until the new field is handled.
        let Self {
            max_iterations,
            parallel_tool_execution,
            on_tool_call,
            stop_when,
            loop_detection,
            timeout,
            result_processor,
            result_extractor,
            result_cacher,
            masking,
            force_mask_iterations,
            max_depth,
        } = self;

        Self {
            max_iterations: *max_iterations,
            parallel_tool_execution: *parallel_tool_execution,
            on_tool_call: on_tool_call.clone(),
            stop_when: stop_when.clone(),
            loop_detection: *loop_detection,
            timeout: *timeout,
            result_processor: result_processor.clone(),
            result_extractor: result_extractor.clone(),
            result_cacher: result_cacher.clone(),
            masking: *masking,
            force_mask_iterations: force_mask_iterations.clone(),
            max_depth: *max_depth,
        }
    }
}

impl Default for ToolLoopConfig {
    /// Builds the baseline configuration: up to 10 iterations, parallel
    /// tool execution, a nesting limit of 3, and every optional callback,
    /// hook, and limit switched off.
    fn default() -> Self {
        Self {
            // Bounds on how far the loop may run.
            max_iterations: 10,
            max_depth: Some(3),
            timeout: None,

            // Execution strategy.
            parallel_tool_execution: true,

            // User callbacks and loop detection are opt-in.
            on_tool_call: None,
            stop_when: None,
            loop_detection: None,

            // Result post-processing pipeline is opt-in.
            result_processor: None,
            result_extractor: None,
            result_cacher: None,

            // Observation masking is opt-in.
            masking: None,
            force_mask_iterations: None,
        }
    }
}

impl std::fmt::Debug for ToolLoopConfig {
    /// Formats the configuration as a `ToolLoopConfig { .. }` debug struct.
    ///
    /// Plain-value fields are printed as-is. The callback, hook, and
    /// force-mask fields are summarised as `has_*` booleans that only say
    /// whether the option is set.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Presence flags for the fields that are not printed directly.
        let has_on_tool_call = self.on_tool_call.is_some();
        let has_stop_when = self.stop_when.is_some();
        let has_result_processor = self.result_processor.is_some();
        let has_result_extractor = self.result_extractor.is_some();
        let has_result_cacher = self.result_cacher.is_some();
        let has_force_mask_iterations = self.force_mask_iterations.is_some();

        let mut out = f.debug_struct("ToolLoopConfig");
        out.field("max_iterations", &self.max_iterations);
        out.field("parallel_tool_execution", &self.parallel_tool_execution);
        out.field("has_on_tool_call", &has_on_tool_call);
        out.field("has_stop_when", &has_stop_when);
        out.field("loop_detection", &self.loop_detection);
        out.field("timeout", &self.timeout);
        out.field("has_result_processor", &has_result_processor);
        out.field("has_result_extractor", &has_result_extractor);
        out.field("has_result_cacher", &has_result_cacher);
        out.field("masking", &self.masking);
        out.field("has_force_mask_iterations", &has_force_mask_iterations);
        out.field("max_depth", &self.max_depth);
        out.finish()
    }
}

/// Result of approving a tool call before execution.
///
/// This is the return type of [`ToolApprovalFn`], the callback stored in
/// [`ToolLoopConfig::on_tool_call`].
#[derive(Debug, Clone)]
pub enum ToolApproval {
    /// Allow the tool call to proceed as-is.
    Approve,
    /// Deny the tool call. The reason is sent back to the LLM as an
    /// error tool result.
    Deny(String),
    /// Modify the tool call arguments before execution.
    ///
    /// Per the [`ToolLoopConfig::on_tool_call`] documentation, modified
    /// arguments are re-validated against the tool's schema.
    Modify(Value),
}

/// The result of a completed tool loop.
///
/// In the streaming API this value is carried by the terminal
/// [`LoopEvent::Done`] event.
#[derive(Debug, Clone)]
pub struct ToolLoopResult {
    /// The final response from the LLM (after all tool iterations).
    pub response: ChatResponse,
    /// How many generate-execute iterations were performed.
    pub iterations: u32,
    /// Accumulated usage across all iterations.
    pub total_usage: Usage,
    /// Why the loop terminated.
    ///
    /// This provides observability into the loop's completion reason,
    /// useful for debugging and monitoring agent behavior.
    pub termination_reason: TerminationReason,
}

/// Why a tool loop terminated.
///
/// Used for observability and debugging. Each variant captures specific
/// information about why the loop ended.
///
/// Reported in [`ToolLoopResult::termination_reason`]. Unlike
/// [`LoopEvent`], this enum is not `#[non_exhaustive]`, which is why the
/// example below can match it without a wildcard arm.
///
/// # Example
///
/// ```rust,no_run
/// use llm_stack::tool::TerminationReason;
/// use std::time::Duration;
///
/// # fn check_result(reason: TerminationReason) {
/// match reason {
///     TerminationReason::Complete => println!("Task completed naturally"),
///     TerminationReason::StopCondition { reason } => {
///         println!("Custom stop: {}", reason.as_deref().unwrap_or("no reason"));
///     }
///     TerminationReason::MaxIterations { limit } => {
///         println!("Hit iteration limit: {limit}");
///     }
///     TerminationReason::LoopDetected { tool_name, count } => {
///         println!("Stuck calling {tool_name} {count} times");
///     }
///     TerminationReason::Timeout { limit } => {
///         println!("Exceeded timeout: {limit:?}");
///     }
/// }
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TerminationReason {
    /// LLM returned a response with no tool calls (natural completion).
    Complete,

    /// Custom stop condition returned [`StopDecision::Stop`] or
    /// [`StopDecision::StopWithReason`].
    StopCondition {
        /// The reason provided via [`StopDecision::StopWithReason`], if any.
        reason: Option<String>,
    },

    /// Hit the `max_iterations` limit.
    MaxIterations {
        /// The configured limit that was reached.
        limit: u32,
    },

    /// Loop detection triggered with [`LoopAction::Stop`].
    // NOTE(review): `LoopAction::Stop` is documented as returning
    // `LlmError::ToolExecution`, i.e. an error rather than a result carrying
    // this reason. Confirm which of the two descriptions is current.
    LoopDetected {
        /// Name of the tool being called repeatedly.
        tool_name: String,
        /// Number of consecutive identical calls.
        count: u32,
    },

    /// Wall-clock timeout exceeded.
    Timeout {
        /// The configured timeout that was exceeded.
        limit: Duration,
    },
}