Skip to main content

vtcode_core/tools/
result_metadata.rs

//! Tool result metadata and quality scoring.
//!
//! Provides metadata about tool result quality, confidence, and usefulness.
//! This allows the agent to make informed decisions about result reliability
//! and to prioritize high-quality results in context windows.
6
7use crate::config::constants::tools;
8use crate::tools::tool_intent;
9use crate::types::CompactStr;
10use hashbrown::HashMap;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use std::fmt;
14use std::time::SystemTime;
15
16/// Result completeness level
17#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
18pub enum ResultCompleteness {
19    /// Full result with no truncation
20    Complete,
21    /// Partial result (more data exists but not shown)
22    Partial,
23    /// Result truncated due to size limits
24    Truncated,
25    /// Empty result (no matches)
26    Empty,
27}
28
29impl ResultCompleteness {
30    /// Deprecated: prefer using the `Display` impl; `ToString` is derived from Display.
31    pub fn to_static_str(&self) -> &'static str {
32        match self {
33            Self::Complete => "complete",
34            Self::Partial => "partial",
35            Self::Truncated => "truncated",
36            Self::Empty => "empty",
37        }
38    }
39}
40
41impl fmt::Display for ResultCompleteness {
42    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43        f.write_str(self.to_static_str())
44    }
45}
46
47/// Quality metadata for tool results
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct ResultMetadata {
50    /// Confidence that result is correct (0.0-1.0)
51    #[serde(default = "default_confidence")]
52    pub confidence: f32,
53
54    /// Relevance to current task (0.0-1.0)
55    #[serde(default = "default_relevance")]
56    pub relevance: f32,
57
58    /// Result completeness level
59    pub completeness: ResultCompleteness,
60
61    /// Count of matches/results
62    #[serde(default)]
63    pub result_count: usize,
64
65    /// Likelihood of false positives (0.0-1.0)
66    #[serde(default)]
67    pub false_positive_likelihood: f32,
68
69    /// Detected content types (code, docs, config, binary, etc.)
70    #[serde(default)]
71    pub content_types: Vec<String>,
72
73    /// Tool-specific metrics (lines matched, execution time, etc.)
74    #[serde(default)]
75    pub tool_metrics: HashMap<String, Value>,
76}
77
/// Serde fallback for `ResultMetadata::confidence` when the field is absent.
fn default_confidence() -> f32 {
    const NEUTRAL_CONFIDENCE: f32 = 0.5;
    NEUTRAL_CONFIDENCE
}
81
/// Serde fallback for `ResultMetadata::relevance` when the field is absent.
fn default_relevance() -> f32 {
    const NEUTRAL_RELEVANCE: f32 = 0.5;
    NEUTRAL_RELEVANCE
}
85
86impl Default for ResultMetadata {
87    fn default() -> Self {
88        Self {
89            confidence: 0.5,
90            relevance: 0.5,
91            completeness: ResultCompleteness::Complete,
92            result_count: 0,
93            false_positive_likelihood: 0.1,
94            content_types: vec![],
95            tool_metrics: HashMap::new(),
96        }
97    }
98}
99
100impl ResultMetadata {
101    /// Overall quality score (0.0-1.0)
102    #[inline]
103    pub fn quality_score(&self) -> f32 {
104        let weighted = (self.confidence * 0.4)
105            + (self.relevance * 0.4)
106            + (self.false_positive_likelihood * -0.2);
107        weighted.clamp(0.0, 1.0)
108    }
109
110    /// Create metadata for a successful tool execution
111    #[inline]
112    pub fn success(confidence: f32, relevance: f32) -> Self {
113        Self {
114            confidence: confidence.clamp(0.0, 1.0),
115            relevance: relevance.clamp(0.0, 1.0),
116            completeness: ResultCompleteness::Complete,
117            result_count: 1,
118            false_positive_likelihood: 0.05,
119            ..Default::default()
120        }
121    }
122
123    /// Create metadata for empty results
124    #[inline]
125    pub fn empty() -> Self {
126        Self {
127            completeness: ResultCompleteness::Empty,
128            result_count: 0,
129            confidence: 1.0, // High confidence in "no results"
130            ..Default::default()
131        }
132    }
133
134    /// Create metadata for error/inconclusive results
135    pub fn error() -> Self {
136        Self {
137            confidence: 0.2,
138            completeness: ResultCompleteness::Empty,
139            ..Default::default()
140        }
141    }
142
143    /// Merge with another metadata (for combining results)
144    pub fn merge(&mut self, other: &ResultMetadata) {
145        self.result_count += other.result_count;
146        self.confidence = (self.confidence + other.confidence) / 2.0;
147        self.relevance = (self.relevance + other.relevance) / 2.0;
148
149        // Merge content types
150        for ct in &other.content_types {
151            if !self.content_types.contains(ct) {
152                self.content_types.push(ct.clone());
153            }
154        }
155
156        // Merge tool metrics - use extend to avoid double clone
157        self.tool_metrics.extend(
158            other
159                .tool_metrics
160                .iter()
161                .map(|(k, v)| (k.clone(), v.clone())),
162        );
163    }
164}
165
166/// Enhanced tool result with metadata
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct EnhancedToolResult {
169    /// The actual tool result
170    pub value: Value,
171
172    /// Quality metadata
173    pub metadata: ResultMetadata,
174
175    /// When result was produced
176    pub timestamp: u64,
177
178    /// Tool name that produced this
179    pub tool_name: CompactStr,
180
181    /// Whether this was from cache
182    #[serde(default)]
183    pub from_cache: bool,
184}
185
186impl EnhancedToolResult {
187    pub fn new(value: Value, metadata: ResultMetadata, tool_name: impl Into<CompactStr>) -> Self {
188        Self {
189            value,
190            metadata,
191            timestamp: SystemTime::now()
192                .duration_since(SystemTime::UNIX_EPOCH)
193                .unwrap_or_default()
194                .as_secs(),
195            tool_name: tool_name.into(),
196            from_cache: false,
197        }
198    }
199
200    pub fn from_cache(
201        value: Value,
202        metadata: ResultMetadata,
203        tool_name: impl Into<CompactStr>,
204    ) -> Self {
205        Self {
206            value,
207            metadata,
208            timestamp: SystemTime::now()
209                .duration_since(SystemTime::UNIX_EPOCH)
210                .unwrap_or_default()
211                .as_secs(),
212            tool_name: tool_name.into(),
213            from_cache: true,
214        }
215    }
216
217    /// Whether this result is useful enough to include
218    #[inline]
219    pub fn is_useful(&self) -> bool {
220        self.metadata.quality_score() > 0.3
221    }
222
223    /// Whether this result is high quality
224    #[inline]
225    pub fn is_high_quality(&self) -> bool {
226        self.metadata.quality_score() > 0.7
227    }
228
229    /// Convert to a message-friendly format
230    pub fn to_summary(&self) -> String {
231        let quality = (self.metadata.quality_score() * 100.0) as u32;
232        match self.metadata.completeness {
233            ResultCompleteness::Complete => {
234                format!(
235                    "{} found {} results (confidence: {}%)",
236                    self.tool_name, self.metadata.result_count, quality
237                )
238            }
239            ResultCompleteness::Partial => {
240                format!(
241                    "{} found {} results (truncated, confidence: {}%)",
242                    self.tool_name, self.metadata.result_count, quality
243                )
244            }
245            ResultCompleteness::Empty => {
246                format!("{} found no results", self.tool_name)
247            }
248            ResultCompleteness::Truncated => {
249                format!(
250                    "{} found results (truncated due to size, confidence: {}%)",
251                    self.tool_name, quality
252                )
253            }
254        }
255    }
256}
257
258/// Trait for scoring tool results
259pub trait ResultScorer {
260    /// Score a tool result and return metadata
261    fn score(&self, result: &Value) -> ResultMetadata;
262
263    /// Tool name this scorer handles
264    fn tool_name(&self) -> &str;
265}
266
/// Scorer for grep results.
///
/// Stateless unit type, so it derives the common traits (`Debug`, `Clone`,
/// `Copy`, `Default`) for logging and easy construction.
#[derive(Debug, Clone, Copy, Default)]
pub struct GrepScorer;
269
270impl ResultScorer for GrepScorer {
271    fn score(&self, result: &Value) -> ResultMetadata {
272        let mut metadata = ResultMetadata::default();
273        metadata.content_types.push("code".to_string());
274
275        match result {
276            Value::Object(map) => {
277                // Count matches
278                if let Some(matches) = map.get("matches")
279                    && let Some(count) = matches.as_array()
280                {
281                    metadata.result_count = count.len();
282
283                    // High confidence if specific matches
284                    metadata.confidence = if count.len() > 5 {
285                        0.85
286                    } else if !count.is_empty() {
287                        0.80
288                    } else {
289                        1.0 // High confidence in "no matches"
290                    };
291
292                    metadata.relevance = 0.75; // Grep results are usually relevant
293
294                    metadata.completeness = if count.len() < 1000 {
295                        ResultCompleteness::Complete
296                    } else {
297                        ResultCompleteness::Partial
298                    };
299
300                    // Lower false positive chance for specific patterns
301                    metadata.false_positive_likelihood = 0.05;
302                }
303
304                // Track line count
305                if let Some(lines) = map.get("line_count")
306                    && let Some(n) = lines.as_u64()
307                {
308                    metadata
309                        .tool_metrics
310                        .insert("line_count".to_string(), Value::Number(n.into()));
311                }
312            }
313            Value::Array(arr) => {
314                metadata.result_count = arr.len();
315                metadata.confidence = if arr.is_empty() { 1.0 } else { 0.80 };
316                metadata.relevance = 0.75;
317            }
318            _ => {
319                metadata = ResultMetadata::error();
320            }
321        }
322
323        metadata
324    }
325
326    fn tool_name(&self) -> &str {
327        tools::UNIFIED_SEARCH
328    }
329}
330
/// Scorer for file finding results.
///
/// Stateless unit type, so it derives the common traits (`Debug`, `Clone`,
/// `Copy`, `Default`) for logging and easy construction.
#[derive(Debug, Clone, Copy, Default)]
pub struct FindScorer;
333
334impl ResultScorer for FindScorer {
335    fn score(&self, result: &Value) -> ResultMetadata {
336        let mut metadata = ResultMetadata::default();
337        metadata.content_types.push("filesystem".to_string());
338
339        match result {
340            Value::Object(map) => {
341                if let Some(files) = map.get("files")
342                    && let Some(file_arr) = files.as_array()
343                {
344                    metadata.result_count = file_arr.len();
345                    metadata.confidence = if file_arr.is_empty() {
346                        1.0 // High confidence in "no files"
347                    } else {
348                        0.90 // Very high confidence in file paths
349                    };
350                    metadata.relevance = 0.80;
351                    metadata.completeness = ResultCompleteness::Complete;
352                }
353            }
354            Value::Array(arr) => {
355                metadata.result_count = arr.len();
356                metadata.confidence = 0.90;
357                metadata.relevance = 0.80;
358            }
359            _ => {
360                metadata = ResultMetadata::error();
361            }
362        }
363
364        metadata
365    }
366
367    fn tool_name(&self) -> &str {
368        "find"
369    }
370}
371
/// Scorer for shell command results.
///
/// Stateless unit type, so it derives the common traits (`Debug`, `Clone`,
/// `Copy`, `Default`) for logging and easy construction.
#[derive(Debug, Clone, Copy, Default)]
pub struct ShellScorer;
374
375impl ResultScorer for ShellScorer {
376    fn score(&self, result: &Value) -> ResultMetadata {
377        let mut metadata = ResultMetadata::default();
378
379        match result {
380            Value::Object(map) => {
381                // Check for exit code
382                let exit_code = map.get("exit_code").and_then(|v| v.as_i64()).unwrap_or(-1);
383
384                // Success means high confidence
385                if exit_code == 0 {
386                    metadata.confidence = 0.85;
387                } else {
388                    metadata.confidence = 0.20;
389                    metadata.completeness = ResultCompleteness::Empty;
390                }
391
392                if let Some(output) = map.get("output").or_else(|| map.get("stdout"))
393                    && let Some(s) = output.as_str()
394                {
395                    metadata.result_count = s.lines().count();
396                    metadata.relevance = 0.70;
397                }
398            }
399            _ => {
400                metadata = ResultMetadata::error();
401            }
402        }
403
404        metadata
405    }
406
407    fn tool_name(&self) -> &str {
408        tools::UNIFIED_EXEC
409    }
410}
411
412/// Registry for result scorers
413pub struct ScorerRegistry {
414    scorers: HashMap<CompactStr, Box<dyn ResultScorer>>,
415}
416
417impl ScorerRegistry {
418    pub fn new() -> Self {
419        let mut scorers: HashMap<CompactStr, Box<dyn ResultScorer>> = HashMap::new();
420        scorers.insert(
421            CompactStr::from(tools::UNIFIED_SEARCH),
422            Box::new(GrepScorer) as Box<dyn ResultScorer>,
423        );
424        scorers.insert(
425            CompactStr::from("find"),
426            Box::new(FindScorer) as Box<dyn ResultScorer>,
427        );
428        scorers.insert(
429            CompactStr::from(tools::UNIFIED_EXEC),
430            Box::new(ShellScorer) as Box<dyn ResultScorer>,
431        );
432
433        Self { scorers }
434    }
435
436    /// Register a custom scorer
437    pub fn register(&mut self, scorer: Box<dyn ResultScorer>) {
438        self.scorers
439            .insert(CompactStr::from(scorer.tool_name()), scorer);
440    }
441
442    /// Score a tool result
443    pub fn score(&self, tool_name: &str, result: &Value) -> ResultMetadata {
444        let canonical_tool_name =
445            tool_intent::canonical_unified_exec_tool_name(tool_name).unwrap_or(tool_name);
446        if let Some(scorer) = self.scorers.get(canonical_tool_name) {
447            scorer.score(result)
448        } else {
449            // Default scoring for unknown tools
450            match result {
451                Value::Null => ResultMetadata::empty(),
452                Value::Object(_) => ResultMetadata::success(0.6, 0.6),
453                Value::Array(arr) => {
454                    let mut meta = ResultMetadata::success(0.6, 0.6);
455                    meta.result_count = arr.len();
456                    meta
457                }
458                _ => ResultMetadata::success(0.5, 0.5),
459            }
460        }
461    }
462}
463
464impl Default for ScorerRegistry {
465    fn default() -> Self {
466        Self::new()
467    }
468}
469
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Every completeness variant renders as its lowercase label.
    #[test]
    fn test_result_completeness() {
        assert_eq!(ResultCompleteness::Complete.to_string(), "complete");
        assert_eq!(ResultCompleteness::Partial.to_string(), "partial");
        assert_eq!(ResultCompleteness::Truncated.to_string(), "truncated");
        assert_eq!(ResultCompleteness::Empty.to_string(), "empty");
    }

    /// 0.8 confidence and relevance with a 0.1 false-positive likelihood
    /// lands between 0.6 and 0.8.
    #[test]
    fn test_quality_score() {
        let meta = ResultMetadata {
            confidence: 0.8,
            relevance: 0.8,
            false_positive_likelihood: 0.1,
            ..Default::default()
        };

        let score = meta.quality_score();
        assert!(score > 0.6 && score < 0.8);
    }

    /// A 0.8/0.8 success clears the "useful" bar but not the "high quality" one.
    #[test]
    fn test_enhanced_result_is_useful() {
        let result = EnhancedToolResult::new(
            json!({"matches": []}),
            ResultMetadata::success(0.8, 0.8),
            tools::UNIFIED_SEARCH.to_string(),
        );

        assert!(result.is_useful());
        assert!(!result.is_high_quality());
    }

    /// The grep scorer counts the entries of the `matches` array.
    #[test]
    fn test_grep_scorer() {
        let scorer = GrepScorer;
        let result = json!({
            "matches": ["line1", "line2", "line3"],
            "line_count": 100
        });

        let meta = scorer.score(&result);
        assert_eq!(meta.result_count, 3);
        assert!(meta.confidence > 0.7);
    }

    /// The registry routes `find` results to the find scorer.
    #[test]
    fn test_scorer_registry() {
        let registry = ScorerRegistry::new();
        let result = json!({"files": ["a.txt", "b.txt"]});

        let meta = registry.score("find", &result);
        assert_eq!(meta.result_count, 2);
    }

    /// The `RUN_PTY_CMD` name must be scored exactly like `UNIFIED_EXEC`.
    #[test]
    fn test_command_session_aliases_share_shell_scorer() {
        let registry = ScorerRegistry::new();
        let result = json!({"exit_code": 0, "stdout": "line1\nline2"});

        let unified_meta = registry.score(tools::UNIFIED_EXEC, &result);
        let legacy_meta = registry.score(tools::RUN_PTY_CMD, &result);

        assert_eq!(legacy_meta.result_count, unified_meta.result_count);
        assert_eq!(legacy_meta.confidence, unified_meta.confidence);
        assert_eq!(legacy_meta.relevance, unified_meta.relevance);
    }
}