use std::sync::Arc;
use tokio::sync::Mutex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::session::manager::VisionSessionManager;
use crate::types::error::{McpError, McpResult};
use crate::types::response::{ToolCallResult, ToolDefinition};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualEvidence {
pub evidence_type: VisualEvidenceType,
pub value: String,
pub captured_at: u64,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum VisualEvidenceType {
Text,
Color,
Element,
Layout,
State,
Comparison,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundingBox {
pub x: u32,
pub y: u32,
pub width: u32,
pub height: u32,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum UngroundedReason {
NoCaptureExists,
ElementNotFound,
Contradicted,
CaptureStale,
Obscured,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum VisualHallucinationType {
NonExistent,
WrongAppearance,
WrongLocation,
WrongText,
WrongState,
InventedUI,
StaleDescription,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum HallucinationSeverity {
Minor,
Moderate,
Severe,
Critical,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum VisualTruthStatus {
Valid,
Stale,
Invalidated,
Deleted,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum VisualChangeType {
ColorChange,
TextChange,
PositionChange,
SizeChange,
StateChange,
Removal,
Addition,
LayoutChange,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ContextType {
DifferentSite,
DifferentVersion,
DifferentDevice,
DifferentUser,
DifferentLocale,
ABVariant,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum DifferenceImpact {
Cosmetic,
UXImpact,
Functional,
Critical,
}
fn word_overlap(a: &str, b: &str) -> f64 {
let a_lower = a.to_lowercase();
let b_lower = b.to_lowercase();
let a_words: std::collections::HashSet<&str> = a_lower
.split(|c: char| !c.is_alphanumeric())
.filter(|w| w.len() >= 2)
.collect();
let b_words: std::collections::HashSet<&str> = b_lower
.split(|c: char| !c.is_alphanumeric())
.filter(|w| w.len() >= 2)
.collect();
if a_words.is_empty() || b_words.is_empty() {
return 0.0;
}
let intersection = a_words.intersection(&b_words).count();
intersection as f64 / a_words.len().max(b_words.len()) as f64
}
fn now_epoch() -> u64 {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}
pub fn definition_vision_ground_claim() -> ToolDefinition {
ToolDefinition {
name: "vision_ground_claim".to_string(),
description: Some("Attempt to ground a visual claim against stored captures".to_string()),
input_schema: json!({
"type": "object",
"required": ["claim"],
"properties": {
"claim": { "type": "string", "description": "The visual claim to ground" }
}
}),
}
}
pub async fn execute_vision_ground_claim(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
claim: String,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
if p.claim.trim().is_empty() {
return Ok(ToolCallResult::json(&json!({
"status": "ungrounded", "reason": "Empty claim"
})));
}
let session = session.lock().await;
let store = session.store();
let claim_lower = p.claim.to_lowercase();
let mut evidence: Vec<Value> = Vec::new();
let mut best_score: f64 = 0.0;
for obs in &store.observations {
let mut score: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
score += word_overlap(&claim_lower, &desc.to_lowercase()) * 0.6;
}
for label in &obs.metadata.labels {
if claim_lower.contains(&label.to_lowercase()) {
score += 0.3;
break;
}
}
if score > 0.15 {
if score > best_score {
best_score = score;
}
evidence.push(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"session_id": obs.session_id,
"description": obs.metadata.description,
"labels": obs.metadata.labels,
"relevance": (score * 100.0).round() / 100.0,
"evidence_type": "observation_match",
}));
}
}
evidence.sort_by(|a, b| {
b["relevance"]
.as_f64()
.unwrap_or(0.0)
.partial_cmp(&a["relevance"].as_f64().unwrap_or(0.0))
.unwrap_or(std::cmp::Ordering::Equal)
});
evidence.truncate(10);
let confidence = best_score.min(1.0);
let fully_grounded = confidence > 0.5;
Ok(ToolCallResult::json(&json!({
"claim": p.claim,
"fully_grounded": fully_grounded,
"confidence": (confidence * 100.0).round() / 100.0,
"evidence_count": evidence.len(),
"evidence": evidence,
})))
}
pub fn definition_vision_verify_claim() -> ToolDefinition {
ToolDefinition {
name: "vision_verify_claim".to_string(),
description: Some("Verify if a visual claim is true against current captures".to_string()),
input_schema: json!({
"type": "object",
"required": ["claim"],
"properties": {
"claim": { "type": "string", "description": "The visual claim to verify" },
"max_age_seconds": { "type": "number", "description": "Max age of evidence in seconds", "default": 3600 }
}
}),
}
}
pub async fn execute_vision_verify_claim(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
claim: String,
#[serde(default = "default_max_age")]
max_age_seconds: u64,
}
fn default_max_age() -> u64 {
3600
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let now = now_epoch();
let claim_lower = p.claim.to_lowercase();
let mut best_match: Option<Value> = None;
let mut best_score: f64 = 0.0;
for obs in &store.observations {
let age = now.saturating_sub(obs.timestamp);
if age > p.max_age_seconds {
continue;
}
let mut score: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
score += word_overlap(&claim_lower, &desc.to_lowercase()) * 0.7;
}
for label in &obs.metadata.labels {
if claim_lower.contains(&label.to_lowercase()) {
score += 0.2;
break;
}
}
let recency = 1.0 - (age as f64 / p.max_age_seconds as f64);
score += recency * 0.1;
if score > best_score {
best_score = score;
best_match = Some(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"age_seconds": age,
"description": obs.metadata.description,
}));
}
}
let verified = best_score > 0.4;
Ok(ToolCallResult::json(&json!({
"claim": p.claim,
"verified": verified,
"confidence": (best_score.min(1.0) * 100.0).round() / 100.0,
"status": if verified { "verified" } else { "unverified" },
"best_evidence": best_match,
})))
}
pub fn definition_vision_cite() -> ToolDefinition {
ToolDefinition {
name: "vision_cite".to_string(),
description: Some("Get citation for a visual element from captures".to_string()),
input_schema: json!({
"type": "object",
"required": ["element"],
"properties": {
"element": { "type": "string", "description": "The visual element to cite" },
"capture_id": { "type": "number", "description": "Specific capture to cite from" }
}
}),
}
}
pub async fn execute_vision_cite(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
element: String,
capture_id: Option<u64>,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let element_lower = p.element.to_lowercase();
let mut citations: Vec<Value> = Vec::new();
for obs in &store.observations {
if let Some(cid) = p.capture_id {
if obs.id != cid {
continue;
}
}
let mut relevance: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
relevance += word_overlap(&element_lower, &desc.to_lowercase()) * 0.6;
}
for label in &obs.metadata.labels {
if element_lower.contains(&label.to_lowercase()) {
relevance += 0.3;
break;
}
}
if relevance > 0.1 {
citations.push(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"session_id": obs.session_id,
"description": obs.metadata.description,
"labels": obs.metadata.labels,
"relevance": (relevance * 100.0).round() / 100.0,
"citation_type": if relevance > 0.5 { "strong" } else { "partial" },
}));
}
}
citations.sort_by(|a, b| {
b["relevance"]
.as_f64()
.unwrap_or(0.0)
.partial_cmp(&a["relevance"].as_f64().unwrap_or(0.0))
.unwrap_or(std::cmp::Ordering::Equal)
});
citations.truncate(5);
Ok(ToolCallResult::json(&json!({
"element": p.element,
"citations": citations,
"cited": !citations.is_empty(),
})))
}
pub fn definition_vision_contradict() -> ToolDefinition {
ToolDefinition {
name: "vision_contradict".to_string(),
description: Some("Find evidence that contradicts a visual claim".to_string()),
input_schema: json!({
"type": "object",
"required": ["claim"],
"properties": {
"claim": { "type": "string", "description": "The claim to find contradictions for" }
}
}),
}
}
pub async fn execute_vision_contradict(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
claim: String,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let claim_lower = p.claim.to_lowercase();
let negation_words = [
"not",
"no",
"never",
"without",
"missing",
"absent",
"hidden",
"invisible",
"disabled",
"removed",
"deleted",
];
let has_negation = negation_words.iter().any(|w| claim_lower.contains(w));
let mut contradictions: Vec<Value> = Vec::new();
for obs in &store.observations {
if let Some(desc) = &obs.metadata.description {
let desc_lower = desc.to_lowercase();
let overlap = word_overlap(&claim_lower, &desc_lower);
if overlap < 0.15 {
continue;
}
let desc_has_neg = negation_words.iter().any(|w| desc_lower.contains(w));
let is_contradictory = has_negation != desc_has_neg && overlap > 0.2;
if is_contradictory {
contradictions.push(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"description": desc,
"contradiction_type": "negation_mismatch",
"overlap": (overlap * 100.0).round() / 100.0,
}));
}
}
}
Ok(ToolCallResult::json(&json!({
"claim": p.claim,
"contradictions_found": contradictions.len(),
"contradictions": contradictions,
})))
}
pub fn definition_vision_hallucination_check() -> ToolDefinition {
ToolDefinition {
name: "vision_hallucination_check".to_string(),
description: Some("Check AI description for visual hallucinations".to_string()),
input_schema: json!({
"type": "object",
"required": ["ai_description"],
"properties": {
"ai_description": { "type": "string", "description": "The AI output describing visual state" }
}
}),
}
}
pub async fn execute_vision_hallucination_check(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
ai_description: String,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let claims: Vec<&str> = p
.ai_description
.split(['.', '\n'])
.map(|s| s.trim())
.filter(|s| s.len() > 5)
.collect();
let mut hallucinations: Vec<Value> = Vec::new();
let mut verified: Vec<Value> = Vec::new();
let total = claims.len();
for claim in &claims {
let claim_lower = claim.to_lowercase();
let mut best_score: f64 = 0.0;
for obs in &store.observations {
let mut score: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
score = word_overlap(&claim_lower, &desc.to_lowercase());
}
for label in &obs.metadata.labels {
if claim_lower.contains(&label.to_lowercase()) {
score += 0.2;
break;
}
}
if score > best_score {
best_score = score;
}
}
if best_score > 0.3 {
verified.push(json!({
"claim": claim,
"confidence": (best_score.min(1.0) * 100.0).round() / 100.0,
}));
} else if best_score < 0.1 && !store.observations.is_empty() {
let severity = if claim_lower.contains("click")
|| claim_lower.contains("submit")
|| claim_lower.contains("button")
|| claim_lower.contains("error")
{
"severe"
} else {
"moderate"
};
hallucinations.push(json!({
"claim": claim,
"hallucination_type": "unverifiable",
"severity": severity,
"reason": "No matching visual evidence found",
}));
}
}
let score = if total == 0 {
0.0
} else {
hallucinations.len() as f64 / total as f64
};
Ok(ToolCallResult::json(&json!({
"ai_description": p.ai_description,
"total_claims": total,
"verified_count": verified.len(),
"hallucination_count": hallucinations.len(),
"hallucination_score": (score.min(1.0) * 100.0).round() / 100.0,
"trustworthy": score < 0.3,
"verified_claims": verified,
"hallucinations": hallucinations,
})))
}
pub fn definition_vision_hallucination_fix() -> ToolDefinition {
ToolDefinition {
name: "vision_hallucination_fix".to_string(),
description: Some("Suggest corrections for visual hallucinations".to_string()),
input_schema: json!({
"type": "object",
"required": ["claim"],
"properties": {
"claim": { "type": "string", "description": "The hallucinated claim to fix" }
}
}),
}
}
pub async fn execute_vision_hallucination_fix(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
claim: String,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let claim_lower = p.claim.to_lowercase();
let mut suggestions: Vec<Value> = Vec::new();
for obs in &store.observations {
if let Some(desc) = &obs.metadata.description {
let overlap = word_overlap(&claim_lower, &desc.to_lowercase());
if overlap > 0.1 {
suggestions.push(json!({
"capture_id": obs.id,
"actual_description": desc,
"similarity": (overlap * 100.0).round() / 100.0,
"suggestion": format!("Replace with description from capture {}", obs.id),
}));
}
}
}
suggestions.sort_by(|a, b| {
b["similarity"]
.as_f64()
.unwrap_or(0.0)
.partial_cmp(&a["similarity"].as_f64().unwrap_or(0.0))
.unwrap_or(std::cmp::Ordering::Equal)
});
suggestions.truncate(5);
Ok(ToolCallResult::json(&json!({
"claim": p.claim,
"suggestions": suggestions,
"fix_available": !suggestions.is_empty(),
})))
}
pub fn definition_vision_truth_check() -> ToolDefinition {
ToolDefinition {
name: "vision_truth_check".to_string(),
description: Some("Check if a historical visual claim is still true".to_string()),
input_schema: json!({
"type": "object",
"required": ["claim"],
"properties": {
"claim": { "type": "string", "description": "The visual truth to check" },
"established_at": { "type": "number", "description": "When the truth was established (epoch)" }
}
}),
}
}
pub async fn execute_vision_truth_check(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
claim: String,
established_at: Option<u64>,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let now = now_epoch();
let claim_lower = p.claim.to_lowercase();
let mut old_evidence: Option<Value> = None;
let mut new_evidence: Option<Value> = None;
let mut old_score: f64 = 0.0;
let mut new_score: f64 = 0.0;
for obs in &store.observations {
let mut score: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
score = word_overlap(&claim_lower, &desc.to_lowercase());
}
for label in &obs.metadata.labels {
if claim_lower.contains(&label.to_lowercase()) {
score += 0.2;
break;
}
}
if score < 0.15 {
continue;
}
if let Some(est) = p.established_at {
let dist = obs.timestamp.abs_diff(est);
if dist < 3600 && score > old_score {
old_score = score;
old_evidence = Some(json!({
"capture_id": obs.id, "timestamp": obs.timestamp, "score": score
}));
}
}
let age = now.saturating_sub(obs.timestamp);
if age < 3600 && score > new_score {
new_score = score;
new_evidence = Some(json!({
"capture_id": obs.id, "timestamp": obs.timestamp, "score": score
}));
}
}
let status = if new_score > 0.4 {
"valid"
} else if new_evidence.is_some() || old_evidence.is_some() {
"stale"
} else {
"unknown"
};
Ok(ToolCallResult::json(&json!({
"claim": p.claim,
"status": status,
"established_evidence": old_evidence,
"current_evidence": new_evidence,
"last_verified": now,
})))
}
pub fn definition_vision_truth_refresh() -> ToolDefinition {
ToolDefinition {
name: "vision_truth_refresh".to_string(),
description: Some("Re-verify all maintained visual truths".to_string()),
input_schema: json!({
"type": "object",
"properties": {
"max_age_seconds": { "type": "number", "description": "Truths older than this need refresh", "default": 3600 }
}
}),
}
}
pub async fn execute_vision_truth_refresh(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
#[serde(default = "default_3600")]
max_age_seconds: u64,
}
fn default_3600() -> u64 {
3600
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let now = now_epoch();
let total = store.observations.len();
let mut fresh = 0usize;
let mut stale = 0usize;
for obs in &store.observations {
let age = now.saturating_sub(obs.timestamp);
if age <= p.max_age_seconds {
fresh += 1;
} else {
stale += 1;
}
}
Ok(ToolCallResult::json(&json!({
"total_observations": total,
"fresh": fresh,
"stale": stale,
"max_age_seconds": p.max_age_seconds,
"refresh_needed": stale > 0,
"freshness_ratio": if total == 0 { 1.0 } else { fresh as f64 / total as f64 },
})))
}
pub fn definition_vision_truth_history() -> ToolDefinition {
ToolDefinition {
name: "vision_truth_history".to_string(),
description: Some("Get history of a visual truth over time".to_string()),
input_schema: json!({
"type": "object",
"required": ["subject"],
"properties": {
"subject": { "type": "string", "description": "The visual subject to track" }
}
}),
}
}
pub async fn execute_vision_truth_history(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
subject: String,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let subject_lower = p.subject.to_lowercase();
let mut history: Vec<Value> = Vec::new();
for obs in &store.observations {
let mut score: f64 = 0.0;
if let Some(desc) = &obs.metadata.description {
score = word_overlap(&subject_lower, &desc.to_lowercase());
}
for label in &obs.metadata.labels {
if subject_lower.contains(&label.to_lowercase()) {
score += 0.2;
break;
}
}
if score > 0.15 {
history.push(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"session_id": obs.session_id,
"description": obs.metadata.description,
"relevance": (score * 100.0).round() / 100.0,
}));
}
}
history.sort_by(|a, b| {
a["timestamp"]
.as_u64()
.unwrap_or(0)
.cmp(&b["timestamp"].as_u64().unwrap_or(0))
});
Ok(ToolCallResult::json(&json!({
"subject": p.subject,
"history_count": history.len(),
"history": history,
})))
}
pub fn definition_vision_compare_contexts() -> ToolDefinition {
ToolDefinition {
name: "vision_compare_contexts".to_string(),
description: Some("Compare captures across different visual contexts".to_string()),
input_schema: json!({
"type": "object",
"required": ["capture_ids"],
"properties": {
"capture_ids": { "type": "array", "items": { "type": "number" }, "description": "Capture IDs to compare" },
"context_type": { "type": "string", "description": "Type of context comparison" }
}
}),
}
}
pub async fn execute_vision_compare_contexts(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
capture_ids: Vec<u64>,
context_type: Option<String>,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
if p.capture_ids.len() < 2 {
return Err(McpError::InvalidParams(
"Need at least 2 capture IDs".to_string(),
));
}
let session = session.lock().await;
let store = session.store();
let mut contexts: Vec<Value> = Vec::new();
let mut all_labels: Vec<Vec<String>> = Vec::new();
for &cid in &p.capture_ids {
if let Some(obs) = store.observations.iter().find(|o| o.id == cid) {
contexts.push(json!({
"capture_id": obs.id,
"timestamp": obs.timestamp,
"description": obs.metadata.description,
"labels": obs.metadata.labels,
"session_id": obs.session_id,
}));
all_labels.push(obs.metadata.labels.clone());
}
}
let mut similarity: f64 = 0.0;
let mut shared_labels: Vec<String> = Vec::new();
let mut unique_labels: Vec<Vec<String>> = Vec::new();
if all_labels.len() >= 2 {
let first: std::collections::HashSet<String> = all_labels[0].iter().cloned().collect();
let second: std::collections::HashSet<String> = all_labels[1].iter().cloned().collect();
let shared: Vec<String> = first.intersection(&second).cloned().collect();
let total = first.union(&second).count();
similarity = if total == 0 {
0.0
} else {
shared.len() as f64 / total as f64
};
shared_labels = shared;
unique_labels = all_labels
.iter()
.enumerate()
.map(|(i, labels)| {
let other_idx = if i == 0 { 1 } else { 0 };
let other: std::collections::HashSet<&String> =
all_labels[other_idx].iter().collect();
labels
.iter()
.filter(|l| !other.contains(l))
.cloned()
.collect()
})
.collect();
}
Ok(ToolCallResult::json(&json!({
"contexts": contexts,
"context_type": p.context_type.unwrap_or_else(|| "general".to_string()),
"similarity_score": (similarity * 100.0).round() / 100.0,
"shared_labels": shared_labels,
"unique_per_context": unique_labels,
"found": contexts.len(),
"requested": p.capture_ids.len(),
})))
}
pub fn definition_vision_compare_sites() -> ToolDefinition {
ToolDefinition {
name: "vision_compare_sites".to_string(),
description: Some("Compare captures from two different websites".to_string()),
input_schema: json!({
"type": "object",
"required": ["capture_a", "capture_b"],
"properties": {
"capture_a": { "type": "number", "description": "First site capture ID" },
"capture_b": { "type": "number", "description": "Second site capture ID" },
"element": { "type": "string", "description": "Specific element to compare" }
}
}),
}
}
pub async fn execute_vision_compare_sites(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
capture_a: u64,
capture_b: u64,
element: Option<String>,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let obs_a = store.observations.iter().find(|o| o.id == p.capture_a);
let obs_b = store.observations.iter().find(|o| o.id == p.capture_b);
let (info_a, info_b) = match (obs_a, obs_b) {
(Some(a), Some(b)) => (
json!({"capture_id": a.id, "description": a.metadata.description, "labels": a.metadata.labels}),
json!({"capture_id": b.id, "description": b.metadata.description, "labels": b.metadata.labels}),
),
_ => {
return Ok(ToolCallResult::json(&json!({
"error": "One or both captures not found"
})))
}
};
let desc_a = obs_a
.and_then(|o| o.metadata.description.as_deref())
.unwrap_or("");
let desc_b = obs_b
.and_then(|o| o.metadata.description.as_deref())
.unwrap_or("");
let similarity = word_overlap(desc_a, desc_b);
Ok(ToolCallResult::json(&json!({
"site_a": info_a,
"site_b": info_b,
"element_focus": p.element,
"similarity": (similarity * 100.0).round() / 100.0,
"context_type": "different_site",
})))
}
pub fn definition_vision_compare_versions() -> ToolDefinition {
ToolDefinition {
name: "vision_compare_versions".to_string(),
description: Some("Compare two versions of the same site".to_string()),
input_schema: json!({
"type": "object",
"required": ["capture_old", "capture_new"],
"properties": {
"capture_old": { "type": "number", "description": "Old version capture ID" },
"capture_new": { "type": "number", "description": "New version capture ID" }
}
}),
}
}
pub async fn execute_vision_compare_versions(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct P {
capture_old: u64,
capture_new: u64,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let obs_old = store.observations.iter().find(|o| o.id == p.capture_old);
let obs_new = store.observations.iter().find(|o| o.id == p.capture_new);
let (info_old, info_new) = match (obs_old, obs_new) {
(Some(a), Some(b)) => (
json!({"capture_id": a.id, "timestamp": a.timestamp, "description": a.metadata.description, "labels": a.metadata.labels}),
json!({"capture_id": b.id, "timestamp": b.timestamp, "description": b.metadata.description, "labels": b.metadata.labels}),
),
_ => {
return Ok(ToolCallResult::json(
&json!({"error": "One or both captures not found"}),
))
}
};
let desc_old = obs_old
.and_then(|o| o.metadata.description.as_deref())
.unwrap_or("");
let desc_new = obs_new
.and_then(|o| o.metadata.description.as_deref())
.unwrap_or("");
let similarity = word_overlap(desc_old, desc_new);
let labels_old: std::collections::HashSet<String> = obs_old
.map(|o| o.metadata.labels.iter().cloned().collect())
.unwrap_or_default();
let labels_new: std::collections::HashSet<String> = obs_new
.map(|o| o.metadata.labels.iter().cloned().collect())
.unwrap_or_default();
let added: Vec<&String> = labels_new.difference(&labels_old).collect();
let removed: Vec<&String> = labels_old.difference(&labels_new).collect();
Ok(ToolCallResult::json(&json!({
"old_version": info_old,
"new_version": info_new,
"similarity": (similarity * 100.0).round() / 100.0,
"labels_added": added,
"labels_removed": removed,
"context_type": "different_version",
})))
}
pub fn definition_vision_compare_devices() -> ToolDefinition {
ToolDefinition {
name: "vision_compare_devices".to_string(),
description: Some("Compare same page on different devices".to_string()),
input_schema: json!({
"type": "object",
"required": ["captures"],
"properties": {
"captures": {
"type": "array",
"items": {
"type": "object",
"properties": {
"capture_id": { "type": "number" },
"device": { "type": "string" }
}
},
"description": "Captures with device labels"
}
}
}),
}
}
pub async fn execute_vision_compare_devices(
args: Value,
session: &Arc<Mutex<VisionSessionManager>>,
) -> McpResult<ToolCallResult> {
#[derive(Deserialize)]
struct CaptureDevice {
capture_id: u64,
device: Option<String>,
}
#[derive(Deserialize)]
struct P {
captures: Vec<CaptureDevice>,
}
let p: P = serde_json::from_value(args).map_err(|e| McpError::InvalidParams(e.to_string()))?;
let session = session.lock().await;
let store = session.store();
let mut devices: Vec<Value> = Vec::new();
for cd in &p.captures {
if let Some(obs) = store.observations.iter().find(|o| o.id == cd.capture_id) {
devices.push(json!({
"capture_id": obs.id,
"device": cd.device.as_deref().unwrap_or("unknown"),
"timestamp": obs.timestamp,
"description": obs.metadata.description,
"labels": obs.metadata.labels,
"dimensions": {
"width": obs.metadata.width,
"height": obs.metadata.height,
},
}));
}
}
Ok(ToolCallResult::json(&json!({
"devices": devices,
"device_count": devices.len(),
"context_type": "different_device",
})))
}