use crate::protocol::{JsonRpcError, JsonRpcResponse};
use codeprism_core::{
    resilience::CircuitBreakerConfig, CircuitState, Error as CoreError, ErrorContext,
    ErrorSeverity, HealthMonitor, MetricsCollector, PerformanceMonitor, RecoveryStrategy,
    ResilienceManager, RetryConfig,
};
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};

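/// Errors surfaced by the MCP server layer, wrapping core errors and adding
/// protocol-, tool-, resource-, prompt-, cancellation-, timeout-, and rate-limit
/// specific variants. Each variant maps to a severity, a recovery strategy, and a
/// JSON-RPC error code via the methods on this type.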
#[derive(Debug, Clone, thiserror::Error)]
pub enum McpError {
    #[error("Core error: {0}")]
    Core(#[from] CoreError),

    #[error("Protocol error: {0}")]
    Protocol(String),

    #[error("Tool execution error: {tool_name}: {message}")]
    ToolExecution {
        tool_name: String,
        message: String,
        context: Option<ErrorContext>,
    },

    #[error("Resource error: {resource_type}: {message}")]
    Resource {
        resource_type: String,
        message: String,
    },

    #[error("Prompt error: {prompt_name}: {message}")]
    Prompt {
        prompt_name: String,
        message: String,
    },

    #[error("Operation cancelled: {operation}")]
    Cancelled {
        operation: String,
        reason: Option<String>,
    },

    #[error("Operation timed out: {operation} (timeout: {timeout_ms}ms)")]
    Timeout { operation: String, timeout_ms: u64 },

    #[error("Rate limit exceeded for operation: {operation}")]
    RateLimit {
        operation: String,
        retry_after_ms: u64,
    },
}

impl McpError {
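    /// Classify this error's severity, delegating to the core error where applicable.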
    pub fn severity(&self) -> ErrorSeverity {
        match self {
            Self::Core(core_error) => core_error.severity(),
            Self::Protocol(_) => ErrorSeverity::Error,
            Self::ToolExecution { .. } => ErrorSeverity::Warning,
            Self::Resource { .. } => ErrorSeverity::Error,
            Self::Prompt { .. } => ErrorSeverity::Warning,
            Self::Cancelled { .. } => ErrorSeverity::Info,
            Self::Timeout { .. } => ErrorSeverity::Warning,
            Self::RateLimit { .. } => ErrorSeverity::Warning,
        }
    }

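    /// Suggest a recovery strategy (retry, fallback, user intervention, ...) for this error.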
    pub fn recovery_strategy(&self) -> RecoveryStrategy {
        match self {
            Self::Core(core_error) => core_error.recovery_strategy(),
            Self::Protocol(_) => RecoveryStrategy::UserIntervention,
            Self::ToolExecution { .. } => RecoveryStrategy::Fallback,
            Self::Resource { .. } => RecoveryStrategy::Retry,
            Self::Prompt { .. } => RecoveryStrategy::Fallback,
            Self::Cancelled { .. } => RecoveryStrategy::UserIntervention,
            Self::Timeout { .. } => RecoveryStrategy::Retry,
            Self::RateLimit { .. } => RecoveryStrategy::Retry,
        }
    }

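    /// Returns `true` if the recommended recovery strategy is a retry.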
    pub fn should_retry(&self) -> bool {
        matches!(self.recovery_strategy(), RecoveryStrategy::Retry)
    }

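    /// Map this error to a JSON-RPC error code (core errors supply their own code).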
    pub fn json_rpc_code(&self) -> i32 {
        match self {
            Self::Core(core_error) => core_error.error_code(),
            Self::Protocol(_) => JsonRpcError::INVALID_REQUEST,
            Self::ToolExecution { .. } => -32100,
            Self::Resource { .. } => -32101,
            Self::Prompt { .. } => -32102,
            Self::Cancelled { .. } => -32015,
            Self::Timeout { .. } => -32012,
            Self::RateLimit { .. } => -32016,
        }
    }

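    /// Human-readable variant name, used as the `error_type` field in JSON-RPC error data.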
    pub fn error_type_name(&self) -> &'static str {
        match self {
            Self::Core(_) => "Core",
            Self::Protocol(_) => "Protocol",
            Self::ToolExecution { .. } => "ToolExecution",
            Self::Resource { .. } => "Resource",
            Self::Prompt { .. } => "Prompt",
            Self::Cancelled { .. } => "Cancelled",
            Self::Timeout { .. } => "Timeout",
            Self::RateLimit { .. } => "RateLimit",
        }
    }

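    /// Convert this error into a JSON-RPC error object, attaching severity,
    /// recovery strategy, and error type as structured data.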
    pub fn to_json_rpc_error(&self) -> JsonRpcError {
        JsonRpcError::new(
            self.json_rpc_code(),
            self.to_string(),
            Some(serde_json::json!({
                "severity": format!("{:?}", self.severity()),
                "recovery_strategy": format!("{:?}", self.recovery_strategy()),
                "error_type": self.error_type_name(),
            })),
        )
    }
}

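/// Convenience result alias for MCP server operations.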
pub type McpResult<T> = Result<T, McpError>;

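/// Central error handler for the MCP server: records error metrics, tracks circuit
/// breaker state per operation, executes operations with retry and fallback through
/// the core resilience manager, and converts failures into JSON-RPC responses.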
pub struct McpErrorHandler {
    metrics_collector: MetricsCollector,
    health_monitor: HealthMonitor,
    #[allow(dead_code)]
    performance_monitor: PerformanceMonitor,
    resilience_manager: ResilienceManager,
    circuit_states: Arc<RwLock<std::collections::HashMap<String, CircuitState>>>,
}

impl McpErrorHandler {
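    /// Create an error handler with default retry settings (100ms base delay, 2x
    /// exponential backoff with jitter, capped at 5s) and circuit breaker settings
    /// (failure threshold 5, success threshold 3, 30s recovery timeout, 60s window).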
    pub fn new() -> Self {
        let metrics_collector = MetricsCollector::new();
        let health_monitor = HealthMonitor::new(metrics_collector.clone());
        let performance_monitor = PerformanceMonitor::new(metrics_collector.clone());

        let retry_config = RetryConfig::new(3, std::time::Duration::from_millis(100))
            .with_max_delay(std::time::Duration::from_secs(5))
            .with_backoff_multiplier(2.0)
            .with_jitter(true);

        let circuit_config = CircuitBreakerConfig {
            failure_threshold: 5,
            success_threshold: 3,
            recovery_timeout: std::time::Duration::from_secs(30),
            time_window: std::time::Duration::from_secs(60),
        };

        let resilience_manager = ResilienceManager::new(retry_config, circuit_config);

        Self {
            metrics_collector,
            health_monitor,
            performance_monitor,
            resilience_manager,
            circuit_states: Arc::new(RwLock::new(std::collections::HashMap::new())),
        }
    }

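    /// Record an error in the metrics collector, update per-operation circuit breaker
    /// state for Error/Critical severities when an operation name is given, log at the
    /// level matching the severity, and raise a critical alert for `Critical` errors.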
    pub async fn handle_error(&self, error: &McpError, operation: Option<&str>) {
        let core_error = match error {
            McpError::Core(e) => e.clone(),
            _ => CoreError::other(error.to_string()),
        };
        self.metrics_collector.record_error(&core_error, operation);

        if matches!(
            error.severity(),
            ErrorSeverity::Error | ErrorSeverity::Critical
        ) {
            if let Some(op) = operation {
                let mut states = self.circuit_states.write().await;
                let current_state = self.resilience_manager.circuit_state();
                states.insert(op.to_string(), current_state.clone());
                self.health_monitor.update_circuit_state(op, current_state);
            }
        }

        match error.severity() {
            ErrorSeverity::Info => info!(
                error = %error,
                operation = operation,
                severity = ?error.severity(),
                "Informational error"
            ),
            ErrorSeverity::Warning => warn!(
                error = %error,
                operation = operation,
                severity = ?error.severity(),
                recovery_strategy = ?error.recovery_strategy(),
                "Warning: recoverable error"
            ),
            ErrorSeverity::Error => error!(
                error = %error,
                operation = operation,
                severity = ?error.severity(),
                recovery_strategy = ?error.recovery_strategy(),
                "Error: significant issue encountered"
            ),
            ErrorSeverity::Critical => {
                error!(
                    error = %error,
                    operation = operation,
                    severity = ?error.severity(),
                    recovery_strategy = ?error.recovery_strategy(),
                    "CRITICAL: system stability at risk"
                );

                self.trigger_critical_alert(error, operation).await;
            }
        }
    }

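    /// Execute an operation through the resilience manager (retry plus circuit
    /// breaker). Successes are recorded in the metrics collector; failures are
    /// surfaced as [`McpError::Core`] and routed through [`Self::handle_error`].
    ///
    /// Illustrative usage, mirroring the unit tests below (not compiled as a doctest):
    ///
    /// ```ignore
    /// let handler = McpErrorHandler::new();
    /// let answer = handler
    ///     .execute_with_recovery("demo_op", || async { Ok::<i32, McpError>(42) })
    ///     .await?;
    /// ```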
    pub async fn execute_with_recovery<F, Fut, T>(
        &self,
        operation_name: &str,
        operation: F,
    ) -> McpResult<T>
    where
        F: Fn() -> Fut + Clone,
        Fut: std::future::Future<Output = McpResult<T>>,
    {
        let resilience_result = self
            .resilience_manager
            .execute(|| {
                let op = operation.clone();
                async move {
                    match op().await {
                        Ok(value) => Ok(value),
                        Err(mcp_error) => {
                            let core_error = match &mcp_error {
                                McpError::Core(e) => e.clone(),
                                _ => CoreError::other(mcp_error.to_string()),
                            };
                            Err(core_error)
                        }
                    }
                }
            })
            .await;

        let result = match resilience_result {
            Ok(value) => {
                self.metrics_collector
                    .record_success(operation_name, std::time::Duration::from_millis(0));
                Ok(value)
            }
            Err(core_error) => {
                let mcp_error = McpError::Core(core_error);
                Err(mcp_error)
            }
        };

        match &result {
            Ok(_) => {
                debug!(
                    operation = operation_name,
                    "Operation completed successfully"
                );
            }
            Err(error) => {
                self.handle_error(error, Some(operation_name)).await;
            }
        }

        result
    }

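    /// Execute an operation with recovery, falling back to the provided closure's
    /// value if the operation still fails after retries.
    ///
    /// Illustrative usage, mirroring the unit tests below (not compiled as a doctest):
    ///
    /// ```ignore
    /// let value = handler
    ///     .execute_with_fallback(
    ///         "demo_op",
    ///         || async { Err::<i32, McpError>(McpError::Protocol("boom".to_string())) },
    ///         || async { 100 },
    ///     )
    ///     .await;
    /// assert_eq!(value, 100);
    /// ```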
    pub async fn execute_with_fallback<F, Fut, T, FB, FutB>(
        &self,
        operation_name: &str,
        operation: F,
        fallback: FB,
    ) -> T
    where
        F: Fn() -> Fut + Clone,
        Fut: std::future::Future<Output = McpResult<T>>,
        FB: Fn() -> FutB,
        FutB: std::future::Future<Output = T>,
    {
        match self.execute_with_recovery(operation_name, operation).await {
            Ok(result) => result,
            Err(error) => {
                warn!(
                    operation = operation_name,
                    error = %error,
                    "Operation failed, using fallback"
                );
                fallback().await
            }
        }
    }

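    /// Current health check result from the health monitor.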
    pub fn get_health_status(&self) -> codeprism_core::HealthCheckResult {
        self.health_monitor.health_check()
    }

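    /// Snapshot of the collected error and success metrics.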
    pub fn get_metrics(&self) -> codeprism_core::MetricsSnapshot {
        self.metrics_collector.get_metrics_snapshot()
    }

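    /// Whether the resilience manager currently reports the system as healthy.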
    pub fn is_healthy(&self) -> bool {
        self.resilience_manager.is_healthy()
    }

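    /// Build a JSON-RPC error response for the given request id from an [`McpError`].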
    pub fn error_to_response(
        &self,
        error: &McpError,
        request_id: serde_json::Value,
    ) -> JsonRpcResponse {
        JsonRpcResponse {
            jsonrpc: "2.0".to_string(),
            id: request_id,
            result: None,
            error: Some(error.to_json_rpc_error()),
        }
    }

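    /// Decide whether a partially completed operation is acceptable. Only errors whose
    /// recovery strategy is `Degrade` are tolerated, and only when at least 80% of the
    /// items were processed (returning `Ok(None)`); otherwise the original error is returned.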
    pub async fn handle_partial_operation<T>(
        &self,
        operation_name: &str,
        total_items: usize,
        processed_items: usize,
        error: &McpError,
    ) -> McpResult<Option<T>> {
        let completion_rate = (processed_items as f64 / total_items as f64) * 100.0;

        match error.recovery_strategy() {
            RecoveryStrategy::Degrade => {
                if completion_rate >= 80.0 {
                    warn!(
                        operation = operation_name,
                        completion_rate = completion_rate,
                        error = %error,
                        "Operation completed with degraded results"
                    );
                    Ok(None)
                } else {
                    error!(
                        operation = operation_name,
                        completion_rate = completion_rate,
                        error = %error,
                        "Operation failed with insufficient completion rate"
                    );
                    Err(error.clone())
                }
            }
            _ => Err(error.clone()),
        }
    }

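    /// Emit a structured critical alert log entry for errors that require manual intervention.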
    async fn trigger_critical_alert(&self, error: &McpError, operation: Option<&str>) {
        error!(
            alert_type = "CRITICAL_ERROR",
            error = %error,
            operation = operation,
            timestamp = %chrono::Utc::now(),
            "CRITICAL ALERT: Manual intervention required"
        );
    }

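    /// Build an [`ErrorContext`] carrying the request id, the operation name, and the
    /// current system health status as metadata.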
    pub fn create_context(
        &self,
        request_id: Option<String>,
        operation: Option<String>,
    ) -> ErrorContext {
        let mut context = ErrorContext::new();

        if let Some(id) = request_id {
            context = context.with_request_id(id);
        }

        if let Some(op) = operation {
            context = context.with_operation(op);
        }

        let health = self.get_health_status();
        context = context.with_metadata(
            "system_health".to_string(),
            serde_json::to_value(health.status).unwrap_or_default(),
        );

        context
    }
}

impl Default for McpErrorHandler {
    fn default() -> Self {
        Self::new()
    }
}

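/// Propagate a `Result<_, CoreError>` inside an async function returning [`McpResult`]:
/// on error, report it through the handler for the named operation, then return early.
///
/// Illustrative usage; `do_core_work()` is a hypothetical function returning
/// `Result<String, CoreError>` (not compiled as a doctest):
///
/// ```ignore
/// async fn run(handler: &McpErrorHandler) -> McpResult<String> {
///     let value = mcp_try!(do_core_work(), handler, "core_work");
///     Ok(value)
/// }
/// ```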
#[macro_export]
macro_rules! mcp_try {
    ($expr:expr, $handler:expr, $operation:expr) => {
        match $expr {
            Ok(value) => value,
            Err(error) => {
                let mcp_error = McpError::Core(error);
                $handler.handle_error(&mcp_error, Some($operation)).await;
                return Err(mcp_error);
            }
        }
    };
}

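/// Construct an [`McpError::ToolExecution`] from a tool name and message, with an
/// optional [`ErrorContext`].
///
/// Illustrative usage with placeholder strings (not compiled as a doctest):
///
/// ```ignore
/// let err = mcp_tool_error!("example_tool", "something went wrong");
/// ```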
#[macro_export]
macro_rules! mcp_tool_error {
    ($tool_name:expr, $message:expr) => {
        McpError::ToolExecution {
            tool_name: $tool_name.to_string(),
            message: $message.to_string(),
            context: None,
        }
    };
    ($tool_name:expr, $message:expr, $context:expr) => {
        McpError::ToolExecution {
            tool_name: $tool_name.to_string(),
            message: $message.to_string(),
            context: Some($context),
        }
    };
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_mcp_error_severity() {
        let error = McpError::Protocol("test error".to_string());
        assert_eq!(error.severity(), ErrorSeverity::Error);

        let error = McpError::Cancelled {
            operation: "test_op".to_string(),
            reason: None,
        };
        assert_eq!(error.severity(), ErrorSeverity::Info);
    }

    #[test]
    fn test_mcp_error_json_rpc_conversion() {
        let error = McpError::ToolExecution {
            tool_name: "test_tool".to_string(),
            message: "test error".to_string(),
            context: None,
        };

        let json_rpc_error = error.to_json_rpc_error();
        assert_eq!(json_rpc_error.code, -32100);
        assert!(json_rpc_error.message.contains("test error"));
    }

    #[tokio::test]
    async fn test_error_handler_creation() {
        let handler = McpErrorHandler::new();
        assert!(handler.is_healthy());
    }

    #[tokio::test]
    async fn test_execute_with_recovery_success() {
        let handler = McpErrorHandler::new();

        let result = handler
            .execute_with_recovery("test_op", || async { Ok::<i32, McpError>(42) })
            .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
    }

    #[tokio::test]
    async fn test_execute_with_recovery_failure() {
        let handler = McpErrorHandler::new();

        let result = handler
            .execute_with_recovery("test_op", || async {
                Err::<i32, McpError>(McpError::Protocol("test error".to_string()))
            })
            .await;

        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_execute_with_fallback() {
        let handler = McpErrorHandler::new();

        let result = handler
            .execute_with_fallback(
                "test_op",
                || async { Err::<i32, McpError>(McpError::Protocol("test error".to_string())) },
                || async { 100 },
            )
            .await;

        assert_eq!(result, 100);
    }

    #[tokio::test]
    async fn test_error_handling_and_metrics() {
        let handler = McpErrorHandler::new();

        let error = McpError::ToolExecution {
            tool_name: "test_tool".to_string(),
            message: "test error".to_string(),
            context: None,
        };

        handler.handle_error(&error, Some("test_operation")).await;

        let metrics = handler.get_metrics();
        assert!(metrics.uptime_seconds < 365 * 24 * 3600);
    }

    #[test]
    fn test_error_context_creation() {
        let handler = McpErrorHandler::new();

        let context = handler.create_context(
            Some("req-123".to_string()),
            Some("test_operation".to_string()),
        );

        assert_eq!(context.request_id, Some("req-123".to_string()));
        assert_eq!(context.operation, Some("test_operation".to_string()));
        assert!(!context.metadata.is_empty());
    }

    #[tokio::test]
    async fn test_partial_operation_handling() {
        let handler = McpErrorHandler::new();

        let error = McpError::Core(CoreError::indexing("partial failure"));

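        // Both calls below are expected to return Err: handle_partial_operation only
        // accepts partial results for the Degrade recovery strategy, which this error
        // does not use.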
        let result = handler
            .handle_partial_operation::<()>("test_op", 100, 85, &error)
            .await;
        assert!(result.is_err());

        let result = handler
            .handle_partial_operation::<()>("test_op", 100, 50, &error)
            .await;
        assert!(result.is_err());
    }
}