1use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use rustant_core::error::ToolError;
6use rustant_core::types::{RiskLevel, ToolOutput};
7use serde::{Deserialize, Serialize};
8use serde_json::{Value, json};
9use std::collections::{HashSet, VecDeque};
10use std::path::PathBuf;
11use std::time::Duration;
12
13use crate::registry::Tool;
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16#[serde(rename_all = "lowercase")]
17enum ServiceType {
18 Api,
19 Database,
20 Cache,
21 Queue,
22 Frontend,
23 Worker,
24 Gateway,
25}
26
27impl ServiceType {
28 fn from_str(s: &str) -> Option<Self> {
29 match s {
30 "api" => Some(Self::Api),
31 "database" => Some(Self::Database),
32 "cache" => Some(Self::Cache),
33 "queue" => Some(Self::Queue),
34 "frontend" => Some(Self::Frontend),
35 "worker" => Some(Self::Worker),
36 "gateway" => Some(Self::Gateway),
37 _ => None,
38 }
39 }
40}
41
42impl std::fmt::Display for ServiceType {
43 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44 match self {
45 Self::Api => write!(f, "api"),
46 Self::Database => write!(f, "database"),
47 Self::Cache => write!(f, "cache"),
48 Self::Queue => write!(f, "queue"),
49 Self::Frontend => write!(f, "frontend"),
50 Self::Worker => write!(f, "worker"),
51 Self::Gateway => write!(f, "gateway"),
52 }
53 }
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
57#[serde(rename_all = "lowercase")]
58enum ServiceStatus {
59 Unknown,
60 Healthy,
61 Degraded,
62 Down,
63}
64
65impl ServiceStatus {
66 fn marker(&self) -> &'static str {
67 match self {
68 Self::Healthy => "\u{2713}", Self::Degraded => "\u{26a0}", Self::Down => "\u{2717}", Self::Unknown => "?",
72 }
73 }
74}
75
76impl std::fmt::Display for ServiceStatus {
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78 match self {
79 Self::Unknown => write!(f, "Unknown"),
80 Self::Healthy => write!(f, "Healthy"),
81 Self::Degraded => write!(f, "Degraded"),
82 Self::Down => write!(f, "Down"),
83 }
84 }
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
88struct Service {
89 id: usize,
90 name: String,
91 url: String,
92 service_type: ServiceType,
93 dependencies: Vec<usize>,
94 health_endpoint: Option<String>,
95 status: ServiceStatus,
96 last_checked: Option<DateTime<Utc>>,
97 response_time_ms: Option<u64>,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
101#[serde(rename_all = "lowercase")]
102enum IncidentSeverity {
103 Low,
104 Medium,
105 High,
106 Critical,
107}
108
109impl IncidentSeverity {
110 fn from_str(s: &str) -> Option<Self> {
111 match s {
112 "low" => Some(Self::Low),
113 "medium" => Some(Self::Medium),
114 "high" => Some(Self::High),
115 "critical" => Some(Self::Critical),
116 _ => None,
117 }
118 }
119}
120
121impl std::fmt::Display for IncidentSeverity {
122 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
123 match self {
124 Self::Low => write!(f, "Low"),
125 Self::Medium => write!(f, "Medium"),
126 Self::High => write!(f, "High"),
127 Self::Critical => write!(f, "Critical"),
128 }
129 }
130}
131
132#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
133#[serde(rename_all = "lowercase")]
134enum IncidentStatus {
135 Investigating,
136 Identified,
137 Monitoring,
138 Resolved,
139}
140
141impl std::fmt::Display for IncidentStatus {
142 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143 match self {
144 Self::Investigating => write!(f, "Investigating"),
145 Self::Identified => write!(f, "Identified"),
146 Self::Monitoring => write!(f, "Monitoring"),
147 Self::Resolved => write!(f, "Resolved"),
148 }
149 }
150}
151
152#[derive(Debug, Clone, Serialize, Deserialize)]
153struct TimelineEntry {
154 timestamp: DateTime<Utc>,
155 message: String,
156}
157
158#[derive(Debug, Clone, Serialize, Deserialize)]
159struct Incident {
160 id: usize,
161 title: String,
162 severity: IncidentSeverity,
163 affected_services: Vec<usize>,
164 timeline: Vec<TimelineEntry>,
165 status: IncidentStatus,
166 root_cause: Option<String>,
167 resolution: Option<String>,
168 created_at: DateTime<Utc>,
169}
170
171#[derive(Debug, Default, Serialize, Deserialize)]
172struct MonitorState {
173 services: Vec<Service>,
174 incidents: Vec<Incident>,
175 next_service_id: usize,
176 next_incident_id: usize,
177}
178
/// Tool that tracks service topology, health checks and incidents.
///
/// State is kept in a JSON file under the given workspace directory.
pub struct SystemMonitorTool {
    /// Root directory under which the state file is stored.
    workspace: PathBuf,
}
182
183impl SystemMonitorTool {
184 pub fn new(workspace: PathBuf) -> Self {
185 Self { workspace }
186 }
187
188 fn state_path(&self) -> PathBuf {
189 self.workspace
190 .join(".rustant")
191 .join("monitoring")
192 .join("topology.json")
193 }
194
195 fn load_state(&self) -> MonitorState {
196 let path = self.state_path();
197 if path.exists() {
198 std::fs::read_to_string(&path)
199 .ok()
200 .and_then(|s| serde_json::from_str(&s).ok())
201 .unwrap_or_default()
202 } else {
203 MonitorState {
204 services: Vec::new(),
205 incidents: Vec::new(),
206 next_service_id: 1,
207 next_incident_id: 1,
208 }
209 }
210 }
211
212 fn save_state(&self, state: &MonitorState) -> Result<(), ToolError> {
213 let path = self.state_path();
214 if let Some(parent) = path.parent() {
215 std::fs::create_dir_all(parent).map_err(|e| ToolError::ExecutionFailed {
216 name: "system_monitor".to_string(),
217 message: format!("Failed to create state dir: {}", e),
218 })?;
219 }
220 let json = serde_json::to_string_pretty(state).map_err(|e| ToolError::ExecutionFailed {
221 name: "system_monitor".to_string(),
222 message: format!("Failed to serialize state: {}", e),
223 })?;
224 let tmp = path.with_extension("json.tmp");
225 std::fs::write(&tmp, &json).map_err(|e| ToolError::ExecutionFailed {
226 name: "system_monitor".to_string(),
227 message: format!("Failed to write state: {}", e),
228 })?;
229 std::fs::rename(&tmp, &path).map_err(|e| ToolError::ExecutionFailed {
230 name: "system_monitor".to_string(),
231 message: format!("Failed to rename state file: {}", e),
232 })?;
233 Ok(())
234 }
235
236 fn action_add_service(&self, args: &Value) -> Result<ToolOutput, ToolError> {
237 let name = args
238 .get("name")
239 .and_then(|v| v.as_str())
240 .unwrap_or("")
241 .trim();
242 if name.is_empty() {
243 return Ok(ToolOutput::text("Please provide a service name."));
244 }
245
246 let url = args
247 .get("url")
248 .and_then(|v| v.as_str())
249 .unwrap_or("")
250 .trim();
251 if url.is_empty() {
252 return Ok(ToolOutput::text("Please provide a service URL."));
253 }
254
255 let type_str = args
256 .get("service_type")
257 .and_then(|v| v.as_str())
258 .unwrap_or("");
259 let service_type = match ServiceType::from_str(type_str) {
260 Some(t) => t,
261 None => {
262 return Ok(ToolOutput::text(format!(
263 "Invalid service_type: '{}'. Use: api, database, cache, queue, frontend, worker, gateway",
264 type_str
265 )));
266 }
267 };
268
269 let dependencies: Vec<usize> = args
270 .get("dependencies")
271 .and_then(|v| v.as_array())
272 .map(|arr| {
273 arr.iter()
274 .filter_map(|v| v.as_u64().map(|n| n as usize))
275 .collect()
276 })
277 .unwrap_or_default();
278
279 let health_endpoint = args
280 .get("health_endpoint")
281 .and_then(|v| v.as_str())
282 .map(|s| s.to_string());
283
284 let mut state = self.load_state();
285 let id = state.next_service_id;
286 state.next_service_id += 1;
287
288 state.services.push(Service {
289 id,
290 name: name.to_string(),
291 url: url.to_string(),
292 service_type,
293 dependencies,
294 health_endpoint,
295 status: ServiceStatus::Unknown,
296 last_checked: None,
297 response_time_ms: None,
298 });
299
300 self.save_state(&state)?;
301 Ok(ToolOutput::text(format!(
302 "Added service '{}' (#{}) [{}] at {}",
303 name, id, type_str, url
304 )))
305 }
306
307 fn action_topology(&self) -> Result<ToolOutput, ToolError> {
308 let state = self.load_state();
309 if state.services.is_empty() {
310 return Ok(ToolOutput::text(
311 "No services registered. Use add_service to register services.",
312 ));
313 }
314
315 let mut output = String::from("Service Topology\n================\n\n");
316
317 for service in &state.services {
318 let marker = service.status.marker();
319 output.push_str(&format!(
320 "[{}] {} #{} ({}) - {}\n",
321 marker, service.name, service.id, service.service_type, service.url
322 ));
323 if !service.dependencies.is_empty() {
324 for dep_id in &service.dependencies {
325 if let Some(dep) = state.services.iter().find(|s| s.id == *dep_id) {
326 let dep_marker = dep.status.marker();
327 output
328 .push_str(&format!(" -> [{}] {} #{}\n", dep_marker, dep.name, dep.id));
329 } else {
330 output.push_str(&format!(" -> [?] unknown #{}\n", dep_id));
331 }
332 }
333 }
334 }
335
336 Ok(ToolOutput::text(output))
337 }
338
339 async fn action_health_check(&self, args: &Value) -> Result<ToolOutput, ToolError> {
340 let mut state = self.load_state();
341 if state.services.is_empty() {
342 return Ok(ToolOutput::text("No services to check."));
343 }
344
345 let target_id = args
346 .get("service_id")
347 .and_then(|v| v.as_u64())
348 .map(|n| n as usize);
349
350 let client = reqwest::Client::builder()
351 .timeout(Duration::from_secs(10))
352 .build()
353 .map_err(|e| ToolError::ExecutionFailed {
354 name: "system_monitor".to_string(),
355 message: format!("Failed to create HTTP client: {}", e),
356 })?;
357
358 let indices: Vec<usize> = if let Some(sid) = target_id {
359 match state.services.iter().position(|s| s.id == sid) {
360 Some(idx) => vec![idx],
361 None => {
362 return Ok(ToolOutput::text(format!("Service #{} not found.", sid)));
363 }
364 }
365 } else {
366 (0..state.services.len()).collect()
367 };
368
369 let mut results = Vec::new();
370
371 for idx in indices {
372 let service = &state.services[idx];
373 let endpoint = service.health_endpoint.as_deref().unwrap_or("/health");
374 let check_url = format!("{}{}", service.url.trim_end_matches('/'), endpoint);
375
376 let start = std::time::Instant::now();
377 let result = client.get(&check_url).send().await;
378 let elapsed_ms = start.elapsed().as_millis() as u64;
379
380 let (new_status, detail) = match result {
381 Ok(resp) => {
382 let status_code = resp.status().as_u16();
383 if (200..300).contains(&status_code) {
384 (
385 ServiceStatus::Healthy,
386 format!("HTTP {} ({}ms)", status_code, elapsed_ms),
387 )
388 } else {
389 (
390 ServiceStatus::Degraded,
391 format!("HTTP {} ({}ms)", status_code, elapsed_ms),
392 )
393 }
394 }
395 Err(e) => {
396 let msg = if e.is_timeout() {
397 "timeout".to_string()
398 } else if e.is_connect() {
399 "connection refused".to_string()
400 } else {
401 format!("{}", e)
402 };
403 (ServiceStatus::Down, msg)
404 }
405 };
406
407 let svc = &mut state.services[idx];
408 svc.status = new_status.clone();
409 svc.last_checked = Some(Utc::now());
410 svc.response_time_ms = Some(elapsed_ms);
411
412 results.push(format!(
413 " {} #{} ({}): {} - {}",
414 svc.name,
415 svc.id,
416 new_status.marker(),
417 new_status,
418 detail
419 ));
420 }
421
422 self.save_state(&state)?;
423
424 let mut output = String::from("Health Check Results\n====================\n");
425 for r in &results {
426 output.push_str(r);
427 output.push('\n');
428 }
429 Ok(ToolOutput::text(output))
430 }
431
432 fn action_log_incident(&self, args: &Value) -> Result<ToolOutput, ToolError> {
433 let title = args
434 .get("title")
435 .and_then(|v| v.as_str())
436 .unwrap_or("")
437 .trim();
438 if title.is_empty() {
439 return Ok(ToolOutput::text("Please provide an incident title."));
440 }
441
442 let severity_str = args.get("severity").and_then(|v| v.as_str()).unwrap_or("");
443 let severity = match IncidentSeverity::from_str(severity_str) {
444 Some(s) => s,
445 None => {
446 return Ok(ToolOutput::text(format!(
447 "Invalid severity: '{}'. Use: low, medium, high, critical",
448 severity_str
449 )));
450 }
451 };
452
453 let affected_services: Vec<usize> = args
454 .get("affected_services")
455 .and_then(|v| v.as_array())
456 .map(|arr| {
457 arr.iter()
458 .filter_map(|v| v.as_u64().map(|n| n as usize))
459 .collect()
460 })
461 .unwrap_or_default();
462
463 let message = args
464 .get("message")
465 .and_then(|v| v.as_str())
466 .unwrap_or("Incident created")
467 .to_string();
468
469 let now = Utc::now();
470 let mut state = self.load_state();
471 let id = state.next_incident_id;
472 state.next_incident_id += 1;
473
474 state.incidents.push(Incident {
475 id,
476 title: title.to_string(),
477 severity,
478 affected_services,
479 timeline: vec![TimelineEntry {
480 timestamp: now,
481 message,
482 }],
483 status: IncidentStatus::Investigating,
484 root_cause: None,
485 resolution: None,
486 created_at: now,
487 });
488
489 self.save_state(&state)?;
490 Ok(ToolOutput::text(format!(
491 "Incident #{} logged: '{}' [{}] - Status: Investigating",
492 id, title, severity_str
493 )))
494 }
495
496 fn action_correlate(&self) -> Result<ToolOutput, ToolError> {
497 let state = self.load_state();
498
499 let mut service_incident_count: std::collections::HashMap<usize, usize> =
500 std::collections::HashMap::new();
501 let mut severity_count: std::collections::HashMap<String, usize> =
502 std::collections::HashMap::new();
503
504 for incident in &state.incidents {
505 for sid in &incident.affected_services {
506 *service_incident_count.entry(*sid).or_insert(0) += 1;
507 }
508 *severity_count
509 .entry(format!("{}", incident.severity))
510 .or_insert(0) += 1;
511 }
512
513 let mut output =
514 String::from("Incident Correlation Analysis\n=============================\n\n");
515
516 output.push_str(&format!("Total incidents: {}\n\n", state.incidents.len()));
517
518 if !severity_count.is_empty() {
519 output.push_str("Severity breakdown:\n");
520 for (sev, count) in &severity_count {
521 output.push_str(&format!(" {}: {}\n", sev, count));
522 }
523 output.push('\n');
524 }
525
526 if !service_incident_count.is_empty() {
527 output.push_str("Services by incident frequency:\n");
528 let mut sorted: Vec<_> = service_incident_count.iter().collect();
529 sorted.sort_by(|a, b| b.1.cmp(a.1));
530 for (sid, count) in sorted {
531 let name = state
532 .services
533 .iter()
534 .find(|s| s.id == *sid)
535 .map(|s| s.name.as_str())
536 .unwrap_or("unknown");
537 output.push_str(&format!(" {} (#{}) - {} incidents\n", name, sid, count));
538 }
539 output.push('\n');
540 }
541
542 output.push_str("--- LLM Analysis Prompt ---\n");
543 output.push_str("Based on the above incident data, identify:\n");
544 output.push_str("1. Common failure patterns and root causes\n");
545 output.push_str("2. Services that are single points of failure\n");
546 output.push_str("3. Recommendations for improving reliability\n");
547
548 Ok(ToolOutput::text(output))
549 }
550
551 fn action_generate_runbook(&self, args: &Value) -> Result<ToolOutput, ToolError> {
552 let service_id = match args.get("service_id").and_then(|v| v.as_u64()) {
553 Some(id) => id as usize,
554 None => {
555 return Ok(ToolOutput::text("Please provide a service_id."));
556 }
557 };
558
559 let state = self.load_state();
560 let service = match state.services.iter().find(|s| s.id == service_id) {
561 Some(s) => s,
562 None => {
563 return Ok(ToolOutput::text(format!(
564 "Service #{} not found.",
565 service_id
566 )));
567 }
568 };
569
570 let dep_names: Vec<String> = service
571 .dependencies
572 .iter()
573 .filter_map(|did| {
574 state
575 .services
576 .iter()
577 .find(|s| s.id == *did)
578 .map(|s| format!("{} (#{})", s.name, s.id))
579 })
580 .collect();
581
582 let related_incidents: Vec<&Incident> = state
583 .incidents
584 .iter()
585 .filter(|i| i.affected_services.contains(&service_id))
586 .collect();
587
588 let mut output = String::from("--- Runbook Generation Prompt ---\n\n");
589 output.push_str(&format!("Service: {} (#{}))\n", service.name, service.id));
590 output.push_str(&format!("Type: {}\n", service.service_type));
591 output.push_str(&format!("URL: {}\n", service.url));
592 output.push_str(&format!("Status: {}\n", service.status));
593 if let Some(ref ep) = service.health_endpoint {
594 output.push_str(&format!("Health endpoint: {}\n", ep));
595 }
596
597 if !dep_names.is_empty() {
598 output.push_str(&format!("Dependencies: {}\n", dep_names.join(", ")));
599 }
600
601 if !related_incidents.is_empty() {
602 output.push_str(&format!(
603 "\nRecent incidents ({}): \n",
604 related_incidents.len()
605 ));
606 for inc in related_incidents.iter().rev().take(5) {
607 output.push_str(&format!(
608 " - #{} [{}] {} ({})\n",
609 inc.id, inc.severity, inc.title, inc.status
610 ));
611 }
612 }
613
614 output.push_str("\nPlease generate a runbook covering:\n");
615 output.push_str("1. Service overview and architecture\n");
616 output.push_str("2. Health check procedures\n");
617 output.push_str("3. Common failure modes and troubleshooting steps\n");
618 output.push_str("4. Escalation procedures\n");
619 output.push_str("5. Recovery procedures\n");
620
621 Ok(ToolOutput::text(output))
622 }
623
624 fn action_impact_analysis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
625 let service_id = match args.get("service_id").and_then(|v| v.as_u64()) {
626 Some(id) => id as usize,
627 None => {
628 return Ok(ToolOutput::text("Please provide a service_id."));
629 }
630 };
631
632 let state = self.load_state();
633
634 if !state.services.iter().any(|s| s.id == service_id) {
635 return Ok(ToolOutput::text(format!(
636 "Service #{} not found.",
637 service_id
638 )));
639 }
640
641 let mut impacted: Vec<usize> = Vec::new();
643 let mut visited: HashSet<usize> = HashSet::new();
644 let mut queue: VecDeque<usize> = VecDeque::new();
645
646 queue.push_back(service_id);
647 visited.insert(service_id);
648
649 while let Some(current_id) = queue.pop_front() {
650 for svc in &state.services {
652 if svc.dependencies.contains(¤t_id) && !visited.contains(&svc.id) {
653 visited.insert(svc.id);
654 impacted.push(svc.id);
655 queue.push_back(svc.id);
656 }
657 }
658 }
659
660 let source_name = state
661 .services
662 .iter()
663 .find(|s| s.id == service_id)
664 .map(|s| s.name.as_str())
665 .unwrap_or("unknown");
666
667 let mut output = String::from("Impact Analysis\n===============\n\n");
668 output.push_str(&format!(
669 "If '{}' (#{}) goes down:\n\n",
670 source_name, service_id
671 ));
672
673 if impacted.is_empty() {
674 output.push_str("No other services depend on this service.\n");
675 } else {
676 output.push_str(&format!(
677 "{} service(s) would be affected:\n",
678 impacted.len()
679 ));
680 for sid in &impacted {
681 if let Some(svc) = state.services.iter().find(|s| s.id == *sid) {
682 output.push_str(&format!(
683 " - {} (#{}) [{}]\n",
684 svc.name, svc.id, svc.service_type
685 ));
686 }
687 }
688 }
689
690 Ok(ToolOutput::text(output))
691 }
692
693 fn action_list_services(&self, args: &Value) -> Result<ToolOutput, ToolError> {
694 let state = self.load_state();
695 if state.services.is_empty() {
696 return Ok(ToolOutput::text(
697 "No services registered. Use add_service to register services.",
698 ));
699 }
700
701 let status_filter = args.get("status").and_then(|v| v.as_str());
702
703 let filtered: Vec<&Service> = state
704 .services
705 .iter()
706 .filter(|s| {
707 if let Some(filter) = status_filter {
708 match filter.to_lowercase().as_str() {
709 "healthy" => s.status == ServiceStatus::Healthy,
710 "degraded" => s.status == ServiceStatus::Degraded,
711 "down" => s.status == ServiceStatus::Down,
712 "unknown" => s.status == ServiceStatus::Unknown,
713 _ => true,
714 }
715 } else {
716 true
717 }
718 })
719 .collect();
720
721 if filtered.is_empty() {
722 return Ok(ToolOutput::text(format!(
723 "No services match the filter '{}'.",
724 status_filter.unwrap_or("all")
725 )));
726 }
727
728 let mut output = String::from("Services\n========\n");
729 for svc in &filtered {
730 let checked = svc
731 .last_checked
732 .map(|t| format!(" (last checked: {})", t.format("%Y-%m-%d %H:%M:%S UTC")))
733 .unwrap_or_default();
734 let rt = svc
735 .response_time_ms
736 .map(|ms| format!(" [{}ms]", ms))
737 .unwrap_or_default();
738 output.push_str(&format!(
739 " #{} {} [{}] ({}) - {}{}{}\n",
740 svc.id,
741 svc.name,
742 svc.status.marker(),
743 svc.service_type,
744 svc.status,
745 rt,
746 checked,
747 ));
748 }
749 output.push_str(&format!("\nTotal: {} service(s)\n", filtered.len()));
750
751 Ok(ToolOutput::text(output))
752 }
753}
754
755#[async_trait]
756impl Tool for SystemMonitorTool {
757 fn name(&self) -> &str {
758 "system_monitor"
759 }
760
761 fn description(&self) -> &str {
762 "Production system monitoring: service topology, health checks, incident tracking. Actions: add_service, topology, health_check, log_incident, correlate, generate_runbook, impact_analysis, list_services."
763 }
764
765 fn parameters_schema(&self) -> Value {
766 json!({
767 "type": "object",
768 "properties": {
769 "action": {
770 "type": "string",
771 "enum": ["add_service", "topology", "health_check", "log_incident", "correlate", "generate_runbook", "impact_analysis", "list_services"],
772 "description": "Action to perform"
773 },
774 "name": { "type": "string", "description": "Service name (for add_service)" },
775 "url": { "type": "string", "description": "Service URL (for add_service)" },
776 "service_type": {
777 "type": "string",
778 "enum": ["api", "database", "cache", "queue", "frontend", "worker", "gateway"],
779 "description": "Type of service (for add_service)"
780 },
781 "dependencies": {
782 "type": "array",
783 "items": { "type": "integer" },
784 "description": "Array of service IDs this service depends on (for add_service)"
785 },
786 "health_endpoint": { "type": "string", "description": "Health check endpoint path (default: /health)" },
787 "service_id": { "type": "integer", "description": "Target service ID (for health_check, generate_runbook, impact_analysis)" },
788 "title": { "type": "string", "description": "Incident title (for log_incident)" },
789 "severity": {
790 "type": "string",
791 "enum": ["low", "medium", "high", "critical"],
792 "description": "Incident severity (for log_incident)"
793 },
794 "affected_services": {
795 "type": "array",
796 "items": { "type": "integer" },
797 "description": "Array of affected service IDs (for log_incident)"
798 },
799 "message": { "type": "string", "description": "Incident timeline message (for log_incident)" },
800 "status": {
801 "type": "string",
802 "enum": ["healthy", "degraded", "down", "unknown"],
803 "description": "Filter by status (for list_services)"
804 }
805 },
806 "required": ["action"]
807 })
808 }
809
810 fn risk_level(&self) -> RiskLevel {
811 RiskLevel::Network
812 }
813
814 fn timeout(&self) -> Duration {
815 Duration::from_secs(60)
816 }
817
818 async fn execute(&self, args: Value) -> Result<ToolOutput, ToolError> {
819 let action = args.get("action").and_then(|v| v.as_str()).unwrap_or("");
820
821 match action {
822 "add_service" => self.action_add_service(&args),
823 "topology" => self.action_topology(),
824 "health_check" => self.action_health_check(&args).await,
825 "log_incident" => self.action_log_incident(&args),
826 "correlate" => self.action_correlate(),
827 "generate_runbook" => self.action_generate_runbook(&args),
828 "impact_analysis" => self.action_impact_analysis(&args),
829 "list_services" => self.action_list_services(&args),
830 _ => Ok(ToolOutput::text(format!(
831 "Unknown action: '{}'. Use: add_service, topology, health_check, log_incident, correlate, generate_runbook, impact_analysis, list_services",
832 action
833 ))),
834 }
835 }
836}
837
838#[cfg(test)]
839mod tests {
840 use super::*;
841 use tempfile::TempDir;
842
843 fn make_tool() -> (SystemMonitorTool, PathBuf) {
844 let dir = TempDir::new().unwrap();
845 let workspace = dir.path().canonicalize().unwrap();
846 let tool = SystemMonitorTool::new(workspace.clone());
847 std::mem::forget(dir);
849 (tool, workspace)
850 }
851
/// The static metadata reported through the `Tool` trait.
#[test]
fn test_tool_properties() {
    let (monitor, _ws) = make_tool();

    assert_eq!(monitor.name(), "system_monitor");
    assert!(monitor.description().contains("service topology"));
    assert_eq!(monitor.risk_level(), RiskLevel::Network);
    assert_eq!(monitor.timeout(), Duration::from_secs(60));
}
860
/// The schema exposes eight actions and requires only `action`.
#[test]
fn test_schema_validation() {
    let (monitor, _ws) = make_tool();
    let schema = monitor.parameters_schema();

    assert!(schema.get("properties").is_some());

    let action_prop = &schema["properties"]["action"];
    assert!(action_prop.get("enum").is_some());
    let actions = action_prop["enum"].as_array().unwrap();
    assert_eq!(actions.len(), 8);

    let required = schema["required"].as_array().unwrap();
    assert!(required.contains(&json!("action")));
}
876
877 #[tokio::test]
878 async fn test_add_service() {
879 let (tool, ws) = make_tool();
880
881 let result = tool
882 .execute(json!({
883 "action": "add_service",
884 "name": "user-api",
885 "url": "http://localhost:8080",
886 "service_type": "api",
887 "health_endpoint": "/healthz"
888 }))
889 .await
890 .unwrap();
891
892 assert!(result.content.contains("user-api"));
893 assert!(result.content.contains("#1"));
894
895 let state_path = ws.join(".rustant").join("monitoring").join("topology.json");
897 assert!(state_path.exists());
898
899 let state: MonitorState =
900 serde_json::from_str(&std::fs::read_to_string(&state_path).unwrap()).unwrap();
901 assert_eq!(state.services.len(), 1);
902 assert_eq!(state.services[0].name, "user-api");
903 assert_eq!(state.services[0].status, ServiceStatus::Unknown);
904 assert_eq!(
905 state.services[0].health_endpoint,
906 Some("/healthz".to_string())
907 );
908 }
909
910 #[tokio::test]
911 async fn test_topology_output() {
912 let (tool, _ws) = make_tool();
913
914 tool.execute(json!({
916 "action": "add_service",
917 "name": "postgres",
918 "url": "http://db:5432",
919 "service_type": "database"
920 }))
921 .await
922 .unwrap();
923
924 tool.execute(json!({
926 "action": "add_service",
927 "name": "redis",
928 "url": "http://cache:6379",
929 "service_type": "cache"
930 }))
931 .await
932 .unwrap();
933
934 tool.execute(json!({
936 "action": "add_service",
937 "name": "user-api",
938 "url": "http://api:3000",
939 "service_type": "api",
940 "dependencies": [1, 2]
941 }))
942 .await
943 .unwrap();
944
945 let result = tool.execute(json!({"action": "topology"})).await.unwrap();
946
947 assert!(result.content.contains("Service Topology"));
948 assert!(result.content.contains("postgres"));
949 assert!(result.content.contains("redis"));
950 assert!(result.content.contains("user-api"));
951 assert!(result.content.contains("-> [?] postgres #1"));
953 assert!(result.content.contains("-> [?] redis #2"));
954 }
955
956 #[tokio::test]
957 async fn test_health_check_invalid_url() {
958 let (tool, _ws) = make_tool();
959
960 tool.execute(json!({
962 "action": "add_service",
963 "name": "bad-service",
964 "url": "http://127.0.0.1:1",
965 "service_type": "api"
966 }))
967 .await
968 .unwrap();
969
970 let result = tool
971 .execute(json!({
972 "action": "health_check",
973 "service_id": 1
974 }))
975 .await
976 .unwrap();
977
978 assert!(result.content.contains("Health Check Results"));
979 assert!(result.content.contains("bad-service"));
980 assert!(result.content.contains("Down"));
981
982 let state = tool.load_state();
984 assert_eq!(state.services[0].status, ServiceStatus::Down);
985 assert!(state.services[0].last_checked.is_some());
986 }
987
988 #[tokio::test]
989 async fn test_log_incident() {
990 let (tool, _ws) = make_tool();
991
992 tool.execute(json!({
994 "action": "add_service",
995 "name": "api",
996 "url": "http://localhost:8080",
997 "service_type": "api"
998 }))
999 .await
1000 .unwrap();
1001
1002 let result = tool
1003 .execute(json!({
1004 "action": "log_incident",
1005 "title": "API latency spike",
1006 "severity": "high",
1007 "affected_services": [1],
1008 "message": "Response times exceeded 5s"
1009 }))
1010 .await
1011 .unwrap();
1012
1013 assert!(result.content.contains("Incident #1"));
1014 assert!(result.content.contains("API latency spike"));
1015 assert!(result.content.contains("Investigating"));
1016
1017 let state = tool.load_state();
1019 assert_eq!(state.incidents.len(), 1);
1020 assert_eq!(state.incidents[0].title, "API latency spike");
1021 assert_eq!(state.incidents[0].severity, IncidentSeverity::High);
1022 assert_eq!(state.incidents[0].status, IncidentStatus::Investigating);
1023 assert_eq!(state.incidents[0].timeline.len(), 1);
1024 assert_eq!(
1025 state.incidents[0].timeline[0].message,
1026 "Response times exceeded 5s"
1027 );
1028 assert_eq!(state.incidents[0].affected_services, vec![1]);
1029 }
1030
1031 #[tokio::test]
1032 async fn test_impact_analysis() {
1033 let (tool, _ws) = make_tool();
1034
1035 tool.execute(json!({
1037 "action": "add_service",
1038 "name": "database-C",
1039 "url": "http://db:5432",
1040 "service_type": "database"
1041 }))
1042 .await
1043 .unwrap();
1044
1045 tool.execute(json!({
1047 "action": "add_service",
1048 "name": "cache-B",
1049 "url": "http://cache:6379",
1050 "service_type": "cache",
1051 "dependencies": [1]
1052 }))
1053 .await
1054 .unwrap();
1055
1056 tool.execute(json!({
1058 "action": "add_service",
1059 "name": "api-A",
1060 "url": "http://api:3000",
1061 "service_type": "api",
1062 "dependencies": [2]
1063 }))
1064 .await
1065 .unwrap();
1066
1067 let result = tool
1069 .execute(json!({
1070 "action": "impact_analysis",
1071 "service_id": 1
1072 }))
1073 .await
1074 .unwrap();
1075
1076 assert!(result.content.contains("Impact Analysis"));
1077 assert!(result.content.contains("database-C"));
1078 assert!(result.content.contains("cache-B"));
1079 assert!(result.content.contains("api-A"));
1080 assert!(result.content.contains("2 service(s) would be affected"));
1081 }
1082
1083 #[tokio::test]
1084 async fn test_list_services_filter() {
1085 let (tool, _ws) = make_tool();
1086
1087 tool.execute(json!({
1089 "action": "add_service",
1090 "name": "svc-a",
1091 "url": "http://a:80",
1092 "service_type": "api"
1093 }))
1094 .await
1095 .unwrap();
1096
1097 tool.execute(json!({
1098 "action": "add_service",
1099 "name": "svc-b",
1100 "url": "http://b:80",
1101 "service_type": "worker"
1102 }))
1103 .await
1104 .unwrap();
1105
1106 let result = tool
1108 .execute(json!({"action": "list_services"}))
1109 .await
1110 .unwrap();
1111 assert!(result.content.contains("svc-a"));
1112 assert!(result.content.contains("svc-b"));
1113 assert!(result.content.contains("Total: 2"));
1114
1115 let result = tool
1117 .execute(json!({"action": "list_services", "status": "unknown"}))
1118 .await
1119 .unwrap();
1120 assert!(result.content.contains("svc-a"));
1121 assert!(result.content.contains("svc-b"));
1122
1123 let result = tool
1125 .execute(json!({"action": "list_services", "status": "healthy"}))
1126 .await
1127 .unwrap();
1128 assert!(result.content.contains("No services match"));
1129 }
1130
1131 #[tokio::test]
1132 async fn test_correlate_empty() {
1133 let (tool, _ws) = make_tool();
1134
1135 let result = tool.execute(json!({"action": "correlate"})).await.unwrap();
1136
1137 assert!(result.content.contains("Incident Correlation Analysis"));
1138 assert!(result.content.contains("Total incidents: 0"));
1139 assert!(result.content.contains("LLM Analysis Prompt"));
1140 }
1141
1142 #[tokio::test]
1143 async fn test_generate_runbook_returns_prompt() {
1144 let (tool, _ws) = make_tool();
1145
1146 tool.execute(json!({
1147 "action": "add_service",
1148 "name": "payment-api",
1149 "url": "http://payments:8080",
1150 "service_type": "api",
1151 "health_endpoint": "/status"
1152 }))
1153 .await
1154 .unwrap();
1155
1156 let result = tool
1157 .execute(json!({
1158 "action": "generate_runbook",
1159 "service_id": 1
1160 }))
1161 .await
1162 .unwrap();
1163
1164 assert!(result.content.contains("Runbook Generation Prompt"));
1165 assert!(result.content.contains("payment-api"));
1166 assert!(result.content.contains("http://payments:8080"));
1167 assert!(result.content.contains("/status"));
1168 assert!(result.content.contains("Health check procedures"));
1169 assert!(result.content.contains("Recovery procedures"));
1170 }
1171
1172 #[tokio::test]
1173 async fn test_state_roundtrip() {
1174 let (tool, _ws) = make_tool();
1175
1176 tool.execute(json!({
1178 "action": "add_service",
1179 "name": "db",
1180 "url": "http://db:5432",
1181 "service_type": "database"
1182 }))
1183 .await
1184 .unwrap();
1185
1186 tool.execute(json!({
1187 "action": "add_service",
1188 "name": "api",
1189 "url": "http://api:3000",
1190 "service_type": "api",
1191 "dependencies": [1]
1192 }))
1193 .await
1194 .unwrap();
1195
1196 tool.execute(json!({
1197 "action": "log_incident",
1198 "title": "DB down",
1199 "severity": "critical",
1200 "affected_services": [1]
1201 }))
1202 .await
1203 .unwrap();
1204
1205 let state = tool.load_state();
1207 assert_eq!(state.services.len(), 2);
1208 assert_eq!(state.incidents.len(), 1);
1209 assert_eq!(state.next_service_id, 3);
1210 assert_eq!(state.next_incident_id, 2);
1211 assert_eq!(state.services[1].dependencies, vec![1]);
1212 assert_eq!(state.incidents[0].severity, IncidentSeverity::Critical);
1213
1214 let json = serde_json::to_string_pretty(&state).unwrap();
1216 let restored: MonitorState = serde_json::from_str(&json).unwrap();
1217 assert_eq!(restored.services.len(), 2);
1218 assert_eq!(restored.incidents.len(), 1);
1219 assert_eq!(restored.next_service_id, 3);
1220 assert_eq!(restored.next_incident_id, 2);
1221 }
1222
1223 #[tokio::test]
1224 async fn test_unknown_action() {
1225 let (tool, _ws) = make_tool();
1226
1227 let result = tool
1228 .execute(json!({"action": "nonexistent"}))
1229 .await
1230 .unwrap();
1231
1232 assert!(result.content.contains("Unknown action"));
1233 assert!(result.content.contains("nonexistent"));
1234 }
1235}