syncable_cli/agent/tools/
k8s_optimize.rs

1//! K8s Optimize tool - Native Kubernetes resource optimization using Rig's Tool trait
2//!
3//! Analyzes Kubernetes manifests for over-provisioned or under-provisioned
4//! resources and suggests right-sized values.
5//!
6//! Output is optimized for AI agent decision-making with:
7//! - Categorized issues (over-provisioned, under-provisioned, missing resources)
8//! - Priority rankings (critical, high, medium, low)
9//! - Actionable fix recommendations with YAML snippets
10//! - Cost savings estimates (when available)
11//! - Live cluster analysis (optional, via Prometheus)
12//!
13//! # Prometheus Integration
14//!
15//! For data-driven recommendations based on actual usage:
16//! 1. Use `prometheus_discover` to find Prometheus in cluster
17//! 2. Use `prometheus_connect` to establish connection (port-forward or URL)
18//! 3. Use `k8s_optimize` with the prometheus URL from step 2
19
20use super::compression::{CompressionConfig, compress_tool_output};
21use super::error::{ErrorCategory, format_error_for_llm};
22use rig::completion::ToolDefinition;
23use rig::tool::Tool;
24use serde::{Deserialize, Serialize};
25use serde_json::json;
26use std::path::PathBuf;
27
28use crate::analyzer::k8s_optimize::{
29    K8sOptimizeConfig, OptimizationResult, PrometheusAuth, PrometheusClient, Severity, analyze,
30    analyze_content, bytes_to_memory_string, millicores_to_cpu_string, parse_cpu_to_millicores,
31    parse_memory_to_bytes, rule_codes, rule_description,
32};
33
/// Arguments for the k8s-optimize tool.
///
/// Every field carries `#[serde(default)]`, so a key that is missing from the
/// incoming JSON deserializes to `None` (or `false` for the boolean flags)
/// instead of failing.
///
/// NOTE(review): the derived `Debug` output prints `prometheus_password` and
/// `prometheus_token` verbatim — avoid logging this struct with `{:?}`.
#[derive(Debug, Deserialize)]
pub struct K8sOptimizeArgs {
    /// Path to K8s manifest file or directory (relative to project root).
    /// Absolute paths are used as-is; when omitted the tool falls back to `.`.
    /// Only consulted for the main analysis when `content` is absent or blank.
    #[serde(default)]
    pub path: Option<String>,

    /// Inline YAML content to analyze (alternative to path).
    /// Empty or whitespace-only content is treated as if it were not supplied.
    #[serde(default)]
    pub content: Option<String>,

    /// Minimum severity to report: "critical", "high", "medium", "low", "info".
    /// A value that `Severity::parse` does not recognize is ignored and the
    /// default configuration is kept.
    #[serde(default)]
    pub severity: Option<String>,

    /// Minimum waste percentage to report (default: 10)
    #[serde(default)]
    pub threshold: Option<u8>,

    /// Include info-level suggestions
    #[serde(default)]
    pub include_info: bool,

    /// Include system namespaces (kube-system, etc.)
    #[serde(default)]
    pub include_system: bool,

    /// Run FULL comprehensive analysis (optimize + kubelint security + helmlint).
    /// The extra linters run against `path` (or `.`), not against `content`.
    #[serde(default)]
    pub full: bool,

    // ========== Live Analysis Options (Phase 2) ==========
    /// Connect to a Kubernetes cluster (kubeconfig context name).
    /// In the formatting code of this file the value is only echoed back in
    /// the `live_analysis` section of the output.
    #[serde(default)]
    pub cluster: Option<String>,

    /// Prometheus URL for historical metrics (e.g., "http://localhost:9090" from port-forward)
    /// Use prometheus_discover and prometheus_connect tools to get this URL
    #[serde(default)]
    pub prometheus: Option<String>,

    /// Prometheus authentication type: "none", "basic", "bearer" (default: "none")
    /// Only needed for externally exposed Prometheus, NOT for port-forward connections.
    /// Any other value, or a type whose credentials are missing, results in no
    /// authentication being used.
    #[serde(default)]
    pub prometheus_auth_type: Option<String>,

    /// Username for Prometheus basic auth (only for external Prometheus).
    /// Used only together with `prometheus_password` and auth type "basic".
    #[serde(default)]
    pub prometheus_username: Option<String>,

    /// Password for Prometheus basic auth (only for external Prometheus).
    /// Used only together with `prometheus_username` and auth type "basic".
    #[serde(default)]
    pub prometheus_password: Option<String>,

    /// Bearer token for Prometheus auth (only for external Prometheus).
    /// Used only with auth type "bearer".
    #[serde(default)]
    pub prometheus_token: Option<String>,

    /// Analysis period for live metrics (e.g., "7d", "24h", "1h").
    /// Falls back to "7d" when omitted.
    #[serde(default)]
    pub period: Option<String>,

    // ========== Cost Estimation Options (Phase 3) ==========
    /// Cloud provider for cost estimation: "aws", "gcp", "azure", "onprem".
    /// Supplying a provider adds a `cost_estimation` section to the output.
    #[serde(default)]
    pub cloud_provider: Option<String>,

    /// Cloud region for pricing (e.g., "us-east-1", "us-central1").
    /// Reported as "us-east-1" when omitted.
    #[serde(default)]
    pub region: Option<String>,
}
105
/// Error type for k8s-optimize tool.
///
/// A newtype around a human-readable message. Its `Display` output is
/// `K8s optimize error: <message>`, as declared by the `#[error]` attribute.
#[derive(Debug, thiserror::Error)]
#[error("K8s optimize error: {0}")]
pub struct K8sOptimizeError(String);
110
/// Result of Prometheus enhancement.
///
/// Returned by `K8sOptimizeTool::enhance_with_prometheus` to describe how many
/// recommendations could be backed by historical metrics.
struct PrometheusEnhancement {
    /// Number of recommendations enhanced with live data
    /// (one per recommendation whose history query succeeded).
    enhanced_count: usize,
    /// Number of recommendations for which the history query returned an
    /// error; the cause of the error is not recorded.
    no_data_count: usize,
    /// Raw Prometheus data for each enhanced recommendation: usage
    /// percentiles plus the derived request values, as JSON objects.
    prometheus_data: Vec<serde_json::Value>,
}
120
/// Find Helm charts at or below `path`.
///
/// A directory counts as a chart when it directly contains a `Chart.yaml`.
/// The search is deliberately shallow:
///
/// 1. If `path` itself is a chart, it is the only result.
/// 2. Otherwise every immediate sub-directory is checked; a chart found there
///    is recorded and not searched any further.
/// 3. Sub-directories that are not charts are searched exactly one level
///    deeper.
///
/// Directories that cannot be read (including a `path` that is a plain file or
/// does not exist) contribute nothing, so the function returns an empty list
/// rather than an error in those cases.
///
/// The returned paths are sorted. `read_dir` yields entries in a
/// platform-dependent order, and sorting keeps the tool output stable between
/// runs and machines.
fn find_helm_charts(path: &std::path::Path) -> Vec<PathBuf> {
    /// True when `dir` directly contains a `Chart.yaml`.
    fn is_chart(dir: &std::path::Path) -> bool {
        dir.join("Chart.yaml").exists()
    }

    /// Immediate sub-directories of `dir`; empty when `dir` cannot be read.
    fn subdirs(dir: &std::path::Path) -> Vec<PathBuf> {
        std::fs::read_dir(dir)
            .map(|entries| {
                entries
                    .flatten()
                    .map(|entry| entry.path())
                    .filter(|candidate| candidate.is_dir())
                    .collect()
            })
            .unwrap_or_default()
    }

    if is_chart(path) {
        return vec![path.to_path_buf()];
    }

    let mut charts = Vec::new();
    for child in subdirs(path) {
        if is_chart(&child) {
            charts.push(child);
        } else {
            // Not a chart itself: look one level further, but no deeper.
            charts.extend(
                subdirs(&child)
                    .into_iter()
                    .filter(|grandchild| is_chart(grandchild)),
            );
        }
    }

    charts.sort();
    charts
}
150
/// Tool for analyzing Kubernetes resource configurations.
///
/// Holds only the project root, which is the directory that relative `path`
/// arguments are resolved against.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct K8sOptimizeTool {
    /// Base directory joined with relative manifest paths.
    project_root: PathBuf,
}
156
157impl K8sOptimizeTool {
    /// Create a new K8sOptimizeTool with the given project root.
    ///
    /// Relative `path` arguments passed to the tool are joined onto
    /// `project_root`; absolute paths bypass it.
    pub fn new(project_root: PathBuf) -> Self {
        Self { project_root }
    }
162
163    /// Build PrometheusAuth from arguments (optional, only for external URLs)
164    fn build_prometheus_auth(args: &K8sOptimizeArgs) -> PrometheusAuth {
165        match args.prometheus_auth_type.as_deref() {
166            Some("basic") => {
167                if let (Some(username), Some(password)) =
168                    (&args.prometheus_username, &args.prometheus_password)
169                {
170                    PrometheusAuth::Basic {
171                        username: username.clone(),
172                        password: password.clone(),
173                    }
174                } else {
175                    PrometheusAuth::None
176                }
177            }
178            Some("bearer") => {
179                if let Some(token) = &args.prometheus_token {
180                    PrometheusAuth::Bearer(token.clone())
181                } else {
182                    PrometheusAuth::None
183                }
184            }
185            _ => PrometheusAuth::None,
186        }
187    }
188
189    /// Enhance recommendations with live Prometheus data.
190    ///
191    /// For each workload in the static analysis, query Prometheus for historical
192    /// CPU/memory usage and replace heuristic recommendations with data-driven ones.
193    async fn enhance_with_prometheus(
194        &self,
195        result: &mut OptimizationResult,
196        client: &PrometheusClient,
197        period: &str,
198    ) -> PrometheusEnhancement {
199        let mut enhanced_count = 0;
200        let mut no_data_count = 0;
201        let mut prometheus_data: Vec<serde_json::Value> = Vec::new();
202
203        for rec in &mut result.recommendations {
204            let namespace = rec.namespace.as_deref().unwrap_or("default");
205            let workload_name = &rec.resource_name;
206            let container = &rec.container;
207
208            // Parse current resource values from String to u64
209            let current_cpu_millicores = rec
210                .current
211                .cpu_request
212                .as_ref()
213                .and_then(|s| parse_cpu_to_millicores(s));
214            let current_memory_bytes = rec
215                .current
216                .memory_request
217                .as_ref()
218                .and_then(|s| parse_memory_to_bytes(s));
219
220            // Query Prometheus for historical data
221            match client
222                .get_container_history(namespace, workload_name, container, period)
223                .await
224            {
225                Ok(history) => {
226                    // Generate data-driven recommendation
227                    let historical_rec = PrometheusClient::generate_recommendation(
228                        &history,
229                        current_cpu_millicores,
230                        current_memory_bytes,
231                        20, // 20% safety margin
232                    );
233
234                    // Convert recommended values back to strings
235                    let cpu_str = millicores_to_cpu_string(historical_rec.recommended_cpu_request);
236                    let mem_str = bytes_to_memory_string(historical_rec.recommended_memory_request);
237                    let cpu_limit_str =
238                        millicores_to_cpu_string(historical_rec.recommended_cpu_request * 2);
239
240                    // Store the prometheus data for output
241                    prometheus_data.push(serde_json::json!({
242                        "workload": format!("{}/{}", namespace, workload_name),
243                        "container": container,
244                        "period": period,
245                        "samples": history.sample_count,
246                        "cpu_usage": {
247                            "min": history.cpu_min,
248                            "p50": history.cpu_p50,
249                            "p95": history.cpu_p95,
250                            "p99": history.cpu_p99,
251                            "max": history.cpu_max,
252                            "avg": history.cpu_avg,
253                        },
254                        "memory_usage": {
255                            "min_bytes": history.memory_min,
256                            "p50_bytes": history.memory_p50,
257                            "p95_bytes": history.memory_p95,
258                            "p99_bytes": history.memory_p99,
259                            "max_bytes": history.memory_max,
260                            "avg_bytes": history.memory_avg,
261                        },
262                        "recommendation": {
263                            "cpu_request": cpu_str,
264                            "memory_request": mem_str,
265                            "cpu_savings_pct": historical_rec.cpu_savings_pct,
266                            "memory_savings_pct": historical_rec.memory_savings_pct,
267                            "confidence": historical_rec.confidence,
268                        }
269                    }));
270
271                    // Update the recommendation with data-driven values (as strings)
272                    rec.recommended.cpu_request = Some(cpu_str.clone());
273                    rec.recommended.memory_request = Some(mem_str.clone());
274
275                    // Update fix_yaml with data-driven values
276                    rec.fix_yaml = format!(
277                        "resources:\n  requests:\n    cpu: \"{}\"\n    memory: \"{}\"\n  limits:\n    cpu: \"{}\"  # 2x request\n    memory: \"{}\"",
278                        cpu_str, mem_str, cpu_limit_str, mem_str,
279                    );
280
281                    // Update message to indicate data-driven
282                    rec.message = format!(
283                        "{} [DATA-DRIVEN: P99 usage CPU={}m, Memory={}Mi over {}, confidence={}%]",
284                        rec.message,
285                        history.cpu_p99,
286                        history.memory_p99 / (1024 * 1024),
287                        period,
288                        historical_rec.confidence
289                    );
290
291                    enhanced_count += 1;
292                }
293                Err(_) => {
294                    // No Prometheus data for this workload, keep heuristic
295                    no_data_count += 1;
296                }
297            }
298        }
299
300        PrometheusEnhancement {
301            enhanced_count,
302            no_data_count,
303            prometheus_data,
304        }
305    }
306
307    /// Build config from arguments.
308    fn build_config(&self, args: &K8sOptimizeArgs) -> K8sOptimizeConfig {
309        let mut config = K8sOptimizeConfig::default();
310
311        if let Some(severity_str) = &args.severity
312            && let Some(severity) = Severity::parse(severity_str)
313        {
314            config = config.with_severity(severity);
315        }
316
317        if let Some(threshold) = args.threshold {
318            config = config.with_threshold(threshold);
319        }
320
321        if args.include_info {
322            config = config.with_info();
323        }
324
325        if args.include_system {
326            config = config.with_system();
327        }
328
329        config
330    }
331
332    /// Format result for AI agent consumption.
333    fn format_for_agent(
334        &self,
335        result: &OptimizationResult,
336        args: &K8sOptimizeArgs,
337    ) -> serde_json::Value {
338        // Create a summary for the agent
339        let mut response = json!({
340            "summary": {
341                "resources_analyzed": result.summary.resources_analyzed,
342                "containers_analyzed": result.summary.containers_analyzed,
343                "over_provisioned": result.summary.over_provisioned,
344                "under_provisioned": result.summary.under_provisioned,
345                "missing_requests": result.summary.missing_requests,
346                "missing_limits": result.summary.missing_limits,
347                "optimal": result.summary.optimal,
348                "total_waste_percentage": result.summary.total_waste_percentage,
349                "mode": result.metadata.mode.to_string(),
350            },
351            "recommendations": result.recommendations.iter().map(|r| {
352                json!({
353                    "resource": format!("{}/{}", r.resource_kind, r.resource_name),
354                    "container": r.container,
355                    "namespace": r.namespace,
356                    "file": r.file_path.display().to_string(),
357                    "line": r.line,
358                    "issue": r.issue.to_string(),
359                    "severity": r.severity.as_str(),
360                    "message": r.message,
361                    "workload_type": r.workload_type.as_str(),
362                    "rule_code": r.rule_code.as_str(),
363                    "rule_description": rule_description(r.rule_code.as_str()),
364                    "current": {
365                        "cpu_request": r.current.cpu_request,
366                        "cpu_limit": r.current.cpu_limit,
367                        "memory_request": r.current.memory_request,
368                        "memory_limit": r.current.memory_limit,
369                    },
370                    "recommended": {
371                        "cpu_request": r.recommended.cpu_request,
372                        "cpu_limit": r.recommended.cpu_limit,
373                        "memory_request": r.recommended.memory_request,
374                        "memory_limit": r.recommended.memory_limit,
375                    },
376                    "fix_yaml": r.fix_yaml,
377                    // Quick fix for agent to apply
378                    "quick_fix": {
379                        "action": "replace_resources",
380                        "file": r.file_path.display().to_string(),
381                        "container": r.container.clone(),
382                        "yaml": r.fix_yaml.clone(),
383                    }
384                })
385            }).collect::<Vec<_>>(),
386            "analysis_metadata": {
387                "duration_ms": result.metadata.duration_ms,
388                "path": result.metadata.path.display().to_string(),
389                "version": result.metadata.version.clone(),
390                "timestamp": result.metadata.timestamp.clone(),
391            }
392        });
393
394        // Add warnings if any
395        if !result.warnings.is_empty() {
396            response["warnings"] = json!(
397                result
398                    .warnings
399                    .iter()
400                    .map(|w| {
401                        json!({
402                            "resource": w.resource,
403                            "issue": w.issue.to_string(),
404                            "severity": w.severity.as_str(),
405                            "message": w.message,
406                        })
407                    })
408                    .collect::<Vec<_>>()
409            );
410        }
411
412        // Add savings estimate if available
413        if let Some(savings) = result.summary.estimated_monthly_savings_usd {
414            response["estimated_savings"] = json!({
415                "monthly_usd": savings,
416                "annual_usd": savings * 12.0,
417            });
418        }
419
420        // Add rule reference for agent
421        response["rule_codes"] = json!({
422            rule_codes::NO_CPU_REQUEST: rule_description(rule_codes::NO_CPU_REQUEST),
423            rule_codes::NO_MEMORY_REQUEST: rule_description(rule_codes::NO_MEMORY_REQUEST),
424            rule_codes::NO_CPU_LIMIT: rule_description(rule_codes::NO_CPU_LIMIT),
425            rule_codes::NO_MEMORY_LIMIT: rule_description(rule_codes::NO_MEMORY_LIMIT),
426            rule_codes::HIGH_CPU_REQUEST: rule_description(rule_codes::HIGH_CPU_REQUEST),
427            rule_codes::HIGH_MEMORY_REQUEST: rule_description(rule_codes::HIGH_MEMORY_REQUEST),
428            rule_codes::EXCESSIVE_CPU_RATIO: rule_description(rule_codes::EXCESSIVE_CPU_RATIO),
429            rule_codes::EXCESSIVE_MEMORY_RATIO: rule_description(rule_codes::EXCESSIVE_MEMORY_RATIO),
430            rule_codes::REQUESTS_EQUAL_LIMITS: rule_description(rule_codes::REQUESTS_EQUAL_LIMITS),
431            rule_codes::UNBALANCED_RESOURCES: rule_description(rule_codes::UNBALANCED_RESOURCES),
432        });
433
434        // Add live analysis info if cluster or prometheus was specified
435        if args.cluster.is_some() || args.prometheus.is_some() {
436            response["live_analysis"] = json!({
437                "enabled": args.prometheus.is_some(),
438                "cluster": args.cluster.clone(),
439                "prometheus": args.prometheus.clone(),
440                "prometheus_auth": if args.prometheus_auth_type.is_some() {
441                    args.prometheus_auth_type.clone()
442                } else {
443                    Some("none".to_string())
444                },
445                "period": args.period.clone().unwrap_or_else(|| "7d".to_string()),
446                "note": if args.prometheus.is_some() {
447                    "Historical metrics analysis using Prometheus data."
448                } else {
449                    "Live analysis requires Prometheus. Use prometheus_discover and prometheus_connect to set up."
450                },
451            });
452        }
453
454        // Add cost estimation info if provider was specified
455        if args.cloud_provider.is_some() {
456            response["cost_estimation"] = json!({
457                "enabled": true,
458                "provider": args.cloud_provider.clone(),
459                "region": args.region.clone().unwrap_or_else(|| "us-east-1".to_string()),
460                "note": "Cost estimation uses approximate on-demand pricing. Actual costs may vary.",
461            });
462        }
463
464        // Add actionable summary for agent
465        let action_items: Vec<String> = result
466            .recommendations
467            .iter()
468            .filter(|r| r.severity >= Severity::Medium)
469            .map(|r| {
470                format!(
471                    "[{}] {} in {}/{}",
472                    r.rule_code.as_str(),
473                    r.message,
474                    r.resource_kind,
475                    r.resource_name
476                )
477            })
478            .collect();
479
480        if !action_items.is_empty() {
481            response["action_items"] = json!(action_items);
482        }
483
484        response
485    }
486}
487
488impl Tool for K8sOptimizeTool {
489    const NAME: &'static str = "k8s_optimize";
490
491    type Args = K8sOptimizeArgs;
492    type Output = String;
493    type Error = K8sOptimizeError;
494
    /// Describe the tool to the model: its name, usage guidance and argument
    /// schema.
    ///
    /// `_prompt` is not used. The schema lists one property per field of
    /// `K8sOptimizeArgs` and declares no `required` entries, so every argument
    /// is optional from the model's point of view.
    async fn definition(&self, _prompt: String) -> ToolDefinition {
        ToolDefinition {
            name: Self::NAME.to_string(),
            // Markdown guidance shown to the model; the text is part of the
            // tool's runtime behavior.
            description: r#"Analyze Kubernetes manifests for resource optimization.

**IMPORTANT: Only use when user EXPLICITLY asks about:**
- "optimize my K8s resources" / "right-size my pods"
- "full analysis" / "comprehensive check" (use full=true)
- Over-provisioned or under-provisioned resources
- Cost optimization for Kubernetes

**DO NOT use for:**
- General K8s linting without optimization focus (use kubelint)
- Tasks where user didn't ask about optimization

## For Live Cluster Analysis with Historical Metrics

**RECOMMENDED FLOW when user wants data-driven optimization:**
1. First use `prometheus_discover` to find Prometheus in cluster
2. Use `prometheus_connect` to establish connection (starts port-forward)
3. Call `k8s_optimize` with the prometheus URL from step 2

Port-forward is preferred (no auth needed). Auth is only needed for external Prometheus URLs.

## Modes
- **Standard**: Resource optimization analysis only
- **Full** (full=true): Comprehensive analysis including:
  - Resource optimization (CPU/memory waste)
  - Security checks (kubelint - privileged, RBAC, etc.)
  - Helm validation (if charts present)
- **Live**: With prometheus URL for historical metrics (data-driven recommendations)

## Returns (analysis only - does NOT apply changes)
- Summary with issue counts and waste percentage
- Recommendations with suggested values (based on actual usage if Prometheus provided)
- Security findings (if full=true)
- Does NOT automatically modify files"#
                .to_string(),
            // JSON Schema for the arguments; property names must match the
            // field names of `K8sOptimizeArgs` for deserialization to work.
            parameters: json!({
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Path to K8s manifest file or directory (relative to project root). Examples: 'k8s/', 'deployments/api.yaml', 'charts/myapp/', 'terraform/'"
                    },
                    "content": {
                        "type": "string",
                        "description": "Inline YAML content to analyze (alternative to path)"
                    },
                    "severity": {
                        "type": "string",
                        "description": "Minimum severity to report: 'critical', 'high', 'medium', 'low', 'info'. Default: 'medium'",
                        "enum": ["critical", "high", "medium", "low", "info"]
                    },
                    "threshold": {
                        "type": "integer",
                        "description": "Minimum waste percentage to report (default: 10)"
                    },
                    "include_info": {
                        "type": "boolean",
                        "description": "Include info-level suggestions (default: false)"
                    },
                    "include_system": {
                        "type": "boolean",
                        "description": "Include system namespaces like kube-system (default: false)"
                    },
                    "full": {
                        "type": "boolean",
                        "description": "Run FULL comprehensive analysis: optimize + kubelint security + helmlint. Use when user asks for 'full analysis' or 'check everything'."
                    },
                    "cluster": {
                        "type": "string",
                        "description": "Connect to a Kubernetes cluster for live analysis (kubeconfig context name). Requires cluster connectivity."
                    },
                    "prometheus": {
                        "type": "string",
                        "description": "Prometheus URL for historical metrics (from prometheus_connect tool, e.g., 'http://localhost:52431')"
                    },
                    "prometheus_auth_type": {
                        "type": "string",
                        "description": "Prometheus auth type (only for external URL, NOT for port-forward): 'none', 'basic', 'bearer'",
                        "enum": ["none", "basic", "bearer"]
                    },
                    "prometheus_username": {
                        "type": "string",
                        "description": "Username for Prometheus basic auth (only for external URL)"
                    },
                    "prometheus_password": {
                        "type": "string",
                        "description": "Password for Prometheus basic auth (only for external URL)"
                    },
                    "prometheus_token": {
                        "type": "string",
                        "description": "Bearer token for Prometheus auth (only for external URL)"
                    },
                    "period": {
                        "type": "string",
                        "description": "Analysis period for live metrics (e.g., '7d', '24h', '1h'). Default: '7d'"
                    },
                    "cloud_provider": {
                        "type": "string",
                        "description": "Cloud provider for cost estimation: 'aws', 'gcp', 'azure', 'onprem'",
                        "enum": ["aws", "gcp", "azure", "onprem"]
                    },
                    "region": {
                        "type": "string",
                        "description": "Cloud region for pricing (e.g., 'us-east-1', 'us-central1')"
                    }
                }
            }),
        }
    }
607
608    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
609        let config = self.build_config(&args);
610
611        // IMPORTANT: Treat empty content as None - fixes AI agents passing empty strings
612        let mut result = if args.content.as_ref().is_some_and(|c| !c.trim().is_empty()) {
613            // Analyze non-empty inline content
614            analyze_content(args.content.as_ref().unwrap(), &config)
615        } else {
616            // Analyze path
617            let path = args.path.as_deref().unwrap_or(".");
618            let full_path = if std::path::Path::new(path).is_absolute() {
619                PathBuf::from(path)
620            } else {
621                self.project_root.join(path)
622            };
623
624            if !full_path.exists() {
625                return Ok(format_error_for_llm(
626                    "k8s_optimize",
627                    ErrorCategory::FileNotFound,
628                    &format!("Path not found: {}", full_path.display()),
629                    Some(vec![
630                        "Check if the path is correct",
631                        "Common locations: k8s/, manifests/, deploy/, charts/",
632                        "Use content parameter for inline YAML analysis",
633                        "Use list_directory tool to explore the project structure",
634                    ]),
635                ));
636            }
637
638            analyze(&full_path, &config)
639        };
640
641        // Handle empty directory (no K8s manifests found)
642        if result.summary.resources_analyzed == 0 && result.summary.containers_analyzed == 0 {
643            return Ok(format_error_for_llm(
644                "k8s_optimize",
645                ErrorCategory::ValidationFailed,
646                "No Kubernetes resources found to analyze",
647                Some(vec![
648                    "Ensure the path contains valid K8s YAML manifests",
649                    "Check for Deployment, StatefulSet, DaemonSet, Job, or CronJob resources",
650                    "Common K8s manifest locations: k8s/, manifests/, deploy/, charts/",
651                    "Use content parameter to analyze inline YAML",
652                ]),
653            ));
654        }
655
656        // If prometheus URL provided, enhance recommendations with live data
657        let (prometheus_enhancement, prometheus_error) = if let Some(prometheus_url) =
658            &args.prometheus
659        {
660            let auth = Self::build_prometheus_auth(&args);
661            match PrometheusClient::with_auth(prometheus_url, auth) {
662                Ok(client) => {
663                    if client.is_available().await {
664                        let period = args.period.as_deref().unwrap_or("7d");
665                        (
666                            Some(
667                                self.enhance_with_prometheus(&mut result, &client, period)
668                                    .await,
669                            ),
670                            None,
671                        )
672                    } else {
673                        // Prometheus URL provided but not reachable
674                        (
675                            None,
676                            Some(format!(
677                                "Prometheus at {} is not reachable. Continuing with static analysis.",
678                                prometheus_url
679                            )),
680                        )
681                    }
682                }
683                Err(e) => (
684                    None,
685                    Some(format!(
686                        "Failed to connect to Prometheus at {}: {}. Continuing with static analysis.",
687                        prometheus_url, e
688                    )),
689                ),
690            }
691        } else {
692            (None, None)
693        };
694
695        // If full mode, also run kubelint and helmlint
696        let mut output = self.format_for_agent(&result, &args);
697
698        if args.full {
699            let path = args.path.as_deref().unwrap_or(".");
700            let full_path = if std::path::Path::new(path).is_absolute() {
701                PathBuf::from(path)
702            } else {
703                self.project_root.join(path)
704            };
705
706            // Run kubelint for security
707            let kubelint_config =
708                crate::analyzer::kubelint::KubelintConfig::default().with_all_builtin();
709            let kubelint_result = crate::analyzer::kubelint::lint(&full_path, &kubelint_config);
710
711            output["security_analysis"] = json!({
712                "objects_analyzed": kubelint_result.summary.objects_analyzed,
713                "checks_run": kubelint_result.summary.checks_run,
714                "issues_found": kubelint_result.failures.len(),
715                "findings": kubelint_result.failures.iter().take(20).map(|f| {
716                    json!({
717                        "code": f.code.to_string(),
718                        "severity": format!("{:?}", f.severity).to_lowercase(),
719                        "object": format!("{}/{}", f.object_kind, f.object_name),
720                        "message": f.message,
721                        "remediation": f.remediation,
722                    })
723                }).collect::<Vec<_>>(),
724            });
725
726            // Run helmlint on Helm charts if any
727            let helm_charts = find_helm_charts(&full_path);
728            if !helm_charts.is_empty() {
729                let helmlint_config = crate::analyzer::helmlint::HelmlintConfig::default();
730                let mut chart_results: Vec<serde_json::Value> = Vec::new();
731
732                for chart_path in &helm_charts {
733                    let chart_name = chart_path
734                        .file_name()
735                        .map(|n| n.to_string_lossy().to_string())
736                        .unwrap_or_else(|| "unknown".to_string());
737                    let helmlint_result =
738                        crate::analyzer::helmlint::lint_chart(chart_path, &helmlint_config);
739
740                    chart_results.push(json!({
741                        "chart": chart_name,
742                        "issues": helmlint_result.failures.iter().map(|f| {
743                            json!({
744                                "code": f.code.to_string(),
745                                "severity": format!("{:?}", f.severity).to_lowercase(),
746                                "message": f.message,
747                            })
748                        }).collect::<Vec<_>>(),
749                    }));
750                }
751
752                output["helm_validation"] = json!({
753                    "charts_analyzed": helm_charts.len(),
754                    "results": chart_results,
755                });
756            }
757
758            output["analysis_mode"] = json!("full");
759        }
760
761        // Add Prometheus enhancement data if available
762        if let Some(enhancement) = prometheus_enhancement {
763            output["prometheus_analysis"] = json!({
764                "enabled": true,
765                "url": args.prometheus,
766                "period": args.period.clone().unwrap_or_else(|| "7d".to_string()),
767                "workloads_enhanced": enhancement.enhanced_count,
768                "workloads_no_data": enhancement.no_data_count,
769                "mode": if enhancement.enhanced_count > 0 { "data-driven" } else { "static" },
770                "historical_data": enhancement.prometheus_data,
771                "note": if enhancement.enhanced_count > 0 {
772                    format!(
773                        "Recommendations for {} workloads are based on actual P99 usage from Prometheus. {} workloads had no historical data.",
774                        enhancement.enhanced_count,
775                        enhancement.no_data_count
776                    )
777                } else {
778                    "No historical data found in Prometheus for the analyzed workloads. Recommendations are heuristic-based.".to_string()
779                }
780            });
781
782            // Update summary mode
783            if enhancement.enhanced_count > 0 {
784                output["summary"]["mode"] = json!("prometheus");
785            }
786        } else if let Some(prom_error) = prometheus_error {
787            // Add prometheus connection error info (graceful degradation)
788            output["prometheus_analysis"] = json!({
789                "enabled": false,
790                "url": args.prometheus,
791                "error": prom_error,
792                "mode": "static",
793                "suggestions": [
794                    "Verify Prometheus is running and accessible",
795                    "For cluster Prometheus, use prometheus_connect tool first to set up port-forward",
796                    "Check firewall rules if using external Prometheus URL",
797                    "Analysis continues with static/heuristic recommendations"
798                ]
799            });
800        }
801
802        // Use smart compression with RAG retrieval pattern
803        // This preserves all data while keeping context size manageable
804        let config = CompressionConfig::default();
805        Ok(compress_tool_output(&output, "k8s_optimize", &config))
806    }
807}
808
#[cfg(test)]
mod tests {
    //! Unit tests for the `k8s_optimize` tool: argument handling, output
    //! structure, Prometheus auth construction and structured error responses.

    use super::*;

    /// Returns arguments with every option unset and every flag disabled.
    ///
    /// Tests override only the fields they care about via struct-update
    /// syntax (`..base_args()`), which keeps the relevant inputs visible.
    fn base_args() -> K8sOptimizeArgs {
        K8sOptimizeArgs {
            path: None,
            content: None,
            severity: None,
            threshold: None,
            include_info: false,
            include_system: false,
            full: false,
            cluster: None,
            prometheus: None,
            prometheus_auth_type: None,
            prometheus_username: None,
            prometheus_password: None,
            prometheus_token: None,
            period: None,
            cloud_provider: None,
            region: None,
        }
    }

    /// Scratch directory that is deleted when the guard is dropped, so the
    /// cleanup also runs when an assertion in the test panics.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Creates an empty directory under the system temp dir. The process id
        /// is part of the name so concurrent test runs do not share a directory.
        fn new(label: &str) -> Self {
            let dir = std::env::temp_dir().join(format!("{}-{}", label, std::process::id()));
            // Drop leftovers from an earlier aborted run; a missing dir is fine.
            let _ = std::fs::remove_dir_all(&dir);
            std::fs::create_dir_all(&dir).expect("failed to create scratch directory");
            Self(dir)
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best-effort cleanup; nothing useful can be done on failure here.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn test_tool_name() {
        assert_eq!(K8sOptimizeTool::NAME, "k8s_optimize");
    }

    #[tokio::test]
    async fn test_analyze_content() {
        let tool = K8sOptimizeTool::new(PathBuf::from("."));

        // Deployment without any resource requests/limits.
        let yaml = r#"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-app
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test
  template:
    spec:
      containers:
      - name: app
        image: myapp:v1
"#;

        let args = K8sOptimizeArgs {
            content: Some(yaml.to_string()),
            include_system: true,
            ..base_args()
        };

        let result = tool.call(args).await.unwrap();
        assert!(result.contains("summary"));
        assert!(result.contains("recommendations"));
        assert!(result.contains("rule_codes"));
    }

    #[tokio::test]
    async fn test_build_config() {
        let tool = K8sOptimizeTool::new(PathBuf::from("."));

        let args = K8sOptimizeArgs {
            severity: Some("high".to_string()),
            threshold: Some(20),
            include_info: true,
            include_system: true,
            ..base_args()
        };

        let config = tool.build_config(&args);
        assert_eq!(config.waste_threshold_percent, 20);
        assert!(config.include_info);
        assert!(config.include_system);
    }

    #[tokio::test]
    async fn test_output_format() {
        let tool = K8sOptimizeTool::new(PathBuf::from("."));

        // Deployment with deliberately large requests and limits.
        let yaml = r#"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: over-provisioned
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test
  template:
    spec:
      containers:
      - name: nginx
        image: nginx:1.21
        resources:
          requests:
            cpu: 4000m
            memory: 8Gi
          limits:
            cpu: 8000m
            memory: 16Gi
"#;

        let args = K8sOptimizeArgs {
            content: Some(yaml.to_string()),
            include_system: true,
            cloud_provider: Some("aws".to_string()),
            region: Some("us-east-1".to_string()),
            ..base_args()
        };

        let result = tool.call(args).await.unwrap();

        // Parse and verify the top-level structure.
        let json: serde_json::Value = serde_json::from_str(&result).unwrap();

        assert!(json.get("summary").is_some());
        assert!(json.get("recommendations").is_some());
        assert!(json.get("rule_codes").is_some());
        assert!(json.get("cost_estimation").is_some());
    }

    #[test]
    fn test_build_prometheus_auth_none() {
        let args = K8sOptimizeArgs {
            prometheus: Some("http://localhost:9090".to_string()),
            ..base_args()
        };

        let auth = K8sOptimizeTool::build_prometheus_auth(&args);
        assert!(matches!(auth, PrometheusAuth::None));
    }

    #[test]
    fn test_build_prometheus_auth_basic() {
        let args = K8sOptimizeArgs {
            prometheus: Some("https://prometheus.example.com".to_string()),
            prometheus_auth_type: Some("basic".to_string()),
            prometheus_username: Some("admin".to_string()),
            prometheus_password: Some("secret".to_string()),
            ..base_args()
        };

        let auth = K8sOptimizeTool::build_prometheus_auth(&args);
        match auth {
            PrometheusAuth::Basic { username, password } => {
                assert_eq!(username, "admin");
                assert_eq!(password, "secret");
            }
            _ => panic!("Expected Basic auth"),
        }
    }

    #[tokio::test]
    async fn test_path_not_found_error() {
        // The project root is never created; only its absence matters.
        let missing_root = std::env::temp_dir().join("test-k8s-optimize-nonexistent");
        let tool = K8sOptimizeTool::new(missing_root);

        let args = K8sOptimizeArgs {
            path: Some("nonexistent/path/to/k8s/manifests".to_string()),
            ..base_args()
        };

        let result = tool.call(args).await.unwrap();

        // Should return a structured error, not panic.
        assert!(result.contains("FILE_NOT_FOUND"));
        assert!(result.contains("suggestions"));
        assert!(result.contains("error"));

        // Parse as JSON to verify structure.
        let json: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], "FILE_NOT_FOUND");
        assert!(json["suggestions"].is_array());
    }

    #[tokio::test]
    async fn test_empty_content_handled() {
        let tool = K8sOptimizeTool::new(PathBuf::from("."));

        let args = K8sOptimizeArgs {
            content: Some("".to_string()),
            ..base_args()
        };

        let result = tool.call(args).await.unwrap();

        // Empty content should fall back to path analysis of ".", which will
        // likely contain no K8s manifests. Either outcome must be well-formed JSON.
        let json: serde_json::Value = serde_json::from_str(&result).unwrap();

        // Indexing a missing key yields `Null`, which never equals `true`.
        if json["error"] == true {
            // Error case - no K8s manifests found in current directory.
            assert!(result.contains("VALIDATION_FAILED") || result.contains("FILE_NOT_FOUND"));
            assert!(json["suggestions"].is_array());
        } else {
            // Success case - valid analysis response.
            assert!(json.get("summary").is_some());
        }
    }

    #[tokio::test]
    async fn test_no_k8s_manifests_in_directory() {
        // Empty scratch directory; removed automatically when `scratch` drops.
        let scratch = ScratchDir::new("test-k8s-optimize-empty");

        let tool = K8sOptimizeTool::new(scratch.0.clone());

        let args = K8sOptimizeArgs {
            path: Some(".".to_string()),
            ..base_args()
        };

        let result = tool.call(args).await.unwrap();

        // Should return a validation error for a directory without manifests.
        let json: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], "VALIDATION_FAILED");
        assert!(result.contains("No Kubernetes resources found"));
        assert!(json["suggestions"].is_array());
    }
}