syncable_cli/agent/tools/
k8s_costs.rs

1//! K8s Costs tool - Cost attribution and analysis for Kubernetes workloads
2//!
3//! Provides cost estimation, attribution by namespace/label, and trend analysis
4//! to help with cloud cost optimization decisions.
5//!
6//! Output is optimized for AI agent decision-making with:
7//! - Cost breakdowns by namespace, workload, and resource type
//! - Historical trends and anomaly detection (not yet implemented: `compare_period` and `prometheus` are accepted but currently unused)
9//! - Actionable cost reduction recommendations
10
11use rig::completion::ToolDefinition;
12use rig::tool::Tool;
13use serde::{Deserialize, Serialize};
14use serde_json::json;
15use std::path::PathBuf;
16
17use super::error::{ErrorCategory, format_error_for_llm};
18use crate::analyzer::k8s_optimize::{
19    CloudProvider, CostEstimation, K8sOptimizeConfig, analyze, calculate_from_static,
20};
21
22/// Arguments for the k8s-costs tool
23#[derive(Debug, Deserialize)]
24pub struct K8sCostsArgs {
25    /// Path to K8s manifest file or directory (relative to project root)
26    #[serde(default)]
27    pub path: Option<String>,
28
29    /// Filter by namespace
30    #[serde(default)]
31    pub namespace: Option<String>,
32
33    /// Group costs by label (e.g., "app", "team", "environment")
34    #[serde(default)]
35    pub by_label: Option<String>,
36
37    /// Cloud provider for pricing: "aws", "gcp", "azure", "onprem"
38    #[serde(default)]
39    pub cloud_provider: Option<String>,
40
41    /// Cloud region for pricing (e.g., "us-east-1", "us-central1")
42    #[serde(default)]
43    pub region: Option<String>,
44
45    /// Show detailed breakdown per workload
46    #[serde(default)]
47    pub detailed: bool,
48
49    /// Compare with another period (e.g., "7d", "30d") - for trend analysis
50    #[serde(default)]
51    pub compare_period: Option<String>,
52
53    // ========== Live Cluster Options ==========
54    /// Connect to a Kubernetes cluster (kubeconfig context name)
55    #[serde(default)]
56    pub cluster: Option<String>,
57
58    /// Prometheus URL for historical cost data
59    #[serde(default)]
60    pub prometheus: Option<String>,
61}
62
63/// Error type for k8s-costs tool
64#[derive(Debug, thiserror::Error)]
65#[error("K8s costs error: {0}")]
66pub struct K8sCostsError(String);
67
68/// Tool for analyzing Kubernetes workload costs
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct K8sCostsTool {
71    project_root: PathBuf,
72}
73
74impl K8sCostsTool {
75    /// Create a new K8sCostsTool with the given project root.
76    pub fn new(project_root: PathBuf) -> Self {
77        Self { project_root }
78    }
79
80    /// Parse cloud provider from string.
81    fn parse_provider(&self, provider: &str) -> CloudProvider {
82        match provider.to_lowercase().as_str() {
83            "aws" => CloudProvider::Aws,
84            "gcp" => CloudProvider::Gcp,
85            "azure" => CloudProvider::Azure,
86            "onprem" | "on-prem" | "on_prem" => CloudProvider::OnPrem,
87            _ => CloudProvider::Aws, // Default to AWS
88        }
89    }
90
91    /// Format cost estimation for agent consumption.
92    fn format_for_agent(
93        &self,
94        estimation: &CostEstimation,
95        args: &K8sCostsArgs,
96    ) -> serde_json::Value {
97        let mut response = json!({
98            "summary": {
99                "monthly_waste_cost_usd": estimation.monthly_waste_cost,
100                "annual_waste_cost_usd": estimation.annual_waste_cost,
101                "monthly_savings_usd": estimation.monthly_savings,
102                "annual_savings_usd": estimation.annual_savings,
103                "workload_count": estimation.workload_costs.len(),
104                "cloud_provider": format!("{:?}", estimation.provider),
105                "region": estimation.region.clone(),
106                "currency": estimation.currency.clone(),
107            },
108            "breakdown": {
109                "cpu_waste_cost_usd": estimation.breakdown.cpu_cost,
110                "memory_waste_cost_usd": estimation.breakdown.memory_cost,
111            },
112            "workloads": estimation.workload_costs.iter().map(|w| {
113                json!({
114                    "name": w.workload_name,
115                    "namespace": w.namespace,
116                    "monthly_waste_cost_usd": w.monthly_cost,
117                    "potential_savings_usd": w.monthly_savings,
118                })
119            }).collect::<Vec<_>>(),
120        });
121
122        // Add namespace grouping if requested
123        if args.namespace.is_some() || args.by_label.is_some() {
124            let mut namespace_costs: std::collections::HashMap<String, f64> =
125                std::collections::HashMap::new();
126            for workload in &estimation.workload_costs {
127                *namespace_costs
128                    .entry(workload.namespace.clone())
129                    .or_insert(0.0) += workload.monthly_cost;
130            }
131            response["by_namespace"] = json!(namespace_costs);
132        }
133
134        // Add recommendations for cost reduction
135        let mut recommendations: Vec<serde_json::Value> = Vec::new();
136
137        // Find top cost workloads
138        let mut sorted_workloads = estimation.workload_costs.clone();
139        sorted_workloads.sort_by(|a, b| {
140            b.monthly_cost
141                .partial_cmp(&a.monthly_cost)
142                .unwrap_or(std::cmp::Ordering::Equal)
143        });
144
145        let total_waste = estimation.monthly_waste_cost;
146        if let Some(top) = sorted_workloads.first()
147            && total_waste > 0.0
148            && top.monthly_cost > total_waste * 0.3
149        {
150            recommendations.push(json!({
151                "type": "high_waste_workload",
152                "workload": top.workload_name,
153                "namespace": top.namespace,
154                "waste_cost_usd": top.monthly_cost,
155                "percentage": (top.monthly_cost / total_waste * 100.0).round(),
156                "message": format!("{} accounts for over 30% of total waste. Consider optimization.", top.workload_name),
157            }));
158        }
159
160        // Check for cost imbalance (CPU vs Memory)
161        if estimation.breakdown.cpu_cost > estimation.breakdown.memory_cost * 3.0 {
162            recommendations.push(json!({
163                "type": "cpu_heavy",
164                "message": "CPU waste is significantly higher than memory waste. Consider if workloads are CPU over-provisioned.",
165                "cpu_waste_cost_usd": estimation.breakdown.cpu_cost,
166                "memory_waste_cost_usd": estimation.breakdown.memory_cost,
167            }));
168        }
169
170        if !recommendations.is_empty() {
171            response["recommendations"] = json!(recommendations);
172        }
173
174        // Add analysis metadata
175        response["analysis"] = json!({
176            "mode": if args.cluster.is_some() { "live" } else { "static" },
177            "path": args.path.clone().unwrap_or_else(|| ".".to_string()),
178            "pricing_note": "Estimates based on on-demand pricing. Actual costs may vary with reserved instances, spot pricing, or enterprise discounts.",
179        });
180
181        response
182    }
183}
184
185impl Tool for K8sCostsTool {
186    const NAME: &'static str = "k8s_costs";
187
188    type Args = K8sCostsArgs;
189    type Output = String;
190    type Error = K8sCostsError;
191
192    async fn definition(&self, _prompt: String) -> ToolDefinition {
193        ToolDefinition {
194            name: Self::NAME.to_string(),
195            description: r#"Analyze Kubernetes workload costs and waste.
196
197**IMPORTANT: Only use this tool when the user EXPLICITLY asks about:**
198- Cloud costs for Kubernetes
199- Cost attribution or cost breakdown
200- How much resources cost or waste
201- Budget/spending analysis for K8s
202- Which workloads cost the most
203
204**DO NOT use this tool for:**
205- General Kubernetes linting (use kubelint)
206- Resource optimization analysis (use k8s_optimize)
207- Any task where user didn't ask about costs/spending/budget
208
209## What It Does
210Estimates monthly cloud costs based on resource requests, shows cost breakdown by namespace/workload, and identifies wasted spend.
211
212## Supported Providers
213- aws, gcp, azure, onprem
214
215## Returns (analysis only - does NOT apply changes)
216- Monthly/annual waste cost estimates
217- Cost breakdown by CPU/memory
218- Per-workload cost attribution
219- Does NOT automatically modify anything"#.to_string(),
220            parameters: json!({
221                "type": "object",
222                "properties": {
223                    "path": {
224                        "type": "string",
225                        "description": "Path to K8s manifest file or directory (relative to project root). Examples: 'k8s/', 'deployments/'"
226                    },
227                    "namespace": {
228                        "type": "string",
229                        "description": "Filter costs by namespace"
230                    },
231                    "by_label": {
232                        "type": "string",
233                        "description": "Group costs by label key (e.g., 'app', 'team', 'environment')"
234                    },
235                    "cloud_provider": {
236                        "type": "string",
237                        "description": "Cloud provider for pricing: 'aws', 'gcp', 'azure', 'onprem'. Default: 'aws'",
238                        "enum": ["aws", "gcp", "azure", "onprem"]
239                    },
240                    "region": {
241                        "type": "string",
242                        "description": "Cloud region for pricing (e.g., 'us-east-1', 'us-central1'). Default: 'us-east-1'"
243                    },
244                    "detailed": {
245                        "type": "boolean",
246                        "description": "Show detailed per-workload breakdown (default: false)"
247                    },
248                    "compare_period": {
249                        "type": "string",
250                        "description": "Compare with historical period for trend analysis (e.g., '7d', '30d')"
251                    },
252                    "cluster": {
253                        "type": "string",
254                        "description": "Connect to a Kubernetes cluster for live cost analysis (kubeconfig context name)"
255                    },
256                    "prometheus": {
257                        "type": "string",
258                        "description": "Prometheus URL for historical cost metrics (e.g., 'http://prometheus:9090')"
259                    }
260                }
261            }),
262        }
263    }
264
265    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
266        // First, analyze the manifests to get resource information
267        let path = args.path.as_deref().unwrap_or(".");
268        let full_path = if std::path::Path::new(path).is_absolute() {
269            PathBuf::from(path)
270        } else {
271            self.project_root.join(path)
272        };
273
274        // Edge case: Path not found
275        if !full_path.exists() {
276            return Ok(format_error_for_llm(
277                "k8s_costs",
278                ErrorCategory::FileNotFound,
279                &format!("Path not found: {}", full_path.display()),
280                Some(vec![
281                    "Check if the path is correct",
282                    "Common locations: k8s/, manifests/, deploy/, kubernetes/",
283                    "Use list_directory to explore available paths",
284                    "Use k8s_optimize for resource analysis first",
285                ]),
286            ));
287        }
288
289        // Edge case: Check if directory is empty (no files)
290        if full_path.is_dir() {
291            let has_files = std::fs::read_dir(&full_path)
292                .map(|entries| entries.filter_map(|e| e.ok()).next().is_some())
293                .unwrap_or(false);
294
295            if !has_files {
296                return Ok(format_error_for_llm(
297                    "k8s_costs",
298                    ErrorCategory::ValidationFailed,
299                    &format!("Directory is empty: {}", full_path.display()),
300                    Some(vec![
301                        "The directory contains no files to analyze",
302                        "Check if K8s manifests exist in a subdirectory",
303                        "Use list_directory to explore the project structure",
304                    ]),
305                ));
306            }
307        }
308
309        // Run static analysis first
310        let config = K8sOptimizeConfig::default();
311        let analysis_result = analyze(&full_path, &config);
312
313        // Edge case: No K8s manifests found (empty recommendations)
314        if analysis_result.recommendations.is_empty() && analysis_result.warnings.is_empty() {
315            return Ok(format_error_for_llm(
316                "k8s_costs",
317                ErrorCategory::ValidationFailed,
318                &format!("No Kubernetes manifests found in: {}", full_path.display()),
319                Some(vec![
320                    "Ensure the path contains .yaml or .yml files",
321                    "K8s manifests should define Deployment, StatefulSet, or Pod resources",
322                    "Try specifying a more specific path (e.g., 'k8s/deployments/')",
323                    "Use kubelint to validate manifest structure",
324                ]),
325            ));
326        }
327
328        // Calculate costs from recommendations
329        let provider = self.parse_provider(args.cloud_provider.as_deref().unwrap_or("aws"));
330        let region = args
331            .region
332            .clone()
333            .unwrap_or_else(|| "us-east-1".to_string());
334
335        let cost_estimation =
336            calculate_from_static(&analysis_result.recommendations, provider, &region);
337
338        // Edge case: No cost data available (no workloads with resource requests)
339        if cost_estimation.workload_costs.is_empty() {
340            return Ok(format_error_for_llm(
341                "k8s_costs",
342                ErrorCategory::ValidationFailed,
343                "No cost data available - workloads have no resource requests defined",
344                Some(vec![
345                    "Ensure Deployments/StatefulSets have resource requests specified",
346                    "Add resources.requests.cpu and resources.requests.memory to containers",
347                    "Use k8s_optimize to get resource recommendation suggestions",
348                ]),
349            ));
350        }
351
352        // Format for agent
353        let output = self.format_for_agent(&cost_estimation, &args);
354        Ok(serde_json::to_string_pretty(&output).unwrap_or_else(|_| "{}".to_string()))
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_provider` on a tool rooted at the current directory.
    fn provider(input: &str) -> CloudProvider {
        K8sCostsTool::new(PathBuf::from(".")).parse_provider(input)
    }

    /// The registered tool name is stable.
    #[test]
    fn test_tool_name() {
        assert_eq!(K8sCostsTool::NAME, "k8s_costs");
    }

    /// Known names map to their provider; unknown names default to AWS.
    #[test]
    fn test_parse_provider() {
        assert!(matches!(provider("aws"), CloudProvider::Aws));
        assert!(matches!(provider("AWS"), CloudProvider::Aws));
        assert!(matches!(provider("gcp"), CloudProvider::Gcp));
        assert!(matches!(provider("azure"), CloudProvider::Azure));
        assert!(matches!(provider("onprem"), CloudProvider::OnPrem));
        assert!(matches!(provider("on-prem"), CloudProvider::OnPrem));
        assert!(matches!(provider("unknown"), CloudProvider::Aws)); // Default
    }

    /// The definition advertises the tool name and mentions costs.
    #[tokio::test]
    async fn test_definition() {
        let definition = K8sCostsTool::new(PathBuf::from("."))
            .definition(String::new())
            .await;

        assert_eq!(definition.name, "k8s_costs");
        assert!(definition.description.contains("cost"));
    }

    /// A missing path yields a structured error string rather than an `Err`.
    #[tokio::test]
    async fn test_path_not_found_error() {
        let missing_root = PathBuf::from("/tmp/test-k8s-costs-nonexistent");
        let request = K8sCostsArgs {
            path: Some(String::from("nonexistent/path")),
            namespace: None,
            by_label: None,
            cloud_provider: None,
            region: None,
            detailed: false,
            compare_period: None,
            cluster: None,
            prometheus: None,
        };

        let report = K8sCostsTool::new(missing_root)
            .call(request)
            .await
            .unwrap();

        // Verify it returns structured error JSON
        assert!(report.contains("FILE_NOT_FOUND") || report.contains("error"));
        assert!(report.contains("suggestions"));
        assert!(report.contains("Path not found"));
    }

    /// Provider names are matched regardless of letter case or separator style.
    #[test]
    fn test_provider_case_insensitivity() {
        // Upper case
        assert!(matches!(provider("AWS"), CloudProvider::Aws));
        assert!(matches!(provider("GCP"), CloudProvider::Gcp));
        assert!(matches!(provider("AZURE"), CloudProvider::Azure));
        assert!(matches!(provider("ONPREM"), CloudProvider::OnPrem));

        // Mixed case
        assert!(matches!(provider("Aws"), CloudProvider::Aws));
        assert!(matches!(provider("Gcp"), CloudProvider::Gcp));
        assert!(matches!(provider("Azure"), CloudProvider::Azure));
        assert!(matches!(provider("OnPrem"), CloudProvider::OnPrem));

        // Lower case
        assert!(matches!(provider("aws"), CloudProvider::Aws));
        assert!(matches!(provider("gcp"), CloudProvider::Gcp));
        assert!(matches!(provider("azure"), CloudProvider::Azure));
        assert!(matches!(provider("onprem"), CloudProvider::OnPrem));

        // Alternative spellings of on-prem
        assert!(matches!(provider("on-prem"), CloudProvider::OnPrem));
        assert!(matches!(provider("on_prem"), CloudProvider::OnPrem));
        assert!(matches!(provider("ON-PREM"), CloudProvider::OnPrem));
    }
}