syncable_cli/agent/tools/
k8s_costs.rs

//! K8s Costs tool - Cost attribution and analysis for Kubernetes workloads
//!
//! Provides cost estimation, attribution by namespace/label, and trend analysis
//! to help with cloud cost optimization decisions.
//!
//! Output is optimized for AI agent decision-making with:
//! - Cost breakdowns by namespace, workload, and resource type
//! - Historical trends and anomaly detection
//! - Actionable cost reduction recommendations
10
11use rig::completion::ToolDefinition;
12use rig::tool::Tool;
13use serde::{Deserialize, Serialize};
14use serde_json::json;
15use std::path::PathBuf;
16
17use crate::analyzer::k8s_optimize::{
18    CloudProvider, CostEstimation, K8sOptimizeConfig, analyze, calculate_from_static,
19};
20
21/// Arguments for the k8s-costs tool
22#[derive(Debug, Deserialize)]
23pub struct K8sCostsArgs {
24    /// Path to K8s manifest file or directory (relative to project root)
25    #[serde(default)]
26    pub path: Option<String>,
27
28    /// Filter by namespace
29    #[serde(default)]
30    pub namespace: Option<String>,
31
32    /// Group costs by label (e.g., "app", "team", "environment")
33    #[serde(default)]
34    pub by_label: Option<String>,
35
36    /// Cloud provider for pricing: "aws", "gcp", "azure", "onprem"
37    #[serde(default)]
38    pub cloud_provider: Option<String>,
39
40    /// Cloud region for pricing (e.g., "us-east-1", "us-central1")
41    #[serde(default)]
42    pub region: Option<String>,
43
44    /// Show detailed breakdown per workload
45    #[serde(default)]
46    pub detailed: bool,
47
48    /// Compare with another period (e.g., "7d", "30d") - for trend analysis
49    #[serde(default)]
50    pub compare_period: Option<String>,
51
52    // ========== Live Cluster Options ==========
53    /// Connect to a Kubernetes cluster (kubeconfig context name)
54    #[serde(default)]
55    pub cluster: Option<String>,
56
57    /// Prometheus URL for historical cost data
58    #[serde(default)]
59    pub prometheus: Option<String>,
60}
61
62/// Error type for k8s-costs tool
63#[derive(Debug, thiserror::Error)]
64#[error("K8s costs error: {0}")]
65pub struct K8sCostsError(String);
66
67/// Tool for analyzing Kubernetes workload costs
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct K8sCostsTool {
70    project_root: PathBuf,
71}
72
73impl K8sCostsTool {
74    /// Create a new K8sCostsTool with the given project root.
75    pub fn new(project_root: PathBuf) -> Self {
76        Self { project_root }
77    }
78
79    /// Parse cloud provider from string.
80    fn parse_provider(&self, provider: &str) -> CloudProvider {
81        match provider.to_lowercase().as_str() {
82            "aws" => CloudProvider::Aws,
83            "gcp" => CloudProvider::Gcp,
84            "azure" => CloudProvider::Azure,
85            "onprem" | "on-prem" | "on_prem" => CloudProvider::OnPrem,
86            _ => CloudProvider::Aws, // Default to AWS
87        }
88    }
89
90    /// Format cost estimation for agent consumption.
91    fn format_for_agent(
92        &self,
93        estimation: &CostEstimation,
94        args: &K8sCostsArgs,
95    ) -> serde_json::Value {
96        let mut response = json!({
97            "summary": {
98                "monthly_waste_cost_usd": estimation.monthly_waste_cost,
99                "annual_waste_cost_usd": estimation.annual_waste_cost,
100                "monthly_savings_usd": estimation.monthly_savings,
101                "annual_savings_usd": estimation.annual_savings,
102                "workload_count": estimation.workload_costs.len(),
103                "cloud_provider": format!("{:?}", estimation.provider),
104                "region": estimation.region.clone(),
105                "currency": estimation.currency.clone(),
106            },
107            "breakdown": {
108                "cpu_waste_cost_usd": estimation.breakdown.cpu_cost,
109                "memory_waste_cost_usd": estimation.breakdown.memory_cost,
110            },
111            "workloads": estimation.workload_costs.iter().map(|w| {
112                json!({
113                    "name": w.workload_name,
114                    "namespace": w.namespace,
115                    "monthly_waste_cost_usd": w.monthly_cost,
116                    "potential_savings_usd": w.monthly_savings,
117                })
118            }).collect::<Vec<_>>(),
119        });
120
121        // Add namespace grouping if requested
122        if args.namespace.is_some() || args.by_label.is_some() {
123            let mut namespace_costs: std::collections::HashMap<String, f64> =
124                std::collections::HashMap::new();
125            for workload in &estimation.workload_costs {
126                *namespace_costs
127                    .entry(workload.namespace.clone())
128                    .or_insert(0.0) += workload.monthly_cost;
129            }
130            response["by_namespace"] = json!(namespace_costs);
131        }
132
133        // Add recommendations for cost reduction
134        let mut recommendations: Vec<serde_json::Value> = Vec::new();
135
136        // Find top cost workloads
137        let mut sorted_workloads = estimation.workload_costs.clone();
138        sorted_workloads.sort_by(|a, b| {
139            b.monthly_cost
140                .partial_cmp(&a.monthly_cost)
141                .unwrap_or(std::cmp::Ordering::Equal)
142        });
143
144        let total_waste = estimation.monthly_waste_cost;
145        if let Some(top) = sorted_workloads.first() {
146            if total_waste > 0.0 && top.monthly_cost > total_waste * 0.3 {
147                recommendations.push(json!({
148                    "type": "high_waste_workload",
149                    "workload": top.workload_name,
150                    "namespace": top.namespace,
151                    "waste_cost_usd": top.monthly_cost,
152                    "percentage": (top.monthly_cost / total_waste * 100.0).round(),
153                    "message": format!("{} accounts for over 30% of total waste. Consider optimization.", top.workload_name),
154                }));
155            }
156        }
157
158        // Check for cost imbalance (CPU vs Memory)
159        if estimation.breakdown.cpu_cost > estimation.breakdown.memory_cost * 3.0 {
160            recommendations.push(json!({
161                "type": "cpu_heavy",
162                "message": "CPU waste is significantly higher than memory waste. Consider if workloads are CPU over-provisioned.",
163                "cpu_waste_cost_usd": estimation.breakdown.cpu_cost,
164                "memory_waste_cost_usd": estimation.breakdown.memory_cost,
165            }));
166        }
167
168        if !recommendations.is_empty() {
169            response["recommendations"] = json!(recommendations);
170        }
171
172        // Add analysis metadata
173        response["analysis"] = json!({
174            "mode": if args.cluster.is_some() { "live" } else { "static" },
175            "path": args.path.clone().unwrap_or_else(|| ".".to_string()),
176            "pricing_note": "Estimates based on on-demand pricing. Actual costs may vary with reserved instances, spot pricing, or enterprise discounts.",
177        });
178
179        response
180    }
181}
182
183impl Tool for K8sCostsTool {
184    const NAME: &'static str = "k8s_costs";
185
186    type Args = K8sCostsArgs;
187    type Output = String;
188    type Error = K8sCostsError;
189
190    async fn definition(&self, _prompt: String) -> ToolDefinition {
191        ToolDefinition {
192            name: Self::NAME.to_string(),
193            description: r#"Analyze Kubernetes workload costs and waste.
194
195**IMPORTANT: Only use this tool when the user EXPLICITLY asks about:**
196- Cloud costs for Kubernetes
197- Cost attribution or cost breakdown
198- How much resources cost or waste
199- Budget/spending analysis for K8s
200- Which workloads cost the most
201
202**DO NOT use this tool for:**
203- General Kubernetes linting (use kubelint)
204- Resource optimization analysis (use k8s_optimize)
205- Any task where user didn't ask about costs/spending/budget
206
207## What It Does
208Estimates monthly cloud costs based on resource requests, shows cost breakdown by namespace/workload, and identifies wasted spend.
209
210## Supported Providers
211- aws, gcp, azure, onprem
212
213## Returns (analysis only - does NOT apply changes)
214- Monthly/annual waste cost estimates
215- Cost breakdown by CPU/memory
216- Per-workload cost attribution
217- Does NOT automatically modify anything"#.to_string(),
218            parameters: json!({
219                "type": "object",
220                "properties": {
221                    "path": {
222                        "type": "string",
223                        "description": "Path to K8s manifest file or directory (relative to project root). Examples: 'k8s/', 'deployments/'"
224                    },
225                    "namespace": {
226                        "type": "string",
227                        "description": "Filter costs by namespace"
228                    },
229                    "by_label": {
230                        "type": "string",
231                        "description": "Group costs by label key (e.g., 'app', 'team', 'environment')"
232                    },
233                    "cloud_provider": {
234                        "type": "string",
235                        "description": "Cloud provider for pricing: 'aws', 'gcp', 'azure', 'onprem'. Default: 'aws'",
236                        "enum": ["aws", "gcp", "azure", "onprem"]
237                    },
238                    "region": {
239                        "type": "string",
240                        "description": "Cloud region for pricing (e.g., 'us-east-1', 'us-central1'). Default: 'us-east-1'"
241                    },
242                    "detailed": {
243                        "type": "boolean",
244                        "description": "Show detailed per-workload breakdown (default: false)"
245                    },
246                    "compare_period": {
247                        "type": "string",
248                        "description": "Compare with historical period for trend analysis (e.g., '7d', '30d')"
249                    },
250                    "cluster": {
251                        "type": "string",
252                        "description": "Connect to a Kubernetes cluster for live cost analysis (kubeconfig context name)"
253                    },
254                    "prometheus": {
255                        "type": "string",
256                        "description": "Prometheus URL for historical cost metrics (e.g., 'http://prometheus:9090')"
257                    }
258                }
259            }),
260        }
261    }
262
263    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
264        // First, analyze the manifests to get resource information
265        let path = args.path.as_deref().unwrap_or(".");
266        let full_path = if std::path::Path::new(path).is_absolute() {
267            PathBuf::from(path)
268        } else {
269            self.project_root.join(path)
270        };
271
272        if !full_path.exists() {
273            return Err(K8sCostsError(format!(
274                "Path not found: {}",
275                full_path.display()
276            )));
277        }
278
279        // Run static analysis first
280        let config = K8sOptimizeConfig::default();
281        let analysis_result = analyze(&full_path, &config);
282
283        // Calculate costs from recommendations
284        let provider = self.parse_provider(args.cloud_provider.as_deref().unwrap_or("aws"));
285        let region = args
286            .region
287            .clone()
288            .unwrap_or_else(|| "us-east-1".to_string());
289
290        let cost_estimation =
291            calculate_from_static(&analysis_result.recommendations, provider, &region);
292
293        // Format for agent
294        let output = self.format_for_agent(&cost_estimation, &args);
295        Ok(serde_json::to_string_pretty(&output).unwrap_or_else(|_| "{}".to_string()))
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tool rooted at the current directory; none of these tests
    /// touch the filesystem.
    fn make_tool() -> K8sCostsTool {
        K8sCostsTool::new(PathBuf::from("."))
    }

    /// The registered tool name is the stable identifier agents call.
    #[test]
    fn test_tool_name() {
        assert_eq!(K8sCostsTool::NAME, "k8s_costs");
    }

    /// Provider names map case-insensitively; unknown names default to AWS.
    #[test]
    fn test_parse_provider() {
        let tool = make_tool();

        for name in ["aws", "AWS"] {
            assert!(matches!(tool.parse_provider(name), CloudProvider::Aws));
        }

        assert!(matches!(tool.parse_provider("gcp"), CloudProvider::Gcp));
        assert!(matches!(tool.parse_provider("azure"), CloudProvider::Azure));

        for alias in ["onprem", "on-prem"] {
            assert!(matches!(tool.parse_provider(alias), CloudProvider::OnPrem));
        }

        // Anything unrecognized falls back to the default provider.
        assert!(matches!(tool.parse_provider("unknown"), CloudProvider::Aws));
    }

    /// The definition advertises the tool name and mentions costs.
    #[tokio::test]
    async fn test_definition() {
        let definition = make_tool().definition(String::new()).await;

        assert_eq!(definition.name, "k8s_costs");
        assert!(definition.description.contains("cost"));
    }
}