syncable_cli/agent/tools/
k8s_drift.rs

//! K8s Drift tool - Detect configuration drift between manifests and a live cluster
//!
//! Compares declared Kubernetes manifests against the live cluster state to identify
//! resource drift, especially in CPU/memory limits and requests.
//!
//! Note: the live cluster comparison is still a placeholder. Until it is implemented,
//! the tool lists the resources declared in the manifests that would be compared.
//!
//! Output is optimized for AI agent decision-making with:
//! - Clear drift detection results
//! - Resource-specific differences
//! - Remediation suggestions
10
11use rig::completion::ToolDefinition;
12use rig::tool::Tool;
13use serde::{Deserialize, Serialize};
14use serde_json::json;
15use std::path::PathBuf;
16
17use crate::analyzer::k8s_optimize::{K8sOptimizeConfig, analyze};
18
19/// Arguments for the k8s-drift tool
20#[derive(Debug, Deserialize)]
21pub struct K8sDriftArgs {
22    /// Path to K8s manifest file or directory (relative to project root)
23    pub path: String,
24
25    /// Kubernetes cluster context name (from kubeconfig)
26    #[serde(default)]
27    pub cluster: Option<String>,
28
29    /// Filter by namespace
30    #[serde(default)]
31    pub namespace: Option<String>,
32
33    /// Only check resource fields (requests/limits)
34    #[serde(default)]
35    pub resources_only: bool,
36
37    /// Include all fields in diff, not just resource-related
38    #[serde(default)]
39    pub full_diff: bool,
40
41    /// Output format: "summary", "detailed", "remediation"
42    #[serde(default)]
43    pub output_format: Option<String>,
44}
45
46/// Error type for k8s-drift tool
47#[derive(Debug, thiserror::Error)]
48#[error("K8s drift error: {0}")]
49pub struct K8sDriftError(String);
50
51/// Represents a single drift item
52#[derive(Debug, Clone, Serialize)]
53pub struct DriftItem {
54    pub resource_kind: String,
55    pub resource_name: String,
56    pub namespace: String,
57    pub container: Option<String>,
58    pub field: String,
59    pub declared_value: Option<String>,
60    pub actual_value: Option<String>,
61    pub drift_type: DriftType,
62    pub severity: DriftSeverity,
63}
64
65/// Type of drift detected
66#[derive(Debug, Clone, Serialize)]
67#[serde(rename_all = "snake_case")]
68pub enum DriftType {
69    /// Value differs between manifest and cluster
70    ValueChanged,
71    /// Field exists in manifest but not in cluster
72    MissingInCluster,
73    /// Field exists in cluster but not in manifest
74    ExtraInCluster,
75    /// Resource exists in manifest but not in cluster
76    ResourceMissing,
77    /// Resource exists in cluster but not in manifest
78    ResourceExtra,
79}
80
81/// Severity of the drift
82#[derive(Debug, Clone, Serialize)]
83#[serde(rename_all = "lowercase")]
84pub enum DriftSeverity {
85    Critical,
86    High,
87    Medium,
88    Low,
89    Info,
90}
91
92/// Tool for detecting Kubernetes configuration drift
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct K8sDriftTool {
95    project_root: PathBuf,
96}
97
98impl K8sDriftTool {
99    /// Create a new K8sDriftTool with the given project root.
100    pub fn new(project_root: PathBuf) -> Self {
101        Self { project_root }
102    }
103
104    /// Analyze manifests and detect drift (static analysis placeholder).
105    ///
106    /// In production, this would connect to the cluster and compare.
107    /// For now, it analyzes manifests and prepares a drift detection structure.
108    fn analyze_drift(&self, args: &K8sDriftArgs) -> Result<Vec<DriftItem>, K8sDriftError> {
109        let path = &args.path;
110        let full_path = if std::path::Path::new(path).is_absolute() {
111            PathBuf::from(path)
112        } else {
113            self.project_root.join(path)
114        };
115
116        if !full_path.exists() {
117            return Err(K8sDriftError(format!(
118                "Path not found: {}",
119                full_path.display()
120            )));
121        }
122
123        // Run static analysis to understand what's declared
124        let config = K8sOptimizeConfig::default();
125        let result = analyze(&full_path, &config);
126
127        // Without live cluster connection, we can only report what we'd check
128        // This is a placeholder - full implementation requires kube-rs integration
129        let mut drift_items: Vec<DriftItem> = Vec::new();
130
131        // If no cluster specified, return info about what would be checked
132        if args.cluster.is_none() {
133            // Add informational items about what resources exist in manifests
134            for rec in &result.recommendations {
135                // These aren't real drifts, but they indicate what we'd compare
136                drift_items.push(DriftItem {
137                    resource_kind: rec.resource_kind.clone(),
138                    resource_name: rec.resource_name.clone(),
139                    namespace: rec
140                        .namespace
141                        .clone()
142                        .unwrap_or_else(|| "default".to_string()),
143                    container: Some(rec.container.clone()),
144                    field: "resources".to_string(),
145                    declared_value: Some(format!(
146                        "cpu_req={}, mem_req={}",
147                        rec.current.cpu_request.as_deref().unwrap_or("none"),
148                        rec.current.memory_request.as_deref().unwrap_or("none")
149                    )),
150                    actual_value: None, // Would be populated with cluster data
151                    drift_type: DriftType::ValueChanged,
152                    severity: DriftSeverity::Info,
153                });
154            }
155        }
156
157        Ok(drift_items)
158    }
159
160    /// Format drift results for agent consumption.
161    fn format_for_agent(
162        &self,
163        drift_items: &[DriftItem],
164        args: &K8sDriftArgs,
165    ) -> serde_json::Value {
166        let cluster_connected = args.cluster.is_some();
167
168        // Group by severity
169        let critical_count = drift_items
170            .iter()
171            .filter(|d| matches!(d.severity, DriftSeverity::Critical))
172            .count();
173        let high_count = drift_items
174            .iter()
175            .filter(|d| matches!(d.severity, DriftSeverity::High))
176            .count();
177        let medium_count = drift_items
178            .iter()
179            .filter(|d| matches!(d.severity, DriftSeverity::Medium))
180            .count();
181        let low_count = drift_items
182            .iter()
183            .filter(|d| matches!(d.severity, DriftSeverity::Low))
184            .count();
185        let info_count = drift_items
186            .iter()
187            .filter(|d| matches!(d.severity, DriftSeverity::Info))
188            .count();
189
190        let mut response = json!({
191            "summary": {
192                "total_drifts": drift_items.len(),
193                "critical": critical_count,
194                "high": high_count,
195                "medium": medium_count,
196                "low": low_count,
197                "info": info_count,
198                "cluster_connected": cluster_connected,
199                "path_analyzed": args.path,
200            },
201        });
202
203        if cluster_connected {
204            response["drifts"] = json!(drift_items.iter().map(|d| {
205                json!({
206                    "resource": format!("{}/{}", d.resource_kind, d.resource_name),
207                    "namespace": d.namespace,
208                    "container": d.container,
209                    "field": d.field,
210                    "drift_type": d.drift_type,
211                    "severity": d.severity,
212                    "declared": d.declared_value,
213                    "actual": d.actual_value,
214                    "remediation": match d.drift_type {
215                        DriftType::ValueChanged => "Update manifest or apply kubectl to sync",
216                        DriftType::MissingInCluster => "Apply manifest with kubectl apply",
217                        DriftType::ExtraInCluster => "Remove from cluster or add to manifest",
218                        DriftType::ResourceMissing => "Deploy resource with kubectl apply",
219                        DriftType::ResourceExtra => "Consider adding to version control",
220                    },
221                })
222            }).collect::<Vec<_>>());
223        } else {
224            // Without cluster connection, provide guidance
225            response["status"] = json!("no_cluster_connection");
226            response["message"] = json!(
227                "No cluster context specified. To detect actual drift, provide a cluster name. \
228                 Currently showing resources that would be checked."
229            );
230            response["resources_to_check"] = json!(
231                drift_items
232                    .iter()
233                    .map(|d| {
234                        json!({
235                            "resource": format!("{}/{}", d.resource_kind, d.resource_name),
236                            "namespace": d.namespace,
237                            "container": d.container,
238                            "declared_resources": d.declared_value,
239                        })
240                    })
241                    .collect::<Vec<_>>()
242            );
243            response["next_steps"] = json!([
244                "Specify 'cluster' parameter with your kubeconfig context name",
245                "Run: kubectl config get-contexts to see available contexts",
246                "Example: k8s_drift with cluster='my-cluster-context'",
247            ]);
248        }
249
250        // Add remediation commands if drifts found
251        if cluster_connected && !drift_items.is_empty() {
252            let mut commands: Vec<String> = Vec::new();
253
254            // Generate kubectl commands for remediation
255            for drift in drift_items
256                .iter()
257                .filter(|d| matches!(d.severity, DriftSeverity::Critical | DriftSeverity::High))
258            {
259                match drift.drift_type {
260                    DriftType::ValueChanged | DriftType::MissingInCluster => {
261                        commands.push(format!(
262                            "kubectl apply -f {} -n {}",
263                            args.path, drift.namespace
264                        ));
265                    }
266                    DriftType::ResourceMissing => {
267                        commands.push(format!(
268                            "kubectl apply -f {} -n {}",
269                            args.path, drift.namespace
270                        ));
271                    }
272                    _ => {}
273                }
274            }
275
276            if !commands.is_empty() {
277                // Deduplicate commands
278                commands.sort();
279                commands.dedup();
280                response["remediation_commands"] = json!(commands);
281            }
282        }
283
284        response
285    }
286}
287
288impl Tool for K8sDriftTool {
289    const NAME: &'static str = "k8s_drift";
290
291    type Args = K8sDriftArgs;
292    type Output = String;
293    type Error = K8sDriftError;
294
295    async fn definition(&self, _prompt: String) -> ToolDefinition {
296        ToolDefinition {
297            name: Self::NAME.to_string(),
298            description: r#"Detect configuration drift between Kubernetes manifests and live cluster.
299
300**IMPORTANT: Only use this tool when the user EXPLICITLY asks about:**
301- Drift detection between manifests and cluster
302- What's different between declared and actual state
303- GitOps compliance or sync status
304- Whether manifests match what's running
305
306**DO NOT use this tool for:**
307- General Kubernetes linting (use kubelint)
308- Resource optimization (use k8s_optimize)
309- Cost analysis (use k8s_costs)
310- Any task where user didn't ask about drift/sync/compliance
311
312## What It Does
313Compares manifest files against live cluster state (when cluster is connected) to find differences in resource configurations.
314
315## Returns (analysis only - does NOT apply changes)
316- Summary of drift counts by severity
317- Per-resource drift information
318- Suggested remediation commands
319- Does NOT automatically sync or modify anything"#.to_string(),
320            parameters: json!({
321                "type": "object",
322                "properties": {
323                    "path": {
324                        "type": "string",
325                        "description": "Path to K8s manifest file or directory (required)"
326                    },
327                    "cluster": {
328                        "type": "string",
329                        "description": "Kubernetes cluster context name (from kubeconfig). Required for actual drift detection."
330                    },
331                    "namespace": {
332                        "type": "string",
333                        "description": "Filter drift detection to specific namespace"
334                    },
335                    "resources_only": {
336                        "type": "boolean",
337                        "description": "Only check resource requests/limits fields (default: false)"
338                    },
339                    "full_diff": {
340                        "type": "boolean",
341                        "description": "Include all fields in comparison, not just resources (default: false)"
342                    },
343                    "output_format": {
344                        "type": "string",
345                        "description": "Output format: 'summary', 'detailed', 'remediation'",
346                        "enum": ["summary", "detailed", "remediation"]
347                    }
348                },
349                "required": ["path"]
350            }),
351        }
352    }
353
354    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
355        let drift_items = self.analyze_drift(&args)?;
356        let output = self.format_for_agent(&drift_items, &args);
357        Ok(serde_json::to_string_pretty(&output).unwrap_or_else(|_| "{}".to_string()))
358    }
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// The registered tool name is stable.
    #[test]
    fn test_tool_name() {
        assert_eq!(K8sDriftTool::NAME, "k8s_drift");
    }

    /// Drift type and severity serialize with their renamed spellings.
    #[test]
    fn test_drift_type_serialization() {
        let item = DriftItem {
            resource_kind: String::from("Deployment"),
            resource_name: String::from("my-app"),
            namespace: String::from("default"),
            container: Some(String::from("app")),
            field: String::from("resources.limits.cpu"),
            declared_value: Some(String::from("500m")),
            actual_value: Some(String::from("1000m")),
            drift_type: DriftType::ValueChanged,
            severity: DriftSeverity::High,
        };

        let serialized = serde_json::to_string(&item).unwrap();
        for expected in ["value_changed", "high"] {
            assert!(serialized.contains(expected));
        }
    }

    /// The definition carries the tool name and mentions drift.
    #[tokio::test]
    async fn test_definition() {
        let definition = K8sDriftTool::new(PathBuf::from("."))
            .definition(String::new())
            .await;

        assert_eq!(definition.name, "k8s_drift");
        assert!(definition.description.contains("drift"));
    }

    /// Without a cluster the response carries guidance instead of drifts.
    #[tokio::test]
    async fn test_no_cluster_output() {
        let args = K8sDriftArgs {
            path: String::from("."),
            cluster: None,
            namespace: None,
            resources_only: false,
            full_diff: false,
            output_format: None,
        };

        let raw = K8sDriftTool::new(PathBuf::from("."))
            .call(args)
            .await
            .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();

        assert_eq!(parsed["status"], "no_cluster_connection");
        assert!(parsed["next_steps"].is_array());
    }
}