Skip to main content

syncable_cli/agent/tools/
output_store.rs

1//! RAG Storage Layer for Tool Outputs
2//!
3//! Stores full tool outputs to disk for later retrieval by the agent.
4//! Implements the storage part of the RAG (Retrieval-Augmented Generation) pattern.
5//!
6//! ## Session Tracking
7//!
8//! All stored outputs are tracked in a session registry, so the agent always knows
9//! what data is available for retrieval. Every compressed output includes the full
10//! list of available refs.
11
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14use std::fs;
15use std::path::PathBuf;
16use std::sync::Mutex;
17use std::time::{SystemTime, UNIX_EPOCH};
18
/// Directory where stored tool outputs are written, one `<ref_id>.json` file per output.
const OUTPUT_DIR: &str = "/tmp/syncable-cli/outputs";
21
/// Maximum age of stored outputs in seconds (1 hour).
///
/// `cleanup_session_registry` drops session registry entries that are at least this old.
const MAX_AGE_SECS: u64 = 3600;
24
/// Session registry entry describing one stored output that is available for retrieval.
///
/// Entries are created by `register_session_ref` and rendered for the agent by
/// `format_session_refs_for_agent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionRef {
    /// Reference ID used to retrieve the stored output
    pub ref_id: String,
    /// Name of the tool that generated this output
    pub tool: String,
    /// Brief description of what this output contains
    pub contains: String,
    /// Summary counts (e.g., "47 issues: 3 critical, 12 high")
    pub summary: String,
    /// When the entry was registered, in seconds since the Unix epoch
    pub timestamp: u64,
    /// Size of the output in bytes, as reported by the caller
    pub size_bytes: usize,
}
41
/// Global session registry - tracks all stored outputs in the current session.
///
/// Guarded by a mutex; the accessors in this file skip their work (or return an
/// empty list) when the lock cannot be acquired.
static SESSION_REGISTRY: Mutex<Vec<SessionRef>> = Mutex::new(Vec::new());
44
45/// Register a new output in the session registry
46pub fn register_session_ref(
47    ref_id: &str,
48    tool: &str,
49    contains: &str,
50    summary: &str,
51    size_bytes: usize,
52) {
53    if let Ok(mut registry) = SESSION_REGISTRY.lock() {
54        // Remove any existing entry for this ref_id (in case of re-runs)
55        registry.retain(|r| r.ref_id != ref_id);
56
57        registry.push(SessionRef {
58            ref_id: ref_id.to_string(),
59            tool: tool.to_string(),
60            contains: contains.to_string(),
61            summary: summary.to_string(),
62            timestamp: SystemTime::now()
63                .duration_since(UNIX_EPOCH)
64                .map(|d| d.as_secs())
65                .unwrap_or(0),
66            size_bytes,
67        });
68    }
69}
70
71/// Get all session refs for inclusion in compressed outputs
72pub fn get_session_refs() -> Vec<SessionRef> {
73    SESSION_REGISTRY
74        .lock()
75        .map(|r| r.clone())
76        .unwrap_or_default()
77}
78
79/// Clear old entries from session registry (called periodically)
80pub fn cleanup_session_registry() {
81    let now = SystemTime::now()
82        .duration_since(UNIX_EPOCH)
83        .map(|d| d.as_secs())
84        .unwrap_or(0);
85
86    if let Ok(mut registry) = SESSION_REGISTRY.lock() {
87        registry.retain(|r| now - r.timestamp < MAX_AGE_SECS);
88    }
89}
90
91/// Format session refs as a user-friendly string for the agent
92pub fn format_session_refs_for_agent() -> String {
93    let refs = get_session_refs();
94
95    if refs.is_empty() {
96        return String::new();
97    }
98
99    let mut output = String::from("\nšŸ“¦ AVAILABLE DATA FOR RETRIEVAL:\n");
100    output.push_str("─────────────────────────────────\n");
101
102    for r in &refs {
103        let age = SystemTime::now()
104            .duration_since(UNIX_EPOCH)
105            .map(|d| d.as_secs())
106            .unwrap_or(0)
107            .saturating_sub(r.timestamp);
108
109        let age_str = if age < 60 {
110            format!("{}s ago", age)
111        } else {
112            format!("{}m ago", age / 60)
113        };
114
115        output.push_str(&format!(
116            "\n• {} [{}]\n  Contains: {}\n  Summary: {}\n  Retrieve: retrieve_output(\"{}\") or with query\n",
117            r.ref_id, age_str, r.contains, r.summary, r.ref_id
118        ));
119    }
120
121    output.push_str("\n─────────────────────────────────\n");
122    output.push_str(
123        "Query examples: \"severity:critical\", \"file:deployment.yaml\", \"code:DL3008\"\n",
124    );
125
126    output
127}
128
/// Generate a short reference ID of the form `<6 hex chars>_<4 base-36 chars>`.
///
/// The first part is the last six hex digits of the current Unix time in
/// milliseconds (in reverse order). The second part is derived from
/// [`rand_simple`], whose value changes on every call, so IDs generated within
/// the same millisecond by one process do not collide.
fn generate_ref_id() -> String {
    const ALPHABET: &[u8; 36] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    // Number of distinct 4-character suffixes: 36^4.
    const SUFFIX_SPACE: u64 = 36 * 36 * 36 * 36;
    // Prime, hence coprime to 36^4 = 2^8 * 3^8: multiplying by it permutes the
    // suffix space, so consecutive sequence values stay distinct but do not
    // look sequential.
    const SCRAMBLE: u64 = 7919;

    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_millis())
        .unwrap_or(0);

    // Last 6 hex digits of the timestamp, least-significant digit first.
    let ts_part: String = format!("{:x}", timestamp).chars().rev().take(6).collect();

    // Encode the scrambled value as exactly four base-36 digits.
    let mut n = (rand_simple() as u64 % SUFFIX_SPACE) * SCRAMBLE % SUFFIX_SPACE;
    let mut suffix = [ALPHABET[0]; 4];
    for slot in suffix.iter_mut().rev() {
        *slot = ALPHABET[(n % 36) as usize];
        n /= 36;
    }
    let rand_part: String = suffix.iter().map(|&b| char::from(b)).collect();

    format!("{}_{}", ts_part, rand_part)
}

/// Simple per-call varying number below 2^31 (no external deps).
///
/// Combines a per-process seed (derived from the process ID) with a process-wide
/// call counter, so successive calls return successive values modulo 2^31.
/// This is NOT cryptographically random; it only needs to differ between calls.
fn rand_simple() -> usize {
    use std::sync::atomic::{AtomicUsize, Ordering};

    static SEQUENCE: AtomicUsize = AtomicUsize::new(0);

    let seq = SEQUENCE.fetch_add(1, Ordering::Relaxed);
    let seed = (std::process::id() as usize)
        .wrapping_mul(1103515245)
        .wrapping_add(12345);
    seed.wrapping_add(seq) % (1 << 31)
}
162
163/// Ensure output directory exists
164fn ensure_output_dir() -> std::io::Result<PathBuf> {
165    let path = PathBuf::from(OUTPUT_DIR);
166    if !path.exists() {
167        fs::create_dir_all(&path)?;
168    }
169    Ok(path)
170}
171
172/// Store output to disk and return reference ID
173///
174/// # Arguments
175/// * `output` - The JSON value to store
176/// * `tool_name` - Name of the tool (used as prefix in ref_id)
177///
178/// # Returns
179/// Reference ID that can be used to retrieve the output later
180pub fn store_output(output: &Value, tool_name: &str) -> String {
181    let ref_id = format!("{}_{}", tool_name, generate_ref_id());
182
183    if let Ok(dir) = ensure_output_dir() {
184        let path = dir.join(format!("{}.json", ref_id));
185
186        // Store with metadata
187        let stored = serde_json::json!({
188            "ref_id": ref_id,
189            "tool": tool_name,
190            "timestamp": SystemTime::now()
191                .duration_since(UNIX_EPOCH)
192                .map(|d| d.as_secs())
193                .unwrap_or(0),
194            "data": output
195        });
196
197        if let Ok(json_str) = serde_json::to_string(&stored) {
198            let _ = fs::write(&path, json_str);
199        }
200    }
201
202    ref_id
203}
204
205/// Retrieve stored output by reference ID
206///
207/// # Arguments
208/// * `ref_id` - The reference ID returned from `store_output`
209///
210/// # Returns
211/// The stored JSON value, or None if not found
212pub fn retrieve_output(ref_id: &str) -> Option<Value> {
213    let path = PathBuf::from(OUTPUT_DIR).join(format!("{}.json", ref_id));
214
215    if !path.exists() {
216        return None;
217    }
218
219    let content = fs::read_to_string(&path).ok()?;
220    let stored: Value = serde_json::from_str(&content).ok()?;
221
222    // Return just the data portion
223    stored.get("data").cloned()
224}
225
226/// Retrieve and filter output by query
227///
228/// # Arguments
229/// * `ref_id` - The reference ID
230/// * `query` - Optional filter query (e.g., "severity:critical", "file:path", "code:DL3008")
231///
232/// For analyze_project outputs, supports:
233/// - section:summary - Top-level info
234/// - section:projects - List projects
235/// - section:frameworks - All frameworks
236/// - section:languages - All languages
237/// - section:services - All services
238/// - project:name - Specific project details
239/// - service:name - Specific service
240/// - language:Go - Language details
241/// - framework:* - Framework details
242/// - compact:true - Compacted output (default for analyze_project)
243///
244/// # Returns
245/// Filtered JSON value, or None if not found
246pub fn retrieve_filtered(
247    ref_id: &str,
248    query: Option<&str>,
249    limit: usize,
250    offset: usize,
251) -> Option<Value> {
252    let data = retrieve_output(ref_id)?;
253
254    // Check if this is an analyze_project output
255    if is_analyze_project_output(&data) {
256        return retrieve_analyze_project(&data, query);
257    }
258
259    let query = match query {
260        Some(q) if !q.is_empty() => q,
261        _ => return Some(data),
262    };
263
264    // Parse query
265    let (filter_type, filter_value) = parse_query(query);
266
267    // Find issues/findings array in data
268    let issues = find_issues_array(&data).unwrap_or_default();
269
270    // Filter issues
271    let filtered: Vec<Value> = issues
272        .iter()
273        .filter(|issue| matches_filter(issue, &filter_type, &filter_value))
274        .cloned()
275        .collect();
276
277    let total_matches = filtered.len();
278
279    // Apply pagination
280    let page: Vec<Value> = filtered
281        .into_iter()
282        .skip(offset)
283        .take(limit)
284        .map(|v| truncate_result_value(v))
285        .collect();
286
287    let showing = page.len();
288    let has_more = offset + showing < total_matches;
289
290    let mut result = serde_json::json!({
291        "query": query,
292        "total_matches": total_matches,
293        "showing": showing,
294        "offset": offset,
295        "has_more": has_more,
296        "results": page
297    });
298
299    if has_more {
300        result.as_object_mut().unwrap().insert(
301            "next_command".to_string(),
302            Value::String(format!(
303                "sync-ctl retrieve '{}' --query '{}' --offset {} --limit {}",
304                ref_id,
305                query,
306                offset + limit,
307                limit
308            )),
309        );
310    }
311
312    Some(result)
313}
314
315/// Truncate large fields in a single result to keep output compact.
316fn truncate_result_value(mut value: Value) -> Value {
317    if let Some(obj) = value.as_object_mut() {
318        // Truncate long description fields
319        for field in ["description", "message", "details"] {
320            if let Some(s) = obj.get(field).and_then(|v| v.as_str()) {
321                if s.len() > 200 {
322                    let truncated = format!("{}...", &s[..200]);
323                    obj.insert(field.to_string(), Value::String(truncated));
324                }
325            }
326        }
327
328        // Cap references/urls arrays
329        if let Some(refs) = obj.get("references").and_then(|v| v.as_array()) {
330            if refs.len() > 3 {
331                let truncated: Vec<Value> = refs.iter().take(3).cloned().collect();
332                let remaining = refs.len() - 3;
333                obj.insert("references".to_string(), Value::Array(truncated));
334                obj.insert(
335                    "references_truncated".to_string(),
336                    Value::Number(remaining.into()),
337                );
338            }
339        }
340    }
341    value
342}
343
/// Split a query of the form `type:value` at its first colon.
///
/// The type is lowercased; the value is returned verbatim (it may itself contain
/// colons). A query without a colon is treated as a general search term and
/// yields the type `"any"` with the whole query as value.
fn parse_query(query: &str) -> (String, String) {
    match query.split_once(':') {
        Some((kind, value)) => (kind.to_lowercase(), value.to_owned()),
        None => ("any".to_owned(), query.to_owned()),
    }
}
354
355/// Find issues/findings array in a JSON value
356fn find_issues_array(data: &Value) -> Option<Vec<Value>> {
357    let issue_fields = [
358        "issues",
359        "findings",
360        "violations",
361        "warnings",
362        "errors",
363        "recommendations",
364        "results",
365        "failures",
366        "diagnostics",
367        "vulnerable_dependencies",
368        "dependencies",
369    ];
370
371    for field in &issue_fields {
372        if let Some(arr) = data.get(field).and_then(|v| v.as_array()) {
373            // Flatten vulnerable_dependencies: each dep has inner vulnerabilities[]
374            if *field == "vulnerable_dependencies" && !arr.is_empty() {
375                let mut flat = Vec::new();
376                for dep in arr {
377                    let dep_name = dep
378                        .get("name")
379                        .and_then(|v| v.as_str())
380                        .unwrap_or("unknown");
381                    let dep_version = dep.get("version").and_then(|v| v.as_str()).unwrap_or("?");
382                    let source_dir = dep.get("source_dir").cloned();
383                    let language = dep.get("language").cloned();
384                    if let Some(vulns) = dep.get("vulnerabilities").and_then(|v| v.as_array()) {
385                        for vuln in vulns {
386                            let mut entry = vuln.clone();
387                            if let Some(obj) = entry.as_object_mut() {
388                                obj.insert(
389                                    "package".to_string(),
390                                    Value::String(dep_name.to_string()),
391                                );
392                                obj.insert(
393                                    "package_version".to_string(),
394                                    Value::String(dep_version.to_string()),
395                                );
396                                if let Some(sd) = &source_dir {
397                                    obj.insert("source_dir".to_string(), sd.clone());
398                                }
399                                if let Some(lang) = &language {
400                                    obj.insert("language".to_string(), lang.clone());
401                                }
402                            }
403                            flat.push(entry);
404                        }
405                    }
406                }
407                return Some(flat);
408            }
409            return Some(arr.clone());
410        }
411    }
412
413    // Check if data itself is an array
414    if let Some(arr) = data.as_array() {
415        return Some(arr.clone());
416    }
417
418    None
419}
420
421/// Check if an issue matches a filter
422fn matches_filter(issue: &Value, filter_type: &str, filter_value: &str) -> bool {
423    match filter_type {
424        "severity" | "level" => {
425            let sev = issue
426                .get("severity")
427                .or_else(|| issue.get("level"))
428                .and_then(|v| v.as_str())
429                .unwrap_or("");
430            sev.to_lowercase().contains(&filter_value.to_lowercase())
431        }
432        "file" | "path" => {
433            let file = issue
434                .get("file")
435                .or_else(|| issue.get("path"))
436                .or_else(|| issue.get("filename"))
437                .and_then(|v| v.as_str())
438                .unwrap_or("");
439            file.to_lowercase().contains(&filter_value.to_lowercase())
440        }
441        "code" | "rule" => {
442            let code = issue
443                .get("code")
444                .or_else(|| issue.get("rule"))
445                .or_else(|| issue.get("rule_id"))
446                .and_then(|v| v.as_str())
447                .unwrap_or("");
448            code.to_lowercase().contains(&filter_value.to_lowercase())
449        }
450        "container" | "resource" => {
451            let container = issue
452                .get("container")
453                .or_else(|| issue.get("resource"))
454                .or_else(|| issue.get("name"))
455                .and_then(|v| v.as_str())
456                .unwrap_or("");
457            container
458                .to_lowercase()
459                .contains(&filter_value.to_lowercase())
460        }
461        _ => {
462            // Search in all string values
463            let issue_str = serde_json::to_string(issue).unwrap_or_default();
464            issue_str
465                .to_lowercase()
466                .contains(&filter_value.to_lowercase())
467        }
468    }
469}
470
471// ============================================================================
472// Smart Retrieval for different output types
473// ============================================================================
474
/// Output type detection for smart retrieval.
///
/// Produced by `detect_output_type`, which classifies a stored output purely by
/// which top-level keys are present.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputType {
    /// MonorepoAnalysis - has a "projects" and/or "is_monorepo" key
    MonorepoAnalysis,
    /// ProjectAnalysis - flat structure with both "languages" and "analysis_metadata" keys
    ProjectAnalysis,
    /// LintResult - has a "failures" key (kubelint, hadolint, dclint, helmlint)
    LintResult,
    /// OptimizationResult - has a "recommendations" key (k8s_optimize)
    OptimizationResult,
    /// Generic - fallback for unknown structures
    Generic,
}
489
490/// Detect the output type for smart retrieval routing
491pub fn detect_output_type(data: &Value) -> OutputType {
492    // MonorepoAnalysis: has projects array or is_monorepo flag
493    if data.get("projects").is_some() || data.get("is_monorepo").is_some() {
494        return OutputType::MonorepoAnalysis;
495    }
496
497    // ProjectAnalysis: has languages array + analysis_metadata (flat structure)
498    if data.get("languages").is_some() && data.get("analysis_metadata").is_some() {
499        return OutputType::ProjectAnalysis;
500    }
501
502    // LintResult: has failures array
503    if data.get("failures").is_some() {
504        return OutputType::LintResult;
505    }
506
507    // OptimizationResult: has recommendations array
508    if data.get("recommendations").is_some() {
509        return OutputType::OptimizationResult;
510    }
511
512    OutputType::Generic
513}
514
515/// Check if data is an analyze_project output (either type)
516fn is_analyze_project_output(data: &Value) -> bool {
517    matches!(
518        detect_output_type(data),
519        OutputType::MonorepoAnalysis | OutputType::ProjectAnalysis
520    )
521}
522
523/// Smart retrieval for analyze_project outputs
524/// Supports queries like:
525/// - section:summary - Top-level info without nested data
526/// - section:projects - List project names and categories
527/// - project:name - Get specific project details (compacted)
528/// - service:name - Get specific service details
529/// - language:Go - Get language details for a specific language
530/// - framework:* - List all detected frameworks
531/// - compact:true - Strip file arrays, return counts
532pub fn retrieve_analyze_project(data: &Value, query: Option<&str>) -> Option<Value> {
533    let query = query.unwrap_or("compact:true");
534    let (query_type, query_value) = parse_query(query);
535
536    match query_type.as_str() {
537        "section" => match query_value.as_str() {
538            "summary" => Some(extract_summary(data)),
539            "projects" => Some(extract_projects_list(data)),
540            "frameworks" => Some(extract_all_frameworks(data)),
541            "languages" => Some(extract_all_languages(data)),
542            "services" => Some(extract_all_services(data)),
543            _ => Some(compact_analyze_output(data)),
544        },
545        "project" => extract_project_by_name(data, &query_value),
546        "service" => extract_service_by_name(data, &query_value),
547        "language" => extract_language_details(data, &query_value),
548        "framework" => extract_framework_details(data, &query_value),
549        "compact" => Some(compact_analyze_output(data)),
550        _ => {
551            // Default: return compacted output
552            Some(compact_analyze_output(data))
553        }
554    }
555}
556
557/// Extract top-level summary without nested data
558fn extract_summary(data: &Value) -> Value {
559    let mut summary = serde_json::Map::new();
560
561    // Handle MonorepoAnalysis structure
562    if let Some(root) = data.get("root_path").and_then(|v| v.as_str()) {
563        summary.insert("root_path".to_string(), Value::String(root.to_string()));
564    }
565    if let Some(mono) = data.get("is_monorepo").and_then(|v| v.as_bool()) {
566        summary.insert("is_monorepo".to_string(), Value::Bool(mono));
567    }
568
569    // Handle ProjectAnalysis structure (flat)
570    if let Some(root) = data.get("project_root").and_then(|v| v.as_str()) {
571        summary.insert("project_root".to_string(), Value::String(root.to_string()));
572    }
573    if let Some(arch) = data.get("architecture_type").and_then(|v| v.as_str()) {
574        summary.insert(
575            "architecture_type".to_string(),
576            Value::String(arch.to_string()),
577        );
578    }
579
580    // Count projects (MonorepoAnalysis)
581    if let Some(projects) = data.get("projects").and_then(|v| v.as_array()) {
582        summary.insert(
583            "project_count".to_string(),
584            Value::Number(projects.len().into()),
585        );
586
587        // Extract project names
588        let names: Vec<Value> = projects
589            .iter()
590            .filter_map(|p| p.get("name").and_then(|n| n.as_str()))
591            .map(|n| Value::String(n.to_string()))
592            .collect();
593        summary.insert("project_names".to_string(), Value::Array(names));
594    }
595
596    // Extract languages (ProjectAnalysis flat structure)
597    if let Some(languages) = data.get("languages").and_then(|v| v.as_array()) {
598        let names: Vec<Value> = languages
599            .iter()
600            .filter_map(|l| l.get("name").and_then(|n| n.as_str()))
601            .map(|n| Value::String(n.to_string()))
602            .collect();
603        summary.insert("languages".to_string(), Value::Array(names));
604    }
605
606    // Extract technologies (ProjectAnalysis flat structure)
607    if let Some(techs) = data.get("technologies").and_then(|v| v.as_array()) {
608        let names: Vec<Value> = techs
609            .iter()
610            .filter_map(|t| t.get("name").and_then(|n| n.as_str()))
611            .map(|n| Value::String(n.to_string()))
612            .collect();
613        summary.insert("technologies".to_string(), Value::Array(names));
614    }
615
616    // Extract services (ProjectAnalysis flat structure) - include names, not just count
617    if let Some(services) = data.get("services").and_then(|v| v.as_array()) {
618        summary.insert(
619            "services_count".to_string(),
620            Value::Number(services.len().into()),
621        );
622        // Include service names so agent knows what microservices exist
623        let service_names: Vec<Value> = services
624            .iter()
625            .filter_map(|s| s.get("name").and_then(|n| n.as_str()))
626            .map(|n| Value::String(n.to_string()))
627            .collect();
628        if !service_names.is_empty() {
629            summary.insert("services".to_string(), Value::Array(service_names));
630        }
631    }
632
633    Value::Object(summary)
634}
635
636/// Extract list of projects with basic info (no file arrays)
637fn extract_projects_list(data: &Value) -> Value {
638    let projects = data.get("projects").and_then(|v| v.as_array());
639
640    let list: Vec<Value> = projects
641        .map(|arr| {
642            arr.iter()
643                .map(|p| {
644                    let mut proj = serde_json::Map::new();
645                    if let Some(name) = p.get("name") {
646                        proj.insert("name".to_string(), name.clone());
647                    }
648                    if let Some(path) = p.get("path") {
649                        proj.insert("path".to_string(), path.clone());
650                    }
651                    if let Some(cat) = p.get("project_category") {
652                        proj.insert("category".to_string(), cat.clone());
653                    }
654                    // Add language/framework counts
655                    if let Some(analysis) = p.get("analysis") {
656                        if let Some(langs) = analysis.get("languages").and_then(|v| v.as_array()) {
657                            let lang_names: Vec<Value> = langs
658                                .iter()
659                                .filter_map(|l| l.get("name").and_then(|n| n.as_str()))
660                                .map(|n| Value::String(n.to_string()))
661                                .collect();
662                            proj.insert("languages".to_string(), Value::Array(lang_names));
663                        }
664                        if let Some(fws) = analysis.get("frameworks").and_then(|v| v.as_array()) {
665                            let fw_names: Vec<Value> = fws
666                                .iter()
667                                .filter_map(|f| f.get("name").and_then(|n| n.as_str()))
668                                .map(|n| Value::String(n.to_string()))
669                                .collect();
670                            proj.insert("frameworks".to_string(), Value::Array(fw_names));
671                        }
672                    }
673                    Value::Object(proj)
674                })
675                .collect()
676        })
677        .unwrap_or_default();
678
679    serde_json::json!({
680        "total_projects": list.len(),
681        "projects": list
682    })
683}
684
685/// Extract specific project by name
686fn extract_project_by_name(data: &Value, name: &str) -> Option<Value> {
687    let projects = data.get("projects").and_then(|v| v.as_array())?;
688
689    let project = projects.iter().find(|p| {
690        p.get("name")
691            .and_then(|n| n.as_str())
692            .map(|n| n.to_lowercase().contains(&name.to_lowercase()))
693            .unwrap_or(false)
694    })?;
695
696    Some(compact_project(project))
697}
698
699/// Extract specific service by name
700fn extract_service_by_name(data: &Value, name: &str) -> Option<Value> {
701    let projects = data.get("projects").and_then(|v| v.as_array())?;
702
703    for project in projects {
704        if let Some(services) = project
705            .get("analysis")
706            .and_then(|a| a.get("services"))
707            .and_then(|s| s.as_array())
708            && let Some(service) = services.iter().find(|s| {
709                s.get("name")
710                    .and_then(|n| n.as_str())
711                    .map(|n| n.to_lowercase().contains(&name.to_lowercase()))
712                    .unwrap_or(false)
713            })
714        {
715            return Some(service.clone());
716        }
717    }
718    None
719}
720
721/// Extract language detection details (with file count instead of file list)
722fn extract_language_details(data: &Value, lang_name: &str) -> Option<Value> {
723    let mut results = Vec::new();
724
725    // Helper to process a languages array
726    let process_languages = |languages: &[Value], proj_name: &str, results: &mut Vec<Value>| {
727        for lang in languages {
728            let name = lang.get("name").and_then(|n| n.as_str()).unwrap_or("");
729            if lang_name == "*" || name.to_lowercase().contains(&lang_name.to_lowercase()) {
730                let mut compact_lang = serde_json::Map::new();
731                if !proj_name.is_empty() {
732                    compact_lang
733                        .insert("project".to_string(), Value::String(proj_name.to_string()));
734                }
735                compact_lang.insert(
736                    "name".to_string(),
737                    lang.get("name").cloned().unwrap_or(Value::Null),
738                );
739                compact_lang.insert(
740                    "version".to_string(),
741                    lang.get("version").cloned().unwrap_or(Value::Null),
742                );
743                compact_lang.insert(
744                    "confidence".to_string(),
745                    lang.get("confidence").cloned().unwrap_or(Value::Null),
746                );
747
748                // Replace file array with count
749                if let Some(files) = lang.get("files").and_then(|f| f.as_array()) {
750                    compact_lang
751                        .insert("file_count".to_string(), Value::Number(files.len().into()));
752                }
753
754                results.push(Value::Object(compact_lang));
755            }
756        }
757    };
758
759    // Handle ProjectAnalysis flat structure (languages at top level)
760    if let Some(languages) = data.get("languages").and_then(|v| v.as_array()) {
761        process_languages(languages, "", &mut results);
762    }
763
764    // Handle MonorepoAnalysis structure (languages nested in projects)
765    if let Some(projects) = data.get("projects").and_then(|v| v.as_array()) {
766        for project in projects {
767            let proj_name = project
768                .get("name")
769                .and_then(|n| n.as_str())
770                .unwrap_or("unknown");
771
772            if let Some(languages) = project
773                .get("analysis")
774                .and_then(|a| a.get("languages"))
775                .and_then(|l| l.as_array())
776            {
777                process_languages(languages, proj_name, &mut results);
778            }
779        }
780    }
781
782    Some(serde_json::json!({
783        "query": format!("language:{}", lang_name),
784        "total_matches": results.len(),
785        "results": results
786    }))
787}
788
789/// Extract framework/technology details
790fn extract_framework_details(data: &Value, fw_name: &str) -> Option<Value> {
791    let mut results = Vec::new();
792
793    // Helper to process a frameworks/technologies array
794    let process_techs = |techs: &[Value], proj_name: &str, results: &mut Vec<Value>| {
795        for tech in techs {
796            let name = tech.get("name").and_then(|n| n.as_str()).unwrap_or("");
797            if fw_name == "*" || name.to_lowercase().contains(&fw_name.to_lowercase()) {
798                let mut compact_fw = serde_json::Map::new();
799                if !proj_name.is_empty() {
800                    compact_fw.insert("project".to_string(), Value::String(proj_name.to_string()));
801                }
802                if let Some(v) = tech.get("name") {
803                    compact_fw.insert("name".to_string(), v.clone());
804                }
805                if let Some(v) = tech.get("version") {
806                    compact_fw.insert("version".to_string(), v.clone());
807                }
808                if let Some(v) = tech.get("category") {
809                    compact_fw.insert("category".to_string(), v.clone());
810                }
811                results.push(Value::Object(compact_fw));
812            }
813        }
814    };
815
816    // Handle ProjectAnalysis flat structure (technologies at top level)
817    if let Some(techs) = data.get("technologies").and_then(|v| v.as_array()) {
818        process_techs(techs, "", &mut results);
819    }
820
821    // Also check frameworks field (deprecated but may exist)
822    if let Some(fws) = data.get("frameworks").and_then(|v| v.as_array()) {
823        process_techs(fws, "", &mut results);
824    }
825
826    // Handle MonorepoAnalysis structure (frameworks nested in projects)
827    if let Some(projects) = data.get("projects").and_then(|v| v.as_array()) {
828        for project in projects {
829            let proj_name = project
830                .get("name")
831                .and_then(|n| n.as_str())
832                .unwrap_or("unknown");
833
834            if let Some(frameworks) = project
835                .get("analysis")
836                .and_then(|a| a.get("frameworks"))
837                .and_then(|f| f.as_array())
838            {
839                process_techs(frameworks, proj_name, &mut results);
840            }
841        }
842    }
843
844    Some(serde_json::json!({
845        "query": format!("framework:{}", fw_name),
846        "total_matches": results.len(),
847        "results": results
848    }))
849}
850
851/// Extract all frameworks across all projects
852fn extract_all_frameworks(data: &Value) -> Value {
853    extract_framework_details(data, "*").unwrap_or(serde_json::json!({"results": []}))
854}
855
856/// Extract all languages across all projects
857fn extract_all_languages(data: &Value) -> Value {
858    extract_language_details(data, "*").unwrap_or(serde_json::json!({"results": []}))
859}
860
861/// Extract all services across all projects
862/// In a monorepo, projects ARE services - so we return projects data
863fn extract_all_services(data: &Value) -> Value {
864    // In monorepos, projects = services. Return projects list as services.
865    // This is because the `services` field in ProjectAnalysis was never implemented.
866    extract_projects_list(data)
867}
868
869/// Compact entire analyze_project output (strip file arrays)
870fn compact_analyze_output(data: &Value) -> Value {
871    let mut result = serde_json::Map::new();
872
873    // Handle MonorepoAnalysis structure
874    if let Some(v) = data.get("root_path") {
875        result.insert("root_path".to_string(), v.clone());
876    }
877    if let Some(v) = data.get("is_monorepo") {
878        result.insert("is_monorepo".to_string(), v.clone());
879    }
880
881    // Compact projects (MonorepoAnalysis)
882    if let Some(projects) = data.get("projects").and_then(|v| v.as_array()) {
883        let compacted: Vec<Value> = projects.iter().map(compact_project).collect();
884        result.insert("projects".to_string(), Value::Array(compacted));
885        return Value::Object(result);
886    }
887
888    // Handle ProjectAnalysis flat structure
889    if let Some(v) = data.get("project_root") {
890        result.insert("project_root".to_string(), v.clone());
891    }
892    if let Some(v) = data.get("architecture_type") {
893        result.insert("architecture_type".to_string(), v.clone());
894    }
895    if let Some(v) = data.get("project_type") {
896        result.insert("project_type".to_string(), v.clone());
897    }
898
899    // Compact languages (replace files array with count)
900    if let Some(languages) = data.get("languages").and_then(|v| v.as_array()) {
901        let compacted: Vec<Value> = languages
902            .iter()
903            .map(|lang| {
904                let mut compact_lang = serde_json::Map::new();
905                for key in &["name", "version", "confidence"] {
906                    if let Some(v) = lang.get(*key) {
907                        compact_lang.insert(key.to_string(), v.clone());
908                    }
909                }
910                // Replace files array with count
911                if let Some(files) = lang.get("files").and_then(|f| f.as_array()) {
912                    compact_lang
913                        .insert("file_count".to_string(), Value::Number(files.len().into()));
914                }
915                Value::Object(compact_lang)
916            })
917            .collect();
918        result.insert("languages".to_string(), Value::Array(compacted));
919    }
920
921    // Include technologies (usually not huge)
922    if let Some(techs) = data.get("technologies").and_then(|v| v.as_array()) {
923        let compacted: Vec<Value> = techs
924            .iter()
925            .map(|tech| {
926                let mut compact_tech = serde_json::Map::new();
927                for key in &["name", "version", "category", "confidence"] {
928                    if let Some(v) = tech.get(*key) {
929                        compact_tech.insert(key.to_string(), v.clone());
930                    }
931                }
932                Value::Object(compact_tech)
933            })
934            .collect();
935        result.insert("technologies".to_string(), Value::Array(compacted));
936    }
937
938    // Include services (usually small)
939    if let Some(services) = data.get("services").and_then(|v| v.as_array()) {
940        result.insert("services".to_string(), Value::Array(services.clone()));
941    }
942
943    // Include analysis_metadata
944    if let Some(meta) = data.get("analysis_metadata") {
945        result.insert("analysis_metadata".to_string(), meta.clone());
946    }
947
948    Value::Object(result)
949}
950
951/// Compact a single project (strip file arrays, replace with counts)
952fn compact_project(project: &Value) -> Value {
953    let mut compact = serde_json::Map::new();
954
955    // Copy basic fields
956    for key in &["name", "path", "project_category"] {
957        if let Some(v) = project.get(*key) {
958            compact.insert(key.to_string(), v.clone());
959        }
960    }
961
962    // Compact analysis
963    if let Some(analysis) = project.get("analysis") {
964        let mut compact_analysis = serde_json::Map::new();
965
966        // Copy project_root
967        if let Some(v) = analysis.get("project_root") {
968            compact_analysis.insert("project_root".to_string(), v.clone());
969        }
970
971        // Compact languages (strip files, add file_count)
972        if let Some(languages) = analysis.get("languages").and_then(|v| v.as_array()) {
973            let compacted: Vec<Value> = languages
974                .iter()
975                .map(|lang| {
976                    let mut compact_lang = serde_json::Map::new();
977                    for key in &["name", "version", "confidence"] {
978                        if let Some(v) = lang.get(*key) {
979                            compact_lang.insert(key.to_string(), v.clone());
980                        }
981                    }
982                    // Replace files array with count
983                    if let Some(files) = lang.get("files").and_then(|f| f.as_array()) {
984                        compact_lang
985                            .insert("file_count".to_string(), Value::Number(files.len().into()));
986                    }
987                    Value::Object(compact_lang)
988                })
989                .collect();
990            compact_analysis.insert("languages".to_string(), Value::Array(compacted));
991        }
992
993        // Copy frameworks, databases, services as-is (usually not huge)
994        for key in &[
995            "frameworks",
996            "databases",
997            "services",
998            "build_tools",
999            "package_managers",
1000        ] {
1001            if let Some(v) = analysis.get(*key) {
1002                compact_analysis.insert(key.to_string(), v.clone());
1003            }
1004        }
1005
1006        compact.insert("analysis".to_string(), Value::Object(compact_analysis));
1007    }
1008
1009    Value::Object(compact)
1010}
1011
1012/// List all stored outputs
1013pub fn list_outputs() -> Vec<OutputInfo> {
1014    let dir = match ensure_output_dir() {
1015        Ok(d) => d,
1016        Err(_) => return Vec::new(),
1017    };
1018
1019    let mut outputs = Vec::new();
1020
1021    if let Ok(entries) = fs::read_dir(&dir) {
1022        for entry in entries.flatten() {
1023            if let Some(filename) = entry.file_name().to_str()
1024                && filename.ends_with(".json")
1025            {
1026                let ref_id = filename.trim_end_matches(".json").to_string();
1027
1028                // Read metadata
1029                if let Ok(content) = fs::read_to_string(entry.path())
1030                    && let Ok(stored) = serde_json::from_str::<Value>(&content)
1031                {
1032                    let tool = stored
1033                        .get("tool")
1034                        .and_then(|v| v.as_str())
1035                        .unwrap_or("unknown")
1036                        .to_string();
1037                    let timestamp = stored
1038                        .get("timestamp")
1039                        .and_then(|v| v.as_u64())
1040                        .unwrap_or(0);
1041                    let size = content.len();
1042
1043                    outputs.push(OutputInfo {
1044                        ref_id,
1045                        tool,
1046                        timestamp,
1047                        size_bytes: size,
1048                    });
1049                }
1050            }
1051        }
1052    }
1053
1054    // Sort by timestamp (newest first)
1055    outputs.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
1056    outputs
1057}
1058
1059/// Resolve "latest" to the most recent ref_id by scanning disk files.
1060/// Works across separate CLI invocations (no in-memory state dependency).
1061pub fn resolve_latest() -> Option<String> {
1062    let output_dir = std::path::Path::new("/tmp/syncable-cli/outputs");
1063    if !output_dir.exists() {
1064        return None;
1065    }
1066
1067    let mut newest: Option<(u64, String)> = None;
1068
1069    if let Ok(entries) = std::fs::read_dir(output_dir) {
1070        for entry in entries.flatten() {
1071            let path = entry.path();
1072            if path.extension().map_or(true, |e| e != "json") {
1073                continue;
1074            }
1075
1076            if let Ok(contents) = std::fs::read_to_string(&path) {
1077                if let Ok(data) = serde_json::from_str::<Value>(&contents) {
1078                    if let Some(ts) = data.get("timestamp").and_then(|v| v.as_u64()) {
1079                        if let Some(ref_id) = data.get("ref_id").and_then(|v| v.as_str()) {
1080                            match &newest {
1081                                Some((best_ts, _)) if ts > *best_ts => {
1082                                    newest = Some((ts, ref_id.to_string()));
1083                                }
1084                                None => {
1085                                    newest = Some((ts, ref_id.to_string()));
1086                                }
1087                                _ => {}
1088                            }
1089                        }
1090                    }
1091                }
1092            }
1093        }
1094    }
1095
1096    newest.map(|(_, ref_id)| ref_id)
1097}
1098
/// Metadata describing one stored output, as produced by `list_outputs`.
#[derive(Clone, Debug)]
pub struct OutputInfo {
    /// Reference ID of the output (the stored file's name without `.json`).
    pub ref_id: String,
    /// Value of the stored `tool` field; `"unknown"` when it is absent.
    pub tool: String,
    /// Value of the stored `timestamp` field; `0` when it is absent.
    pub timestamp: u64,
    /// Length in bytes of the stored file's content.
    pub size_bytes: usize,
}
1107
1108/// Clean up old stored outputs
1109pub fn cleanup_old_outputs() {
1110    let dir = match ensure_output_dir() {
1111        Ok(d) => d,
1112        Err(_) => return,
1113    };
1114
1115    let now = SystemTime::now()
1116        .duration_since(UNIX_EPOCH)
1117        .map(|d| d.as_secs())
1118        .unwrap_or(0);
1119
1120    if let Ok(entries) = fs::read_dir(&dir) {
1121        for entry in entries.flatten() {
1122            if let Ok(content) = fs::read_to_string(entry.path())
1123                && let Ok(stored) = serde_json::from_str::<Value>(&content)
1124            {
1125                let timestamp = stored
1126                    .get("timestamp")
1127                    .and_then(|v| v.as_u64())
1128                    .unwrap_or(0);
1129
1130                if now - timestamp > MAX_AGE_SECS {
1131                    let _ = fs::remove_file(entry.path());
1132                }
1133            }
1134        }
1135    }
1136}
1137
#[cfg(test)]
mod tests {
    //! Unit tests for the output store: storing, retrieving, filtering, the
    //! `analyze_project` compaction/section queries, and "latest" resolution.
    //!
    //! These tests write into the real output directory (`OUTPUT_DIR`), so each
    //! test uses its own tool name / file names to stay independent of the others.

    use super::*;

    /// A stored value can be read back unchanged through its ref id.
    #[test]
    fn test_store_and_retrieve() {
        let data = serde_json::json!({
            "issues": [
                { "code": "test1", "severity": "high", "file": "test.yaml" }
            ]
        });

        let ref_id = store_output(&data, "test_tool");
        assert!(ref_id.starts_with("test_tool_"));

        let retrieved = retrieve_output(&ref_id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), data);
    }

    /// `field:value` queries only return the matching issues.
    #[test]
    fn test_filtered_retrieval() {
        let data = serde_json::json!({
            "issues": [
                { "code": "DL3008", "severity": "warning", "file": "Dockerfile1" },
                { "code": "DL3009", "severity": "info", "file": "Dockerfile2" },
                { "code": "DL3008", "severity": "warning", "file": "Dockerfile3" }
            ]
        });

        let ref_id = store_output(&data, "filter_test");

        // Filter by code
        let filtered = retrieve_filtered(&ref_id, Some("code:DL3008"), 100, 0);
        assert!(filtered.is_some());
        let results = filtered.unwrap();
        assert_eq!(results["total_matches"], 2);

        // Filter by severity
        let filtered = retrieve_filtered(&ref_id, Some("severity:info"), 100, 0);
        assert!(filtered.is_some());
        let results = filtered.unwrap();
        assert_eq!(results["total_matches"], 1);
    }

    /// Queries split on `:`; a bare term is searched under the `any` field.
    #[test]
    fn test_parse_query() {
        assert_eq!(
            parse_query("severity:critical"),
            ("severity".to_string(), "critical".to_string())
        );
        assert_eq!(
            parse_query("searchterm"),
            ("any".to_string(), "searchterm".to_string())
        );
    }

    /// Monorepo-shaped data is recognised as analyze_project output; lint data is not.
    #[test]
    fn test_analyze_project_detection() {
        let analyze_data = serde_json::json!({
            "root_path": "/test",
            "is_monorepo": true,
            "projects": []
        });
        assert!(is_analyze_project_output(&analyze_data));

        let lint_data = serde_json::json!({
            "issues": [{ "code": "DL3008" }]
        });
        assert!(!is_analyze_project_output(&lint_data));
    }

    /// The summary carries the root path, monorepo flag and project count.
    #[test]
    fn test_analyze_project_summary() {
        let data = serde_json::json!({
            "root_path": "/test/monorepo",
            "is_monorepo": true,
            "projects": [
                { "name": "api-gateway", "path": "services/api" },
                { "name": "web-app", "path": "apps/web" }
            ]
        });

        let summary = extract_summary(&data);
        assert_eq!(summary["root_path"], "/test/monorepo");
        assert_eq!(summary["is_monorepo"], true);
        assert_eq!(summary["project_count"], 2);
    }

    /// Default retrieval of analyze_project output replaces file arrays with counts.
    #[test]
    fn test_analyze_project_compact() {
        // Simulates massive analyze_project output with 1000s of files
        let files: Vec<String> = (0..1000).map(|i| format!("/src/file{}.ts", i)).collect();

        let data = serde_json::json!({
            "root_path": "/test",
            "is_monorepo": false,
            "projects": [{
                "name": "test-project",
                "path": "",
                "project_category": "Api",
                "analysis": {
                    "project_root": "/test",
                    "languages": [{
                        "name": "TypeScript",
                        "version": "5.0",
                        "confidence": 0.95,
                        "files": files
                    }],
                    "frameworks": [{
                        "name": "React",
                        "version": "18.0"
                    }]
                }
            }]
        });

        let ref_id = store_output(&data, "analyze_project_test");

        // Default retrieval should return compacted output
        let result = retrieve_filtered(&ref_id, None, 100, 0);
        assert!(result.is_some());

        let compacted = result.unwrap();

        // Verify files array was replaced with file_count
        let project = &compacted["projects"][0];
        let lang = &project["analysis"]["languages"][0];
        assert_eq!(lang["name"], "TypeScript");
        assert_eq!(lang["file_count"], 1000);
        assert!(lang.get("files").is_none()); // No files array

        // The compacted JSON should be much smaller
        let compacted_str = serde_json::to_string(&compacted).unwrap();
        let original_str = serde_json::to_string(&data).unwrap();
        assert!(compacted_str.len() < original_str.len() / 10); // At least 10x smaller
    }

    /// `section:*`, `language:*` and `framework:*` queries work on monorepo data.
    #[test]
    fn test_analyze_project_section_queries() {
        let data = serde_json::json!({
            "root_path": "/test",
            "is_monorepo": true,
            "projects": [{
                "name": "api-service",
                "path": "services/api",
                "project_category": "Api",
                "analysis": {
                    "languages": [{
                        "name": "Go",
                        "version": "1.21",
                        "confidence": 0.9,
                        "files": ["/main.go", "/handler.go"]
                    }],
                    "frameworks": [{
                        "name": "Gin",
                        "version": "1.9",
                        "category": "Web"
                    }],
                    "services": [{
                        "name": "api-http",
                        "type": "http",
                        "port": 8080
                    }]
                }
            }]
        });

        let ref_id = store_output(&data, "analyze_query_test");

        // Test section:projects
        let projects = retrieve_filtered(&ref_id, Some("section:projects"), 100, 0);
        assert!(projects.is_some());
        assert_eq!(projects.as_ref().unwrap()["total_projects"], 1);

        // Test section:frameworks
        let frameworks = retrieve_filtered(&ref_id, Some("section:frameworks"), 100, 0);
        assert!(frameworks.is_some());
        assert_eq!(frameworks.as_ref().unwrap()["total_matches"], 1);
        assert_eq!(frameworks.as_ref().unwrap()["results"][0]["name"], "Gin");

        // Test section:languages
        let languages = retrieve_filtered(&ref_id, Some("section:languages"), 100, 0);
        assert!(languages.is_some());
        assert_eq!(languages.as_ref().unwrap()["total_matches"], 1);
        assert_eq!(languages.as_ref().unwrap()["results"][0]["name"], "Go");
        // Files should be replaced with count
        assert_eq!(languages.as_ref().unwrap()["results"][0]["file_count"], 2);

        // Test language:Go specific query
        let go = retrieve_filtered(&ref_id, Some("language:Go"), 100, 0);
        assert!(go.is_some());
        assert_eq!(go.as_ref().unwrap()["total_matches"], 1);

        // Test framework:Gin specific query
        let gin = retrieve_filtered(&ref_id, Some("framework:Gin"), 100, 0);
        assert!(gin.is_some());
        assert_eq!(gin.as_ref().unwrap()["total_matches"], 1);
    }

    /// A top-level `failures` array is recognised as the issues list.
    #[test]
    fn test_find_issues_array_failures_field() {
        let data = serde_json::json!({
            "failures": [
                {"code": "DL3008", "severity": "warning", "message": "Pin versions"},
                {"code": "DL3009", "severity": "info", "message": "Delete apt cache"}
            ]
        });
        let result = find_issues_array(&data);
        assert!(result.is_some());
        assert_eq!(result.unwrap().len(), 2);
    }

    /// A top-level `diagnostics` array is recognised as the issues list.
    #[test]
    fn test_find_issues_array_diagnostics_field() {
        let data = serde_json::json!({
            "diagnostics": [
                {"code": "DC001", "severity": "error", "message": "Invalid compose version"}
            ]
        });
        let result = find_issues_array(&data);
        assert!(result.is_some());
        assert_eq!(result.unwrap().len(), 1);
    }

    /// `resolve_latest` picks the entry with the greatest stored timestamp.
    #[test]
    fn test_resolve_latest_returns_most_recent() {
        use std::fs;
        use std::path::Path;

        let output_dir = Path::new(OUTPUT_DIR);
        fs::create_dir_all(output_dir).unwrap();

        let old_path = output_dir.join("test_old_aaa111.json");
        let new_path = output_dir.join("test_new_bbb222.json");

        // Clean up any existing test files
        let _ = fs::remove_file(&old_path);
        let _ = fs::remove_file(&new_path);

        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();

        // Write two files with different timestamps.
        // Use a far-future timestamp for the "new" file so it's always
        // the most recent, even when other tests run concurrently and
        // call store_output() with the current time.
        let old_data = serde_json::json!({
            "ref_id": "test_old_aaa111",
            "tool": "test_old",
            "timestamp": now - 60,
            "data": {}
        });
        let new_data = serde_json::json!({
            "ref_id": "test_new_bbb222",
            "tool": "test_new",
            "timestamp": now + 9_999_999,
            "data": {}
        });

        fs::write(&old_path, serde_json::to_string(&old_data).unwrap()).unwrap();
        fs::write(&new_path, serde_json::to_string(&new_data).unwrap()).unwrap();

        let latest = resolve_latest();

        // Remove the fixtures BEFORE asserting. A failed assertion panics, and a
        // leftover far-future-timestamped file in the shared output directory
        // would keep winning every later `resolve_latest()` call.
        let _ = fs::remove_file(&old_path);
        let _ = fs::remove_file(&new_path);

        assert_eq!(latest.as_deref(), Some("test_new_bbb222"));
    }
}