//! Deep analysis mode — multi-document holistic project health analysis.
//!
//! Uses a larger LLM model (Qwen3-1.7B) to analyze multiple issues and file
//! contents together, producing prioritized action items.

use super::ollama::OllamaClient;
use super::{GenerateRequest, LlmError};
use crate::scoring::QualityIssue;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

const DEEP_MODEL: &str = "qwen3:1.7b";
13
/// Truncate `s` to at most `max_bytes` bytes, backing up as needed so a
/// multi-byte UTF-8 character is never split.
///
/// Returns the original slice unchanged when it already fits.
fn safe_truncate(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    // Scan backwards from the byte limit to the nearest char boundary.
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    &s[..cut]
}
26
/// Result of a deep project health analysis.
///
/// Produced either by the LLM (parsed from its JSON output) or by the
/// rule-based fallback when the LLM is unavailable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeepAnalysis {
    /// High-level summary of project health (2-3 sentences).
    pub summary: String,
    /// Prioritized list of improvement actions, highest impact first.
    pub priority_actions: Vec<PriorityAction>,
    /// Estimated score improvement if all actions are taken (percentage points).
    pub estimated_improvement: f64,
}
37
/// A single prioritized action item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PriorityAction {
    /// What to do.
    pub action: String,
    /// Which file to modify (if applicable); `None` for project-wide actions.
    pub file_path: Option<String>,
    /// Impact level: "high", "medium", "low".
    pub impact: String,
    /// Effort level: "quick" (<5 min), "moderate" (5-30 min), "significant" (>30 min).
    pub effort: String,
}
50
/// Deep analyzer for multi-document project health analysis.
pub struct DeepAnalyzer {
    /// Ollama client configured with the deep model ([`DEEP_MODEL`]) in `new`.
    ollama: OllamaClient,
}
55
56impl DeepAnalyzer {
57    /// Create a new deep analyzer with the default deep model.
58    pub fn new() -> Self {
59        Self {
60            ollama: OllamaClient::new().with_model(DEEP_MODEL),
61        }
62    }
63
64    /// Analyze project health using LLM, with fallback to rule-based analysis.
65    pub async fn analyze_project_health(
66        &self,
67        issues: &[QualityIssue],
68        file_contents: &HashMap<String, String>,
69    ) -> DeepAnalysis {
70        // Try LLM-based analysis first
71        match self.analyze_with_llm(issues, file_contents).await {
72            Ok(analysis) => analysis,
73            Err(_) => {
74                // Fallback to rule-based analysis
75                self.rule_based_analysis(issues, file_contents)
76            }
77        }
78    }
79
80    /// LLM-based deep analysis.
81    async fn analyze_with_llm(
82        &self,
83        issues: &[QualityIssue],
84        file_contents: &HashMap<String, String>,
85    ) -> Result<DeepAnalysis, LlmError> {
86        let prompt = build_deep_prompt(issues, file_contents);
87
88        let response = self
89            .ollama
90            .generate(&GenerateRequest {
91                prompt,
92                max_tokens: 1024,
93                temperature: 0.3,
94            })
95            .await?;
96
97        parse_deep_response(&response.text, issues)
98    }
99
100    /// Rule-based fallback analysis when LLM is not available.
101    pub fn rule_based_analysis(
102        &self,
103        issues: &[QualityIssue],
104        _file_contents: &HashMap<String, String>,
105    ) -> DeepAnalysis {
106        rule_based_fallback(issues)
107    }
108}
109
110impl Default for DeepAnalyzer {
111    fn default() -> Self {
112        Self::new()
113    }
114}
115
116/// Build the prompt for deep analysis.
117pub fn build_deep_prompt(
118    issues: &[QualityIssue],
119    file_contents: &HashMap<String, String>,
120) -> String {
121    let mut prompt = String::from(
122        r#"You are a project health analyzer. Analyze the following quality issues and file contents.
123
124Output a JSON object with this exact format:
125{"summary":"<2-3 sentence summary>","actions":[{"action":"<what to do>","file":"<path or null>","impact":"high|medium|low","effort":"quick|moderate|significant"}],"improvement":<number>}
126
127## Quality Issues Found:
128"#,
129    );
130
131    for (i, issue) in issues.iter().take(20).enumerate() {
132        prompt.push_str(&format!(
133            "{}. [{}] {} — {} (file: {})\n",
134            i + 1,
135            format!("{:?}", issue.severity).to_uppercase(),
136            issue.title,
137            issue.attribution,
138            issue.file_path.as_deref().unwrap_or("project-wide"),
139        ));
140        if let Some(suggestion) = &issue.suggestion {
141            prompt.push_str(&format!("   Suggestion: {}\n", suggestion));
142        }
143    }
144
145    if !file_contents.is_empty() {
146        prompt.push_str("\n## Key File Contents:\n");
147        for (path, content) in file_contents.iter().take(5) {
148            let truncated = safe_truncate(content, 500);
149            prompt.push_str(&format!("\n### {}\n```\n{}\n```\n", path, truncated));
150        }
151    }
152
153    prompt.push_str(
154        "\nAnalyze these issues holistically. Group related issues, identify root causes, and suggest the highest-impact fixes first. Return ONLY the JSON object.\n",
155    );
156
157    prompt
158}
159
160/// Parse the LLM response into a DeepAnalysis.
161fn parse_deep_response(response: &str, _issues: &[QualityIssue]) -> Result<DeepAnalysis, LlmError> {
162    // Try to extract JSON from the response
163    let json_str = extract_json(response);
164
165    if let Some(json_str) = json_str {
166        if let Ok(value) = serde_json::from_str::<serde_json::Value>(&json_str) {
167            let summary = value["summary"]
168                .as_str()
169                .unwrap_or("Analysis completed.")
170                .to_string();
171
172            let mut priority_actions = Vec::new();
173
174            if let Some(actions) = value["actions"].as_array() {
175                for action_val in actions.iter().take(10) {
176                    priority_actions.push(PriorityAction {
177                        action: action_val["action"]
178                            .as_str()
179                            .unwrap_or("Review and fix")
180                            .to_string(),
181                        file_path: action_val["file"]
182                            .as_str()
183                            .filter(|s| *s != "null")
184                            .map(|s| s.to_string()),
185                        impact: action_val["impact"]
186                            .as_str()
187                            .unwrap_or("medium")
188                            .to_string(),
189                        effort: action_val["effort"]
190                            .as_str()
191                            .unwrap_or("moderate")
192                            .to_string(),
193                    });
194                }
195            }
196
197            let estimated_improvement = value["improvement"]
198                .as_f64()
199                .unwrap_or(5.0);
200
201            return Ok(DeepAnalysis {
202                summary,
203                priority_actions,
204                estimated_improvement,
205            });
206        }
207    }
208
209    // If JSON parsing fails, fall back to rule-based
210    Err(LlmError::Parse(format!(
211        "Could not parse LLM response as JSON. Raw: {}",
212        &response[..response.len().min(200)]
213    )))
214}
215
/// Extract the widest `{` … `}` span from `text`, if one exists.
///
/// LLM output often wraps JSON in prose or markdown fences; taking the first
/// `{` through the last `}` yields the candidate object for a parse attempt.
fn extract_json(text: &str) -> Option<String> {
    let open = text.find('{')?;
    let close = text.rfind('}')?;
    (close > open).then(|| text[open..=close].to_string())
}
227
228/// Rule-based fallback when LLM is not available.
229pub fn rule_based_fallback(issues: &[QualityIssue]) -> DeepAnalysis {
230    use crate::scoring::IssueSeverity;
231
232    let high_count = issues
233        .iter()
234        .filter(|i| matches!(i.severity, IssueSeverity::Blocking | IssueSeverity::High))
235        .count();
236    let medium_count = issues
237        .iter()
238        .filter(|i| i.severity == IssueSeverity::Medium)
239        .count();
240    let total = issues.len();
241
242    let summary = if high_count == 0 && medium_count == 0 {
243        "Your project is in excellent shape with no significant issues detected.".to_string()
244    } else if high_count == 0 {
245        format!(
246            "Your project has {} minor issues that could be improved. No critical problems found.",
247            total
248        )
249    } else {
250        format!(
251            "Found {} critical issues and {} warnings across your project. Priority fixes recommended.",
252            high_count, medium_count
253        )
254    };
255
256    // Generate priority actions from issues
257    let mut priority_actions: Vec<PriorityAction> = issues
258        .iter()
259        .filter(|i| matches!(i.severity, IssueSeverity::Blocking | IssueSeverity::High))
260        .take(5)
261        .map(|issue| PriorityAction {
262            action: issue.suggestion.clone().unwrap_or_else(|| issue.title.clone()),
263            file_path: issue.file_path.clone(),
264            impact: "high".to_string(),
265            effort: "moderate".to_string(),
266        })
267        .collect();
268
269    // Add medium-severity actions
270    let medium_actions: Vec<PriorityAction> = issues
271        .iter()
272        .filter(|i| i.severity == IssueSeverity::Medium)
273        .take(3)
274        .map(|issue| PriorityAction {
275            action: issue.suggestion.clone().unwrap_or_else(|| issue.title.clone()),
276            file_path: issue.file_path.clone(),
277            impact: "medium".to_string(),
278            effort: "quick".to_string(),
279        })
280        .collect();
281
282    priority_actions.extend(medium_actions);
283
284    // Estimate improvement: ~3 points per high issue, ~1 per medium
285    let estimated_improvement = (high_count as f64 * 3.0 + medium_count as f64 * 1.0).min(30.0);
286
287    DeepAnalysis {
288        summary,
289        priority_actions,
290        estimated_improvement,
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scoring::{IssueCategory, IssueSeverity};

    /// Helper: build a minimal QualityIssue with a derived id and a
    /// "Fix: <title>" suggestion.
    fn make_issue(severity: IssueSeverity, title: &str, file: Option<&str>) -> QualityIssue {
        QualityIssue::new(
            format!("test-{}", title.replace(' ', "-")),
            file.map(|s| s.to_string()),
            IssueCategory::Freshness,
            severity,
            title.to_string(),
            "test".to_string(),
            Some(format!("Fix: {}", title)),
        )
    }

    // ---- build_deep_prompt ----

    #[test]
    fn test_build_deep_prompt_includes_issues() {
        let issues = vec![
            make_issue(IssueSeverity::High, "Stale README", Some("README.md")),
            make_issue(IssueSeverity::Medium, "Missing docs", None),
        ];
        let files = HashMap::new();
        let prompt = build_deep_prompt(&issues, &files);

        assert!(prompt.contains("Stale README"), "Prompt should include issue title");
        assert!(prompt.contains("README.md"), "Prompt should include file path");
        assert!(prompt.contains("Missing docs"), "Prompt should include all issues");
        assert!(prompt.contains("Quality Issues Found"), "Prompt should have issues section");
    }

    #[test]
    fn test_build_deep_prompt_includes_file_contents() {
        let issues = vec![];
        let mut files = HashMap::new();
        files.insert("README.md".to_string(), "# My Project\nThis is a readme.".to_string());
        let prompt = build_deep_prompt(&issues, &files);

        assert!(prompt.contains("Key File Contents"), "Prompt should have files section");
        assert!(prompt.contains("# My Project"), "Prompt should include file content");
    }

    #[test]
    fn test_build_deep_prompt_truncates_long_content() {
        let issues = vec![];
        let mut files = HashMap::new();
        files.insert("big.md".to_string(), "x".repeat(1000));
        let prompt = build_deep_prompt(&issues, &files);

        // Should be truncated to ~500 chars
        assert!(prompt.len() < 1200, "Prompt should truncate long content");
    }

    // ---- rule_based_fallback ----

    #[test]
    fn test_rule_based_fallback_no_issues() {
        let result = rule_based_fallback(&[]);
        assert!(result.summary.contains("excellent"), "No issues should be excellent");
        assert!(result.priority_actions.is_empty());
        assert_eq!(result.estimated_improvement, 0.0);
    }

    #[test]
    fn test_rule_based_fallback_with_high_issues() {
        let issues = vec![
            make_issue(IssueSeverity::High, "Stale docs", Some("README.md")),
            make_issue(IssueSeverity::High, "Missing config", None),
            make_issue(IssueSeverity::Medium, "Old changelog", Some("CHANGELOG.md")),
        ];

        let result = rule_based_fallback(&issues);
        assert!(result.summary.contains("critical"), "Should mention critical issues");
        assert!(!result.priority_actions.is_empty(), "Should have priority actions");

        // High issues first
        assert_eq!(result.priority_actions[0].impact, "high");

        // Estimated improvement: 2 high * 3 + 1 medium * 1 = 7
        assert!((result.estimated_improvement - 7.0).abs() < 0.1);
    }

    #[test]
    fn test_rule_based_fallback_only_medium_issues() {
        let issues = vec![
            make_issue(IssueSeverity::Medium, "Minor issue 1", None),
            make_issue(IssueSeverity::Medium, "Minor issue 2", None),
        ];

        let result = rule_based_fallback(&issues);
        assert!(result.summary.contains("minor"), "Should mention minor issues");
        assert!(!result.priority_actions.is_empty());
    }

    #[test]
    fn test_rule_based_fallback_caps_improvement() {
        // 20 high issues * 3 points = 60, which must clamp to the 30 cap.
        let issues: Vec<QualityIssue> = (0..20)
            .map(|i| make_issue(IssueSeverity::High, &format!("Issue {}", i), None))
            .collect();

        let result = rule_based_fallback(&issues);
        assert!(result.estimated_improvement <= 30.0, "Should cap at 30");
    }

    // ---- extract_json ----

    #[test]
    fn test_extract_json_basic() {
        let text = r#"Here is the analysis: {"summary":"test","actions":[],"improvement":5}"#;
        let json = extract_json(text);
        assert!(json.is_some());
        let parsed: serde_json::Value = serde_json::from_str(&json.unwrap()).unwrap();
        assert_eq!(parsed["summary"], "test");
    }

    #[test]
    fn test_extract_json_no_json() {
        let text = "This has no JSON at all";
        assert!(extract_json(text).is_none());
    }

    #[test]
    fn test_extract_json_with_markdown() {
        let text = r#"```json
{"summary":"project needs work","actions":[{"action":"update readme","file":"README.md","impact":"high","effort":"quick"}],"improvement":10}
```"#;
        let json = extract_json(text);
        assert!(json.is_some());
        let parsed: serde_json::Value = serde_json::from_str(&json.unwrap()).unwrap();
        assert_eq!(parsed["actions"].as_array().unwrap().len(), 1);
    }

    // ---- parse_deep_response ----

    #[test]
    fn test_parse_deep_response_valid() {
        let response = r#"{"summary":"Good project","actions":[{"action":"Update README","file":"README.md","impact":"high","effort":"quick"}],"improvement":8.5}"#;
        let result = parse_deep_response(response, &[]);
        assert!(result.is_ok());
        let analysis = result.unwrap();
        assert_eq!(analysis.summary, "Good project");
        assert_eq!(analysis.priority_actions.len(), 1);
        assert_eq!(analysis.priority_actions[0].action, "Update README");
        assert_eq!(analysis.priority_actions[0].file_path, Some("README.md".to_string()));
        assert!((analysis.estimated_improvement - 8.5).abs() < 0.1);
    }

    #[test]
    fn test_parse_deep_response_invalid() {
        let response = "This is not JSON at all, just plain text.";
        let result = parse_deep_response(response, &[]);
        assert!(result.is_err());
    }

    // ---- construction & serde round-trips ----

    #[test]
    fn test_deep_analyzer_creation() {
        let analyzer = DeepAnalyzer::new();
        // Just verify it can be created
        let _ = analyzer;
    }

    #[test]
    fn test_priority_action_serialization() {
        let action = PriorityAction {
            action: "Update README.md".to_string(),
            file_path: Some("README.md".to_string()),
            impact: "high".to_string(),
            effort: "quick".to_string(),
        };

        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains("Update README.md"));

        let deserialized: PriorityAction = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.action, "Update README.md");
    }

    #[test]
    fn test_deep_analysis_serialization() {
        let analysis = DeepAnalysis {
            summary: "Project health is good".to_string(),
            priority_actions: vec![PriorityAction {
                action: "Fix stale docs".to_string(),
                file_path: None,
                impact: "medium".to_string(),
                effort: "moderate".to_string(),
            }],
            estimated_improvement: 5.0,
        };

        let json = serde_json::to_string(&analysis).unwrap();
        let deserialized: DeepAnalysis = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.summary, analysis.summary);
        assert_eq!(deserialized.priority_actions.len(), 1);
    }
}