// NOTE(review): the lines below this comment were scraped-page residue (a crates.io
// header plus a rendered line-number gutter, 1–440) accidentally pasted into this
// source file. The gutter has been removed; the header text is preserved here as
// comments so the file compiles.
//
// garbage-code-hunter 0.2.0
// A humorous Rust code quality detector that roasts your garbage code
//! Roast provider abstraction for generating code review messages.
//!
//! This module defines the `RoastProvider` trait and two implementations:
//! - `LocalRoastProvider`: Uses hardcoded roast messages from the i18n module.
//! - `LlmRoastProvider`: Calls an LLM endpoint to generate dynamic, context-aware roasts.

use std::collections::HashMap;
use std::path::PathBuf;

use crate::analyzer::CodeIssue;
use crate::i18n::I18n;

use super::client::{LlmClient, LlmConfig};
use super::prompt::build_roast_prompt;

/// A map from issue key to roast message.
///
/// Issue key format: `"{file_path}:{line}:{rule_name}"`
/// (e.g. `"src/main.rs:42:unwrap-abuse"`).
pub type RoastMap = HashMap<String, String>;

/// Trait for generating roast messages for code issues.
///
/// Implementors can use local hardcoded messages or call external LLM services.
pub trait RoastProvider {
    /// Generate roast messages for the given issues.
    ///
    /// `lang` is a locale identifier (e.g. `"en-US"`) selecting the message
    /// language for providers that localize.
    ///
    /// Returns a `RoastMap` mapping issue keys to roast messages.
    fn generate_roasts(&self, issues: &[CodeIssue], lang: &str) -> RoastMap;
}

/// Local roast provider using hardcoded messages from the i18n module.
///
/// This is the default provider and serves as the fallback when LLM calls fail.
pub struct LocalRoastProvider;

impl RoastProvider for LocalRoastProvider {
    fn generate_roasts(&self, issues: &[CodeIssue], lang: &str) -> RoastMap {
        let i18n = I18n::new(lang);

        // One (key, roast) pair per issue; the issue's line number picks a
        // stable-but-varied message out of the rule's roast pool.
        issues
            .iter()
            .map(|issue| {
                let key = format!(
                    "{}:{}:{}",
                    issue.file_path.display(),
                    issue.line,
                    issue.rule_name
                );
                let pool = i18n.get_roast_messages(&issue.rule_name);
                let roast = if pool.is_empty() {
                    // No localized roasts for this rule: fall back to the
                    // analyzer's own message.
                    issue.message.clone()
                } else {
                    pool[issue.line % pool.len()].clone()
                };
                (key, roast)
            })
            .collect()
    }
}

/// LLM-powered roast provider that generates dynamic, context-aware roasts.
///
/// Falls back to `LocalRoastProvider` if the LLM call fails or returns invalid data.
pub struct LlmRoastProvider {
    // Blocking client for the configured LLM endpoint.
    client: LlmClient,
    // Used whenever the LLM call or response parsing fails.
    fallback: LocalRoastProvider,
}

impl LlmRoastProvider {
    /// Create a new LLM roast provider with the given configuration.
    pub fn new(config: LlmConfig) -> Self {
        Self {
            client: LlmClient::new(config),
            fallback: LocalRoastProvider,
        }
    }
}

impl RoastProvider for LlmRoastProvider {
    /// Generate roasts via the LLM; on any call or parse failure, fall back
    /// to `LocalRoastProvider` so the caller always gets a usable map.
    fn generate_roasts(&self, issues: &[CodeIssue], lang: &str) -> RoastMap {
        let contexts = extract_code_contexts(issues);
        let prompt = build_roast_prompt(issues, &contexts, lang);

        tracing::debug!("Calling LLM with {} issues...", issues.len());
        // The prompt may contain multi-byte UTF-8 (code snippets, non-English
        // text); byte-slicing at a fixed offset (`&prompt[..500]`) panics when
        // byte 500 is not a char boundary, so truncate by characters instead —
        // which also matches what the log message claims.
        let preview: String = prompt.chars().take(500).collect();
        tracing::debug!("Prompt (first 500 chars): {}", preview);

        match self.client.call_blocking(&prompt) {
            Ok(response) => {
                tracing::debug!("LLM response received ({} chars)", response.len());
                match parse_llm_response(&response, issues) {
                    Ok(roasts) => {
                        tracing::debug!("Parsed {} roasts from LLM", roasts.len());
                        roasts
                    }
                    Err(e) => {
                        tracing::warn!(
                            "Failed to parse LLM response: {:#}. Falling back to local roasts.",
                            e
                        );
                        self.fallback.generate_roasts(issues, lang)
                    }
                }
            }
            Err(e) => {
                tracing::warn!("LLM call failed: {:#}. Falling back to local roasts.", e);
                self.fallback.generate_roasts(issues, lang)
            }
        }
    }
}

/// Extract code context (±5 lines) around each issue for the LLM prompt.
///
/// Groups issues by file to avoid reading the same file multiple times.
/// Files that cannot be read are silently skipped (no context entry is added).
fn extract_code_contexts(issues: &[CodeIssue]) -> HashMap<String, String> {
    // Collect unique file paths so each file is read at most once.
    let file_paths: Vec<PathBuf> = issues
        .iter()
        .map(|i| i.file_path.clone())
        .collect::<std::collections::HashSet<_>>()
        .into_iter()
        .collect();

    // Read all file contents upfront, split into lines for window slicing.
    let file_contents: HashMap<PathBuf, Vec<String>> = file_paths
        .into_iter()
        .filter_map(|path| {
            let content = std::fs::read_to_string(&path).ok()?;
            let lines: Vec<String> = content.lines().map(String::from).collect();
            Some((path, lines))
        })
        .collect();

    // Extract a numbered context window for each issue.
    let mut contexts = HashMap::new();
    for issue in issues {
        let key = format!(
            "{}:{}:{}",
            issue.file_path.display(),
            issue.line,
            issue.rule_name
        );

        if let Some(lines) = file_contents.get(&issue.file_path) {
            // `issue.line` is 1-based; the window covers lines line-5 ..= line+5.
            // Clamp BOTH bounds: if the issue's line number exceeds the file's
            // current length (e.g. the file changed after analysis), an
            // unclamped `start` would exceed `lines.len()` and the slice below
            // would panic. Clamping `start` to `end` yields an empty context
            // instead.
            let end = (issue.line + 5).min(lines.len());
            let start = issue.line.saturating_sub(6).min(end);
            let context: String = lines[start..end]
                .iter()
                .enumerate()
                .map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
                .collect::<Vec<_>>()
                .join("\n");
            contexts.insert(key, context);
        }
    }

    contexts
}

/// Parse the LLM response JSON into a RoastMap.
///
/// Expected format: `{"0": "roast message", "1": "roast message", ...}`
/// where keys are issue indices (0-based) matching the order in the prompt.
fn parse_llm_response(response: &str, issues: &[CodeIssue]) -> Result<RoastMap, anyhow::Error> {
    let json_str = extract_json_from_response(response);
    // LLMs often produce trailing commas in JSON — strip them for robustness
    let cleaned = fix_trailing_commas(json_str);
    let parsed: HashMap<String, String> = serde_json::from_str(&cleaned)?;

    // Keep only entries whose key parses as an in-range issue index, and
    // re-key each surviving roast by the issue's canonical key.
    let roasts = parsed
        .into_iter()
        .filter_map(|(idx_str, roast)| {
            let idx: usize = idx_str.parse().ok()?;
            let issue = issues.get(idx)?;
            let key = format!(
                "{}:{}:{}",
                issue.file_path.display(),
                issue.line,
                issue.rule_name
            );
            Some((key, roast))
        })
        .collect();

    Ok(roasts)
}

/// Extract JSON from LLM response, handling markdown code fences and plain JSON.
///
/// Falls back to returning the input unchanged when no fenced block and no
/// well-ordered `{...}` span is found.
fn extract_json_from_response(response: &str) -> &str {
    // Handle ```json ... ``` wrapper
    if let Some(start) = response.find("```json") {
        let json_start = start + 7;
        if let Some(end) = response[json_start..].find("```") {
            return response[json_start..json_start + end].trim();
        }
    }

    // Handle ``` ... ``` wrapper (without json tag)
    if let Some(start) = response.find("```") {
        let fence_start = start + 3;
        // Skip the optional language tag on the same line
        let content_start = response[fence_start..]
            .find('\n')
            .map(|n| fence_start + n + 1)
            .unwrap_or(fence_start);
        if let Some(end) = response[content_start..].find("```") {
            return response[content_start..content_start + end].trim();
        }
    }

    // Handle a plain JSON object. Guard against a `}` that appears BEFORE the
    // first `{` (e.g. "} stray {"): slicing `start..=end` with end < start
    // would panic, so only slice when the pair is well-ordered.
    if let (Some(start), Some(end)) = (response.find('{'), response.rfind('}')) {
        if start <= end {
            return &response[start..=end];
        }
    }

    response
}

/// Remove trailing commas from JSON before closing braces/brackets.
///
/// LLMs frequently produce invalid JSON like `{"a": 1, "b": 2,}` —
/// this function strips the trailing comma to make it valid.
///
/// Two invariants the naive byte-scan version violated:
/// - Commas inside string literals are content, not syntax — `{"a": ",}"}`
///   must be left untouched, so a minimal in-string/escape state is tracked.
/// - Iteration is by `char`, not byte: pushing `bytes[i] as char` would
///   reinterpret each UTF-8 continuation byte as a Latin-1 code point and
///   corrupt any non-ASCII roast text (the tool supports non-English locales).
fn fix_trailing_commas(json: &str) -> String {
    let mut result = String::with_capacity(json.len());
    let mut in_string = false;
    let mut escaped = false;

    for (i, c) in json.char_indices() {
        if in_string {
            result.push(c);
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_string = false;
            }
            continue;
        }

        match c {
            '"' => {
                in_string = true;
                result.push(c);
            }
            ',' => {
                // Look ahead past whitespace; drop the comma when the next
                // significant character closes an object or array.
                let next = json[i + 1..].trim_start();
                if !(next.starts_with('}') || next.starts_with(']')) {
                    result.push(c);
                }
            }
            _ => result.push(c),
        }
    }

    result
}

#[cfg(test)]
mod tests {
    //! Unit tests covering JSON extraction, trailing-comma repair,
    //! LLM-response parsing, and the local roast provider.

    use super::*;
    use crate::analyzer::Severity;

    /// Helper to create a test CodeIssue with minimal fields.
    fn make_issue(rule: &str, line: usize) -> CodeIssue {
        CodeIssue {
            file_path: PathBuf::from("test.rs"),
            line,
            column: 1,
            rule_name: rule.to_string(),
            message: "test message".to_string(),
            severity: Severity::Spicy,
        }
    }

    #[test]
    fn test_extract_json_from_plain_object() {
        // Objective: Verify plain JSON objects are extracted correctly.
        // Invariants: Output must match the input when it is a valid JSON object.
        let response = r#"{"0": "roast one", "1": "roast two"}"#;
        let result = extract_json_from_response(response);
        assert_eq!(result, response, "Plain JSON should be returned as-is");
    }

    #[test]
    fn test_extract_json_from_markdown_fence() {
        // Objective: Verify JSON wrapped in ```json fences is extracted.
        // Invariants: Only the JSON content between fences is returned.
        let response = "Here is the JSON:\n```json\n{\"0\": \"roast\"}\n```\nDone.";
        let result = extract_json_from_response(response);
        assert_eq!(
            result, "{\"0\": \"roast\"}",
            "JSON inside markdown fences should be extracted"
        );
    }

    #[test]
    fn test_parse_response_maps_indices_to_issue_keys() {
        // Objective: Verify LLM response indices map to correct issue keys.
        // Invariants: Each index maps to the corresponding issue's key format.
        let issues = vec![
            make_issue("unwrap-abuse", 10),
            make_issue("deep-nesting", 25),
        ];
        let response = r#"{"0": "nice unwrap", "1": "so deep"}"#;
        let roasts = parse_llm_response(response, &issues).unwrap();

        assert_eq!(roasts.len(), 2, "Should have roasts for both issues");
        assert!(
            roasts.contains_key("test.rs:10:unwrap-abuse"),
            "First issue key must be test.rs:10:unwrap-abuse"
        );
        assert!(
            roasts.contains_key("test.rs:25:deep-nesting"),
            "Second issue key must be test.rs:25:deep-nesting"
        );
    }

    #[test]
    fn test_parse_response_skips_out_of_range_indices() {
        // Objective: Verify out-of-range indices are silently ignored.
        // Invariants: Only valid indices produce roasts; invalid ones are skipped.
        let issues = vec![make_issue("unwrap-abuse", 10)];
        let response = r#"{"0": "valid", "5": "out of range", "abc": "not a number"}"#;
        let roasts = parse_llm_response(response, &issues).unwrap();

        assert_eq!(
            roasts.len(),
            1,
            "Only the valid index should produce a roast"
        );
        assert!(
            roasts.contains_key("test.rs:10:unwrap-abuse"),
            "Valid index 0 should map to the first issue"
        );
    }

    #[test]
    fn test_local_provider_returns_roasts_for_known_rules() {
        // Objective: Verify LocalRoastProvider produces roasts for rules with i18n messages.
        // Invariants: At least one roast must be returned for a known rule name.
        let issues = vec![make_issue("unwrap-abuse", 1)];
        let provider = LocalRoastProvider;
        let roasts = provider.generate_roasts(&issues, "en-US");

        assert!(
            !roasts.is_empty(),
            "LocalRoastProvider must return at least one roast for known rules"
        );
        assert!(
            roasts.contains_key("test.rs:1:unwrap-abuse"),
            "Roast key must match the issue key format"
        );
    }

    #[test]
    fn test_local_provider_returns_something_for_unknown_rules() {
        // Objective: Verify unknown rules still produce a roast message.
        // Invariants: The i18n module returns a catch-all message for unknown rules.
        let issues = vec![make_issue("unknown-rule-xyz", 42)];
        let provider = LocalRoastProvider;
        let roasts = provider.generate_roasts(&issues, "en-US");

        assert_eq!(
            roasts.len(),
            1,
            "Should have exactly one roast for one issue"
        );
        let roast = roasts.get("test.rs:42:unknown-rule-xyz").unwrap();
        assert!(
            !roast.is_empty(),
            "Unknown rules must still produce a non-empty roast message"
        );
    }

    #[test]
    fn test_parse_response_with_markdown_wrapped_json() {
        // Objective: Verify end-to-end parsing with markdown-wrapped LLM output.
        // Invariants: JSON inside code fences must parse correctly.
        let issues = vec![make_issue("deep-nesting", 5)];
        let response =
            "Sure, here are the roasts:\n```json\n{\"0\": \"nested deeper than inception\"}\n```";
        let roasts = parse_llm_response(response, &issues).unwrap();

        assert_eq!(roasts.len(), 1, "Should parse one roast from fenced JSON");
        let roast = roasts.get("test.rs:5:deep-nesting").unwrap();
        assert_eq!(
            roast, "nested deeper than inception",
            "Roast content must match the JSON value"
        );
    }

    #[test]
    fn test_fix_trailing_commas_before_brace() {
        // Objective: Verify a trailing comma before `}` is stripped.
        let input = r#"{"0": "a", "1": "b",}"#;
        let result = fix_trailing_commas(input);
        assert_eq!(result, r#"{"0": "a", "1": "b"}"#);
    }

    #[test]
    fn test_fix_trailing_commas_before_bracket() {
        // Objective: Verify a trailing comma before `]` is stripped.
        let input = r#"["a", "b",]"#;
        let result = fix_trailing_commas(input);
        assert_eq!(result, r#"["a", "b"]"#);
    }

    #[test]
    fn test_fix_trailing_commas_preserves_valid_json() {
        // Objective: Verify already-valid JSON passes through unchanged.
        let input = r#"{"0": "a", "1": "b"}"#;
        let result = fix_trailing_commas(input);
        assert_eq!(result, input, "Valid JSON should be unchanged");
    }

    #[test]
    fn test_fix_trailing_commas_handles_whitespace() {
        // " ,  \n}" -> comma removed -> "   \n}" (space before comma + spaces after)
        let input = "{\"0\": \"a\" ,  \n}";
        let result = fix_trailing_commas(input);
        assert!(!result.contains(",}"), "Trailing comma must be removed");
        assert!(result.contains("\"a\""), "Content must be preserved");
    }

    #[test]
    fn test_parse_response_with_trailing_comma() {
        // Objective: Verify LLM output with trailing commas is handled.
        // Invariants: Trailing commas must be stripped before parsing.
        let issues = vec![
            make_issue("unwrap-abuse", 10),
            make_issue("deep-nesting", 25),
        ];
        let response = "```json\n{\"0\": \"nice unwrap\", \"1\": \"so deep\",}\n```";
        let roasts = parse_llm_response(response, &issues).unwrap();

        assert_eq!(
            roasts.len(),
            2,
            "Should parse both roasts despite trailing comma"
        );
    }

    #[test]
    fn test_extract_json_from_generic_code_fence() {
        // Objective: Verify JSON in ``` fences (without json tag) is extracted.
        let response = "Here:\n```\n{\"0\": \"roast\"}\n```";
        let result = extract_json_from_response(response);
        assert_eq!(result, "{\"0\": \"roast\"}");
    }
}