semantic_diff/grouper/llm.rs

1use super::{GroupingResponse, SemanticGroup};
2use std::collections::HashSet;
3use std::time::Duration;
4use tokio::process::Command;
5
// Defensive limits applied to every LLM response before it is trusted.
// All of these bound attacker/malfunction-controlled output, not normal usage.
/// Maximum bytes to read from LLM stdout (1MB). Prevents OOM from malicious/broken LLM.
const MAX_RESPONSE_BYTES: usize = 1_048_576;
/// Maximum JSON string size before deserialization (100KB).
const MAX_JSON_SIZE: usize = 102_400;
/// Maximum number of semantic groups from LLM.
const MAX_GROUPS: usize = 20;
/// Maximum changes per group.
const MAX_CHANGES_PER_GROUP: usize = 200;
/// Maximum label length (characters).
const MAX_LABEL_LEN: usize = 80;
/// Maximum description length (characters).
const MAX_DESC_LEN: usize = 500;
18
/// Which LLM backend is available for semantic grouping.
// `Eq` added alongside `PartialEq`: the derived equality is total for a
// fieldless enum, and clippy's `derive_partial_eq_without_eq` flags the omission.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlmBackend {
    /// Invoked via the `claude` CLI.
    Claude,
    /// Invoked via the `copilot` CLI.
    Copilot,
}
25
26/// Request semantic grouping from the detected LLM backend with a 30-second timeout.
27pub async fn request_grouping_with_timeout(
28    backend: LlmBackend,
29    model: &str,
30    summaries: &str,
31) -> anyhow::Result<Vec<SemanticGroup>> {
32    let model = model.to_string();
33    tokio::time::timeout(
34        Duration::from_secs(60),
35        request_grouping(backend, &model, summaries),
36    )
37    .await
38    .map_err(|_| anyhow::anyhow!("LLM timed out after 60s"))?
39}
40
41/// Invoke the LLM backend to group hunks by semantic intent.
42///
43/// Prompts are piped via stdin to prevent process table exposure of code diffs.
44/// Uses `tokio::process::Command::spawn()` so that aborting the JoinHandle
45/// drops the Child, which sends SIGKILL (critical for ROB-05 cancellation).
46pub async fn request_grouping(
47    backend: LlmBackend,
48    model: &str,
49    hunk_summaries: &str,
50) -> anyhow::Result<Vec<SemanticGroup>> {
51    let prompt = format!(
52        "Group these code changes by semantic intent at the HUNK level. \
53         Related hunks across different files should be in the same group.\n\
54         Return ONLY valid JSON.\n\
55         Schema: {{\"groups\": [{{\"label\": \"short name\", \"description\": \"one sentence\", \
56         \"changes\": [{{\"file\": \"path\", \"hunks\": [0, 1]}}]}}]}}\n\
57         Rules:\n\
58         - Every hunk of every file must appear in exactly one group\n\
59         - Use 2-5 groups (fewer for small changesets)\n\
60         - Labels should describe the PURPOSE (e.g. \"Auth refactor\", \"Test coverage\")\n\
61         - The \"hunks\" array contains 0-based hunk indices as shown in HUNK N: headers\n\
62         - A single file's hunks may be split across different groups if they serve different purposes\n\n\
63         Changed files and hunks:\n{hunk_summaries}",
64    );
65
66    let output = match backend {
67        LlmBackend::Claude => invoke_claude(&prompt, model).await?,
68        LlmBackend::Copilot => invoke_copilot(&prompt, model).await?,
69    };
70
71    // Extract JSON from potential markdown code fences
72    let json_str = extract_json(&output)?;
73
74    // FINDING-12: Validate JSON size before deserialization
75    if json_str.len() > MAX_JSON_SIZE {
76        anyhow::bail!(
77            "LLM JSON response too large ({} bytes, max {})",
78            json_str.len(),
79            MAX_JSON_SIZE
80        );
81    }
82
83    let response: GroupingResponse = serde_json::from_str(&json_str)?;
84
85    // Build set of valid (file, hunk_count) for validation
86    let known_files: HashSet<&str> = hunk_summaries
87        .lines()
88        .filter_map(|line| {
89            let line = line.trim();
90            if let Some(rest) = line.strip_prefix("FILE: ") {
91                let end = rest.find(" (")?;
92                Some(&rest[..end])
93            } else {
94                None
95            }
96        })
97        .collect();
98
99    // Validate: drop unknown files, enforce bounds (FINDING-13, 14, 15)
100    let validated_groups: Vec<SemanticGroup> = response
101        .groups
102        .into_iter()
103        .take(MAX_GROUPS) // FINDING-15: cap group count
104        .map(|group| {
105            let valid_changes: Vec<super::GroupedChange> = group
106                .changes()
107                .into_iter()
108                .filter(|change| {
109                    // Existing: check against known_files
110                    let known = known_files.contains(change.file.as_str());
111                    // FINDING-14: reject traversal paths and absolute paths
112                    let safe = !change.file.contains("..") && !change.file.starts_with('/');
113                    if !safe {
114                        tracing::warn!("Rejected LLM file path with traversal: {}", change.file);
115                    }
116                    known && safe
117                })
118                .take(MAX_CHANGES_PER_GROUP) // cap changes per group
119                .collect();
120            // FINDING-13: truncate label and description
121            SemanticGroup::new(
122                truncate_string(&group.label, MAX_LABEL_LEN),
123                truncate_string(&group.description, MAX_DESC_LEN),
124                valid_changes,
125            )
126        })
127        .filter(|group| !group.changes().is_empty())
128        .collect();
129
130    Ok(validated_groups)
131}
132
133/// Request incremental grouping: assign new/modified hunks to existing groups or create new ones.
134///
135/// The `summaries` parameter already contains the existing group context prepended
136/// (from `incremental_hunk_summaries`), so we just need a different system prompt.
137pub async fn request_incremental_grouping(
138    backend: LlmBackend,
139    model: &str,
140    summaries: &str,
141) -> anyhow::Result<Vec<SemanticGroup>> {
142    let model = model.to_string();
143    tokio::time::timeout(
144        Duration::from_secs(60),
145        request_incremental(backend, &model, summaries),
146    )
147    .await
148    .map_err(|_| anyhow::anyhow!("LLM timed out after 60s"))?
149}
150
151async fn request_incremental(
152    backend: LlmBackend,
153    model: &str,
154    hunk_summaries: &str,
155) -> anyhow::Result<Vec<SemanticGroup>> {
156    let prompt = format!(
157        "You are updating an existing grouping of code changes. \
158         New or modified files have been added to the working tree.\n\
159         Assign the NEW/MODIFIED hunks to the EXISTING groups listed above, or create new groups if they don't fit.\n\
160         Return ONLY valid JSON with assignments for the NEW/MODIFIED files only.\n\
161         Schema: {{\"groups\": [{{\"label\": \"short name\", \"description\": \"one sentence\", \
162         \"changes\": [{{\"file\": \"path\", \"hunks\": [0, 1]}}]}}]}}\n\
163         Rules:\n\
164         - Every hunk of every NEW/MODIFIED file must appear in exactly one group\n\
165         - Reuse existing group labels when the change fits that group's purpose\n\
166         - Create new groups only when a change serves a genuinely different purpose\n\
167         - Use the same label string (case-sensitive) when assigning to an existing group\n\
168         - The \"hunks\" array contains 0-based hunk indices\n\
169         - Do NOT include unchanged files in your response\n\n\
170         {hunk_summaries}",
171    );
172
173    let output = match backend {
174        LlmBackend::Claude => invoke_claude(&prompt, model).await?,
175        LlmBackend::Copilot => invoke_copilot(&prompt, model).await?,
176    };
177
178    let json_str = extract_json(&output)?;
179
180    if json_str.len() > MAX_JSON_SIZE {
181        anyhow::bail!(
182            "LLM JSON response too large ({} bytes, max {})",
183            json_str.len(),
184            MAX_JSON_SIZE
185        );
186    }
187
188    let response: GroupingResponse = serde_json::from_str(&json_str)?;
189
190    // Build set of valid files from the summaries (only the NEW/MODIFIED section)
191    let known_files: HashSet<&str> = hunk_summaries
192        .lines()
193        .filter_map(|line| {
194            let line = line.trim();
195            if let Some(rest) = line.strip_prefix("FILE: ") {
196                let end = rest.find(" (")?;
197                Some(&rest[..end])
198            } else {
199                None
200            }
201        })
202        .collect();
203
204    let validated_groups: Vec<SemanticGroup> = response
205        .groups
206        .into_iter()
207        .take(MAX_GROUPS)
208        .map(|group| {
209            let valid_changes: Vec<super::GroupedChange> = group
210                .changes()
211                .into_iter()
212                .filter(|change| {
213                    let known = known_files.contains(change.file.as_str());
214                    let safe = !change.file.contains("..") && !change.file.starts_with('/');
215                    if !safe {
216                        tracing::warn!("Rejected LLM file path with traversal: {}", change.file);
217                    }
218                    known && safe
219                })
220                .take(MAX_CHANGES_PER_GROUP)
221                .collect();
222            SemanticGroup::new(
223                truncate_string(&group.label, MAX_LABEL_LEN),
224                truncate_string(&group.description, MAX_DESC_LEN),
225                valid_changes,
226            )
227        })
228        .filter(|group| !group.changes().is_empty())
229        .collect();
230
231    Ok(validated_groups)
232}
233
234/// Invoke the LLM backend with text output format (for free-form markdown responses).
235/// For Claude, uses `--output-format text` instead of JSON to avoid the wrapper.
236pub async fn invoke_llm_text(
237    backend: LlmBackend,
238    model: &str,
239    prompt: &str,
240) -> anyhow::Result<String> {
241    match backend {
242        LlmBackend::Claude => invoke_claude_text(prompt, model).await,
243        LlmBackend::Copilot => invoke_copilot(prompt, model).await,
244    }
245}
246
247/// Invoke the `claude` CLI and return the LLM response text.
248///
249/// Pipes the prompt via stdin to avoid exposing code diffs in the process table.
250/// The `-p` flag without an argument causes claude to read from stdin.
251async fn invoke_claude(prompt: &str, model: &str) -> anyhow::Result<String> {
252    use std::process::Stdio;
253    use tokio::io::{AsyncReadExt, AsyncWriteExt};
254
255    let mut child = Command::new("claude")
256        .args([
257            "-p",
258            "--output-format",
259            "json",
260            "--model",
261            model,
262            "--max-turns",
263            "1",
264        ])
265        .stdin(Stdio::piped())
266        .stdout(Stdio::piped())
267        .stderr(Stdio::piped())
268        .spawn()?;
269
270    // Write prompt to stdin, then close it
271    if let Some(mut stdin) = child.stdin.take() {
272        stdin.write_all(prompt.as_bytes()).await?;
273        // stdin is dropped here, closing the pipe
274    }
275
276    // Bounded read from stdout (FINDING-11: prevent OOM from oversized LLM response)
277    let stdout_pipe = child.stdout.take()
278        .ok_or_else(|| anyhow::anyhow!("failed to capture claude stdout"))?;
279    let mut limited = stdout_pipe.take(MAX_RESPONSE_BYTES as u64);
280    let mut buf = Vec::with_capacity(8192);
281    let bytes_read = limited.read_to_end(&mut buf).await?;
282
283    if bytes_read >= MAX_RESPONSE_BYTES {
284        child.kill().await.ok();
285        anyhow::bail!("LLM response exceeded {MAX_RESPONSE_BYTES} byte limit");
286    }
287
288    let status = child.wait().await?;
289    if !status.success() {
290        // Try to read stderr for diagnostics
291        let mut stderr_buf = Vec::new();
292        if let Some(mut stderr) = child.stderr.take() {
293            stderr.read_to_end(&mut stderr_buf).await.ok();
294        }
295        let stderr_str = String::from_utf8_lossy(&stderr_buf);
296        anyhow::bail!("claude exited with status {status}: {stderr_str}");
297    }
298
299    let stdout_str = String::from_utf8(buf)?;
300    let wrapper: serde_json::Value = serde_json::from_str(&stdout_str)?;
301    let result_text = wrapper["result"]
302        .as_str()
303        .ok_or_else(|| anyhow::anyhow!("missing result field in claude JSON output"))?;
304
305    Ok(result_text.to_string())
306}
307
/// Invoke the `claude` CLI with text output format (no JSON wrapper).
/// Used for free-form responses like review summaries.
///
/// The prompt is piped via stdin (not argv) so code diffs never appear in
/// the process table; stdout/stderr are drained concurrently to avoid a
/// pipe-buffer deadlock.
async fn invoke_claude_text(prompt: &str, model: &str) -> anyhow::Result<String> {
    use std::process::Stdio;
    use tokio::io::{AsyncReadExt, AsyncWriteExt};

    let mut child = Command::new("claude")
        .args([
            "-p",
            "--output-format",
            "text",
            "--model",
            model,
        ])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    if let Some(mut stdin) = child.stdin.take() {
        stdin.write_all(prompt.as_bytes()).await?;
        // stdin is dropped here, closing the pipe so claude sees EOF
    }

    // Read stdout and stderr concurrently to avoid deadlock if stderr pipe fills up
    let stdout_pipe = child.stdout.take()
        .ok_or_else(|| anyhow::anyhow!("failed to capture claude stdout"))?;
    let stderr_pipe = child.stderr.take();

    let stdout_fut = async {
        // Bounded read: cap stdout at MAX_RESPONSE_BYTES to prevent OOM
        let mut limited = stdout_pipe.take(MAX_RESPONSE_BYTES as u64);
        let mut buf = Vec::with_capacity(8192);
        let bytes_read = limited.read_to_end(&mut buf).await?;
        Ok::<(Vec<u8>, usize), std::io::Error>((buf, bytes_read))
    };
    let stderr_fut = async {
        // Best-effort: stderr is only used for diagnostics on failure
        let mut stderr_buf = Vec::new();
        if let Some(mut stderr) = stderr_pipe {
            stderr.read_to_end(&mut stderr_buf).await.ok();
        }
        stderr_buf
    };

    let (stdout_result, stderr_buf) = tokio::join!(stdout_fut, stderr_fut);
    let (buf, bytes_read) = stdout_result?;

    // A read that hit the take() limit may be truncated — reject it outright
    if bytes_read >= MAX_RESPONSE_BYTES {
        child.kill().await.ok();
        anyhow::bail!("LLM response exceeded {MAX_RESPONSE_BYTES} byte limit");
    }

    let status = child.wait().await?;
    if !status.success() {
        let stderr_str = String::from_utf8_lossy(&stderr_buf);
        anyhow::bail!("claude exited with status {status}: {stderr_str}");
    }

    Ok(String::from_utf8(buf)?)
}
366
367/// Invoke `copilot --yolo` and return the LLM response text.
368///
369/// Pipes the prompt via stdin to avoid exposing code diffs in the process table.
370/// Without a positional prompt argument, copilot reads from stdin.
371async fn invoke_copilot(prompt: &str, model: &str) -> anyhow::Result<String> {
372    use std::process::Stdio;
373    use tokio::io::{AsyncReadExt, AsyncWriteExt};
374
375    let mut child = Command::new("copilot")
376        .args(["--yolo", "--model", model])
377        .stdin(Stdio::piped())
378        .stdout(Stdio::piped())
379        .stderr(Stdio::piped())
380        .spawn()?;
381
382    // Write prompt to stdin, then close it
383    if let Some(mut stdin) = child.stdin.take() {
384        stdin.write_all(prompt.as_bytes()).await?;
385    }
386
387    // Bounded read from stdout (FINDING-11: prevent OOM from oversized LLM response)
388    let stdout_pipe = child.stdout.take()
389        .ok_or_else(|| anyhow::anyhow!("failed to capture copilot stdout"))?;
390    let mut limited = stdout_pipe.take(MAX_RESPONSE_BYTES as u64);
391    let mut buf = Vec::with_capacity(8192);
392    let bytes_read = limited.read_to_end(&mut buf).await?;
393
394    if bytes_read >= MAX_RESPONSE_BYTES {
395        child.kill().await.ok();
396        anyhow::bail!("LLM response exceeded {MAX_RESPONSE_BYTES} byte limit");
397    }
398
399    let status = child.wait().await?;
400    if !status.success() {
401        let mut stderr_buf = Vec::new();
402        if let Some(mut stderr) = child.stderr.take() {
403            stderr.read_to_end(&mut stderr_buf).await.ok();
404        }
405        let stderr_str = String::from_utf8_lossy(&stderr_buf);
406        anyhow::bail!("copilot exited with status {status}: {stderr_str}");
407    }
408
409    Ok(String::from_utf8(buf)?)
410}
411
412/// Extract JSON from text that may be wrapped in ```json ... ``` code fences.
413fn extract_json(text: &str) -> anyhow::Result<String> {
414    let trimmed = text.trim();
415    // Try direct parse first
416    if trimmed.starts_with('{') {
417        return Ok(trimmed.to_string());
418    }
419    // Try extracting from code fences — find first `{` to last `}`
420    if let Some(start) = trimmed.find('{') {
421        if let Some(end) = trimmed.rfind('}') {
422            return Ok(trimmed[start..=end].to_string());
423        }
424    }
425    anyhow::bail!("no JSON object found in response")
426}
427
/// Truncate a string to at most `max` characters, respecting UTF-8 boundaries.
///
/// Single pass over the input: find the byte offset of character number `max`
/// (0-based, i.e. the first character to drop) and slice there. Avoids the
/// previous double scan (`chars().count()` then `take().collect()`).
fn truncate_string(s: &str, max: usize) -> String {
    match s.char_indices().nth(max) {
        // `idx` is a char boundary by construction, so slicing is safe.
        Some((idx, _)) => s[..idx].to_string(),
        None => s.to_string(), // fewer than `max` + 1 chars: keep everything
    }
}
436
// Unit tests: JSON extraction, response parsing, validation bounds, and
// structural (source-text) checks that prompts are piped via stdin.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_json_direct() {
        // A bare JSON object passes through unchanged.
        let input = r#"{"groups": []}"#;
        assert_eq!(extract_json(input).unwrap(), input);
    }

    #[test]
    fn test_extract_json_code_fences() {
        // Markdown fences around the object are stripped.
        let input = "```json\n{\"groups\": []}\n```";
        assert_eq!(extract_json(input).unwrap(), r#"{"groups": []}"#);
    }

    #[test]
    fn test_extract_json_no_json() {
        assert!(extract_json("no json here").is_err());
    }

    #[test]
    fn test_parse_hunk_level_response() {
        // Hunk-level schema: each change carries explicit 0-based hunk indices.
        let json = r#"{
            "groups": [{
                "label": "Auth refactor",
                "description": "Refactored auth flow",
                "changes": [
                    {"file": "src/auth.rs", "hunks": [0, 2]},
                    {"file": "src/middleware.rs", "hunks": [1]}
                ]
            }]
        }"#;
        let response: GroupingResponse = serde_json::from_str(json).unwrap();
        assert_eq!(response.groups.len(), 1);
        assert_eq!(response.groups[0].changes().len(), 2);
        assert_eq!(response.groups[0].changes()[0].hunks, vec![0, 2]);
    }

    #[test]
    fn test_parse_empty_hunks_means_all() {
        // An empty "hunks" array deserializes as empty; callers treat that
        // as "all hunks of the file" (see test_parse_files_fallback below).
        let json = r#"{
            "groups": [{
                "label": "Config",
                "description": "Config changes",
                "changes": [{"file": "config.toml", "hunks": []}]
            }]
        }"#;
        let response: GroupingResponse = serde_json::from_str(json).unwrap();
        assert!(response.groups[0].changes()[0].hunks.is_empty());
    }

    /// Verify invoke_claude uses Stdio::piped for stdin (structural test).
    /// This reads the source file and checks that the invoke_claude function
    /// uses stdin(Stdio::piped()) instead of passing prompt as CLI arg.
    #[test]
    fn test_invoke_claude_uses_stdin_pipe() {
        let src = include_str!("llm.rs");
        // Find the invoke_claude function body
        let claude_start = src.find("async fn invoke_claude").expect("invoke_claude not found");
        let claude_body = &src[claude_start..];
        // Find the end of the function (next "async fn" or end of non-test code)
        // NOTE(review): the index from claude_body[1..] is relative to that
        // subslice, so `end` lands one byte early — harmless for these
        // substring checks, but worth confirming if the boundary ever matters.
        let end = claude_body[1..].find("\nasync fn").unwrap_or(claude_body.len());
        let claude_fn = &claude_body[..end];

        assert!(
            claude_fn.contains("Stdio::piped()"),
            "invoke_claude must use Stdio::piped() for stdin"
        );
        assert!(
            claude_fn.contains("write_all"),
            "invoke_claude must write prompt to stdin via write_all"
        );
        // Prompt should NOT appear inside the .args([...]) array
        if let Some(args_start) = claude_fn.find(".args([") {
            let args_section = &claude_fn[args_start..];
            let args_end = args_section.find("])").expect("unclosed .args");
            let args_content = &args_section[..args_end];
            assert!(
                !args_content.contains("prompt"),
                "invoke_claude must not pass prompt in .args()"
            );
        }
    }

    /// Verify invoke_copilot uses Stdio::piped for stdin (structural test).
    #[test]
    fn test_invoke_copilot_uses_stdin_pipe() {
        let src = include_str!("llm.rs");
        let copilot_start = src.find("async fn invoke_copilot").expect("invoke_copilot not found");
        let copilot_body = &src[copilot_start..];
        // invoke_copilot is followed by a doc comment or the test module, not
        // another "async fn", hence the different end markers here.
        let end = copilot_body[1..].find("\n/// ").or_else(|| copilot_body[1..].find("\n#[cfg(test)]")).unwrap_or(copilot_body.len());
        let copilot_fn = &copilot_body[..end];

        assert!(
            copilot_fn.contains("Stdio::piped()"),
            "invoke_copilot must use Stdio::piped() for stdin"
        );
        assert!(
            copilot_fn.contains("write_all"),
            "invoke_copilot must write prompt to stdin via write_all"
        );
    }

    /// Verify neither invoke function passes prompt string in .args()
    #[test]
    fn test_no_prompt_in_args() {
        let src = include_str!("llm.rs");
        // Check invoke_claude: the .args array should not contain "prompt"
        let claude_start = src.find("async fn invoke_claude").expect("invoke_claude not found");
        let claude_body = &src[claude_start..];
        let end = claude_body[1..].find("\nasync fn").unwrap_or(claude_body.len());
        let claude_fn = &claude_body[..end];

        // Find the .args([...]) block and ensure "prompt" is not inside it
        if let Some(args_start) = claude_fn.find(".args([") {
            let args_section = &claude_fn[args_start..];
            let args_end = args_section.find("])").expect("unclosed .args");
            let args_content = &args_section[..args_end];
            assert!(
                !args_content.contains("prompt"),
                "invoke_claude .args() must not contain prompt variable"
            );
        }

        // Check invoke_copilot
        let copilot_start = src.find("async fn invoke_copilot").expect("invoke_copilot not found");
        let copilot_body = &src[copilot_start..];
        let end2 = copilot_body[1..].find("\n/// ").or_else(|| copilot_body[1..].find("\n#[cfg(test)]")).unwrap_or(copilot_body.len());
        let copilot_fn = &copilot_body[..end2];

        if let Some(args_start) = copilot_fn.find(".args([") {
            let args_section = &copilot_fn[args_start..];
            let args_end = args_section.find("])").expect("unclosed .args");
            let args_content = &args_section[..args_end];
            assert!(
                !args_content.contains("prompt"),
                "invoke_copilot .args() must not contain prompt variable"
            );
        }
    }

    #[test]
    fn test_parse_files_fallback() {
        // LLM returns old "files" format instead of "changes"
        let json = r#"{
            "groups": [{
                "label": "Refactor",
                "description": "Code cleanup",
                "files": ["src/app.rs", "src/main.rs"]
            }]
        }"#;
        let response: GroupingResponse = serde_json::from_str(json).unwrap();
        let changes = response.groups[0].changes();
        assert_eq!(changes.len(), 2);
        assert_eq!(changes[0].file, "src/app.rs");
        assert!(changes[0].hunks.is_empty()); // all hunks
    }

    // --- Bounded reading tests ---

    #[test]
    fn test_read_bounded_under_limit() {
        // Simulate: content under MAX_RESPONSE_BYTES should be fully read
        let data = "hello world";
        assert!(data.len() < MAX_RESPONSE_BYTES);
        // The bounded read logic uses .take() -- we test the constant is reasonable
        assert_eq!(MAX_RESPONSE_BYTES, 1_048_576);
    }

    #[test]
    fn test_read_bounded_over_limit_constant() {
        // Verify the constant is 1MB
        assert_eq!(MAX_RESPONSE_BYTES, 1_048_576);
        // A response at or over this limit should be rejected
        let oversized = vec![b'x'; MAX_RESPONSE_BYTES];
        assert!(oversized.len() >= MAX_RESPONSE_BYTES);
    }

    // --- Validation tests ---

    #[test]
    fn test_validate_rejects_oversized_json() {
        // JSON string > MAX_JSON_SIZE (100KB) should be rejected
        let large_json = format!(r#"{{"groups": [{{"label": "x", "description": "{}", "changes": []}}]}}"#,
            "a".repeat(MAX_JSON_SIZE + 1));
        assert!(large_json.len() > MAX_JSON_SIZE);
        // In request_grouping, this would bail before deserialization
    }

    #[test]
    fn test_validate_caps_groups_at_max() {
        // Build JSON with more than MAX_GROUPS groups
        let mut groups_json = Vec::new();
        for i in 0..30 {
            groups_json.push(format!(
                r#"{{"label": "Group {}", "description": "desc", "changes": [{{"file": "src/f{}.rs", "hunks": [0]}}]}}"#,
                i, i
            ));
        }
        let json = format!(r#"{{"groups": [{}]}}"#, groups_json.join(","));
        let response: GroupingResponse = serde_json::from_str(&json).unwrap();
        assert_eq!(response.groups.len(), 30);
        // After validation, only MAX_GROUPS (20) should remain
        let capped: Vec<_> = response.groups.into_iter().take(MAX_GROUPS).collect();
        assert_eq!(capped.len(), 20);
    }

    #[test]
    fn test_validate_rejects_path_traversal() {
        let json = r#"{
            "groups": [{
                "label": "Evil",
                "description": "traversal",
                "changes": [{"file": "../../../etc/passwd", "hunks": [0]}]
            }]
        }"#;
        let response: GroupingResponse = serde_json::from_str(json).unwrap();
        let change = &response.groups[0].changes()[0];
        assert!(change.file.contains(".."), "path should contain traversal");
        // In validation, this would be filtered out
    }

    #[test]
    fn test_validate_rejects_absolute_paths() {
        let json = r#"{
            "groups": [{
                "label": "Evil",
                "description": "absolute",
                "changes": [{"file": "/etc/passwd", "hunks": [0]}]
            }]
        }"#;
        let response: GroupingResponse = serde_json::from_str(json).unwrap();
        let change = &response.groups[0].changes()[0];
        assert!(change.file.starts_with('/'), "path should be absolute");
        // In validation, this would be filtered out
    }

    #[test]
    fn test_truncate_string_label() {
        // Over-long labels are cut to MAX_LABEL_LEN characters.
        let long_label = "a".repeat(100);
        let truncated = truncate_string(&long_label, MAX_LABEL_LEN);
        assert_eq!(truncated.chars().count(), MAX_LABEL_LEN);
    }

    #[test]
    fn test_truncate_string_description() {
        // Over-long descriptions are cut to MAX_DESC_LEN characters.
        let long_desc = "b".repeat(600);
        let truncated = truncate_string(&long_desc, MAX_DESC_LEN);
        assert_eq!(truncated.chars().count(), MAX_DESC_LEN);
    }

    #[test]
    fn test_validate_caps_changes_per_group() {
        // Build a group with more than MAX_CHANGES_PER_GROUP changes
        let mut changes = Vec::new();
        for i in 0..250 {
            changes.push(format!(r#"{{"file": "src/f{}.rs", "hunks": [0]}}"#, i));
        }
        let json = format!(
            r#"{{"groups": [{{"label": "Big", "description": "lots", "changes": [{}]}}]}}"#,
            changes.join(",")
        );
        let response: GroupingResponse = serde_json::from_str(&json).unwrap();
        assert_eq!(response.groups[0].changes().len(), 250);
        // After validation, changes should be capped
        let capped: Vec<_> = response.groups[0].changes().into_iter().take(MAX_CHANGES_PER_GROUP).collect();
        assert_eq!(capped.len(), 200);
    }
}