Skip to main content

rs_guard/
diff.rs

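//! Diff fetching from GitHub Pull Requests, diff files on disk, and local git staging.
//!
//! Provides [`fetch_pr_diff`] for retrieving PR diffs via the GitHub REST API,
//! [`fetch_file_diff`] for reading a previously saved diff from a file,
//! and [`fetch_local_diff`] for reading `git diff --cached` output.
//! [`chunk_diff`] and [`chunk_diff_with_params`] shorten a large diff by keeping
//! only its head and tail lines.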
5
6use crate::error::RsGuardError;
7use crate::http::{build_github_http_client, github_diff_headers, validate_github_base_url};
8use crate::retry::with_retry_simple;
9use std::borrow::Cow;
10
/// Maximum allowed diff size in bytes (100 KB).
///
/// The fetchers in this module compare with `>`, so a diff of exactly this
/// many bytes is still accepted.
const MAX_DIFF_BYTES: usize = 100 * 1024;

/// Maximum allowed diff line count.
///
/// Compared with `>` as well: exactly 1500 lines is accepted, 1501 is rejected.
const MAX_DIFF_LINES: usize = 1500;

/// HTTP request timeout for diff fetching.
///
/// Passed to `build_github_http_client` when the PR diff client is created.
const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);

/// Default number of lines to preserve from the head when chunking a large diff.
///
/// Raised from 50 to 400 to better utilise modern LLM context windows
/// (DeepSeek 64K, Kimi/GPT-4o-mini 128K). The combined default of 800 lines
/// covers the full diff for the vast majority of real PRs while still fitting
/// comfortably within the smallest supported context window.
pub const DEFAULT_CHUNK_HEAD_LINES: usize = 400;

/// Default number of lines to preserve from the tail when chunking a large diff.
pub const DEFAULT_CHUNK_TAIL_LINES: usize = 400;

/// Placeholder inserted in place of chunked middle lines.
///
/// `{removed}` is a literal token, not a `format!` argument: it is substituted
/// with the omitted line count via `str::replace` in [`chunk_diff_with_params`].
/// The placeholder always begins and ends with a bare `\n`.
const CHUNK_PLACEHOLDER: &str = "\n# ... [diff truncated: {removed} lines omitted] ...\n";
33
/// Result of a successful diff fetch operation.
///
/// Every constructor in this file fills `size_bytes` and `line_count` from
/// `content` itself, after the content has passed the emptiness, diff-marker
/// and size-limit checks.
#[derive(Debug, Clone)]
#[must_use = "DiffResult should be used for review processing"]
pub struct DiffResult {
    /// The raw diff content.
    pub content: String,
    /// Size of the diff in bytes (`content.len()` at construction time).
    pub size_bytes: usize,
    /// Number of lines in the diff (`content.lines().count()` at construction time).
    pub line_count: usize,
}
45
46/// Validates that the response body looks like a diff and not a JSON error.
47///
48/// Checks for common diff markers (`diff --git`, `@@`, `---`, `+++`) and
49/// rejects responses that appear to be JSON error bodies from the API.
50///
51/// # Errors
52///
53/// Returns [`RsGuardError::InvalidDiffContent`] if the content does not
54/// appear to be a valid diff.
55fn validate_diff_content(content: &str) -> Result<(), RsGuardError> {
56    let trimmed = content.trim_start();
57
58    if trimmed.starts_with('{') || trimmed.starts_with('[') {
59        return Err(RsGuardError::InvalidDiffContent);
60    }
61
62    let has_diff_markers = content.contains("diff --git")
63        || content.contains("@@ ")
64        || content.contains("--- a/")
65        || content.contains("+++ b/")
66        || content.starts_with("diff ")
67        || content.starts_with("index ");
68
69    if !has_diff_markers {
70        return Err(RsGuardError::InvalidDiffContent);
71    }
72
73    Ok(())
74}
75
76/// Chunks a large diff by preserving the first N and last N lines.
77///
78/// When the diff exceeds `head_lines + tail_lines`, the middle section is
79/// replaced with a placeholder. Returns the original content unchanged (as a
80/// borrowed reference) when no truncation is needed, avoiding allocation.
81///
82/// Uses [`DEFAULT_CHUNK_HEAD_LINES`] and [`DEFAULT_CHUNK_TAIL_LINES`] as
83/// defaults. Pass explicit values via [`chunk_diff_with_params`] when the
84/// caller has per-provider or user-configured thresholds.
85///
86/// # Arguments
87///
88/// * `content` — The full diff content.
89///
90/// # Returns
91///
92/// A tuple of `(chunked_content, was_truncated, removed_lines)`.
93pub fn chunk_diff(content: &str) -> (Cow<'_, str>, bool, usize) {
94    chunk_diff_with_params(content, DEFAULT_CHUNK_HEAD_LINES, DEFAULT_CHUNK_TAIL_LINES)
95}
96
97/// Chunks a large diff with explicit head and tail line counts.
98///
99/// When the diff exceeds `head_lines + tail_lines`, the middle section is
100/// replaced with a placeholder. Returns the original content unchanged (as a
101/// borrowed reference) when no truncation is needed, avoiding allocation.
102///
103/// # Arguments
104///
105/// * `content` — The full diff content.
106/// * `head_lines` — Number of lines to keep from the beginning.
107/// * `tail_lines` — Number of lines to keep from the end.
108///
109/// # Returns
110///
111/// A tuple of `(chunked_content, was_truncated, removed_lines)`.
112pub fn chunk_diff_with_params(
113    content: &str,
114    head_lines: usize,
115    tail_lines: usize,
116) -> (Cow<'_, str>, bool, usize) {
117    // Detect line ending style from the original content
118    let has_crlf = content.contains("\r\n");
119    let line_ending = if has_crlf { "\r\n" } else { "\n" };
120    let ends_with_newline = content.ends_with('\n') || content.ends_with("\r\n");
121
122    let lines: Vec<&str> = content.lines().collect();
123    let total = lines.len();
124    let threshold = head_lines + tail_lines;
125
126    if total <= threshold {
127        return (Cow::Borrowed(content), false, 0);
128    }
129
130    let head = &lines[..head_lines];
131    let tail = &lines[total - tail_lines..];
132    let removed = total - head_lines - tail_lines;
133    let placeholder = CHUNK_PLACEHOLDER.replace("{removed}", &removed.to_string());
134
135    let mut result = String::with_capacity(content.len() / 2);
136
137    // Add head lines with detected line endings
138    for line in head {
139        result.push_str(line);
140        result.push_str(line_ending);
141    }
142
143    result.push_str(&placeholder);
144
145    // Add tail lines with detected line endings
146    for (i, line) in tail.iter().enumerate() {
147        result.push_str(line);
148        // Add line ending after each tail line except the last one if original didn't end with newline
149        if i < tail.len() - 1 || ends_with_newline {
150            result.push_str(line_ending);
151        }
152    }
153
154    (Cow::Owned(result), true, removed)
155}
156
157/// Fetches the diff for a GitHub Pull Request.
158///
159/// Sends a GET request to the GitHub API with the `application/vnd.github.v3.diff`
160/// accept header. Automatically retries on transient failures (429, 5xx, timeouts).
161///
162/// The `base_url` is validated against an allowlist before any request is made,
163/// preventing `Authorization` headers from being sent to untrusted hosts.
164///
165/// # Arguments
166///
167/// * `base_url` — GitHub API base URL (e.g. `"https://api.github.com"`).
168/// * `owner` — Repository owner.
169/// * `repo` — Repository name.
170/// * `pr_number` — Pull request number.
171/// * `token` — GitHub authentication token.
172///
173/// # Errors
174///
175/// Returns [`RsGuardError::Config`] if `base_url` is not allowlisted,
176/// [`RsGuardError::GitHubApi`] on HTTP errors,
177/// [`RsGuardError::EmptyDiff`] if the diff is empty,
178/// [`RsGuardError::InvalidDiffContent`] if the response is not a valid diff,
179/// or [`RsGuardError::DiffTooLarge`] if the diff exceeds size limits.
180pub async fn fetch_pr_diff(
181    base_url: &str,
182    owner: &str,
183    repo: &str,
184    pr_number: u64,
185    token: &str,
186) -> Result<DiffResult, RsGuardError> {
187    validate_github_base_url(base_url)?;
188
189    let client = build_github_http_client(REQUEST_TIMEOUT)?;
190
191    let url = format!(
192        "{}/repos/{}/{}/pulls/{}",
193        base_url.trim_end_matches('/'),
194        owner,
195        repo,
196        pr_number
197    );
198    let headers = github_diff_headers(token)?;
199
200    let response = with_retry_simple(|| async {
201        let resp = client
202            .get(&url)
203            .headers(headers.clone())
204            .send()
205            .await
206            .map_err(|e| {
207                let status = e.status().map(|s| s.as_u16()).unwrap_or(0);
208                RsGuardError::GitHubApi {
209                    status,
210                    message: e.to_string(),
211                }
212            })?;
213
214        let status = resp.status();
215        if !status.is_success() {
216            let body = resp
217                .text()
218                .await
219                .unwrap_or_else(|e| format!("[failed to read response body: {}]", e));
220            return Err(RsGuardError::GitHubApi {
221                status: status.as_u16(),
222                message: body,
223            });
224        }
225
226        let body = resp.text().await.map_err(|e| RsGuardError::GitHubApi {
227            status: 0,
228            message: e.to_string(),
229        })?;
230
231        Ok(body)
232    })
233    .await?;
234
235    if response.is_empty() {
236        return Err(RsGuardError::EmptyDiff);
237    }
238
239    validate_diff_content(&response)?;
240
241    let size_bytes = response.len();
242    let line_count = response.lines().count();
243
244    if size_bytes > MAX_DIFF_BYTES || line_count > MAX_DIFF_LINES {
245        return Err(RsGuardError::DiffTooLarge {
246            size_bytes,
247            line_count,
248        });
249    }
250
251    Ok(DiffResult {
252        content: response,
253        size_bytes,
254        line_count,
255    })
256}
257
258/// Fetches diff content from a pre-existing file on disk.
259///
260/// Reads the file, validates that it looks like a diff, and checks size
261/// limits. Used when `--diff-file` is specified to skip the GitHub API call.
262///
263/// # Errors
264///
265/// Returns [`RsGuardError::Config`] if the file does not exist or cannot
266/// be read, [`RsGuardError::EmptyDiff`] if the file is empty,
267/// [`RsGuardError::InvalidDiffContent`] if the content does not look
268/// like a diff, or [`RsGuardError::DiffTooLarge`] if it exceeds size limits.
269pub fn fetch_file_diff(path: &str) -> Result<DiffResult, RsGuardError> {
270    let content = std::fs::read_to_string(path)
271        .map_err(|e| RsGuardError::Config(format!("Failed to read diff file '{}': {}", path, e)))?;
272
273    if content.is_empty() {
274        return Err(RsGuardError::EmptyDiff);
275    }
276
277    validate_diff_content(&content)?;
278
279    let size_bytes = content.len();
280    let line_count = content.lines().count();
281
282    if size_bytes > MAX_DIFF_BYTES || line_count > MAX_DIFF_LINES {
283        return Err(RsGuardError::DiffTooLarge {
284            size_bytes,
285            line_count,
286        });
287    }
288
289    Ok(DiffResult {
290        content,
291        size_bytes,
292        line_count,
293    })
294}
295
296/// Fetches the locally staged diff via `git diff --cached`.
297///
298/// # Errors
299///
300/// Returns [`RsGuardError::Io`] if the git command fails,
301/// [`RsGuardError::Config`] if `git diff --cached` exits with a non-zero status,
302/// [`RsGuardError::EmptyDiff`] if there are no staged changes,
303/// [`RsGuardError::InvalidDiffContent`] if the output does not look like a diff,
304/// or [`RsGuardError::DiffTooLarge`] if the diff exceeds size limits.
305pub fn fetch_local_diff() -> Result<DiffResult, RsGuardError> {
306    let output = std::process::Command::new("git")
307        .args(["diff", "--cached"])
308        .output()
309        .map_err(RsGuardError::Io)?;
310
311    if !output.status.success() {
312        let stderr = String::from_utf8_lossy(&output.stderr);
313        return Err(RsGuardError::Config(format!(
314            "git diff --cached failed: {}",
315            stderr
316        )));
317    }
318
319    let content = String::from_utf8_lossy(&output.stdout).to_string();
320    build_local_diff_result(content)
321}
322
323/// Builds a [`DiffResult`] from already-validated local diff content.
324///
325/// Extracted from [`fetch_local_diff`] to enable unit testing of content
326/// validation without spawning a git process.
327///
328/// # Errors
329///
330/// Returns [`RsGuardError::EmptyDiff`], [`RsGuardError::InvalidDiffContent`],
331/// or [`RsGuardError::DiffTooLarge`] based on the content.
332pub(crate) fn build_local_diff_result(content: String) -> Result<DiffResult, RsGuardError> {
333    if content.is_empty() {
334        return Err(RsGuardError::EmptyDiff);
335    }
336
337    validate_diff_content(&content)?;
338
339    let size_bytes = content.len();
340    let line_count = content.lines().count();
341
342    if size_bytes > MAX_DIFF_BYTES || line_count > MAX_DIFF_LINES {
343        return Err(RsGuardError::DiffTooLarge {
344            size_bytes,
345            line_count,
346        });
347    }
348
349    Ok(DiffResult {
350        content,
351        size_bytes,
352        line_count,
353    })
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use wiremock::matchers::{header, method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Four-line, newline-terminated git diff header used by the PR tests.
    const PR_DIFF_HEADER: &str =
        "diff --git a/file.rs b/file.rs\n--- a/file.rs\n+++ b/file.rs\n@@ -1,2 +1,3 @@\n";

    /// Four-line, newline-terminated git diff header used by the oversized-input tests.
    const SMALL_DIFF_HEADER: &str =
        "diff --git a/f.rs b/f.rs\n--- a/f.rs\n+++ b/f.rs\n@@ -1 +1,2 @@\n";

    /// Builds `count` lines of the form `line {i}` joined by `separator`
    /// (no trailing separator).
    fn numbered_lines(count: usize, separator: &str) -> String {
        let lines: Vec<String> = (0..count).map(|i| format!("line {}", i)).collect();
        lines.join(separator)
    }

    /// Mounts a mock for PR #42 that answers diff-accepting GET requests with
    /// status 200 and the given body.
    async fn mount_pr_42_diff(server: &MockServer, body: &str) {
        Mock::given(method("GET"))
            .and(path("/repos/test-owner/test-repo/pulls/42"))
            .and(header("Accept", "application/vnd.github.v3.diff"))
            .respond_with(ResponseTemplate::new(200).set_body_string(body))
            .mount(server)
            .await;
    }

    /// Calls [`fetch_pr_diff`] against the mock server with the fixed test
    /// owner, repository and token.
    async fn fetch_test_pr(
        server: &MockServer,
        pr_number: u64,
    ) -> Result<DiffResult, RsGuardError> {
        fetch_pr_diff(
            &server.uri(),
            "test-owner",
            "test-repo",
            pr_number,
            "test-token",
        )
        .await
    }

    /// Writes `contents` to `file_name` inside a fresh temporary directory and
    /// runs [`fetch_file_diff`] on it. The directory lives until the call returns.
    fn fetch_from_temp_file(file_name: &str, contents: &str) -> Result<DiffResult, RsGuardError> {
        let dir = tempfile::tempdir().unwrap();
        let diff_path = dir.path().join(file_name);
        std::fs::write(&diff_path, contents).unwrap();
        fetch_file_diff(diff_path.to_str().unwrap())
    }

    #[tokio::test]
    async fn test_fetch_pr_diff_success() {
        let mock_server = MockServer::start().await;
        mount_pr_42_diff(&mock_server, &format!("{}+line", PR_DIFF_HEADER)).await;

        let result = fetch_test_pr(&mock_server, 42).await;

        assert!(result.is_ok());
        let diff = result.unwrap();
        assert!(diff.content.contains("diff --git"));
        assert!(diff.line_count > 0);
    }

    #[tokio::test]
    async fn test_fetch_pr_diff_not_found() {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/repos/test-owner/test-repo/pulls/999"))
            .respond_with(ResponseTemplate::new(404).set_body_string("Not Found"))
            .mount(&mock_server)
            .await;

        let result = fetch_test_pr(&mock_server, 999).await;

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("404"));
    }

    #[tokio::test]
    async fn test_fetch_pr_diff_rejects_json_response() {
        let mock_server = MockServer::start().await;

        // A 200 response whose body is a JSON error must not be mistaken for a diff.
        Mock::given(method("GET"))
            .and(path("/repos/test-owner/test-repo/pulls/42"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_string(r#"{"message": "Not Found", "documentation_url": "..." }"#),
            )
            .mount(&mock_server)
            .await;

        let result = fetch_test_pr(&mock_server, 42).await;

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("not appear to be a diff"));
    }

    // --- Issue #24: Boundary tests for fetch_pr_diff at 100KB ---

    #[tokio::test]
    async fn test_fetch_pr_diff_exactly_100kb_passes() {
        let mock_server = MockServer::start().await;

        // Pad the header with '+' so the body is exactly 100KB.
        let content_bytes = 100 * 1024 - PR_DIFF_HEADER.len();
        let diff_content = format!("{}{}", PR_DIFF_HEADER, "+".repeat(content_bytes));
        mount_pr_42_diff(&mock_server, &diff_content).await;

        let result = fetch_test_pr(&mock_server, 42).await;

        assert!(result.is_ok(), "Exactly 100KB diff should pass");
        let diff = result.unwrap();
        assert_eq!(diff.size_bytes, 100 * 1024);
    }

    #[tokio::test]
    async fn test_fetch_pr_diff_100kb_plus_1_fails() {
        let mock_server = MockServer::start().await;

        // Pad the header with '+' so the body is 100KB + 1 byte.
        let content_bytes = 100 * 1024 - PR_DIFF_HEADER.len() + 1;
        let diff_content = format!("{}{}", PR_DIFF_HEADER, "+".repeat(content_bytes));
        mount_pr_42_diff(&mock_server, &diff_content).await;

        let result = fetch_test_pr(&mock_server, 42).await;

        assert!(result.is_err(), "100KB + 1 byte diff should fail");
        assert!(matches!(result, Err(RsGuardError::DiffTooLarge { .. })));
    }

    #[tokio::test]
    async fn test_fetch_pr_diff_1501_lines_fails() {
        let mock_server = MockServer::start().await;

        // The header contributes 4 lines, so 1497 more lines give 1501 in total.
        let lines: Vec<String> = (0..1497).map(|i| format!("+line {}", i)).collect();
        let diff_content = format!("{}{}", PR_DIFF_HEADER, lines.join("\n"));
        mount_pr_42_diff(&mock_server, &diff_content).await;

        let result = fetch_test_pr(&mock_server, 42).await;

        assert!(result.is_err(), "1501 lines should fail");
        assert!(matches!(result, Err(RsGuardError::DiffTooLarge { .. })));
    }

    #[test]
    fn test_validate_diff_content_valid() {
        assert!(validate_diff_content("diff --git a/f.rs b/f.rs\n").is_ok());
        assert!(validate_diff_content("@@ -1,3 +1,4 @@\n").is_ok());
        assert!(validate_diff_content("--- a/f.rs\n+++ b/f.rs\n").is_ok());
        assert!(validate_diff_content("index abc123..def456 100644\n").is_ok());
    }

    #[test]
    fn test_validate_diff_content_json() {
        assert!(validate_diff_content(r#"{"message": "error"}"#).is_err());
        assert!(validate_diff_content(r#"[{"error": true}]"#).is_err());
    }

    #[test]
    fn test_validate_diff_content_no_markers() {
        assert!(validate_diff_content("just some random text\nwith no diff markers").is_err());
    }

    #[test]
    fn test_chunk_diff_small_diff_unchanged() {
        let content = "line1\nline2\nline3";
        let (result, truncated, _) = chunk_diff(content);
        assert!(!truncated);
        assert_eq!(result.as_ref(), content);
    }

    #[test]
    fn test_chunk_diff_truncates_large_diff() {
        // Use explicit 50/50 params to test truncation behaviour
        // independent of the default constants.
        let content = numbered_lines(200, "\n");

        let (result, truncated, removed) = chunk_diff_with_params(&content, 50, 50);
        assert!(truncated);
        // 200 - 50 - 50 = 100 removed
        assert_eq!(removed, 100);
        // Result should have head + placeholder + tail
        assert!(result.contains("line 0"));
        assert!(result.contains("line 49"));
        assert!(result.contains("line 150"));
        assert!(result.contains("line 199"));
        assert!(result.contains("100 lines omitted"));
        // Middle lines should NOT be present
        assert!(!result.contains("line 100"));
    }

    #[test]
    fn test_chunk_diff_exactly_at_threshold_unchanged() {
        // 100 lines = exactly threshold with explicit 50+50 params
        let content = numbered_lines(100, "\n");

        let (result, truncated, _) = chunk_diff_with_params(&content, 50, 50);
        assert!(!truncated);
        assert_eq!(result.as_ref(), content);
    }

    #[test]
    fn test_chunk_diff_preserves_head_and_tail_order() {
        let content = numbered_lines(150, "\n");

        let (result, truncated, _) = chunk_diff_with_params(&content, 50, 50);
        assert!(truncated);

        // Head lines should appear before the placeholder, tail lines after it
        let head_pos = result.find("line 0").unwrap();
        let placeholder_pos = result.find("lines omitted").unwrap();
        let tail_pos = result.find("line 100").unwrap();

        assert!(head_pos < placeholder_pos);
        assert!(placeholder_pos < tail_pos);
    }

    #[test]
    fn test_chunk_diff_preserves_line_endings() {
        // Test with content that has trailing newline, using explicit 50+50 params
        let content = numbered_lines(150, "\n") + "\n";

        let (result, truncated, _) = chunk_diff_with_params(&content, 50, 50);
        assert!(truncated);
        assert!(result.ends_with('\n'));
    }

    #[test]
    fn test_chunk_diff_preserves_crlf_line_endings() {
        // Test with CRLF line endings (Windows-style), using explicit 50+50 params
        let content = numbered_lines(150, "\r\n") + "\r\n";

        let (result, truncated, removed) = chunk_diff_with_params(&content, 50, 50);
        assert!(truncated);
        assert_eq!(removed, 50); // 150 - 50 head - 50 tail
        // Result should use CRLF line endings
        assert!(result.contains("\r\n"));
        assert!(result.ends_with("\r\n"));
    }

    #[test]
    fn test_chunk_diff_small_crlf_unchanged() {
        let content = "line1\r\nline2\r\nline3\r\n";
        let (result, truncated, _) = chunk_diff(content);
        assert!(!truncated);
        assert_eq!(result.as_ref(), content);
    }

    #[test]
    fn test_chunk_diff_no_allocation_when_small() {
        // Verify that small diffs don't allocate (Cow::Borrowed)
        let content = "line1\nline2\nline3";
        let (result, truncated, _) = chunk_diff(content);
        assert!(!truncated);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    // --- New default-threshold tests (issues #7 & #29) ---

    #[test]
    fn test_chunk_diff_default_does_not_truncate_200_lines() {
        // 200 lines is well below the new 800-line default threshold — should pass unchanged
        let content = numbered_lines(200, "\n");

        let (result, truncated, removed) = chunk_diff(&content);
        assert!(
            !truncated,
            "200-line diff should not be truncated at new 800-line default"
        );
        assert_eq!(removed, 0);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn test_chunk_diff_default_truncates_at_1000_lines() {
        // 1000 lines exceeds the 800-line default threshold
        let content = numbered_lines(1000, "\n");

        let (result, truncated, removed) = chunk_diff(&content);
        assert!(
            truncated,
            "1000-line diff should be truncated at 800-line default"
        );
        // 1000 - 400 head - 400 tail = 200 removed
        assert_eq!(removed, 200);
        assert!(result.contains("200 lines omitted"));
    }

    #[test]
    fn test_chunk_diff_default_exactly_at_threshold() {
        // 800 lines = exactly the new default threshold, should NOT truncate
        let content = numbered_lines(800, "\n");

        let (result, truncated, _) = chunk_diff(&content);
        assert!(
            !truncated,
            "800-line diff at threshold should not be truncated"
        );
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn test_chunk_diff_with_params_custom_thresholds() {
        // Verify chunk_diff_with_params honours custom head/tail counts
        let content = numbered_lines(100, "\n");

        let (result, truncated, removed) = chunk_diff_with_params(&content, 20, 20);
        assert!(truncated);
        assert_eq!(removed, 60); // 100 - 20 - 20
        assert!(result.contains("line 0"));
        assert!(result.contains("line 19"));
        assert!(result.contains("line 80"));
        assert!(result.contains("line 99"));
        assert!(!result.contains("line 50")); // middle omitted
    }

    #[test]
    fn test_fetch_file_diff_valid() {
        let diff_content =
            "diff --git a/f.rs b/f.rs\n--- a/f.rs\n+++ b/f.rs\n@@ -1 +1,2 @@\n+line1\n line0";

        let result = fetch_from_temp_file("test.diff", diff_content).unwrap();
        assert_eq!(result.content, diff_content);
        assert!(result.size_bytes > 0);
        assert!(result.line_count > 0);
    }

    #[test]
    fn test_fetch_file_diff_empty() {
        let result = fetch_from_temp_file("empty.diff", "");
        assert!(matches!(result, Err(RsGuardError::EmptyDiff)));
    }

    #[test]
    fn test_fetch_file_diff_invalid_content() {
        let result = fetch_from_temp_file("invalid.diff", "not a diff");
        assert!(matches!(result, Err(RsGuardError::InvalidDiffContent)));
    }

    #[test]
    fn test_fetch_file_diff_too_large() {
        // A valid diff header followed by enough content to exceed MAX_DIFF_BYTES (100KB)
        let large_content = format!("{}{}", SMALL_DIFF_HEADER, "+line\n".repeat(200 * 1024));

        let result = fetch_from_temp_file("large.diff", &large_content);
        assert!(matches!(result, Err(RsGuardError::DiffTooLarge { .. })));
    }

    #[test]
    fn test_fetch_file_diff_not_found() {
        let result = fetch_file_diff("/nonexistent/path.diff");
        assert!(matches!(result, Err(RsGuardError::Config(_))));
    }

    #[test]
    #[serial_test::serial]
    fn test_fetch_local_diff_requires_git_repo() {
        /// Restores the saved working directory when dropped, so the change
        /// made by this test cannot leak out even if an assertion panics.
        struct CwdGuard(std::path::PathBuf);

        impl Drop for CwdGuard {
            fn drop(&mut self) {
                let _ = std::env::set_current_dir(&self.0);
            }
        }

        // Calling fetch_local_diff outside a git repo returns an error.
        // `dir` is declared before the guard so that, on drop, the working
        // directory is restored before the temporary directory is deleted.
        let dir = tempfile::tempdir().unwrap();
        let _restore_cwd = CwdGuard(std::env::current_dir().unwrap());
        std::env::set_current_dir(dir.path()).unwrap();

        let result = fetch_local_diff();
        // Depending on environment, git may not be installed (Io error),
        // may return non-zero exit (Config error), or may succeed with
        // empty output (EmptyDiff). All are valid error states.
        assert!(result.is_err(), "expected error, got Ok");
    }

    // --- build_local_diff_result unit tests (issue #8) ---

    #[test]
    fn test_build_local_diff_result_rejects_invalid_content() {
        // Non-diff content (e.g. corrupted git output) must be rejected
        let result = build_local_diff_result("this is not a diff at all".to_string());
        assert!(
            matches!(result, Err(RsGuardError::InvalidDiffContent)),
            "expected InvalidDiffContent, got {:?}",
            result
        );
    }

    #[test]
    fn test_build_local_diff_result_rejects_json_content() {
        // JSON error bodies from git should be rejected (e.g. corrupt stdout)
        let result = build_local_diff_result(r#"{"error": "something went wrong"}"#.to_string());
        assert!(
            matches!(result, Err(RsGuardError::InvalidDiffContent)),
            "expected InvalidDiffContent, got {:?}",
            result
        );
    }

    #[test]
    fn test_build_local_diff_result_rejects_empty() {
        let result = build_local_diff_result(String::new());
        assert!(matches!(result, Err(RsGuardError::EmptyDiff)));
    }

    #[test]
    fn test_build_local_diff_result_accepts_valid_diff() {
        let content = "diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1,2 @@\n+new line\n old line".to_string();
        let result = build_local_diff_result(content.clone());
        assert!(result.is_ok(), "expected Ok, got {:?}", result);
        let diff = result.unwrap();
        assert_eq!(diff.content, content);
        assert!(diff.size_bytes > 0);
        assert!(diff.line_count > 0);
    }

    #[test]
    fn test_build_local_diff_result_rejects_too_large() {
        let huge = format!("{}{}", SMALL_DIFF_HEADER, "+line\n".repeat(200 * 1024));
        let result = build_local_diff_result(huge);
        assert!(matches!(result, Err(RsGuardError::DiffTooLarge { .. })));
    }

    // --- Issue #20: Boundary tests for chunk_diff ---

    #[test]
    fn test_chunk_diff_101_lines_truncates() {
        // With 50/50 params, threshold is 100 lines, so 101 should truncate
        let content = numbered_lines(101, "\n");

        let (result, truncated, removed) = chunk_diff_with_params(&content, 50, 50);
        assert!(truncated, "101 lines should truncate with 50/50 params");
        assert_eq!(removed, 1); // 101 - 50 - 50 = 1
        assert!(result.contains("1 lines omitted"));
        assert!(result.contains("line 0"));
        assert!(result.contains("line 49"));
        assert!(result.contains("line 51"));
        assert!(result.contains("line 100"));
        // The single omitted line is exactly the one between head and tail
        assert!(!result.contains("line 50"));
    }

    #[test]
    fn test_chunk_diff_100_lines_no_truncate() {
        // With 50/50 params, threshold is 100 lines, so 100 should NOT truncate
        let content = numbered_lines(100, "\n");

        let (result, truncated, removed) = chunk_diff_with_params(&content, 50, 50);
        assert!(
            !truncated,
            "100 lines should not truncate with 50/50 params"
        );
        assert_eq!(removed, 0);
        assert!(!result.contains("lines omitted"));
        assert_eq!(result.as_ref(), content);
    }

    // --- Issue #21: Non-UTF8 output in fetch_local_diff ---

    #[test]
    fn test_build_local_diff_result_handles_non_utf8_lossy() {
        // Create a diff with non-UTF8 bytes (simulating binary file diff)
        let mut content = "diff --git a/binary.bin b/binary.bin\n--- a/binary.bin\n+++ b/binary.bin\n@@ -1 +1,2 @@\n".as_bytes().to_vec();
        // Append some non-UTF8 bytes
        content.extend_from_slice(&[0xFF, 0xFE, 0xFD]);
        content.extend_from_slice(b"+some content\n");

        // Convert to String using the same lossy conversion as fetch_local_diff
        let lossy_string = String::from_utf8_lossy(&content).to_string();

        // The result should be accepted (lossy conversion allows it to proceed)
        // In practice, git diff outputs UTF-8, but we handle binary gracefully
        let result = build_local_diff_result(lossy_string);
        // Should succeed because the diff markers are present
        assert!(
            result.is_ok(),
            "non-UTF8 diff with valid markers should be accepted"
        );
    }
}