// commitbee 0.6.0 — AI-powered commit message generator using tree-sitter
// semantic analysis and local LLMs.
//
// SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io>
//
// SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Commercial

mod helpers;

use commitbee::config::CommitFormat;
use commitbee::services::sanitizer::{CommitSanitizer, CommitValidator, StructuredCommit};
use proptest::prelude::*;

/// Builds the `CommitFormat` used by tests that do not override any formatting option.
///
/// Returns whatever `CommitFormat::default()` produces.
fn default_format() -> CommitFormat {
    CommitFormat::default()
}

// ─── JSON parsing tests ───────────────────────────────────────────────────────

/// A bare JSON object with a null body renders as a single `type(scope): subject` line.
#[test]
fn sanitize_valid_json() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}

/// JSON wrapped in a ```` ```json ```` fence is still parsed and rendered.
#[test]
fn sanitize_json_in_code_fence() {
    let format = default_format();
    let fenced = r#"```json
{"type": "fix", "scope": "git", "subject": "handle detached HEAD state", "body": null}
```"#;
    let message = CommitSanitizer::sanitize(fenced, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): handle detached HEAD state");
}

/// JSON wrapped in a fence without a language tag is still parsed and rendered.
#[test]
fn sanitize_json_in_plain_fence() {
    let format = default_format();
    let fenced = r#"```
{"type": "refactor", "scope": "context", "subject": "extract token budget logic", "body": null}
```"#;
    let message = CommitSanitizer::sanitize(fenced, &format).unwrap();
    insta::assert_snapshot!(message, @"refactor(context): extract token budget logic");
}

/// A JSON payload with a two-line body is rendered and compared against the stored snapshot.
#[test]
fn sanitize_json_with_body() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "llm", "subject": "add streaming support", "body": "Uses tokio-stream to stream tokens from Ollama.\nImproves perceived latency for long responses."}"#;
    let result = CommitSanitizer::sanitize(input, &format).unwrap();
    // The output spans several lines, so it is kept in a snapshot file rather than inline.
    insta::assert_snapshot!(result);
}

/// A JSON payload whose `type` is `yolo` must be rejected.
#[test]
fn sanitize_json_invalid_type() {
    let format = default_format();
    let input = r#"{"type": "yolo", "scope": "cli", "subject": "ship it", "body": null}"#;
    let outcome = CommitSanitizer::sanitize(input, &format);
    assert!(
        outcome.is_err(),
        "expected Err for invalid commit type 'yolo'"
    );
}

// ─── Plain text tests ─────────────────────────────────────────────────────────

/// A plain-text message already in conventional form is returned unchanged.
#[test]
fn sanitize_plain_text_conventional() {
    let format = default_format();
    let message = CommitSanitizer::sanitize("feat(cli): add --dry-run flag", &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add --dry-run flag");
}

/// A `Suggested commit:` lead-in is stripped, leaving only the commit message.
#[test]
fn sanitize_plain_with_preamble() {
    let format = default_format();
    // The preamble is kept to one line so that only a single pattern fires. This
    // sidesteps the multi-pattern overlap bug in which "commit message:" is a
    // substring of the already-matched "here's the commit message" pattern.
    let with_preamble = "Suggested commit: feat(cli): add --dry-run flag";
    let message = CommitSanitizer::sanitize(with_preamble, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add --dry-run flag");
}

/// Double quotes surrounding the whole message are removed.
#[test]
fn sanitize_plain_with_quotes() {
    let format = default_format();
    let quoted = r#""fix(git): handle missing remote""#;
    let message = CommitSanitizer::sanitize(quoted, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): handle missing remote");
}

/// Free-form text without a commit type prefix must be rejected.
#[test]
fn sanitize_invalid_no_type() {
    let format = default_format();
    let outcome = CommitSanitizer::sanitize(
        "just some random text without a valid type prefix",
        &format,
    );
    assert!(
        outcome.is_err(),
        "expected Err for input with no valid commit type"
    );
}

// ─── Edge cases ───────────────────────────────────────────────────────────────

/// The empty string must be rejected.
#[test]
fn sanitize_empty_input() {
    let format = default_format();
    let outcome = CommitSanitizer::sanitize("", &format);
    assert!(outcome.is_err(), "expected Err for empty input");
}

/// Input made only of spaces, a newline and a tab must be rejected.
#[test]
fn sanitize_whitespace_only() {
    let format = default_format();
    let blank = "   \n\t  ";
    let outcome = CommitSanitizer::sanitize(blank, &format);
    assert!(outcome.is_err(), "expected Err for whitespace-only input");
}

// ─── UTF-8 safety (FR-001) ────────────────────────────────────────────────────

/// Smoke test: a subject of 100 multi-byte emoji must not make the sanitizer panic.
#[test]
fn sanitize_unicode_emoji_in_subject() {
    // Each emoji occupies several bytes, so any length handling has to respect
    // character boundaries.
    let crabs = "🦀".repeat(100);
    let payload = format!(
        r#"{{"type": "chore", "scope": null, "subject": "{}", "body": null}}"#,
        crabs
    );
    // Ok and Err are both acceptable here; only a panic fails the test.
    let _ = CommitSanitizer::sanitize(&payload, &default_format());
}

/// A subject made of CJK characters must be handled without panicking, and any
/// accepted message must keep its first line within 72 characters.
#[test]
fn sanitize_cjk_characters() {
    // CJK characters are 3 bytes each; ensure no mid-char slicing
    let raw = r#"{"type": "docs", "scope": "readme", "subject": "添加中文说明文档以便于理解项目架构和使用方式", "body": null}"#;
    let result = CommitSanitizer::sanitize(raw, &default_format());
    // Must not panic. A `String` is valid UTF-8 by construction, so re-validating
    // its bytes proves nothing; check the first-line length limit instead.
    if let Ok(msg) = result {
        let first_line = msg.lines().next().unwrap_or("");
        assert!(
            first_line.chars().count() <= 72,
            "first line exceeds 72 chars: '{}' ({})",
            first_line,
            first_line.chars().count()
        );
    }
}

/// A subject of 80 two-byte accented characters must not panic; if the message is
/// accepted, its first line must stay within 72 characters.
#[test]
fn sanitize_accented_characters() {
    let accented = "é".repeat(80);
    let payload = format!(
        r#"{{"type": "fix", "scope": null, "subject": "{}", "body": null}}"#,
        accented
    );
    // Reaching the `if let` at all means the call did not panic.
    if let Ok(message) = CommitSanitizer::sanitize(&payload, &default_format()) {
        // A message with no lines counts as a zero-width first line.
        let header_width = message
            .lines()
            .next()
            .map_or(0, |line| line.chars().count());
        assert!(header_width <= 72);
    }
}

// ─── Format options ───────────────────────────────────────────────────────────

/// With `include_scope` disabled, the scope supplied in the JSON is left out of the header.
#[test]
fn sanitize_no_scope() {
    let scopeless = CommitFormat {
        include_scope: false,
        ..CommitFormat::default()
    };
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &scopeless).unwrap();
    insta::assert_snapshot!(message, @"feat: add verbose flag");
}

/// With `include_body` disabled, the body supplied in the JSON is left out of the output.
#[test]
fn sanitize_no_body() {
    let bodiless = CommitFormat {
        include_body: false,
        ..CommitFormat::default()
    };
    let input = r#"{"type": "feat", "scope": "llm", "subject": "add streaming support", "body": "This is the body text."}"#;
    let message = CommitSanitizer::sanitize(input, &bodiless).unwrap();
    insta::assert_snapshot!(message, @"feat(llm): add streaming support");
}

/// With `lowercase_subject` disabled, the subject keeps its original capitalisation.
#[test]
fn sanitize_no_lowercase() {
    let keep_case = CommitFormat {
        lowercase_subject: false,
        ..CommitFormat::default()
    };
    let input =
        r#"{"type": "fix", "scope": "git", "subject": "Handle Detached HEAD State", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &keep_case).unwrap();
    insta::assert_snapshot!(message, @"fix(git): Handle Detached HEAD State");
}

// ─── Scope handling ──────────────────────────────────────────────────────────

/// A scope containing a space is rendered with a hyphen in its place.
#[test]
fn sanitize_scope_with_spaces() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "my scope", "subject": "add feature", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(my-scope): add feature");
}

/// A scope consisting solely of `@#$%` is dropped from the header entirely.
#[test]
fn sanitize_scope_invalid_chars() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "@#$%", "subject": "add feature", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat: add feature");
}

// ─── Truncation boundary ─────────────────────────────────────────────────────

/// Probes the 72-character first-line limit from both sides using a scope-less `feat` commit.
#[test]
fn sanitize_truncation_boundary_72() {
    let format = default_format();
    // Builds the JSON payload for a scope-less `feat` commit with the given subject.
    let payload_for = |subject: &str| {
        format!(
            r#"{{"type": "feat", "scope": null, "subject": "{}", "body": null}}"#,
            subject
        )
    };

    // "feat: " takes 6 chars, so a 66-char subject gives a first line of exactly 72.
    let at_limit = CommitSanitizer::sanitize(&payload_for(&"a".repeat(66)), &format).unwrap();
    assert_eq!(
        at_limit.chars().count(),
        72,
        "exactly 72 chars should not be truncated"
    );

    // One extra char (67) makes the first line 73 chars long, which must be refused.
    let over_limit = CommitSanitizer::sanitize(&payload_for(&"b".repeat(67)), &format);
    assert!(over_limit.is_err(), "73+ char first line should be rejected");
    let err_msg = over_limit.unwrap_err().to_string();
    assert!(
        err_msg.contains("73 chars") && err_msg.contains("max 72"),
        "error should mention the char count and limit, got: {}",
        err_msg,
    );
}

/// A plain-text message whose first line is 73 characters long must be rejected.
#[test]
fn sanitize_plain_text_rejects_long_first_line() {
    // "feat: " (6 chars) followed by 67 chars gives 73, one over the 72 limit.
    let overlong = format!("feat: {}", "a".repeat(67));
    let outcome = CommitSanitizer::sanitize(&overlong, &default_format());
    assert!(
        outcome.is_err(),
        "plain text with 73+ char first line should be rejected"
    );
}

// ─── Subject normalization ───────────────────────────────────────────────────

/// A period at the end of the subject is removed.
#[test]
fn sanitize_subject_trailing_period() {
    let format = default_format();
    let input =
        r#"{"type": "fix", "scope": "git", "subject": "resolve merge conflicts.", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): resolve merge conflicts");
}

/// An upper-case `type` value (`FEAT`) is accepted and rendered in lower case.
#[test]
fn sanitize_uppercase_type_in_json() {
    let format = default_format();
    let input = r#"{"type": "FEAT", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}

// ─── Body handling ───────────────────────────────────────────────────────────

/// A `body` that is explicitly `null` and a `body` key that is absent must render identically.
#[test]
fn sanitize_json_null_body() {
    let format = default_format();

    // Variant 1: the key is present with an explicit JSON null.
    let with_null = r#"{"type": "fix", "scope": null, "subject": "patch bug", "body": null}"#;
    let from_null = CommitSanitizer::sanitize(with_null, &format).unwrap();

    // Variant 2: the key is missing altogether. serde_json deserializes a missing
    // Option<String> as None, so this parses just like the explicit null.
    let without_key = r#"{"type": "fix", "scope": null, "subject": "patch bug"}"#;
    let from_missing = CommitSanitizer::sanitize(without_key, &format).unwrap();

    assert_eq!(
        from_null, from_missing,
        "null body and missing body should produce identical output"
    );
    insta::assert_snapshot!(from_null, @"fix: patch bug");
}

// ─── Code fence stripping ────────────────────────────────────────────────────

/// A fenced block holding preamble text is discarded; the commit line after it is kept.
#[test]
fn sanitize_code_fence_in_plain_text() {
    let format = default_format();
    let fenced_then_commit = "```\nsome preamble\n```\nfeat(cli): add verbose flag";
    let message = CommitSanitizer::sanitize(fenced_then_commit, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}

// ─── Proptest: never panics ───────────────────────────────────────────────────

proptest! {
    // Property: for any string matching `.*`, sanitizing with the default format
    // returns normally, whether as Ok or Err.
    #[test]
    fn sanitizer_never_panics(raw in ".*") {
        // The return value is deliberately discarded; only a panic fails the property.
        let _ = CommitSanitizer::sanitize(&raw, &CommitFormat::default());
    }
}

// ─── Body wrapping tests ─────────────────────────────────────────────────────

/// A long single-line body must come back with every body line at most 72 characters
/// wide and with its text still present.
#[test]
fn sanitize_json_body_wrapped_at_72() {
    let long_body = "This is a very long body line that should be wrapped because it exceeds the seventy-two character limit for conventional commit body lines.";
    let payload = format!(
        r#"{{"type": "feat", "scope": "core", "subject": "add new feature", "body": "{}"}}"#,
        long_body
    );
    let message = CommitSanitizer::sanitize(&payload, &default_format()).unwrap();

    let all_lines: Vec<&str> = message.lines().collect();
    // The first two lines are the header and the blank separator; the body follows.
    let body_lines = &all_lines[2..];
    for line in body_lines {
        let width = line.chars().count();
        assert!(
            width <= 72,
            "Body line exceeds 72 chars: '{}' ({})",
            line,
            width
        );
    }
    // Re-join the wrapped lines to confirm no body text was dropped.
    let rejoined: String = body_lines.join(" ");
    assert!(rejoined.contains("seventy-two character limit"));
}

/// A short body is emitted as a single unmodified line after the header and blank line.
#[test]
fn sanitize_json_body_short_not_wrapped() {
    let format = default_format();
    let payload = r#"{"type": "fix", "scope": null, "subject": "fix bug", "body": "Short body."}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();

    let rendered: Vec<&str> = message.lines().collect();
    // Expected layout: header, blank separator, then the one body line.
    assert_eq!(rendered.len(), 3);
    assert_eq!(rendered[2], "Short body.");
}

/// Both paragraphs of a body separated by a blank line appear as lines of their own.
#[test]
fn sanitize_json_body_preserves_paragraphs() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": null, "subject": "add feature", "body": "First paragraph.\n\nSecond paragraph."}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();

    // Each paragraph must match a whole output line exactly.
    let has_line = |wanted: &str| message.lines().any(|line| line == wanted);
    assert!(has_line("First paragraph."));
    assert!(has_line("Second paragraph."));
}

// ─── Breaking changes ────────────────────────────────────────────────────────

/// A scope-less commit with a `breaking_change` description is checked against the stored snapshot.
#[test]
fn sanitize_breaking_change_json_no_scope() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "v1 endpoints removed, migrate to /v2"}"#;
    let result = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(result);
    // Expected rendering:
    //
    // feat!: drop v1 API
    //
    // BREAKING CHANGE: v1 endpoints removed, migrate to /v2
}

/// A scoped commit with a `breaking_change` description is checked against the stored snapshot.
#[test]
fn sanitize_breaking_change_json_with_scope() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "api", "subject": "remove deprecated endpoint", "body": null, "breaking_change": "GET /api/v1/users removed, use GET /api/v2/users instead"}"#;
    let result = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(result);
    // Expected rendering:
    //
    // feat(api)!: remove deprecated endpoint
    //
    // BREAKING CHANGE: GET /api/v1/users removed, use GET /api/v2/users instead
}

/// A commit carrying both a body and a `breaking_change` description is checked against
/// the stored snapshot.
#[test]
fn sanitize_breaking_change_json_with_body_and_footer() {
    let format = default_format();
    let input = r#"{"type": "chore", "scope": "config", "subject": "rename timeout key", "body": "Aligns the config schema with the 2.0 release standard.", "breaking_change": "config key 'timeout' renamed to 'timeout_secs', update your commitbee.toml"}"#;
    let result = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(result);
    // Expected rendering:
    //
    // chore(config)!: rename timeout key
    //
    // Aligns the config schema with the 2.0 release standard.
    //
    // BREAKING CHANGE: config key 'timeout' renamed to 'timeout_secs', update your commitbee.toml
}

/// A JSON `null` for `breaking_change` yields an ordinary, non-breaking header.
#[test]
fn sanitize_breaking_change_null_is_non_breaking() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}

/// An empty-string `breaking_change` yields an ordinary, non-breaking header.
#[test]
fn sanitize_breaking_change_empty_string_is_non_breaking() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": ""}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}

/// A `breaking_change` made only of spaces yields an ordinary, non-breaking header.
#[test]
fn sanitize_breaking_change_whitespace_only_is_non_breaking() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": "   "}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}

/// The literal string `"null"` in `breaking_change` yields an ordinary, non-breaking header.
#[test]
fn sanitize_breaking_change_string_null_is_non_breaking() {
    let format = default_format();
    // When following a template, models sometimes emit the string "null" where JSON
    // null was meant. Treating it as non-breaking prevents a spurious `feat!` header
    // and `BREAKING CHANGE:` footer.
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": "null"}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}

/// Leaving the `breaking_change` key out entirely yields an ordinary, non-breaking header.
#[test]
fn sanitize_breaking_change_missing_field_is_non_breaking() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}

/// A plain-text `feat!:` header (breaking marker, no scope) is accepted and returned unchanged.
#[test]
fn sanitize_plain_text_bang_no_scope_passes_validation() {
    // The input doubles as the expected output.
    let header = "feat!: remove legacy authentication middleware";
    let outcome = CommitSanitizer::sanitize(header, &default_format());
    assert!(outcome.is_ok(), "expected Ok for feat!: plain text");
    assert_eq!(outcome.unwrap(), header);
}

/// A plain-text `feat(api)!:` header (scope plus breaking marker) is accepted and returned
/// unchanged.
#[test]
fn sanitize_plain_text_scope_and_bang_passes_validation() {
    // The input doubles as the expected output.
    let header = "feat(api)!: remove deprecated endpoint";
    let outcome = CommitSanitizer::sanitize(header, &default_format());
    assert!(outcome.is_ok(), "expected Ok for feat(scope)!: plain text");
    assert_eq!(outcome.unwrap(), header);
}

/// With `include_body` disabled, a breaking commit is checked against the stored snapshot.
#[test]
fn sanitize_breaking_change_emitted_when_include_body_false() {
    let bodiless = CommitFormat {
        include_body: false,
        ..CommitFormat::default()
    };
    let input = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "v1 endpoints removed"}"#;
    let result = CommitSanitizer::sanitize(input, &bodiless).unwrap();
    insta::assert_snapshot!(result);
    // Expected rendering:
    //
    // feat!: drop v1 API
    //
    // BREAKING CHANGE: v1 endpoints removed
}

/// With `include_scope` disabled, a scoped breaking commit is checked against the stored snapshot.
#[test]
fn sanitize_breaking_change_include_scope_false() {
    let scopeless = CommitFormat {
        include_scope: false,
        ..CommitFormat::default()
    };
    let input = r#"{"type": "feat", "scope": "api", "subject": "remove deprecated endpoint", "body": null, "breaking_change": "v1 endpoint removed"}"#;
    let result = CommitSanitizer::sanitize(input, &scopeless).unwrap();
    insta::assert_snapshot!(result);
    // Expected rendering:
    //
    // feat!: remove deprecated endpoint
    //
    // BREAKING CHANGE: v1 endpoint removed
}

/// A `breaking_change` given as the JSON boolean `true` instead of a string must be rejected.
#[test]
fn sanitize_breaking_change_invalid_type_returns_error() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": true}"#;
    let outcome = CommitSanitizer::sanitize(input, &format);
    assert!(
        outcome.is_err(),
        "invalid typed breaking_change must not produce a valid commit"
    );
}

/// When a long `BREAKING CHANGE:` footer wraps, every continuation line must begin with
/// two spaces.
#[test]
fn sanitize_breaking_footer_continuation_lines_indented() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "this description is intentionally long so the footer must wrap onto a continuation line for parser compatibility"}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();

    // Everything from the footer header line to the end of the message.
    let footer: Vec<&str> = message
        .lines()
        .skip_while(|line| !line.starts_with("BREAKING CHANGE:"))
        .collect();
    assert!(
        !footer.is_empty(),
        "BREAKING CHANGE: header line must exist"
    );

    // The first footer line is the header itself; the rest are continuations.
    let continuation_lines = &footer[1..];
    assert!(
        !continuation_lines.is_empty(),
        "footer must have continuation lines"
    );
    for (idx, line) in continuation_lines.iter().enumerate() {
        assert!(
            line.starts_with("  "),
            "continuation line {} must start with two spaces: '{}'",
            idx + 1,
            line
        );
    }
}

// ─── CommitValidator tests ──────────────────────────────────────────────────

/// Builds a `StructuredCommit` fixture for the validator tests.
///
/// The subject is always `"test subject"`; scope and body are always `None`. Only the
/// commit type and the optional breaking-change text vary.
fn make_commit(commit_type: &str, breaking_change: Option<&str>) -> StructuredCommit {
    let commit_type = commit_type.to_owned();
    let breaking_change = breaking_change.map(str::to_owned);
    StructuredCommit {
        commit_type,
        scope: None,
        subject: String::from("test subject"),
        body: None,
        breaking_change,
    }
}

/// A `fix` commit validated with every signal off must produce a violation mentioning
/// "refactor".
#[test]
fn validator_rejects_fix_without_bug_evidence() {
    let violations = CommitValidator::validate(&make_commit("fix", None), false, false, 0, false);
    let mentions_refactor = violations.iter().any(|v| v.contains("refactor"));
    assert!(
        mentions_refactor,
        "should reject fix type when no bug evidence: {:?}",
        violations
    );
}

/// A `fix` commit validated with the first flag set must produce no violations.
#[test]
fn validator_accepts_fix_with_bug_evidence() {
    // Per the test name, the first boolean argument signals bug evidence.
    let violations = CommitValidator::validate(&make_commit("fix", None), true, false, 0, false);
    assert!(
        violations.is_empty(),
        "should accept fix type when bug evidence exists: {:?}",
        violations
    );
}

/// A commit without `breaking_change`, validated with a count of 2, must produce a
/// violation containing "breaking_change is null".
#[test]
fn validator_rejects_missing_breaking_change() {
    let violations =
        CommitValidator::validate(&make_commit("refactor", None), false, false, 2, false);
    let flags_null_breaking = violations
        .iter()
        .any(|v| v.contains("breaking_change is null"));
    assert!(
        flags_null_breaking,
        "should reject missing breaking_change when public API removed: {:?}",
        violations
    );
}

/// A `breaking_change` of `public_api_removed` must produce a violation containing
/// "internal label names".
#[test]
fn validator_rejects_copied_field_names() {
    let copied_label = make_commit("refactor", Some("public_api_removed"));
    let violations = CommitValidator::validate(&copied_label, false, false, 2, false);
    let flags_label_copy = violations
        .iter()
        .any(|v| v.contains("internal label names"));
    assert!(
        flags_label_copy,
        "should reject breaking_change that copies field names: {:?}",
        violations
    );
}

/// A `feat` commit validated with the second flag set must produce a violation
/// mentioning "mechanical".
#[test]
fn validator_rejects_mechanical_feat() {
    let violations = CommitValidator::validate(&make_commit("feat", None), false, true, 0, false);
    let mentions_mechanical = violations.iter().any(|v| v.contains("mechanical"));
    assert!(
        mentions_mechanical,
        "should reject feat for mechanical transform: {:?}",
        violations
    );
}

/// A `feat` commit validated with the last flag set must produce a violation
/// mentioning "chore".
#[test]
fn validator_rejects_non_chore_for_deps() {
    let violations = CommitValidator::validate(&make_commit("feat", None), false, false, 0, true);
    let mentions_chore = violations.iter().any(|v| v.contains("chore"));
    assert!(
        mentions_chore,
        "should reject non-chore for dependency-only changes: {:?}",
        violations
    );
}

/// A `refactor` commit with a descriptive `breaking_change`, validated with a count of 1,
/// must produce no violations.
#[test]
fn validator_accepts_valid_commit() {
    let breaking_note = "removed `old_method()`, use `new_method()` instead";
    let commit = make_commit("refactor", Some(breaking_note));
    let violations = CommitValidator::validate(&commit, false, false, 1, false);
    assert!(
        violations.is_empty(),
        "should accept valid commit: {:?}",
        violations
    );
}

/// A subject that pushes the first line past 72 characters must produce a violation
/// containing "Shorten".
#[test]
fn validator_rejects_long_subject() {
    // "refactor(services): " is 20 chars, leaving 52 for the subject.
    // A 60-char subject gives 20 + 60 = 80 > 72.
    let commit = StructuredCommit {
        scope: Some(String::from("services")),
        subject: "a".repeat(60),
        // The remaining fields come from the fixture: type "refactor", no body,
        // no breaking change.
        ..make_commit("refactor", None)
    };
    let violations = CommitValidator::validate(&commit, false, false, 0, false);
    let asks_to_shorten = violations.iter().any(|v| v.contains("Shorten"));
    assert!(
        asks_to_shorten,
        "should reject subject that exceeds 72-char first line: {:?}",
        violations
    );
}

/// A first line of exactly 72 characters must not produce a "Shorten" violation.
#[test]
fn validator_accepts_subject_at_boundary() {
    // "feat: " is 6 chars; a 66-char subject lands exactly on 72.
    let commit = StructuredCommit {
        subject: "a".repeat(66),
        // The remaining fields come from the fixture: type "feat", no scope, no body,
        // no breaking change.
        ..make_commit("feat", None)
    };
    let violations = CommitValidator::validate(&commit, false, false, 0, false);
    let asks_to_shorten = violations.iter().any(|v| v.contains("Shorten"));
    assert!(
        !asks_to_shorten,
        "exactly 72 chars should not trigger length violation: {:?}",
        violations
    );
}

/// The formatted corrections text must contain "CORRECTIONS" and every violation passed in.
#[test]
fn validator_corrections_format() {
    let violations = vec![
        String::from("Type is wrong."),
        String::from("Breaking change missing."),
    ];
    let rendered = CommitValidator::format_corrections(&violations);
    assert!(rendered.contains("CORRECTIONS"));
    assert!(rendered.contains("Type is wrong."));
    assert!(rendered.contains("Breaking change missing."));
}

/// A closed `<thought>` block ahead of pretty-printed JSON is ignored; the header is
/// built from the JSON.
#[test]
fn sanitize_with_preceding_thought_block() {
    let llm_output = r#"<thought>
The core change is the addition of the CommitValidator struct to enforce subject specificity and evidence-based rules.
</thought>
{
  "type": "feat",
  "scope": "sanitizer",
  "subject": "add CommitValidator for evidence-based validation",
  "body": "Implements deterministic validation rules against code analysis signals.",
  "breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    // The payload has a body, so only the leading header line is checked.
    assert!(
        message.starts_with("feat(sanitizer): add CommitValidator for evidence-based validation")
    );
}

/// A closed `<thought>` block ahead of a plain-text commit line is stripped.
#[test]
fn sanitize_plain_text_with_thought_block() {
    let llm_output = r#"<thought>
The core change is renaming the function.
</thought>
refactor: rename process to process_all"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(message, "refactor: rename process to process_all");
}

/// A JSON-looking snippet inside the `<thought>` block must not be taken for the real payload.
#[test]
fn sanitize_with_thought_block_containing_braces() {
    let llm_output = r#"<thought>
I should generate a JSON like this: { "foo": "bar" }
</thought>
{
  "type": "refactor",
  "scope": "splitter",
  "subject": "upgrade clustering to hybrid Jaccard similarity",
  "body": null,
  "breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}

/// A `<thought>` tag that is never closed must not stop the commit line after it from
/// being extracted.
#[test]
fn sanitize_with_unclosed_thought_block() {
    // Models occasionally omit the closing tag yet still emit the message.
    let llm_output = r#"<thought>
I will refactor the splitter to use Jaccard similarity.

refactor(splitter): upgrade clustering to hybrid Jaccard similarity"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}

/// Prose containing a brace-delimited snippet ahead of the payload must not be taken
/// for the payload.
#[test]
fn sanitize_with_noise_containing_braces_before_json() {
    let llm_output = r#"<thought>...</thought>
The diff spans several files and adds a new field { "foo": 1 } to the config.
{
  "type": "refactor",
  "scope": "sanitizer",
  "subject": "harden JSON extraction",
  "body": null,
  "breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(message, "refactor(sanitizer): harden JSON extraction");
}

/// A sentence on the same line ahead of the commit message is discarded.
#[test]
fn sanitize_with_noise_before_plain_text() {
    let llm_output = r#"The diff spans several files. refactor: improve thing"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(message, "refactor: improve thing");
}

/// A closed `<think>` block ahead of pretty-printed JSON is ignored.
#[test]
fn sanitize_with_think_block_json() {
    // qwen3/ollama native thinking emits <think> tags rather than <thought>.
    let llm_output = r#"<think>
I need to analyze the diff. The main change is adding a new struct.
</think>
{
  "type": "feat",
  "scope": "core",
  "subject": "add DiffFingerprint struct for similarity comparison",
  "body": null,
  "breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(
        message,
        "feat(core): add DiffFingerprint struct for similarity comparison"
    );
}

/// A `<think>` tag that is never closed must not stop the commit line after it from
/// being extracted.
#[test]
fn sanitize_with_unclosed_think_block() {
    let llm_output = r#"<think>
I will analyze the changes...

feat: add DiffFingerprint struct for similarity comparison"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(
        message,
        "feat: add DiffFingerprint struct for similarity comparison"
    );
}

/// A conversational sentence and a blank line ahead of single-line JSON are ignored.
#[test]
fn sanitize_conversational_preamble_with_json() {
    // Claude/Anthropic models sometimes put conversational text before the JSON.
    let llm_output = r#"Let me analyze the changes in the diff.

{"type": "refactor", "scope": "splitter", "subject": "upgrade clustering to hybrid Jaccard similarity", "body": null, "breaking_change": null}"#;
    let message = CommitSanitizer::sanitize(llm_output, &default_format()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}