mod helpers;
use commitbee::config::CommitFormat;
use commitbee::services::sanitizer::{CommitSanitizer, CommitValidator, StructuredCommit};
use proptest::prelude::*;
fn default_format() -> CommitFormat {
CommitFormat::default()
}
/// A well-formed JSON payload with a scope and a null body is rendered as a
/// single `type(scope): subject` line.
#[test]
fn sanitize_valid_json() {
    let format = default_format();
    let input = r#"{"type": "feat", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(input, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}
/// JSON wrapped in a Markdown code fence tagged `json` is still parsed and
/// rendered as a conventional commit line.
#[test]
fn sanitize_json_in_code_fence() {
    let format = default_format();
    let fenced = r#"```json
{"type": "fix", "scope": "git", "subject": "handle detached HEAD state", "body": null}
```"#;
    let message = CommitSanitizer::sanitize(fenced, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): handle detached HEAD state");
}
/// JSON wrapped in an untagged Markdown code fence is handled the same way as
/// a tagged one.
#[test]
fn sanitize_json_in_plain_fence() {
    let format = default_format();
    let fenced = r#"```
{"type": "refactor", "scope": "context", "subject": "extract token budget logic", "body": null}
```"#;
    let message = CommitSanitizer::sanitize(fenced, &format).unwrap();
    insta::assert_snapshot!(message, @"refactor(context): extract token budget logic");
}
/// A JSON payload carrying a two-line body is compared against the stored
/// snapshot for this test.
#[test]
fn sanitize_json_with_body() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "llm", "subject": "add streaming support", "body": "Uses tokio-stream to stream tokens from Ollama.\nImproves perceived latency for long responses."}"#;
    // The binding name and macro call are kept as-is so the stored snapshot
    // (content and metadata) continues to match.
    let result = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(result);
}
/// A JSON payload whose `type` is not a recognised commit type must be
/// rejected.
#[test]
fn sanitize_json_invalid_type() {
    let payload = r#"{"type": "yolo", "scope": "cli", "subject": "ship it", "body": null}"#;
    let outcome = CommitSanitizer::sanitize(payload, &default_format());
    assert!(
        outcome.is_err(),
        "expected Err for invalid commit type 'yolo'"
    );
}
/// Plain text that is already a conventional commit line passes through
/// unchanged.
#[test]
fn sanitize_plain_text_conventional() {
    let format = default_format();
    let message = CommitSanitizer::sanitize("feat(cli): add --dry-run flag", &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add --dry-run flag");
}
/// A leading "Suggested commit:" preamble is stripped from plain-text output.
#[test]
fn sanitize_plain_with_preamble() {
    let format = default_format();
    let noisy = "Suggested commit: feat(cli): add --dry-run flag";
    let message = CommitSanitizer::sanitize(noisy, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add --dry-run flag");
}
/// Double quotes wrapped around a plain-text commit line are removed.
#[test]
fn sanitize_plain_with_quotes() {
    let format = default_format();
    let quoted = r#""fix(git): handle missing remote""#;
    let message = CommitSanitizer::sanitize(quoted, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): handle missing remote");
}
/// Free-form text without any commit type prefix must be rejected.
#[test]
fn sanitize_invalid_no_type() {
    let outcome = CommitSanitizer::sanitize(
        "just some random text without a valid type prefix",
        &default_format(),
    );
    assert!(
        outcome.is_err(),
        "expected Err for input with no valid commit type"
    );
}
/// The empty string is not a valid commit message.
#[test]
fn sanitize_empty_input() {
    let format = default_format();
    let outcome = CommitSanitizer::sanitize("", &format);
    assert!(outcome.is_err(), "expected Err for empty input");
}
/// Input made up solely of spaces, a newline and a tab is rejected.
#[test]
fn sanitize_whitespace_only() {
    let format = default_format();
    let outcome = CommitSanitizer::sanitize(" \n\t ", &format);
    assert!(outcome.is_err(), "expected Err for whitespace-only input");
}
/// Feeds a subject of 100 crab emoji (4 bytes each in UTF-8) through the
/// sanitizer.
///
/// The call must return without panicking. Whether it yields `Ok` or `Err` is
/// left open, but any accepted message must keep its first line within 72
/// characters, the same invariant checked for accented input in this file.
#[test]
fn sanitize_unicode_emoji_in_subject() {
    let long_subject = "🦀".repeat(100);
    let raw = format!(
        r#"{{"type": "chore", "scope": null, "subject": "{}", "body": null}}"#,
        long_subject
    );
    // Previously the result was discarded, so an over-long accepted message
    // would have gone unnoticed.
    if let Ok(msg) = CommitSanitizer::sanitize(&raw, &default_format()) {
        let first_line = msg.lines().next().unwrap_or("");
        assert!(
            first_line.chars().count() <= 72,
            "accepted message must keep its first line within 72 chars, got {}",
            first_line.chars().count()
        );
    }
}
/// Feeds a subject written in CJK characters (3 bytes each in UTF-8) through
/// the sanitizer.
///
/// Either outcome is tolerated, but an accepted message must keep its first
/// line within 72 characters. The earlier check, `from_utf8(msg.as_bytes())`,
/// could never fail because Rust strings are always valid UTF-8.
#[test]
fn sanitize_cjk_characters() {
    let raw = r#"{"type": "docs", "scope": "readme", "subject": "添加中文说明文档以便于理解项目架构和使用方式", "body": null}"#;
    let result = CommitSanitizer::sanitize(raw, &default_format());
    if let Ok(msg) = result {
        let first_line = msg.lines().next().unwrap_or("");
        assert!(
            first_line.chars().count() <= 72,
            "accepted message must keep its first line within 72 chars, got {}",
            first_line.chars().count()
        );
    }
}
/// Feeds a subject of 80 accented characters through the sanitizer; if the
/// message is accepted, its first line must not exceed 72 characters.
#[test]
fn sanitize_accented_characters() {
    let subject = "é".repeat(80);
    let payload = format!(
        r#"{{"type": "fix", "scope": null, "subject": "{}", "body": null}}"#,
        subject
    );
    if let Ok(message) = CommitSanitizer::sanitize(&payload, &default_format()) {
        // An empty message has no first line, which counts as zero characters.
        let header_len = message
            .lines()
            .next()
            .map_or(0, |line| line.chars().count());
        assert!(header_len <= 72);
    }
}
/// With `include_scope` disabled, a scope present in the JSON is left out of
/// the rendered header.
#[test]
fn sanitize_no_scope() {
    let without_scope = CommitFormat {
        include_scope: false,
        ..CommitFormat::default()
    };
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &without_scope).unwrap();
    insta::assert_snapshot!(message, @"feat: add verbose flag");
}
/// With `include_body` disabled, a body present in the JSON is left out of
/// the rendered message.
#[test]
fn sanitize_no_body() {
    let without_body = CommitFormat {
        include_body: false,
        ..CommitFormat::default()
    };
    let payload = r#"{"type": "feat", "scope": "llm", "subject": "add streaming support", "body": "This is the body text."}"#;
    let message = CommitSanitizer::sanitize(payload, &without_body).unwrap();
    insta::assert_snapshot!(message, @"feat(llm): add streaming support");
}
/// With `lowercase_subject` disabled, the subject keeps its original casing.
#[test]
fn sanitize_no_lowercase() {
    let keep_case = CommitFormat {
        lowercase_subject: false,
        ..CommitFormat::default()
    };
    let payload =
        r#"{"type": "fix", "scope": "git", "subject": "Handle Detached HEAD State", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &keep_case).unwrap();
    insta::assert_snapshot!(message, @"fix(git): Handle Detached HEAD State");
}
/// A scope containing a space is rendered with a hyphen in its place.
#[test]
fn sanitize_scope_with_spaces() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "my scope", "subject": "add feature", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(my-scope): add feature");
}
/// A scope consisting only of `@#$%` is dropped from the header entirely.
#[test]
fn sanitize_scope_invalid_chars() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "@#$%", "subject": "add feature", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat: add feature");
}
/// Exercises both sides of the 72-character first-line limit.
///
/// `"feat: "` is 6 characters, so a 66-character subject produces exactly 72
/// and must be accepted untouched, while a 67-character subject produces 73
/// and must be rejected with an error naming both numbers.
#[test]
fn sanitize_truncation_boundary_72() {
    let format = default_format();
    // Builds the JSON payload for a scope-less `feat` commit with `subject`.
    let payload = |subject: &str| {
        format!(
            r#"{{"type": "feat", "scope": null, "subject": "{}", "body": null}}"#,
            subject
        )
    };

    let at_limit = CommitSanitizer::sanitize(&payload(&"a".repeat(66)), &format).unwrap();
    assert_eq!(
        at_limit.chars().count(),
        72,
        "exactly 72 chars should not be truncated"
    );

    let over_limit = CommitSanitizer::sanitize(&payload(&"b".repeat(67)), &format);
    assert!(
        over_limit.is_err(),
        "73+ char first line should be rejected"
    );
    let err_msg = over_limit.unwrap_err().to_string();
    assert!(
        err_msg.contains("73 chars") && err_msg.contains("max 72"),
        "error should mention the char count and limit, got: {}",
        err_msg,
    );
}
/// The first-line limit also applies to plain-text input: `"feat: "` (6
/// characters) plus a 67-character subject is 73 characters and must be
/// rejected.
#[test]
fn sanitize_plain_text_rejects_long_first_line() {
    let subject = "a".repeat(67);
    let line = format!("feat: {}", subject);
    let outcome = CommitSanitizer::sanitize(&line, &default_format());
    assert!(
        outcome.is_err(),
        "plain text with 73+ char first line should be rejected"
    );
}
/// A trailing period on the subject is removed from the rendered header.
#[test]
fn sanitize_subject_trailing_period() {
    let format = default_format();
    let payload =
        r#"{"type": "fix", "scope": "git", "subject": "resolve merge conflicts.", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"fix(git): resolve merge conflicts");
}
/// An upper-case commit type in the JSON is rendered in lower case.
#[test]
fn sanitize_uppercase_type_in_json() {
    let format = default_format();
    let payload = r#"{"type": "FEAT", "scope": "cli", "subject": "add verbose flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}
/// An explicit `"body": null` and an absent `body` key must render the same
/// message.
#[test]
fn sanitize_json_null_body() {
    let format = default_format();
    let with_null = CommitSanitizer::sanitize(
        r#"{"type": "fix", "scope": null, "subject": "patch bug", "body": null}"#,
        &format,
    )
    .unwrap();
    let without_key = CommitSanitizer::sanitize(
        r#"{"type": "fix", "scope": null, "subject": "patch bug"}"#,
        &format,
    )
    .unwrap();
    assert_eq!(
        with_null, without_key,
        "null body and missing body should produce identical output"
    );
    insta::assert_snapshot!(with_null, @"fix: patch bug");
}
/// A fenced block holding non-commit text ahead of the real commit line is
/// discarded, leaving only the commit line.
#[test]
fn sanitize_code_fence_in_plain_text() {
    let format = default_format();
    let noisy = "```\nsome preamble\n```\nfeat(cli): add verbose flag";
    let message = CommitSanitizer::sanitize(noisy, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add verbose flag");
}
proptest! {
    // Property: for any string generated from the regex `.*`, the sanitizer
    // returns (Ok or Err) instead of panicking. The outcome itself is ignored.
    #[test]
    fn sanitizer_never_panics(raw in ".*") {
        let _ = CommitSanitizer::sanitize(&raw, &CommitFormat::default());
    }
}
/// A single body line longer than 72 characters must be wrapped so that every
/// body line fits, without losing text across the wrap points.
#[test]
fn sanitize_json_body_wrapped_at_72() {
    let long_body = "This is a very long body line that should be wrapped because it exceeds the seventy-two character limit for conventional commit body lines.";
    let payload = format!(
        r#"{{"type": "feat", "scope": "core", "subject": "add new feature", "body": "{}"}}"#,
        long_body
    );
    let message = CommitSanitizer::sanitize(&payload, &default_format()).unwrap();
    let lines: Vec<&str> = message.lines().collect();
    // Everything from the third line onwards is treated as body text.
    let body_lines = &lines[2..];
    for line in body_lines {
        let width = line.chars().count();
        assert!(
            width <= 72,
            "Body line exceeds 72 chars: '{}' ({})",
            line,
            width
        );
    }
    // Re-joining with spaces must reproduce a phrase from the original body.
    let body_text = body_lines.join(" ");
    assert!(body_text.contains("seventy-two character limit"));
}
/// A short body is emitted on a single line: the whole message has exactly
/// three lines and the third is the body verbatim.
#[test]
fn sanitize_json_body_short_not_wrapped() {
    let payload = r#"{"type": "fix", "scope": null, "subject": "fix bug", "body": "Short body."}"#;
    let message = CommitSanitizer::sanitize(payload, &default_format()).unwrap();
    let rendered: Vec<&str> = message.lines().collect();
    // Presumably header, blank separator, body; only the count and the third
    // line are checked here.
    assert_eq!(rendered.len(), 3);
    assert_eq!(rendered[2], "Short body.");
}
/// Both paragraphs of a body separated by a blank line must each appear as
/// their own line in the output.
#[test]
fn sanitize_json_body_preserves_paragraphs() {
    let payload = r#"{"type": "feat", "scope": null, "subject": "add feature", "body": "First paragraph.\n\nSecond paragraph."}"#;
    let message = CommitSanitizer::sanitize(payload, &default_format()).unwrap();
    let has_line = |wanted: &str| message.lines().any(|line| line == wanted);
    assert!(has_line("First paragraph."));
    assert!(has_line("Second paragraph."));
}
/// A scope-less JSON payload with a `breaking_change` description is compared
/// against the stored snapshot for this test.
#[test]
fn sanitize_breaking_change_json_no_scope() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "v1 endpoints removed, migrate to /v2"}"#;
    // Binding name and macro call are kept so the stored snapshot still matches.
    let result = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(result);
}
/// A scoped JSON payload with a `breaking_change` description is compared
/// against the stored snapshot for this test.
#[test]
fn sanitize_breaking_change_json_with_scope() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "api", "subject": "remove deprecated endpoint", "body": null, "breaking_change": "GET /api/v1/users removed, use GET /api/v2/users instead"}"#;
    // Binding name and macro call are kept so the stored snapshot still matches.
    let result = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(result);
}
/// A JSON payload carrying both a body and a `breaking_change` description is
/// compared against the stored snapshot for this test.
#[test]
fn sanitize_breaking_change_json_with_body_and_footer() {
    let format = default_format();
    let payload = r#"{"type": "chore", "scope": "config", "subject": "rename timeout key", "body": "Aligns the config schema with the 2.0 release standard.", "breaking_change": "config key 'timeout' renamed to 'timeout_secs', update your commitbee.toml"}"#;
    // Binding name and macro call are kept so the stored snapshot still matches.
    let result = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(result);
}
/// `"breaking_change": null` yields a plain header with no breaking marker or
/// footer.
#[test]
fn sanitize_breaking_change_null_is_non_breaking() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}
/// An empty `breaking_change` string is treated the same as no breaking
/// change.
#[test]
fn sanitize_breaking_change_empty_string_is_non_breaking() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": ""}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}
/// A `breaking_change` string containing only whitespace is treated the same
/// as no breaking change.
#[test]
fn sanitize_breaking_change_whitespace_only_is_non_breaking() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": " "}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}
/// The literal string `"null"` in `breaking_change` is treated the same as no
/// breaking change.
#[test]
fn sanitize_breaking_change_string_null_is_non_breaking() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null, "breaking_change": "null"}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}
/// A payload with no `breaking_change` key at all renders a plain header.
#[test]
fn sanitize_breaking_change_missing_field_is_non_breaking() {
    let format = default_format();
    let payload = r#"{"type": "feat", "scope": "cli", "subject": "add flag", "body": null}"#;
    let message = CommitSanitizer::sanitize(payload, &format).unwrap();
    insta::assert_snapshot!(message, @"feat(cli): add flag");
}
/// A plain-text `type!: subject` line is accepted and returned unchanged.
#[test]
fn sanitize_plain_text_bang_no_scope_passes_validation() {
    // Input and expected output are the same text.
    let line = "feat!: remove legacy authentication middleware";
    let outcome = CommitSanitizer::sanitize(line, &default_format());
    assert!(outcome.is_ok(), "expected Ok for feat!: plain text");
    assert_eq!(outcome.unwrap(), line);
}
/// A plain-text `type(scope)!: subject` line is accepted and returned
/// unchanged.
#[test]
fn sanitize_plain_text_scope_and_bang_passes_validation() {
    // Input and expected output are the same text.
    let line = "feat(api)!: remove deprecated endpoint";
    let outcome = CommitSanitizer::sanitize(line, &default_format());
    assert!(outcome.is_ok(), "expected Ok for feat(scope)!: plain text");
    assert_eq!(outcome.unwrap(), line);
}
/// Renders a breaking-change payload with `include_body` disabled and
/// compares the output against the stored snapshot for this test.
#[test]
fn sanitize_breaking_change_emitted_when_include_body_false() {
    let without_body = CommitFormat {
        include_body: false,
        ..CommitFormat::default()
    };
    let payload = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "v1 endpoints removed"}"#;
    // Binding name and macro call are kept so the stored snapshot still matches.
    let result = CommitSanitizer::sanitize(payload, &without_body).unwrap();
    insta::assert_snapshot!(result);
}
/// Renders a scoped breaking-change payload with `include_scope` disabled and
/// compares the output against the stored snapshot for this test.
#[test]
fn sanitize_breaking_change_include_scope_false() {
    let without_scope = CommitFormat {
        include_scope: false,
        ..CommitFormat::default()
    };
    let payload = r#"{"type": "feat", "scope": "api", "subject": "remove deprecated endpoint", "body": null, "breaking_change": "v1 endpoint removed"}"#;
    // Binding name and macro call are kept so the stored snapshot still matches.
    let result = CommitSanitizer::sanitize(payload, &without_scope).unwrap();
    insta::assert_snapshot!(result);
}
/// A `breaking_change` given as the JSON boolean `true` rather than a string
/// must make sanitization fail.
#[test]
fn sanitize_breaking_change_invalid_type_returns_error() {
    let payload = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": true}"#;
    let outcome = CommitSanitizer::sanitize(payload, &default_format());
    assert!(
        outcome.is_err(),
        "invalid typed breaking_change must not produce a valid commit"
    );
}
/// A long `breaking_change` description must wrap onto continuation lines,
/// and every line after the `BREAKING CHANGE:` header must be indented by two
/// spaces.
///
/// The check previously tested for a single leading space while its failure
/// message demanded two; it now enforces the two-space indent the message
/// describes.
#[test]
fn sanitize_breaking_footer_continuation_lines_indented() {
    let raw = r#"{"type": "feat", "scope": null, "subject": "drop v1 API", "body": null, "breaking_change": "this description is intentionally long so the footer must wrap onto a continuation line for parser compatibility"}"#;
    let result = CommitSanitizer::sanitize(raw, &default_format()).unwrap();
    // Skip everything ahead of the footer header line.
    let mut iter = result
        .lines()
        .skip_while(|line| !line.starts_with("BREAKING CHANGE:"));
    assert!(
        iter.next().is_some(),
        "BREAKING CHANGE: header line must exist"
    );
    // All remaining lines are treated as continuation lines of the footer.
    let continuation_lines: Vec<&str> = iter.collect();
    assert!(
        !continuation_lines.is_empty(),
        "footer must have continuation lines"
    );
    for (i, line) in continuation_lines.iter().enumerate() {
        assert!(
            line.starts_with("  "),
            "continuation line {} must start with two spaces: '{}'",
            i + 1,
            line
        );
    }
}
/// Builds a [`StructuredCommit`] fixture for the validator tests.
///
/// Only the commit type and the optional breaking-change text vary; the
/// subject is always `"test subject"` and scope and body are `None`.
fn make_commit(commit_type: &str, breaking_change: Option<&str>) -> StructuredCommit {
    StructuredCommit {
        commit_type: commit_type.to_owned(),
        scope: None,
        subject: String::from("test subject"),
        body: None,
        breaking_change: breaking_change.map(str::to_owned),
    }
}
/// A `fix` commit validated with every signal off must produce a violation
/// mentioning "refactor".
#[test]
fn validator_rejects_fix_without_bug_evidence() {
    let fixture = make_commit("fix", None);
    let violations = CommitValidator::validate(&fixture, false, false, 0, false);
    let mentions_refactor = violations.iter().any(|v| v.contains("refactor"));
    assert!(
        mentions_refactor,
        "should reject fix type when no bug evidence: {:?}",
        violations
    );
}
/// A `fix` commit validated with the first flag set (bug evidence, judging by
/// the assertion message) must produce no violations.
#[test]
fn validator_accepts_fix_with_bug_evidence() {
    let fixture = make_commit("fix", None);
    let violations = CommitValidator::validate(&fixture, true, false, 0, false);
    assert!(
        violations.is_empty(),
        "should accept fix type when bug evidence exists: {:?}",
        violations
    );
}
/// A commit without `breaking_change`, validated with a count of 2 (removed
/// public API items, judging by the assertion message), must be flagged with
/// "breaking_change is null".
#[test]
fn validator_rejects_missing_breaking_change() {
    let fixture = make_commit("refactor", None);
    let violations = CommitValidator::validate(&fixture, false, false, 2, false);
    let flagged = violations
        .iter()
        .any(|v| v.contains("breaking_change is null"));
    assert!(
        flagged,
        "should reject missing breaking_change when public API removed: {:?}",
        violations
    );
}
/// A `breaking_change` that merely repeats the label `public_api_removed`
/// must be flagged with a violation mentioning "internal label names".
#[test]
fn validator_rejects_copied_field_names() {
    let fixture = make_commit("refactor", Some("public_api_removed"));
    let violations = CommitValidator::validate(&fixture, false, false, 2, false);
    let flagged = violations
        .iter()
        .any(|v| v.contains("internal label names"));
    assert!(
        flagged,
        "should reject breaking_change that copies field names: {:?}",
        violations
    );
}
/// A `feat` commit validated with the second flag set (mechanical transform,
/// judging by the assertion message) must produce a violation mentioning
/// "mechanical".
#[test]
fn validator_rejects_mechanical_feat() {
    let fixture = make_commit("feat", None);
    let violations = CommitValidator::validate(&fixture, false, true, 0, false);
    let flagged = violations.iter().any(|v| v.contains("mechanical"));
    assert!(
        flagged,
        "should reject feat for mechanical transform: {:?}",
        violations
    );
}
/// A `feat` commit validated with the last flag set (dependency-only change,
/// judging by the assertion message) must produce a violation mentioning
/// "chore".
#[test]
fn validator_rejects_non_chore_for_deps() {
    let fixture = make_commit("feat", None);
    let violations = CommitValidator::validate(&fixture, false, false, 0, true);
    let flagged = violations.iter().any(|v| v.contains("chore"));
    assert!(
        flagged,
        "should reject non-chore for dependency-only changes: {:?}",
        violations
    );
}
/// A `refactor` commit with a descriptive `breaking_change`, validated with a
/// count of 1, must produce no violations.
#[test]
fn validator_accepts_valid_commit() {
    let breaking_note = "removed `old_method()`, use `new_method()` instead";
    let fixture = make_commit("refactor", Some(breaking_note));
    let violations = CommitValidator::validate(&fixture, false, false, 1, false);
    assert!(
        violations.is_empty(),
        "should accept valid commit: {:?}",
        violations
    );
}
/// A 60-character subject on a `refactor(services)` commit must trigger a
/// violation asking to shorten it.
///
/// The `refactor(services): ` prefix is 20 characters, so the header would be
/// 80 characters long.
#[test]
fn validator_rejects_long_subject() {
    let fixture = StructuredCommit {
        commit_type: "refactor".to_string(),
        scope: Some("services".to_string()),
        subject: "a".repeat(60),
        body: None,
        breaking_change: None,
    };
    let violations = CommitValidator::validate(&fixture, false, false, 0, false);
    let asks_to_shorten = violations.iter().any(|v| v.contains("Shorten"));
    assert!(
        asks_to_shorten,
        "should reject subject that exceeds 72-char first line: {:?}",
        violations
    );
}
/// A 66-character subject on a scope-less `feat` commit must not trigger the
/// length violation: `feat: ` is 6 characters, giving a header of exactly 72.
#[test]
fn validator_accepts_subject_at_boundary() {
    let fixture = StructuredCommit {
        commit_type: "feat".to_string(),
        scope: None,
        subject: "a".repeat(66),
        body: None,
        breaking_change: None,
    };
    let violations = CommitValidator::validate(&fixture, false, false, 0, false);
    // Other violations are tolerated; only the length-related one is checked.
    let asks_to_shorten = violations.iter().any(|v| v.contains("Shorten"));
    assert!(
        !asks_to_shorten,
        "exactly 72 chars should not trigger length violation: {:?}",
        violations
    );
}
/// The corrections text must contain the `CORRECTIONS` marker and every
/// violation that was passed in.
#[test]
fn validator_corrections_format() {
    let violations = vec![
        String::from("Type is wrong."),
        String::from("Breaking change missing."),
    ];
    let corrections = CommitValidator::format_corrections(&violations);
    let expected_fragments = ["CORRECTIONS", "Type is wrong.", "Breaking change missing."];
    for fragment in expected_fragments.iter() {
        assert!(corrections.contains(*fragment));
    }
}
/// A `<thought>…</thought>` block ahead of a multi-line JSON object is
/// ignored; the message must start with the header built from that JSON.
#[test]
fn sanitize_with_preceding_thought_block() {
    let llm_output = r#"<thought>
The core change is the addition of the CommitValidator struct to enforce subject specificity and evidence-based rules.
</thought>
{
"type": "feat",
"scope": "sanitizer",
"subject": "add CommitValidator for evidence-based validation",
"body": "Implements deterministic validation rules against code analysis signals.",
"breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    // Only the header is pinned; the body follows it in the output.
    assert!(
        message.starts_with("feat(sanitizer): add CommitValidator for evidence-based validation")
    );
}
/// A `<thought>…</thought>` block ahead of a plain-text commit line is
/// stripped, leaving only the commit line.
#[test]
fn sanitize_plain_text_with_thought_block() {
    let llm_output = r#"<thought>
The core change is renaming the function.
</thought>
refactor: rename process to process_all"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(message, "refactor: rename process to process_all");
}
/// A JSON-looking fragment inside the `<thought>` block must not be mistaken
/// for the commit payload that follows the block.
#[test]
fn sanitize_with_thought_block_containing_braces() {
    let llm_output = r#"<thought>
I should generate a JSON like this: { "foo": "bar" }
</thought>
{
"type": "refactor",
"scope": "splitter",
"subject": "upgrade clustering to hybrid Jaccard similarity",
"body": null,
"breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}
/// A `<thought>` tag that is never closed must not hide the commit line that
/// appears after the reasoning text.
#[test]
fn sanitize_with_unclosed_thought_block() {
    let llm_output = r#"<thought>
I will refactor the splitter to use Jaccard similarity.
refactor(splitter): upgrade clustering to hybrid Jaccard similarity"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}
/// A prose line containing a brace-delimited fragment, placed between a
/// thought block and the real payload, must not derail JSON extraction.
#[test]
fn sanitize_with_noise_containing_braces_before_json() {
    let llm_output = r#"<thought>...</thought>
The diff spans several files and adds a new field { "foo": 1 } to the config.
{
"type": "refactor",
"scope": "sanitizer",
"subject": "harden JSON extraction",
"body": null,
"breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(message, "refactor(sanitizer): harden JSON extraction");
}
/// A prose sentence on the same line ahead of the commit text is stripped,
/// leaving only the conventional commit line.
#[test]
fn sanitize_with_noise_before_plain_text() {
    let llm_output = r#"The diff spans several files. refactor: improve thing"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(message, "refactor: improve thing");
}
/// A `<think>…</think>` block ahead of a JSON payload is ignored, just like a
/// `<thought>` block.
#[test]
fn sanitize_with_think_block_json() {
    let llm_output = r#"<think>
I need to analyze the diff. The main change is adding a new struct.
</think>
{
"type": "feat",
"scope": "core",
"subject": "add DiffFingerprint struct for similarity comparison",
"body": null,
"breaking_change": null
}"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(
        message,
        "feat(core): add DiffFingerprint struct for similarity comparison"
    );
}
/// A `<think>` tag that is never closed must not hide the commit line that
/// appears after the reasoning text.
#[test]
fn sanitize_with_unclosed_think_block() {
    let llm_output = r#"<think>
I will analyze the changes...
feat: add DiffFingerprint struct for similarity comparison"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(
        message,
        "feat: add DiffFingerprint struct for similarity comparison"
    );
}
/// A conversational sentence on the line before a single-line JSON payload is
/// ignored.
#[test]
fn sanitize_conversational_preamble_with_json() {
    let llm_output = r#"Let me analyze the changes in the diff.
{"type": "refactor", "scope": "splitter", "subject": "upgrade clustering to hybrid Jaccard similarity", "body": null, "breaking_change": null}"#;
    let message = CommitSanitizer::sanitize(llm_output, &CommitFormat::default()).unwrap();
    assert_eq!(
        message,
        "refactor(splitter): upgrade clustering to hybrid Jaccard similarity"
    );
}