1use regex::Regex;
8use serde::Deserialize;
9use std::sync::OnceLock;
10
11use super::cleaning::{
12 final_escape_sequence_cleanup, find_conventional_commit_start, looks_like_analysis_text,
13 unescape_json_strings, unescape_json_strings_aggressive,
14};
15use crate::agents::AgentErrorKind;
16
17pub fn detect_agent_errors_in_output(content: &str) -> Option<AgentErrorKind> {
24 let content_lower = content.to_lowercase();
25
26 if content_lower.contains("prompt is too long")
29 || content_lower.contains("token limit exceeded")
30 || content_lower.contains("context length exceeded")
31 || content_lower.contains("maximum context")
32 || content_lower.contains("input too large")
33 || content_lower.contains("context window")
34 || content_lower.contains("max tokens")
35 || content_lower.contains("token limit")
36 || content_lower.contains("too many tokens")
37 || content_lower.contains("exceeds context")
38 || content_lower.contains("model's context length")
39 || content_lower.contains("input exceeds")
40 {
41 return Some(AgentErrorKind::TokenExhausted);
42 }
43
44 if content_lower.contains("invalid request")
47 || content_lower.contains("request failed")
48 || content_lower.contains("api error")
49 || content_lower.contains("rate limit")
50 || content_lower.contains("service unavailable")
51 {
52 return Some(AgentErrorKind::InvalidResponse);
53 }
54
55 None
56}
57
/// Outcome of trying to obtain a commit message from agent output.
///
/// Three variants carry message text; `AgentError` carries an error kind instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommitExtractionResult {
    /// Message text obtained by extraction.
    /// NOTE(review): presumably produced by the XML/structured extractors — confirm at call sites.
    Extracted(String),
    /// Message text recovered by a salvage pass.
    /// NOTE(review): presumably produced via `try_salvage_commit_message` — confirm at call sites.
    Salvaged(String),
    /// Message text used as a fallback.
    /// NOTE(review): presumably produced via `generate_fallback_commit_message` — confirm at call sites.
    Fallback(String),
    /// No message is available; holds the classified agent error.
    AgentError(AgentErrorKind),
}
73
74impl CommitExtractionResult {
75 pub fn into_message(self) -> String {
80 match self {
81 Self::Extracted(msg) | Self::Salvaged(msg) | Self::Fallback(msg) => {
82 render_final_commit_message(&msg)
83 }
84 Self::AgentError(_) => String::new(), }
86 }
87
88 pub const fn is_fallback(&self) -> bool {
90 matches!(self, Self::Fallback(_))
91 }
92
93 pub const fn is_agent_error(&self) -> bool {
95 matches!(self, Self::AgentError(_))
96 }
97
98 pub const fn error_kind(&self) -> Option<AgentErrorKind> {
100 match self {
101 Self::AgentError(kind) => Some(*kind),
102 _ => None,
103 }
104 }
105}
106
/// JSON shape deserialized when a commit message is supplied as an object
/// with a `subject` field and an optional `body` field.
#[derive(Debug, Deserialize)]
struct StructuredCommitMessage {
    /// Subject line; required for deserialization to succeed.
    subject: String,
    /// Optional body text; absent bodies deserialize to `None`.
    body: Option<String>,
}
113
114pub fn try_extract_xml_commit_with_trace(content: &str) -> (Option<String>, String) {
140 let content_len = content.len();
141 let content_preview = if content_len > 50 {
142 format!("{}...", &content[..50].replace('\n', "\\n"))
143 } else {
144 content.replace('\n', "\\n")
145 };
146
147 let Some(commit_start) = content.find("<ralph-commit>") else {
149 return (
150 None,
151 format!(
152 "No <ralph-commit> tag found (content length: {content_len}, starts with: '{content_preview}')"
153 ),
154 );
155 };
156
157 let Some(commit_end) = content.find("</ralph-commit>") else {
158 return (
159 None,
160 format!(
161 "Found <ralph-commit> at pos {commit_start}, but no closing </ralph-commit> tag"
162 ),
163 );
164 };
165
166 if commit_start >= commit_end {
167 return (
168 None,
169 format!(
170 "Malformed XML: </ralph-commit> at {commit_end} appears before <ralph-commit> at {commit_start}"
171 ),
172 );
173 }
174
175 let commit_block = &content[commit_start + "<ralph-commit>".len()..commit_end];
177
178 let Some(subject) = extract_xml_tag_content(commit_block, "ralph-subject") else {
180 return (
181 None,
182 format!(
183 "Found <ralph-commit> at {commit_start}, but <ralph-subject> tag not found within commit block"
184 ),
185 );
186 };
187
188 let subject = subject.trim();
189 if subject.is_empty() {
190 return (
191 None,
192 format!("Found <ralph-subject> but it is empty (at pos {commit_start})"),
193 );
194 }
195
196 if !is_conventional_commit_subject(subject) {
198 return (
199 None,
200 format!(
201 "Found subject '{}' but it doesn't match conventional commit format (type: ...)",
202 if subject.len() > 50 {
203 format!("{}...", &subject[..50])
204 } else {
205 subject.to_string()
206 }
207 ),
208 );
209 }
210
211 let body = extract_xml_tag_content(commit_block, "ralph-body");
213
214 let has_body = body.as_ref().is_some_and(|b| !b.trim().is_empty());
216 let message = match &body {
217 Some(body_content) if !body_content.trim().is_empty() => {
218 format!("{}\n\n{}", subject, body_content.trim())
219 }
220 _ => subject.to_string(),
221 };
222 (
223 Some(message.clone()),
224 format!(
225 "Found <ralph-commit> at pos {commit_start}, <ralph-subject> extracted, body={}, message: '{}'",
226 if has_body { "present" } else { "absent" },
227 if message.len() > 80 {
228 format!("{}...", &message[..80].replace('\n', "\\n"))
229 } else {
230 message.replace('\n', "\\n")
231 }
232 ),
233 )
234}
235
236pub fn try_extract_structured_commit_with_trace(content: &str) -> (Option<String>, String) {
240 let trimmed = content.trim();
241 let content_len = trimmed.len();
242
243 if looks_like_ndjson(trimmed) {
245 for line in trimmed.lines() {
246 let line = line.trim();
247 if !line.starts_with('{') {
248 continue;
249 }
250 if let Ok(json) = serde_json::from_str::<serde_json::Value>(line) {
251 if json.get("type").and_then(|v| v.as_str()) == Some("result") {
252 if let Some(result_str) = json.get("result").and_then(|v| v.as_str()) {
253 if let Some(msg) = try_extract_from_text(result_str) {
254 return (
255 Some(msg.clone()),
256 format!(
257 "Extracted from NDJSON result field, message: '{}'",
258 if msg.len() > 80 {
259 format!("{}...", &msg[..80].replace('\n', "\\n"))
260 } else {
261 msg.replace('\n', "\\n")
262 }
263 ),
264 );
265 }
266 }
267 }
268 }
269 }
270 return (
271 None,
272 format!("Content looks like NDJSON ({content_len} chars) but no valid commit found in result field"),
273 );
274 }
275
276 if let Some(msg) = try_extract_from_text(trimmed) {
278 return (
279 Some(msg.clone()),
280 format!(
281 "Extracted from JSON/text content, message: '{}'",
282 if msg.len() > 80 {
283 format!("{}...", &msg[..80].replace('\n', "\\n"))
284 } else {
285 msg.replace('\n', "\\n")
286 }
287 ),
288 );
289 }
290
291 let has_brace = trimmed.contains('{');
293 let has_subject_key = trimmed.contains("\"subject\"");
294
295 if has_brace && has_subject_key {
296 (
297 None,
298 format!(
299 "Content has JSON-like structure ({content_len} chars, has '{{': {has_brace}, has 'subject' key: {has_subject_key}) but parsing failed"
300 ),
301 )
302 } else if has_brace {
303 (
304 None,
305 format!("Content has '{{' but no 'subject' key found ({content_len} chars)"),
306 )
307 } else {
308 (
309 None,
310 format!("Content does not appear to be JSON ({content_len} chars, no '{{' found)"),
311 )
312 }
313}
314
/// Returns the text between the first `<tag_name>` and the next `</tag_name>` after it.
///
/// Returns `None` when the opening tag is missing, when no closing tag follows it,
/// or when the enclosed text is empty. The closing tag is searched only after the
/// opening tag, so a stray closing tag earlier in `content` does not hide a valid
/// pair that comes later.
fn extract_xml_tag_content(content: &str, tag_name: &str) -> Option<String> {
    let open_tag = format!("<{tag_name}>");
    let close_tag = format!("</{tag_name}>");

    let inner_start = content.find(&open_tag)? + open_tag.len();
    let after_open = &content[inner_start..];
    let inner = &after_open[..after_open.find(&close_tag)?];

    if inner.is_empty() {
        None
    } else {
        Some(inner.to_string())
    }
}
345
346fn try_extract_from_text(content: &str) -> Option<String> {
353 let trimmed = content.trim();
354
355 if let Some(json_content) = extract_json_from_code_fence(trimmed) {
357 if let Ok(msg) = serde_json::from_str::<StructuredCommitMessage>(&json_content) {
358 return format_structured_commit(&msg);
359 }
360 }
361
362 if let Ok(msg) = serde_json::from_str::<StructuredCommitMessage>(trimmed) {
364 return format_structured_commit(&msg);
365 }
366
367 if let Some(start) = trimmed.find('{') {
369 if let Some(end) = trimmed.rfind('}') {
370 if start < end {
371 let json_str = &trimmed[start..=end];
372 if let Ok(msg) = serde_json::from_str::<StructuredCommitMessage>(json_str) {
373 return format_structured_commit(&msg);
374 }
375 }
376 }
377 }
378
379 None
380}
381
382fn format_structured_commit(msg: &StructuredCommitMessage) -> Option<String> {
384 let subject = msg.subject.trim();
385
386 let subject = unescape_json_strings(subject);
391
392 if !is_conventional_commit_subject(&subject) {
394 return None;
395 }
396
397 match &msg.body {
399 Some(body) if !body.trim().is_empty() => {
400 let body = unescape_json_strings(body.trim());
401 Some(format!("{subject}\n\n{body}",))
402 }
403 _ => Some(subject),
404 }
405}
406
/// Reports whether `subject` starts with a recognised conventional-commit type.
///
/// The type is the text before the first `:`, cut at the first `(` if there is one,
/// otherwise at the first `!`. It must exactly equal one of the accepted lowercase
/// type names. A subject without a `:` is rejected.
fn is_conventional_commit_subject(subject: &str) -> bool {
    const VALID_TYPES: &[&str] = &[
        "feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore",
    ];

    let Some((prefix, _description)) = subject.split_once(':') else {
        return false;
    };

    // A scope marker takes precedence over a breaking-change marker.
    let type_end = prefix
        .find('(')
        .or_else(|| prefix.find('!'))
        .unwrap_or(prefix.len());
    let commit_type = &prefix[..type_end];

    VALID_TYPES.iter().any(|known| *known == commit_type)
}
428
/// Returns the trimmed text inside the first ```` ```json ```` fence.
///
/// The fence ends at the first newline that is immediately followed by three
/// backticks. Returns `None` when either marker is missing or the enclosed text is
/// blank.
fn extract_json_from_code_fence(content: &str) -> Option<String> {
    let (_before, after_fence) = content.split_once("```json")?;
    let (fenced, _rest) = after_fence.split_once("\n```")?;

    let json = fenced.trim();
    (!json.is_empty()).then(|| json.to_string())
}
447
/// Heuristic: the content spans more than one line and contains the literal
/// text `{"type":` somewhere.
fn looks_like_ndjson(content: &str) -> bool {
    let has_type_object = content.contains("{\"type\":");
    let is_multiline = content.lines().nth(1).is_some();
    has_type_object && is_multiline
}
452
453fn file_count_pattern_regex() -> &'static Regex {
456 static RE: OnceLock<Regex> = OnceLock::new();
457 RE.get_or_init(|| {
458 Regex::new(r"^chore:\s*\d+\s+(?:file\(s\)|files?)\s+changed$")
459 .expect("file count regex should be valid")
460 })
461}
462
/// Checks that the message is non-empty and between 5 and 2000 characters long.
///
/// Length is measured in Unicode scalar values (`chars`), which is the unit the
/// error messages report. Measuring bytes instead would reject long non-ASCII
/// messages too early and accept non-ASCII messages shorter than the minimum.
///
/// # Errors
/// Returns a description of the violated limit.
fn validate_basic_length(content: &str) -> Result<(), String> {
    const MIN_CHARS: usize = 5;
    const MAX_CHARS: usize = 2000;

    if content.is_empty() {
        return Err("Commit message is empty".to_string());
    }

    let char_count = content.chars().count();

    if char_count < MIN_CHARS {
        return Err(format!(
            "Commit message too short ({char_count} chars, minimum {MIN_CHARS})"
        ));
    }

    if char_count > MAX_CHARS {
        return Err(format!(
            "Commit message too long ({char_count} chars, maximum {MAX_CHARS})"
        ));
    }

    Ok(())
}
492
/// Rejects messages that contain fragments typical of raw JSON/stream output.
///
/// # Errors
/// Names the first indicator, in list order, that occurs in `content`
/// (shown truncated to 20 bytes).
fn validate_no_json_artifacts(content: &str) -> Result<(), String> {
    const JSON_INDICATORS: &[&str] = &[
        r#"{"type":"#,
        r#"{"result":"#,
        r#"{"content":"#,
        r#"{"subject":"#,
        r#"{"body":"#,
        r#""session_id":"#,
        r#""timestamp":"#,
        "stream_event",
        "content_block",
    ];

    match JSON_INDICATORS
        .iter()
        .find(|indicator| content.contains(**indicator))
    {
        Some(indicator) => {
            let shown = &indicator[..indicator.len().min(20)];
            Err(format!(
                "Commit message contains JSON artifacts: {shown}..."
            ))
        }
        None => Ok(()),
    }
}
516
/// Rejects messages that still contain literal `\n` text where real newlines belong.
///
/// Three situations are flagged, in this order:
/// 1. the second line is a literal `\n` or starts with a literal `\n\n`;
/// 2. a JSON fragment marker and a literal `\n` both occur;
/// 3. three or more literal `\n` sequences occur back to back.
///
/// # Errors
/// Returns a description of the first situation that applies.
fn validate_no_literal_escape_sequences(content: &str) -> Result<(), String> {
    // An exact "\\n\\n" second line is covered by the `starts_with` test.
    if let Some(second_line) = content.lines().nth(1).map(str::trim) {
        if second_line == "\\n" || second_line.starts_with("\\n\\n") {
            return Err(
                "Commit message body appears to contain literal escape sequences (\\n\\n). \
                 This indicates JSON was not properly unescaped. \
                 Expected actual newlines after subject line."
                    .to_string(),
            );
        }
    }

    let json_and_escape_patterns = [
        (r#"{"type":"#, "\\n"),
        (r#"{"result":"#, "\\n"),
        (r#"{"content":"#, "\\n"),
        (r#""session_id":"#, "\\n"),
    ];
    let combined_hit = json_and_escape_patterns
        .iter()
        .find(|(json, escape)| content.contains(*json) && content.contains(*escape));
    if let Some((json_pattern, escape_pattern)) = combined_hit {
        return Err(format!(
            "Commit message contains both JSON artifacts ({json_pattern}) and literal escape sequences ({escape_pattern}). This indicates JSON parsing failure."
        ));
    }

    // Any run of four literal `\n` also contains a run of three.
    if content.contains("\\n\\n\\n") {
        return Err(
            "Commit message contains repeated literal escape sequences (\\n\\n\\n). \
             This indicates JSON string values were not properly unescaped."
                .to_string(),
        );
    }

    Ok(())
}
567
/// Rejects messages whose lowercased text begins with an error or refusal phrase.
///
/// # Errors
/// Names the first marker, in list order, that the message starts with.
fn validate_no_error_markers(content: &str) -> Result<(), String> {
    const ERROR_MARKERS: &[&str] = &[
        "error:",
        "failed to",
        "unable to",
        "i cannot",
        "i'm unable",
        "as an ai",
        "i don't have access",
        "cannot generate",
    ];

    let lowered = content.to_lowercase();
    ERROR_MARKERS
        .iter()
        .find(|marker| lowered.starts_with(**marker))
        .map_or(Ok(()), |marker| {
            Err(format!("Commit message starts with error marker: {marker}"))
        })
}
588
/// Rejects messages that contain, anywhere and in any letter case, a phrase
/// typical of an agent/API failure.
///
/// # Errors
/// Names the first phrase, in list order, that occurs in the message.
fn validate_no_agent_errors(content: &str) -> Result<(), String> {
    const AGENT_ERROR_PATTERNS: &[&str] = &[
        "prompt is too long",
        "token limit exceeded",
        "context length exceeded",
        "maximum context",
        "input too large",
        "invalid request",
        "request failed",
    ];

    let lowered = content.to_lowercase();
    let hit = AGENT_ERROR_PATTERNS
        .iter()
        .find(|pattern| lowered.contains(**pattern));

    if let Some(pattern) = hit {
        return Err(format!(
            "Output contains agent error message ({pattern}). Cannot use as commit message."
        ));
    }
    Ok(())
}
613
/// Rejects messages that look like leaked model analysis instead of a commit message.
///
/// Three checks run in order:
/// 1. the lowercased message starts with a known analysis opener;
/// 2. the message starts like a numbered or bulleted analysis;
/// 3. the message contains a formatted thinking marker such as `[Claude] Thinking:`.
///
/// The marker check compares lowercased text on both sides, so markers are found
/// anywhere in the message regardless of letter case. Matching mixed-case patterns
/// against lowercased content, or lowercase patterns case-sensitively, would miss
/// markers such as `[AGENT] THINKING:` in the body.
///
/// # Errors
/// Returns a description of the first check that fails.
fn validate_no_thought_process_leakage(content: &str) -> Result<(), String> {
    const THOUGHT_PROCESS_PREFIXES: &[&str] = &[
        "looking at this diff",
        "i can see",
        "the main changes are",
        "several distinct categories",
        "key categories",
        "based on the diff",
        "analyzing the changes",
        "this diff shows",
        "looking at the changes",
        "i've analyzed",
        "after reviewing",
        "based on the git diff",
        "here are the changes",
        "here's what changed",
        "here is what changed",
        "the following changes",
        "changes include",
        "after reviewing the diff",
        "after reviewing the changes",
        "after analyzing",
        "i've analyzed the changes",
        "i've analyzed the diff",
        "key changes",
        "several changes",
        "distinct changes",
        "key changes include",
        "several changes include",
        "this diff shows the following",
    ];
    // Lowercase only: these are compared against the lowercased message.
    const THINKING_MARKERS: &[&str] = &[
        "[claude] thinking:",
        "[agent] thinking:",
        "[assistant] thinking:",
        "] thinking:",
    ];

    let content_lower = content.to_lowercase();

    if let Some(prefix) = THOUGHT_PROCESS_PREFIXES
        .iter()
        .find(|prefix| content_lower.starts_with(**prefix))
    {
        return Err(format!(
            "Commit message starts with AI thought process ({prefix}). This indicates a bug in the thought process filtering."
        ));
    }

    let unindented = content.trim_start();
    if unindented.starts_with("1. ")
        || unindented.starts_with("1)\n")
        || content_lower.starts_with("- first")
        || content_lower.starts_with("* first")
    {
        return Err(
            "Commit message starts with numbered analysis. This indicates AI thought process leakage.".to_string()
        );
    }

    if let Some(pattern) = THINKING_MARKERS
        .iter()
        .find(|marker| content_lower.contains(**marker))
    {
        return Err(format!(
            "Commit message contains formatted thinking pattern ({pattern}). This indicates AI thinking output leaked into the commit message."
        ));
    }

    Ok(())
}
693
694fn validate_no_placeholders(content: &str) -> Result<(), String> {
721 let valid_template_vars = ["{{prompt}}", "{{plan}}", "{{diff}}"];
723 let content_lower = content.to_lowercase();
724
725 let is_only_valid_template_var = valid_template_vars.iter().any(|v| content_lower == *v);
728
729 let placeholder_patterns = [
732 r"(?i)\[commit message\]", r"(?i)<commit message>", r"(?i)\byour commit message here\b", r"(?i)\[commit\s*\]", r"(?i)<commit\s*>", r"(?i)\[insert\b", r"(?i)<insert\b", ];
740
741 let combined_pattern = placeholder_patterns.join("|");
743 if let Ok(re) = regex::Regex::new(&combined_pattern) {
744 if re.is_match(content) {
745 if !is_only_valid_template_var {
748 return Err(
749 "Commit message contains placeholder text (e.g., '[commit message]', '<commit message>', or similar)".to_string()
750 );
751 }
752 }
753 }
754
755 let placeholder_context_patterns = [
759 r"(?i)\bplaceholder\s+for\b", r"(?i)\bplaceholder\s+text\b", r"(?i)\bplaceholder\s+here\b", r"(?i)\bplaceholder\s*\)", r"(?i)is\s+(?:a\s+)?placeholder\b", r"(?i)this\s+is\s+(?:a\s+)?placeholder\b", ];
766
767 let combined_placeholder_context = placeholder_context_patterns.join("|");
768 if let Ok(re) = regex::Regex::new(&combined_placeholder_context) {
769 if re.is_match(content) {
770 return Err(
771 "Commit message contains placeholder text (e.g., 'placeholder for', 'placeholder text', or similar)".to_string()
772 );
773 }
774 }
775
776 let bare_placeholder = regex::Regex::new(r"(?i)\bplaceholder\b").unwrap();
780 if bare_placeholder.is_match(content) {
781 let content_lower_for_ctx = content.to_lowercase();
782
783 let valid_action_patterns = [
787 r"(?i)\bremove\s+.*?\bplaceholder\b",
788 r"(?i)\bdelete\s+.*?\bplaceholder\b",
789 r"(?i)\bfix\s+.*?\bplaceholder\b",
790 r"(?i)\bclear\s+.*?\bplaceholder\b",
791 r"(?i)\bplaceholder\s+in\s+(?:the\s+)?(?:ui|form|input|field)\b",
793 r"(?i)\bfrom\s+(?:the\s+)?(?:ui|form|input|field).*\bplaceholder\b",
794 r"(?i)\bplaceholder\s+(?:text|value|content)\b",
795 ];
796
797 let combined_actions = valid_action_patterns.join("|");
799 if let Ok(action_re) = regex::Regex::new(&combined_actions) {
800 if action_re.is_match(content) {
801 return Ok(());
802 }
803 }
804
805 let valid_contexts = [
807 "placeholder attribute",
809 "placeholder element",
810 "placeholder div",
811 "placeholder span",
812 "placeholder class",
813 "template placeholders",
815 "template placeholder",
816 "placeholder variable",
817 "placeholder variables",
818 "placeholder value",
819 "placeholder values",
820 "substitute placeholder",
822 "substituting placeholder",
823 "replace placeholder",
824 "replacing placeholder",
825 ];
826
827 let mut in_valid_context = false;
828 for valid_ctx in &valid_contexts {
829 if content_lower_for_ctx.contains(valid_ctx) {
830 in_valid_context = true;
831 break;
832 }
833 }
834
835 if !in_valid_context {
836 return Err(
837 "Commit message contains 'placeholder'. If this refers to technical concepts (templates, variables, etc.), use more specific language like 'template placeholders' or 'placeholder variable'".to_string()
838 );
839 }
840 }
841
842 Ok(())
843}
844
845fn validate_no_bad_patterns(content: &str) -> Result<(), String> {
847 let content_lower = content.to_lowercase();
848
849 if file_count_pattern_regex().is_match(&content_lower) {
855 return Err(format!(
856 "Commit message matches bad pattern (file count pattern): '{content}'. Use semantic description instead."
857 ));
858 }
859
860 let vague_patterns = [
862 ("chore: apply changes", "vague 'apply changes' pattern"),
863 ("chore: update code", "vague 'update code' pattern"),
864 ];
865 for (pattern, description) in vague_patterns {
866 if content_lower == pattern {
867 return Err(format!(
868 "Commit message matches bad pattern ({description}): {pattern}"
869 ));
870 }
871 }
872
873 let first_line = content.lines().next().unwrap_or(content);
876 let first_line_lower = first_line.to_lowercase();
877
878 if first_line_lower.starts_with("chore: update ") || first_line_lower.starts_with("chore:") {
880 let subject = first_line_lower
881 .replacen("chore: update ", "", 1)
882 .replacen("chore:", "", 1)
883 .trim()
884 .to_string();
885
886 let code_extensions = [
895 ".rs", ".js", ".ts", ".py", ".go", ".java", ".c", ".cpp", ".h", ".cs", ".php", ".rb",
896 ".swift", ".kt",
897 ];
898
899 let looks_like_file_list = subject.contains('/')
901 || subject.contains('\\') || code_extensions.iter().any(|ext| subject.ends_with(ext));
903
904 let has_comma_separated_files =
906 subject.contains(", ") && code_extensions.iter().any(|ext| subject.contains(ext));
907
908 let has_and_separated_files =
910 subject.contains(" and ") && code_extensions.iter().any(|ext| subject.contains(ext));
911
912 if looks_like_file_list || has_comma_separated_files || has_and_separated_files {
913 return Err(format!(
914 "Commit message appears to be a file list: '{}'. Use semantic description instead.",
915 first_line.trim()
916 ));
917 }
918 }
919
920 Ok(())
921}
922
923pub fn validate_commit_message(content: &str) -> Result<(), String> {
929 let content = content.trim();
930
931 validate_basic_length(content)?;
933 validate_no_json_artifacts(content)?;
934 validate_no_literal_escape_sequences(content)?;
935 validate_no_error_markers(content)?;
936 validate_no_agent_errors(content)?;
937 validate_no_thought_process_leakage(content)?;
938 validate_no_placeholders(content)?;
939 validate_no_bad_patterns(content)?;
940
941 Ok(())
942}
943
/// Outcome of one named validation check.
#[derive(Debug, Clone)]
pub struct ValidationCheckResult {
    /// Identifier of the check (a static string such as `"basic_length"`).
    pub name: &'static str,
    /// Whether the check succeeded.
    pub passed: bool,
    /// Failure description; the constructors in this file set it only on failure.
    pub error: Option<String>,
}
954
955impl ValidationCheckResult {
956 const fn pass(name: &'static str) -> Self {
958 Self {
959 name,
960 passed: true,
961 error: None,
962 }
963 }
964
965 const fn fail(name: &'static str, error: String) -> Self {
967 Self {
968 name,
969 passed: false,
970 error: Some(error),
971 }
972 }
973}
974
/// Collected outcomes of the individual validation checks.
#[derive(Debug, Clone)]
pub struct ValidationReport {
    /// One entry per check that was run, in execution order.
    pub checks: Vec<ValidationCheckResult>,
}
984
985impl ValidationReport {
986 pub fn all_passed(&self) -> bool {
988 self.checks.iter().all(|c| c.passed)
989 }
990
991 pub fn format_failures(&self) -> Option<String> {
993 let failures: Vec<_> = self
994 .checks
995 .iter()
996 .filter(|c| !c.passed)
997 .map(|c| format!("{}: {}", c.name, c.error.as_deref().unwrap_or("failed")))
998 .collect();
999
1000 if failures.is_empty() {
1001 None
1002 } else {
1003 Some(failures.join("; "))
1004 }
1005 }
1006}
1007
1008pub fn validate_commit_message_with_report(content: &str) -> ValidationReport {
1013 let content = content.trim();
1014
1015 let checks = vec![
1017 match validate_basic_length(content) {
1018 Ok(()) => ValidationCheckResult::pass("basic_length"),
1019 Err(e) => ValidationCheckResult::fail("basic_length", e),
1020 },
1021 match validate_no_json_artifacts(content) {
1022 Ok(()) => ValidationCheckResult::pass("no_json_artifacts"),
1023 Err(e) => ValidationCheckResult::fail("no_json_artifacts", e),
1024 },
1025 match validate_no_literal_escape_sequences(content) {
1026 Ok(()) => ValidationCheckResult::pass("no_literal_escape_sequences"),
1027 Err(e) => ValidationCheckResult::fail("no_literal_escape_sequences", e),
1028 },
1029 match validate_no_error_markers(content) {
1030 Ok(()) => ValidationCheckResult::pass("no_error_markers"),
1031 Err(e) => ValidationCheckResult::fail("no_error_markers", e),
1032 },
1033 match validate_no_agent_errors(content) {
1034 Ok(()) => ValidationCheckResult::pass("no_agent_errors"),
1035 Err(e) => ValidationCheckResult::fail("no_agent_errors", e),
1036 },
1037 match validate_no_thought_process_leakage(content) {
1038 Ok(()) => ValidationCheckResult::pass("no_thought_process_leakage"),
1039 Err(e) => ValidationCheckResult::fail("no_thought_process_leakage", e),
1040 },
1041 match validate_no_placeholders(content) {
1042 Ok(()) => ValidationCheckResult::pass("no_placeholders"),
1043 Err(e) => ValidationCheckResult::fail("no_placeholders", e),
1044 },
1045 match validate_no_bad_patterns(content) {
1046 Ok(()) => ValidationCheckResult::pass("no_bad_patterns"),
1047 Err(e) => ValidationCheckResult::fail("no_bad_patterns", e),
1048 },
1049 ];
1050
1051 ValidationReport { checks }
1052}
1053
1054pub fn render_final_commit_message(message: &str) -> String {
1077 let mut result = message.to_string();
1078
1079 result = final_escape_sequence_cleanup(&result);
1082
1083 if let Err(e) = validate_commit_message(&result) {
1086 let error_lower = e.to_lowercase();
1088 if error_lower.contains("escape sequence") || error_lower.contains("\\n") {
1089 result = unescape_json_strings_aggressive(&result);
1091 }
1092 }
1095
1096 result = result
1098 .lines()
1099 .map(str::trim)
1100 .filter(|l| !l.is_empty())
1101 .collect::<Vec<_>>()
1102 .join("\n");
1103
1104 result
1105}
1106
1107pub fn try_salvage_commit_message(content: &str) -> Option<String> {
1123 let commit_pos = find_conventional_commit_start(content)?;
1125
1126 let from_commit = &content[commit_pos..];
1128
1129 let lines: Vec<&str> = from_commit.lines().collect();
1132
1133 if lines.is_empty() {
1134 return None;
1135 }
1136
1137 let subject = lines[0].trim();
1139 if subject.is_empty() {
1140 return None;
1141 }
1142
1143 let mut body_lines: Vec<&str> = Vec::new();
1145 let mut found_blank = false;
1146
1147 for line in lines.iter().skip(1) {
1148 let trimmed: &str = line.trim();
1149
1150 if trimmed.is_empty() {
1151 if found_blank {
1152 break;
1154 }
1155 found_blank = true;
1156 body_lines.push("");
1157 continue;
1158 }
1159
1160 if looks_like_analysis_text(trimmed)
1162 || trimmed.starts_with("1. ")
1163 || trimmed.starts_with("- ")
1164 || trimmed.starts_with("* ")
1165 {
1166 break;
1167 }
1168
1169 body_lines.push(trimmed);
1170 found_blank = false;
1171 }
1172
1173 let mut salvaged = subject.to_string();
1175 if !body_lines.is_empty() {
1176 while body_lines.last().is_some_and(|l| l.is_empty()) {
1178 body_lines.pop();
1179 }
1180 if !body_lines.is_empty() {
1181 salvaged.push('\n');
1182 salvaged.push_str(&body_lines.join("\n"));
1183 }
1184 }
1185
1186 match validate_commit_message(&salvaged) {
1188 Ok(()) => Some(salvaged),
1189 Err(_) => None,
1190 }
1191}
1192
1193pub fn generate_fallback_commit_message(diff: &str) -> String {
1210 let files = extract_files_from_diff(diff);
1211
1212 if files.is_empty() {
1213 return "chore: apply automated changes".to_string();
1215 }
1216
1217 let common_dir = find_common_directory(&files);
1219
1220 let scope = common_dir
1222 .as_ref()
1223 .and_then(|dir| derive_scope_from_path(dir));
1224
1225 let file_count = files.len();
1227
1228 match (file_count, scope) {
1230 (1, Some(scope)) => {
1231 format!("chore({scope}): update module")
1233 }
1234 (1, None) => {
1235 files
1237 .first()
1238 .and_then(|f| derive_scope_from_path(f))
1239 .map_or_else(
1240 || "chore: update module".to_string(),
1241 |component| format!("chore({component}): update module"),
1242 )
1243 }
1244 (n, Some(scope)) => {
1245 format!("chore({scope}): update {n} components")
1247 }
1248 (n, None) => {
1249 files
1252 .first()
1253 .and_then(|f| derive_scope_from_path(f))
1254 .map_or_else(
1255 || format!("chore: update {n} components"),
1256 |component| format!("chore({component}): update {n} components"),
1257 )
1258 }
1259 }
1260}
1261
/// Lists the `a/` side paths from the `diff --git a/<path> b/<path>` header lines.
///
/// Each path ends at the first ` b/` on its line. Lines without that header shape
/// and headers with an empty path are skipped.
fn extract_files_from_diff(diff: &str) -> Vec<String> {
    diff.lines()
        .filter_map(|line| line.strip_prefix("diff --git a/"))
        .filter_map(|rest| rest.split_once(" b/"))
        .map(|(path, _new_side)| path)
        .filter(|path| !path.is_empty())
        .map(str::to_string)
        .collect()
}
1283
/// Finds the deepest directory shared by all `paths` (`/`-separated).
///
/// For a single path this is everything before its last `/`. For several paths it is
/// the longest run of leading directory components common to all of them; the final
/// component of each path is treated as a file name and never counted. Returns `None`
/// for an empty slice or when nothing is shared.
fn find_common_directory(paths: &[String]) -> Option<String> {
    // Every component of `path` except the last one.
    fn dir_components(path: &str) -> Vec<&str> {
        let mut parts: Vec<&str> = path.split('/').collect();
        parts.pop();
        parts
    }

    match paths {
        [] => None,
        [only] => only.rfind('/').map(|slash| only[..slash].to_string()),
        [first, others @ ..] => {
            let first_dirs = dir_components(first);
            let other_dirs: Vec<Vec<&str>> =
                others.iter().map(|path| dir_components(path)).collect();

            let shared: Vec<&str> = first_dirs
                .iter()
                .enumerate()
                .take_while(|(depth, name)| {
                    other_dirs
                        .iter()
                        .all(|dirs| dirs.get(*depth) == Some(*name))
                })
                .map(|(_, name)| *name)
                .collect();

            if shared.is_empty() {
                None
            } else {
                Some(shared.join("/"))
            }
        }
    }
}
1329
/// Picks a commit scope name from a `/`-separated path.
///
/// Generic directory names (`src`, `lib`, `bin`, `tests`, `test`, `benches`,
/// `examples`, compared case-insensitively) and empty components are never chosen.
/// The search first walks upward from the component before the last one; if that
/// finds nothing, it walks forward over all components and takes the first one that
/// contains no `.`. Returns `None` when no component qualifies.
fn derive_scope_from_path(path: &str) -> Option<String> {
    const SKIP_DIRS: [&str; 7] = ["src", "lib", "bin", "tests", "test", "benches", "examples"];

    let is_generic = |name: &str| SKIP_DIRS.contains(&name.to_lowercase().as_str());
    let parts: Vec<&str> = path.split('/').collect();

    // Nearest enclosing directory that is not a generic name.
    let nearest_parent = parts
        .iter()
        .copied()
        .rev()
        .skip(1)
        .find(|name| !is_generic(name) && !name.is_empty());

    // Otherwise the first meaningful component that does not look like a file name.
    let first_plain_component = || {
        parts
            .iter()
            .copied()
            .find(|name| !is_generic(name) && !name.is_empty() && !name.contains('.'))
    };

    nearest_parent
        .or_else(first_plain_component)
        .map(|name| name.to_string())
}
1365
1366#[cfg(test)]
1367mod tests {
1368 use super::*;
1369
1370 #[test]
1371 fn test_validate_empty_message() {
1372 let result = validate_commit_message("");
1373 assert!(result.is_err());
1374 assert!(result.unwrap_err().contains("empty"));
1375 }
1376
1377 #[test]
1378 fn test_validate_too_short() {
1379 let result = validate_commit_message("fix");
1380 assert!(result.is_err());
1381 assert!(result.unwrap_err().contains("too short"));
1382 }
1383
1384 #[test]
1385 fn test_validate_valid_message() {
1386 let result = validate_commit_message("feat: add new feature");
1387 assert!(result.is_ok());
1388 }
1389
1390 #[test]
1391 fn test_validate_json_artifacts() {
1392 let result = validate_commit_message("feat: add feature {\"type\":\"result\"}");
1393 assert!(result.is_err());
1394 assert!(result.unwrap_err().contains("JSON artifacts"));
1395 }
1396
1397 #[test]
1398 fn test_validate_error_markers() {
1399 let result = validate_commit_message("error: unable to generate");
1400 assert!(result.is_err());
1401 assert!(result.unwrap_err().contains("error marker"));
1402 }
1403
1404 #[test]
1405 fn test_validate_thought_process_leakage() {
1406 let result = validate_commit_message("Looking at this diff, I can see changes");
1407 assert!(result.is_err());
1408 assert!(result.unwrap_err().contains("AI thought process"));
1409 }
1410
1411 #[test]
1412 fn test_validate_numbered_analysis() {
1413 let result = validate_commit_message("1. First change\n2. Second change");
1414 assert!(result.is_err());
1415 assert!(result.unwrap_err().contains("numbered analysis"));
1416 }
1417
1418 #[test]
1419 fn test_validate_bad_file_count_pattern() {
1420 let result = validate_commit_message("chore: 5 files changed");
1421 assert!(result.is_err());
1422 assert!(result.unwrap_err().contains("file count pattern"));
1423 }
1424
1425 #[test]
1426 fn test_validate_file_list_pattern() {
1427 let result = validate_commit_message("chore: update src/file.rs");
1428 assert!(result.is_err());
1429 assert!(result.unwrap_err().contains("file list"));
1430 }
1431
1432 #[test]
1433 fn test_try_salvage_commit_message() {
1434 let content = "Looking at this diff...\n\nfeat: add feature";
1435 let salvaged = try_salvage_commit_message(content);
1436 assert!(salvaged.is_some());
1437 assert_eq!(salvaged.unwrap(), "feat: add feature");
1438 }
1439
1440 #[test]
1441 fn test_try_salvage_with_body() {
1442 let content = "Analysis text\n\nfix(parser): resolve bug\n\nAdd proper error handling.";
1443 let salvaged = try_salvage_commit_message(content);
1444 assert!(salvaged.is_some());
1445 let msg = salvaged.unwrap();
1446 assert!(msg.starts_with("fix(parser):"));
1447 assert!(msg.contains("Add proper error handling"));
1448 }
1449
1450 #[test]
1451 fn test_generate_fallback_empty_diff() {
1452 let fallback = generate_fallback_commit_message("");
1453 assert_eq!(fallback, "chore: apply automated changes");
1454 }
1455
1456 #[test]
1457 fn test_generate_fallback_single_file() {
1458 let diff = r"diff --git a/src/files/extraction.rs b/src/files/extraction.rs";
1459 let fallback = generate_fallback_commit_message(diff);
1460 assert!(validate_commit_message(&fallback).is_ok());
1461 assert!(fallback.contains("files") || fallback.contains("update"));
1462 }
1463
1464 #[test]
1465 fn test_generate_fallback_multiple_files_same_dir() {
1466 let diff = r"diff --git a/src/files/a.rs b/src/files/a.rs
1467diff --git a/src/files/b.rs b/src/files/b.rs";
1468 let fallback = generate_fallback_commit_message(diff);
1469 assert!(validate_commit_message(&fallback).is_ok());
1470 assert!(fallback.contains("files") || fallback.contains("components"));
1471 }
1472
1473 #[test]
1474 fn test_generate_fallback_multiple_dirs() {
1475 let diff = r"diff --git a/src/a.rs b/src/a.rs
1476diff --git a/lib/b.rs b/lib/b.rs
1477diff --git a/tests/c.rs b/tests/c.rs";
1478 let fallback = generate_fallback_commit_message(diff);
1479 assert!(validate_commit_message(&fallback).is_ok());
1480 assert!(fallback.contains("3 components") || fallback.contains("chore"));
1481 }
1482
1483 #[test]
1484 fn test_regression_thinking_leakage_recovery() {
1485 let log_content = r"[Claude] Thinking: Looking at this diff, I need to analyze...
1487
1488feat(pipeline): add recovery mechanism
1489
1490When commit validation fails, attempt to salvage valid message.";
1491
1492 let salvaged = try_salvage_commit_message(log_content);
1494 assert!(salvaged.is_some());
1495 let msg = salvaged.unwrap();
1496 assert!(validate_commit_message(&msg).is_ok());
1497 assert!(msg.starts_with("feat(pipeline):"));
1498 }
1499
1500 #[test]
1501 fn test_extract_files_from_diff() {
1502 let diff = r"diff --git a/src/files/extraction.rs b/src/files/extraction.rs
1503--- a/src/files/extraction.rs
1504+++ b/src/files/extraction.rs
1505diff --git a/src/phases/commit.rs b/src/phases/commit.rs
1506--- a/src/phases/commit.rs
1507+++ b/src/phases/commit.rs";
1508
1509 let files = extract_files_from_diff(diff);
1510 assert_eq!(files.len(), 2);
1511 assert_eq!(files[0], "src/files/extraction.rs");
1512 assert_eq!(files[1], "src/phases/commit.rs");
1513 }
1514
1515 #[test]
1516 fn test_find_common_directory_same_dir() {
1517 let paths = vec![
1518 "src/files/a.rs".to_string(),
1519 "src/files/b.rs".to_string(),
1520 "src/files/c.rs".to_string(),
1521 ];
1522 let common = find_common_directory(&paths);
1523 assert_eq!(common, Some("src/files".to_string()));
1524 }
1525
1526 #[test]
1527 fn test_find_common_directory_partial_overlap() {
1528 let paths = vec![
1529 "src/files/extraction.rs".to_string(),
1530 "src/phases/commit.rs".to_string(),
1531 ];
1532 let common = find_common_directory(&paths);
1533 assert_eq!(common, Some("src".to_string()));
1534 }
1535
1536 #[test]
1537 fn test_find_common_directory_no_overlap() {
1538 let paths = vec!["src/a.rs".to_string(), "lib/b.rs".to_string()];
1539 let common = find_common_directory(&paths);
1540 assert!(common.is_none());
1541 }
1542
1543 #[test]
1544 fn test_derive_scope_from_path() {
1545 assert_eq!(
1547 derive_scope_from_path("src/files/extraction.rs"),
1548 Some("files".to_string())
1549 );
1550
1551 assert_eq!(
1553 derive_scope_from_path("src/phases/commit.rs"),
1554 Some("phases".to_string())
1555 );
1556
1557 assert_ne!(
1559 derive_scope_from_path("src/files/foo.rs"),
1560 Some("src".to_string())
1561 );
1562 }
1563
1564 #[test]
1565 fn test_derive_scope_from_shallow_path() {
1566 let scope = derive_scope_from_path("lib.rs");
1568 assert!(scope.is_none());
1570 }
1571
1572 #[test]
1577 fn test_detect_agent_errors_in_output_prompt_too_long() {
1578 let content = r#"{"type":"result","result":"Prompt is too long"}"#;
1580 assert_eq!(
1581 detect_agent_errors_in_output(content),
1582 Some(AgentErrorKind::TokenExhausted)
1583 );
1584 }
1585
1586 #[test]
1587 fn test_detect_agent_errors_in_output_token_limit() {
1588 let content = r#"{"type":"result","result":"token limit exceeded"}"#;
1590 assert_eq!(
1591 detect_agent_errors_in_output(content),
1592 Some(AgentErrorKind::TokenExhausted)
1593 );
1594 }
1595
1596 #[test]
1597 fn test_detect_agent_errors_in_output_context_length() {
1598 let content = "error: context length exceeded for this model";
1600 assert_eq!(
1601 detect_agent_errors_in_output(content),
1602 Some(AgentErrorKind::TokenExhausted)
1603 );
1604 }
1605
1606 #[test]
1607 fn test_detect_agent_errors_in_output_maximum_context() {
1608 let content = "maximum context size reached";
1610 assert_eq!(
1611 detect_agent_errors_in_output(content),
1612 Some(AgentErrorKind::TokenExhausted)
1613 );
1614 }
1615
1616 #[test]
1617 fn test_detect_agent_errors_in_output_input_too_large() {
1618 let content = "input too large for this model";
1620 assert_eq!(
1621 detect_agent_errors_in_output(content),
1622 Some(AgentErrorKind::TokenExhausted)
1623 );
1624 }
1625
1626 #[test]
1627 fn test_detect_agent_errors_in_output_invalid_request() {
1628 let content = "invalid request to the API";
1630 assert_eq!(
1631 detect_agent_errors_in_output(content),
1632 Some(AgentErrorKind::InvalidResponse)
1633 );
1634 }
1635
1636 #[test]
1637 fn test_detect_agent_errors_in_output_request_failed() {
1638 let content = "request failed due to server error";
1640 assert_eq!(
1641 detect_agent_errors_in_output(content),
1642 Some(AgentErrorKind::InvalidResponse)
1643 );
1644 }
1645
1646 #[test]
1647 fn test_detect_agent_errors_in_output_valid_commit_message() {
1648 let content = r#"{"type":"result","result":"feat: add feature"}"#;
1650 assert_eq!(detect_agent_errors_in_output(content), None);
1651 }
1652
1653 #[test]
1654 fn test_detect_agent_errors_in_output_case_insensitive() {
1655 let content = "PROMPT IS TOO LONG";
1657 assert_eq!(
1658 detect_agent_errors_in_output(content),
1659 Some(AgentErrorKind::TokenExhausted)
1660 );
1661 }
1662
1663 #[test]
1668 fn test_detect_agent_errors_context_window() {
1669 let content = "error: context window exceeded";
1671 assert_eq!(
1672 detect_agent_errors_in_output(content),
1673 Some(AgentErrorKind::TokenExhausted)
1674 );
1675 }
1676
1677 #[test]
1678 fn test_detect_agent_errors_max_tokens() {
1679 let content = "max tokens exceeded for this request";
1681 assert_eq!(
1682 detect_agent_errors_in_output(content),
1683 Some(AgentErrorKind::TokenExhausted)
1684 );
1685 }
1686
1687 #[test]
1688 fn test_detect_agent_errors_token_limit() {
1689 let content = "token limit reached";
1691 assert_eq!(
1692 detect_agent_errors_in_output(content),
1693 Some(AgentErrorKind::TokenExhausted)
1694 );
1695 }
1696
1697 #[test]
1698 fn test_detect_agent_errors_too_many_tokens() {
1699 let content = "error: too many tokens in input";
1701 assert_eq!(
1702 detect_agent_errors_in_output(content),
1703 Some(AgentErrorKind::TokenExhausted)
1704 );
1705 }
1706
1707 #[test]
1708 fn test_detect_agent_errors_exceeds_context() {
1709 let content = "input exceeds context length";
1711 assert_eq!(
1712 detect_agent_errors_in_output(content),
1713 Some(AgentErrorKind::TokenExhausted)
1714 );
1715 }
1716
1717 #[test]
1718 fn test_detect_agent_errors_model_context_length() {
1719 let content = "input exceeds the model's context length";
1721 assert_eq!(
1722 detect_agent_errors_in_output(content),
1723 Some(AgentErrorKind::TokenExhausted)
1724 );
1725 }
1726
1727 #[test]
1728 fn test_detect_agent_errors_input_exceeds() {
1729 let content = "input exceeds maximum length";
1731 assert_eq!(
1732 detect_agent_errors_in_output(content),
1733 Some(AgentErrorKind::TokenExhausted)
1734 );
1735 }
1736
1737 #[test]
1738 fn test_detect_agent_errors_api_error() {
1739 let content = "api error occurred";
1741 assert_eq!(
1742 detect_agent_errors_in_output(content),
1743 Some(AgentErrorKind::InvalidResponse)
1744 );
1745 }
1746
1747 #[test]
1748 fn test_detect_agent_errors_rate_limit() {
1749 let content = "rate limit exceeded";
1751 assert_eq!(
1752 detect_agent_errors_in_output(content),
1753 Some(AgentErrorKind::InvalidResponse)
1754 );
1755 }
1756
1757 #[test]
1758 fn test_detect_agent_errors_service_unavailable() {
1759 let content = "service unavailable, try again later";
1761 assert_eq!(
1762 detect_agent_errors_in_output(content),
1763 Some(AgentErrorKind::InvalidResponse)
1764 );
1765 }
1766
1767 #[test]
1768 fn test_validate_rejects_prompt_too_long() {
1769 let result = validate_commit_message("Prompt is too long");
1771 assert!(result.is_err());
1772 assert!(result.unwrap_err().contains("agent error"));
1773 }
1774
1775 #[test]
1776 fn test_validate_rejects_token_limit_exceeded() {
1777 let result = validate_commit_message("token limit exceeded");
1779 assert!(result.is_err());
1780 assert!(result.unwrap_err().contains("agent error"));
1781 }
1782
1783 #[test]
1784 fn test_validate_rejects_context_length() {
1785 let result = validate_commit_message("The context length exceeded for this model");
1789 assert!(result.is_err());
1790 assert!(result.unwrap_err().contains("agent error"));
1791 }
1792
1793 #[test]
1794 fn test_validate_accepts_valid_message_with_error_words() {
1795 let result = validate_commit_message("fix(parser): resolve parsing error");
1798 assert!(result.is_ok());
1799 }
1800
1801 #[test]
1802 fn test_validate_rejects_json_artifacts_with_escape_sequences() {
1803 let result = validate_commit_message(r#"feat: add feature{"type":"result"}\\nBody text"#);
1805 assert!(result.is_err());
1806 assert!(result.unwrap_err().contains("JSON artifacts"));
1808 }
1809
1810 #[test]
1811 fn test_validate_rejects_json_artifacts_without_escape_sequences() {
1812 let result = validate_commit_message(r#"feat: add feature{"type":"result"}Body text"#);
1814 assert!(result.is_err());
1815 assert!(result.unwrap_err().contains("JSON artifacts"));
1816 }
1817
1818 #[test]
1819 fn test_validate_accepts_literal_escape_without_json_artifacts() {
1820 let result = validate_commit_message("feat: add feature\\nBody text");
1823 assert!(result.is_ok());
1824 }
1825
1826 #[test]
1827 fn test_validate_accepts_literal_tab_without_json_artifacts() {
1828 let result = validate_commit_message("feat: add feature\\t- bullet");
1830 assert!(result.is_ok());
1831 }
1832
1833 #[test]
1834 fn test_validate_accepts_actual_newlines() {
1835 let result = validate_commit_message("feat: add feature\n\nBody text here");
1837 assert!(result.is_ok());
1838 }
1839
1840 #[test]
1845 fn test_validate_rejects_body_starts_with_literal_newline_sequences() {
1846 let result = validate_commit_message("feat: add feature\n\\n\\nBody text here");
1851 assert!(result.is_err());
1852 assert!(result.unwrap_err().contains("literal escape sequences"));
1853 }
1854
1855 #[test]
1856 fn test_validate_rejects_body_second_line_is_literal_escape() {
1857 let result = validate_commit_message("feat: add feature\n\\n");
1859 assert!(result.is_err());
1860 assert!(result.unwrap_err().contains("literal escape sequences"));
1861 }
1862
1863 #[test]
1864 fn test_validate_rejects_body_second_line_is_double_literal_escape() {
1865 let result = validate_commit_message("feat: add feature\n\\n\\n");
1867 assert!(result.is_err());
1868 assert!(result.unwrap_err().contains("literal escape sequences"));
1869 }
1870
1871 #[test]
1872 fn test_validate_rejects_repeated_literal_escape_sequences() {
1873 let result = validate_commit_message("feat: add feature\\n\\n\\nBody text");
1875 assert!(result.is_err());
1876 assert!(result
1877 .unwrap_err()
1878 .contains("repeated literal escape sequences"));
1879 }
1880
1881 #[test]
1882 fn test_validate_rejects_quadruple_literal_escape_sequences() {
1883 let result = validate_commit_message("feat: add feature\\n\\n\\n\\nBody text");
1885 assert!(result.is_err());
1886 assert!(result
1887 .unwrap_err()
1888 .contains("repeated literal escape sequences"));
1889 }
1890
1891 #[test]
1892 fn test_validate_accepts_legitimate_single_escape_in_middle() {
1893 let result = validate_commit_message("feat: handle backslash-n in parser");
1895 assert!(result.is_ok());
1896 }
1897
1898 #[test]
1899 fn test_validate_accepts_body_with_actual_newlines() {
1900 let result =
1902 validate_commit_message("feat: add feature\n\nThis is the body\nwith multiple lines");
1903 assert!(result.is_ok());
1904 }
1905
1906 #[test]
1911 fn test_commit_extraction_result_agent_error() {
1912 let result = CommitExtractionResult::AgentError(AgentErrorKind::TokenExhausted);
1914
1915 assert!(result.is_agent_error());
1916 assert!(!result.is_fallback());
1917 assert_eq!(result.error_kind(), Some(AgentErrorKind::TokenExhausted));
1918 assert_eq!(result.into_message(), String::new());
1919 }
1920
1921 #[test]
1922 fn test_commit_extraction_result_extracted_not_agent_error() {
1923 let result = CommitExtractionResult::Extracted("feat: add feature".to_string());
1925
1926 assert!(!result.is_agent_error());
1927 assert!(!result.is_fallback());
1928 assert_eq!(result.error_kind(), None);
1929 assert_eq!(result.into_message(), "feat: add feature");
1930 }
1931
1932 #[test]
1937 fn test_format_structured_commit_unescapes_body_newlines() {
1938 let msg = StructuredCommitMessage {
1940 subject: "feat: add feature".to_string(),
1941 body: Some("Line 1\\nLine 2\\nLine 3".to_string()),
1942 };
1943 let result = format_structured_commit(&msg);
1944 assert!(result.is_some());
1945 let formatted = result.unwrap();
1946 assert!(formatted.contains("Line 1\nLine 2\nLine 3"));
1947 assert!(!formatted.contains("\\n"));
1948 }
1949
1950 #[test]
1951 fn test_format_structured_commit_unescapes_subject_newlines() {
1952 let msg = StructuredCommitMessage {
1958 subject: "feat: add\\nfeature".to_string(),
1959 body: None,
1960 };
1961 let result = format_structured_commit(&msg);
1962 assert!(result.is_some());
1964 assert!(result.unwrap().contains('\n'));
1966 }
1967
1968 #[test]
1969 fn test_format_structured_commit_with_empty_body() {
1970 let msg = StructuredCommitMessage {
1972 subject: "fix: resolve bug".to_string(),
1973 body: None,
1974 };
1975 let result = format_structured_commit(&msg);
1976 assert_eq!(result, Some("fix: resolve bug".to_string()));
1977 }
1978
1979 #[test]
1980 fn test_format_structured_commit_with_body_containing_tabs() {
1981 let msg = StructuredCommitMessage {
1983 subject: "feat: add feature".to_string(),
1984 body: Some("- item 1\\t- item 2".to_string()),
1985 };
1986 let result = format_structured_commit(&msg);
1987 assert!(result.is_some());
1988 let formatted = result.unwrap();
1989 assert!(formatted.contains("- item 1\t- item 2"));
1990 assert!(!formatted.contains("\\t"));
1991 }
1992
1993 #[test]
1998 fn test_render_final_commit_message_with_literal_escapes() {
1999 let input = "feat: add feature\n\\n\\nBody with literal escapes";
2002 let result = render_final_commit_message(input);
2003 assert_eq!(result, "feat: add feature\nBody with literal escapes");
2004 }
2005
2006 #[test]
2007 fn test_render_final_commit_message_already_clean() {
2008 let input = "feat: add feature\n\nBody text here";
2010 let result = render_final_commit_message(input);
2011 assert_eq!(result, "feat: add feature\nBody text here");
2012 }
2013
2014 #[test]
2015 fn test_render_final_commit_message_with_tabs() {
2016 let input = "feat: add feature\\n\\t- item 1\\n\\t- item 2";
2018 let result = render_final_commit_message(input);
2019 assert_eq!(result, "feat: add feature\n- item 1\n- item 2");
2021 }
2022
2023 #[test]
2024 fn test_render_final_commit_message_with_carriage_returns() {
2025 let input = "feat: add feature\\r\\nBody text";
2027 let result = render_final_commit_message(input);
2028 assert_eq!(result, "feat: add feature\nBody text");
2030 }
2031
2032 #[test]
2033 fn test_render_final_commit_message_double_escaped() {
2034 let input = "feat: add feature\n\\\\n\\\\nDouble escaped";
2036 let result = render_final_commit_message(input);
2037 assert_eq!(result, "feat: add feature\n\\\n\\\nDouble escaped");
2040 }
2041
2042 #[test]
2043 fn test_render_final_commit_message_whitespace_cleanup() {
2044 let input = "feat: add feature\n\nBody text\n\n\n \n ";
2046 let result = render_final_commit_message(input);
2047 assert_eq!(result, "feat: add feature\nBody text");
2048 }
2049
2050 #[test]
2051 fn test_render_final_commit_message_mixed_escape_sequences() {
2052 let input = "feat: add feature\\n\\nDetails:\\r\\n\\t- item 1\\n\\t- item 2";
2054 let result = render_final_commit_message(input);
2055 assert_eq!(result, "feat: add feature\nDetails:\n- item 1\n- item 2");
2057 }
2058
2059 #[test]
2060 fn test_render_final_commit_message_trailing_whitespace_lines() {
2061 let input = "feat: add feature\n\\n\\n Body with spaces \\n \\n ";
2063 let result = render_final_commit_message(input);
2064 assert_eq!(result, "feat: add feature\nBody with spaces");
2066 }
2067
2068 #[test]
2073 fn test_try_extract_structured_commit_direct_json() {
2074 let json = r#"{"subject":"fix(commit): try simpler prompts after agent errors","body":"When all agents fail for a prompt variant, keep iterating through progressively simpler prompt strategies instead of aborting the retry loop."}"#;
2076 let result = try_extract_structured_commit_with_trace(json).0;
2077 assert!(result.is_some(), "Should extract commit from direct JSON");
2078 let msg = result.unwrap();
2079 assert!(msg.starts_with("fix(commit):"), "Should start with type");
2080 assert!(msg.contains("try simpler prompts after agent errors"));
2081 assert!(msg.contains("When all agents fail"));
2082 }
2083
2084 #[test]
2085 fn test_try_extract_structured_commit_json_no_body() {
2086 let json = r#"{"subject":"feat: add new feature"}"#;
2088 let result = try_extract_structured_commit_with_trace(json).0;
2089 assert!(result.is_some());
2090 assert_eq!(result.unwrap(), "feat: add new feature");
2091 }
2092
2093 #[test]
2094 fn test_try_extract_structured_commit_code_fence() {
2095 let content = r#"Here is the commit message:
2097```json
2098{"subject":"fix: resolve bug","body":"Details about the fix."}
2099```
2100"#;
2101 let result = try_extract_structured_commit_with_trace(content).0;
2102 assert!(result.is_some());
2103 let msg = result.unwrap();
2104 assert!(msg.starts_with("fix: resolve bug"));
2105 assert!(msg.contains("Details about the fix"));
2106 }
2107
2108 #[test]
2109 fn test_try_extract_structured_commit_with_preamble() {
2110 let content = r#"Based on the diff, here is my commit:
2112{"subject":"refactor: simplify logic","body":"Removed unnecessary complexity."}"#;
2113 let result = try_extract_structured_commit_with_trace(content).0;
2114 assert!(result.is_some());
2115 let msg = result.unwrap();
2116 assert!(msg.starts_with("refactor:"));
2117 }
2118
2119 #[test]
2120 fn test_try_extract_structured_commit_invalid_type() {
2121 let json = r#"{"subject":"invalid: not a real type","body":"Body"}"#;
2123 let result = try_extract_structured_commit_with_trace(json).0;
2124 assert!(result.is_none(), "Should reject invalid commit type");
2125 }
2126
2127 #[test]
2128 fn test_try_extract_structured_commit_from_ndjson() {
2129 let ndjson = r#"{"type":"stream_event","data":"..."}
2131{"type":"result","result":"{\"subject\":\"docs: update readme\",\"body\":\"Add usage examples.\"}"}
2132"#;
2133 let result = try_extract_structured_commit_with_trace(ndjson).0;
2134 assert!(result.is_some(), "Should extract from NDJSON result field");
2135 let msg = result.unwrap();
2136 assert!(msg.starts_with("docs: update readme"));
2137 }
2138
2139 #[test]
2140 fn test_try_extract_structured_commit_from_ndjson_with_markdown_fence() {
2141 let ndjson = r#"{"type":"stream_event","data":"..."}
2144{"type":"result","result":"The changes look clean. Now I'll generate the commit message:\n\n```json\n{\n \"subject\": \"refactor(review): pass diff directly to all review prompts\",\n \"body\": \"Previously, review prompts would tell agents to run git commands to\\nfetch the diff. This change:\\n\\n1. Fetches the diff once at the start of build_review_prompt\\n2. Passes it directly to all review prompt functions\"\n}\n```"}
2145"#;
2146 let result = try_extract_structured_commit_with_trace(ndjson).0;
2147 assert!(
2148 result.is_some(),
2149 "Should extract from NDJSON result field with markdown code fence"
2150 );
2151 let msg = result.unwrap();
2152 assert!(msg.starts_with("refactor(review):"));
2153 assert!(msg.contains("pass diff directly"));
2154 }
2155
2156 #[test]
2161 fn test_validate_commit_message_raw_json_structure() {
2162 let raw_json = r#"{"subject":"fix: something","body":"Details"}"#;
2164 let result = validate_commit_message(raw_json);
2165 assert!(result.is_err(), "Raw JSON should be rejected");
2166 assert!(
2167 result.unwrap_err().contains("JSON"),
2168 "Error should mention JSON"
2169 );
2170 }
2171
2172 #[test]
2173 fn test_validate_commit_message_json_with_subject_key() {
2174 let bad_msg = r#"{"subject":"feat: add feature","body":"Some body"}"#;
2176 let result = validate_commit_message(bad_msg);
2177 assert!(
2178 result.is_err(),
2179 "Commit message containing {{\"subject\":}} should be rejected"
2180 );
2181 }
2182
2183 #[test]
2188 fn test_xml_extract_basic_subject_only() {
2189 let content = r"<ralph-commit>
2191<ralph-subject>feat: add new feature</ralph-subject>
2192</ralph-commit>";
2193 let result = try_extract_xml_commit_with_trace(content).0;
2194 assert!(result.is_some(), "Should extract from basic XML");
2195 assert_eq!(result.unwrap(), "feat: add new feature");
2196 }
2197
2198 #[test]
2199 fn test_xml_extract_with_body() {
2200 let content = r"<ralph-commit>
2202<ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>
2203<ralph-body>Implement Google and GitHub OAuth providers.
2204Add session management for OAuth tokens.</ralph-body>
2205</ralph-commit>";
2206 let result = try_extract_xml_commit_with_trace(content).0;
2207 assert!(result.is_some(), "Should extract from XML with body");
2208 let msg = result.unwrap();
2209 assert!(msg.starts_with("feat(auth): add OAuth2 login flow"));
2210 assert!(msg.contains("Implement Google and GitHub OAuth providers"));
2211 assert!(msg.contains("Add session management"));
2212 }
2213
2214 #[test]
2215 fn test_xml_extract_with_empty_body() {
2216 let content = r"<ralph-commit>
2218<ralph-subject>fix: resolve bug</ralph-subject>
2219<ralph-body></ralph-body>
2220</ralph-commit>";
2221 let result = try_extract_xml_commit_with_trace(content).0;
2222 assert!(result.is_some(), "Should extract even with empty body");
2223 assert_eq!(result.unwrap(), "fix: resolve bug");
2225 }
2226
2227 #[test]
2228 fn test_xml_extract_ignores_preamble() {
2229 let content = r"Here is the commit message based on my analysis:
2231
2232Looking at the diff, I can see...
2233
2234<ralph-commit>
2235<ralph-subject>refactor: simplify logic</ralph-subject>
2236</ralph-commit>
2237
2238That's all!";
2239 let result = try_extract_xml_commit_with_trace(content).0;
2240 assert!(result.is_some(), "Should ignore preamble and extract XML");
2241 assert_eq!(result.unwrap(), "refactor: simplify logic");
2242 }
2243
2244 #[test]
2245 fn test_xml_extract_fails_missing_tags() {
2246 let content = "Just some text without XML tags";
2248 let result = try_extract_xml_commit_with_trace(content).0;
2249 assert!(result.is_none(), "Should fail when XML tags are missing");
2250 }
2251
2252 #[test]
2253 fn test_xml_extract_fails_invalid_commit_type() {
2254 let content = r"<ralph-commit>
2256<ralph-subject>invalid: not a real type</ralph-subject>
2257</ralph-commit>";
2258 let result = try_extract_xml_commit_with_trace(content).0;
2259 assert!(result.is_none(), "Should reject invalid commit type");
2260 }
2261
2262 #[test]
2263 fn test_xml_extract_fails_missing_subject() {
2264 let content = r"<ralph-commit>
2266<ralph-body>Just a body, no subject</ralph-body>
2267</ralph-commit>";
2268 let result = try_extract_xml_commit_with_trace(content).0;
2269 assert!(result.is_none(), "Should fail when subject is missing");
2270 }
2271
2272 #[test]
2273 fn test_xml_extract_fails_empty_subject() {
2274 let content = r"<ralph-commit>
2276<ralph-subject></ralph-subject>
2277</ralph-commit>";
2278 let result = try_extract_xml_commit_with_trace(content).0;
2279 assert!(result.is_none(), "Should fail when subject is empty");
2280 }
2281
2282 #[test]
2283 fn test_xml_extract_handles_whitespace_in_subject() {
2284 let content = r"<ralph-commit>
2286<ralph-subject> docs: update readme </ralph-subject>
2287</ralph-commit>";
2288 let result = try_extract_xml_commit_with_trace(content).0;
2289 assert!(result.is_some(), "Should handle whitespace in subject");
2290 assert_eq!(result.unwrap(), "docs: update readme");
2291 }
2292
2293 #[test]
2294 fn test_xml_extract_with_breaking_change() {
2295 let content = r"<ralph-commit>
2297<ralph-subject>feat!: drop Python 3.7 support</ralph-subject>
2298<ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>
2299</ralph-commit>";
2300 let result = try_extract_xml_commit_with_trace(content).0;
2301 assert!(result.is_some(), "Should handle breaking change indicator");
2302 let msg = result.unwrap();
2303 assert!(msg.starts_with("feat!:"));
2304 assert!(msg.contains("BREAKING CHANGE"));
2305 }
2306
2307 #[test]
2308 fn test_xml_extract_with_scope() {
2309 let content = r"<ralph-commit>
2311<ralph-subject>test(parser): add coverage for edge cases</ralph-subject>
2312</ralph-commit>";
2313 let result = try_extract_xml_commit_with_trace(content).0;
2314 assert!(result.is_some(), "Should handle scope in subject");
2315 assert_eq!(result.unwrap(), "test(parser): add coverage for edge cases");
2316 }
2317
2318 #[test]
2319 fn test_xml_extract_body_preserves_newlines() {
2320 let content = r"<ralph-commit>
2322<ralph-subject>feat: add feature</ralph-subject>
2323<ralph-body>Line 1
2324Line 2
2325Line 3</ralph-body>
2326</ralph-commit>";
2327 let result = try_extract_xml_commit_with_trace(content).0;
2328 assert!(result.is_some(), "Should preserve newlines in body");
2329 let msg = result.unwrap();
2330 assert!(msg.contains("Line 1\nLine 2\nLine 3"));
2331 }
2332
2333 #[test]
2334 fn test_xml_extract_fails_malformed_tags() {
2335 let content = r"</ralph-commit>
2337<ralph-subject>feat: add feature</ralph-subject>
2338<ralph-commit>";
2339 let result = try_extract_xml_commit_with_trace(content).0;
2340 assert!(result.is_none(), "Should fail for malformed tags");
2341 }
2342
2343 #[test]
2344 fn test_xml_extract_handles_markdown_code_fence() {
2345 let content = r"```xml
2348<ralph-commit>
2349<ralph-subject>feat: add feature</ralph-subject>
2350</ralph-commit>
2351```";
2352 let result = try_extract_xml_commit_with_trace(content).0;
2355 assert!(
2356 result.is_some(),
2357 "Should extract from XML even inside code fence"
2358 );
2359 }
2360
2361 #[test]
2363 fn test_validate_accepts_template_placeholders() {
2364 let result = validate_commit_message("feat: substitute template placeholders in config");
2366 assert!(
2367 result.is_ok(),
2368 "Should accept 'template placeholders' as technical context"
2369 );
2370 }
2371
2372 #[test]
2373 fn test_validate_accepts_template_placeholder() {
2374 let result = validate_commit_message("fix: update template placeholder handling");
2376 assert!(
2377 result.is_ok(),
2378 "Should accept 'template placeholder' as technical context"
2379 );
2380 }
2381
2382 #[test]
2383 fn test_validate_accepts_placeholder_variable() {
2384 let result = validate_commit_message("refactor: rename placeholder variable in template");
2386 assert!(
2387 result.is_ok(),
2388 "Should accept 'placeholder variable' as technical context"
2389 );
2390 }
2391
2392 #[test]
2393 fn test_validate_accepts_placeholder_variables() {
2394 let result = validate_commit_message("docs: document placeholder variables usage");
2396 assert!(
2397 result.is_ok(),
2398 "Should accept 'placeholder variables' as technical context"
2399 );
2400 }
2401
2402 #[test]
2403 fn test_validate_accepts_placeholder_value() {
2404 let result = validate_commit_message("fix: set default placeholder value");
2406 assert!(
2407 result.is_ok(),
2408 "Should accept 'placeholder value' as technical context"
2409 );
2410 }
2411
2412 #[test]
2413 fn test_validate_accepts_placeholder_values() {
2414 let result = validate_commit_message("feat: add support for placeholder values");
2416 assert!(
2417 result.is_ok(),
2418 "Should accept 'placeholder values' as technical context"
2419 );
2420 }
2421
2422 #[test]
2423 fn test_validate_accepts_substitute_placeholder() {
2424 let result = validate_commit_message("fix: properly substitute placeholder in output");
2426 assert!(
2427 result.is_ok(),
2428 "Should accept 'substitute placeholder' as technical context"
2429 );
2430 }
2431
2432 #[test]
2433 fn test_validate_accepts_substituting_placeholder() {
2434 let result =
2436 validate_commit_message("refactor: add logic for substituting placeholder tokens");
2437 assert!(
2438 result.is_ok(),
2439 "Should accept 'substituting placeholder' as technical context"
2440 );
2441 }
2442
2443 #[test]
2444 fn test_validate_accepts_replace_placeholder() {
2445 let result = validate_commit_message("feat: replace placeholder with actual value");
2447 assert!(
2448 result.is_ok(),
2449 "Should accept 'replace placeholder' as technical context"
2450 );
2451 }
2452
2453 #[test]
2454 fn test_validate_rejects_actual_placeholder_filler() {
2455 let result = validate_commit_message("feat: [placeholder]");
2457 assert!(
2458 result.is_err(),
2459 "Should reject '[placeholder]' filler pattern"
2460 );
2461 }
2462
2463 #[test]
2464 fn test_validate_rejects_placeholder_for() {
2465 let result = validate_commit_message("feat: this is a placeholder for the real thing");
2467 assert!(
2468 result.is_err(),
2469 "Should reject 'placeholder for' as filler pattern"
2470 );
2471 }
2472
2473 #[test]
2474 fn test_validate_rejects_is_a_placeholder() {
2475 let result = validate_commit_message("feat: this text is a placeholder");
2477 assert!(
2478 result.is_err(),
2479 "Should reject 'is a placeholder' as filler pattern"
2480 );
2481 }
2482
2483 #[test]
2484 fn test_validate_rejects_bare_placeholder() {
2485 let result = validate_commit_message("feat: add placeholder support");
2487 assert!(
2488 result.is_err(),
2489 "Should reject bare 'placeholder' without technical context"
2490 );
2491 }
2492
2493 #[test]
2494 fn test_validate_accepts_remove_placeholder_from_ui() {
2495 let result = validate_commit_message("fix: remove placeholder from UI");
2497 assert!(
2498 result.is_ok(),
2499 "Should accept 'remove placeholder from UI' as it describes a real change to placeholder functionality"
2500 );
2501 }
2502
2503 #[test]
2504 fn test_validate_accepts_delete_placeholder() {
2505 let result = validate_commit_message("refactor: delete placeholder from login form");
2507 assert!(
2508 result.is_ok(),
2509 "Should accept 'delete placeholder' as it describes a real change"
2510 );
2511 }
2512
2513 #[test]
2514 fn test_validate_accepts_placeholder_at_beginning() {
2515 let result = validate_commit_message("placeholder attribute added to input field");
2517 assert!(
2518 result.is_ok(),
2519 "Should accept 'placeholder' at beginning with valid technical context"
2520 );
2521 }
2522
2523 #[test]
2524 fn test_validate_accepts_placeholder_at_end() {
2525 let result = validate_commit_message("fix: clear input placeholder");
2527 assert!(
2528 result.is_ok(),
2529 "Should accept 'placeholder' at end with valid action"
2530 );
2531 }
2532}