ralph_workflow/files/llm_output_extraction/
commit.rs1use super::cleaning::{final_escape_sequence_cleanup, unescape_json_strings_aggressive};
7use super::xml_extraction::extract_xml_commit;
8use super::xsd_validation::validate_xml_against_xsd;
9use crate::common::truncate_text;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct CommitExtractionResult(String);
16
17impl CommitExtractionResult {
18 pub fn new(message: String) -> Self {
20 Self(message)
21 }
22
23 pub fn into_message(self) -> String {
28 render_final_commit_message(&self.0)
29 }
30}
31
32pub fn try_extract_xml_commit_with_trace(content: &str) -> (Option<String>, String) {
74 let (xml_block, extraction_pattern) = match extract_xml_commit(content) {
79 Some(xml) => {
80 let pattern = if content.trim().starts_with("<ralph-commit>") {
82 "direct XML"
83 } else if content.contains("```xml") || content.contains("```\n<ralph-commit>") {
84 "markdown code fence"
85 } else if content.contains("{\"result\":") || content.contains("\"result\":") {
86 "JSON string"
87 } else {
88 "embedded search"
89 };
90 (xml, pattern)
91 }
92 None => {
93 (content.to_string(), "raw content (no XML tags found)")
96 }
97 };
98
99 let xsd_result = validate_xml_against_xsd(&xml_block);
101
102 let message = match xsd_result {
103 Ok(elements) => {
104 let body = elements.format_body();
106 if body.is_empty() {
107 elements.subject.clone()
108 } else {
109 format!("{}\n\n{}", elements.subject, body)
110 }
111 }
112 Err(e) => {
113 let error_msg = e.format_for_ai_retry();
115 return (None, format!("XSD validation failed: {}", error_msg));
116 }
117 };
118
119 let has_body = message.lines().count() > 1;
121
122 let message_preview = {
124 let escaped = message.replace('\n', "\\n");
125 truncate_text(&escaped, 83) };
127
128 (
129 Some(message.clone()),
130 format!(
131 "Found <ralph-commit> via {}, XSD validation passed, body={}, message: '{}'",
132 extraction_pattern,
133 if has_body { "present" } else { "absent" },
134 message_preview
135 ),
136 )
137}
138
/// Reports whether `subject` starts with a well-formed Conventional Commits prefix.
///
/// The accepted shape is `type[(scope)][!]:` where `type` is one of `feat`, `fix`,
/// `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci` or `chore`
/// (case-sensitive), `scope` is non-blank text closed by `)`, and `!` is the
/// optional breaking-change marker. Everything after the colon is ignored.
///
/// Returns `false` when there is no colon, the type is unknown, the scope is
/// unterminated or blank, or anything other than `(scope)` / `!` sits between
/// the type and the colon (for example `feat!junk: x` or `fix(api)junk: x`).
pub fn is_conventional_commit_subject(subject: &str) -> bool {
    const VALID_TYPES: [&str; 10] = [
        "feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore",
    ];

    // The type ends at the first structural character of the prefix.
    let Some(type_end) = subject.find(|c: char| matches!(c, '(' | '!' | ':')) else {
        return false;
    };
    let (commit_type, rest) = subject.split_at(type_end);

    // Optional `(scope)`: must be closed and must name something.
    let rest = match rest.strip_prefix('(') {
        Some(after_paren) => {
            let Some((scope, after_scope)) = after_paren.split_once(')') else {
                return false;
            };
            if scope.trim().is_empty() {
                return false;
            }
            after_scope
        }
        None => rest,
    };

    // Optional breaking-change marker, at most once.
    let rest = rest.strip_prefix('!').unwrap_or(rest);

    // Nothing else may sit between the prefix and the colon.
    rest.starts_with(':') && VALID_TYPES.contains(&commit_type)
}
160
161pub fn render_final_commit_message(message: &str) -> String {
180 let mut result = message.to_string();
181
182 result = final_escape_sequence_cleanup(&result);
185
186 if result.contains("\\n") || result.contains("\\t") || result.contains("\\r") {
188 result = unescape_json_strings_aggressive(&result);
189 }
190
191 result = result
193 .lines()
194 .map(str::trim)
195 .filter(|l| !l.is_empty())
196 .collect::<Vec<_>>()
197 .join("\n");
198
199 result
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor and keeps only the extracted message, dropping the trace.
    fn extract(content: &str) -> Option<String> {
        try_extract_xml_commit_with_trace(content).0
    }

    // ---- CommitExtractionResult ----

    #[test]
    fn test_commit_extraction_result_into_message() {
        let wrapped = CommitExtractionResult::new(String::from("feat: add feature"));
        assert_eq!(wrapped.into_message(), "feat: add feature");
    }

    // ---- render_final_commit_message ----

    #[test]
    fn test_render_final_commit_message_with_literal_escapes() {
        let rendered =
            render_final_commit_message("feat: add feature\n\\n\\nBody with literal escapes");
        assert_eq!(rendered, "feat: add feature\nBody with literal escapes");
    }

    #[test]
    fn test_render_final_commit_message_already_clean() {
        let rendered = render_final_commit_message("feat: add feature\n\nBody text here");
        assert_eq!(rendered, "feat: add feature\nBody text here");
    }

    #[test]
    fn test_render_final_commit_message_with_tabs() {
        let rendered =
            render_final_commit_message("feat: add feature\\n\\t- item 1\\n\\t- item 2");
        assert_eq!(rendered, "feat: add feature\n- item 1\n- item 2");
    }

    #[test]
    fn test_render_final_commit_message_with_carriage_returns() {
        let rendered = render_final_commit_message("feat: add feature\\r\\nBody text");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_whitespace_cleanup() {
        let rendered = render_final_commit_message("feat: add feature\n\nBody text\n\n\n \n ");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_mixed_escape_sequences() {
        let rendered = render_final_commit_message(
            "feat: add feature\\n\\nDetails:\\r\\n\\t- item 1\\n\\t- item 2",
        );
        assert_eq!(rendered, "feat: add feature\nDetails:\n- item 1\n- item 2");
    }

    // ---- is_conventional_commit_subject ----

    #[test]
    fn test_conventional_commit_subject_valid() {
        let accepted = [
            "feat: add feature",
            "fix: resolve bug",
            "docs: update readme",
            "refactor(core): simplify logic",
            "feat!: breaking change",
            "fix(api)!: breaking fix",
        ];
        for subject in accepted {
            assert!(is_conventional_commit_subject(subject));
        }
    }

    #[test]
    fn test_conventional_commit_subject_invalid() {
        let rejected = [
            "invalid: not a type",
            "no colon here",
            "",
            "Feature: capitalize",
        ];
        for subject in rejected {
            assert!(!is_conventional_commit_subject(subject));
        }
    }

    // ---- try_extract_xml_commit_with_trace ----

    #[test]
    fn test_xml_extract_basic_subject_only() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>feat: add new feature</ralph-subject>\n\
                       </ralph-commit>";
        let (message, reason) = try_extract_xml_commit_with_trace(content);
        let message = message
            .unwrap_or_else(|| panic!("Should extract from basic XML. Reason: {}", reason));
        assert_eq!(message, "feat: add new feature");
    }

    #[test]
    fn test_xml_extract_with_body() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>\n\
                       <ralph-body>Implement Google and GitHub OAuth providers.\n\
                       Add session management for OAuth tokens.</ralph-body>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should extract from XML with body");
        assert!(msg.starts_with("feat(auth): add OAuth2 login flow"));
        assert!(msg.contains("Implement Google and GitHub OAuth providers"));
        assert!(msg.contains("Add session management"));
    }

    #[test]
    fn test_xml_extract_with_empty_body() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>fix: resolve bug</ralph-subject>\n\
                       <ralph-body></ralph-body>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should extract even with empty body");
        assert_eq!(msg, "fix: resolve bug");
    }

    #[test]
    fn test_xml_extract_ignores_preamble() {
        let content = "Here is the commit message based on my analysis:\n\
                       \n\
                       Looking at the diff, I can see...\n\
                       \n\
                       <ralph-commit>\n\
                       <ralph-subject>refactor: simplify logic</ralph-subject>\n\
                       </ralph-commit>\n\
                       \n\
                       That's all!";
        let msg = extract(content).expect("Should ignore preamble and extract XML");
        assert_eq!(msg, "refactor: simplify logic");
    }

    #[test]
    fn test_xml_extract_fails_missing_tags() {
        let outcome = extract("Just some text without XML tags");
        assert_eq!(outcome, None, "Should fail when XML tags are missing");
    }

    #[test]
    fn test_xml_extract_fails_invalid_commit_type() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>invalid: not a real type</ralph-subject>\n\
                       </ralph-commit>";
        assert_eq!(extract(content), None, "Should reject invalid commit type");
    }

    #[test]
    fn test_xml_extract_fails_missing_subject() {
        let content = "<ralph-commit>\n\
                       <ralph-body>Just a body, no subject</ralph-body>\n\
                       </ralph-commit>";
        assert_eq!(extract(content), None, "Should fail when subject is missing");
    }

    #[test]
    fn test_xml_extract_fails_empty_subject() {
        let content = "<ralph-commit>\n\
                       <ralph-subject></ralph-subject>\n\
                       </ralph-commit>";
        assert_eq!(extract(content), None, "Should fail when subject is empty");
    }

    #[test]
    fn test_xml_extract_handles_whitespace_in_subject() {
        let content = "<ralph-commit>\n\
                       <ralph-subject> docs: update readme </ralph-subject>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should handle whitespace in subject");
        assert_eq!(msg, "docs: update readme");
    }

    #[test]
    fn test_xml_extract_with_breaking_change() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>feat!: drop Python 3.7 support</ralph-subject>\n\
                       <ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should handle breaking change indicator");
        assert!(msg.starts_with("feat!:"));
        assert!(msg.contains("BREAKING CHANGE"));
    }

    #[test]
    fn test_xml_extract_with_scope() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>test(parser): add coverage for edge cases</ralph-subject>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should handle scope in subject");
        assert_eq!(msg, "test(parser): add coverage for edge cases");
    }

    #[test]
    fn test_xml_extract_body_preserves_newlines() {
        let content = "<ralph-commit>\n\
                       <ralph-subject>feat: add feature</ralph-subject>\n\
                       <ralph-body>Line 1\n\
                       Line 2\n\
                       Line 3</ralph-body>\n\
                       </ralph-commit>";
        let msg = extract(content).expect("Should preserve newlines in body");
        assert!(msg.contains("Line 1\nLine 2\nLine 3"));
    }

    #[test]
    fn test_xml_extract_fails_malformed_tags() {
        // Closing tag first, opening tag last.
        let content = "</ralph-commit>\n\
                       <ralph-subject>feat: add feature</ralph-subject>\n\
                       <ralph-commit>";
        assert_eq!(extract(content), None, "Should fail for malformed tags");
    }

    #[test]
    fn test_xml_extract_handles_markdown_code_fence() {
        let content = "```xml\n\
                       <ralph-commit>\n\
                       <ralph-subject>feat: add feature</ralph-subject>\n\
                       </ralph-commit>\n\
                       ```";
        assert!(
            extract(content).is_some(),
            "Should extract from XML even inside code fence"
        );
    }

    #[test]
    fn test_xml_extract_with_thinking_preamble() {
        let log_content = "[Claude] Thinking: Looking at this diff, I need to analyze...\n\
                           \n\
                           <ralph-commit>\n\
                           <ralph-subject>feat(pipeline): add recovery mechanism</ralph-subject>\n\
                           <ralph-body>When commit validation fails, attempt to salvage valid message.</ralph-body>\n\
                           </ralph-commit>";
        let msg = extract(log_content).unwrap();
        assert!(msg.starts_with("feat(pipeline):"));
    }

    #[test]
    fn test_xsd_validation_integrated_in_extraction() {
        let xml = "Some text before\n\
                   <ralph-commit>\n\
                   <ralph-subject>fix: resolve bug</ralph-subject>\n\
                   </ralph-commit>\n\
                   Some text after";
        let (msg, trace) = try_extract_xml_commit_with_trace(xml);
        assert!(msg.is_some(), "Should extract valid message");
        assert!(trace.contains("XSD"), "Trace should mention XSD validation");
    }
}