ralph_workflow/files/llm_output_extraction/
commit.rs1use super::cleaning::{final_escape_sequence_cleanup, unescape_json_strings_aggressive};
7use super::xml_extraction::extract_xml_commit;
8use super::xsd_validation::validate_xml_against_xsd;
9
10#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct CommitExtractionResult(String);
15
16impl CommitExtractionResult {
17 pub fn new(message: String) -> Self {
19 Self(message)
20 }
21
22 pub fn into_message(self) -> String {
27 render_final_commit_message(&self.0)
28 }
29}
30
31pub fn try_extract_xml_commit_with_trace(content: &str) -> (Option<String>, String) {
73 let (xml_block, extraction_pattern) = match extract_xml_commit(content) {
78 Some(xml) => {
79 let pattern = if content.trim().starts_with("<ralph-commit>") {
81 "direct XML"
82 } else if content.contains("```xml") || content.contains("```\n<ralph-commit>") {
83 "markdown code fence"
84 } else if content.contains("{\"result\":") || content.contains("\"result\":") {
85 "JSON string"
86 } else {
87 "embedded search"
88 };
89 (xml, pattern)
90 }
91 None => {
92 (content.to_string(), "raw content (no XML tags found)")
95 }
96 };
97
98 let xsd_result = validate_xml_against_xsd(&xml_block);
100
101 let message = match xsd_result {
102 Ok(elements) => {
103 let body = elements.format_body();
105 if body.is_empty() {
106 elements.subject.clone()
107 } else {
108 format!("{}\n\n{}", elements.subject, body)
109 }
110 }
111 Err(e) => {
112 let error_msg = e.format_for_ai_retry();
114 return (None, format!("XSD validation failed: {}", error_msg));
115 }
116 };
117
118 let has_body = message.lines().count() > 1;
120
121 (
122 Some(message.clone()),
123 format!(
124 "Found <ralph-commit> via {}, XSD validation passed, body={}, message: '{}'",
125 extraction_pattern,
126 if has_body { "present" } else { "absent" },
127 if message.len() > 80 {
128 format!("{}...", &message[..80].replace('\n', "\\n"))
129 } else {
130 message.replace('\n', "\\n")
131 }
132 ),
133 )
134}
135
/// Reports whether `subject` starts with a recognised Conventional Commits type.
///
/// Only the text before the first `:` is inspected. The type is that text up
/// to the first `(` or, when there is no `(`, up to the first `!`. It must
/// equal one of `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`,
/// `build`, `ci` or `chore` exactly (the comparison is case-sensitive).
///
/// A subject without a `:` is rejected. Nothing after the type — scope
/// contents, closing parenthesis, description — is validated.
pub fn is_conventional_commit_subject(subject: &str) -> bool {
    let Some((head, _description)) = subject.split_once(':') else {
        return false;
    };

    // A `(` always wins over `!` as the end of the type, wherever the `!` sits.
    let type_len = head
        .find('(')
        .or_else(|| head.find('!'))
        .unwrap_or(head.len());

    matches!(
        &head[..type_len],
        "feat" | "fix" | "docs" | "style" | "refactor" | "perf" | "test" | "build" | "ci" | "chore"
    )
}
157
158pub fn render_final_commit_message(message: &str) -> String {
177 let mut result = message.to_string();
178
179 result = final_escape_sequence_cleanup(&result);
182
183 if result.contains("\\n") || result.contains("\\t") || result.contains("\\r") {
185 result = unescape_json_strings_aggressive(&result);
186 }
187
188 result = result
190 .lines()
191 .map(str::trim)
192 .filter(|l| !l.is_empty())
193 .collect::<Vec<_>>()
194 .join("\n");
195
196 result
197}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor and keeps only the extracted message, dropping the trace.
    fn extract(content: &str) -> Option<String> {
        try_extract_xml_commit_with_trace(content).0
    }

    // --- CommitExtractionResult / render_final_commit_message ---

    #[test]
    fn test_commit_extraction_result_into_message() {
        let rendered =
            CommitExtractionResult::new(String::from("feat: add feature")).into_message();
        assert_eq!(rendered, "feat: add feature");
    }

    #[test]
    fn test_render_final_commit_message_with_literal_escapes() {
        let rendered =
            render_final_commit_message("feat: add feature\n\\n\\nBody with literal escapes");
        assert_eq!(rendered, "feat: add feature\nBody with literal escapes");
    }

    #[test]
    fn test_render_final_commit_message_already_clean() {
        let rendered = render_final_commit_message("feat: add feature\n\nBody text here");
        assert_eq!(rendered, "feat: add feature\nBody text here");
    }

    #[test]
    fn test_render_final_commit_message_with_tabs() {
        let rendered =
            render_final_commit_message("feat: add feature\\n\\t- item 1\\n\\t- item 2");
        assert_eq!(rendered, "feat: add feature\n- item 1\n- item 2");
    }

    #[test]
    fn test_render_final_commit_message_with_carriage_returns() {
        let rendered = render_final_commit_message("feat: add feature\\r\\nBody text");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_whitespace_cleanup() {
        let rendered = render_final_commit_message("feat: add feature\n\nBody text\n\n\n \n ");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_mixed_escape_sequences() {
        let rendered = render_final_commit_message(
            "feat: add feature\\n\\nDetails:\\r\\n\\t- item 1\\n\\t- item 2",
        );
        assert_eq!(rendered, "feat: add feature\nDetails:\n- item 1\n- item 2");
    }

    // --- is_conventional_commit_subject ---

    #[test]
    fn test_conventional_commit_subject_valid() {
        let accepted = [
            "feat: add feature",
            "fix: resolve bug",
            "docs: update readme",
            "refactor(core): simplify logic",
            "feat!: breaking change",
            "fix(api)!: breaking fix",
        ];
        for subject in accepted {
            assert!(is_conventional_commit_subject(subject));
        }
    }

    #[test]
    fn test_conventional_commit_subject_invalid() {
        let rejected = [
            "invalid: not a type",
            "no colon here",
            "",
            "Feature: capitalize",
        ];
        for subject in rejected {
            assert!(!is_conventional_commit_subject(subject));
        }
    }

    // --- try_extract_xml_commit_with_trace ---

    #[test]
    fn test_xml_extract_basic_subject_only() {
        let content = r"<ralph-commit>
<ralph-subject>feat: add new feature</ralph-subject>
</ralph-commit>";
        let (extracted, reason) = try_extract_xml_commit_with_trace(content);
        let message = extracted
            .unwrap_or_else(|| panic!("Should extract from basic XML. Reason: {}", reason));
        assert_eq!(message, "feat: add new feature");
    }

    #[test]
    fn test_xml_extract_with_body() {
        let content = r"<ralph-commit>
<ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>
<ralph-body>Implement Google and GitHub OAuth providers.
Add session management for OAuth tokens.</ralph-body>
</ralph-commit>";
        let message = extract(content).expect("Should extract from XML with body");
        assert!(message.starts_with("feat(auth): add OAuth2 login flow"));
        assert!(message.contains("Implement Google and GitHub OAuth providers"));
        assert!(message.contains("Add session management"));
    }

    #[test]
    fn test_xml_extract_with_empty_body() {
        let content = r"<ralph-commit>
<ralph-subject>fix: resolve bug</ralph-subject>
<ralph-body></ralph-body>
</ralph-commit>";
        let message = extract(content).expect("Should extract even with empty body");
        assert_eq!(message, "fix: resolve bug");
    }

    #[test]
    fn test_xml_extract_ignores_preamble() {
        let content = r"Here is the commit message based on my analysis:

Looking at the diff, I can see...

<ralph-commit>
<ralph-subject>refactor: simplify logic</ralph-subject>
</ralph-commit>

That's all!";
        let message = extract(content).expect("Should ignore preamble and extract XML");
        assert_eq!(message, "refactor: simplify logic");
    }

    #[test]
    fn test_xml_extract_fails_missing_tags() {
        let content = "Just some text without XML tags";
        assert_eq!(
            extract(content),
            None,
            "Should fail when XML tags are missing"
        );
    }

    #[test]
    fn test_xml_extract_fails_invalid_commit_type() {
        let content = r"<ralph-commit>
<ralph-subject>invalid: not a real type</ralph-subject>
</ralph-commit>";
        assert_eq!(extract(content), None, "Should reject invalid commit type");
    }

    #[test]
    fn test_xml_extract_fails_missing_subject() {
        let content = r"<ralph-commit>
<ralph-body>Just a body, no subject</ralph-body>
</ralph-commit>";
        assert_eq!(extract(content), None, "Should fail when subject is missing");
    }

    #[test]
    fn test_xml_extract_fails_empty_subject() {
        let content = r"<ralph-commit>
<ralph-subject></ralph-subject>
</ralph-commit>";
        assert_eq!(extract(content), None, "Should fail when subject is empty");
    }

    #[test]
    fn test_xml_extract_handles_whitespace_in_subject() {
        let content = r"<ralph-commit>
<ralph-subject> docs: update readme </ralph-subject>
</ralph-commit>";
        let message = extract(content).expect("Should handle whitespace in subject");
        assert_eq!(message, "docs: update readme");
    }

    #[test]
    fn test_xml_extract_with_breaking_change() {
        let content = r"<ralph-commit>
<ralph-subject>feat!: drop Python 3.7 support</ralph-subject>
<ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>
</ralph-commit>";
        let message = extract(content).expect("Should handle breaking change indicator");
        assert!(message.starts_with("feat!:"));
        assert!(message.contains("BREAKING CHANGE"));
    }

    #[test]
    fn test_xml_extract_with_scope() {
        let content = r"<ralph-commit>
<ralph-subject>test(parser): add coverage for edge cases</ralph-subject>
</ralph-commit>";
        let message = extract(content).expect("Should handle scope in subject");
        assert_eq!(message, "test(parser): add coverage for edge cases");
    }

    #[test]
    fn test_xml_extract_body_preserves_newlines() {
        let content = r"<ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
<ralph-body>Line 1
Line 2
Line 3</ralph-body>
</ralph-commit>";
        let message = extract(content).expect("Should preserve newlines in body");
        assert!(message.contains("Line 1\nLine 2\nLine 3"));
    }

    #[test]
    fn test_xml_extract_fails_malformed_tags() {
        let content = r"</ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
<ralph-commit>";
        assert_eq!(extract(content), None, "Should fail for malformed tags");
    }

    #[test]
    fn test_xml_extract_handles_markdown_code_fence() {
        let content = r"```xml
<ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
</ralph-commit>
```";
        assert!(
            extract(content).is_some(),
            "Should extract from XML even inside code fence"
        );
    }

    #[test]
    fn test_xml_extract_with_thinking_preamble() {
        let log_content = r"[Claude] Thinking: Looking at this diff, I need to analyze...

<ralph-commit>
<ralph-subject>feat(pipeline): add recovery mechanism</ralph-subject>
<ralph-body>When commit validation fails, attempt to salvage valid message.</ralph-body>
</ralph-commit>";

        let message = extract(log_content).unwrap();
        assert!(message.starts_with("feat(pipeline):"));
    }

    #[test]
    fn test_xsd_validation_integrated_in_extraction() {
        let xml = r#"Some text before
<ralph-commit>
<ralph-subject>fix: resolve bug</ralph-subject>
</ralph-commit>
Some text after"#;
        let (extracted, trace) = try_extract_xml_commit_with_trace(xml);
        assert!(extracted.is_some(), "Should extract valid message");
        assert!(trace.contains("XSD"), "Trace should mention XSD validation");
    }
}