ralph_workflow/files/llm_output_extraction/
commit.rs1use super::cleaning::{final_escape_sequence_cleanup, unescape_json_strings_aggressive};
7use super::xml_extraction::extract_xml_commit;
8use super::xsd_validation::validate_xml_against_xsd;
9use crate::common::truncate_text;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct CommitExtractionResult(String);
16
17impl CommitExtractionResult {
18 pub fn new(message: String) -> Self {
20 Self(message)
21 }
22
23 pub fn into_message(self) -> String {
28 render_final_commit_message(&self.0)
29 }
30}
31
32pub fn try_extract_xml_commit_with_trace(content: &str) -> (Option<String>, String) {
37 let (xml_block, extraction_pattern) = match extract_xml_commit(content) {
42 Some(xml) => {
43 let pattern = if content.trim().starts_with("<ralph-commit>") {
45 "direct XML"
46 } else if content.contains("```xml") || content.contains("```\n<ralph-commit>") {
47 "markdown code fence"
48 } else if content.contains("{\"result\":") || content.contains("\"result\":") {
49 "JSON string"
50 } else {
51 "embedded search"
52 };
53 (xml, pattern)
54 }
55 None => {
56 (content.to_string(), "raw content (no XML tags found)")
59 }
60 };
61
62 let xsd_result = validate_xml_against_xsd(&xml_block);
64
65 let message = match xsd_result {
66 Ok(elements) => {
67 let body = elements.format_body();
69 if body.is_empty() {
70 elements.subject.clone()
71 } else {
72 format!("{}\n\n{}", elements.subject, body)
73 }
74 }
75 Err(e) => {
76 let error_msg = e.format_for_ai_retry();
78 return (None, format!("XSD validation failed: {}", error_msg));
79 }
80 };
81
82 let has_body = message.lines().count() > 1;
84
85 let message_preview = {
87 let escaped = message.replace('\n', "\\n");
88 truncate_text(&escaped, 83) };
90
91 (
92 Some(message.clone()),
93 format!(
94 "Found <ralph-commit> via {}, XSD validation passed, body={}, message: '{}'",
95 extraction_pattern,
96 if has_body { "present" } else { "absent" },
97 message_preview
98 ),
99 )
100}
101
/// Returns `true` when `subject` starts with a recognised Conventional
/// Commits type.
///
/// Only the type is inspected: it is the text before the first `:`, cut at
/// the first `(` (scope) or, when there is no `(`, at the first `!`
/// (breaking-change marker). The scope and the description are not
/// validated. Matching is case-sensitive.
pub fn is_conventional_commit_subject(subject: &str) -> bool {
    // Without a colon there is no type prefix at all.
    let Some((prefix, _description)) = subject.split_once(':') else {
        return false;
    };

    // A scope opener takes precedence over a breaking-change marker.
    let type_end = prefix
        .find('(')
        .or_else(|| prefix.find('!'))
        .unwrap_or(prefix.len());

    matches!(
        &prefix[..type_end],
        "feat"
            | "fix"
            | "docs"
            | "style"
            | "refactor"
            | "perf"
            | "test"
            | "build"
            | "ci"
            | "chore"
    )
}
123
124pub fn render_final_commit_message(message: &str) -> String {
143 let mut result = message.to_string();
144
145 result = final_escape_sequence_cleanup(&result);
148
149 if result.contains("\\n") || result.contains("\\t") || result.contains("\\r") {
151 result = unescape_json_strings_aggressive(&result);
152 }
153
154 result = result
156 .lines()
157 .map(str::trim)
158 .filter(|l| !l.is_empty())
159 .collect::<Vec<_>>()
160 .join("\n");
161
162 result
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `inner` in a `<ralph-commit>` element, one tag per line.
    fn commit_xml(inner: &str) -> String {
        format!("<ralph-commit>\n{inner}\n</ralph-commit>")
    }

    /// Runs the extraction and keeps only the extracted message.
    fn extract(content: &str) -> Option<String> {
        try_extract_xml_commit_with_trace(content).0
    }

    // ---- CommitExtractionResult ----

    #[test]
    fn test_commit_extraction_result_into_message() {
        let wrapped = CommitExtractionResult::new(String::from("feat: add feature"));
        assert_eq!(wrapped.into_message(), "feat: add feature");
    }

    // ---- render_final_commit_message ----

    #[test]
    fn test_render_final_commit_message_with_literal_escapes() {
        // A real newline followed by two literal backslash-n sequences.
        let rendered =
            render_final_commit_message("feat: add feature\n\\n\\nBody with literal escapes");
        assert_eq!(rendered, "feat: add feature\nBody with literal escapes");
    }

    #[test]
    fn test_render_final_commit_message_already_clean() {
        let rendered = render_final_commit_message("feat: add feature\n\nBody text here");
        assert_eq!(rendered, "feat: add feature\nBody text here");
    }

    #[test]
    fn test_render_final_commit_message_with_tabs() {
        let rendered = render_final_commit_message("feat: add feature\\n\\t- item 1\\n\\t- item 2");
        assert_eq!(rendered, "feat: add feature\n- item 1\n- item 2");
    }

    #[test]
    fn test_render_final_commit_message_with_carriage_returns() {
        let rendered = render_final_commit_message("feat: add feature\\r\\nBody text");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_whitespace_cleanup() {
        // Trailing blank and whitespace-only lines must disappear.
        let rendered = render_final_commit_message("feat: add feature\n\nBody text\n\n\n \n ");
        assert_eq!(rendered, "feat: add feature\nBody text");
    }

    #[test]
    fn test_render_final_commit_message_mixed_escape_sequences() {
        let rendered = render_final_commit_message(
            "feat: add feature\\n\\nDetails:\\r\\n\\t- item 1\\n\\t- item 2",
        );
        assert_eq!(rendered, "feat: add feature\nDetails:\n- item 1\n- item 2");
    }

    // ---- is_conventional_commit_subject ----

    #[test]
    fn test_conventional_commit_subject_valid() {
        let accepted = [
            "feat: add feature",
            "fix: resolve bug",
            "docs: update readme",
            "refactor(core): simplify logic",
            "feat!: breaking change",
            "fix(api)!: breaking fix",
        ];
        for subject in accepted {
            assert!(is_conventional_commit_subject(subject));
        }
    }

    #[test]
    fn test_conventional_commit_subject_invalid() {
        let rejected = [
            "invalid: not a type",
            "no colon here",
            "",
            "Feature: capitalize",
        ];
        for subject in rejected {
            assert!(!is_conventional_commit_subject(subject));
        }
    }

    // ---- try_extract_xml_commit_with_trace ----

    #[test]
    fn test_xml_extract_basic_subject_only() {
        let content = commit_xml("<ralph-subject>feat: add new feature</ralph-subject>");
        let (result, reason) = try_extract_xml_commit_with_trace(&content);
        assert_eq!(
            result.as_deref(),
            Some("feat: add new feature"),
            "Should extract from basic XML. Reason: {}",
            reason
        );
    }

    #[test]
    fn test_xml_extract_with_body() {
        let content = commit_xml(
            "<ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>\n\
             <ralph-body>Implement Google and GitHub OAuth providers.\n\
             Add session management for OAuth tokens.</ralph-body>",
        );
        let msg = extract(&content).expect("Should extract from XML with body");
        assert!(msg.starts_with("feat(auth): add OAuth2 login flow"));
        assert!(msg.contains("Implement Google and GitHub OAuth providers"));
        assert!(msg.contains("Add session management"));
    }

    #[test]
    fn test_xml_extract_with_empty_body() {
        let content = commit_xml(
            "<ralph-subject>fix: resolve bug</ralph-subject>\n\
             <ralph-body></ralph-body>",
        );
        assert_eq!(
            extract(&content).as_deref(),
            Some("fix: resolve bug"),
            "Should extract even with empty body"
        );
    }

    #[test]
    fn test_xml_extract_ignores_preamble() {
        let content = format!(
            "Here is the commit message based on my analysis:\n\n\
             Looking at the diff, I can see...\n\n\
             {}\n\n\
             That's all!",
            commit_xml("<ralph-subject>refactor: simplify logic</ralph-subject>")
        );
        assert_eq!(
            extract(&content).as_deref(),
            Some("refactor: simplify logic"),
            "Should ignore preamble and extract XML"
        );
    }

    #[test]
    fn test_xml_extract_fails_missing_tags() {
        let outcome = extract("Just some text without XML tags");
        assert!(outcome.is_none(), "Should fail when XML tags are missing");
    }

    #[test]
    fn test_xml_extract_fails_invalid_commit_type() {
        let content = commit_xml("<ralph-subject>invalid: not a real type</ralph-subject>");
        assert!(
            extract(&content).is_none(),
            "Should reject invalid commit type"
        );
    }

    #[test]
    fn test_xml_extract_fails_missing_subject() {
        let content = commit_xml("<ralph-body>Just a body, no subject</ralph-body>");
        assert!(
            extract(&content).is_none(),
            "Should fail when subject is missing"
        );
    }

    #[test]
    fn test_xml_extract_fails_empty_subject() {
        let content = commit_xml("<ralph-subject></ralph-subject>");
        assert!(
            extract(&content).is_none(),
            "Should fail when subject is empty"
        );
    }

    #[test]
    fn test_xml_extract_handles_whitespace_in_subject() {
        let content = commit_xml("<ralph-subject> docs: update readme </ralph-subject>");
        assert_eq!(
            extract(&content).as_deref(),
            Some("docs: update readme"),
            "Should handle whitespace in subject"
        );
    }

    #[test]
    fn test_xml_extract_with_breaking_change() {
        let content = commit_xml(
            "<ralph-subject>feat!: drop Python 3.7 support</ralph-subject>\n\
             <ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>",
        );
        let msg = extract(&content).expect("Should handle breaking change indicator");
        assert!(msg.starts_with("feat!:"));
        assert!(msg.contains("BREAKING CHANGE"));
    }

    #[test]
    fn test_xml_extract_with_scope() {
        let content =
            commit_xml("<ralph-subject>test(parser): add coverage for edge cases</ralph-subject>");
        assert_eq!(
            extract(&content).as_deref(),
            Some("test(parser): add coverage for edge cases"),
            "Should handle scope in subject"
        );
    }

    #[test]
    fn test_xml_extract_body_preserves_newlines() {
        let content = commit_xml(
            "<ralph-subject>feat: add feature</ralph-subject>\n\
             <ralph-body>Line 1\n\
             Line 2\n\
             Line 3</ralph-body>",
        );
        let msg = extract(&content).expect("Should preserve newlines in body");
        assert!(msg.contains("Line 1\nLine 2\nLine 3"));
    }

    #[test]
    fn test_xml_extract_fails_malformed_tags() {
        // Closing tag first, opening tag last: not a well-formed element.
        let content = "</ralph-commit>\n\
                       <ralph-subject>feat: add feature</ralph-subject>\n\
                       <ralph-commit>";
        assert!(extract(content).is_none(), "Should fail for malformed tags");
    }

    #[test]
    fn test_xml_extract_handles_markdown_code_fence() {
        let content = format!(
            "```xml\n{}\n```",
            commit_xml("<ralph-subject>feat: add feature</ralph-subject>")
        );
        assert!(
            extract(&content).is_some(),
            "Should extract from XML even inside code fence"
        );
    }

    #[test]
    fn test_xml_extract_with_thinking_preamble() {
        let log_content = format!(
            "[Claude] Thinking: Looking at this diff, I need to analyze...\n\n{}",
            commit_xml(
                "<ralph-subject>feat(pipeline): add recovery mechanism</ralph-subject>\n\
                 <ralph-body>When commit validation fails, attempt to salvage valid message.</ralph-body>",
            )
        );

        let (result, _reason) = try_extract_xml_commit_with_trace(&log_content);
        let msg = result.expect("extraction should succeed after a thinking preamble");
        assert!(msg.starts_with("feat(pipeline):"));
    }

    #[test]
    fn test_xsd_validation_integrated_in_extraction() {
        let xml = format!(
            "Some text before\n{}\nSome text after",
            commit_xml("<ralph-subject>fix: resolve bug</ralph-subject>")
        );
        let (msg, trace) = try_extract_xml_commit_with_trace(&xml);
        assert!(msg.is_some(), "Should extract valid message");
        assert!(trace.contains("XSD"), "Trace should mention XSD validation");
    }
}