ralph_workflow/files/llm_output_extraction/commit/
extraction.rs1use super::rendering::render_final_commit_message;
4use crate::common::truncate_text;
5use crate::files::llm_output_extraction::xsd_validation::validate_xml_against_xsd;
6use crate::reducer::state::pipeline::ExcludedFile;
7
8fn extract_commit_message(
9 elements: crate::files::llm_output_extraction::xsd_validation::CommitMessageElements,
10 extraction_pattern: &str,
11) -> (
12 Option<String>,
13 Option<String>,
14 Vec<String>,
15 Vec<ExcludedFile>,
16 String,
17) {
18 if let Some(reason) = elements.skip_reason {
19 return (
20 None,
21 Some(reason.clone()),
22 vec![],
23 vec![],
24 format!("Found <ralph-skip> via {extraction_pattern}, reason: '{reason}'"),
25 );
26 }
27
28 let files = elements.files.clone();
29 let excluded_files = elements.excluded_files.clone();
30 let body = elements.format_body();
31 let message = if body.is_empty() {
32 elements.subject
33 } else {
34 format!("{}\n\n{}", elements.subject, body)
35 };
36
37 let has_body = message.lines().count() > 1;
38 let message_preview = {
39 let escaped = message.replace('\n', "\\n");
40 truncate_text(&escaped, 83)
41 };
42 let files_note = if files.is_empty() {
43 String::new()
44 } else {
45 format!(", files={}", files.len())
46 };
47 let excluded_note = if excluded_files.is_empty() {
48 String::new()
49 } else {
50 format!(", excluded={}", excluded_files.len())
51 };
52
53 (
54 Some(message),
55 None,
56 files,
57 excluded_files,
58 format!(
59 "Found <ralph-commit> via {}, XSD validation passed, body={}{}{}, message: '{}'",
60 extraction_pattern,
61 if has_body { "present" } else { "absent" },
62 files_note,
63 excluded_note,
64 message_preview
65 ),
66 )
67}
68
69#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct CommitExtractionResult(String);
72
73impl CommitExtractionResult {
74 #[must_use]
75 pub const fn new(message: String) -> Self {
76 Self(message)
77 }
78
79 #[must_use]
80 pub fn into_message(self) -> String {
81 render_final_commit_message(&self.0)
82 }
83}
84
85#[must_use]
87pub fn try_extract_xml_commit_document_with_trace(
88 content: &str,
89) -> (
90 Option<String>,
91 Option<String>,
92 Vec<String>,
93 Vec<ExcludedFile>,
94 String,
95) {
96 match validate_xml_against_xsd(content.trim()) {
97 Ok(elements) => extract_commit_message(elements, "strict XML document"),
98 Err(e) => {
99 let error_msg = e.format_for_ai_retry();
100 (
101 None,
102 None,
103 vec![],
104 vec![],
105 format!("XSD validation failed: {error_msg}"),
106 )
107 }
108 }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the strict XML extractor and keeps only the extracted message.
    fn message_from(xml: &str) -> Option<String> {
        let (message, _skip, _files, _excluded, _trace) =
            try_extract_xml_commit_document_with_trace(xml);
        message
    }

    #[test]
    fn test_commit_extraction_result_into_message() {
        let wrapped = CommitExtractionResult::new("feat: add feature".to_string());
        let rendered = wrapped.into_message();
        assert_eq!(rendered, "feat: add feature");
    }

    #[test]
    fn test_xml_extract_basic_subject_only() {
        let xml = r"<ralph-commit>
<ralph-subject>feat: add new feature</ralph-subject>
</ralph-commit>";
        let (message, skip, _files, _excluded, reason) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(
            message.is_some(),
            "Should extract from basic XML. Reason: {reason}"
        );
        assert!(skip.is_none());
        assert_eq!(message.unwrap(), "feat: add new feature");
    }

    #[test]
    fn test_xml_extract_with_body() {
        let xml = r"<ralph-commit>
<ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>
<ralph-body>Implement Google and GitHub OAuth providers.
Add session management for OAuth tokens.</ralph-body>
</ralph-commit>";
        let (message, skip, _files, _excluded, _) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(message.is_some(), "Should extract from XML with body");
        assert!(skip.is_none());
        let text = message.unwrap();
        assert!(text.starts_with("feat(auth): add OAuth2 login flow"));
        assert!(text.contains("Implement Google and GitHub OAuth providers"));
        assert!(text.contains("Add session management"));
    }

    #[test]
    fn test_xml_extract_with_empty_body() {
        let xml = r"<ralph-commit>
<ralph-subject>fix: resolve bug</ralph-subject>
<ralph-body></ralph-body>
</ralph-commit>";
        let (message, skip, _files, _excluded, _) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(message.is_some(), "Should extract even with empty body");
        assert!(skip.is_none());
        assert_eq!(message.unwrap(), "fix: resolve bug");
    }

    #[test]
    fn test_xml_extract_fails_missing_tags() {
        let message = message_from("Just some text without XML tags");
        assert!(message.is_none(), "Should fail when XML tags are missing");
    }

    #[test]
    fn test_xml_extract_fails_invalid_commit_type() {
        let xml = r"<ralph-commit>
<ralph-subject>invalid: not a real type</ralph-subject>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_none(), "Should reject invalid commit type");
    }

    #[test]
    fn test_xml_extract_fails_missing_subject() {
        let xml = r"<ralph-commit>
<ralph-body>Just a body, no subject</ralph-body>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_none(), "Should fail when subject is missing");
    }

    #[test]
    fn test_xml_extract_fails_empty_subject() {
        let xml = r"<ralph-commit>
<ralph-subject></ralph-subject>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_none(), "Should fail when subject is empty");
    }

    #[test]
    fn test_xml_extract_handles_whitespace_in_subject() {
        let xml = r"<ralph-commit>
<ralph-subject> docs: update readme </ralph-subject>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_some(), "Should handle whitespace in subject");
        assert_eq!(message.unwrap(), "docs: update readme");
    }

    #[test]
    fn test_xml_extract_with_breaking_change() {
        let xml = r"<ralph-commit>
<ralph-subject>feat!: drop Python 3.7 support</ralph-subject>
<ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_some(), "Should handle breaking change indicator");
        let text = message.unwrap();
        assert!(text.starts_with("feat!:"));
        assert!(text.contains("BREAKING CHANGE"));
    }

    #[test]
    fn test_xml_extract_with_scope() {
        let xml = r"<ralph-commit>
<ralph-subject>test(parser): add coverage for edge cases</ralph-subject>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_some(), "Should handle scope in subject");
        assert_eq!(
            message.unwrap(),
            "test(parser): add coverage for edge cases"
        );
    }

    #[test]
    fn test_xml_extract_body_preserves_newlines() {
        let xml = r"<ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
<ralph-body>Line 1
Line 2
Line 3</ralph-body>
</ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_some(), "Should preserve newlines in body");
        let text = message.unwrap();
        assert!(text.contains("Line 1\nLine 2\nLine 3"));
    }

    #[test]
    fn test_xml_extract_fails_malformed_tags() {
        let xml = r"</ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
<ralph-commit>";
        let message = message_from(xml);
        assert!(message.is_none(), "Should fail for malformed tags");
    }

    #[test]
    fn test_xsd_validation_integrated_in_extraction() {
        let xml = r"<ralph-commit>
<ralph-subject>fix: resolve bug</ralph-subject>
</ralph-commit>";
        let (message, _skip, _files, _excluded, trace) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(message.is_some(), "Should extract valid message");
        assert!(trace.contains("XSD"), "Trace should mention XSD validation");
    }

    #[test]
    fn test_xml_extract_with_excluded_files_returns_them() {
        let xml = r#"<ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
<ralph-files>
<ralph-file>src/main.rs</ralph-file>
</ralph-files>
<ralph-excluded-files>
<ralph-excluded-file reason="deferred">src/other.rs</ralph-excluded-file>
</ralph-excluded-files>
</ralph-commit>"#;
        let (message, _skip, files, excluded, _trace) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(message.is_some(), "Should extract commit message");
        assert_eq!(files.len(), 1);
        assert_eq!(excluded.len(), 1);
        let first_excluded = &excluded[0];
        assert_eq!(first_excluded.path, "src/other.rs");
        assert!(matches!(
            first_excluded.reason,
            crate::reducer::state::pipeline::ExcludedFileReason::Deferred
        ));
    }

    #[test]
    fn test_xml_extract_without_excluded_files_returns_empty_vec() {
        let xml = r"<ralph-commit>
<ralph-subject>feat: add feature</ralph-subject>
</ralph-commit>";
        let (_message, _skip, _files, excluded, _trace) =
            try_extract_xml_commit_document_with_trace(xml);
        assert!(excluded.is_empty(), "Should return empty excluded_files");
    }

    #[test]
    fn test_xml_document_extract_rejects_markdown_wrapped_xml() {
        // The strict extractor gets the fenced block verbatim; it must not unwrap it.
        let wrapped =
            "```xml\n<ralph-commit><ralph-subject>fix: wrapped</ralph-subject></ralph-commit>\n```";

        let (message, skip, _files, _excluded, trace) =
            try_extract_xml_commit_document_with_trace(wrapped);

        assert!(
            message.is_none(),
            "strict extractor should reject wrapped xml"
        );
        assert!(skip.is_none());
        assert!(trace.contains("XSD validation failed"));
    }
}