Skip to main content

ralph_workflow/files/llm_output_extraction/commit/
extraction.rs

1//! Commit message extraction from strict XML documents.
2
3use super::rendering::render_final_commit_message;
4use crate::common::truncate_text;
5use crate::files::llm_output_extraction::xsd_validation::validate_xml_against_xsd;
6use crate::reducer::state::pipeline::ExcludedFile;
7
8fn extract_commit_message(
9    elements: crate::files::llm_output_extraction::xsd_validation::CommitMessageElements,
10    extraction_pattern: &str,
11) -> (
12    Option<String>,
13    Option<String>,
14    Vec<String>,
15    Vec<ExcludedFile>,
16    String,
17) {
18    if let Some(reason) = elements.skip_reason {
19        return (
20            None,
21            Some(reason.clone()),
22            vec![],
23            vec![],
24            format!("Found <ralph-skip> via {extraction_pattern}, reason: '{reason}'"),
25        );
26    }
27
28    let files = elements.files.clone();
29    let excluded_files = elements.excluded_files.clone();
30    let body = elements.format_body();
31    let message = if body.is_empty() {
32        elements.subject
33    } else {
34        format!("{}\n\n{}", elements.subject, body)
35    };
36
37    let has_body = message.lines().count() > 1;
38    let message_preview = {
39        let escaped = message.replace('\n', "\\n");
40        truncate_text(&escaped, 83)
41    };
42    let files_note = if files.is_empty() {
43        String::new()
44    } else {
45        format!(", files={}", files.len())
46    };
47    let excluded_note = if excluded_files.is_empty() {
48        String::new()
49    } else {
50        format!(", excluded={}", excluded_files.len())
51    };
52
53    (
54        Some(message),
55        None,
56        files,
57        excluded_files,
58        format!(
59            "Found <ralph-commit> via {}, XSD validation passed, body={}{}{}, message: '{}'",
60            extraction_pattern,
61            if has_body { "present" } else { "absent" },
62            files_note,
63            excluded_note,
64            message_preview
65        ),
66    )
67}
68
69/// Result of commit message extraction.
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct CommitExtractionResult(String);
72
73impl CommitExtractionResult {
74    #[must_use]
75    pub const fn new(message: String) -> Self {
76        Self(message)
77    }
78
79    #[must_use]
80    pub fn into_message(self) -> String {
81        render_final_commit_message(&self.0)
82    }
83}
84
85/// Extract a commit message from a strict XML document.
86#[must_use]
87pub fn try_extract_xml_commit_document_with_trace(
88    content: &str,
89) -> (
90    Option<String>,
91    Option<String>,
92    Vec<String>,
93    Vec<ExcludedFile>,
94    String,
95) {
96    match validate_xml_against_xsd(content.trim()) {
97        Ok(elements) => extract_commit_message(elements, "strict XML document"),
98        Err(e) => {
99            let error_msg = e.format_for_ai_retry();
100            (
101                None,
102                None,
103                vec![],
104                vec![],
105                format!("XSD validation failed: {error_msg}"),
106            )
107        }
108    }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `inner` in a `<ralph-commit>` root element, with the opening tag,
    /// the inner markup, and the closing tag each on their own line.
    fn commit_document(inner: &str) -> String {
        format!("<ralph-commit>\n{inner}\n</ralph-commit>")
    }

    #[test]
    fn test_commit_extraction_result_into_message() {
        let wrapped = CommitExtractionResult::new(String::from("feat: add feature"));
        assert_eq!(wrapped.into_message(), "feat: add feature");
    }

    #[test]
    fn test_xml_extract_basic_subject_only() {
        let xml = commit_document("<ralph-subject>feat: add new feature</ralph-subject>");
        let (message, skip_reason, _, _, trace) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message
            .unwrap_or_else(|| panic!("Should extract from basic XML. Reason: {trace}"));
        assert!(skip_reason.is_none());
        assert_eq!(text, "feat: add new feature");
    }

    #[test]
    fn test_xml_extract_with_body() {
        let xml = commit_document(concat!(
            "<ralph-subject>feat(auth): add OAuth2 login flow</ralph-subject>\n",
            "<ralph-body>Implement Google and GitHub OAuth providers.\n",
            "Add session management for OAuth tokens.</ralph-body>",
        ));
        let (message, skip_reason, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should extract from XML with body");
        assert!(skip_reason.is_none());
        assert!(text.starts_with("feat(auth): add OAuth2 login flow"));
        assert!(text.contains("Implement Google and GitHub OAuth providers"));
        assert!(text.contains("Add session management"));
    }

    #[test]
    fn test_xml_extract_with_empty_body() {
        let xml = commit_document(concat!(
            "<ralph-subject>fix: resolve bug</ralph-subject>\n",
            "<ralph-body></ralph-body>",
        ));
        let (message, skip_reason, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should extract even with empty body");
        assert!(skip_reason.is_none());
        assert_eq!(text, "fix: resolve bug");
    }

    #[test]
    fn test_xml_extract_fails_missing_tags() {
        let (message, _, _, _, _) =
            try_extract_xml_commit_document_with_trace("Just some text without XML tags");
        assert!(message.is_none(), "Should fail when XML tags are missing");
    }

    #[test]
    fn test_xml_extract_fails_invalid_commit_type() {
        let xml = commit_document("<ralph-subject>invalid: not a real type</ralph-subject>");
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(message.is_none(), "Should reject invalid commit type");
    }

    #[test]
    fn test_xml_extract_fails_missing_subject() {
        let xml = commit_document("<ralph-body>Just a body, no subject</ralph-body>");
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(message.is_none(), "Should fail when subject is missing");
    }

    #[test]
    fn test_xml_extract_fails_empty_subject() {
        let xml = commit_document("<ralph-subject></ralph-subject>");
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(message.is_none(), "Should fail when subject is empty");
    }

    #[test]
    fn test_xml_extract_handles_whitespace_in_subject() {
        let xml = commit_document("<ralph-subject>   docs: update readme   </ralph-subject>");
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should handle whitespace in subject");
        assert_eq!(text, "docs: update readme");
    }

    #[test]
    fn test_xml_extract_with_breaking_change() {
        let xml = commit_document(concat!(
            "<ralph-subject>feat!: drop Python 3.7 support</ralph-subject>\n",
            "<ralph-body>BREAKING CHANGE: Minimum Python version is now 3.8.</ralph-body>",
        ));
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should handle breaking change indicator");
        assert!(text.starts_with("feat!:"));
        assert!(text.contains("BREAKING CHANGE"));
    }

    #[test]
    fn test_xml_extract_with_scope() {
        let xml = commit_document(
            "<ralph-subject>test(parser): add coverage for edge cases</ralph-subject>",
        );
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should handle scope in subject");
        assert_eq!(text, "test(parser): add coverage for edge cases");
    }

    #[test]
    fn test_xml_extract_body_preserves_newlines() {
        let xml = commit_document(concat!(
            "<ralph-subject>feat: add feature</ralph-subject>\n",
            "<ralph-body>Line 1\n",
            "Line 2\n",
            "Line 3</ralph-body>",
        ));
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(&xml);
        let text = message.expect("Should preserve newlines in body");
        assert!(text.contains("Line 1\nLine 2\nLine 3"));
    }

    #[test]
    fn test_xml_extract_fails_malformed_tags() {
        // Closing and opening root tags are deliberately swapped.
        let xml = concat!(
            "</ralph-commit>\n",
            "<ralph-subject>feat: add feature</ralph-subject>\n",
            "<ralph-commit>",
        );
        let (message, _, _, _, _) = try_extract_xml_commit_document_with_trace(xml);
        assert!(message.is_none(), "Should fail for malformed tags");
    }

    #[test]
    fn test_xsd_validation_integrated_in_extraction() {
        let xml = commit_document("<ralph-subject>fix: resolve bug</ralph-subject>");
        let (message, _, _, _, trace) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(message.is_some(), "Should extract valid message");
        assert!(trace.contains("XSD"), "Trace should mention XSD validation");
    }

    #[test]
    fn test_xml_extract_with_excluded_files_returns_them() {
        use crate::reducer::state::pipeline::ExcludedFileReason;

        let inner = [
            "<ralph-subject>feat: add feature</ralph-subject>",
            "<ralph-files>",
            "<ralph-file>src/main.rs</ralph-file>",
            "</ralph-files>",
            "<ralph-excluded-files>",
            r#"<ralph-excluded-file reason="deferred">src/other.rs</ralph-excluded-file>"#,
            "</ralph-excluded-files>",
        ]
        .join("\n");
        let xml = commit_document(&inner);

        let (message, _, files, excluded, _) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(message.is_some(), "Should extract commit message");
        assert_eq!(files.len(), 1);
        assert_eq!(excluded.len(), 1);

        let first_excluded = &excluded[0];
        assert_eq!(first_excluded.path, "src/other.rs");
        assert!(matches!(
            first_excluded.reason,
            ExcludedFileReason::Deferred
        ));
    }

    #[test]
    fn test_xml_extract_without_excluded_files_returns_empty_vec() {
        let xml = commit_document("<ralph-subject>feat: add feature</ralph-subject>");
        let (_, _, _, excluded, _) = try_extract_xml_commit_document_with_trace(&xml);
        assert!(excluded.is_empty(), "Should return empty excluded_files");
    }

    #[test]
    fn test_xml_document_extract_rejects_markdown_wrapped_xml() {
        // A fenced code block around otherwise valid XML must not be accepted.
        let wrapped =
            "```xml\n<ralph-commit><ralph-subject>fix: wrapped</ralph-subject></ralph-commit>\n```";

        let (message, skip_reason, _, _, trace) =
            try_extract_xml_commit_document_with_trace(wrapped);

        assert!(
            message.is_none(),
            "strict extractor should reject wrapped xml"
        );
        assert!(skip_reason.is_none());
        assert!(trace.contains("XSD validation failed"));
    }
}
324}