ralph_workflow/files/llm_output_extraction/
xsd_validation_issues.rs1use crate::files::llm_output_extraction::xsd_validation::XsdValidationError;
7
8pub fn validate_issues_xml(xml_content: &str) -> Result<IssuesElements, XsdValidationError> {
38 let content = xml_content.trim();
39
40 let content = if content.starts_with("<?xml") {
42 if let Some(end) = content.find("?>") {
43 &content[end + 2..]
44 } else {
45 return Err(XsdValidationError {
46 error_type:
47 crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MalformedXml,
48 element_path: "xml".to_string(),
49 expected: "valid XML declaration ending with ?>".to_string(),
50 found: "unclosed XML declaration".to_string(),
51 suggestion: "Ensure XML declaration is properly closed with ?>".to_string(),
52 });
53 }
54 } else {
55 content
56 };
57
58 let content = content.trim();
59
60 if !content.starts_with("<ralph-issues>") {
62 return Err(XsdValidationError {
63 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
64 element_path: "ralph-issues".to_string(),
65 expected: "<ralph-issues> as root element".to_string(),
66 found: if content.is_empty() {
67 "empty content".to_string()
68 } else if content.len() < 50 {
69 content.to_string()
70 } else {
71 format!("{}...", &content[..50])
72 },
73 suggestion: "Wrap your issues in <ralph-issues> tags".to_string(),
74 });
75 }
76
77 if !content.ends_with("</ralph-issues>") {
78 return Err(XsdValidationError {
79 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
80 element_path: "ralph-issues".to_string(),
81 expected: "closing </ralph-issues> tag".to_string(),
82 found: "missing closing tag".to_string(),
83 suggestion: "Add </ralph-issues> at the end of your issues".to_string(),
84 });
85 }
86
87 let root_start = "<ralph-issues>".len();
89 let root_end = content.len() - "</ralph-issues>".len();
90 let issues_content = &content[root_start..root_end];
91
92 let mut issues = Vec::new();
94 let mut no_issues_found = None;
95
96 let mut remaining = issues_content.trim();
98
99 while !remaining.is_empty() {
100 if let Some(tag_content) = extract_tag_content(remaining, "ralph-issue") {
102 if no_issues_found.is_some() {
104 return Err(XsdValidationError {
105 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
106 element_path: "ralph-issue".to_string(),
107 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
108 found: "mixed issues and no-issues-found".to_string(),
109 suggestion: "Use either <ralph-issue> elements when issues exist, or <ralph-no-issues-found> when no issues exist, not both".to_string(),
110 });
111 }
112 issues.push(tag_content);
113 remaining = advance_past_tag(remaining, "ralph-issue");
114 continue;
115 }
116
117 if let Some(tag_content) = extract_tag_content(remaining, "ralph-no-issues-found") {
119 if !issues.is_empty() {
121 return Err(XsdValidationError {
122 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
123 element_path: "ralph-no-issues-found".to_string(),
124 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
125 found: "mixed issues and no-issues-found".to_string(),
126 suggestion: "Use either <ralph-issue> elements when issues exist, or <ralph-no-issues-found> when no issues exist, not both".to_string(),
127 });
128 }
129 if no_issues_found.is_some() {
130 return Err(XsdValidationError {
131 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
132 element_path: "ralph-no-issues-found".to_string(),
133 expected: "only one <ralph-no-issues-found> element".to_string(),
134 found: "duplicate <ralph-no-issues-found> element".to_string(),
135 suggestion: "Include only one <ralph-no-issues-found> element".to_string(),
136 });
137 }
138 no_issues_found = Some(tag_content);
139 remaining = advance_past_tag(remaining, "ralph-no-issues-found");
140 continue;
141 }
142
143 let first_fifty = if remaining.len() > 50 {
145 format!("{}...", &remaining[..50])
146 } else {
147 remaining.to_string()
148 };
149
150 if remaining.starts_with('<') {
152 if let Some(tag_end) = remaining.find('>') {
153 let potential_tag = &remaining[..tag_end + 1];
154 return Err(XsdValidationError {
155 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
156 element_path: potential_tag.to_string(),
157 expected: "only valid issues tags".to_string(),
158 found: format!("unexpected tag: {potential_tag}"),
159 suggestion: "Remove the unexpected tag. Valid tags are: <ralph-issue>, <ralph-no-issues-found>".to_string(),
160 });
161 }
162 }
163
164 return Err(XsdValidationError {
165 error_type:
166 crate::files::llm_output_extraction::xsd_validation::XsdErrorType::InvalidContent,
167 element_path: "content".to_string(),
168 expected: "only XML tags".to_string(),
169 found: first_fifty,
170 suggestion:
171 "Remove any text outside of XML tags. All content must be within appropriate tags."
172 .to_string(),
173 });
174 }
175
176 let filtered_issues: Vec<String> = issues
179 .into_iter()
180 .map(|s| s.trim().to_string())
181 .filter(|s| !s.is_empty())
182 .collect();
183 let filtered_no_issues = no_issues_found
184 .map(|s| s.trim().to_string())
185 .filter(|s| !s.is_empty());
186
187 if filtered_issues.is_empty() && filtered_no_issues.is_none() {
188 return Err(XsdValidationError {
189 error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
190 element_path: "ralph-issues".to_string(),
191 expected: "expected at least one <ralph-issue> element OR <ralph-no-issues-found>".to_string(),
192 found: "no issues or no-issues-found element".to_string(),
193 suggestion: "Add either <ralph-issue> elements for issues found, or <ralph-no-issues-found> if no issues exist".to_string(),
194 });
195 }
196
197 Ok(IssuesElements {
198 issues: filtered_issues,
199 no_issues_found: filtered_no_issues,
200 })
201}
202
/// Returns the text between `<tag_name>` and the first following
/// `</tag_name>` when `content`, ignoring leading whitespace, begins with the
/// opening tag.
///
/// Returns `None` when `content` does not start with the opening tag or when
/// no closing tag follows it. The returned text is not trimmed.
fn extract_tag_content(content: &str, tag_name: &str) -> Option<String> {
    let opening = format!("<{tag_name}>");
    let closing = format!("</{tag_name}>");

    // Everything after the opening tag; `None` if the tag is not at the front.
    let body = content.trim_start().strip_prefix(opening.as_str())?;

    body.find(closing.as_str())
        .map(|end| body[..end].to_string())
}
220
/// Returns the part of `content` that follows the first `</tag_name>`, with
/// leading whitespace removed.
///
/// When no closing tag is present, the empty slice at the end of `content` is
/// returned.
fn advance_past_tag<'a>(content: &'a str, tag_name: &str) -> &'a str {
    let closing = format!("</{tag_name}>");

    match content.trim_start().split_once(closing.as_str()) {
        Some((_, rest)) => rest.trim_start(),
        None => &content[content.len()..],
    }
}
233
/// Elements extracted from a `<ralph-issues>` document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssuesElements {
    /// Text of each `<ralph-issue>` element.
    pub issues: Vec<String>,
    /// Text of the `<ralph-no-issues-found>` element, when one was present.
    pub no_issues_found: Option<String>,
}

impl IssuesElements {
    /// Returns `true` when there are no issues and a "no issues found"
    /// message is present.
    ///
    /// A value with neither issues nor a message is not considered empty by
    /// this method.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn is_empty(&self) -> bool {
        self.no_issues_found.is_some() && self.issues.is_empty()
    }

    /// Returns the number of issues held.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn issue_count(&self) -> usize {
        self.issues.len()
    }
}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// A single issue is returned as-is and no "no issues" message is set.
    #[test]
    fn test_validate_valid_single_issue() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue description</ralph-issue>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("single-issue document should validate");

        assert_eq!(
            elements.issues,
            vec!["First issue description".to_string()]
        );
        assert!(elements.no_issues_found.is_none());
    }

    /// Every `<ralph-issue>` element in the document is collected.
    #[test]
    fn test_validate_valid_multiple_issues() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-issue>Second issue</ralph-issue>
<ralph-issue>Third issue</ralph-issue>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("multi-issue document should validate");

        assert_eq!(elements.issues.len(), 3);
        assert_eq!(elements.issue_count(), 3);
    }

    /// A lone `<ralph-no-issues-found>` element yields an "empty" result.
    #[test]
    fn test_validate_valid_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("no-issues document should validate");

        assert!(elements.issues.is_empty());
        assert!(elements.no_issues_found.is_some());
        assert!(elements.is_empty());
    }

    /// Text without the root element is rejected against `ralph-issues`.
    #[test]
    fn test_validate_missing_root_element() {
        let xml = r#"Some random text without proper XML tags"#;

        let error = validate_issues_xml(xml).expect_err("text without a root element must fail");

        assert_eq!(error.element_path, "ralph-issues");
    }

    /// A root element with no children is rejected.
    #[test]
    fn test_validate_empty_issues() {
        let xml = r#"<ralph-issues>
</ralph-issues>"#;

        let error = validate_issues_xml(xml).expect_err("an empty root element must fail");

        assert!(error.expected.contains("at least one"));
    }

    /// Issues and a "no issues found" element cannot be combined.
    #[test]
    fn test_validate_mixed_issues_and_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
</ralph-issues>"#;

        let error = validate_issues_xml(xml).expect_err("mixed content must fail");

        assert!(error.suggestion.contains("not both"));
    }

    /// Only one "no issues found" element is allowed.
    #[test]
    fn test_validate_duplicate_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
<ralph-no-issues-found>Also no issues</ralph-no-issues-found>
</ralph-issues>"#;

        assert!(validate_issues_xml(xml).is_err());
    }
}