ralph_workflow/files/llm_output_extraction/
xsd_validation_issues.rs1use crate::files::llm_output_extraction::xml_helpers::{
9 create_reader, duplicate_element_error, format_content_preview, malformed_xml_error,
10 read_text_until_end, skip_to_end, text_outside_tags_error, unexpected_element_error,
11};
12use crate::files::llm_output_extraction::xsd_validation::{XsdErrorType, XsdValidationError};
13use quick_xml::events::Event;
14
15const EXAMPLE_ISSUES_XML: &str = r#"<ralph-issues>
17<ralph-issue>Missing error handling in API endpoint</ralph-issue>
18<ralph-issue>Variable shadowing in loop construct</ralph-issue>
19</ralph-issues>"#;
20
21const EXAMPLE_NO_ISSUES_XML: &str = r#"<ralph-issues>
23<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
24</ralph-issues>"#;
25
26pub fn validate_issues_xml(xml_content: &str) -> Result<IssuesElements, XsdValidationError> {
31 let content = xml_content.trim();
32
33 use crate::files::llm_output_extraction::xml_helpers::check_for_illegal_xml_characters;
36 check_for_illegal_xml_characters(content)?;
37
38 let mut reader = create_reader(content);
39 let mut buf = Vec::new();
40
41 loop {
43 match reader.read_event_into(&mut buf) {
44 Ok(Event::Start(e)) if e.name().as_ref() == b"ralph-issues" => break,
45 Ok(Event::Start(e)) => {
46 let name_bytes = e.name();
47 let tag_name = String::from_utf8_lossy(name_bytes.as_ref());
48 return Err(XsdValidationError {
49 error_type: XsdErrorType::MissingRequiredElement,
50 element_path: "ralph-issues".to_string(),
51 expected: "<ralph-issues> as root element".to_string(),
52 found: format!("<{}> (wrong root element)", tag_name),
53 suggestion: "Use <ralph-issues> as the root element.".to_string(),
54 example: Some(EXAMPLE_ISSUES_XML.into()),
55 });
56 }
57 Ok(Event::Text(_)) => {
58 }
60 Ok(Event::Eof) => {
61 return Err(XsdValidationError {
62 error_type: XsdErrorType::MissingRequiredElement,
63 element_path: "ralph-issues".to_string(),
64 expected: "<ralph-issues> as root element".to_string(),
65 found: format_content_preview(content),
66 suggestion: "Wrap your issues in <ralph-issues>...</ralph-issues> tags."
67 .to_string(),
68 example: Some(EXAMPLE_ISSUES_XML.into()),
69 });
70 }
71 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
73 }
74 buf.clear();
75 }
76
77 let mut issues: Vec<String> = Vec::new();
79 let mut no_issues_found: Option<String> = None;
80
81 const VALID_TAGS: [&str; 2] = ["ralph-issue", "ralph-no-issues-found"];
82
83 loop {
84 buf.clear();
85 match reader.read_event_into(&mut buf) {
86 Ok(Event::Start(e)) => {
87 match e.name().as_ref() {
88 b"ralph-issue" => {
89 if no_issues_found.is_some() {
91 return Err(XsdValidationError {
92 error_type: XsdErrorType::UnexpectedElement,
93 element_path: "ralph-issues/ralph-issue".to_string(),
94 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
95 found: "mixed issues and no-issues-found".to_string(),
96 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
97 example: Some(EXAMPLE_ISSUES_XML.into()),
98 });
99 }
100 let issue_text = read_text_until_end(&mut reader, b"ralph-issue")?;
101 issues.push(issue_text);
102 }
103 b"ralph-no-issues-found" => {
104 if !issues.is_empty() {
106 return Err(XsdValidationError {
107 error_type: XsdErrorType::UnexpectedElement,
108 element_path: "ralph-issues/ralph-no-issues-found".to_string(),
109 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
110 found: "mixed issues and no-issues-found".to_string(),
111 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
112 example: Some(EXAMPLE_NO_ISSUES_XML.into()),
113 });
114 }
115 if no_issues_found.is_some() {
116 return Err(duplicate_element_error(
117 "ralph-no-issues-found",
118 "ralph-issues",
119 ));
120 }
121 no_issues_found =
122 Some(read_text_until_end(&mut reader, b"ralph-no-issues-found")?);
123 }
124 other => {
125 let _ = skip_to_end(&mut reader, other);
126 return Err(unexpected_element_error(other, &VALID_TAGS, "ralph-issues"));
127 }
128 }
129 }
130 Ok(Event::Text(e)) => {
131 let text = e.unescape().unwrap_or_default();
132 let trimmed = text.trim();
133 if !trimmed.is_empty() {
134 return Err(text_outside_tags_error(trimmed, "ralph-issues"));
135 }
136 }
137 Ok(Event::End(e)) if e.name().as_ref() == b"ralph-issues" => break,
138 Ok(Event::Eof) => {
139 return Err(XsdValidationError {
140 error_type: XsdErrorType::MalformedXml,
141 element_path: "ralph-issues".to_string(),
142 expected: "closing </ralph-issues> tag".to_string(),
143 found: "end of content without closing tag".to_string(),
144 suggestion: "Add </ralph-issues> at the end.".to_string(),
145 example: Some(EXAMPLE_ISSUES_XML.into()),
146 });
147 }
148 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
150 }
151 }
152
153 let filtered_issues: Vec<String> = issues.into_iter().filter(|s| !s.is_empty()).collect();
155 let filtered_no_issues = no_issues_found.filter(|s| !s.is_empty());
156
157 if filtered_issues.is_empty() && filtered_no_issues.is_none() {
159 return Err(XsdValidationError {
160 error_type: XsdErrorType::MissingRequiredElement,
161 element_path: "ralph-issues".to_string(),
162 expected: "at least one <ralph-issue> element OR <ralph-no-issues-found>".to_string(),
163 found: "empty <ralph-issues> element".to_string(),
164 suggestion:
165 "Add <ralph-issue> elements for issues found, or <ralph-no-issues-found> if no issues exist."
166 .to_string(),
167 example: Some(EXAMPLE_ISSUES_XML.into()),
168 });
169 }
170
171 Ok(IssuesElements {
172 issues: filtered_issues,
173 no_issues_found: filtered_no_issues,
174 })
175}
176
177#[derive(Debug, Clone, PartialEq, Eq)]
179pub struct IssuesElements {
180 pub issues: Vec<String>,
182 pub no_issues_found: Option<String>,
184}
185
186impl IssuesElements {
187 #[cfg(any(test, feature = "test-utils"))]
189 pub fn is_empty(&self) -> bool {
190 self.issues.is_empty() && self.no_issues_found.is_some()
191 }
192
193 #[cfg(any(test, feature = "test-utils"))]
195 pub fn issue_count(&self) -> usize {
196 self.issues.len()
197 }
198}
199
200#[cfg(test)]
201mod tests {
202 use super::*;
203
204 #[test]
205 fn test_validate_valid_single_issue() {
206 let xml = r#"<ralph-issues>
207<ralph-issue>First issue description</ralph-issue>
208</ralph-issues>"#;
209
210 let result = validate_issues_xml(xml);
211 assert!(result.is_ok());
212 let elements = result.unwrap();
213 assert_eq!(elements.issues.len(), 1);
214 assert_eq!(elements.issues[0], "First issue description");
215 assert!(elements.no_issues_found.is_none());
216 }
217
218 #[test]
219 fn test_validate_valid_multiple_issues() {
220 let xml = r#"<ralph-issues>
221<ralph-issue>First issue</ralph-issue>
222<ralph-issue>Second issue</ralph-issue>
223<ralph-issue>Third issue</ralph-issue>
224</ralph-issues>"#;
225
226 let result = validate_issues_xml(xml);
227 assert!(result.is_ok());
228 let elements = result.unwrap();
229 assert_eq!(elements.issues.len(), 3);
230 assert_eq!(elements.issue_count(), 3);
231 }
232
233 #[test]
234 fn test_validate_valid_no_issues_found() {
235 let xml = r#"<ralph-issues><ralph-no-issues-found>No issues were found during review</ralph-no-issues-found></ralph-issues>"#;
236
237 let result = validate_issues_xml(xml);
238 assert!(result.is_ok());
239 let elements = result.unwrap();
240 assert!(elements.issues.is_empty());
241 assert!(elements.no_issues_found.is_some());
242 assert!(elements.is_empty());
243 }
244
245 #[test]
246 fn test_validate_missing_root_element() {
247 let xml = r#"Some random text without proper XML tags"#;
248
249 let result = validate_issues_xml(xml);
250 assert!(result.is_err());
251 let error = result.unwrap_err();
252 assert_eq!(error.element_path, "ralph-issues");
253 }
254
255 #[test]
256 fn test_validate_empty_issues() {
257 let xml = r#"<ralph-issues></ralph-issues>"#;
258
259 let result = validate_issues_xml(xml);
260 assert!(result.is_err());
261 let error = result.unwrap_err();
262 assert!(error.expected.contains("at least one"));
263 }
264
265 #[test]
266 fn test_validate_mixed_issues_and_no_issues_found() {
267 let xml = r#"<ralph-issues><ralph-issue>First issue</ralph-issue><ralph-no-issues-found>No issues</ralph-no-issues-found></ralph-issues>"#;
268
269 let result = validate_issues_xml(xml);
270 assert!(result.is_err());
271 let error = result.unwrap_err();
272 assert!(error.suggestion.contains("not both") || error.expected.contains("not both"));
273 }
274
275 #[test]
276 fn test_validate_duplicate_no_issues_found() {
277 let xml = r#"<ralph-issues><ralph-no-issues-found>No issues</ralph-no-issues-found><ralph-no-issues-found>Also no issues</ralph-no-issues-found></ralph-issues>"#;
278
279 let result = validate_issues_xml(xml);
280 assert!(result.is_err());
281 }
282
283 #[test]
284 fn test_validate_whitespace_handling() {
285 let xml =
287 " <ralph-issues> \n <ralph-issue>Issue text</ralph-issue> \n </ralph-issues> ";
288
289 let result = validate_issues_xml(xml);
290 assert!(result.is_ok());
291 }
292
293 #[test]
294 fn test_validate_with_xml_declaration() {
295 let xml = r#"<?xml version="1.0"?><ralph-issues><ralph-issue>Issue text</ralph-issue></ralph-issues>"#;
296
297 let result = validate_issues_xml(xml);
298 assert!(result.is_ok());
299 }
300
301 #[test]
302 fn test_validate_issue_with_code_element() {
303 let xml = r#"<ralph-issues><ralph-issue>Check if <code>a < b</code> is valid</ralph-issue></ralph-issues>"#;
305
306 let result = validate_issues_xml(xml);
307 assert!(result.is_ok());
308 let elements = result.unwrap();
309 assert_eq!(elements.issues.len(), 1);
310 assert!(elements.issues[0].contains("Check if"));
312 assert!(elements.issues[0].contains("a < b"));
313 assert!(elements.issues[0].contains("is valid"));
314 }
315
316 #[test]
317 fn test_validate_no_issues_with_code_element() {
318 let xml = r#"<ralph-issues><ralph-no-issues-found>All <code>Record<string, T></code> types are correct</ralph-no-issues-found></ralph-issues>"#;
319
320 let result = validate_issues_xml(xml);
321 assert!(result.is_ok());
322 let elements = result.unwrap();
323 assert!(elements.no_issues_found.is_some());
324 let msg = elements.no_issues_found.unwrap();
325 assert!(msg.contains("Record<string, T>"));
326 }
327
328 #[test]
334 fn test_llm_realistic_issue_with_generic_type_escaped() {
335 let xml = r#"<ralph-issues>
337<ralph-issue>[High] src/parser.rs:42 - The function <code>parse<T></code> does not handle empty input.
338Suggested fix: Add a check for empty input before parsing.</ralph-issue>
339</ralph-issues>"#;
340
341 let result = validate_issues_xml(xml);
342 assert!(result.is_ok(), "Should parse escaped generic: {:?}", result);
343 let elements = result.unwrap();
344 assert!(elements.issues[0].contains("parse<T>"));
345 }
346
347 #[test]
348 fn test_llm_realistic_issue_with_comparison_escaped() {
349 let xml = r#"<ralph-issues>
351<ralph-issue>[Medium] src/validate.rs:15 - The condition <code>count < 0</code> should be <code>count <= 0</code>.
352Suggested fix: Change the comparison operator.</ralph-issue>
353</ralph-issues>"#;
354
355 let result = validate_issues_xml(xml);
356 assert!(
357 result.is_ok(),
358 "Should parse escaped comparisons: {:?}",
359 result
360 );
361 let elements = result.unwrap();
362 assert!(elements.issues[0].contains("count < 0"));
363 assert!(elements.issues[0].contains("count <= 0"));
364 }
365
366 #[test]
367 fn test_llm_realistic_issue_with_logical_operators_escaped() {
368 let xml = r#"<ralph-issues><ralph-issue>[Low] src/filter.rs:88 - The expression <code>a && b || c</code> has ambiguous precedence.
370Suggested fix: Add explicit parentheses.</ralph-issue></ralph-issues>"#;
371
372 let result = validate_issues_xml(xml);
373 assert!(
374 result.is_ok(),
375 "Should parse escaped logical operators: {:?}",
376 result
377 );
378 let elements = result.unwrap();
379 assert!(elements.issues[0].contains("a && b || c"));
380 }
381
382 #[test]
383 fn test_llm_realistic_issue_with_rust_lifetime() {
384 let xml = r#"<ralph-issues><ralph-issue>[High] src/buffer.rs:23 - The lifetime <code>&'a str</code> should match the struct lifetime.
386Suggested fix: Ensure lifetime annotations are consistent.</ralph-issue></ralph-issues>"#;
387
388 let result = validate_issues_xml(xml);
389 assert!(result.is_ok(), "Should parse lifetime syntax: {:?}", result);
390 let elements = result.unwrap();
391 assert!(elements.issues[0].contains("&'a str"));
392 }
393
394 #[test]
395 fn test_llm_realistic_issue_with_html_in_description() {
396 let xml = r#"<ralph-issues><ralph-issue>[Medium] src/template.rs:56 - The HTML template uses <code><div class="container"></code> but should use semantic tags.
398Suggested fix: Replace with appropriate semantic HTML elements.</ralph-issue></ralph-issues>"#;
399
400 let result = validate_issues_xml(xml);
401 assert!(result.is_ok(), "Should parse HTML in code: {:?}", result);
402 let elements = result.unwrap();
403 assert!(elements.issues[0].contains("<div class=\"container\">"));
404 }
405
406 #[test]
407 fn test_llm_realistic_no_issues_with_detailed_explanation() {
408 let xml = "<ralph-issues><ralph-no-issues-found>The implementation correctly handles all edge cases:\n- Input validation properly rejects values where <code>x < 0</code>\n- The generic <code>Result<T, E></code> type is used consistently\n- Error handling follows the project's established patterns\nNo issues require attention.</ralph-no-issues-found></ralph-issues>";
410
411 let result = validate_issues_xml(xml);
412 assert!(
413 result.is_ok(),
414 "Should parse detailed no-issues: {:?}",
415 result
416 );
417 let elements = result.unwrap();
418 let msg = elements.no_issues_found.unwrap();
419 assert!(msg.contains("x < 0"));
420 assert!(msg.contains("Result<T, E>"));
421 }
422
423 #[test]
424 fn test_llm_realistic_multiple_issues_with_mixed_content() {
425 let xml = r#"<ralph-issues><ralph-issue>[Critical] src/auth.rs:12 - SQL injection vulnerability: user input in <code>query && filter</code> is not sanitized.</ralph-issue><ralph-issue>[High] src/api.rs:45 - Missing null check: <code>response.data</code> may be undefined when <code>status < 200</code>.</ralph-issue><ralph-issue>[Medium] src/utils.rs:78 - The type <code>Option<Vec<T>></code> could be simplified to <code>Vec<T></code> with empty default.</ralph-issue></ralph-issues>"#;
427
428 let result = validate_issues_xml(xml);
429 assert!(
430 result.is_ok(),
431 "Should parse multiple issues with mixed content: {:?}",
432 result
433 );
434 let elements = result.unwrap();
435 assert_eq!(elements.issues.len(), 3);
436 assert!(elements.issues[0].contains("query && filter"));
437 assert!(elements.issues[1].contains("status < 200"));
438 assert!(elements.issues[2].contains("Option<Vec<T>>"));
439 }
440
441 #[test]
442 fn test_llm_mistake_unescaped_less_than_fails() {
443 let xml = r#"<ralph-issues><ralph-issue>[High] src/compare.rs:10 - The condition a < b is wrong.</ralph-issue></ralph-issues>"#;
445
446 let result = validate_issues_xml(xml);
447 assert!(
448 result.is_err(),
449 "Unescaped < should fail XML parsing: {:?}",
450 result
451 );
452 }
453
454 #[test]
455 fn test_llm_mistake_unescaped_generic_fails() {
456 let xml = r#"<ralph-issues><ralph-issue>[High] src/types.rs:5 - The type Vec<String> is incorrect.</ralph-issue></ralph-issues>"#;
458
459 let result = validate_issues_xml(xml);
460 assert!(
461 result.is_err(),
462 "Unescaped generic should fail XML parsing: {:?}",
463 result
464 );
465 }
466
467 #[test]
468 fn test_llm_mistake_unescaped_ampersand_fails() {
469 let xml = r#"<ralph-issues><ralph-issue>[High] src/logic.rs:20 - The expression a && b is wrong.</ralph-issue></ralph-issues>"#;
471
472 let result = validate_issues_xml(xml);
473 assert!(
474 result.is_err(),
475 "Unescaped && should fail XML parsing: {:?}",
476 result
477 );
478 }
479
480 #[test]
481 fn test_llm_uses_cdata_for_code_content() {
482 let xml = r#"<ralph-issues><ralph-issue>[High] src/cmp.rs:10 - The condition <code><![CDATA[a < b && c > d]]></code> has issues.</ralph-issue></ralph-issues>"#;
484
485 let result = validate_issues_xml(xml);
486 assert!(result.is_ok(), "CDATA should be valid: {:?}", result);
487 let elements = result.unwrap();
488 assert!(elements.issues[0].contains("a < b && c > d"));
489 }
490
491 #[test]
496 fn test_validate_nul_byte_from_nbsp_typo() {
497 let xml =
501 "<ralph-issues><ralph-issue>Check git\u{0000}A0diff usage</ralph-issue></ralph-issues>";
502
503 let result = validate_issues_xml(xml);
504 assert!(result.is_err(), "NUL byte should be rejected");
505
506 let error = result.unwrap_err();
507 assert!(
508 error.found.contains("NUL") || error.found.contains("0x00"),
509 "Error should identify NUL byte, got: {}",
510 error.found
511 );
512 assert!(
513 error.suggestion.contains("\\u00A0") || error.suggestion.contains("non-breaking space"),
514 "Error should suggest NBSP as common fix, got: {}",
515 error.suggestion
516 );
517 }
518}