//! Validation of `<ralph-issues>` XML extracted from LLM output.

use crate::files::llm_output_extraction::xml_helpers::{
    create_reader, duplicate_element_error, malformed_xml_error, read_text_until_end, skip_to_end,
    text_outside_tags_error, unexpected_element_error,
};
use crate::files::llm_output_extraction::xsd_validation::{XsdErrorType, XsdValidationError};
use quick_xml::events::Event;

/// Example of a well-formed document that reports issues.
///
/// Attached to validation errors as the `example` field so the author of the
/// rejected XML can see the expected shape.
const EXAMPLE_ISSUES_XML: &str = r#"<ralph-issues>
<ralph-issue>Missing error handling in API endpoint</ralph-issue>
<ralph-issue>Variable shadowing in loop construct</ralph-issue>
</ralph-issues>"#;

/// Example of a well-formed document that reports that no issues were found.
///
/// Attached as the `example` field of the error raised when a
/// `<ralph-no-issues-found>` element is combined with `<ralph-issue>` elements.
const EXAMPLE_NO_ISSUES_XML: &str = r#"<ralph-issues>
<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
</ralph-issues>"#;

26pub fn validate_issues_xml(xml_content: &str) -> Result<IssuesElements, XsdValidationError> {
56 let content = xml_content.trim();
57 let mut reader = create_reader(content);
58 let mut buf = Vec::new();
59
60 loop {
62 match reader.read_event_into(&mut buf) {
63 Ok(Event::Start(e)) if e.name().as_ref() == b"ralph-issues" => break,
64 Ok(Event::Start(e)) => {
65 let name_bytes = e.name();
66 let tag_name = String::from_utf8_lossy(name_bytes.as_ref());
67 return Err(XsdValidationError {
68 error_type: XsdErrorType::MissingRequiredElement,
69 element_path: "ralph-issues".to_string(),
70 expected: "<ralph-issues> as root element".to_string(),
71 found: format!("<{}> (wrong root element)", tag_name),
72 suggestion: "Use <ralph-issues> as the root element.".to_string(),
73 example: Some(EXAMPLE_ISSUES_XML.into()),
74 });
75 }
76 Ok(Event::Text(_)) => {
77 }
79 Ok(Event::Eof) => {
80 return Err(XsdValidationError {
81 error_type: XsdErrorType::MissingRequiredElement,
82 element_path: "ralph-issues".to_string(),
83 expected: "<ralph-issues> as root element".to_string(),
84 found: if content.is_empty() {
85 "empty content".to_string()
86 } else if content.len() <= 60 {
87 content.to_string()
88 } else {
89 format!("{}...", &content[..60])
90 },
91 suggestion: "Wrap your issues in <ralph-issues>...</ralph-issues> tags."
92 .to_string(),
93 example: Some(EXAMPLE_ISSUES_XML.into()),
94 });
95 }
96 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
98 }
99 buf.clear();
100 }
101
102 let mut issues: Vec<String> = Vec::new();
104 let mut no_issues_found: Option<String> = None;
105
106 const VALID_TAGS: [&str; 2] = ["ralph-issue", "ralph-no-issues-found"];
107
108 loop {
109 buf.clear();
110 match reader.read_event_into(&mut buf) {
111 Ok(Event::Start(e)) => {
112 match e.name().as_ref() {
113 b"ralph-issue" => {
114 if no_issues_found.is_some() {
116 return Err(XsdValidationError {
117 error_type: XsdErrorType::UnexpectedElement,
118 element_path: "ralph-issues/ralph-issue".to_string(),
119 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
120 found: "mixed issues and no-issues-found".to_string(),
121 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
122 example: Some(EXAMPLE_ISSUES_XML.into()),
123 });
124 }
125 let issue_text = read_text_until_end(&mut reader, b"ralph-issue")?;
126 issues.push(issue_text);
127 }
128 b"ralph-no-issues-found" => {
129 if !issues.is_empty() {
131 return Err(XsdValidationError {
132 error_type: XsdErrorType::UnexpectedElement,
133 element_path: "ralph-issues/ralph-no-issues-found".to_string(),
134 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
135 found: "mixed issues and no-issues-found".to_string(),
136 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
137 example: Some(EXAMPLE_NO_ISSUES_XML.into()),
138 });
139 }
140 if no_issues_found.is_some() {
141 return Err(duplicate_element_error(
142 "ralph-no-issues-found",
143 "ralph-issues",
144 ));
145 }
146 no_issues_found =
147 Some(read_text_until_end(&mut reader, b"ralph-no-issues-found")?);
148 }
149 other => {
150 let _ = skip_to_end(&mut reader, other);
151 return Err(unexpected_element_error(other, &VALID_TAGS, "ralph-issues"));
152 }
153 }
154 }
155 Ok(Event::Text(e)) => {
156 let text = e.unescape().unwrap_or_default();
157 let trimmed = text.trim();
158 if !trimmed.is_empty() {
159 return Err(text_outside_tags_error(trimmed, "ralph-issues"));
160 }
161 }
162 Ok(Event::End(e)) if e.name().as_ref() == b"ralph-issues" => break,
163 Ok(Event::Eof) => {
164 return Err(XsdValidationError {
165 error_type: XsdErrorType::MalformedXml,
166 element_path: "ralph-issues".to_string(),
167 expected: "closing </ralph-issues> tag".to_string(),
168 found: "end of content without closing tag".to_string(),
169 suggestion: "Add </ralph-issues> at the end.".to_string(),
170 example: Some(EXAMPLE_ISSUES_XML.into()),
171 });
172 }
173 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
175 }
176 }
177
178 let filtered_issues: Vec<String> = issues.into_iter().filter(|s| !s.is_empty()).collect();
180 let filtered_no_issues = no_issues_found.filter(|s| !s.is_empty());
181
182 if filtered_issues.is_empty() && filtered_no_issues.is_none() {
184 return Err(XsdValidationError {
185 error_type: XsdErrorType::MissingRequiredElement,
186 element_path: "ralph-issues".to_string(),
187 expected: "at least one <ralph-issue> element OR <ralph-no-issues-found>".to_string(),
188 found: "empty <ralph-issues> element".to_string(),
189 suggestion:
190 "Add <ralph-issue> elements for issues found, or <ralph-no-issues-found> if no issues exist."
191 .to_string(),
192 example: Some(EXAMPLE_ISSUES_XML.into()),
193 });
194 }
195
196 Ok(IssuesElements {
197 issues: filtered_issues,
198 no_issues_found: filtered_no_issues,
199 })
200}
201
/// Content extracted from a validated `<ralph-issues>` document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssuesElements {
    /// Text of each `<ralph-issue>` element, in document order.
    pub issues: Vec<String>,
    /// Text of the `<ralph-no-issues-found>` element, when one was present.
    pub no_issues_found: Option<String>,
}

211impl IssuesElements {
212 #[cfg(any(test, feature = "test-utils"))]
214 pub fn is_empty(&self) -> bool {
215 self.issues.is_empty() && self.no_issues_found.is_some()
216 }
217
218 #[cfg(any(test, feature = "test-utils"))]
220 pub fn issue_count(&self) -> usize {
221 self.issues.len()
222 }
223}
224
/// Tests for [`validate_issues_xml`].
///
/// Fixtures that are expected to validate write XML-special characters inside
/// element text as entities (`&lt;`, `&gt;`, `&amp;`) or wrap them in CDATA.
/// The `test_llm_mistake_*` fixtures leave them raw on purpose, because those
/// documents must be rejected.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_valid_single_issue() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue description</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 1);
        assert_eq!(elements.issues[0], "First issue description");
        assert!(elements.no_issues_found.is_none());
    }

    #[test]
    fn test_validate_valid_multiple_issues() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-issue>Second issue</ralph-issue>
<ralph-issue>Third issue</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 3);
        assert_eq!(elements.issue_count(), 3);
    }

    #[test]
    fn test_validate_valid_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert!(elements.issues.is_empty());
        assert!(elements.no_issues_found.is_some());
        assert!(elements.is_empty());
    }

    #[test]
    fn test_validate_missing_root_element() {
        let xml = r#"Some random text without proper XML tags"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert_eq!(error.element_path, "ralph-issues");
    }

    #[test]
    fn test_validate_empty_issues() {
        let xml = r#"<ralph-issues>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.expected.contains("at least one"));
    }

    #[test]
    fn test_validate_mixed_issues_and_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.suggestion.contains("not both") || error.expected.contains("not both"));
    }

    #[test]
    fn test_validate_duplicate_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
<ralph-no-issues-found>Also no issues</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_whitespace_handling() {
        let xml =
            " <ralph-issues> \n <ralph-issue>Issue text</ralph-issue> \n </ralph-issues> ";

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_with_xml_declaration() {
        let xml = r#"<?xml version="1.0"?>
<ralph-issues>
<ralph-issue>Issue text</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_issue_with_code_element() {
        let xml = r#"<ralph-issues>
<ralph-issue>Check if <code>a &lt; b</code> is valid</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 1);
        assert!(elements.issues[0].contains("Check if"));
        assert!(elements.issues[0].contains("a < b"));
        assert!(elements.issues[0].contains("is valid"));
    }

    #[test]
    fn test_validate_no_issues_with_code_element() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>All <code>Record&lt;string, T&gt;</code> types are correct</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert!(elements.no_issues_found.is_some());
        let msg = elements.no_issues_found.unwrap();
        assert!(msg.contains("Record<string, T>"));
    }

    #[test]
    fn test_llm_realistic_issue_with_generic_type_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/parser.rs:42 - The function <code>parse&lt;T&gt;</code> does not handle empty input.
Suggested fix: Add a check for empty input before parsing.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse escaped generic: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("parse<T>"));
    }

    #[test]
    fn test_llm_realistic_issue_with_comparison_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Medium] src/validate.rs:15 - The condition <code>count &lt; 0</code> should be <code>count &lt;= 0</code>.
Suggested fix: Change the comparison operator.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse escaped comparisons: {:?}",
            result
        );
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("count < 0"));
        assert!(elements.issues[0].contains("count <= 0"));
    }

    #[test]
    fn test_llm_realistic_issue_with_logical_operators_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Low] src/filter.rs:88 - The expression <code>a &amp;&amp; b || c</code> has ambiguous precedence.
Suggested fix: Add explicit parentheses.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse escaped logical operators: {:?}",
            result
        );
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("a && b || c"));
    }

    #[test]
    fn test_llm_realistic_issue_with_rust_lifetime() {
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/buffer.rs:23 - The lifetime <code>&amp;'a str</code> should match the struct lifetime.
Suggested fix: Ensure lifetime annotations are consistent.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse lifetime syntax: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("&'a str"));
    }

    #[test]
    fn test_llm_realistic_issue_with_html_in_description() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Medium] src/template.rs:56 - The HTML template uses <code>&lt;div class="container"&gt;</code> but should use semantic tags.
Suggested fix: Replace with appropriate semantic HTML elements.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse HTML in code: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("<div class=\"container\">"));
    }

    #[test]
    fn test_llm_realistic_no_issues_with_detailed_explanation() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>The implementation correctly handles all edge cases:
- Input validation properly rejects values where <code>x &lt; 0</code>
- The generic <code>Result&lt;T, E&gt;</code> type is used consistently
- Error handling follows the project's established patterns
No issues require attention.</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse detailed no-issues: {:?}",
            result
        );
        let elements = result.unwrap();
        let msg = elements.no_issues_found.unwrap();
        assert!(msg.contains("x < 0"));
        assert!(msg.contains("Result<T, E>"));
    }

    #[test]
    fn test_llm_realistic_multiple_issues_with_mixed_content() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Critical] src/auth.rs:12 - SQL injection vulnerability: user input in <code>query &amp;&amp; filter</code> is not sanitized.</ralph-issue>
<ralph-issue>[High] src/api.rs:45 - Missing null check: <code>response.data</code> may be undefined when <code>status &lt; 200</code>.</ralph-issue>
<ralph-issue>[Medium] src/utils.rs:78 - The type <code>Option&lt;Vec&lt;T&gt;&gt;</code> could be simplified to <code>Vec&lt;T&gt;</code> with empty default.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse multiple issues with mixed content: {:?}",
            result
        );
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 3);
        assert!(elements.issues[0].contains("query && filter"));
        assert!(elements.issues[1].contains("status < 200"));
        assert!(elements.issues[2].contains("Option<Vec<T>>"));
    }

    #[test]
    fn test_llm_mistake_unescaped_less_than_fails() {
        // Deliberately raw `<`: this document must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/compare.rs:10 - The condition a < b is wrong.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped < should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_mistake_unescaped_generic_fails() {
        // Deliberately raw `<String>`: this document must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/types.rs:5 - The type Vec<String> is incorrect.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped generic should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_mistake_unescaped_ampersand_fails() {
        // Deliberately raw `&&`: this document must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/logic.rs:20 - The expression a && b is wrong.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped && should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_uses_cdata_for_code_content() {
        // Inside CDATA the special characters are legal without escaping.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/cmp.rs:10 - The condition <code><![CDATA[a < b && c > d]]></code> has issues.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "CDATA should be valid: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("a < b && c > d"));
    }
}